[pbs-devel] [PATCH v2 proxmox-backup 2/4] datastore: add manifest locking

Stefan Reiter s.reiter at proxmox.com
Thu Oct 15 12:49:14 CEST 2020


Avoid races when updating manifest data by flocking a lock file.
update_manifest is used to ensure updates always happen with the lock
held.

Snapshot deletion also acquires the lock, so it cannot interfere with an
outstanding manifest write.

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---

v2:
* Use seperate manifest lock file
* Change store_manifest to update_manifest to force consumers to use correct
  locking
* Don't hold lock across verify, reload manifest at the end
* Update comments

 src/api2/admin/datastore.rs    |  8 +++----
 src/api2/backup/environment.rs | 13 ++++------
 src/backup/datastore.rs        | 43 +++++++++++++++++++++++++++++-----
 src/backup/manifest.rs         |  1 +
 src/backup/verify.rs           |  9 +++----
 5 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
index de39cd1b..4f15c1cd 100644
--- a/src/api2/admin/datastore.rs
+++ b/src/api2/admin/datastore.rs
@@ -1481,11 +1481,9 @@ fn set_notes(
     let allowed = (user_privs & PRIV_DATASTORE_READ) != 0;
     if !allowed { check_backup_owner(&datastore, backup_dir.group(), &userid)?; }
 
-    let (mut manifest, _) = datastore.load_manifest(&backup_dir)?;
-
-    manifest.unprotected["notes"] = notes.into();
-
-    datastore.store_manifest(&backup_dir, manifest)?;
+    datastore.update_manifest(&backup_dir,|manifest| {
+        manifest.unprotected["notes"] = notes.into();
+    }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?;
 
     Ok(())
 }
diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index f00c2cd3..c4f166df 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -472,16 +472,11 @@ impl BackupEnvironment {
             bail!("backup does not contain valid files (file count == 0)");
         }
 
-        // check manifest
-        let (mut manifest, _) = self.datastore.load_manifest(&self.backup_dir)
-            .map_err(|err| format_err!("unable to load manifest blob - {}", err))?;
-
+        // check for valid manifest and store stats
         let stats = serde_json::to_value(state.backup_stat)?;
-
-        manifest.unprotected["chunk_upload_stats"] = stats;
-
-        self.datastore.store_manifest(&self.backup_dir, manifest)
-            .map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
+        self.datastore.update_manifest(&self.backup_dir, |manifest| {
+            manifest.unprotected["chunk_upload_stats"] = stats;
+        }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?;
 
         if let Some(base) = &self.last_backup {
             let path = self.datastore.snapshot_path(&base.backup_dir);
diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
index ca8ca438..7b2bee7e 100644
--- a/src/backup/datastore.rs
+++ b/src/backup/datastore.rs
@@ -3,17 +3,19 @@ use std::io::{self, Write};
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex};
 use std::convert::TryFrom;
+use std::time::Duration;
+use std::fs::File;
 
 use anyhow::{bail, format_err, Error};
 use lazy_static::lazy_static;
 
-use proxmox::tools::fs::{replace_file, CreateOptions};
+use proxmox::tools::fs::{replace_file, CreateOptions, open_file_locked};
 
 use super::backup_info::{BackupGroup, BackupDir};
 use super::chunk_store::ChunkStore;
 use super::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
 use super::fixed_index::{FixedIndexReader, FixedIndexWriter};
-use super::manifest::{MANIFEST_BLOB_NAME, CLIENT_LOG_BLOB_NAME, BackupManifest};
+use super::manifest::{MANIFEST_BLOB_NAME, MANIFEST_LOCK_NAME, CLIENT_LOG_BLOB_NAME, BackupManifest};
 use super::index::*;
 use super::{DataBlob, ArchiveType, archive_type};
 use crate::config::datastore;
@@ -235,9 +237,10 @@ impl DataStore {
 
         let full_path = self.snapshot_path(backup_dir);
 
-        let _guard;
+        let (_guard, _manifest_guard);
         if !force {
             _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+            _manifest_guard = self.lock_manifest(backup_dir);
         }
 
         // Acquire lock and keep it during remove operation, so there's no
@@ -665,8 +668,27 @@ impl DataStore {
             digest_str,
             err,
         ))
-     }
+    }
 
+    fn lock_manifest(
+        &self,
+        backup_dir: &BackupDir,
+    ) -> Result<File, Error> {
+        let mut path = self.base_path();
+        path.push(backup_dir.relative_path());
+        path.push(&MANIFEST_LOCK_NAME);
+
+        // update_manifest should never take a long time, so if someone else has
+        // the lock we can simply block a bit and should get it soon
+        open_file_locked(&path, Duration::from_secs(5), true)
+            .map_err(|err| {
+                format_err!(
+                    "unable to acquire manifest lock {:?} - {}", &path, err
+                )
+            })
+    }
+
+    /// Load the manifest without a lock. Must not be written back.
     pub fn load_manifest(
         &self,
         backup_dir: &BackupDir,
@@ -677,11 +699,19 @@ impl DataStore {
         Ok((manifest, raw_size))
     }
 
-    pub fn store_manifest(
+    /// Update the manifest of the specified snapshot. Never write a manifest directly,
+    /// only use this method - anything else may break locking guarantees.
+    pub fn update_manifest(
         &self,
         backup_dir: &BackupDir,
-        manifest: BackupManifest,
+        update_fn: impl FnOnce(&mut BackupManifest),
     ) -> Result<(), Error> {
+
+        let _guard = self.lock_manifest(backup_dir)?;
+        let (mut manifest, _) = self.load_manifest(&backup_dir)?;
+
+        update_fn(&mut manifest);
+
         let manifest = serde_json::to_value(manifest)?;
         let manifest = serde_json::to_string_pretty(&manifest)?;
         let blob = DataBlob::encode(manifest.as_bytes(), None, true)?;
@@ -691,6 +721,7 @@ impl DataStore {
         path.push(backup_dir.relative_path());
         path.push(MANIFEST_BLOB_NAME);
 
+        // atomic replace invalidates flock - no other writes past this point!
         replace_file(&path, raw_data, CreateOptions::new())?;
 
         Ok(())
diff --git a/src/backup/manifest.rs b/src/backup/manifest.rs
index 609cc998..51980a07 100644
--- a/src/backup/manifest.rs
+++ b/src/backup/manifest.rs
@@ -8,6 +8,7 @@ use ::serde::{Deserialize, Serialize};
 use crate::backup::{BackupDir, CryptMode, CryptConfig};
 
 pub const MANIFEST_BLOB_NAME: &str = "index.json.blob";
+pub const MANIFEST_LOCK_NAME: &str = ".index.json.lck";
 pub const CLIENT_LOG_BLOB_NAME: &str = "client.log.blob";
 
 mod hex_csum {
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 05b6ba86..ea3fa760 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -300,7 +300,7 @@ pub fn verify_backup_dir(
         return Ok(true);
     }
 
-    let mut manifest = match datastore.load_manifest(&backup_dir) {
+    let manifest = match datastore.load_manifest(&backup_dir) {
         Ok((manifest, _)) => manifest,
         Err(err) => {
             task_log!(
@@ -367,9 +367,10 @@ pub fn verify_backup_dir(
         state: verify_result,
         upid,
     };
-    manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
-    datastore.store_manifest(&backup_dir, manifest)
-        .map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
+    let verify_state = serde_json::to_value(verify_state)?;
+    datastore.update_manifest(&backup_dir, |manifest| {
+        manifest.unprotected["verify_state"] = verify_state;
+    }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?;
 
     Ok(error_count == 0)
 }
-- 
2.20.1






More information about the pbs-devel mailing list