[pbs-devel] [PATCH proxmox-backup 08/11] datastore: add manifest locking
Stefan Reiter
s.reiter at proxmox.com
Wed Oct 14 14:16:36 CEST 2020
Avoid races when updating manifest data by flocking the manifest file
itself. store_manifest now requires such a lock and takes ownership of it,
so the Rust compiler enforces that callers hold the lock when writing and
that it is released automatically once the manifest has been stored.
Snapshot deletion also acquires the lock, so it cannot interfere with an
outstanding manifest write.
Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
src/api2/admin/datastore.rs | 4 +--
src/api2/backup/environment.rs | 4 +--
src/backup/datastore.rs | 50 ++++++++++++++++++++++++++++++++--
src/backup/verify.rs | 6 ++--
4 files changed, 55 insertions(+), 9 deletions(-)
diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
index 5824611b..11223e6a 100644
--- a/src/api2/admin/datastore.rs
+++ b/src/api2/admin/datastore.rs
@@ -1481,11 +1481,11 @@ fn set_notes(
let allowed = (user_privs & PRIV_DATASTORE_READ) != 0;
if !allowed { check_backup_owner(&datastore, backup_dir.group(), &userid)?; }
- let (mut manifest, _) = datastore.load_manifest(&backup_dir)?;
+ let (mut manifest, manifest_guard) = datastore.load_manifest_locked(&backup_dir)?;
manifest.unprotected["notes"] = notes.into();
- datastore.store_manifest(&backup_dir, manifest)?;
+ datastore.store_manifest(&backup_dir, manifest, manifest_guard)?;
Ok(())
}
diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index f00c2cd3..0e672d8e 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -473,14 +473,14 @@ impl BackupEnvironment {
}
// check manifest
- let (mut manifest, _) = self.datastore.load_manifest(&self.backup_dir)
+ let (mut manifest, manifest_guard) = self.datastore.load_manifest_locked(&self.backup_dir)
.map_err(|err| format_err!("unable to load manifest blob - {}", err))?;
let stats = serde_json::to_value(state.backup_stat)?;
manifest.unprotected["chunk_upload_stats"] = stats;
- self.datastore.store_manifest(&self.backup_dir, manifest)
+ self.datastore.store_manifest(&self.backup_dir, manifest, manifest_guard)
.map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
if let Some(base) = &self.last_backup {
diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
index 8ea9311a..f8c228fc 100644
--- a/src/backup/datastore.rs
+++ b/src/backup/datastore.rs
@@ -3,6 +3,8 @@ use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex};
use std::convert::TryFrom;
+use std::time::Duration;
+use std::fs::File;
use anyhow::{bail, format_err, Error};
use lazy_static::lazy_static;
@@ -24,6 +26,8 @@ use crate::tools::fs::{lock_dir_noblock, DirLockGuard};
use crate::api2::types::{GarbageCollectionStatus, Userid};
use crate::server::UPID;
+pub type ManifestLock = File;
+
lazy_static! {
static ref DATASTORE_MAP: Mutex<HashMap<String, Arc<DataStore>>> = Mutex::new(HashMap::new());
}
@@ -228,9 +232,10 @@ impl DataStore {
let full_path = self.snapshot_path(backup_dir);
- let _guard;
+ let (_guard, _manifest_guard);
if !force {
_guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+ _manifest_guard = self.lock_manifest(backup_dir);
}
// Acquire lock and keep it during remove operation, so there's no
@@ -656,8 +661,47 @@ impl DataStore {
digest_str,
err,
))
- }
+ }
+ fn lock_manifest(
+ &self,
+ backup_dir: &BackupDir,
+ ) -> Result<ManifestLock, Error> {
+ let mut path = self.base_path();
+ path.push(backup_dir.relative_path());
+ path.push(MANIFEST_BLOB_NAME);
+
+ let mut handle = File::open(&path)
+ .map_err(|err| {
+ format_err!("unable to open manifest {:?} for locking - {}", &path, err)
+ })?;
+
+ proxmox::tools::fs::lock_file(&mut handle, true, Some(Duration::from_secs(5)))
+ .map_err(|err| {
+ format_err!(
+ "unable to acquire lock on manifest {:?} - {}", &path, err
+ )
+ })?;
+
+ Ok(handle)
+ }
+
+ /// Load the manifest with a lock, so it can be safely written back again.
+ /// Most operations consist of "load -> edit unprotected -> write back" so the lock is not held
+ /// for long - thus we wait a few seconds for the lock to become available before giving up. In
+ /// case of verify it might take longer, so all callers must either be able to cope with a
+ /// failure or ensure that they are exclusive with verify.
+ pub fn load_manifest_locked(
+ &self,
+ backup_dir: &BackupDir,
+ ) -> Result<(BackupManifest, ManifestLock), Error> {
+ let guard = self.lock_manifest(backup_dir)?;
+ let blob = self.load_blob(backup_dir, MANIFEST_BLOB_NAME)?;
+ let manifest = BackupManifest::try_from(blob)?;
+ Ok((manifest, guard))
+ }
+
+ /// Load the manifest without a lock. Cannot be edited and written back.
pub fn load_manifest(
&self,
backup_dir: &BackupDir,
@@ -668,10 +712,12 @@ impl DataStore {
Ok((manifest, raw_size))
}
+ /// Store a given manifest. Requires a lock acquired with load_manifest_locked for safety.
pub fn store_manifest(
&self,
backup_dir: &BackupDir,
manifest: BackupManifest,
+ _manifest_lock: ManifestLock,
) -> Result<(), Error> {
let manifest = serde_json::to_value(manifest)?;
let manifest = serde_json::to_string_pretty(&manifest)?;
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 05b6ba86..839987e1 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -300,8 +300,8 @@ pub fn verify_backup_dir(
return Ok(true);
}
- let mut manifest = match datastore.load_manifest(&backup_dir) {
- Ok((manifest, _)) => manifest,
+ let (mut manifest, manifest_guard) = match datastore.load_manifest_locked(&backup_dir) {
+ Ok((manifest, guard)) => (manifest, guard),
Err(err) => {
task_log!(
worker,
@@ -368,7 +368,7 @@ pub fn verify_backup_dir(
upid,
};
manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
- datastore.store_manifest(&backup_dir, manifest)
+ datastore.store_manifest(&backup_dir, manifest, manifest_guard)
.map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
Ok(error_count == 0)
--
2.20.1
More information about the pbs-devel
mailing list