[pbs-devel] [PATCH v2 proxmox-backup 2/4] datastore: add manifest locking
Dietmar Maurer
dietmar at proxmox.com
Fri Oct 16 08:33:54 CEST 2020
comments inline:
> On 10/15/2020 12:49 PM Stefan Reiter <s.reiter at proxmox.com> wrote:
>
>
> Avoid races when updating manifest data by flocking a lock file.
> update_manifest is used to ensure updates always happen with the lock
> held.
>
> Snapshot deletion also acquires the lock, so it cannot interfere with an
> outstanding manifest write.
>
> Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
> ---
>
> v2:
> * Use seperate manifest lock file
> * Change store_manifest to update_manifest to force consumers to use correct
> locking
> * Don't hold lock across verify, reload manifest at the end
> * Update comments
>
> src/api2/admin/datastore.rs | 8 +++----
> src/api2/backup/environment.rs | 13 ++++------
> src/backup/datastore.rs | 43 +++++++++++++++++++++++++++++-----
> src/backup/manifest.rs | 1 +
> src/backup/verify.rs | 9 +++----
> 5 files changed, 50 insertions(+), 24 deletions(-)
>
> diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
> index de39cd1b..4f15c1cd 100644
> --- a/src/api2/admin/datastore.rs
> +++ b/src/api2/admin/datastore.rs
> @@ -1481,11 +1481,9 @@ fn set_notes(
> let allowed = (user_privs & PRIV_DATASTORE_READ) != 0;
> if !allowed { check_backup_owner(&datastore, backup_dir.group(), &userid)?; }
>
> - let (mut manifest, _) = datastore.load_manifest(&backup_dir)?;
> -
> - manifest.unprotected["notes"] = notes.into();
> -
> - datastore.store_manifest(&backup_dir, manifest)?;
> + datastore.update_manifest(&backup_dir,|manifest| {
> + manifest.unprotected["notes"] = notes.into();
> + }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?;
>
> Ok(())
> }
> diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
> index f00c2cd3..c4f166df 100644
> --- a/src/api2/backup/environment.rs
> +++ b/src/api2/backup/environment.rs
> @@ -472,16 +472,11 @@ impl BackupEnvironment {
> bail!("backup does not contain valid files (file count == 0)");
> }
>
> - // check manifest
> - let (mut manifest, _) = self.datastore.load_manifest(&self.backup_dir)
> - .map_err(|err| format_err!("unable to load manifest blob - {}", err))?;
> -
> + // check for valid manifest and store stats
> let stats = serde_json::to_value(state.backup_stat)?;
> -
> - manifest.unprotected["chunk_upload_stats"] = stats;
> -
> - self.datastore.store_manifest(&self.backup_dir, manifest)
> - .map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
> + self.datastore.update_manifest(&self.backup_dir, |manifest| {
> + manifest.unprotected["chunk_upload_stats"] = stats;
> + }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?;
>
> if let Some(base) = &self.last_backup {
> let path = self.datastore.snapshot_path(&base.backup_dir);
> diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
> index ca8ca438..7b2bee7e 100644
> --- a/src/backup/datastore.rs
> +++ b/src/backup/datastore.rs
> @@ -3,17 +3,19 @@ use std::io::{self, Write};
> use std::path::{Path, PathBuf};
> use std::sync::{Arc, Mutex};
> use std::convert::TryFrom;
> +use std::time::Duration;
> +use std::fs::File;
>
> use anyhow::{bail, format_err, Error};
> use lazy_static::lazy_static;
>
> -use proxmox::tools::fs::{replace_file, CreateOptions};
> +use proxmox::tools::fs::{replace_file, CreateOptions, open_file_locked};
>
> use super::backup_info::{BackupGroup, BackupDir};
> use super::chunk_store::ChunkStore;
> use super::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
> use super::fixed_index::{FixedIndexReader, FixedIndexWriter};
> -use super::manifest::{MANIFEST_BLOB_NAME, CLIENT_LOG_BLOB_NAME, BackupManifest};
> +use super::manifest::{MANIFEST_BLOB_NAME, MANIFEST_LOCK_NAME, CLIENT_LOG_BLOB_NAME, BackupManifest};
> use super::index::*;
> use super::{DataBlob, ArchiveType, archive_type};
> use crate::config::datastore;
> @@ -235,9 +237,10 @@ impl DataStore {
>
> let full_path = self.snapshot_path(backup_dir);
>
> - let _guard;
> + let (_guard, _manifest_guard);
> if !force {
> _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
> + _manifest_guard = self.lock_manifest(backup_dir);
I think this is unnecessary. An update manifest should not block a remove_backup_dir.
What for exactly?
> }
>
> // Acquire lock and keep it during remove operation, so there's no
> @@ -665,8 +668,27 @@ impl DataStore {
> digest_str,
> err,
> ))
> - }
> + }
>
> + fn lock_manifest(
> + &self,
> + backup_dir: &BackupDir,
> + ) -> Result<File, Error> {
> + let mut path = self.base_path();
> + path.push(backup_dir.relative_path());
> + path.push(&MANIFEST_LOCK_NAME);
> +
> + // update_manifest should never take a long time, so if someone else has
> + // the lock we can simply block a bit and should get it soon
> + open_file_locked(&path, Duration::from_secs(5), true)
> + .map_err(|err| {
> + format_err!(
> + "unable to acquire manifest lock {:?} - {}", &path, err
> + )
> + })
> + }
> +
> + /// Load the manifest without a lock. Must not be written back.
> pub fn load_manifest(
> &self,
> backup_dir: &BackupDir,
> @@ -677,11 +699,19 @@ impl DataStore {
> Ok((manifest, raw_size))
> }
>
> - pub fn store_manifest(
> + /// Update the manifest of the specified snapshot. Never write a manifest directly,
> + /// only use this method - anything else may break locking guarantees.
> + pub fn update_manifest(
> &self,
> backup_dir: &BackupDir,
> - manifest: BackupManifest,
> + update_fn: impl FnOnce(&mut BackupManifest),
> ) -> Result<(), Error> {
It should not be possible to update anything outside the "unprotected" property.
> +
> + let _guard = self.lock_manifest(backup_dir)?;
> + let (mut manifest, _) = self.load_manifest(&backup_dir)?;
> +
> + update_fn(&mut manifest);
> +
> let manifest = serde_json::to_value(manifest)?;
> let manifest = serde_json::to_string_pretty(&manifest)?;
> let blob = DataBlob::encode(manifest.as_bytes(), None, true)?;
> @@ -691,6 +721,7 @@ impl DataStore {
> path.push(backup_dir.relative_path());
> path.push(MANIFEST_BLOB_NAME);
>
> + // atomic replace invalidates flock - no other writes past this point!
> replace_file(&path, raw_data, CreateOptions::new())?;
>
> Ok(())
> diff --git a/src/backup/manifest.rs b/src/backup/manifest.rs
> index 609cc998..51980a07 100644
> --- a/src/backup/manifest.rs
> +++ b/src/backup/manifest.rs
> @@ -8,6 +8,7 @@ use ::serde::{Deserialize, Serialize};
> use crate::backup::{BackupDir, CryptMode, CryptConfig};
>
> pub const MANIFEST_BLOB_NAME: &str = "index.json.blob";
> +pub const MANIFEST_LOCK_NAME: &str = ".index.json.lck";
> pub const CLIENT_LOG_BLOB_NAME: &str = "client.log.blob";
>
> mod hex_csum {
> diff --git a/src/backup/verify.rs b/src/backup/verify.rs
> index 05b6ba86..ea3fa760 100644
> --- a/src/backup/verify.rs
> +++ b/src/backup/verify.rs
> @@ -300,7 +300,7 @@ pub fn verify_backup_dir(
> return Ok(true);
> }
>
> - let mut manifest = match datastore.load_manifest(&backup_dir) {
> + let manifest = match datastore.load_manifest(&backup_dir) {
> Ok((manifest, _)) => manifest,
> Err(err) => {
> task_log!(
> @@ -367,9 +367,10 @@ pub fn verify_backup_dir(
> state: verify_result,
> upid,
> };
> - manifest.unprotected["verify_state"] = serde_json::to_value(verify_state)?;
> - datastore.store_manifest(&backup_dir, manifest)
> - .map_err(|err| format_err!("unable to store manifest blob - {}", err))?;
> + let verify_state = serde_json::to_value(verify_state)?;
> + datastore.update_manifest(&backup_dir, |manifest| {
> + manifest.unprotected["verify_state"] = verify_state;
> + }).map_err(|err| format_err!("unable to update manifest blob - {}", err))?;
>
> Ok(error_count == 0)
> }
> --
> 2.20.1
>
>
>
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
More information about the pbs-devel
mailing list