[pbs-devel] [PATCH proxmox-backup v1 1/3] fix #3935: datastore/api2/backup: move datastore locking to '/run'

Stefan Sterz s.sterz at proxmox.com
Wed Apr 6 11:39:53 CEST 2022


to avoid issues when removing a group or snapshot directory where two
threads hold a lock to the same directory, move locking to the tmpfs
backed '/run' directory. also adds double stat'ing to make it
possible to remove locks without certain race condition issues.

Signed-off-by: Stefan Sterz <s.sterz at proxmox.com>
---
i chose to have the lock be the sha256 hash of the relative path of a
given group/snapshot/manifest in a datastore, because this way we
never run into issues where file names might be too long. this has
been discussed previously [1]. i could also encode them using
proxmox_sys::systemd::escape_unit, but that would use two characters
for most ascii characters and, thus, we would run into the 255 byte
filename limit even faster in most cases. however, i have no set
opinion here; if the consensus is that this is a non-issue, i'll gladly
send a v2.

[1]: https://lists.proxmox.com/pipermail/pbs-devel/2020-December/001669.html

 pbs-datastore/src/datastore.rs       | 129 +++++++++++++++++++++------
 pbs-datastore/src/snapshot_reader.rs |  30 ++++---
 src/api2/backup/environment.rs       |   4 +-
 src/api2/backup/mod.rs               |   5 +-
 src/api2/reader/mod.rs               |   7 +-
 src/backup/verify.rs                 |  12 +--
 6 files changed, 131 insertions(+), 56 deletions(-)

diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index d416c8d8..cb2a8a4e 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -15,7 +15,7 @@ use proxmox_sys::fs::{replace_file, file_read_optional_string, CreateOptions};
 use proxmox_sys::process_locker::ProcessLockSharedGuard;
 use proxmox_sys::WorkerTaskContext;
 use proxmox_sys::{task_log, task_warn};
-use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard};
+use proxmox_sys::error::SysError;
 
 use pbs_api_types::{
     UPID, DataStoreConfig, Authid, GarbageCollectionStatus, HumanByte,
@@ -39,6 +39,8 @@ lazy_static! {
     static ref DATASTORE_MAP: Mutex<HashMap<String, Arc<DataStore>>> = Mutex::new(HashMap::new());
 }
 
+pub const DATASTORE_LOCKS_DIR: &str = "/run/proxmox-backup/locks";
+
 /// checks if auth_id is owner, or, if owner is a token, if
 /// auth_id is the user of the token
 pub fn check_backup_owner(
@@ -288,11 +290,9 @@ impl DataStore {
     /// Remove a complete backup group including all snapshots, returns true
     /// if all snapshots were removed, and false if some were protected
     pub fn remove_backup_group(&self, backup_group: &BackupGroup) ->  Result<bool, Error> {
-
+        let _guard = self.lock_group(backup_group)?;
         let full_path = self.group_path(backup_group);
 
-        let _guard = proxmox_sys::fs::lock_dir_noblock(&full_path, "backup group", "possible running backup")?;
-
         log::info!("removing backup group {:?}", full_path);
 
         let mut removed_all = true;
@@ -308,27 +308,28 @@ impl DataStore {
 
         if removed_all {
             // no snapshots left, we can now safely remove the empty folder
-            std::fs::remove_dir_all(&full_path)
-                .map_err(|err| {
-                    format_err!(
-                        "removing backup group directory {:?} failed - {}",
-                        full_path,
-                        err,
-                    )
-                })?;
+            std::fs::remove_dir_all(&full_path).map_err(|err| {
+                format_err!(
+                    "removing backup group directory {:?} failed - {}",
+                    full_path,
+                    err,
+                )
+            })?;
+
+            if let Ok(path) = self.group_lock_path(backup_group) {
+                let _ = std::fs::remove_file(path);
+            }
         }
 
         Ok(removed_all)
     }
 
     /// Remove a backup directory including all content
-    pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) ->  Result<(), Error> {
-
-        let full_path = self.snapshot_path(backup_dir);
-
+    pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) -> Result<(), Error> {
         let (_guard, _manifest_guard);
+
         if !force {
-            _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+            _guard = self.lock_snapshot(backup_dir)?;
             _manifest_guard = self.lock_manifest(backup_dir)?;
         }
 
@@ -336,6 +337,8 @@ impl DataStore {
             bail!("cannot remove protected snapshot");
         }
 
+        let full_path = self.snapshot_path(backup_dir);
+
         log::info!("removing backup snapshot {:?}", full_path);
         std::fs::remove_dir_all(&full_path)
             .map_err(|err| {
@@ -352,6 +355,10 @@ impl DataStore {
             let _ = std::fs::remove_file(path);
         }
 
+        if let Ok(path) = self.snapshot_lock_path(backup_dir) {
+            let _ = std::fs::remove_file(path);
+        }
+
         Ok(())
     }
 
@@ -427,7 +434,7 @@ impl DataStore {
         &self,
         backup_group: &BackupGroup,
         auth_id: &Authid,
-    ) -> Result<(Authid, DirLockGuard), Error> {
+    ) -> Result<(Authid, BackupLockGuard), Error> {
         // create intermediate path first:
         let mut full_path = self.base_path();
         full_path.push(backup_group.backup_type());
@@ -438,13 +445,13 @@ impl DataStore {
         // create the last component now
         match std::fs::create_dir(&full_path) {
             Ok(_) => {
-                let guard = lock_dir_noblock(&full_path, "backup group", "another backup is already running")?;
+                let guard = self.lock_group(backup_group)?;
                 self.set_owner(backup_group, auth_id, false)?;
                 let owner = self.get_owner(backup_group)?; // just to be sure
                 Ok((owner, guard))
             }
             Err(ref err) if err.kind() == io::ErrorKind::AlreadyExists => {
-                let guard = lock_dir_noblock(&full_path, "backup group", "another backup is already running")?;
+                let guard = self.lock_group(backup_group)?;
                 let owner = self.get_owner(backup_group)?; // just to be sure
                 Ok((owner, guard))
             }
@@ -456,14 +463,13 @@ impl DataStore {
     ///
     /// The BackupGroup directory needs to exist.
     pub fn create_locked_backup_dir(&self, backup_dir: &BackupDir)
-        -> Result<(PathBuf, bool, DirLockGuard), Error>
+        -> Result<(PathBuf, bool, BackupLockGuard), Error>
     {
         let relative_path = backup_dir.relative_path();
         let mut full_path = self.base_path();
         full_path.push(&relative_path);
 
-        let lock = ||
-            lock_dir_noblock(&full_path, "snapshot", "internal error - tried creating snapshot that's already in use");
+        let lock = || self.lock_snapshot(backup_dir);
 
         match std::fs::create_dir(&full_path) {
             Ok(_) => Ok((relative_path, true, lock()?)),
@@ -810,7 +816,8 @@ impl DataStore {
         backup_dir: &BackupDir,
     ) -> Result<String, Error> {
         let mut path = format!(
-            "/run/proxmox-backup/locks/{}/{}/{}",
+            "{}/{}/{}/{}",
+            DATASTORE_LOCKS_DIR,
             self.name(),
             backup_dir.group().backup_type(),
             backup_dir.group().backup_id(),
@@ -883,9 +890,7 @@ impl DataStore {
         backup_dir: &BackupDir,
         protection: bool
     ) -> Result<(), Error> {
-        let full_path = self.snapshot_path(backup_dir);
-
-        let _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+        let _guard = self.lock_snapshot(backup_dir)?;
 
         let protected_path = backup_dir.protected_file(self.base_path());
         if protection {
@@ -952,4 +957,72 @@ impl DataStore {
 
         Ok(chunk_list)
     }
-}
+
+    fn snapshot_lock_path(&self, backup_dir: &BackupDir) -> Result<PathBuf, Error> {
+        let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+        std::fs::create_dir_all(&path)?;
+
+        // hash relative path to get a fixed length unique file name
+        let rpath_hash =
+            openssl::sha::sha256(backup_dir.relative_path().to_str().unwrap().as_bytes());
+        Ok(path.join(hex::encode(rpath_hash)))
+    }
+
+    fn group_lock_path(&self, group: &BackupGroup) -> Result<PathBuf, Error> {
+        let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+        std::fs::create_dir_all(&path)?;
+
+        // hash relative path to get a fixed length unique file name
+        let rpath_hash = openssl::sha::sha256(group.group_path().to_str().unwrap().as_bytes());
+        Ok(path.join(hex::encode(rpath_hash)))
+    }
+
+    // this function helps implement the double stat'ing procedure. it
+    // avoids certain race conditions upon lock deletion.
+    fn lock_helper<F>(&self, path: &std::path::Path, lock_fn: F) -> Result<BackupLockGuard, Error>
+    where
+        F: Fn(&std::path::Path) -> Result<BackupLockGuard, Error>,
+    {
+        let stat = match nix::sys::stat::stat(path) {
+            Ok(result) => result.st_ino,
+            // lock hasn't been created yet, ignore check
+            Err(e) if e.not_found() => return Ok(lock_fn(path)?),
+            Err(e) => bail!("could not stat lock file before locking! - {}", e),
+        };
+
+        let lock = lock_fn(path)?;
+
+        if stat != nix::sys::stat::stat(path)?.st_ino {
+            bail!("could not acquire lock, another thread modified the lock file");
+        }
+
+        Ok(lock)
+    }
+
+    /// Lock a snapshot exclusively
+    pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<BackupLockGuard, Error> {
+        self.lock_helper(&self.snapshot_lock_path(backup_dir)?, |p| {
+            open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| {
+                format_err!("unable to acquire snapshot lock {:?} - {}", &p, err)
+            })
+        })
+    }
+
+    /// Acquire a shared lock on a snapshot
+    pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<BackupLockGuard, Error> {
+        self.lock_helper(&self.snapshot_lock_path(backup_dir)?, |p| {
+            open_backup_lockfile(p, Some(Duration::from_secs(0)), false).map_err(|err| {
+                format_err!("unable to acquire shared snapshot lock {:?} - {}", &p, err)
+            })
+        })
+    }
+
+    /// Lock a group exclusively
+    pub fn lock_group(&self, group: &BackupGroup) -> Result<BackupLockGuard, Error> {
+        self.lock_helper(&self.group_lock_path(group)?, |p| {
+            open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| {
+                format_err!("unable to acquire backup group lock {:?} - {}", &p, err)
+            })
+        })
+    }
+}
\ No newline at end of file
diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index 1bbf57e7..3773804d 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -3,10 +3,11 @@ use std::sync::Arc;
 use std::os::unix::io::{AsRawFd, FromRawFd};
 use std::fs::File;
 
-use anyhow::{bail, Error};
+use anyhow::{bail, format_err, Error};
 use nix::dir::Dir;
-
-use proxmox_sys::fs::lock_dir_noblock_shared;
+use nix::sys::stat::Mode;
+use nix::fcntl::OFlag;
+use pbs_config::BackupLockGuard;
 
 use crate::backup_info::BackupDir;
 use crate::index::IndexFile;
@@ -23,19 +24,22 @@ pub struct SnapshotReader {
     datastore_name: String,
     file_list: Vec<String>,
     locked_dir: Dir,
+
+    // while this is never read, the lock needs to be kept until the
+    // reader is dropped to ensure valid locking semantics
+    _lock: BackupLockGuard,
 }
 
 impl SnapshotReader {
 
     /// Lock snapshot, reads the manifest and returns a new instance
     pub fn new(datastore: Arc<DataStore>, snapshot: BackupDir) -> Result<Self, Error> {
+        let lock = datastore.lock_snapshot_shared(&snapshot)?;
 
-        let snapshot_path = datastore.snapshot_path(&snapshot);
+        let path = datastore.snapshot_path(&snapshot);
 
-        let locked_dir = lock_dir_noblock_shared(
-            &snapshot_path,
-            "snapshot",
-            "locked by another operation")?;
+        let locked_dir = Dir::open(&path, OFlag::O_RDONLY, Mode::empty())
+            .map_err(|err| format_err!("unable to open snapshot directory {:?} - {}", path, err))?;
 
         let datastore_name = datastore.name().to_string();
 
@@ -47,7 +51,7 @@ impl SnapshotReader {
             }
         };
 
-        let mut client_log_path = snapshot_path;
+        let mut client_log_path = datastore.snapshot_path(&snapshot);
         client_log_path.push(CLIENT_LOG_BLOB_NAME);
 
         let mut file_list = Vec::new();
@@ -57,7 +61,13 @@ impl SnapshotReader {
             file_list.push(CLIENT_LOG_BLOB_NAME.to_string());
         }
 
-        Ok(Self { snapshot, datastore_name, file_list, locked_dir })
+        Ok(Self {
+            snapshot,
+            datastore_name,
+            file_list,
+            locked_dir,
+            _lock: lock,
+        })
     }
 
     /// Return the snapshot directory
diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index 3e23840f..b4557d42 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -1,7 +1,7 @@
 use anyhow::{bail, format_err, Error};
+use pbs_config::BackupLockGuard;
 use std::sync::{Arc, Mutex};
 use std::collections::HashMap;
-use nix::dir::Dir;
 
 use ::serde::{Serialize};
 use serde_json::{json, Value};
@@ -501,7 +501,7 @@ impl BackupEnvironment {
     /// If verify-new is set on the datastore, this will run a new verify task
     /// for the backup. If not, this will return and also drop the passed lock
     /// immediately.
-    pub fn verify_after_complete(&self, snap_lock: Dir) -> Result<(), Error> {
+    pub fn verify_after_complete(&self, snap_lock: BackupLockGuard) -> Result<(), Error> {
         self.ensure_finished()?;
 
         if !self.datastore.verify_new() {
diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs
index 395edd3d..6e38f3da 100644
--- a/src/api2/backup/mod.rs
+++ b/src/api2/backup/mod.rs
@@ -20,7 +20,7 @@ use pbs_api_types::{
     BACKUP_ID_SCHEMA, BACKUP_TIME_SCHEMA, BACKUP_TYPE_SCHEMA, DATASTORE_SCHEMA,
     CHUNK_DIGEST_SCHEMA, PRIV_DATASTORE_BACKUP, BACKUP_ARCHIVE_NAME_SCHEMA,
 };
-use proxmox_sys::fs::lock_dir_noblock_shared;
+
 use pbs_tools::json::{required_array_param, required_integer_param, required_string_param};
 use pbs_config::CachedUserInfo;
 use pbs_datastore::{DataStore, PROXMOX_BACKUP_PROTOCOL_ID_V1};
@@ -157,8 +157,7 @@ async move {
         }
 
         // lock last snapshot to prevent forgetting/pruning it during backup
-        let full_path = datastore.snapshot_path(&last.backup_dir);
-        Some(lock_dir_noblock_shared(&full_path, "snapshot", "base snapshot is already locked by another operation")?)
+        Some(datastore.lock_snapshot_shared(&last.backup_dir)?)
     } else {
         None
     };
diff --git a/src/api2/reader/mod.rs b/src/api2/reader/mod.rs
index 2b11d1b1..f472686a 100644
--- a/src/api2/reader/mod.rs
+++ b/src/api2/reader/mod.rs
@@ -20,7 +20,7 @@ use pbs_api_types::{
     CHUNK_DIGEST_SCHEMA, PRIV_DATASTORE_READ, PRIV_DATASTORE_BACKUP,
     BACKUP_ARCHIVE_NAME_SCHEMA,
 };
-use proxmox_sys::fs::lock_dir_noblock_shared;
+
 use pbs_tools::json::{required_integer_param, required_string_param};
 use pbs_datastore::{DataStore, PROXMOX_BACKUP_READER_PROTOCOL_ID_V1};
 use pbs_datastore::backup_info::BackupDir;
@@ -114,10 +114,7 @@ fn upgrade_to_backup_reader_protocol(
             }
         }
 
-        let _guard = lock_dir_noblock_shared(
-            &datastore.snapshot_path(&backup_dir),
-            "snapshot",
-            "locked by another operation")?;
+        let _guard = datastore.lock_snapshot_shared(&backup_dir)?;
 
         let path = datastore.base_path();
 
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 307d366c..f4375daa 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -1,4 +1,4 @@
-use nix::dir::Dir;
+use pbs_config::BackupLockGuard;
 use std::collections::HashSet;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::{Arc, Mutex};
@@ -13,7 +13,6 @@ use pbs_datastore::{DataStore, DataBlob, StoreProgress};
 use pbs_datastore::backup_info::{BackupGroup, BackupDir, BackupInfo};
 use pbs_datastore::index::IndexFile;
 use pbs_datastore::manifest::{archive_type, ArchiveType, BackupManifest, FileInfo};
-use proxmox_sys::fs::lock_dir_noblock_shared;
 
 use crate::tools::parallel_handler::ParallelHandler;
 
@@ -300,11 +299,8 @@ pub fn verify_backup_dir(
     upid: UPID,
     filter: Option<&dyn Fn(&BackupManifest) -> bool>,
 ) -> Result<bool, Error> {
-    let snap_lock = lock_dir_noblock_shared(
-        &verify_worker.datastore.snapshot_path(backup_dir),
-        "snapshot",
-        "locked by another operation",
-    );
+    let snap_lock = verify_worker.datastore.lock_snapshot_shared(&backup_dir);
+
     match snap_lock {
         Ok(snap_lock) => {
             verify_backup_dir_with_lock(verify_worker, backup_dir, upid, filter, snap_lock)
@@ -328,7 +324,7 @@ pub fn verify_backup_dir_with_lock(
     backup_dir: &BackupDir,
     upid: UPID,
     filter: Option<&dyn Fn(&BackupManifest) -> bool>,
-    _snap_lock: Dir,
+    _snap_lock: BackupLockGuard,
 ) -> Result<bool, Error> {
     let manifest = match verify_worker.datastore.load_manifest(backup_dir) {
         Ok((manifest, _)) => manifest,
-- 
2.30.2






More information about the pbs-devel mailing list