[pbs-devel] [PATCH proxmox-backup v2 2/4] fix #3935: datastore/api/backup: move datastore locking to '/run'

Stefan Sterz s.sterz at proxmox.com
Wed Apr 13 11:11:49 CEST 2022


to avoid issues when removing a group or snapshot directory where two
threads hold a lock to the same directory, move locking to the tmpfs
backed '/run' directory and use a flat file structure. also adds
double stat'ing to make it possible to remove locks without certain
race condition issues.

Signed-off-by: Stefan Sterz <s.sterz at proxmox.com>
---
 pbs-config/src/lib.rs                |   7 ++
 pbs-datastore/src/datastore.rs       | 126 +++++++++++++++++++++------
 pbs-datastore/src/snapshot_reader.rs |  24 ++++-
 src/api2/backup/environment.rs       |   4 +-
 src/backup/verify.rs                 |   4 +-
 5 files changed, 131 insertions(+), 34 deletions(-)

diff --git a/pbs-config/src/lib.rs b/pbs-config/src/lib.rs
index 118030bc..81a7b243 100644
--- a/pbs-config/src/lib.rs
+++ b/pbs-config/src/lib.rs
@@ -21,6 +21,7 @@ pub use config_version_cache::ConfigVersionCache;
 
 use anyhow::{format_err, Error};
 use nix::unistd::{Gid, Group, Uid, User};
+use std::os::unix::prelude::AsRawFd;
 
 pub use pbs_buildcfg::{BACKUP_USER_NAME, BACKUP_GROUP_NAME};
 
@@ -46,6 +47,12 @@ pub fn backup_group() -> Result<nix::unistd::Group, Error> {
 
 pub struct BackupLockGuard(Option<std::fs::File>);
 
+impl AsRawFd for BackupLockGuard {
+    fn as_raw_fd(&self) -> i32 {
+        self.0.as_ref().map_or(-1, |f| f.as_raw_fd())
+    }
+}
+
 #[doc(hidden)]
 /// Note: do not use for production code, this is only intended for tests
 pub unsafe fn create_mocked_lock() -> BackupLockGuard {
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 3895688d..92d544e5 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -1,5 +1,6 @@
 use std::collections::{HashSet, HashMap};
 use std::io::{self, Write};
+use std::os::unix::prelude::AsRawFd;
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex};
 use std::convert::TryFrom;
@@ -11,14 +12,13 @@ use lazy_static::lazy_static;
 
 use proxmox_schema::ApiType;
 
-use proxmox_sys::fs::{
-    file_read_optional_string, lock_dir_noblock, lock_dir_noblock_shared, replace_file,
-    CreateOptions, DirLockGuard,
-};
+use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions};
 use proxmox_sys::process_locker::ProcessLockSharedGuard;
 use proxmox_sys::WorkerTaskContext;
 use proxmox_sys::{task_log, task_warn};
 
+use proxmox_sys::systemd::escape_unit;
+
 use pbs_api_types::{
     UPID, DataStoreConfig, Authid, GarbageCollectionStatus, HumanByte,
     ChunkOrder, DatastoreTuning, Operation,
@@ -42,6 +42,8 @@ lazy_static! {
     static ref DATASTORE_MAP: Mutex<HashMap<String, Arc<DataStoreImpl>>> = Mutex::new(HashMap::new());
 }
 
+pub const DATASTORE_LOCKS_DIR: &str = "/run/proxmox-backup/locks";
+
 /// checks if auth_id is owner, or, if owner is a token, if
 /// auth_id is the user of the token
 pub fn check_backup_owner(
@@ -368,6 +370,8 @@ impl DataStore {
                         err,
                     )
                 })?;
+
+            let _ = std::fs::remove_file( self.group_lock_path(backup_group));
         }
 
         Ok(removed_all)
@@ -404,6 +408,8 @@ impl DataStore {
             let _ = std::fs::remove_file(path);
         }
 
+        let _ = std::fs::remove_file(self.snapshot_lock_path(backup_dir));
+
         Ok(())
     }
 
@@ -479,7 +485,7 @@ impl DataStore {
         &self,
         backup_group: &BackupGroup,
         auth_id: &Authid,
-    ) -> Result<(Authid, DirLockGuard), Error> {
+    ) -> Result<(Authid, BackupLockGuard), Error> {
         // create intermediate path first:
         let mut full_path = self.base_path();
         full_path.push(backup_group.backup_type());
@@ -508,7 +514,7 @@ impl DataStore {
     ///
     /// The BackupGroup directory needs to exist.
     pub fn create_locked_backup_dir(&self, backup_dir: &BackupDir)
-        -> Result<(PathBuf, bool, DirLockGuard), Error>
+        -> Result<(PathBuf, bool, BackupLockGuard), Error>
     {
         let relative_path = backup_dir.relative_path();
         let mut full_path = self.base_path();
@@ -1002,30 +1008,96 @@ impl DataStore {
         Ok(chunk_list)
     }
 
-    /// Lock a snapshot exclusively
-    pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
-        lock_dir_noblock(
-            &self.snapshot_path(backup_dir),
-            "snapshot",
-            "possibly running or in use",
-        )
+    /// Encodes a string so it can be used as a lock file name.
+    ///
+    /// The first 64 characters will be the sha256 hash of `path` then
+    /// a hyphen followed by up to 100 characters of the unit encoded
+    /// version of `path`.
+    fn lock_file_name_helper(&self, path: &str) -> String {
+        let enc = escape_unit(path, true);
+        let from = enc.len().checked_sub(100).unwrap_or(0);
+        let hash = hex::encode(openssl::sha::sha256(path.as_bytes()));
+
+        format!("{}-{}", hash, enc[from..].to_string())
+    }
+
+    /// Returns a file name for locking a snapshot.
+    ///
+    /// The lock file will be located in:
+    /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_name_helper(rpath)}`
+    /// where `rpath` is the relative path of the snapshot.
+    fn snapshot_lock_path(&self, backup_dir: &BackupDir) -> PathBuf {
+        let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+
+        let rpath = format!(
+            "{}/{}/{}",
+            backup_dir.group().backup_type(),
+            backup_dir.group().backup_id(),
+            backup_dir.backup_time_string(),
+        );
+
+        path.join(self.lock_file_name_helper(&rpath))
+    }
+
+    /// Returns a file name for locking a group.
+    ///
+    /// The lock file will be located in:
+    /// `${DATASTORE_LOCKS_DIR}/${datastore name}/${lock_file_name_helper(rpath)}`
+    /// where `rpath` is the relative path of the group.
+    fn group_lock_path(&self, group: &BackupGroup) -> PathBuf {
+        let path = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+
+        let rpath = format!("{}/{}", group.backup_type(), group.backup_id());
+
+        path.join(self.lock_file_name_helper(&rpath))
+    }
+
+    /// Helps implement the double stat'ing procedure. It avoids
+    /// certain race conditions upon lock deletion.
+    ///
+    /// It also creates the base directory for lock files.
+    fn lock_helper<F>(&self, path: &std::path::Path, lock_fn: F) -> Result<BackupLockGuard, Error>
+    where
+        F: Fn(&std::path::Path) -> Result<BackupLockGuard, Error>,
+    {
+        let lock_dir = Path::new(DATASTORE_LOCKS_DIR).join(self.name());
+        std::fs::create_dir_all(&lock_dir)?;
+
+        let lock = lock_fn(path)?;
+
+        let stat = nix::sys::stat::fstat(lock.as_raw_fd())?.st_ino;
+
+        if nix::sys::stat::stat(path).map_or(true, |st| stat != st.st_ino) {
+            bail!("could not acquire lock, another thread modified the lock file");
+        }
+
+        Ok(lock)
+    }
+
+    /// Locks a snapshot exclusively.
+    pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<BackupLockGuard, Error> {
+        self.lock_helper(&self.snapshot_lock_path(backup_dir), |p| {
+            open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| {
+                format_err!("unable to acquire snapshot lock {:?} - {}", &p, err)
+            })
+        })
     }
 
-    /// Acquire a shared lock on a snapshot
-    pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
-        lock_dir_noblock_shared(
-            &self.snapshot_path(backup_dir),
-            "snapshot",
-            "possibly running or in use",
-        )
+    /// Acquires a shared lock on a snapshot.
+    pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<BackupLockGuard, Error> {
+        self.lock_helper(&self.snapshot_lock_path(backup_dir), |p| {
+            open_backup_lockfile(p, Some(Duration::from_secs(0)), false).map_err(|err| {
+                format_err!("unable to acquire shared snapshot lock {:?} - {}", &p, err)
+            })
+        })
     }
 
-    /// Lock a group exclusively
-    pub fn lock_group(&self, group: &BackupGroup) -> Result<DirLockGuard, Error> {
-        lock_dir_noblock(
-            &self.group_path(group),
-            "backup group",
-            "possible running backup",
-        )
+    /// Locks a group exclusively.
+    pub fn lock_group(&self, group: &BackupGroup) -> Result<BackupLockGuard, Error> {
+        self.lock_helper(&self.group_lock_path(group), |p| {
+            open_backup_lockfile(p, Some(Duration::from_secs(0)), true).map_err(|err| {
+                format_err!("unable to acquire backup group lock {:?} - {}", &p, err)
+            })
+        })
     }
 }
diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index 0eddac14..7fde3238 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -3,8 +3,11 @@ use std::sync::Arc;
 use std::os::unix::io::{AsRawFd, FromRawFd};
 use std::fs::File;
 
-use anyhow::{bail, Error};
+use anyhow::{bail, format_err, Error};
 use nix::dir::Dir;
+use nix::sys::stat::Mode;
+use nix::fcntl::OFlag;
+use pbs_config::BackupLockGuard;
 
 use crate::backup_info::BackupDir;
 use crate::index::IndexFile;
@@ -22,13 +25,22 @@ pub struct SnapshotReader {
     datastore_name: String,
     file_list: Vec<String>,
     locked_dir: Dir,
+
+    // while this is never read, the lock needs to be kept until the
+    // reader is dropped to ensure valid locking semantics
+    _lock: BackupLockGuard,
 }
 
 impl SnapshotReader {
 
     /// Lock snapshot, reads the manifest and returns a new instance
     pub fn new(datastore: Arc<DataStore>, snapshot: BackupDir) -> Result<Self, Error> {
-        let locked_dir = datastore.lock_snapshot_shared(&snapshot)?;
+        let lock = datastore.lock_snapshot_shared(&snapshot)?;
+
+        let path = datastore.snapshot_path(&snapshot);
+
+        let locked_dir = Dir::open(&path, OFlag::O_RDONLY, Mode::empty())
+            .map_err(|err| format_err!("unable to open snapshot directory {:?} - {}", path, err))?;
 
         let datastore_name = datastore.name().to_string();
 
@@ -50,7 +62,13 @@ impl SnapshotReader {
             file_list.push(CLIENT_LOG_BLOB_NAME.to_string());
         }
 
-        Ok(Self { snapshot, datastore_name, file_list, locked_dir })
+        Ok(Self {
+            snapshot,
+            datastore_name,
+            file_list,
+            locked_dir,
+            _lock: lock,
+        })
     }
 
     /// Return the snapshot directory
diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
index 3e23840f..b4557d42 100644
--- a/src/api2/backup/environment.rs
+++ b/src/api2/backup/environment.rs
@@ -1,7 +1,7 @@
 use anyhow::{bail, format_err, Error};
+use pbs_config::BackupLockGuard;
 use std::sync::{Arc, Mutex};
 use std::collections::HashMap;
-use nix::dir::Dir;
 
 use ::serde::{Serialize};
 use serde_json::{json, Value};
@@ -501,7 +501,7 @@ impl BackupEnvironment {
     /// If verify-new is set on the datastore, this will run a new verify task
     /// for the backup. If not, this will return and also drop the passed lock
     /// immediately.
-    pub fn verify_after_complete(&self, snap_lock: Dir) -> Result<(), Error> {
+    pub fn verify_after_complete(&self, snap_lock: BackupLockGuard) -> Result<(), Error> {
         self.ensure_finished()?;
 
         if !self.datastore.verify_new() {
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index eea0e15d..f4375daa 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -1,4 +1,4 @@
-use nix::dir::Dir;
+use pbs_config::BackupLockGuard;
 use std::collections::HashSet;
 use std::sync::atomic::{AtomicUsize, Ordering};
 use std::sync::{Arc, Mutex};
@@ -324,7 +324,7 @@ pub fn verify_backup_dir_with_lock(
     backup_dir: &BackupDir,
     upid: UPID,
     filter: Option<&dyn Fn(&BackupManifest) -> bool>,
-    _snap_lock: Dir,
+    _snap_lock: BackupLockGuard,
 ) -> Result<bool, Error> {
     let manifest = match verify_worker.datastore.load_manifest(backup_dir) {
         Ok((manifest, _)) => manifest,
-- 
2.30.2






More information about the pbs-devel mailing list