[pbs-devel] [PATCH proxmox-backup 3/7] datastore: prevent in-use deletion with locks instead of heuristic

Stefan Reiter s.reiter at proxmox.com
Tue Aug 4 12:42:01 CEST 2020


Attempt to lock the backup directory to be deleted; if that succeeds,
keep the lock until the deletion is complete. This way we ensure that no
other locking operation (e.g. using a snapshot as base for another
backup) can happen concurrently.

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---

For this to actually work the following patches are obviously necessary, but I
wanted to keep them separate for review.

 src/backup/backup_info.rs | 55 +++++++++++++++++++++++++++++++++++----
 src/backup/datastore.rs   | 48 ++++++++--------------------------
 2 files changed, 61 insertions(+), 42 deletions(-)

diff --git a/src/backup/backup_info.rs b/src/backup/backup_info.rs
index ea917d3c..c35928ce 100644
--- a/src/backup/backup_info.rs
+++ b/src/backup/backup_info.rs
@@ -41,7 +41,7 @@ lazy_static!{
 }
 
 /// Opaque type releasing the corresponding flock when dropped
-pub type BackupGroupGuard = Dir;
+pub type BackupLockGuard = Dir;
 
 /// BackupGroup is a directory containing a list of BackupDir
 #[derive(Debug, Eq, PartialEq, Hash, Clone)]
@@ -91,7 +91,11 @@ impl BackupGroup {
             let backup_dir = BackupDir::new(self.backup_type.clone(), self.backup_id.clone(), dt.timestamp());
             let files = list_backup_files(l2_fd, backup_time)?;
 
-            list.push(BackupInfo { backup_dir, files });
+            list.push(BackupInfo {
+                backup_dir,
+                files,
+                base_path: base_path.to_owned()
+            });
 
             Ok(())
         })?;
@@ -137,7 +141,7 @@ impl BackupGroup {
         Ok(last)
     }
 
-    pub fn lock(&self, base_path: &Path) -> Result<BackupGroupGuard, Error> {
+    pub fn lock(&self, base_path: &Path) -> Result<BackupLockGuard, Error> {
         use nix::fcntl::OFlag;
         use nix::sys::stat::Mode;
 
@@ -299,6 +303,8 @@ pub struct BackupInfo {
     pub backup_dir: BackupDir,
     /// List of data files
     pub files: Vec<String>,
+    /// Full path to dir containing backup_dir
+    pub base_path: PathBuf,
 }
 
 impl BackupInfo {
@@ -309,7 +315,7 @@ impl BackupInfo {
 
         let files = list_backup_files(libc::AT_FDCWD, &path)?;
 
-        Ok(BackupInfo { backup_dir, files })
+        Ok(BackupInfo { backup_dir, files, base_path: base_path.to_owned() })
     }
 
     /// Finds the latest backup inside a backup group
@@ -354,7 +360,11 @@ impl BackupInfo {
 
                     let files = list_backup_files(l2_fd, backup_time)?;
 
-                    list.push(BackupInfo { backup_dir, files });
+                    list.push(BackupInfo {
+                        backup_dir,
+                        files,
+                        base_path: base_path.to_owned()
+                    });
 
                     Ok(())
                 })
@@ -367,6 +377,41 @@ impl BackupInfo {
         // backup is considered unfinished if there is no manifest
         self.files.iter().any(|name| name == super::MANIFEST_BLOB_NAME)
     }
+
+    pub fn lock(&self) -> Result<BackupLockGuard, Error> {
+        use nix::fcntl::OFlag;
+        use nix::sys::stat::Mode;
+
+        let mut path = self.base_path.clone();
+        let dir = self.backup_dir.relative_path();
+        path.push(&dir);
+
+        let mut handle = Dir::open(&path, OFlag::O_RDONLY, Mode::empty())
+            .map_err(|err| {
+                format_err!(
+                    "unable to open snapshot directory {:?} for locking - {}",
+                    &dir,
+                    err,
+                )
+            })?;
+
+        // acquire in non-blocking mode, no point in waiting here since other
+        // backups could still take a very long time
+        proxmox::tools::fs::lock_file(&mut handle, true, Some(Duration::from_nanos(0)))
+            .map_err(|err| {
+                format_err!(
+                    "unable to acquire lock on snapshot {:?} - {}",
+                    &dir,
+                    if err.would_block() {
+                        String::from("snapshot is running or being used as base")
+                    } else {
+                        err.to_string()
+                    }
+                )
+            })?;
+
+        Ok(handle)
+    }
 }
 
 fn list_backup_files<P: ?Sized + nix::NixPath>(dirfd: RawFd, path: &P) -> Result<Vec<String>, Error> {
diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
index ffd64b81..3c374302 100644
--- a/src/backup/datastore.rs
+++ b/src/backup/datastore.rs
@@ -11,7 +11,7 @@ use serde_json::Value;
 
 use proxmox::tools::fs::{replace_file, CreateOptions};
 
-use super::backup_info::{BackupGroup, BackupGroupGuard, BackupDir, BackupInfo};
+use super::backup_info::{BackupGroup, BackupLockGuard, BackupDir, BackupInfo};
 use super::chunk_store::ChunkStore;
 use super::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
 use super::fixed_index::{FixedIndexReader, FixedIndexWriter};
@@ -199,19 +199,13 @@ impl DataStore {
 
         let full_path = self.group_path(backup_group);
 
-        let mut snap_list = backup_group.list_backups(&self.base_path())?;
-        BackupInfo::sort_list(&mut snap_list, false);
-        for snap in snap_list {
-            if snap.is_finished() {
-                break;
-            } else {
-                bail!(
-                    "cannot remove backup group {:?}, contains potentially running backup: {}",
-                    full_path,
-                    snap.backup_dir
-                );
-            }
-        }
+        let _guard = backup_group.lock(&self.base_path()).map_err(|err| {
+            format_err!(
+                "cannot acquire lock on backup group {}: {}",
+                backup_group,
+                err
+            )
+        })?;
 
         log::info!("removing backup group {:?}", full_path);
         std::fs::remove_dir_all(&full_path)
@@ -231,29 +225,9 @@ impl DataStore {
 
         let full_path = self.snapshot_path(backup_dir);
 
+        let _guard;
         if !force {
-            let mut snap_list = backup_dir.group().list_backups(&self.base_path())?;
-            BackupInfo::sort_list(&mut snap_list, false);
-            let mut prev_snap_finished = true;
-            for snap in snap_list {
-                let cur_snap_finished = snap.is_finished();
-                if &snap.backup_dir == backup_dir {
-                    if !cur_snap_finished {
-                        bail!(
-                            "cannot remove currently running snapshot: {:?}",
-                            backup_dir
-                        );
-                    }
-                    if !prev_snap_finished {
-                        bail!(
-                            "cannot remove snapshot {:?}, successor is currently running and potentially based on it",
-                            backup_dir
-                        );
-                    }
-                    break;
-                }
-                prev_snap_finished = cur_snap_finished;
-            }
+            _guard = BackupInfo::new(&self.base_path(), backup_dir.clone())?.lock()?;
         }
 
         log::info!("removing backup snapshot {:?}", full_path);
@@ -326,7 +300,7 @@ impl DataStore {
     /// current owner (instead of setting the owner).
     ///
     /// This also aquires an exclusive lock on the directory and returns the lock guard.
-    pub fn create_locked_backup_group(&self, backup_group: &BackupGroup, userid: &str) -> Result<(String, BackupGroupGuard), Error> {
+    pub fn create_locked_backup_group(&self, backup_group: &BackupGroup, userid: &str) -> Result<(String, BackupLockGuard), Error> {
 
         // create intermediate path first:
         let base_path = self.base_path();
-- 
2.20.1






More information about the pbs-devel mailing list