[pbs-devel] [RFC PATCH 1/5] fix #3935: datastore/api2/backup: move datastore locking to '/run'
Stefan Sterz
s.sterz at proxmox.com
Fri Mar 18 15:06:51 CET 2022
to avoid issues when removing a group or snapshot directory where two
threads hold a lock to the same directory, move locking to the tmpfs
backed '/run' directory
Signed-off-by: Stefan Sterz <s.sterz at proxmox.com>
---
pbs-datastore/src/datastore.rs | 74 ++++++++++++++++++++++------
pbs-datastore/src/snapshot_reader.rs | 12 +----
src/api2/backup/mod.rs | 5 +-
src/api2/reader/mod.rs | 7 +--
src/backup/verify.rs | 8 +--
5 files changed, 66 insertions(+), 40 deletions(-)
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index d416c8d8..389ac298 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -15,7 +15,7 @@ use proxmox_sys::fs::{replace_file, file_read_optional_string, CreateOptions};
use proxmox_sys::process_locker::ProcessLockSharedGuard;
use proxmox_sys::WorkerTaskContext;
use proxmox_sys::{task_log, task_warn};
-use proxmox_sys::fs::{lock_dir_noblock, DirLockGuard};
+use proxmox_sys::fs::DirLockGuard;
use pbs_api_types::{
UPID, DataStoreConfig, Authid, GarbageCollectionStatus, HumanByte,
@@ -288,11 +288,9 @@ impl DataStore {
/// Remove a complete backup group including all snapshots, returns true
/// if all snapshots were removed, and false if some were protected
pub fn remove_backup_group(&self, backup_group: &BackupGroup) -> Result<bool, Error> {
-
+ let _guard = self.lock_group(backup_group)?;
let full_path = self.group_path(backup_group);
- let _guard = proxmox_sys::fs::lock_dir_noblock(&full_path, "backup group", "possible running backup")?;
-
log::info!("removing backup group {:?}", full_path);
let mut removed_all = true;
@@ -322,13 +320,11 @@ impl DataStore {
}
/// Remove a backup directory including all content
- pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) -> Result<(), Error> {
-
- let full_path = self.snapshot_path(backup_dir);
-
+ pub fn remove_backup_dir(&self, backup_dir: &BackupDir, force: bool) -> Result<(), Error> {
let (_guard, _manifest_guard);
+
if !force {
- _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+ _guard = self.lock_snapshot(backup_dir)?;
_manifest_guard = self.lock_manifest(backup_dir)?;
}
@@ -336,6 +332,8 @@ impl DataStore {
bail!("cannot remove protected snapshot");
}
+ let full_path = self.snapshot_path(backup_dir);
+
log::info!("removing backup snapshot {:?}", full_path);
std::fs::remove_dir_all(&full_path)
.map_err(|err| {
@@ -438,13 +436,13 @@ impl DataStore {
// create the last component now
match std::fs::create_dir(&full_path) {
Ok(_) => {
- let guard = lock_dir_noblock(&full_path, "backup group", "another backup is already running")?;
+ let guard = self.lock_group(backup_group)?;
self.set_owner(backup_group, auth_id, false)?;
let owner = self.get_owner(backup_group)?; // just to be sure
Ok((owner, guard))
}
Err(ref err) if err.kind() == io::ErrorKind::AlreadyExists => {
- let guard = lock_dir_noblock(&full_path, "backup group", "another backup is already running")?;
+ let guard = self.lock_group(backup_group)?;
let owner = self.get_owner(backup_group)?; // just to be sure
Ok((owner, guard))
}
@@ -462,8 +460,7 @@ impl DataStore {
let mut full_path = self.base_path();
full_path.push(&relative_path);
- let lock = ||
- lock_dir_noblock(&full_path, "snapshot", "internal error - tried creating snapshot that's already in use");
+ let lock = || self.lock_snapshot(backup_dir);
match std::fs::create_dir(&full_path) {
Ok(_) => Ok((relative_path, true, lock()?)),
@@ -883,9 +880,7 @@ impl DataStore {
backup_dir: &BackupDir,
protection: bool
) -> Result<(), Error> {
- let full_path = self.snapshot_path(backup_dir);
-
- let _guard = lock_dir_noblock(&full_path, "snapshot", "possibly running or in use")?;
+ let _guard = self.lock_snapshot(backup_dir)?;
let protected_path = backup_dir.protected_file(self.base_path());
if protection {
@@ -952,4 +947,51 @@ impl DataStore {
Ok(chunk_list)
}
+
+ fn snapshot_lock_path(&self, backup_dir: &BackupDir) -> Result<PathBuf, Error> {
+ let path = Path::new("/run/proxmox-backup/locks")
+ .join(self.name())
+ .join(backup_dir.relative_path());
+
+ std::fs::create_dir_all(&path)?;
+
+ Ok(path)
+ }
+
+ fn group_lock_path(&self, group: &BackupGroup) -> Result<PathBuf, Error> {
+ let path = Path::new("/run/proxmox-backup/locks")
+ .join(self.name())
+ .join(group.group_path());
+
+ std::fs::create_dir_all(&path)?;
+
+ Ok(path)
+ }
+
+ /// Lock a snapshot exclusively
+ pub fn lock_snapshot(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
+ proxmox_sys::fs::lock_dir_noblock(
+ &self.snapshot_lock_path(backup_dir)?,
+ "snapshot",
+ "possibly running or in use",
+ )
+ }
+
+ /// Acquire a shared lock on a snapshot
+ pub fn lock_snapshot_shared(&self, backup_dir: &BackupDir) -> Result<DirLockGuard, Error> {
+ proxmox_sys::fs::lock_dir_noblock_shared(
+ &self.snapshot_lock_path(backup_dir)?,
+ "snapshot",
+ "possibly running or in use",
+ )
+ }
+
+ /// Lock a group exclusively
+ pub fn lock_group(&self, group: &BackupGroup) -> Result<DirLockGuard, Error> {
+ proxmox_sys::fs::lock_dir_noblock(
+ &self.group_lock_path(group)?,
+ "backup group",
+ "another backup is already running",
+ )
+ }
}
diff --git a/pbs-datastore/src/snapshot_reader.rs b/pbs-datastore/src/snapshot_reader.rs
index 1bbf57e7..30993b0d 100644
--- a/pbs-datastore/src/snapshot_reader.rs
+++ b/pbs-datastore/src/snapshot_reader.rs
@@ -6,8 +6,6 @@ use std::fs::File;
use anyhow::{bail, Error};
use nix::dir::Dir;
-use proxmox_sys::fs::lock_dir_noblock_shared;
-
use crate::backup_info::BackupDir;
use crate::index::IndexFile;
use crate::fixed_index::FixedIndexReader;
@@ -29,13 +27,7 @@ impl SnapshotReader {
/// Lock snapshot, reads the manifest and returns a new instance
pub fn new(datastore: Arc<DataStore>, snapshot: BackupDir) -> Result<Self, Error> {
-
- let snapshot_path = datastore.snapshot_path(&snapshot);
-
- let locked_dir = lock_dir_noblock_shared(
- &snapshot_path,
- "snapshot",
- "locked by another operation")?;
+ let locked_dir = datastore.lock_snapshot_shared(&snapshot)?;
let datastore_name = datastore.name().to_string();
@@ -47,7 +39,7 @@ impl SnapshotReader {
}
};
- let mut client_log_path = snapshot_path;
+ let mut client_log_path = datastore.snapshot_path(&snapshot);
client_log_path.push(CLIENT_LOG_BLOB_NAME);
let mut file_list = Vec::new();
diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs
index 395edd3d..6e38f3da 100644
--- a/src/api2/backup/mod.rs
+++ b/src/api2/backup/mod.rs
@@ -20,7 +20,7 @@ use pbs_api_types::{
BACKUP_ID_SCHEMA, BACKUP_TIME_SCHEMA, BACKUP_TYPE_SCHEMA, DATASTORE_SCHEMA,
CHUNK_DIGEST_SCHEMA, PRIV_DATASTORE_BACKUP, BACKUP_ARCHIVE_NAME_SCHEMA,
};
-use proxmox_sys::fs::lock_dir_noblock_shared;
+
use pbs_tools::json::{required_array_param, required_integer_param, required_string_param};
use pbs_config::CachedUserInfo;
use pbs_datastore::{DataStore, PROXMOX_BACKUP_PROTOCOL_ID_V1};
@@ -157,8 +157,7 @@ async move {
}
// lock last snapshot to prevent forgetting/pruning it during backup
- let full_path = datastore.snapshot_path(&last.backup_dir);
- Some(lock_dir_noblock_shared(&full_path, "snapshot", "base snapshot is already locked by another operation")?)
+ Some(datastore.lock_snapshot_shared(&last.backup_dir)?)
} else {
None
};
diff --git a/src/api2/reader/mod.rs b/src/api2/reader/mod.rs
index 2b11d1b1..f472686a 100644
--- a/src/api2/reader/mod.rs
+++ b/src/api2/reader/mod.rs
@@ -20,7 +20,7 @@ use pbs_api_types::{
CHUNK_DIGEST_SCHEMA, PRIV_DATASTORE_READ, PRIV_DATASTORE_BACKUP,
BACKUP_ARCHIVE_NAME_SCHEMA,
};
-use proxmox_sys::fs::lock_dir_noblock_shared;
+
use pbs_tools::json::{required_integer_param, required_string_param};
use pbs_datastore::{DataStore, PROXMOX_BACKUP_READER_PROTOCOL_ID_V1};
use pbs_datastore::backup_info::BackupDir;
@@ -114,10 +114,7 @@ fn upgrade_to_backup_reader_protocol(
}
}
- let _guard = lock_dir_noblock_shared(
- &datastore.snapshot_path(&backup_dir),
- "snapshot",
- "locked by another operation")?;
+ let _guard = datastore.lock_snapshot_shared(&backup_dir)?;
let path = datastore.base_path();
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index 307d366c..eea0e15d 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -13,7 +13,6 @@ use pbs_datastore::{DataStore, DataBlob, StoreProgress};
use pbs_datastore::backup_info::{BackupGroup, BackupDir, BackupInfo};
use pbs_datastore::index::IndexFile;
use pbs_datastore::manifest::{archive_type, ArchiveType, BackupManifest, FileInfo};
-use proxmox_sys::fs::lock_dir_noblock_shared;
use crate::tools::parallel_handler::ParallelHandler;
@@ -300,11 +299,8 @@ pub fn verify_backup_dir(
upid: UPID,
filter: Option<&dyn Fn(&BackupManifest) -> bool>,
) -> Result<bool, Error> {
- let snap_lock = lock_dir_noblock_shared(
- &verify_worker.datastore.snapshot_path(backup_dir),
- "snapshot",
- "locked by another operation",
- );
+ let snap_lock = verify_worker.datastore.lock_snapshot_shared(&backup_dir);
+
match snap_lock {
Ok(snap_lock) => {
verify_backup_dir_with_lock(verify_worker, backup_dir, upid, filter, snap_lock)
--
2.30.2
More information about the pbs-devel
mailing list