[pbs-devel] [PATCH proxmox-backup 10/17] datastore: implement per-chunk file locking helper for s3 backend
Fabian Grünbichler
f.gruenbichler at proxmox.com
Mon Nov 3 15:51:05 CET 2025
On November 3, 2025 12:31 pm, Christian Ebner wrote:
> Adds a datastore helper method to create per-chunk file locks. These
> will be used to guard chunk operations on s3 backends to guarantee
> exclusive access when performing cache and backend operations.
>
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> pbs-datastore/src/backup_info.rs | 2 +-
> pbs-datastore/src/chunk_store.rs | 26 ++++++++++++++++++++++++++
> pbs-datastore/src/datastore.rs | 12 ++++++++++++
> 3 files changed, 39 insertions(+), 1 deletion(-)
>
> diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs
> index 4b10b6435..70c0fbe8a 100644
> --- a/pbs-datastore/src/backup_info.rs
> +++ b/pbs-datastore/src/backup_info.rs
> @@ -936,7 +936,7 @@ fn lock_file_path_helper(ns: &BackupNamespace, path: PathBuf) -> PathBuf {
> /// deletion.
> ///
> /// It also creates the base directory for lock files.
> -fn lock_helper<F>(
> +pub(crate) fn lock_helper<F>(
> store_name: &str,
> path: &std::path::Path,
> lock_fn: F,
> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
> index ba7618e40..49687b2fa 100644
> --- a/pbs-datastore/src/chunk_store.rs
> +++ b/pbs-datastore/src/chunk_store.rs
> @@ -8,6 +8,7 @@ use anyhow::{bail, format_err, Context, Error};
> use tracing::{info, warn};
>
> use pbs_api_types::{DatastoreFSyncLevel, GarbageCollectionStatus};
> +use pbs_config::BackupLockGuard;
> use proxmox_io::ReadExt;
> use proxmox_s3_client::S3Client;
> use proxmox_sys::fs::{create_dir, create_path, file_type_from_file_stat, CreateOptions};
> @@ -16,6 +17,7 @@ use proxmox_sys::process_locker::{
> };
> use proxmox_worker_task::WorkerTaskContext;
>
> +use crate::backup_info::DATASTORE_LOCKS_DIR;
> use crate::data_blob::DataChunkBuilder;
> use crate::file_formats::{
> COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0,
> @@ -759,6 +761,30 @@ impl ChunkStore {
> ChunkStore::check_permissions(lockfile_path, 0o644)?;
> Ok(())
> }
> +
> + /// Generates the path to the chunks lock file
> + pub(crate) fn chunk_lock_path(&self, digest: &[u8]) -> PathBuf {
> + let mut lock_path = Path::new(DATASTORE_LOCKS_DIR).join(self.name.clone());
> + let digest_str = hex::encode(digest);
> + lock_path.push(".chunks");
> + let prefix = digest_to_prefix(digest);
> + lock_path.push(&prefix);
> + lock_path.push(&digest_str);
> + lock_path
should we add "s3" or some other suffix here, so that if we add another
backend in the future we already have backend-specific lock paths?
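something along these lines maybe - just a sketch to illustrate, the
extra 'backend' parameter and the ".s3" file suffix are made up and not
part of the patch:

/// Generates the path to the chunk's lock file, namespaced per backend
// hypothetical variant - suffix the lock file with the backend, e.g.
// ".../.chunks/<prefix>/<digest>.s3", so another backend added later
// gets its own lock files without clashing with the s3 ones
pub(crate) fn chunk_lock_path(&self, digest: &[u8], backend: &str) -> PathBuf {
    let mut lock_path = Path::new(DATASTORE_LOCKS_DIR).join(self.name.clone());
    lock_path.push(".chunks");
    lock_path.push(digest_to_prefix(digest));
    lock_path.push(format!("{}.{}", hex::encode(digest), backend));
    lock_path
}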
> + }
> +
> + /// Get an exclusive lock on the chunks lock file
> + pub(crate) fn lock_chunk(
> + &self,
> + digest: &[u8],
> + timeout: Duration,
> + ) -> Result<BackupLockGuard, Error> {
> + let lock_path = self.chunk_lock_path(digest);
> + let guard = crate::backup_info::lock_helper(self.name(), &lock_path, |path| {
> + pbs_config::open_backup_lockfile(path, Some(timeout), true)
> + })?;
> + Ok(guard)
> + }
> }
>
> #[test]
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index 397c37e56..32f3562b3 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -2568,6 +2568,18 @@ impl DataStore {
> Ok(())
> }
>
> + /// Locks the per chunk lock file if the backend requires it
> + fn lock_chunk_for_backend(&self, digest: &[u8; 32]) -> Result<Option<BackupLockGuard>, Error> {
> + // s3 put request times out after upload_size / 1 Kib/s, so about 2.3 hours for 8 MiB
> + let timeout = Duration::from_secs(3 * 60 * 60);
could move into the S3 branch below.. or the whole helper could be made
S3-specific in the first place, since it is only called/effective there?
the renaming helper would need some rework then, I guess..
but I am not sure the logic here is really sound: an individual caller
waiting longer than a single upload's maximum timeout might still be
valid, since the locking is not fair and multiple locking attempts might
have queued up. I guess the instances where we end up taking this lock
are few enough that, if there has been no progress for such a long time,
making progress within a reasonable time is unlikely anyway..
we currently take this lock for the duration of a chunk
upload/insertion, for the duration of a chunk rename after corruption
has been detected, and for a batch of GC chunk removal.
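roughly what I mean with the first variant - the timeout (and its
comment, reworded by me) scoped to the S3 branch, everything else as in
the patch below:

/// Locks the per chunk lock file if the backend requires it
fn lock_chunk_for_backend(&self, digest: &[u8; 32]) -> Result<Option<BackupLockGuard>, Error> {
    match self.inner.backend_config.ty.unwrap_or_default() {
        DatastoreBackendType::Filesystem => Ok(None),
        DatastoreBackendType::S3 => {
            // s3 put requests time out after upload_size / 1 KiB/s,
            // so about 2.3 hours for an 8 MiB chunk - only relevant here
            let timeout = Duration::from_secs(3 * 60 * 60);
            self.inner.chunk_store.lock_chunk(digest, timeout).map(Some)
        }
    }
}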
> + match self.inner.backend_config.ty.unwrap_or_default() {
> + DatastoreBackendType::Filesystem => Ok(None),
> + DatastoreBackendType::S3 => {
> + self.inner.chunk_store.lock_chunk(digest, timeout).map(Some)
> + }
> + }
> + }
> +
> /// Renames a corrupt chunk, returning the new path if the chunk was renamed successfully.
> /// Returns with `Ok(None)` if the chunk source was not found.
> pub fn rename_corrupt_chunk(&self, digest: &[u8; 32]) -> Result<Option<PathBuf>, Error> {
> --
> 2.47.3
>