[pbs-devel] [PATCH proxmox-backup 6/7] api: chunk upload: fix race with garbage collection for no-cache on s3
Fabian Grünbichler
f.gruenbichler at proxmox.com
Mon Oct 6 15:18:13 CEST 2025
On October 6, 2025 12:41 pm, Christian Ebner wrote:
> Chunks uploaded to the s3 backend are never inserted into the local
> datastore cache. The presence of the chunk marker file is, however,
> required for garbage collection to not clean up the chunks. While the
> marker files are created during phase 1 of garbage collection for
> indexed chunks, this is not the case for in-progress backups with the
> no-cache flag set.
>
> Therefore, mark chunks as in-progress while they are being uploaded,
> just like in the regular cached mode, but replace the marker with the
> zero-sized chunk marker file once the upload has finished, to avoid
> incorrect cleanup by garbage collection.
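to summarize the intended lifecycle, a minimal sketch (paths and helper
names here are illustrative, not the actual chunk store layout):

use std::{fs, io, path::Path};

// before the backend upload starts: create the in-progress marker
fn insert_marker(marker: &Path) -> io::Result<()> {
    fs::OpenOptions::new().write(true).create_new(true).open(marker)?;
    Ok(())
}

// on upload success: atomically turn the marker into the zero-sized
// chunk marker file, so garbage collection keeps the chunk
fn persist_marker(marker: &Path, chunk: &Path) -> io::Result<()> {
    fs::rename(marker, chunk)
}

// on upload failure: drop the marker again so a retry can start fresh
fn cleanup_marker(marker: &Path) -> io::Result<()> {
    fs::remove_file(marker)
}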
>
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
>  pbs-datastore/src/chunk_store.rs | 13 +++++++++++++
>  pbs-datastore/src/datastore.rs   |  7 +++++++
>  src/api2/backup/upload_chunk.rs  | 12 ++++++++++--
>  3 files changed, 30 insertions(+), 2 deletions(-)
>
> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
> index 22efe4a32..7fd92b626 100644
> --- a/pbs-datastore/src/chunk_store.rs
> +++ b/pbs-datastore/src/chunk_store.rs
> @@ -594,6 +594,19 @@ impl ChunkStore {
>          Ok(())
>      }
> 
> +    pub(crate) fn persist_backend_upload_marker(&self, digest: &[u8; 32]) -> Result<(), Error> {
> +        if self.datastore_backend_type == DatastoreBackendType::Filesystem {
> +            bail!("cannot persist backend upload marker, not a cache store");
> +        }
> +        let (marker_path, _digest_str) = self.chunk_backed_upload_marker_path(digest);
> +        let (chunk_path, digest_str) = self.chunk_path(digest);
> +        let _lock = self.mutex.lock();
> +
> +        std::fs::rename(marker_path, chunk_path).map_err(|err| {
> +            format_err!("persisting backend upload marker failed for {digest_str} - {err}")
> +        })
> +    }
> +
>      pub(crate) fn cleanup_backend_upload_marker(&self, digest: &[u8; 32]) -> Result<(), Error> {
>          if self.datastore_backend_type == DatastoreBackendType::Filesystem {
>              bail!("cannot cleanup backend upload marker, not a cache store");
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index 58fb863ec..8b0d4ab5c 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -1894,6 +1894,13 @@ impl DataStore {
>          self.inner.chunk_store.insert_backend_upload_marker(digest)
>      }
> 
> +    /// Persist the backend upload marker as a zero-sized chunk marker.
> +    ///
> +    /// Marks the chunk as present in the local store cache without inserting its payload.
> +    pub fn persist_backend_upload_marker(&self, digest: &[u8; 32]) -> Result<(), Error> {
> +        self.inner.chunk_store.persist_backend_upload_marker(digest)
> +    }
> +
>      /// Remove the marker file signaling an in-progress upload to the backend
>      pub fn cleanup_backend_upload_marker(&self, digest: &[u8; 32]) -> Result<(), Error> {
>          self.inner.chunk_store.cleanup_backend_upload_marker(digest)
> diff --git a/src/api2/backup/upload_chunk.rs b/src/api2/backup/upload_chunk.rs
> index d4b1850eb..35d873ebf 100644
> --- a/src/api2/backup/upload_chunk.rs
> +++ b/src/api2/backup/upload_chunk.rs
> @@ -263,10 +263,18 @@ async fn upload_to_backend(
>
>      if env.no_cache {
>          let object_key = pbs_datastore::s3::object_key_from_digest(&digest)?;
> -        let is_duplicate = s3_client
> +        env.datastore.insert_backend_upload_marker(&digest)?;
this has the same issue as patch #5 - if two clients attempt to upload
the same digest concurrently, then one of them will fail and abort the
backup..
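one way to avoid that (sketch only, assuming the marker file is created
with create_new): treat an already existing marker as "another client is
uploading this chunk" instead of failing the whole backup:

use std::{fs, io, path::Path};

fn try_insert_marker(marker: &Path) -> io::Result<bool> {
    match fs::OpenOptions::new().write(true).create_new(true).open(marker) {
        // we created the marker, so we own the upload
        Ok(_) => Ok(true),
        // another client beat us to it - not an error
        Err(err) if err.kind() == io::ErrorKind::AlreadyExists => Ok(false),
        Err(err) => Err(err),
    }
}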
> +        let is_duplicate = match s3_client
>              .upload_no_replace_with_retry(object_key, data)
>              .await
> -            .map_err(|err| format_err!("failed to upload chunk to s3 backend - {err:#}"))?;
> +        {
> +            Ok(is_duplicate) => is_duplicate,
> +            Err(err) => {
> +                env.datastore.cleanup_backend_upload_marker(&digest)?;
> +                bail!("failed to upload chunk to s3 backend - {err:#}");
> +            }
> +        };
> +        env.datastore.persist_backend_upload_marker(&digest)?;
and if this fails, the corresponding chunk can never be uploaded again..
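one option (snippet, not compile-tested, using the helpers from this
patch): clean up the marker on the error path (best effort), so that a
later retry can start over instead of hitting a stale marker:

// avoid leaving a stale in-progress marker behind, otherwise
// insert_backend_upload_marker() refuses any future upload of this digest
if let Err(err) = env.datastore.persist_backend_upload_marker(&digest) {
    let _ = env.datastore.cleanup_backend_upload_marker(&digest);
    bail!("failed to persist backend upload marker - {err:#}");
}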
>          return Ok((digest, size, encoded_size, is_duplicate));
>      }
>
> --
> 2.47.3