[pbs-devel] [PATCH proxmox-backup v7 31/38] api: backup: use local datastore cache on s3 backend chunk upload

Christian Ebner c.ebner at proxmox.com
Thu Jul 10 19:07:21 CEST 2025


Take advantage of the local datastore cache to avoid re-uploading of
already known chunks. This not only helps to improve the backup/upload
speed, but also avoids additional costs by reducing the number of
requests and the amount of payload data transferred to the S3 object
store API.

If the cache is present, look up whether it contains the chunk and skip
the upload altogether if it does. Otherwise, keep the chunk in memory,
upload it to the S3 object store API and insert it into the local
datastore cache afterwards.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 6:
- no changes

 src/api2/backup/upload_chunk.rs | 36 +++++++++++++++++++++++++++++++--
 src/server/pull.rs              |  4 ++++
 2 files changed, 38 insertions(+), 2 deletions(-)
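Note for reviewers: below is a minimal, standalone sketch of the decision
flow that the upload_chunk.rs hunk implements. The LocalCache struct and
the upload_to_s3 function are hypothetical stand-ins, not the actual
datastore cache or S3 client API; they only illustrate the control flow.

    use std::collections::HashSet;

    type Digest = [u8; 32];

    /// Hypothetical stand-in for the local datastore cache.
    struct LocalCache {
        known: HashSet<Digest>,
    }

    impl LocalCache {
        fn contains(&self, digest: &Digest) -> bool {
            self.known.contains(digest)
        }
        fn insert(&mut self, digest: Digest) {
            self.known.insert(digest);
        }
    }

    /// Hypothetical stand-in for the S3 upload; returns whether the
    /// object already existed on the backend.
    fn upload_to_s3(_digest: &Digest, _data: &[u8]) -> Result<bool, String> {
        Ok(false)
    }

    /// Skip the S3 upload if the chunk is already cached, otherwise
    /// upload first and only insert into the cache afterwards.
    fn upload_chunk(
        cache: &mut LocalCache,
        digest: Digest,
        data: &[u8],
    ) -> Result<bool, String> {
        if cache.contains(&digest) {
            // Chunk is known locally, so it is considered present on the backend.
            return Ok(true);
        }
        let is_duplicate = upload_to_s3(&digest, data)?;
        // Cache insertion happens only after a successful upload, for consistency.
        cache.insert(digest);
        Ok(is_duplicate)
    }

    fn main() {
        let mut cache = LocalCache { known: HashSet::new() };
        let digest = [0u8; 32];
        let first = upload_chunk(&mut cache, digest, b"chunk data").unwrap();
        let second = upload_chunk(&mut cache, digest, b"chunk data").unwrap();
        println!("first duplicate: {first}, second skipped: {second}");
    }

The ordering (upload first, cache insert afterwards) mirrors the comment in
the hunk: a chunk file present in the local cache store is taken to mean the
chunk exists on the backend, so the cache must never get ahead of the upload.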

diff --git a/src/api2/backup/upload_chunk.rs b/src/api2/backup/upload_chunk.rs
index 3ad8c3c75..d97975b34 100644
--- a/src/api2/backup/upload_chunk.rs
+++ b/src/api2/backup/upload_chunk.rs
@@ -2,7 +2,7 @@ use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 
-use anyhow::{bail, format_err, Error};
+use anyhow::{bail, format_err, Context as AnyhowContext, Error};
 use futures::*;
 use hex::FromHex;
 use http_body_util::{BodyDataStream, BodyExt};
@@ -262,8 +262,40 @@ async fn upload_to_backend(
                 );
             }
 
+            // Avoid re-upload to S3 if the chunk is either present in the LRU cache or the chunk
+            // file exists on the filesystem. The latter means the chunk was present in the past
+            // and not cleaned up by garbage collection, so it is still in the S3 object store.
+            if env.datastore.cache_contains(&digest) {
+                tracing::info!("Skip upload of cached chunk {}", hex::encode(digest));
+                return Ok((digest, size, encoded_size, true));
+            }
+            if let Ok(true) = env.datastore.cond_touch_chunk(&digest, false) {
+                tracing::info!(
+                    "Skip upload of already encountered chunk {}",
+                    hex::encode(digest)
+                );
+                return Ok((digest, size, encoded_size, true));
+            }
+
+            tracing::info!("Upload of new chunk {}", hex::encode(digest));
             let object_key = pbs_datastore::s3::object_key_from_digest(&digest)?;
-            let is_duplicate = s3_client.upload_with_retry(object_key, data, false).await?;
+            let is_duplicate = s3_client
+                .upload_with_retry(object_key, data.clone(), false)
+                .await
+                .context("failed to upload chunk to s3 backend")?;
+
+            // Only insert the chunk into the cache after it has been successfully uploaded.
+            // Although less performant than doing this in parallel, it is required for consistency,
+            // since chunks are considered present on the backend if the file exists in the local
+            // cache store.
+            let datastore = env.datastore.clone();
+            tracing::info!("Caching of chunk {}", hex::encode(digest));
+            let _ = tokio::task::spawn_blocking(move || {
+                let chunk = DataBlob::from_raw(data.to_vec())?;
+                datastore.cache_insert(&digest, &chunk)
+            })
+            .await?;
+
             Ok((digest, size, encoded_size, is_duplicate))
         }
     }
diff --git a/src/server/pull.rs b/src/server/pull.rs
index fe87359ab..e34766226 100644
--- a/src/server/pull.rs
+++ b/src/server/pull.rs
@@ -173,6 +173,10 @@ async fn pull_index_chunks<I: IndexFile>(
                     target2.insert_chunk(&chunk, &digest)?;
                 }
                 DatastoreBackend::S3(s3_client) => {
+                    if target2.cache_contains(&digest) {
+                        return Ok(());
+                    }
+                    target2.cache_insert(&digest, &chunk)?;
                     let data = chunk.raw_data().to_vec();
                     let upload_data = hyper::body::Bytes::from(data);
                     let object_key = pbs_datastore::s3::object_key_from_digest(&digest)?;
-- 
2.47.2
