[pbs-devel] [PATCH proxmox-backup v2 1/1] GC: log progress output for phase 2 on datastores backed by s3

Christian Ebner c.ebner at proxmox.com
Wed Jan 21 11:45:37 CET 2026


Currently there is no progress output for phase 2 of garbage
collection on datastores with an s3 backend.

To avoid log spamming, introduce a rate limited logger which increases
the logging interval the longer the phase runs, capping the number of
progress lines per time range (see the sketch below):
- 30 seconds within the first hour, max 120 lines
- 1 minute during the second hour, max 60 lines
- 5 minutes for the [3..24] hour range, max 12 lines per hour
- 15 minutes after the first 24 hours, max 96 lines per day
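
This corresponds roughly to the following mapping from elapsed phase 2
runtime to logging interval (a standalone sketch, the helper name is
illustrative; the patch inlines the equivalent match in the GC loop):
```
use std::time::Duration;

fn log_interval(elapsed: Duration) -> Duration {
    if elapsed >= Duration::from_secs(24 * 60 * 60) {
        Duration::from_secs(15 * 60) // after the first day: max 96 lines per day
    } else if elapsed >= Duration::from_secs(3 * 60 * 60) {
        Duration::from_secs(5 * 60) // [3..24] hour range: max 12 lines per hour
    } else if elapsed >= Duration::from_secs(60 * 60) {
        Duration::from_secs(60) // second hour: max 60 lines
    } else {
        Duration::from_secs(30) // first hour: max 120 lines
    }
}
```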

List request statistics are gathered and displayed as well, giving
basic insight into S3 API performance.
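
The displayed average is a running mean over all list requests so far,
folded in per request along these lines (standalone sketch, the helper
name is illustrative; the per-request timing comes from the S3 client):
```
use std::time::Duration;

/// Fold a new sample into a running mean, where `count` already
/// includes the new sample (so count >= 1).
fn update_mean(mean: Duration, sample: Duration, count: u32) -> Duration {
    // restore the previous total, add the new sample, divide again
    (mean * (count - 1) + sample)
        .checked_div(count)
        .unwrap_or_default()
}
```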

Example output:
```
...
Start GC phase2 (sweep unused chunks)
2961 chunks processed, removing 0 B in 0 chunks, 0 pending (3 total list requests, 4 ms average)
5961 chunks processed, removing 0 B in 0 chunks, 0 pending (6 total list requests, 1 ms average)
8949 chunks processed, removing 0 B in 0 chunks, 0 pending (9 total list requests, 1 ms average)
processed 9125 chunks, 0 B in 0 chunks deleted
...
```

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 1:
- switch from dedicated thread for logging to inline rate limited logger
- adapt log rate intervals as suggested
- extend statistics

 pbs-datastore/src/datastore.rs | 98 +++++++++++++++++++++++++++++++++-
 1 file changed, 96 insertions(+), 2 deletions(-)

diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 7ad3d917d..51dcde3d8 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -4,11 +4,12 @@ use std::os::unix::ffi::OsStrExt;
 use std::os::unix::io::AsRawFd;
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, LazyLock, Mutex};
-use std::time::{Duration, SystemTime};
+use std::time::{Duration, Instant, SystemTime};
 
 use anyhow::{bail, format_err, Context, Error};
 use http_body_util::BodyExt;
 use hyper::body::Bytes;
+use hyper::http::method::Method;
 use nix::unistd::{unlinkat, UnlinkatFlags};
 use pbs_tools::lru_cache::LruCache;
 use tokio::io::AsyncWriteExt;
@@ -1778,14 +1779,20 @@ impl DataStore {
 
         if let Some(ref s3_client) = s3_client {
             let mut chunk_count = 0;
+            let mut list_request_count = 0;
             let prefix = S3PathPrefix::Some(".chunks/".to_string());
             // Operates in batches of 1000 objects max per request
             let mut list_bucket_result =
                 proxmox_async::runtime::block_on(s3_client.list_objects_v2(&prefix, None))
                     .context("failed to list chunk in s3 object store")?;
+            let mut average_request_duration =
+                s3_client.last_request_duration().unwrap_or_default();
+            list_request_count += 1;
 
             let mut delete_list =
                 S3DeleteList::with_thresholds(S3_DELETE_BATCH_LIMIT, S3_DELETE_DEFER_LIMIT_SECONDS);
+            let mut rate_limited_logger = RateLimitedLogger::with_interval(Duration::from_secs(30));
+
             loop {
                 for content in list_bucket_result.contents {
                     worker.check_abort()?;
@@ -1861,6 +1868,35 @@ impl DataStore {
                     // limit pending deletes to avoid holding too many chunk flocks
                     delete_list.conditional_delete_and_drop_locks(s3_client)?;
                 }
+
+                match rate_limited_logger.elapsed_since_creation() {
+                    e if e >= Duration::from_secs(24 * 60 * 60) => {
+                        // max 96 lines per additional day
+                        rate_limited_logger.update_interval(Duration::from_secs(15 * 60))
+                    }
+                    e if e >= Duration::from_secs(3 * 60 * 60) => {
+                        // max 12 lines per hour for [3..24] hour interval
+                        rate_limited_logger.update_interval(Duration::from_secs(5 * 60))
+                    }
+                    e if e >= Duration::from_secs(60 * 60) => {
+                        // max 60 lines for [1..2] hour interval
+                        rate_limited_logger.update_interval(Duration::from_secs(60))
+                    }
+                    // max 120 lines for first hour (with initial 30s log interval)
+                    _ => (),
+                }
+
+                rate_limited_logger.log_with(|| {
+                    format!(
+                        "{chunk_count} chunks processed, removing {} in {} chunks, {} pending ({} total list requests, {} ms average)",
+                        HumanByte::from(gc_status.removed_bytes),
+                        gc_status.removed_chunks,
+                        delete_list.len(),
+                        s3_client.request_count(Method::GET),
+                        average_request_duration.as_millis(),
+                    )
+                });
+
                 // Process next batch of chunks if there is more
                 if list_bucket_result.is_truncated {
                     list_bucket_result =
@@ -1868,6 +1904,14 @@ impl DataStore {
                             &prefix,
                             list_bucket_result.next_continuation_token.as_deref(),
                         ))?;
+                    list_request_count += 1;
+                    if let Some(duration) = s3_client.last_request_duration() {
+                        // running mean: restore previous total (avg * (n - 1)), add sample, divide by n
+                        average_request_duration = (average_request_duration
+                            * (list_request_count - 1) + duration)
+                            .checked_div(list_request_count)
+                            .unwrap_or_default();
+                    }
                     continue;
                 }
 
@@ -1877,7 +1921,11 @@ impl DataStore {
             // delete the last batch of objects, if there are any remaining
             delete_list.delete_and_drop_locks(s3_client)?;
 
-            info!("processed {chunk_count} total chunks");
+            info!(
+                "processed {chunk_count} chunks, {} in {} chunks deleted",
+                HumanByte::from(gc_status.removed_bytes),
+                gc_status.removed_chunks,
+            );
 
             // Phase 2 GC of Filesystem backed storage is phase 3 for S3 backed GC
             info!("Start GC phase3 (sweep unused chunk markers)");
@@ -2876,4 +2924,50 @@ impl S3DeleteList {
         }
         Ok(())
     }
+
+    /// Current number of object keys pending deletion
+    fn len(&self) -> usize {
+        self.list.len()
+    }
+}
+
+struct RateLimitedLogger {
+    interval: Duration,
+    start_time: Instant,
+    last_log_time: Instant,
+}
+
+impl RateLimitedLogger {
+    /// Create a rate limited logger with the given minimum interval between log lines.
+    fn with_interval(interval: Duration) -> Self {
+        let now = Instant::now();
+        Self {
+            interval,
+            start_time: now,
+            last_log_time: now,
+        }
+    }
+
+    /// Set the minimum interval between log lines.
+    fn update_interval(&mut self, interval: Duration) {
+        self.interval = interval;
+    }
+
+    /// Returns the age of the logger instance as time elapsed since creation.
+    fn elapsed_since_creation(&self) -> Duration {
+        self.start_time.elapsed()
+    }
+
+    /// Invoke the provided callback and log the returned value if at least
+    /// the configured interval has elapsed since the last log line.
+    fn log_with<F>(&mut self, callback: F)
+    where
+        F: Fn() -> String,
+    {
+        let now = Instant::now();
+        if now.duration_since(self.last_log_time) >= self.interval {
+            info!("{}", callback());
+            self.last_log_time = now;
+        }
+    }
 }
-- 
2.47.3