[pbs-devel] [PATCH proxmox-backup v2 1/1] GC: log progress output for phase 2 on datastores backed by s3
Christian Ebner
c.ebner at proxmox.com
Wed Jan 21 11:45:37 CET 2026
Currently there is no progress output for phase 2 of garbage
collection on datastores with an s3 backend.

To avoid log spam, introduce a rate limited logger and increase its
log interval with the runtime of the phase, capping the number of
log lines as follows:
- 30 seconds within the first hour, max 120 lines
- 1 minute within the [1..3] hour range, max 60 lines per hour
- 5 minutes within the [3..24] hour range, max 12 lines per hour
- 15 minutes after the first 24 hours, max 96 lines per day

List request statistics are gathered and shown as well, to give some
basic insight into S3 API performance.
Example output:
```
...
Start GC phase2 (sweep unused chunks)
2961 chunks processed, removing 0 B in 0 chunks, 0 pending (3 total list requests, 4 ms average)
5961 chunks processed, removing 0 B in 0 chunks, 0 pending (6 total list requests, 1 ms average)
8949 chunks processed, removing 0 B in 0 chunks, 0 pending (9 total list requests, 1 ms average)
processed 9125 chunks, 0 B in 0 chunks deleted
...
```
Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 1:
- switch from dedicated thread for logging to inline rate limited logger
- adapt log rate intervals as suggested
- extend statistics
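
As a quick standalone illustration of the interval schedule (the
`interval_for` helper and the sampled elapsed times are mine, not part
of the patch, but mirror the match arms below):

```
use std::time::Duration;

// Escalation schedule from the patch as a pure function: maps the
// elapsed phase 2 runtime to the current log interval.
fn interval_for(elapsed: Duration) -> Duration {
    match elapsed {
        e if e >= Duration::from_secs(24 * 60 * 60) => Duration::from_secs(15 * 60),
        e if e >= Duration::from_secs(3 * 60 * 60) => Duration::from_secs(5 * 60),
        e if e >= Duration::from_secs(60 * 60) => Duration::from_secs(60),
        _ => Duration::from_secs(30),
    }
}

fn main() {
    // first hour: every 30s -> max 120 lines
    assert_eq!(interval_for(Duration::from_secs(10 * 60)), Duration::from_secs(30));
    // [1..3] hours: every minute -> max 60 lines per hour
    assert_eq!(interval_for(Duration::from_secs(90 * 60)), Duration::from_secs(60));
    // [3..24] hours: every 5 minutes -> max 12 lines per hour
    assert_eq!(interval_for(Duration::from_secs(12 * 3600)), Duration::from_secs(5 * 60));
    // after the first day: every 15 minutes -> max 96 lines per day
    assert_eq!(interval_for(Duration::from_secs(30 * 3600)), Duration::from_secs(15 * 60));
}
```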
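The average request duration shown in the log line is derived from a
running total and only divided by the request count at the time a line
is actually emitted. A small sketch with made-up sample durations:

```
use std::time::Duration;

fn main() {
    // hypothetical durations of three list requests
    let samples = [
        Duration::from_millis(4),
        Duration::from_millis(1),
        Duration::from_millis(1),
    ];

    // accumulate the total, divide once when reporting
    let mut total_request_duration = Duration::ZERO;
    let mut list_request_count: u32 = 0;
    for sample in samples {
        total_request_duration += sample;
        list_request_count += 1;
    }
    let average = total_request_duration
        .checked_div(list_request_count)
        .unwrap_or_default();
    assert_eq!(average, Duration::from_millis(2));
    println!("{list_request_count} list requests, {} ms average", average.as_millis());
}
```
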
pbs-datastore/src/datastore.rs | 98 +++++++++++++++++++++++++++++++++-
1 file changed, 96 insertions(+), 2 deletions(-)
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 7ad3d917d..51dcde3d8 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -4,11 +4,12 @@ use std::os::unix::ffi::OsStrExt;
 use std::os::unix::io::AsRawFd;
 use std::path::{Path, PathBuf};
 use std::sync::{Arc, LazyLock, Mutex};
-use std::time::{Duration, SystemTime};
+use std::time::{Duration, Instant, SystemTime};
 
 use anyhow::{bail, format_err, Context, Error};
 use http_body_util::BodyExt;
 use hyper::body::Bytes;
+use hyper::http::method::Method;
 use nix::unistd::{unlinkat, UnlinkatFlags};
 use pbs_tools::lru_cache::LruCache;
 use tokio::io::AsyncWriteExt;
@@ -1778,14 +1779,20 @@ impl DataStore {
 
         if let Some(ref s3_client) = s3_client {
             let mut chunk_count = 0;
+            let mut list_request_count = 0;
             let prefix = S3PathPrefix::Some(".chunks/".to_string());
             // Operates in batches of 1000 objects max per request
             let mut list_bucket_result =
                 proxmox_async::runtime::block_on(s3_client.list_objects_v2(&prefix, None))
                     .context("failed to list chunk in s3 object store")?;
+            let mut total_request_duration =
+                s3_client.last_request_duration().unwrap_or_default();
+            list_request_count += 1;
 
             let mut delete_list =
                 S3DeleteList::with_thresholds(S3_DELETE_BATCH_LIMIT, S3_DELETE_DEFER_LIMIT_SECONDS);
+            let mut rate_limited_logger = RateLimitedLogger::with_interval(Duration::from_secs(30));
+
             loop {
                 for content in list_bucket_result.contents {
                     worker.check_abort()?;
@@ -1861,6 +1868,38 @@ impl DataStore {
                     // limit pending deletes to avoid holding too many chunk flocks
                     delete_list.conditional_delete_and_drop_locks(s3_client)?;
                 }
+
+                match rate_limited_logger.elapsed_since_creation() {
+                    e if e >= Duration::from_secs(24 * 60 * 60) => {
+                        // max 96 lines per day after the first 24 hours
+                        rate_limited_logger.update_interval(Duration::from_secs(15 * 60))
+                    }
+                    e if e >= Duration::from_secs(3 * 60 * 60) => {
+                        // max 12 lines per hour for the [3..24] hour range
+                        rate_limited_logger.update_interval(Duration::from_secs(5 * 60))
+                    }
+                    e if e >= Duration::from_secs(60 * 60) => {
+                        // max 60 lines per hour for the [1..3] hour range
+                        rate_limited_logger.update_interval(Duration::from_secs(60))
+                    }
+                    // max 120 lines for the first hour (initial 30s log interval)
+                    _ => (),
+                }
+
+                rate_limited_logger.log_with(|| {
+                    let average_request_duration = total_request_duration
+                        .checked_div(list_request_count)
+                        .unwrap_or_default();
+                    format!(
+                        "{chunk_count} chunks processed, removing {} in {} chunks, {} pending ({} total list requests, {} ms average)",
+                        HumanByte::from(gc_status.removed_bytes),
+                        gc_status.removed_chunks,
+                        delete_list.len(),
+                        s3_client.request_count(Method::GET),
+                        average_request_duration.as_millis(),
+                    )
+                });
+
                 // Process next batch of chunks if there is more
                 if list_bucket_result.is_truncated {
                     list_bucket_result =
@@ -1868,6 +1907,11 @@ impl DataStore {
                             &prefix,
                             list_bucket_result.next_continuation_token.as_deref(),
                         ))?;
+                    list_request_count += 1;
+                    if let Some(duration) = s3_client.last_request_duration() {
+                        total_request_duration += duration;
+                    }
+
                     continue;
                 }
 
@@ -1877,7 +1921,11 @@ impl DataStore {
             // delete the last batch of objects, if there are any remaining
             delete_list.delete_and_drop_locks(s3_client)?;
 
-            info!("processed {chunk_count} total chunks");
+            info!(
+                "processed {chunk_count} chunks, {} in {} chunks deleted",
+                HumanByte::from(gc_status.removed_bytes),
+                gc_status.removed_chunks,
+            );
 
             // Phase 2 GC of Filesystem backed storage is phase 3 for S3 backed GC
             info!("Start GC phase3 (sweep unused chunk markers)");
@@ -2876,4 +2924,50 @@ impl S3DeleteList {
         }
         Ok(())
     }
+
+    /// Current number of object keys pending deletion
+    fn len(&self) -> usize {
+        self.list.len()
+    }
 }
+
+struct RateLimitedLogger {
+    interval: Duration,
+    start_time: Instant,
+    last_log_time: Instant,
+}
+
+impl RateLimitedLogger {
+    /// Create a rate limited logger with the given initial log interval.
+    fn with_interval(interval: Duration) -> Self {
+        let now = Instant::now();
+        Self {
+            interval,
+            start_time: now,
+            last_log_time: now,
+        }
+    }
+
+    /// Set the maximum log frequency to the given interval.
+    fn update_interval(&mut self, interval: Duration) {
+        self.interval = interval;
+    }
+
+    /// Returns the age of the logger instance as time elapsed since creation.
+    fn elapsed_since_creation(&self) -> Duration {
+        self.start_time.elapsed()
+    }
+
+    /// Invokes the callback and logs the returned message, but only if at
+    /// least the configured interval has elapsed since the last log line.
+    fn log_with<F>(&mut self, callback: F)
+    where
+        F: Fn() -> String,
+    {
+        let now = Instant::now();
+        if now.duration_since(self.last_log_time) >= self.interval {
+            info!("{}", callback());
+            self.last_log_time = now;
+        }
+    }
+}
--
2.47.3