[pbs-devel] [PATCH RESEND proxmox-backup v2 3/3] GC: S3: phase2: do not force delete for every list iteration
Christian Ebner
c.ebner at proxmox.com
Fri Nov 21 13:04:40 CET 2025
unfortunately you were a bit faster, as I was just about to reply with
another corner case that needs to be fixed. See inline.
On 11/21/25 12:54 PM, Fabian Grünbichler wrote:
> force delete if the last iteration that started with an empty delete list was
> started more than 5 minutes ago, and at the very end after the last iteration,
> instead of after processing every batch of 1000 listed objects. this
> reduces the number of delete calls made to the backend, making regular
> garbage collections that do not delete most objects cheaper, but means
> holding the flocks for garbage chunks/objects longer.
>
> Suggested-by: Christian Ebner <c.ebner at proxmox.com>
> Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
> ---
>
> Notes:
> v1->v2: reworked to use age-based cutoff
>
> resent with trailer and commit message fixed up, thanks Chris!
>
> pbs-datastore/src/datastore.rs | 25 +++++++++++++++++--------
> 1 file changed, 17 insertions(+), 8 deletions(-)
>
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index e9d6b46f3..4cac12406 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -22,7 +22,7 @@ use proxmox_sys::error::SysError;
> use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions};
> use proxmox_sys::linux::procfs::MountInfo;
> use proxmox_sys::process_locker::{ProcessLockExclusiveGuard, ProcessLockSharedGuard};
> -use proxmox_time::TimeSpan;
> +use proxmox_time::{epoch_i64, TimeSpan};
> use proxmox_worker_task::WorkerTaskContext;
>
> use pbs_api_types::{
> @@ -60,6 +60,8 @@ const NAMESPACE_MARKER_FILENAME: &str = ".namespace";
> const CHUNK_LOCK_TIMEOUT: Duration = Duration::from_secs(3 * 60 * 60);
> // s3 deletion batch size to avoid 1024 open files soft limit
> const S3_DELETE_BATCH_LIMIT: usize = 100;
> +// max defer time for s3 batch deletions
> +const S3_DELETE_DEFER_LIMIT_SECONDS: i64 = 60 * 5;
>
> /// checks if auth_id is owner, or, if owner is a token, if
> /// auth_id is the user of the token
> @@ -1660,6 +1662,7 @@ impl DataStore {
> .context("failed to list chunk in s3 object store")?;
>
> let mut delete_list = Vec::with_capacity(S3_DELETE_BATCH_LIMIT);
> + let mut delete_list_age = epoch_i64();
>
> let s3_delete_batch = |delete_list: &mut Vec<(S3ObjectKey, BackupLockGuard)>,
> s3_client: &Arc<S3Client>|
> @@ -1742,16 +1745,12 @@ impl DataStore {
> drop(_guard);
>
> // limit pending deletes to avoid holding too many chunk flocks
> - if delete_list.len() >= S3_DELETE_BATCH_LIMIT {
> + if delete_list.len() >= S3_DELETE_BATCH_LIMIT
> + || epoch_i64() - delete_list_age > S3_DELETE_DEFER_LIMIT_SECONDS
> + {
> s3_delete_batch(&mut delete_list, s3_client)?;
this needs to reset the delete list age as well, otherwise the next
set of batches being processed might run into this condition again,
even with an empty delete list, if the listing is slow for some reason
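
something along these lines should do, I think (untested sketch, rest
of the hunk as in your patch):

    if delete_list.len() >= S3_DELETE_BATCH_LIMIT
        || epoch_i64() - delete_list_age > S3_DELETE_DEFER_LIMIT_SECONDS
    {
        s3_delete_batch(&mut delete_list, s3_client)?;
        // the queue of pending deletes is empty again, so restart the
        // age measurement here as well
        delete_list_age = epoch_i64();
    }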
> }
> }
> -
> - // delete the last batch of objects, if there are any remaining
> - if !delete_list.is_empty() {
> - s3_delete_batch(&mut delete_list, s3_client)?;
> - }
> -
> // Process next batch of chunks if there is more
> if list_bucket_result.is_truncated {
> list_bucket_result =
> @@ -1759,11 +1758,21 @@ impl DataStore {
> &prefix,
> list_bucket_result.next_continuation_token.as_deref(),
> ))?;
> + if delete_list.is_empty() {
> + // reset delete list age while queue is empty
> + delete_list_age = epoch_i64();
> + }
> continue;
> }
>
> break;
> }
> +
> + // delete the last batch of objects, if there are any remaining
> + if !delete_list.is_empty() {
> + s3_delete_batch(&mut delete_list, s3_client)?;
> + }
> +
> info!("processed {chunk_count} total chunks");
>
> // Phase 2 GC of Filesystem backed storage is phase 3 for S3 backed GC