[pbs-devel] [PATCH proxmox-backup v2 1/8] GC: Move S3 delete list state and logic to a dedicated struct
Fabian Grünbichler
f.gruenbichler at proxmox.com
Wed Jan 14 10:18:48 CET 2026
On January 14, 2026 9:22 am, Christian Ebner wrote:
> On 1/13/26 11:22 AM, Fabian Grünbichler wrote:
>> On December 11, 2025 4:38 pm, Christian Ebner wrote:
>>> To better keep track of the state and declutter the code at the
>>> callsites, bundle the S3 delete list and its logic into a dedicated
>>> struct. Since the check for empty lists is now performed as part
>>> of the deletion-related methods, the callsites can drop that check.
>>>
>>> Further, avoid proxmox_time::epoch_i64() and use SystemTime and
>>> Duration with their methods directly.
>>>
>>> Suggested-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
>>> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
>>> ---
>>> pbs-datastore/src/datastore.rs | 132 +++++++++++++++++++--------------
>>> 1 file changed, 75 insertions(+), 57 deletions(-)
>>>
>>> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
>>> index 9c57aaac1..58fd034fc 100644
>>> --- a/pbs-datastore/src/datastore.rs
>>> +++ b/pbs-datastore/src/datastore.rs
>>> @@ -24,7 +24,7 @@ use proxmox_sys::error::SysError;
>>> use proxmox_sys::fs::{file_read_optional_string, replace_file, CreateOptions};
>>> use proxmox_sys::linux::procfs::MountInfo;
>>> use proxmox_sys::process_locker::{ProcessLockExclusiveGuard, ProcessLockSharedGuard};
>>> -use proxmox_time::{epoch_i64, TimeSpan};
>>
>> we already use epoch_i64 for other calculations in this module, why not
>> keep it?
>
> Do you mean with respect to the module-level import or with respect to
> using the epoch_i64() helper? As far as I can see, this is only used once
> to get the phase 1 start time, apart from the delete list age calculations.
>
> Not sure what we would gain from the latter? This helper just wraps around
> SystemTime anyway [0], so I opted to use that directly instead of
> converting to the Unix epoch and doing the calculations there.
I meant with respect to using it ;) IMHO it's a bit easier to just
handle i64 values than SystemTime/Duration, but either is fine.
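
e.g. a rough, untested sketch of the i64 variant I had in mind (reusing the
struct and method names from your patch):

    struct S3DeleteList {
        list: Vec<(S3ObjectKey, BackupLockGuard)>,
        first_entry_added: i64, // unix epoch seconds, set on first push
        age_threshold: i64,     // seconds
        capacity_threshold: usize,
    }

    impl S3DeleteList {
        fn push(&mut self, key: S3ObjectKey, guard: BackupLockGuard) {
            // set age based on first insertion
            if self.list.is_empty() {
                self.first_entry_added = proxmox_time::epoch_i64();
            }
            self.list.push((key, guard));
        }

        fn conditional_delete_and_drop_locks(
            &mut self,
            s3_client: &Arc<S3Client>,
        ) -> Result<(), Error> {
            // plain integer comparison - a backwards clock jump just makes
            // the difference negative, there is no error case to handle
            if self.list.len() >= self.capacity_threshold
                || (!self.list.is_empty()
                    && proxmox_time::epoch_i64() - self.first_entry_added > self.age_threshold)
            {
                self.delete_and_drop_locks(s3_client)?;
            }
            Ok(())
        }
    }

(S3_DELETE_DEFER_LIMIT_SECONDS would then simply stay an i64.)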
>
> [0]
> https://rustdoc.intra.proxmox.com/trixie/packages/proxmox/src/proxmox_time/posix.rs.html#81-93
>
>>
>>> +use proxmox_time::TimeSpan;
>>> use proxmox_worker_task::WorkerTaskContext;
>>>
>>> use pbs_api_types::{
>>> @@ -64,7 +64,7 @@ const CHUNK_LOCK_TIMEOUT: Duration = Duration::from_secs(3 * 60 * 60);
>>> // s3 deletion batch size to avoid 1024 open files soft limit
>>> const S3_DELETE_BATCH_LIMIT: usize = 100;
>>> // max defer time for s3 batch deletions
>>> -const S3_DELETE_DEFER_LIMIT_SECONDS: i64 = 60 * 5;
>>> +const S3_DELETE_DEFER_LIMIT_SECONDS: Duration = Duration::from_secs(60 * 5);
>>>
>>> /// checks if auth_id is owner, or, if owner is a token, if
>>> /// auth_id is the user of the token
>>> @@ -1689,40 +1689,8 @@ impl DataStore {
>>> proxmox_async::runtime::block_on(s3_client.list_objects_v2(&prefix, None))
>>> .context("failed to list chunk in s3 object store")?;
>>>
>>> - let mut delete_list = Vec::with_capacity(S3_DELETE_BATCH_LIMIT);
>>> - let mut delete_list_age = epoch_i64();
>>> -
>>> - let s3_delete_batch = |delete_list: &mut Vec<(S3ObjectKey, BackupLockGuard)>,
>>> - s3_client: &Arc<S3Client>|
>>> - -> Result<(), Error> {
>>> - let delete_objects_result = proxmox_async::runtime::block_on(
>>> - s3_client.delete_objects(
>>> - &delete_list
>>> - .iter()
>>> - .map(|(key, _)| key.clone())
>>> - .collect::<Vec<S3ObjectKey>>(),
>>> - ),
>>> - )?;
>>> - if let Some(_err) = delete_objects_result.error {
>>> - bail!("failed to delete some objects");
>>> - }
>>> - // drops all chunk flock guards
>>> - delete_list.clear();
>>> - Ok(())
>>> - };
>>> -
>>> - let add_to_delete_list =
>>> - |delete_list: &mut Vec<(S3ObjectKey, BackupLockGuard)>,
>>> - delete_list_age: &mut i64,
>>> - key: S3ObjectKey,
>>> - _chunk_guard: BackupLockGuard| {
>>> - // set age based on first insertion
>>> - if delete_list.is_empty() {
>>> - *delete_list_age = epoch_i64();
>>> - }
>>> - delete_list.push((key, _chunk_guard));
>>> - };
>>> -
>>> + let mut delete_list =
>>> + S3DeleteList::with_thresholds(S3_DELETE_BATCH_LIMIT, S3_DELETE_DEFER_LIMIT_SECONDS);
>>> loop {
>>> for content in list_bucket_result.contents {
>>> worker.check_abort()?;
>>> @@ -1779,12 +1747,7 @@ impl DataStore {
>>> std::fs::remove_file(chunk_path)?;
>>> }
>>> }
>>> - add_to_delete_list(
>>> - &mut delete_list,
>>> - &mut delete_list_age,
>>> - content.key,
>>> - _chunk_guard,
>>> - );
>>> + delete_list.push(content.key, _chunk_guard);
>>> Ok(())
>>> },
>>> )?;
>>> @@ -1792,12 +1755,7 @@ impl DataStore {
>>> } else {
>>> gc_status.removed_chunks += 1;
>>> gc_status.removed_bytes += content.size;
>>> - add_to_delete_list(
>>> - &mut delete_list,
>>> - &mut delete_list_age,
>>> - content.key,
>>> - _chunk_guard,
>>> - );
>>> + delete_list.push(content.key, _chunk_guard);
>>> }
>>>
>>> chunk_count += 1;
>>> @@ -1806,12 +1764,7 @@ impl DataStore {
>>> drop(_guard);
>>>
>>> // limit pending deletes to avoid holding too many chunk flocks
>>> - if delete_list.len() >= S3_DELETE_BATCH_LIMIT
>>> - || (!delete_list.is_empty()
>>> - && epoch_i64() - delete_list_age > S3_DELETE_DEFER_LIMIT_SECONDS)
>>> - {
>>> - s3_delete_batch(&mut delete_list, s3_client)?;
>>> - }
>>> + delete_list.conditional_delete_and_drop_locks(s3_client)?;
>>> }
>>> // Process next batch of chunks if there is more
>>> if list_bucket_result.is_truncated {
>>> @@ -1827,9 +1780,7 @@ impl DataStore {
>>> }
>>>
>>> // delete the last batch of objects, if there are any remaining
>>> - if !delete_list.is_empty() {
>>> - s3_delete_batch(&mut delete_list, s3_client)?;
>>> - }
>>> + delete_list.delete_and_drop_locks(s3_client)?;
>>>
>>> info!("processed {chunk_count} total chunks");
>>>
>>> @@ -2768,3 +2719,70 @@ impl DataStore {
>>> result
>>> }
>>> }
>>> +
>>> +/// Track S3 object keys to be deleted by garbage collection while holding their file lock.
>>> +struct S3DeleteList {
>>> + list: Vec<(S3ObjectKey, BackupLockGuard)>,
>>> + first_entry_added: SystemTime,
>>> + age_threshold: Duration,
>>> + capacity_threshold: usize,
>>> +}
>>> +
>>> +impl S3DeleteList {
>>> + /// Create a new list instance with given capacity and age thresholds.
>>> + fn with_thresholds(capacity_threshold: usize, age_threshold: Duration) -> Self {
>>> + Self {
>>> + first_entry_added: SystemTime::now(), // init only, updated once added
>>> + list: Vec::with_capacity(capacity_threshold),
>>> + age_threshold,
>>> + capacity_threshold,
>>> + }
>>> + }
>>> +
>>> + /// Pushes the current key and backup lock guard to the list, updating the delete list age if
>>> + /// the list was empty before the insert.
>>> + fn push(&mut self, key: S3ObjectKey, guard: BackupLockGuard) {
>>> + // set age based on first insertion
>>> + if self.list.is_empty() {
>>> + self.first_entry_added = SystemTime::now();
>>> + }
>>> + self.list.push((key, guard));
>>> + }
>>> +
>>> + /// Delete the objects in the list via the provided S3 client instance.
>>> + /// Clears the list contents and frees the per-chunk file locks.
>>> + fn delete_and_drop_locks(&mut self, s3_client: &Arc<S3Client>) -> Result<(), Error> {
>>> + if self.list.is_empty() {
>>> + return Ok(());
>>> + }
>>> + let delete_objects_result = proxmox_async::runtime::block_on(
>>> + s3_client.delete_objects(
>>> + &self
>>> + .list
>>> + .iter()
>>> + .map(|(key, _)| key.clone())
>>> + .collect::<Vec<S3ObjectKey>>(),
>>> + ),
>>> + )?;
>>> + if delete_objects_result.error.is_some() {
>>> + bail!("failed to delete some objects");
>>> + }
>>> + // drops all chunk flock guards
>>> + self.list.clear();
>>> + Ok(())
>>> + }
>>> +
>>> + /// Delete the objects stored in the list if either the capacity threshold or the delete
>>> + /// list age threshold is exceeded.
>>> + fn conditional_delete_and_drop_locks(
>>> + &mut self,
>>> + s3_client: &Arc<S3Client>,
>>> + ) -> Result<(), Error> {
>>> + if self.list.len() >= self.capacity_threshold
>>> + || (!self.list.is_empty() && self.first_entry_added.elapsed()? > self.age_threshold)
>>
>> this bails if the clock jumps backwards (further than the
>> first_entry_added timestamp). maybe we should instead always delete? or
>> at least add a meaningful error message/context..
>
> Yes indeed! Deleting the batch in case the clock jumped into the past is
> probably the best option here, as the local chunk marker file is no
> longer present at this point anyway.
>
> Will adapt the code accordingly.
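
sounds good! something along these lines should work (untested sketch, just
to illustrate the fallback - treat a failed elapsed() as "old enough" and
flush the batch):

    fn conditional_delete_and_drop_locks(
        &mut self,
        s3_client: &Arc<S3Client>,
    ) -> Result<(), Error> {
        // elapsed() fails if the clock jumped back past first_entry_added,
        // in that case flush the batch instead of bailing
        let age_exceeded = self
            .first_entry_added
            .elapsed()
            .map(|elapsed| elapsed > self.age_threshold)
            .unwrap_or(true);
        if self.list.len() >= self.capacity_threshold
            || (!self.list.is_empty() && age_exceeded)
        {
            self.delete_and_drop_locks(s3_client)?;
        }
        Ok(())
    }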
>
>>
>>> + {
>>> + self.delete_and_drop_locks(s3_client)?;
>>> + }
>>> + Ok(())
>>> + }
>>> +}
>>> --
>>> 2.47.3