[pbs-devel] [PATCH v2 proxmox-backup 4/4] fix #5331: garbage collection: avoid multiple chunk atime updates

Christian Ebner c.ebner at proxmox.com
Mon Mar 10 12:16:34 CET 2025


Reduce the number of atime updates on the same chunk by logically
iterating over image index files, following the incremental backup
logic. By inserting paths for encountered images during
`list_images` using the GroupedImageList structure, the iteration
happens now for the same image filenames in the same image namespace
and group in a order based on the snapshot timestamp. For each image,
keep track of the encountered chunk digests, and remember these as
seen for the next snapshot. Chunks which have been encountered in the
previous image index, but are not present anymore are removed from
the list after each image, in order to reduce memory footprint.

Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=5331
Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 1:
- Use pre-existing datastore iterator helpers, following the logic other
  datastore operations take.
- Chunks are now remembered for all archives per snapshot, not just a
  single archive per snapshot as previously, this mimics more closely
  the backup behaviour.

 pbs-datastore/src/datastore.rs | 117 +++++++++++++++++++++++----------
 1 file changed, 84 insertions(+), 33 deletions(-)

diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index fdbb33a98..a80343d9b 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -25,7 +25,7 @@ use pbs_api_types::{
     MaintenanceMode, MaintenanceType, Operation, UPID,
 };
 
-use crate::backup_info::{BackupDir, BackupGroup};
+use crate::backup_info::{BackupDir, BackupGroup, BackupInfo};
 use crate::chunk_store::ChunkStore;
 use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
 use crate::fixed_index::{FixedIndexReader, FixedIndexWriter};
@@ -970,10 +970,10 @@ impl DataStore {
         ListGroups::new(Arc::clone(self), ns)?.collect()
     }
 
-    fn list_images(&self) -> Result<Vec<PathBuf>, Error> {
+    fn list_images(&self) -> Result<HashSet<PathBuf>, Error> {
         let base = self.base_path();
 
-        let mut list = vec![];
+        let mut list = HashSet::new();
 
         use walkdir::WalkDir;
 
@@ -1021,7 +1021,7 @@ impl DataStore {
                 if archive_type == ArchiveType::FixedIndex
                     || archive_type == ArchiveType::DynamicIndex
                 {
-                    list.push(path);
+                    list.insert(path);
                 }
             }
         }
@@ -1071,6 +1071,7 @@ impl DataStore {
         &self,
         index: Box<dyn IndexFile>,
         file_name: &Path, // only used for error reporting
+        touched_chunks: &mut TouchedChunks,
         status: &mut GarbageCollectionStatus,
         worker: &dyn WorkerTaskContext,
     ) -> Result<(), Error> {
@@ -1081,6 +1082,12 @@ impl DataStore {
             worker.check_abort()?;
             worker.fail_on_shutdown()?;
             let digest = index.index_digest(pos).unwrap();
+
+            // Avoid multiple expensive atime updates by utimensat
+            if touched_chunks.insert(*digest) {
+                continue;
+            }
+
             if !self.inner.chunk_store.cond_touch_chunk(digest, false)? {
                 let hex = hex::encode(digest);
                 warn!(
@@ -1107,43 +1114,87 @@ impl DataStore {
         status: &mut GarbageCollectionStatus,
         worker: &dyn WorkerTaskContext,
     ) -> Result<(), Error> {
-        let image_list = self.list_images()?;
-        let image_count = image_list.len();
-
+        // Iterate twice over the datastore to fetch images, even if this comes with an additional
+        // runtime cost:
+        // - First iteration to find all index files, no matter if they are in a location expected
+        //   by the datastore's hierarchy
+        // - Iterate using the datastore's helpers, so the namespaces, groups and snapshots are
+        //   looked up given the expected hierarchy and iterator logic
+        //
+        // By this it is assured that all index files are used, even if they would not have been
+        // seen by the regular logic and the user is informed by the garbage collection run about
+        // the detected index files not following the iterators logic.
+
+        let mut unprocessed_image_list = self.list_images()?;
+        let image_count = unprocessed_image_list.len();
+
+        // Optimize for avoiding updates of chunks atime in same group for consecutive
+        // snapshots multiple times.
+        let mut touched_chunks = TouchedChunks::new();
+        let mut processed_images = 0;
         let mut last_percentage: usize = 0;
 
-        let mut strange_paths_count: u64 = 0;
-
-        for (i, img) in image_list.into_iter().enumerate() {
-            worker.check_abort()?;
-            worker.fail_on_shutdown()?;
-
-            if let Some(backup_dir_path) = img.parent() {
-                let backup_dir_path = backup_dir_path.strip_prefix(self.base_path())?;
-                if let Some(backup_dir_str) = backup_dir_path.to_str() {
-                    if pbs_api_types::parse_ns_and_snapshot(backup_dir_str).is_err() {
-                        strange_paths_count += 1;
+        let arc_self = Arc::new(self.clone());
+        for namespace in arc_self
+            .recursive_iter_backup_ns(BackupNamespace::root())
+            .context("creating namespace iterator failed")?
+        {
+            let namespace = namespace.context("iterating namespaces failed")?;
+            for group in arc_self.iter_backup_groups(namespace)? {
+                let group = group.context("iterating backup groups failed")?;
+                let mut snapshots = group.list_backups().context("listing snapshots failed")?;
+                // Sort by snapshot timestamp to iterate over consecutive snapshots for each image.
+                BackupInfo::sort_list(&mut snapshots, true);
+                for snapshot in snapshots {
+                    for file in snapshot.files {
+                        worker.check_abort()?;
+                        worker.fail_on_shutdown()?;
+
+                        let mut path = snapshot.backup_dir.full_path();
+                        path.push(file);
+
+                        let index = match self.open_index_reader(&path)? {
+                            Some(index) => index,
+                            None => continue,
+                        };
+                        self.index_mark_used_chunks(
+                            index,
+                            &path,
+                            &mut touched_chunks,
+                            status,
+                            worker,
+                        )?;
+
+                        unprocessed_image_list.remove(&path);
+
+                        let percentage = (processed_images + 1) * 100 / image_count;
+                        if percentage > last_percentage {
+                            info!(
+                                "marked {percentage}% ({} of {image_count} index files)",
+                                processed_images + 1,
+                            );
+                            last_percentage = percentage;
+                        }
+                        processed_images += 1;
                     }
+                    touched_chunks.reset();
                 }
             }
-
-            if let Some(index) = self.open_index_reader(&img)? {
-                self.index_mark_used_chunks(index, &img, status, worker)?;
-            }
-
-            let percentage = (i + 1) * 100 / image_count;
-            if percentage > last_percentage {
-                info!(
-                    "marked {percentage}% ({} of {image_count} index files)",
-                    i + 1,
-                );
-                last_percentage = percentage;
-            }
         }
 
+        let strange_paths_count = unprocessed_image_list.len();
         if strange_paths_count > 0 {
-            info!(
-                "found (and marked) {strange_paths_count} index files outside of expected directory scheme"
+            warn!("found {strange_paths_count} index files outside of expected directory scheme");
+        }
+        for path in unprocessed_image_list {
+            let index = match self.open_index_reader(&path)? {
+                Some(index) => index,
+                None => continue,
+            };
+            self.index_mark_used_chunks(index, &path, &mut touched_chunks, status, worker)?;
+            warn!(
+                "Marked chunks for unexpected index file at '{}'",
+                path.to_string_lossy()
             );
         }
 
-- 
2.39.5





More information about the pbs-devel mailing list