[pbs-devel] [RFC proxmox-backup 4/4] garbage collection: read pruned snapshot index files from trash

Christian Ebner c.ebner at proxmox.com
Wed Apr 16 16:18:03 CEST 2025


Snapshots pruned during phase 1 are now guaranteed to be included in
the marking phase: their index files are read from the trash and the
chunks they reference are touched as well.

Clear any pre-existing trash before starting phase 1, so that only
snapshots pruned during this garbage collection run are considered.

Further, drop the retry logic that was previously used to ensure that
a newly written index file is still included in the marking phase when
the group's previously last snapshot is pruned concurrently. This is
no longer necessary, since that pruned index file will now be read
from the trash instead.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
 pbs-datastore/src/datastore.rs | 141 +++++++++++++++------------------
 1 file changed, 65 insertions(+), 76 deletions(-)

diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 97b78f000..688e65247 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -1137,7 +1137,13 @@ impl DataStore {
         //
         // By this it is assured that all index files are used, even if they would not have been
         // seen by the regular logic and the user is informed by the garbage collection run about
-        // the detected index files not following the iterators logic.
+        // the detected index files not following the iterators logic. Further, include trashed
+        // snapshots which have been pruned during garbage collections marking phase.
+
+        let trash = PathBuf::from(".trash/");
+        let mut full_trash_path = self.base_path();
+        full_trash_path.push(&trash);
+        let _ = std::fs::remove_dir_all(full_trash_path);
 
         let mut unprocessed_index_list = self.list_index_files(None)?;
         let mut index_count = unprocessed_index_list.len();
@@ -1154,88 +1160,63 @@ impl DataStore {
             let namespace = namespace.context("iterating namespaces failed")?;
             for group in arc_self.iter_backup_groups(namespace)? {
                 let group = group.context("iterating backup groups failed")?;
+                let mut snapshots = match group.list_backups() {
+                    Ok(snapshots) => snapshots,
+                    Err(err) => {
+                        if group.exists() {
+                            return Err(err).context("listing snapshots failed")?;
+                        }
+                        // vanished, will be covered by trashed list below to avoid
+                        // not touching known chunks.
+                        continue;
+                    }
+                };
 
-                // Avoid race between listing/marking of snapshots by GC and pruning the last
-                // snapshot in the group, following a new snapshot creation. Otherwise known chunks
-                // might only be referenced by the new snapshot, so it must be read as well.
-                let mut retry_counter = 0;
-                'retry: loop {
-                    let _lock = match retry_counter {
-                        0..=9 => None,
-                        10 => Some(
-                            group
-                                .lock()
-                                .context("exhausted retries and failed to lock group")?,
-                        ),
-                        _ => bail!("exhausted retries and unexpected counter overrun"),
-                    };
-
-                    let mut snapshots = match group.list_backups() {
-                        Ok(snapshots) => snapshots,
-                        Err(err) => {
-                            if group.exists() {
-                                return Err(err).context("listing snapshots failed")?;
-                            }
-                            break 'retry;
+                BackupInfo::sort_list(&mut snapshots, true);
+                for snapshot in snapshots.into_iter() {
+                    for file in snapshot.files {
+                        worker.check_abort()?;
+                        worker.fail_on_shutdown()?;
+
+                        match ArchiveType::from_path(&file) {
+                            Ok(ArchiveType::FixedIndex) | Ok(ArchiveType::DynamicIndex) => (),
+                            Ok(ArchiveType::Blob) | Err(_) => continue,
                         }
-                    };
-
-                    // Always start iteration with the last snapshot of the group to reduce race
-                    // window with concurrent backup+prune previous last snapshot. Allows to retry
-                    // without the need to keep track of already processed index files for the
-                    // current group.
-                    BackupInfo::sort_list(&mut snapshots, true);
-                    for (count, snapshot) in snapshots.into_iter().rev().enumerate() {
-                        for file in snapshot.files {
-                            worker.check_abort()?;
-                            worker.fail_on_shutdown()?;
-
-                            match ArchiveType::from_path(&file) {
-                                Ok(ArchiveType::FixedIndex) | Ok(ArchiveType::DynamicIndex) => (),
-                                Ok(ArchiveType::Blob) | Err(_) => continue,
-                            }
 
-                            let mut path = snapshot.backup_dir.full_path();
-                            path.push(file);
-
-                            let index = match self.open_index_reader(&path)? {
-                                Some(index) => index,
-                                None => {
-                                    unprocessed_index_list.remove(&path);
-                                    if count == 0 {
-                                        retry_counter += 1;
-                                        continue 'retry;
-                                    }
-                                    continue;
-                                }
-                            };
-
-                            self.index_mark_used_chunks(
-                                index,
-                                &path,
-                                &mut chunk_lru_cache,
-                                status,
-                                worker,
-                            )?;
-
-                            if !unprocessed_index_list.remove(&path) {
-                                info!("Encountered new index file '{path:?}', increment total index file count");
-                                index_count += 1;
-                            }
+                        let mut path = snapshot.backup_dir.full_path();
+                        path.push(file);
 
-                            let percentage = (processed_index_files + 1) * 100 / index_count;
-                            if percentage > last_percentage {
-                                info!(
-                                    "marked {percentage}% ({} of {index_count} index files)",
-                                    processed_index_files + 1,
-                                );
-                                last_percentage = percentage;
+                        let index = match self.open_index_reader(&path)? {
+                            Some(index) => index,
+                            None => {
+                                unprocessed_index_list.remove(&path);
+                                continue;
                             }
-                            processed_index_files += 1;
+                        };
+
+                        self.index_mark_used_chunks(
+                            index,
+                            &path,
+                            &mut chunk_lru_cache,
+                            status,
+                            worker,
+                        )?;
+
+                        if !unprocessed_index_list.remove(&path) {
+                            info!("Encountered new index file '{path:?}', increment total index file count");
+                            index_count += 1;
                         }
-                    }
 
-                    break;
+                        let percentage = (processed_index_files + 1) * 100 / index_count;
+                        if percentage > last_percentage {
+                            info!(
+                                "marked {percentage}% ({} of {index_count} index files)",
+                                processed_index_files + 1,
+                            );
+                            last_percentage = percentage;
+                        }
+                        processed_index_files += 1;
+                    }
                 }
             }
         }
@@ -1257,6 +1238,14 @@ impl DataStore {
             warn!("Found {strange_paths_count} index files outside of expected directory scheme");
         }
 
+        let trashed_index_list = self.list_index_files(Some(trash))?;
+        for path in trashed_index_list {
+            if let Some(index) = self.open_index_reader(&path)? {
+                info!("Mark chunks for pruned index file found in {path:?}");
+                self.index_mark_used_chunks(index, &path, &mut chunk_lru_cache, status, worker)?;
+            };
+        }
+
         Ok(())
     }
 
-- 
2.39.5





More information about the pbs-devel mailing list