[pbs-devel] [PATCH proxmox-backup v2 2/7] refactor prune.rs - add PruneJob struct for handling prune jobs

Stefan Hanreich s.hanreich at proxmox.com
Wed Nov 30 16:00:57 CET 2022


This encapsulates the previously available methods into a struct that is
responsible for creating PruneJobs. This struct can be used to create a
new, configurable PruneJob. It can handle several configuration options
that needed to be handled by the call sites before. Moving logging and
dry-run into this struct reduces the need for manually writing code for
those two features. Additionally the log messages are unified across all
instances where pruning is used.

This commit only introduces the PruneJob struct, the callsites are only
slightly adjusted so they call the compute_prune_info static method now.
They will be refactored in future commits to use the newly exposed
functionality from PruneJob.

Signed-off-by: Stefan Hanreich <s.hanreich at proxmox.com>
---
 pbs-datastore/src/prune.rs  | 279 ++++++++++++++++++++++--------------
 src/api2/admin/datastore.rs |   4 +-
 src/server/prune_job.rs     |   4 +-
 tests/prune.rs              |   4 +-
 4 files changed, 179 insertions(+), 112 deletions(-)

diff --git a/pbs-datastore/src/prune.rs b/pbs-datastore/src/prune.rs
index 96da5826..b840fef6 100644
--- a/pbs-datastore/src/prune.rs
+++ b/pbs-datastore/src/prune.rs
@@ -1,7 +1,9 @@
 use std::collections::{HashMap, HashSet};
 use std::path::PathBuf;
+use std::sync::Arc;
 
 use anyhow::Error;
+use proxmox_sys::{task_log, task_warn, WorkerTaskContext};
 
 use pbs_api_types::KeepOptions;
 
@@ -36,137 +38,202 @@ impl std::fmt::Display for PruneMark {
     }
 }
 
-fn mark_selections<F: Fn(&BackupInfo) -> Result<String, Error>>(
-    mark: &mut HashMap<PathBuf, PruneMark>,
-    list: &[BackupInfo],
-    keep: usize,
-    select_id: F,
-) -> Result<(), Error> {
-    let mut include_hash = HashSet::new();
-
-    let mut already_included = HashSet::new();
-    for info in list {
-        let backup_id = info.backup_dir.relative_path();
-        if let Some(PruneMark::Keep) = mark.get(&backup_id) {
-            let sel_id: String = select_id(info)?;
-            already_included.insert(sel_id);
-        }
+pub struct PruneJob {
+    marks: Vec<(BackupInfo, PruneMark)>,
+    dry_run: bool,
+    worker: Option<Arc<dyn WorkerTaskContext>>,
+}
+
+impl PruneJob {
+    pub fn new(list: Vec<BackupInfo>, options: &KeepOptions) -> Result<Self, Error> {
+        let mut prune_info = PruneJob::compute_prune_info(list, options)?;
+        prune_info.reverse();
+
+        Ok(Self {
+            marks: prune_info,
+            dry_run: false,
+            worker: None,
+        })
     }
 
-    for info in list {
-        let backup_id = info.backup_dir.relative_path();
-        if mark.get(&backup_id).is_some() {
-            continue;
-        }
-        if info.protected {
-            mark.insert(backup_id, PruneMark::Protected);
-            continue;
-        }
-        let sel_id: String = select_id(info)?;
+    pub fn logging(&mut self, worker: Arc<dyn WorkerTaskContext>) -> &mut Self {
+        self.worker = Some(worker);
+        self
+    }
 
-        if already_included.contains(&sel_id) {
-            continue;
-        }
+    pub fn dry_run(&mut self, enabled: bool) -> &mut Self {
+        self.dry_run = enabled;
+        self
+    }
+
+    pub fn run(&self) {
+        self.run_with_callback(|_, _, _| ())
+    }
 
-        if !include_hash.contains(&sel_id) {
-            if include_hash.len() >= keep {
-                break;
+    pub fn run_with_callback<E>(&self, callback: E)
+    where
+        E: Fn(&BackupInfo, &PruneMark, Result<(), Error>),
+    {
+        for (info, mark) in &self.marks {
+            if let Some(worker) = &self.worker {
+                task_log!(
+                    worker,
+                    "{}{} {}/{}/{}",
+                    if self.dry_run { "would " } else { "" },
+                    mark,
+                    info.backup_dir.backup_type(),
+                    info.backup_dir.backup_id(),
+                    info.backup_dir.backup_time_string()
+                );
             }
-            include_hash.insert(sel_id);
-            mark.insert(backup_id, PruneMark::Keep);
-        } else {
-            mark.insert(backup_id, PruneMark::Remove);
+
+            let mut result = Ok(());
+
+            if !self.dry_run && !mark.keep() {
+                result = info.backup_dir.destroy(false);
+
+                if let (Err(err), Some(worker)) = (&result, &self.worker) {
+                    let path = info.backup_dir.relative_path();
+                    task_warn!(worker, "failed to remove dir {path:?}: {err}")
+                }
+            }
+
+            callback(info, mark, result);
         }
     }
 
-    Ok(())
-}
+    fn mark_selections<E: Fn(&BackupInfo) -> Result<String, Error>>(
+        mark: &mut HashMap<PathBuf, PruneMark>,
+        list: &[BackupInfo],
+        keep: usize,
+        select_id: E,
+    ) -> Result<(), Error> {
+        let mut include_hash = HashSet::new();
 
-fn remove_incomplete_snapshots(mark: &mut HashMap<PathBuf, PruneMark>, list: &[BackupInfo]) {
-    let mut keep_unfinished = true;
-    for info in list.iter() {
-        // backup is considered unfinished if there is no manifest
-        if info.is_finished() {
-            // There is a new finished backup, so there is no need
-            // to keep older unfinished backups.
-            keep_unfinished = false;
-        } else {
+        let mut already_included = HashSet::new();
+        for info in list {
             let backup_id = info.backup_dir.relative_path();
-            if keep_unfinished {
-                // keep first unfinished
-                mark.insert(backup_id, PruneMark::KeepPartial);
+            if let Some(PruneMark::Keep) = mark.get(&backup_id) {
+                let sel_id: String = select_id(info)?;
+                already_included.insert(sel_id);
+            }
+        }
+
+        for info in list {
+            let backup_id = info.backup_dir.relative_path();
+            if mark.get(&backup_id).is_some() {
+                continue;
+            }
+            if info.protected {
+                mark.insert(backup_id, PruneMark::Protected);
+                continue;
+            }
+            let sel_id: String = select_id(info)?;
+
+            if already_included.contains(&sel_id) {
+                continue;
+            }
+
+            if !include_hash.contains(&sel_id) {
+                if include_hash.len() >= keep {
+                    break;
+                }
+                include_hash.insert(sel_id);
+                mark.insert(backup_id, PruneMark::Keep);
             } else {
                 mark.insert(backup_id, PruneMark::Remove);
             }
-            keep_unfinished = false;
         }
-    }
-}
 
-/// This filters incomplete and kept backups.
-pub fn compute_prune_info(
-    mut list: Vec<BackupInfo>,
-    options: &KeepOptions,
-) -> Result<Vec<(BackupInfo, PruneMark)>, Error> {
-    let mut mark = HashMap::new();
+        Ok(())
+    }
 
-    BackupInfo::sort_list(&mut list, false);
+    fn remove_incomplete_snapshots(mark: &mut HashMap<PathBuf, PruneMark>, list: &[BackupInfo]) {
+        let mut keep_unfinished = true;
+        for info in list.iter() {
+            // backup is considered unfinished if there is no manifest
+            if info.is_finished() {
+                // There is a new finished backup, so there is no need
+                // to keep older unfinished backups.
+                keep_unfinished = false;
+            } else {
+                let backup_id = info.backup_dir.relative_path();
+                if keep_unfinished {
+                    // keep first unfinished
+                    mark.insert(backup_id, PruneMark::KeepPartial);
+                } else {
+                    mark.insert(backup_id, PruneMark::Remove);
+                }
+                keep_unfinished = false;
+            }
+        }
+    }
 
-    remove_incomplete_snapshots(&mut mark, &list);
+    /// This filters incomplete and kept backups.
+    pub fn compute_prune_info(
+        mut list: Vec<BackupInfo>,
+        options: &KeepOptions,
+    ) -> Result<Vec<(BackupInfo, PruneMark)>, Error> {
+        let mut mark = HashMap::new();
 
-    if let Some(keep_last) = options.keep_last {
-        mark_selections(&mut mark, &list, keep_last as usize, |info| {
-            Ok(info.backup_dir.backup_time_string().to_owned())
-        })?;
-    }
+        BackupInfo::sort_list(&mut list, false);
 
-    use proxmox_time::strftime_local;
+        Self::remove_incomplete_snapshots(&mut mark, &list);
 
-    if let Some(keep_hourly) = options.keep_hourly {
-        mark_selections(&mut mark, &list, keep_hourly as usize, |info| {
-            strftime_local("%Y/%m/%d/%H", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        if let Some(keep_last) = options.keep_last {
+            Self::mark_selections(&mut mark, &list, keep_last as usize, |info| {
+                Ok(info.backup_dir.backup_time_string().to_owned())
+            })?;
+        }
 
-    if let Some(keep_daily) = options.keep_daily {
-        mark_selections(&mut mark, &list, keep_daily as usize, |info| {
-            strftime_local("%Y/%m/%d", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        use proxmox_time::strftime_local;
 
-    if let Some(keep_weekly) = options.keep_weekly {
-        mark_selections(&mut mark, &list, keep_weekly as usize, |info| {
-            // Note: Use iso-week year/week here. This year number
-            // might not match the calendar year number.
-            strftime_local("%G/%V", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        if let Some(keep_hourly) = options.keep_hourly {
+            Self::mark_selections(&mut mark, &list, keep_hourly as usize, |info| {
+                strftime_local("%Y/%m/%d/%H", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-    if let Some(keep_monthly) = options.keep_monthly {
-        mark_selections(&mut mark, &list, keep_monthly as usize, |info| {
-            strftime_local("%Y/%m", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        if let Some(keep_daily) = options.keep_daily {
+            Self::mark_selections(&mut mark, &list, keep_daily as usize, |info| {
+                strftime_local("%Y/%m/%d", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-    if let Some(keep_yearly) = options.keep_yearly {
-        mark_selections(&mut mark, &list, keep_yearly as usize, |info| {
-            strftime_local("%Y", info.backup_dir.backup_time()).map_err(Error::from)
-        })?;
-    }
+        if let Some(keep_weekly) = options.keep_weekly {
+            Self::mark_selections(&mut mark, &list, keep_weekly as usize, |info| {
+                // Note: Use iso-week year/week here. This year number
+                // might not match the calendar year number.
+                strftime_local("%G/%V", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-    let prune_info: Vec<(BackupInfo, PruneMark)> = list
-        .into_iter()
-        .map(|info| {
-            let backup_id = info.backup_dir.relative_path();
-            let mark = if info.protected {
-                PruneMark::Protected
-            } else {
-                mark.get(&backup_id).copied().unwrap_or(PruneMark::Remove)
-            };
+        if let Some(keep_monthly) = options.keep_monthly {
+            Self::mark_selections(&mut mark, &list, keep_monthly as usize, |info| {
+                strftime_local("%Y/%m", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-            (info, mark)
-        })
-        .collect();
+        if let Some(keep_yearly) = options.keep_yearly {
+            Self::mark_selections(&mut mark, &list, keep_yearly as usize, |info| {
+                strftime_local("%Y", info.backup_dir.backup_time()).map_err(Error::from)
+            })?;
+        }
 
-    Ok(prune_info)
+        let prune_info: Vec<(BackupInfo, PruneMark)> = list
+            .into_iter()
+            .map(|info| {
+                let backup_id = info.backup_dir.relative_path();
+                let mark = if info.protected {
+                    PruneMark::Protected
+                } else {
+                    mark.get(&backup_id).copied().unwrap_or(PruneMark::Remove)
+                };
+
+                (info, mark)
+            })
+            .collect();
+
+        Ok(prune_info)
+    }
 }
diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
index c8e86b07..6797844e 100644
--- a/src/api2/admin/datastore.rs
+++ b/src/api2/admin/datastore.rs
@@ -52,7 +52,7 @@ use pbs_datastore::dynamic_index::{BufferedDynamicReader, DynamicIndexReader, Lo
 use pbs_datastore::fixed_index::FixedIndexReader;
 use pbs_datastore::index::IndexFile;
 use pbs_datastore::manifest::{BackupManifest, CLIENT_LOG_BLOB_NAME, MANIFEST_BLOB_NAME};
-use pbs_datastore::prune::compute_prune_info;
+use pbs_datastore::prune::PruneJob;
 use pbs_datastore::{
     check_backup_owner, task_tracking, BackupDir, BackupGroup, DataStore, LocalChunkReader,
     StoreProgress, CATALOG_NAME,
@@ -982,7 +982,7 @@ pub fn prune(
 
     let list = group.list_backups()?;
 
-    let mut prune_info = compute_prune_info(list, &keep_options)?;
+    let mut prune_info = PruneJob::compute_prune_info(list, &keep_options)?;
 
     prune_info.reverse(); // delete older snapshots first
 
diff --git a/src/server/prune_job.rs b/src/server/prune_job.rs
index 2de34973..b9f7ebdf 100644
--- a/src/server/prune_job.rs
+++ b/src/server/prune_job.rs
@@ -8,7 +8,7 @@ use pbs_api_types::{
     print_store_and_ns, Authid, KeepOptions, Operation, PruneJobOptions, MAX_NAMESPACE_DEPTH,
     PRIV_DATASTORE_MODIFY, PRIV_DATASTORE_PRUNE,
 };
-use pbs_datastore::prune::compute_prune_info;
+use pbs_datastore::prune::PruneJob;
 use pbs_datastore::DataStore;
 use proxmox_rest_server::WorkerTask;
 
@@ -58,7 +58,7 @@ pub fn prune_datastore(
         let ns = group.backup_ns();
         let list = group.list_backups()?;
 
-        let mut prune_info = compute_prune_info(list, &prune_options.keep)?;
+        let mut prune_info = PruneJob::compute_prune_info(list, &prune_options.keep)?;
         prune_info.reverse(); // delete older snapshots first
 
         task_log!(
diff --git a/tests/prune.rs b/tests/prune.rs
index 3b320969..47e99841 100644
--- a/tests/prune.rs
+++ b/tests/prune.rs
@@ -4,7 +4,7 @@ use anyhow::Error;
 
 use pbs_api_types::PruneJobOptions;
 use pbs_datastore::manifest::MANIFEST_BLOB_NAME;
-use pbs_datastore::prune::compute_prune_info;
+use pbs_datastore::prune::PruneJob;
 use pbs_datastore::{BackupDir, BackupInfo};
 
 fn get_prune_list(
@@ -12,7 +12,7 @@ fn get_prune_list(
     return_kept: bool,
     options: &PruneJobOptions,
 ) -> Vec<PathBuf> {
-    let mut prune_info = compute_prune_info(list, &options.keep).unwrap();
+    let mut prune_info = PruneJob::compute_prune_info(list, &options.keep).unwrap();
 
     prune_info.reverse();
 
-- 
2.30.2





More information about the pbs-devel mailing list