[pbs-devel] [PATCH proxmox-backup v6 1/4] fix #4315: jobs: modify GroupFilter so include/exclude is tracked

Philipp Hufnagl p.hufnagl at proxmox.com
Fri Dec 22 17:39:59 CET 2023


After some discussion I canged the include/exclude behavior to first run
all include filter and after that all exclude filter (rather then
allowing to alternate inbetween). This is done by splitting them into 2
lists, running include first.

A lot of discussion happened how edge cases should be handled and we
came to following conclusion:

no include filter + no exclude filter => include all
some include filter + no exclude filter => filter as always
no include filter +  some exclude filter => include all then exclude

Since a GroupFilter now also features an behavior, the Struct has been
renamed To GroupType (since simply type is a keyword). The new
GroupFilter now has a behaviour as a flag 'is_exclude'.

I considered calling it 'is_include' but a reader later then might not
know what the opposite of 'include' is (do not include?  deactivate?). I
also considered making a new enum 'behaviour' but since there are only 2
values I considered it over engeneered.

Signed-off-by: Philipp Hufnagl <p.hufnagl at proxmox.com>
---
 pbs-api-types/src/datastore.rs | 39 ++++++++++++++++++++------
 pbs-api-types/src/jobs.rs      | 51 +++++++++++++++++++++++++---------
 src/api2/tape/backup.rs        | 39 +++++++++++---------------
 src/server/pull.rs             | 46 +++++++++++++-----------------
 4 files changed, 103 insertions(+), 72 deletions(-)

diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs
index 74f610d1..d3d10a3a 100644
--- a/pbs-api-types/src/datastore.rs
+++ b/pbs-api-types/src/datastore.rs
@@ -10,9 +10,9 @@ use proxmox_schema::{
 };
 
 use crate::{
-    Authid, CryptMode, Fingerprint, MaintenanceMode, Userid, DATASTORE_NOTIFY_STRING_SCHEMA,
-    GC_SCHEDULE_SCHEMA, PROXMOX_SAFE_ID_FORMAT, PRUNE_SCHEDULE_SCHEMA, SHA256_HEX_REGEX,
-    SINGLE_LINE_COMMENT_SCHEMA, UPID,
+    Authid, CryptMode, Fingerprint, GroupFilter, MaintenanceMode, Userid,
+    DATASTORE_NOTIFY_STRING_SCHEMA, GC_SCHEDULE_SCHEMA, PROXMOX_SAFE_ID_FORMAT,
+    PRUNE_SCHEDULE_SCHEMA, SHA256_HEX_REGEX, SINGLE_LINE_COMMENT_SCHEMA, UPID,
 };
 
 const_regex! {
@@ -843,19 +843,40 @@ impl BackupGroup {
     }
 
     pub fn matches(&self, filter: &crate::GroupFilter) -> bool {
-        use crate::GroupFilter;
-
-        match filter {
-            GroupFilter::Group(backup_group) => {
+        use crate::FilterType;
+        match &filter.filter_type {
+            FilterType::Group(backup_group) => {
                 match backup_group.parse::<BackupGroup>() {
                     Ok(group) => *self == group,
                     Err(_) => false, // shouldn't happen if value is schema-checked
                 }
             }
-            GroupFilter::BackupType(ty) => self.ty == *ty,
-            GroupFilter::Regex(regex) => regex.is_match(&self.to_string()),
+            FilterType::BackupType(ty) => self.ty == *ty,
+            FilterType::Regex(regex) => regex.is_match(&self.to_string()),
         }
     }
+
+    pub fn apply_filters(&self, filters: &[GroupFilter]) -> bool {
+        let mut is_match: bool;
+        let includes: Vec<&GroupFilter> = filters.iter().filter(|f| !f.is_exclude).collect();
+        let excludes: Vec<&GroupFilter> = filters.iter().filter(|f| f.is_exclude).collect();
+
+        if includes.is_empty() {
+            is_match = true;
+        } else {
+            is_match = includes.iter().any(|filter| self.matches(filter));
+        }
+
+        // only run exclude if includes matched
+        if is_match {
+            is_match = !excludes
+                .iter()
+                .filter(|f| f.is_exclude)
+                .any(|filter| self.matches(filter));
+        }
+
+        is_match
+    }
 }
 
 impl AsRef<BackupGroup> for BackupGroup {
diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs
index 1f5b3cf1..607451ff 100644
--- a/pbs-api-types/src/jobs.rs
+++ b/pbs-api-types/src/jobs.rs
@@ -388,7 +388,7 @@ pub struct TapeBackupJobStatus {
 
 #[derive(Clone, Debug)]
 /// Filter for matching `BackupGroup`s, for use with `BackupGroup::filter`.
-pub enum GroupFilter {
+pub enum FilterType {
     /// BackupGroup type - either `vm`, `ct`, or `host`.
     BackupType(BackupType),
     /// Full identifier of BackupGroup, including type
@@ -397,7 +397,7 @@ pub enum GroupFilter {
     Regex(Regex),
 }
 
-impl PartialEq for GroupFilter {
+impl PartialEq for FilterType {
     fn eq(&self, other: &Self) -> bool {
         match (self, other) {
             (Self::BackupType(a), Self::BackupType(b)) => a == b,
@@ -408,27 +408,52 @@ impl PartialEq for GroupFilter {
     }
 }
 
+#[derive(Clone, Debug)]
+pub struct GroupFilter {
+    pub is_exclude: bool,
+    pub filter_type: FilterType,
+}
+
+impl PartialEq for GroupFilter {
+    fn eq(&self, other: &Self) -> bool {
+        self.filter_type == other.filter_type && self.is_exclude == other.is_exclude
+    }
+}
+
+impl Eq for GroupFilter {}
+
 impl std::str::FromStr for GroupFilter {
     type Err = anyhow::Error;
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s.split_once(':') {
-            Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| GroupFilter::Group(value.to_string())),
-            Some(("type", value)) => Ok(GroupFilter::BackupType(value.parse()?)),
-            Some(("regex", value)) => Ok(GroupFilter::Regex(Regex::new(value)?)),
+        let (is_exclude, type_str) = match s.split_once(':') {
+            Some(("include", value)) => (false, value),
+            Some(("exclude", value)) => (true, value),
+            _ => (false, s),
+        };
+
+        let filter_type = match type_str.split_once(':') {
+            Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| FilterType::Group(value.to_string())),
+            Some(("type", value)) => Ok(FilterType::BackupType(value.parse()?)),
+            Some(("regex", value)) => Ok(FilterType::Regex(Regex::new(value)?)),
             Some((ty, _value)) => Err(format_err!("expected 'group', 'type' or 'regex' prefix, got '{}'", ty)),
             None => Err(format_err!("input doesn't match expected format '<group:GROUP||type:<vm|ct|host>|regex:REGEX>'")),
-        }.map_err(|err| format_err!("'{}' - {}", s, err))
+        }?;
+        Ok(GroupFilter {
+            is_exclude,
+            filter_type,
+        })
     }
 }
 
 // used for serializing below, caution!
 impl std::fmt::Display for GroupFilter {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            GroupFilter::BackupType(backup_type) => write!(f, "type:{}", backup_type),
-            GroupFilter::Group(backup_group) => write!(f, "group:{}", backup_group),
-            GroupFilter::Regex(regex) => write!(f, "regex:{}", regex.as_str()),
+        let exclude = if self.is_exclude { "exclude:" } else { "" };
+        match &self.filter_type {
+            FilterType::BackupType(backup_type) => write!(f, "{}type:{}", exclude, backup_type),
+            FilterType::Group(backup_group) => write!(f, "{}group:{}", exclude, backup_group),
+            FilterType::Regex(regex) => write!(f, "{}regex:{}", exclude, regex.as_str()),
         }
     }
 }
@@ -441,9 +466,9 @@ fn verify_group_filter(input: &str) -> Result<(), anyhow::Error> {
 }
 
 pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new(
-    "Group filter based on group identifier ('group:GROUP'), group type ('type:<vm|ct|host>'), or regex ('regex:RE').")
+    "Group filter based on group identifier ('group:GROUP'), group type ('type:<vm|ct|host>'), or regex ('regex:RE'). Can be inverted by adding 'exclude:' before.")
     .format(&ApiStringFormat::VerifyFn(verify_group_filter))
-    .type_text("<type:<vm|ct|host>|group:GROUP|regex:RE>")
+    .type_text("[<exclude:|include:>]<type:<vm|ct|host>|group:GROUP|regex:RE>")
     .schema();
 
 pub const GROUP_FILTER_LIST_SCHEMA: Schema =
diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
index 2f9385a7..28d7e720 100644
--- a/src/api2/tape/backup.rs
+++ b/src/api2/tape/backup.rs
@@ -9,13 +9,13 @@ use proxmox_schema::api;
 use proxmox_sys::{task_log, task_warn, WorkerTaskContext};
 
 use pbs_api_types::{
-    print_ns_and_snapshot, print_store_and_ns, Authid, GroupFilter, MediaPoolConfig, Operation,
+    print_ns_and_snapshot, print_store_and_ns, Authid, MediaPoolConfig, Operation,
     TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, Userid, JOB_ID_SCHEMA,
     PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA,
 };
 
 use pbs_config::CachedUserInfo;
-use pbs_datastore::backup_info::{BackupDir, BackupGroup, BackupInfo};
+use pbs_datastore::backup_info::{BackupDir, BackupInfo};
 use pbs_datastore::{DataStore, StoreProgress};
 use proxmox_rest_server::WorkerTask;
 
@@ -411,31 +411,24 @@ fn backup_worker(
 
     group_list.sort_unstable_by(|a, b| a.group().cmp(b.group()));
 
-    let (group_list, group_count) = if let Some(group_filters) = &setup.group_filter {
-        let filter_fn = |group: &BackupGroup, group_filters: &[GroupFilter]| {
-            group_filters.iter().any(|filter| group.matches(filter))
-        };
+    let group_count_full = group_list.len();
 
-        let group_count_full = group_list.len();
-        let list: Vec<BackupGroup> = group_list
+    let group_list = match &setup.group_filter {
+        Some(f) => group_list
             .into_iter()
-            .filter(|group| filter_fn(group, group_filters))
-            .collect();
-        let group_count = list.len();
-        task_log!(
-            worker,
-            "found {} groups (out of {} total)",
-            group_count,
-            group_count_full
-        );
-        (list, group_count)
-    } else {
-        let group_count = group_list.len();
-        task_log!(worker, "found {} groups", group_count);
-        (group_list, group_count)
+            .filter(|group| group.group().apply_filters(f))
+            .collect(),
+        None => group_list,
     };
 
-    let mut progress = StoreProgress::new(group_count as u64);
+    task_log!(
+        worker,
+        "found {} groups (out of {} total)",
+        group_list.len(),
+        group_count_full
+    );
+
+    let mut progress = StoreProgress::new(group_list.len() as u64);
 
     let latest_only = setup.latest_only.unwrap_or(false);
 
diff --git a/src/server/pull.rs b/src/server/pull.rs
index 3b71c156..b60f3a1e 100644
--- a/src/server/pull.rs
+++ b/src/server/pull.rs
@@ -486,7 +486,7 @@ pub(crate) struct PullParameters {
     /// How many levels of sub-namespaces to pull (0 == no recursion, None == maximum recursion)
     max_depth: Option<usize>,
     /// Filters for reducing the pull scope
-    group_filter: Option<Vec<GroupFilter>>,
+    group_filter: Vec<GroupFilter>,
     /// How many snapshots should be transferred at most (taking the newest N snapshots)
     transfer_last: Option<usize>,
 }
@@ -539,6 +539,11 @@ impl PullParameters {
             ns,
         };
 
+        let group_filter = match group_filter {
+            Some(f) => f,
+            None => Vec::<GroupFilter>::new(),
+        };
+
         Ok(Self {
             source,
             target,
@@ -1358,7 +1363,6 @@ pub(crate) async fn pull_ns(
 ) -> Result<(StoreProgress, bool), Error> {
     let mut list: Vec<BackupGroup> = params.source.list_groups(namespace, &params.owner).await?;
 
-    let total_count = list.len();
     list.sort_unstable_by(|a, b| {
         let type_order = a.ty.cmp(&b.ty);
         if type_order == std::cmp::Ordering::Equal {
@@ -1368,27 +1372,17 @@ pub(crate) async fn pull_ns(
         }
     });
 
-    let apply_filters = |group: &BackupGroup, filters: &[GroupFilter]| -> bool {
-        filters.iter().any(|filter| group.matches(filter))
-    };
-
-    let list = if let Some(ref group_filter) = &params.group_filter {
-        let unfiltered_count = list.len();
-        let list: Vec<BackupGroup> = list
-            .into_iter()
-            .filter(|group| apply_filters(group, group_filter))
-            .collect();
-        task_log!(
-            worker,
-            "found {} groups to sync (out of {} total)",
-            list.len(),
-            unfiltered_count
-        );
-        list
-    } else {
-        task_log!(worker, "found {} groups to sync", total_count);
-        list
-    };
+    let unfiltered_count = list.len();
+    let list: Vec<BackupGroup> = list
+        .into_iter()
+        .filter(|group| group.apply_filters(&params.group_filter))
+        .collect();
+    task_log!(
+        worker,
+        "found {} groups to sync (out of {} total)",
+        list.len(),
+        unfiltered_count
+    );
 
     let mut errors = false;
 
@@ -1457,10 +1451,8 @@ pub(crate) async fn pull_ns(
                 if check_backup_owner(&owner, &params.owner).is_err() {
                     continue;
                 }
-                if let Some(ref group_filter) = &params.group_filter {
-                    if !apply_filters(local_group, group_filter) {
-                        continue;
-                    }
+                if !local_group.apply_filters(&params.group_filter) {
+                    continue;
                 }
                 task_log!(worker, "delete vanished group '{local_group}'",);
                 match params
-- 
2.39.2





More information about the pbs-devel mailing list