[pbs-devel] [PATCH proxmox-backup v5 1/4] fix #4315: jobs: modify GroupFilter so include/exclude is tracked

Philipp Hufnagl p.hufnagl at proxmox.com
Mon Dec 18 16:36:35 CET 2023


After some discussion I canged the include/exclude behavior to first run
all include filter and after that all exclude filter (rather then
allowing to alternate inbetween). This is done by splitting them into 2
lists, running include first.

A lot of discussion happened how edge cases should be handled and we
came to following conclusion:

no include filter + no exclude filter => include all
some include filter + no exclude filter => filter as always
no include filter +  some exclude filter => include all then exclude

Since a GroupFilter now also features an behavior, the Struct has been
renamed To GroupType (since simply type is a keyword). The new
GroupFilter now has a behaviour as a flag 'is_exclude'.

I considered calling it 'is_include' but a reader later then might not
know what the opposite of 'include' is (do not include?  deactivate?). I
also considered making a new enum 'behaviour' but since there are only 2
values I considered it over engeneered.

Signed-off-by: Philipp Hufnagl <p.hufnagl at proxmox.com>
---
 pbs-api-types/src/datastore.rs | 11 ++---
 pbs-api-types/src/jobs.rs      | 85 ++++++++++++++++++++++++++++------
 src/api2/tape/backup.rs        | 44 +++++++-----------
 src/server/pull.rs             | 55 ++++++++++------------
 4 files changed, 118 insertions(+), 77 deletions(-)

diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs
index 74f610d1..673b5bcd 100644
--- a/pbs-api-types/src/datastore.rs
+++ b/pbs-api-types/src/datastore.rs
@@ -843,17 +843,16 @@ impl BackupGroup {
     }
 
     pub fn matches(&self, filter: &crate::GroupFilter) -> bool {
-        use crate::GroupFilter;
-
-        match filter {
-            GroupFilter::Group(backup_group) => {
+        use crate::FilterType;
+        match &filter.filter_type {
+            FilterType::Group(backup_group) => {
                 match backup_group.parse::<BackupGroup>() {
                     Ok(group) => *self == group,
                     Err(_) => false, // shouldn't happen if value is schema-checked
                 }
             }
-            GroupFilter::BackupType(ty) => self.ty == *ty,
-            GroupFilter::Regex(regex) => regex.is_match(&self.to_string()),
+            FilterType::BackupType(ty) => self.ty == *ty,
+            FilterType::Regex(regex) => regex.is_match(&self.to_string()),
         }
     }
 }
diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs
index 1f5b3cf1..982f08a6 100644
--- a/pbs-api-types/src/jobs.rs
+++ b/pbs-api-types/src/jobs.rs
@@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
 use proxmox_schema::*;
 
 use crate::{
-    Authid, BackupNamespace, BackupType, RateLimitConfig, Userid, BACKUP_GROUP_SCHEMA,
+    Authid, BackupGroup, BackupNamespace, BackupType, RateLimitConfig, Userid, BACKUP_GROUP_SCHEMA,
     BACKUP_NAMESPACE_SCHEMA, DATASTORE_SCHEMA, DRIVE_NAME_SCHEMA, MEDIA_POOL_NAME_SCHEMA,
     NS_MAX_DEPTH_REDUCED_SCHEMA, PROXMOX_SAFE_ID_FORMAT, REMOTE_ID_SCHEMA,
     SINGLE_LINE_COMMENT_SCHEMA,
@@ -388,7 +388,7 @@ pub struct TapeBackupJobStatus {
 
 #[derive(Clone, Debug)]
 /// Filter for matching `BackupGroup`s, for use with `BackupGroup::filter`.
-pub enum GroupFilter {
+pub enum FilterType {
     /// BackupGroup type - either `vm`, `ct`, or `host`.
     BackupType(BackupType),
     /// Full identifier of BackupGroup, including type
@@ -397,7 +397,7 @@ pub enum GroupFilter {
     Regex(Regex),
 }
 
-impl PartialEq for GroupFilter {
+impl PartialEq for FilterType {
     fn eq(&self, other: &Self) -> bool {
         match (self, other) {
             (Self::BackupType(a), Self::BackupType(b)) => a == b,
@@ -408,27 +408,52 @@ impl PartialEq for GroupFilter {
     }
 }
 
+#[derive(Clone, Debug)]
+pub struct GroupFilter {
+    pub is_exclude: bool,
+    pub filter_type: FilterType,
+}
+
+impl PartialEq for GroupFilter {
+    fn eq(&self, other: &Self) -> bool {
+        self.filter_type == other.filter_type && self.is_exclude == other.is_exclude
+    }
+}
+
+impl Eq for GroupFilter {}
+
 impl std::str::FromStr for GroupFilter {
     type Err = anyhow::Error;
 
     fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s.split_once(':') {
-            Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| GroupFilter::Group(value.to_string())),
-            Some(("type", value)) => Ok(GroupFilter::BackupType(value.parse()?)),
-            Some(("regex", value)) => Ok(GroupFilter::Regex(Regex::new(value)?)),
+        let (is_exclude, type_str) = match s.split_once(':') {
+            Some(("include", value)) => (false, value),
+            Some(("exclude", value)) => (true, value),
+            _ => (false, s),
+        };
+
+        let filter_type = match type_str.split_once(':') {
+            Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| FilterType::Group(value.to_string())),
+            Some(("type", value)) => Ok(FilterType::BackupType(value.parse()?)),
+            Some(("regex", value)) => Ok(FilterType::Regex(Regex::new(value)?)),
             Some((ty, _value)) => Err(format_err!("expected 'group', 'type' or 'regex' prefix, got '{}'", ty)),
             None => Err(format_err!("input doesn't match expected format '<group:GROUP||type:<vm|ct|host>|regex:REGEX>'")),
-        }.map_err(|err| format_err!("'{}' - {}", s, err))
+        }?;
+        Ok(GroupFilter {
+            is_exclude,
+            filter_type,
+        })
     }
 }
 
 // used for serializing below, caution!
 impl std::fmt::Display for GroupFilter {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            GroupFilter::BackupType(backup_type) => write!(f, "type:{}", backup_type),
-            GroupFilter::Group(backup_group) => write!(f, "group:{}", backup_group),
-            GroupFilter::Regex(regex) => write!(f, "regex:{}", regex.as_str()),
+        let exclude = if self.is_exclude { "exclude:" } else { "" };
+        match &self.filter_type {
+            FilterType::BackupType(backup_type) => write!(f, "{}type:{}", exclude, backup_type),
+            FilterType::Group(backup_group) => write!(f, "{}group:{}", exclude, backup_group),
+            FilterType::Regex(regex) => write!(f, "{}regex:{}", exclude, regex.as_str()),
         }
     }
 }
@@ -440,10 +465,42 @@ fn verify_group_filter(input: &str) -> Result<(), anyhow::Error> {
     GroupFilter::from_str(input).map(|_| ())
 }
 
+pub fn split_by_include_exclude(
+    all_filter: Option<Vec<GroupFilter>>,
+) -> (Vec<GroupFilter>, Vec<GroupFilter>) {
+    if let Some(all_filter) = all_filter {
+        all_filter
+            .into_iter()
+            .partition(|filter| !filter.is_exclude)
+    } else {
+        (Vec::new(), Vec::new())
+    }
+}
+
+pub fn apply_filters(
+    group: &BackupGroup,
+    include_filters: &[GroupFilter],
+    exclude_filters: &[GroupFilter],
+) -> bool {
+    let mut is_match: bool;
+
+    if include_filters.is_empty() {
+        is_match = true;
+    } else {
+        is_match = include_filters.iter().any(|filter| group.matches(filter));
+    }
+
+    if !exclude_filters.is_empty() && is_match {
+        is_match = !exclude_filters.iter().any(|filter| group.matches(filter));
+    }
+
+    is_match
+}
+
 pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new(
-    "Group filter based on group identifier ('group:GROUP'), group type ('type:<vm|ct|host>'), or regex ('regex:RE').")
+    "Group filter based on group identifier ('group:GROUP'), group type ('type:<vm|ct|host>'), or regex ('regex:RE'). Can be inverted by adding 'exclude:' before.")
     .format(&ApiStringFormat::VerifyFn(verify_group_filter))
-    .type_text("<type:<vm|ct|host>|group:GROUP|regex:RE>")
+    .type_text("[<exclude:|include:>]<type:<vm|ct|host>|group:GROUP|regex:RE>")
     .schema();
 
 pub const GROUP_FILTER_LIST_SCHEMA: Schema =
diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
index 2f9385a7..1e2953c4 100644
--- a/src/api2/tape/backup.rs
+++ b/src/api2/tape/backup.rs
@@ -9,9 +9,9 @@ use proxmox_schema::api;
 use proxmox_sys::{task_log, task_warn, WorkerTaskContext};
 
 use pbs_api_types::{
-    print_ns_and_snapshot, print_store_and_ns, Authid, GroupFilter, MediaPoolConfig, Operation,
-    TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, Userid, JOB_ID_SCHEMA,
-    PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA,
+    apply_filters, print_ns_and_snapshot, print_store_and_ns, split_by_include_exclude, Authid,
+    MediaPoolConfig, Operation, TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus,
+    Userid, JOB_ID_SCHEMA, PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA,
 };
 
 use pbs_config::CachedUserInfo;
@@ -411,31 +411,23 @@ fn backup_worker(
 
     group_list.sort_unstable_by(|a, b| a.group().cmp(b.group()));
 
-    let (group_list, group_count) = if let Some(group_filters) = &setup.group_filter {
-        let filter_fn = |group: &BackupGroup, group_filters: &[GroupFilter]| {
-            group_filters.iter().any(|filter| group.matches(filter))
-        };
+    let (include_filter, exclude_filter) = split_by_include_exclude(setup.group_filter.clone());
 
-        let group_count_full = group_list.len();
-        let list: Vec<BackupGroup> = group_list
-            .into_iter()
-            .filter(|group| filter_fn(group, group_filters))
-            .collect();
-        let group_count = list.len();
-        task_log!(
-            worker,
-            "found {} groups (out of {} total)",
-            group_count,
-            group_count_full
-        );
-        (list, group_count)
-    } else {
-        let group_count = group_list.len();
-        task_log!(worker, "found {} groups", group_count);
-        (group_list, group_count)
-    };
+    let group_count_full = group_list.len();
+    // if there are only exclude filter, inculude everything
+    let group_list: Vec<BackupGroup> = group_list
+        .into_iter()
+        .filter(|group| apply_filters(group.group(), &include_filter, &exclude_filter))
+        .collect();
+
+    task_log!(
+        worker,
+        "found {} groups (out of {} total)",
+        group_list.len(),
+        group_count_full
+    );
 
-    let mut progress = StoreProgress::new(group_count as u64);
+    let mut progress = StoreProgress::new(group_list.len() as u64);
 
     let latest_only = setup.latest_only.unwrap_or(false);
 
diff --git a/src/server/pull.rs b/src/server/pull.rs
index 3b71c156..e458008b 100644
--- a/src/server/pull.rs
+++ b/src/server/pull.rs
@@ -15,9 +15,10 @@ use proxmox_sys::{task_log, task_warn};
 use serde_json::json;
 
 use pbs_api_types::{
-    print_store_and_ns, Authid, BackupDir, BackupGroup, BackupNamespace, CryptMode, GroupFilter,
-    GroupListItem, Operation, RateLimitConfig, Remote, SnapshotListItem, MAX_NAMESPACE_DEPTH,
-    PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_READ,
+    apply_filters, print_store_and_ns, split_by_include_exclude, Authid, BackupDir, BackupGroup,
+    BackupNamespace, CryptMode, GroupFilter, GroupListItem, Operation, RateLimitConfig, Remote,
+    SnapshotListItem, MAX_NAMESPACE_DEPTH, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
+    PRIV_DATASTORE_READ,
 };
 use pbs_client::{BackupReader, BackupRepository, HttpClient, RemoteChunkReader};
 use pbs_config::CachedUserInfo;
@@ -486,7 +487,8 @@ pub(crate) struct PullParameters {
     /// How many levels of sub-namespaces to pull (0 == no recursion, None == maximum recursion)
     max_depth: Option<usize>,
     /// Filters for reducing the pull scope
-    group_filter: Option<Vec<GroupFilter>>,
+    include_filter: Vec<GroupFilter>,
+    exclude_filter: Vec<GroupFilter>,
     /// How many snapshots should be transferred at most (taking the newest N snapshots)
     transfer_last: Option<usize>,
 }
@@ -539,13 +541,16 @@ impl PullParameters {
             ns,
         };
 
+        let (include_filter, exclude_filter) = split_by_include_exclude(group_filter);
+
         Ok(Self {
             source,
             target,
             owner,
             remove_vanished,
             max_depth,
-            group_filter,
+            include_filter,
+            exclude_filter,
             transfer_last,
         })
     }
@@ -1358,7 +1363,6 @@ pub(crate) async fn pull_ns(
 ) -> Result<(StoreProgress, bool), Error> {
     let mut list: Vec<BackupGroup> = params.source.list_groups(namespace, &params.owner).await?;
 
-    let total_count = list.len();
     list.sort_unstable_by(|a, b| {
         let type_order = a.ty.cmp(&b.ty);
         if type_order == std::cmp::Ordering::Equal {
@@ -1368,27 +1372,18 @@ pub(crate) async fn pull_ns(
         }
     });
 
-    let apply_filters = |group: &BackupGroup, filters: &[GroupFilter]| -> bool {
-        filters.iter().any(|filter| group.matches(filter))
-    };
-
-    let list = if let Some(ref group_filter) = &params.group_filter {
-        let unfiltered_count = list.len();
-        let list: Vec<BackupGroup> = list
-            .into_iter()
-            .filter(|group| apply_filters(group, group_filter))
-            .collect();
-        task_log!(
-            worker,
-            "found {} groups to sync (out of {} total)",
-            list.len(),
-            unfiltered_count
-        );
-        list
-    } else {
-        task_log!(worker, "found {} groups to sync", total_count);
-        list
-    };
+    // if there are only exclude filter, inculude everything
+    let unfiltered_count = list.len();
+    let list: Vec<BackupGroup> = list
+        .into_iter()
+        .filter(|group| apply_filters(&group, &params.include_filter, &params.exclude_filter))
+        .collect();
+    task_log!(
+        worker,
+        "found {} groups to sync (out of {} total)",
+        list.len(),
+        unfiltered_count
+    );
 
     let mut errors = false;
 
@@ -1457,10 +1452,8 @@ pub(crate) async fn pull_ns(
                 if check_backup_owner(&owner, &params.owner).is_err() {
                     continue;
                 }
-                if let Some(ref group_filter) = &params.group_filter {
-                    if !apply_filters(local_group, group_filter) {
-                        continue;
-                    }
+                if !apply_filters(&local_group, &params.include_filter, &params.include_filter) {
+                    continue;
                 }
                 task_log!(worker, "delete vanished group '{local_group}'",);
                 match params
-- 
2.39.2





More information about the pbs-devel mailing list