[pbs-devel] [PATCH proxmox-backup v5 1/4] fix #4315: jobs: modify GroupFilter so include/exclude is tracked

Lukas Wagner l.wagner at proxmox.com
Tue Dec 19 14:22:59 CET 2023


Hey Philipp,

some comments inline. I quite like the changes compared to the last 
version, but I think there are some small aspects that could still be 
improved before this goes in.

On 12/18/23 16:36, Philipp Hufnagl wrote:
> After some discussion I changed the include/exclude behavior to first run
> all include filters and after that all exclude filters (rather than
> allowing to alternate in between). This is done by splitting them into 2
> lists, running include first.
> 
> A lot of discussion happened how edge cases should be handled and we
> came to following conclusion:
> 
> no include filter + no exclude filter => include all
> some include filter + no exclude filter => filter as always
> no include filter +  some exclude filter => include all then exclude
> 
> Since a GroupFilter now also features a behavior, the struct has been
> renamed to FilterType (since simply 'type' is a keyword). The new
> GroupFilter now has the behavior as a flag 'is_exclude'.
> 
> I considered calling it 'is_include' but a reader later might then not
> know what the opposite of 'include' is (do not include? deactivate?). I
> also considered making a new enum 'Behavior' but since there are only 2
> values I considered it over-engineered.
> 
> Signed-off-by: Philipp Hufnagl <p.hufnagl at proxmox.com>
> ---
>   pbs-api-types/src/datastore.rs | 11 ++---
>   pbs-api-types/src/jobs.rs      | 85 ++++++++++++++++++++++++++++------
>   src/api2/tape/backup.rs        | 44 +++++++-----------
>   src/server/pull.rs             | 55 ++++++++++------------
>   4 files changed, 118 insertions(+), 77 deletions(-)
> 
> diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs
> index 74f610d1..673b5bcd 100644
> --- a/pbs-api-types/src/datastore.rs
> +++ b/pbs-api-types/src/datastore.rs
> @@ -843,17 +843,16 @@ impl BackupGroup {
>       }
>   
>       pub fn matches(&self, filter: &crate::GroupFilter) -> bool {
> -        use crate::GroupFilter;
> -
> -        match filter {
> -            GroupFilter::Group(backup_group) => {
> +        use crate::FilterType;
> +        match &filter.filter_type {
> +            FilterType::Group(backup_group) => {
>                   match backup_group.parse::<BackupGroup>() {
>                       Ok(group) => *self == group,
>                       Err(_) => false, // shouldn't happen if value is schema-checked
>                   }
>               }
> -            GroupFilter::BackupType(ty) => self.ty == *ty,
> -            GroupFilter::Regex(regex) => regex.is_match(&self.to_string()),
> +            FilterType::BackupType(ty) => self.ty == *ty,
> +            FilterType::Regex(regex) => regex.is_match(&self.to_string()),
>           }
>       }
>   }
> diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs
> index 1f5b3cf1..982f08a6 100644
> --- a/pbs-api-types/src/jobs.rs
> +++ b/pbs-api-types/src/jobs.rs
> @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
>   use proxmox_schema::*;
>   
>   use crate::{
> -    Authid, BackupNamespace, BackupType, RateLimitConfig, Userid, BACKUP_GROUP_SCHEMA,
> +    Authid, BackupGroup, BackupNamespace, BackupType, RateLimitConfig, Userid, BACKUP_GROUP_SCHEMA,
>       BACKUP_NAMESPACE_SCHEMA, DATASTORE_SCHEMA, DRIVE_NAME_SCHEMA, MEDIA_POOL_NAME_SCHEMA,
>       NS_MAX_DEPTH_REDUCED_SCHEMA, PROXMOX_SAFE_ID_FORMAT, REMOTE_ID_SCHEMA,
>       SINGLE_LINE_COMMENT_SCHEMA,
> @@ -388,7 +388,7 @@ pub struct TapeBackupJobStatus {
>   
>   #[derive(Clone, Debug)]
>   /// Filter for matching `BackupGroup`s, for use with `BackupGroup::filter`.
> -pub enum GroupFilter {
> +pub enum FilterType {
>       /// BackupGroup type - either `vm`, `ct`, or `host`.
>       BackupType(BackupType),
>       /// Full identifier of BackupGroup, including type
> @@ -397,7 +397,7 @@ pub enum GroupFilter {
>       Regex(Regex),
>   }
>   
> -impl PartialEq for GroupFilter {
> +impl PartialEq for FilterType {
>       fn eq(&self, other: &Self) -> bool {
>           match (self, other) {
>               (Self::BackupType(a), Self::BackupType(b)) => a == b,
> @@ -408,27 +408,52 @@ impl PartialEq for GroupFilter {
>       }
>   }
>   
> +#[derive(Clone, Debug)]
> +pub struct GroupFilter {
> +    pub is_exclude: bool,
> +    pub filter_type: FilterType,
> +}
> +
> +impl PartialEq for GroupFilter {
> +    fn eq(&self, other: &Self) -> bool {
> +        self.filter_type == other.filter_type && self.is_exclude == other.is_exclude
> +    }
> +}
> +
> +impl Eq for GroupFilter {}
> +
>   impl std::str::FromStr for GroupFilter {
>       type Err = anyhow::Error;
>   
>       fn from_str(s: &str) -> Result<Self, Self::Err> {
> -        match s.split_once(':') {
> -            Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| GroupFilter::Group(value.to_string())),
> -            Some(("type", value)) => Ok(GroupFilter::BackupType(value.parse()?)),
> -            Some(("regex", value)) => Ok(GroupFilter::Regex(Regex::new(value)?)),
> +        let (is_exclude, type_str) = match s.split_once(':') {
> +            Some(("include", value)) => (false, value),
> +            Some(("exclude", value)) => (true, value),
> +            _ => (false, s),
> +        };
> +
> +        let filter_type = match type_str.split_once(':') {
> +            Some(("group", value)) => BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_| FilterType::Group(value.to_string())),
> +            Some(("type", value)) => Ok(FilterType::BackupType(value.parse()?)),
> +            Some(("regex", value)) => Ok(FilterType::Regex(Regex::new(value)?)),
>               Some((ty, _value)) => Err(format_err!("expected 'group', 'type' or 'regex' prefix, got '{}'", ty)),
>               None => Err(format_err!("input doesn't match expected format '<group:GROUP||type:<vm|ct|host>|regex:REGEX>'")),
> -        }.map_err(|err| format_err!("'{}' - {}", s, err))
> +        }?;
> +        Ok(GroupFilter {
> +            is_exclude,
> +            filter_type,
> +        })
>       }
>   }
>   
>   // used for serializing below, caution!
>   impl std::fmt::Display for GroupFilter {
>       fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
> -        match self {
> -            GroupFilter::BackupType(backup_type) => write!(f, "type:{}", backup_type),
> -            GroupFilter::Group(backup_group) => write!(f, "group:{}", backup_group),
> -            GroupFilter::Regex(regex) => write!(f, "regex:{}", regex.as_str()),
> +        let exclude = if self.is_exclude { "exclude:" } else { "" };
> +        match &self.filter_type {
> +            FilterType::BackupType(backup_type) => write!(f, "{}type:{}", exclude, backup_type),
> +            FilterType::Group(backup_group) => write!(f, "{}group:{}", exclude, backup_group),
> +            FilterType::Regex(regex) => write!(f, "{}regex:{}", exclude, regex.as_str()),
>           }

nit: Some of these format string vars can be inlined, so "{}", foo can 
become "{foo}". Only works for variables though, since the {} cannot 
contain arbitrary expressions.

Also, just FYI (no need to change that if you don't like it), you can 
also do the following:

"{exclude}regex:{regex}", regex = regex.as_str()

It's a bit more verbose, but I quite like this style, especially if all 
other vars can be inlined.

>       }
>   }
> @@ -440,10 +465,42 @@ fn verify_group_filter(input: &str) -> Result<(), anyhow::Error> {
>       GroupFilter::from_str(input).map(|_| ())
>   }
>   
> +pub fn split_by_include_exclude(
> +    all_filter: Option<Vec<GroupFilter>>,
> +) -> (Vec<GroupFilter>, Vec<GroupFilter>) {
> +    if let Some(all_filter) = all_filter {
> +        all_filter
> +            .into_iter()
> +            .partition(|filter| !filter.is_exclude)
> +    } else {
> +        (Vec::new(), Vec::new())
> +    }
> +}

... would not be needed then, see following comment

> +
> +pub fn apply_filters(
> +    group: &BackupGroup,
> +    include_filters: &[GroupFilter],
> +    exclude_filters: &[GroupFilter],
> +) -> bool {
> +    let mut is_match: bool;
> +
> +    if include_filters.is_empty() {
> +        is_match = true;
> +    } else {
> +        is_match = include_filters.iter().any(|filter| group.matches(filter));
> +    }
> +
> +    if !exclude_filters.is_empty() && is_match {
> +        is_match = !exclude_filters.iter().any(|filter| group.matches(filter));
> +    }
> +
> +    is_match
> +}
> +

I prefer to avoid freestanding functions; I think this helper should be 
a method on the BackupGroup struct and should be named slightly 
different, e.g.:

   BackupGroup::matches_filters

Also, now that I see the whole code, I think splitting includes/excludes 
beforehand is actually not really necessary.
You could just do a
include_filters.iter().filter(|f| !f.is_exclude).any(|f| group.matches(f))

and the same thing with inverted filter condition for the excludes.

Also, the `!exclude_filters.is_empty()` is not really needed, since in 
that case .iter() won't yield any elements anyway ;)


>   pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new(
> -    "Group filter based on group identifier ('group:GROUP'), group type ('type:<vm|ct|host>'), or regex ('regex:RE').")
> +    "Group filter based on group identifier ('group:GROUP'), group type ('type:<vm|ct|host>'), or regex ('regex:RE'). Can be inverted by adding 'exclude:' before.")
>       .format(&ApiStringFormat::VerifyFn(verify_group_filter))
> -    .type_text("<type:<vm|ct|host>|group:GROUP|regex:RE>")
> +    .type_text("[<exclude:|include:>]<type:<vm|ct|host>|group:GROUP|regex:RE>")
>       .schema();
>   
>   pub const GROUP_FILTER_LIST_SCHEMA: Schema =
> diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
> index 2f9385a7..1e2953c4 100644
> --- a/src/api2/tape/backup.rs
> +++ b/src/api2/tape/backup.rs
> @@ -9,9 +9,9 @@ use proxmox_schema::api;
>   use proxmox_sys::{task_log, task_warn, WorkerTaskContext};
>   
>   use pbs_api_types::{
> -    print_ns_and_snapshot, print_store_and_ns, Authid, GroupFilter, MediaPoolConfig, Operation,
> -    TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus, Userid, JOB_ID_SCHEMA,
> -    PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA,
> +    apply_filters, print_ns_and_snapshot, print_store_and_ns, split_by_include_exclude, Authid,
> +    MediaPoolConfig, Operation, TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus,
> +    Userid, JOB_ID_SCHEMA, PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE, UPID_SCHEMA,
>   };
>   
>   use pbs_config::CachedUserInfo;
> @@ -411,31 +411,23 @@ fn backup_worker(
>   
>       group_list.sort_unstable_by(|a, b| a.group().cmp(b.group()));
>   
> -    let (group_list, group_count) = if let Some(group_filters) = &setup.group_filter {
> -        let filter_fn = |group: &BackupGroup, group_filters: &[GroupFilter]| {
> -            group_filters.iter().any(|filter| group.matches(filter))
> -        };
> +    let (include_filter, exclude_filter) = split_by_include_exclude(setup.group_filter.clone());
>   
> -        let group_count_full = group_list.len();
> -        let list: Vec<BackupGroup> = group_list
> -            .into_iter()
> -            .filter(|group| filter_fn(group, group_filters))
> -            .collect();
> -        let group_count = list.len();
> -        task_log!(
> -            worker,
> -            "found {} groups (out of {} total)",
> -            group_count,
> -            group_count_full
> -        );
> -        (list, group_count)
> -    } else {
> -        let group_count = group_list.len();
> -        task_log!(worker, "found {} groups", group_count);
> -        (group_list, group_count)
> -    };
> +    let group_count_full = group_list.len();
> +    // if there are only exclude filters, include everything
> +    let group_list: Vec<BackupGroup> = group_list
> +        .into_iter()
> +        .filter(|group| apply_filters(group.group(), &include_filter, &exclude_filter))
> +        .collect();
> +
> +    task_log!(
> +        worker,
> +        "found {} groups (out of {} total)",
> +        group_list.len(),
> +        group_count_full
> +    );
>   
> -    let mut progress = StoreProgress::new(group_count as u64);
> +    let mut progress = StoreProgress::new(group_list.len() as u64);
>   
>       let latest_only = setup.latest_only.unwrap_or(false);
>   
> diff --git a/src/server/pull.rs b/src/server/pull.rs
> index 3b71c156..e458008b 100644
> --- a/src/server/pull.rs
> +++ b/src/server/pull.rs
> @@ -15,9 +15,10 @@ use proxmox_sys::{task_log, task_warn};
>   use serde_json::json;
>   
>   use pbs_api_types::{
> -    print_store_and_ns, Authid, BackupDir, BackupGroup, BackupNamespace, CryptMode, GroupFilter,
> -    GroupListItem, Operation, RateLimitConfig, Remote, SnapshotListItem, MAX_NAMESPACE_DEPTH,
> -    PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_READ,
> +    apply_filters, print_store_and_ns, split_by_include_exclude, Authid, BackupDir, BackupGroup,
> +    BackupNamespace, CryptMode, GroupFilter, GroupListItem, Operation, RateLimitConfig, Remote,
> +    SnapshotListItem, MAX_NAMESPACE_DEPTH, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
> +    PRIV_DATASTORE_READ,
>   };
>   use pbs_client::{BackupReader, BackupRepository, HttpClient, RemoteChunkReader};
>   use pbs_config::CachedUserInfo;
> @@ -486,7 +487,8 @@ pub(crate) struct PullParameters {
>       /// How many levels of sub-namespaces to pull (0 == no recursion, None == maximum recursion)
>       max_depth: Option<usize>,
>       /// Filters for reducing the pull scope
> -    group_filter: Option<Vec<GroupFilter>>,
> +    include_filter: Vec<GroupFilter>,
> +    exclude_filter: Vec<GroupFilter>,

Nit: better add a second doc comment for exclude_filter, otherwise
`cargo doc` won't pick that up


>       /// How many snapshots should be transferred at most (taking the newest N snapshots)
>       transfer_last: Option<usize>,
>   }
> @@ -539,13 +541,16 @@ impl PullParameters {
>               ns,
>           };
>   
> +        let (include_filter, exclude_filter) = split_by_include_exclude(group_filter);
> +
>           Ok(Self {
>               source,
>               target,
>               owner,
>               remove_vanished,
>               max_depth,
> -            group_filter,
> +            include_filter,
> +            exclude_filter,
>               transfer_last,
>           })
>       }
> @@ -1358,7 +1363,6 @@ pub(crate) async fn pull_ns(
>   ) -> Result<(StoreProgress, bool), Error> {
>       let mut list: Vec<BackupGroup> = params.source.list_groups(namespace, &params.owner).await?;
>   
> -    let total_count = list.len();
>       list.sort_unstable_by(|a, b| {
>           let type_order = a.ty.cmp(&b.ty);
>           if type_order == std::cmp::Ordering::Equal {
> @@ -1368,27 +1372,18 @@ pub(crate) async fn pull_ns(
>           }
>       });
>   
> -    let apply_filters = |group: &BackupGroup, filters: &[GroupFilter]| -> bool {
> -        filters.iter().any(|filter| group.matches(filter))
> -    };
> -
> -    let list = if let Some(ref group_filter) = &params.group_filter {
> -        let unfiltered_count = list.len();
> -        let list: Vec<BackupGroup> = list
> -            .into_iter()
> -            .filter(|group| apply_filters(group, group_filter))
> -            .collect();
> -        task_log!(
> -            worker,
> -            "found {} groups to sync (out of {} total)",
> -            list.len(),
> -            unfiltered_count
> -        );
> -        list
> -    } else {
> -        task_log!(worker, "found {} groups to sync", total_count);
> -        list
> -    };
> +    // if there are only exclude filters, include everything
> +    let unfiltered_count = list.len();
> +    let list: Vec<BackupGroup> = list
> +        .into_iter()
> +        .filter(|group| apply_filters(&group, &params.include_filter, &params.exclude_filter))
> +        .collect();
> +    task_log!(
> +        worker,
> +        "found {} groups to sync (out of {} total)",
> +        list.len(),
> +        unfiltered_count
> +    );
>   
>       let mut errors = false;
>   
> @@ -1457,10 +1452,8 @@ pub(crate) async fn pull_ns(
>                   if check_backup_owner(&owner, &params.owner).is_err() {
>                       continue;
>                   }
> -                if let Some(ref group_filter) = &params.group_filter {
> -                    if !apply_filters(local_group, group_filter) {
> -                        continue;
> -                    }
> +                if !apply_filters(&local_group, &params.include_filter, &params.exclude_filter) {
> +                    continue;
>                   }
>                   task_log!(worker, "delete vanished group '{local_group}'",);
>                   match params

-- 
- Lukas




More information about the pbs-devel mailing list