[pbs-devel] [PATCH proxmox-backup v5 1/4] fix #4315: jobs: modify GroupFilter so include/exclude is tracked

Philipp Hufnagl p.hufnagl at proxmox.com
Fri Dec 22 10:42:36 CET 2023


Hello

Thank you for the deep review!

On 12/19/23 14:22, Lukas Wagner wrote:
> Hey Philipp,
> 
> some comments inline. I quite like the changes compared to the last
> version, but I think there are some small aspects that could still be
> improved before this goes in.
> 
> On 12/18/23 16:36, Philipp Hufnagl wrote:
>> After some discussion I canged the include/exclude behavior to first
>> run
>> all include filter and after that all exclude filter (rather then
>> allowing to alternate inbetween). This is done by splitting them into 2
>> lists, running include first.
>>
>> A lot of discussion happened how edge cases should be handled and we
>> came to following conclusion:
>>
>> no include filter + no exclude filter => include all
>> some include filter + no exclude filter => filter as always
>> no include filter +  some exclude filter => include all then exclude
>>
>> Since a GroupFilter now also features an behavior, the Struct has been
>> renamed To GroupType (since simply type is a keyword). The new
>> GroupFilter now has a behaviour as a flag 'is_exclude'.
>>
>> I considered calling it 'is_include' but a reader later then might not
>> know what the opposite of 'include' is (do not include? 
>> deactivate?). I
>> also considered making a new enum 'behaviour' but since there are
>> only 2
>> values I considered it over engeneered.
>>
>> Signed-off-by: Philipp Hufnagl <p.hufnagl at proxmox.com>
>> ---
>>   pbs-api-types/src/datastore.rs | 11 ++---
>>   pbs-api-types/src/jobs.rs      | 85
>> ++++++++++++++++++++++++++++------
>>   src/api2/tape/backup.rs        | 44 +++++++-----------
>>   src/server/pull.rs             | 55 ++++++++++------------
>>   4 files changed, 118 insertions(+), 77 deletions(-)
>>
>> diff --git a/pbs-api-types/src/datastore.rs
>> b/pbs-api-types/src/datastore.rs
>> index 74f610d1..673b5bcd 100644
>> --- a/pbs-api-types/src/datastore.rs
>> +++ b/pbs-api-types/src/datastore.rs
>> @@ -843,17 +843,16 @@ impl BackupGroup {
>>       }
>>         pub fn matches(&self, filter: &crate::GroupFilter) -> bool {
>> -        use crate::GroupFilter;
>> -
>> -        match filter {
>> -            GroupFilter::Group(backup_group) => {
>> +        use crate::FilterType;
>> +        match &filter.filter_type {
>> +            FilterType::Group(backup_group) => {
>>                   match backup_group.parse::<BackupGroup>() {
>>                       Ok(group) => *self == group,
>>                       Err(_) => false, // shouldn't happen if value
>> is schema-checked
>>                   }
>>               }
>> -            GroupFilter::BackupType(ty) => self.ty == *ty,
>> -            GroupFilter::Regex(regex) =>
>> regex.is_match(&self.to_string()),
>> +            FilterType::BackupType(ty) => self.ty == *ty,
>> +            FilterType::Regex(regex) =>
>> regex.is_match(&self.to_string()),
>>           }
>>       }
>>   }
>> diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs
>> index 1f5b3cf1..982f08a6 100644
>> --- a/pbs-api-types/src/jobs.rs
>> +++ b/pbs-api-types/src/jobs.rs
>> @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
>>   use proxmox_schema::*;
>>     use crate::{
>> -    Authid, BackupNamespace, BackupType, RateLimitConfig, Userid,
>> BACKUP_GROUP_SCHEMA,
>> +    Authid, BackupGroup, BackupNamespace, BackupType,
>> RateLimitConfig, Userid, BACKUP_GROUP_SCHEMA,
>>       BACKUP_NAMESPACE_SCHEMA, DATASTORE_SCHEMA, DRIVE_NAME_SCHEMA,
>> MEDIA_POOL_NAME_SCHEMA,
>>       NS_MAX_DEPTH_REDUCED_SCHEMA, PROXMOX_SAFE_ID_FORMAT,
>> REMOTE_ID_SCHEMA,
>>       SINGLE_LINE_COMMENT_SCHEMA,
>> @@ -388,7 +388,7 @@ pub struct TapeBackupJobStatus {
>>     #[derive(Clone, Debug)]
>>   /// Filter for matching `BackupGroup`s, for use with
>> `BackupGroup::filter`.
>> -pub enum GroupFilter {
>> +pub enum FilterType {
>>       /// BackupGroup type - either `vm`, `ct`, or `host`.
>>       BackupType(BackupType),
>>       /// Full identifier of BackupGroup, including type
>> @@ -397,7 +397,7 @@ pub enum GroupFilter {
>>       Regex(Regex),
>>   }
>>   -impl PartialEq for GroupFilter {
>> +impl PartialEq for FilterType {
>>       fn eq(&self, other: &Self) -> bool {
>>           match (self, other) {
>>               (Self::BackupType(a), Self::BackupType(b)) => a == b,
>> @@ -408,27 +408,52 @@ impl PartialEq for GroupFilter {
>>       }
>>   }
>>   +#[derive(Clone, Debug)]
>> +pub struct GroupFilter {
>> +    pub is_exclude: bool,
>> +    pub filter_type: FilterType,
>> +}
>> +
>> +impl PartialEq for GroupFilter {
>> +    fn eq(&self, other: &Self) -> bool {
>> +        self.filter_type == other.filter_type && self.is_exclude ==
>> other.is_exclude
>> +    }
>> +}
>> +
>> +impl Eq for GroupFilter {}
>> +
>>   impl std::str::FromStr for GroupFilter {
>>       type Err = anyhow::Error;
>>         fn from_str(s: &str) -> Result<Self, Self::Err> {
>> -        match s.split_once(':') {
>> -            Some(("group", value)) =>
>> BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_|
>> GroupFilter::Group(value.to_string())),
>> -            Some(("type", value)) =>
>> Ok(GroupFilter::BackupType(value.parse()?)),
>> -            Some(("regex", value)) =>
>> Ok(GroupFilter::Regex(Regex::new(value)?)),
>> +        let (is_exclude, type_str) = match s.split_once(':') {
>> +            Some(("include", value)) => (false, value),
>> +            Some(("exclude", value)) => (true, value),
>> +            _ => (false, s),
>> +        };
>> +
>> +        let filter_type = match type_str.split_once(':') {
>> +            Some(("group", value)) =>
>> BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_|
>> FilterType::Group(value.to_string())),
>> +            Some(("type", value)) =>
>> Ok(FilterType::BackupType(value.parse()?)),
>> +            Some(("regex", value)) =>
>> Ok(FilterType::Regex(Regex::new(value)?)),
>>               Some((ty, _value)) => Err(format_err!("expected
>> 'group', 'type' or 'regex' prefix, got '{}'", ty)),
>>               None => Err(format_err!("input doesn't match expected
>> format '<group:GROUP||type:<vm|ct|host>|regex:REGEX>'")),
>> -        }.map_err(|err| format_err!("'{}' - {}", s, err))
>> +        }?;
>> +        Ok(GroupFilter {
>> +            is_exclude,
>> +            filter_type,
>> +        })
>>       }
>>   }
>>     // used for serializing below, caution!
>>   impl std::fmt::Display for GroupFilter {
>>       fn fmt(&self, f: &mut std::fmt::Formatter<'_>) ->
>> std::fmt::Result {
>> -        match self {
>> -            GroupFilter::BackupType(backup_type) => write!(f,
>> "type:{}", backup_type),
>> -            GroupFilter::Group(backup_group) => write!(f,
>> "group:{}", backup_group),
>> -            GroupFilter::Regex(regex) => write!(f, "regex:{}",
>> regex.as_str()),
>> +        let exclude = if self.is_exclude { "exclude:" } else { "" };
>> +        match &self.filter_type {
>> +            FilterType::BackupType(backup_type) => write!(f,
>> "{}type:{}", exclude, backup_type),
>> +            FilterType::Group(backup_group) => write!(f,
>> "{}group:{}", exclude, backup_group),
>> +            FilterType::Regex(regex) => write!(f, "{}regex:{}",
>> exclude, regex.as_str()),
>>           }
> 
> nit: Some of these format string vars can be inlined, so "{}", foo can
> become "{foo}". Only works for variables though, since the {} cannot
> contain arbitrary expressions.
> 
> Also, just FIY (no need to change that if you don't like it), you can
> also do the following:
> 
> "{exclude}regex:{regex}", regex = regex.as_str()
> 
> It's a bit more verbose, but I quite like this style, especially if
> all other vars can be inlined.
> 
>>       }
>>   }
>> @@ -440,10 +465,42 @@ fn verify_group_filter(input: &str) ->
>> Result<(), anyhow::Error> {
>>       GroupFilter::from_str(input).map(|_| ())
>>   }
>>   +pub fn split_by_include_exclude(
>> +    all_filter: Option<Vec<GroupFilter>>,
>> +) -> (Vec<GroupFilter>, Vec<GroupFilter>) {
>> +    if let Some(all_filter) = all_filter {
>> +        all_filter
>> +            .into_iter()
>> +            .partition(|filter| !filter.is_exclude)
>> +    } else {
>> +        (Vec::new(), Vec::new())
>> +    }
>> +}
> 
> ... would not be needed then, see following coment
> 
>> +
>> +pub fn apply_filters(
>> +    group: &BackupGroup,
>> +    include_filters: &[GroupFilter],
>> +    exclude_filters: &[GroupFilter],
>> +) -> bool {
>> +    let mut is_match: bool;
>> +
>> +    if include_filters.is_empty() {
>> +        is_match = true;
>> +    } else {
>> +        is_match = include_filters.iter().any(|filter|
>> group.matches(filter));
>> +    }
>> +
>> +    if !exclude_filters.is_empty() && is_match {
>> +        is_match = !exclude_filters.iter().any(|filter|
>> group.matches(filter));
>> +    }
>> +
>> +    is_match
>> +}
>> +
> 
> I prefer to avoid freestanding functions; I think this helper should
> be a method on the BackupGroup struct and should be named slightly
> different, e.g.:
> 
>   BackupGroup::matches_filters

I was wondering about that as well, considering to do it that way. I
think you are right and will change it.

> 
> Also, now that I see the whole code, I think splitting
> includes/excludes beforehand is actually not really necessary.
> You could just do a
> include_filters.iter().filter(|f| !f.is_exclude).any(|f|
> group.matches(f))
> 
> and the same thing with inverted filter condition for the excludes.

Hmm... I see what you mean. Howwevr on the down side, this makes the
code harder to comprehend and may result in more overvhead by
iterating more often through the list. Ill think about it.
> 
> Also, the `!exclude_filters.is_empty()` is not really needed, since in
> that case .iter() won't yield any elements anyway ;)

True :)
> 
> 
>>   pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new(
>> -    "Group filter based on group identifier ('group:GROUP'), group
>> type ('type:<vm|ct|host>'), or regex ('regex:RE').")
>> +    "Group filter based on group identifier ('group:GROUP'), group
>> type ('type:<vm|ct|host>'), or regex ('regex:RE'). Can be inverted
>> by adding 'exclude:' before.")
>>       .format(&ApiStringFormat::VerifyFn(verify_group_filter))
>> -    .type_text("<type:<vm|ct|host>|group:GROUP|regex:RE>")
>> +   
>> .type_text("[<exclude:|include:>]<type:<vm|ct|host>|group:GROUP|regex:RE>")
>>       .schema();
>>     pub const GROUP_FILTER_LIST_SCHEMA: Schema =
>> diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
>> index 2f9385a7..1e2953c4 100644
>> --- a/src/api2/tape/backup.rs
>> +++ b/src/api2/tape/backup.rs
>> @@ -9,9 +9,9 @@ use proxmox_schema::api;
>>   use proxmox_sys::{task_log, task_warn, WorkerTaskContext};
>>     use pbs_api_types::{
>> -    print_ns_and_snapshot, print_store_and_ns, Authid, GroupFilter,
>> MediaPoolConfig, Operation,
>> -    TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus,
>> Userid, JOB_ID_SCHEMA,
>> -    PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE,
>> UPID_SCHEMA,
>> +    apply_filters, print_ns_and_snapshot, print_store_and_ns,
>> split_by_include_exclude, Authid,
>> +    MediaPoolConfig, Operation, TapeBackupJobConfig,
>> TapeBackupJobSetup, TapeBackupJobStatus,
>> +    Userid, JOB_ID_SCHEMA, PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT,
>> PRIV_TAPE_WRITE, UPID_SCHEMA,
>>   };
>>     use pbs_config::CachedUserInfo;
>> @@ -411,31 +411,23 @@ fn backup_worker(
>>         group_list.sort_unstable_by(|a, b| a.group().cmp(b.group()));
>>   -    let (group_list, group_count) = if let Some(group_filters) =
>> &setup.group_filter {
>> -        let filter_fn = |group: &BackupGroup, group_filters:
>> &[GroupFilter]| {
>> -            group_filters.iter().any(|filter| group.matches(filter))
>> -        };
>> +    let (include_filter, exclude_filter) =
>> split_by_include_exclude(setup.group_filter.clone());
>>   -        let group_count_full = group_list.len();
>> -        let list: Vec<BackupGroup> = group_list
>> -            .into_iter()
>> -            .filter(|group| filter_fn(group, group_filters))
>> -            .collect();
>> -        let group_count = list.len();
>> -        task_log!(
>> -            worker,
>> -            "found {} groups (out of {} total)",
>> -            group_count,
>> -            group_count_full
>> -        );
>> -        (list, group_count)
>> -    } else {
>> -        let group_count = group_list.len();
>> -        task_log!(worker, "found {} groups", group_count);
>> -        (group_list, group_count)
>> -    };
>> +    let group_count_full = group_list.len();
>> +    // if there are only exclude filter, inculude everything
>> +    let group_list: Vec<BackupGroup> = group_list
>> +        .into_iter()
>> +        .filter(|group| apply_filters(group.group(),
>> &include_filter, &exclude_filter))
>> +        .collect();
>> +
>> +    task_log!(
>> +        worker,
>> +        "found {} groups (out of {} total)",
>> +        group_list.len(),
>> +        group_count_full
>> +    );
>>   -    let mut progress = StoreProgress::new(group_count as u64);
>> +    let mut progress = StoreProgress::new(group_list.len() as u64);
>>         let latest_only = setup.latest_only.unwrap_or(false);
>>   diff --git a/src/server/pull.rs b/src/server/pull.rs
>> index 3b71c156..e458008b 100644
>> --- a/src/server/pull.rs
>> +++ b/src/server/pull.rs
>> @@ -15,9 +15,10 @@ use proxmox_sys::{task_log, task_warn};
>>   use serde_json::json;
>>     use pbs_api_types::{
>> -    print_store_and_ns, Authid, BackupDir, BackupGroup,
>> BackupNamespace, CryptMode, GroupFilter,
>> -    GroupListItem, Operation, RateLimitConfig, Remote,
>> SnapshotListItem, MAX_NAMESPACE_DEPTH,
>> -    PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_READ,
>> +    apply_filters, print_store_and_ns, split_by_include_exclude,
>> Authid, BackupDir, BackupGroup,
>> +    BackupNamespace, CryptMode, GroupFilter, GroupListItem,
>> Operation, RateLimitConfig, Remote,
>> +    SnapshotListItem, MAX_NAMESPACE_DEPTH, PRIV_DATASTORE_AUDIT,
>> PRIV_DATASTORE_BACKUP,
>> +    PRIV_DATASTORE_READ,
>>   };
>>   use pbs_client::{BackupReader, BackupRepository, HttpClient,
>> RemoteChunkReader};
>>   use pbs_config::CachedUserInfo;
>> @@ -486,7 +487,8 @@ pub(crate) struct PullParameters {
>>       /// How many levels of sub-namespaces to pull (0 == no
>> recursion, None == maximum recursion)
>>       max_depth: Option<usize>,
>>       /// Filters for reducing the pull scope
>> -    group_filter: Option<Vec<GroupFilter>>,
>> +    include_filter: Vec<GroupFilter>,
>> +    exclude_filter: Vec<GroupFilter>,
> 
> Nit: better add a second doc comment for exclude_filter, otherwise
> `cargo doc` won't pick that up

Do I understand correctly? If I follow your earlier suggesting and
filter inplace, this parameter gets obsolete?
> 
> 
>>       /// How many snapshots should be transferred at most (taking
>> the newest N snapshots)
>>       transfer_last: Option<usize>,
>>   }
>> @@ -539,13 +541,16 @@ impl PullParameters {
>>               ns,
>>           };
>>   +        let (include_filter, exclude_filter) =
>> split_by_include_exclude(group_filter);
>> +
>>           Ok(Self {
>>               source,
>>               target,
>>               owner,
>>               remove_vanished,
>>               max_depth,
>> -            group_filter,
>> +            include_filter,
>> +            exclude_filter,
>>               transfer_last,
>>           })
>>       }
>> @@ -1358,7 +1363,6 @@ pub(crate) async fn pull_ns(
>>   ) -> Result<(StoreProgress, bool), Error> {
>>       let mut list: Vec<BackupGroup> =
>> params.source.list_groups(namespace, &params.owner).await?;
>>   -    let total_count = list.len();
>>       list.sort_unstable_by(|a, b| {
>>           let type_order = a.ty.cmp(&b.ty);
>>           if type_order == std::cmp::Ordering::Equal {
>> @@ -1368,27 +1372,18 @@ pub(crate) async fn pull_ns(
>>           }
>>       });
>>   -    let apply_filters = |group: &BackupGroup, filters:
>> &[GroupFilter]| -> bool {
>> -        filters.iter().any(|filter| group.matches(filter))
>> -    };
>> -
>> -    let list = if let Some(ref group_filter) = &params.group_filter {
>> -        let unfiltered_count = list.len();
>> -        let list: Vec<BackupGroup> = list
>> -            .into_iter()
>> -            .filter(|group| apply_filters(group, group_filter))
>> -            .collect();
>> -        task_log!(
>> -            worker,
>> -            "found {} groups to sync (out of {} total)",
>> -            list.len(),
>> -            unfiltered_count
>> -        );
>> -        list
>> -    } else {
>> -        task_log!(worker, "found {} groups to sync", total_count);
>> -        list
>> -    };
>> +    // if there are only exclude filter, inculude everything
>> +    let unfiltered_count = list.len();
>> +    let list: Vec<BackupGroup> = list
>> +        .into_iter()
>> +        .filter(|group| apply_filters(&group,
>> &params.include_filter, &params.exclude_filter))
>> +        .collect();
>> +    task_log!(
>> +        worker,
>> +        "found {} groups to sync (out of {} total)",
>> +        list.len(),
>> +        unfiltered_count
>> +    );
>>         let mut errors = false;
>>   @@ -1457,10 +1452,8 @@ pub(crate) async fn pull_ns(
>>                   if check_backup_owner(&owner,
>> &params.owner).is_err() {
>>                       continue;
>>                   }
>> -                if let Some(ref group_filter) = &params.group_filter {
>> -                    if !apply_filters(local_group, group_filter) {
>> -                        continue;
>> -                    }
>> +                if !apply_filters(&local_group,
>> &params.include_filter, &params.include_filter) {
>> +                    continue;
>>                   }
>>                   task_log!(worker, "delete vanished group
>> '{local_group}'",);
>>                   match params
> 




More information about the pbs-devel mailing list