[pbs-devel] [PATCH proxmox-backup v5 1/4] fix #4315: jobs: modify GroupFilter so include/exclude is tracked
Lukas Wagner
l.wagner at proxmox.com
Fri Dec 22 10:53:16 CET 2023
On 12/22/23 10:42, Philipp Hufnagl wrote:
> Hello
>
> Thank you for the deep review!
>
> On 12/19/23 14:22, Lukas Wagner wrote:
>> Hey Philipp,
>>
>> some comments inline. I quite like the changes compared to the last
>> version, but I think there are some small aspects that could still be
>> improved before this goes in.
>>
>> On 12/18/23 16:36, Philipp Hufnagl wrote:
>>> After some discussion I canged the include/exclude behavior to first
>>> run
>>> all include filter and after that all exclude filter (rather then
>>> allowing to alternate inbetween). This is done by splitting them into 2
>>> lists, running include first.
>>>
>>> A lot of discussion happened how edge cases should be handled and we
>>> came to following conclusion:
>>>
>>> no include filter + no exclude filter => include all
>>> some include filter + no exclude filter => filter as always
>>> no include filter + some exclude filter => include all then exclude
>>>
>>> Since a GroupFilter now also features an behavior, the Struct has been
>>> renamed To GroupType (since simply type is a keyword). The new
>>> GroupFilter now has a behaviour as a flag 'is_exclude'.
>>>
>>> I considered calling it 'is_include' but a reader later then might not
>>> know what the opposite of 'include' is (do not include?
>>> deactivate?). I
>>> also considered making a new enum 'behaviour' but since there are
>>> only 2
>>> values I considered it over engeneered.
>>>
>>> Signed-off-by: Philipp Hufnagl <p.hufnagl at proxmox.com>
>>> ---
>>> pbs-api-types/src/datastore.rs | 11 ++---
>>> pbs-api-types/src/jobs.rs | 85
>>> ++++++++++++++++++++++++++++------
>>> src/api2/tape/backup.rs | 44 +++++++-----------
>>> src/server/pull.rs | 55 ++++++++++------------
>>> 4 files changed, 118 insertions(+), 77 deletions(-)
>>>
>>> diff --git a/pbs-api-types/src/datastore.rs
>>> b/pbs-api-types/src/datastore.rs
>>> index 74f610d1..673b5bcd 100644
>>> --- a/pbs-api-types/src/datastore.rs
>>> +++ b/pbs-api-types/src/datastore.rs
>>> @@ -843,17 +843,16 @@ impl BackupGroup {
>>> }
>>> pub fn matches(&self, filter: &crate::GroupFilter) -> bool {
>>> - use crate::GroupFilter;
>>> -
>>> - match filter {
>>> - GroupFilter::Group(backup_group) => {
>>> + use crate::FilterType;
>>> + match &filter.filter_type {
>>> + FilterType::Group(backup_group) => {
>>> match backup_group.parse::<BackupGroup>() {
>>> Ok(group) => *self == group,
>>> Err(_) => false, // shouldn't happen if value
>>> is schema-checked
>>> }
>>> }
>>> - GroupFilter::BackupType(ty) => self.ty == *ty,
>>> - GroupFilter::Regex(regex) =>
>>> regex.is_match(&self.to_string()),
>>> + FilterType::BackupType(ty) => self.ty == *ty,
>>> + FilterType::Regex(regex) =>
>>> regex.is_match(&self.to_string()),
>>> }
>>> }
>>> }
>>> diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs
>>> index 1f5b3cf1..982f08a6 100644
>>> --- a/pbs-api-types/src/jobs.rs
>>> +++ b/pbs-api-types/src/jobs.rs
>>> @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
>>> use proxmox_schema::*;
>>> use crate::{
>>> - Authid, BackupNamespace, BackupType, RateLimitConfig, Userid,
>>> BACKUP_GROUP_SCHEMA,
>>> + Authid, BackupGroup, BackupNamespace, BackupType,
>>> RateLimitConfig, Userid, BACKUP_GROUP_SCHEMA,
>>> BACKUP_NAMESPACE_SCHEMA, DATASTORE_SCHEMA, DRIVE_NAME_SCHEMA,
>>> MEDIA_POOL_NAME_SCHEMA,
>>> NS_MAX_DEPTH_REDUCED_SCHEMA, PROXMOX_SAFE_ID_FORMAT,
>>> REMOTE_ID_SCHEMA,
>>> SINGLE_LINE_COMMENT_SCHEMA,
>>> @@ -388,7 +388,7 @@ pub struct TapeBackupJobStatus {
>>> #[derive(Clone, Debug)]
>>> /// Filter for matching `BackupGroup`s, for use with
>>> `BackupGroup::filter`.
>>> -pub enum GroupFilter {
>>> +pub enum FilterType {
>>> /// BackupGroup type - either `vm`, `ct`, or `host`.
>>> BackupType(BackupType),
>>> /// Full identifier of BackupGroup, including type
>>> @@ -397,7 +397,7 @@ pub enum GroupFilter {
>>> Regex(Regex),
>>> }
>>> -impl PartialEq for GroupFilter {
>>> +impl PartialEq for FilterType {
>>> fn eq(&self, other: &Self) -> bool {
>>> match (self, other) {
>>> (Self::BackupType(a), Self::BackupType(b)) => a == b,
>>> @@ -408,27 +408,52 @@ impl PartialEq for GroupFilter {
>>> }
>>> }
>>> +#[derive(Clone, Debug)]
>>> +pub struct GroupFilter {
>>> + pub is_exclude: bool,
>>> + pub filter_type: FilterType,
>>> +}
>>> +
>>> +impl PartialEq for GroupFilter {
>>> + fn eq(&self, other: &Self) -> bool {
>>> + self.filter_type == other.filter_type && self.is_exclude ==
>>> other.is_exclude
>>> + }
>>> +}
>>> +
>>> +impl Eq for GroupFilter {}
>>> +
>>> impl std::str::FromStr for GroupFilter {
>>> type Err = anyhow::Error;
>>> fn from_str(s: &str) -> Result<Self, Self::Err> {
>>> - match s.split_once(':') {
>>> - Some(("group", value)) =>
>>> BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_|
>>> GroupFilter::Group(value.to_string())),
>>> - Some(("type", value)) =>
>>> Ok(GroupFilter::BackupType(value.parse()?)),
>>> - Some(("regex", value)) =>
>>> Ok(GroupFilter::Regex(Regex::new(value)?)),
>>> + let (is_exclude, type_str) = match s.split_once(':') {
>>> + Some(("include", value)) => (false, value),
>>> + Some(("exclude", value)) => (true, value),
>>> + _ => (false, s),
>>> + };
>>> +
>>> + let filter_type = match type_str.split_once(':') {
>>> + Some(("group", value)) =>
>>> BACKUP_GROUP_SCHEMA.parse_simple_value(value).map(|_|
>>> FilterType::Group(value.to_string())),
>>> + Some(("type", value)) =>
>>> Ok(FilterType::BackupType(value.parse()?)),
>>> + Some(("regex", value)) =>
>>> Ok(FilterType::Regex(Regex::new(value)?)),
>>> Some((ty, _value)) => Err(format_err!("expected
>>> 'group', 'type' or 'regex' prefix, got '{}'", ty)),
>>> None => Err(format_err!("input doesn't match expected
>>> format '<group:GROUP||type:<vm|ct|host>|regex:REGEX>'")),
>>> - }.map_err(|err| format_err!("'{}' - {}", s, err))
>>> + }?;
>>> + Ok(GroupFilter {
>>> + is_exclude,
>>> + filter_type,
>>> + })
>>> }
>>> }
>>> // used for serializing below, caution!
>>> impl std::fmt::Display for GroupFilter {
>>> fn fmt(&self, f: &mut std::fmt::Formatter<'_>) ->
>>> std::fmt::Result {
>>> - match self {
>>> - GroupFilter::BackupType(backup_type) => write!(f,
>>> "type:{}", backup_type),
>>> - GroupFilter::Group(backup_group) => write!(f,
>>> "group:{}", backup_group),
>>> - GroupFilter::Regex(regex) => write!(f, "regex:{}",
>>> regex.as_str()),
>>> + let exclude = if self.is_exclude { "exclude:" } else { "" };
>>> + match &self.filter_type {
>>> + FilterType::BackupType(backup_type) => write!(f,
>>> "{}type:{}", exclude, backup_type),
>>> + FilterType::Group(backup_group) => write!(f,
>>> "{}group:{}", exclude, backup_group),
>>> + FilterType::Regex(regex) => write!(f, "{}regex:{}",
>>> exclude, regex.as_str()),
>>> }
>>
>> nit: Some of these format string vars can be inlined, so "{}", foo can
>> become "{foo}". Only works for variables though, since the {} cannot
>> contain arbitrary expressions.
>>
>> Also, just FIY (no need to change that if you don't like it), you can
>> also do the following:
>>
>> "{exclude}regex:{regex}", regex = regex.as_str()
>>
>> It's a bit more verbose, but I quite like this style, especially if
>> all other vars can be inlined.
>>
>>> }
>>> }
>>> @@ -440,10 +465,42 @@ fn verify_group_filter(input: &str) ->
>>> Result<(), anyhow::Error> {
>>> GroupFilter::from_str(input).map(|_| ())
>>> }
>>> +pub fn split_by_include_exclude(
>>> + all_filter: Option<Vec<GroupFilter>>,
>>> +) -> (Vec<GroupFilter>, Vec<GroupFilter>) {
>>> + if let Some(all_filter) = all_filter {
>>> + all_filter
>>> + .into_iter()
>>> + .partition(|filter| !filter.is_exclude)
>>> + } else {
>>> + (Vec::new(), Vec::new())
>>> + }
>>> +}
>>
>> ... would not be needed then, see following coment
>>
>>> +
>>> +pub fn apply_filters(
>>> + group: &BackupGroup,
>>> + include_filters: &[GroupFilter],
>>> + exclude_filters: &[GroupFilter],
>>> +) -> bool {
>>> + let mut is_match: bool;
>>> +
>>> + if include_filters.is_empty() {
>>> + is_match = true;
>>> + } else {
>>> + is_match = include_filters.iter().any(|filter|
>>> group.matches(filter));
>>> + }
>>> +
>>> + if !exclude_filters.is_empty() && is_match {
>>> + is_match = !exclude_filters.iter().any(|filter|
>>> group.matches(filter));
>>> + }
>>> +
>>> + is_match
>>> +}
>>> +
>>
>> I prefer to avoid freestanding functions; I think this helper should
>> be a method on the BackupGroup struct and should be named slightly
>> different, e.g.:
>>
>> BackupGroup::matches_filters
>
> I was wondering about that as well, considering to do it that way. I
> think you are right and will change it.
>
>>
>> Also, now that I see the whole code, I think splitting
>> includes/excludes beforehand is actually not really necessary.
>> You could just do a
>> include_filters.iter().filter(|f| !f.is_exclude).any(|f|
>> group.matches(f))
>>
>> and the same thing with inverted filter condition for the excludes.
>
> Hmm... I see what you mean. Howwevr on the down side, this makes the
> code harder to comprehend and may result in more overvhead by
> iterating more often through the list. Ill think about it.
Yeah, but realistically the additional iterations do not matter at all,
since we probably deal with <20 array elements. It's not like this is in
a tight loop or anything :)
>>
>> Also, the `!exclude_filters.is_empty()` is not really needed, since in
>> that case .iter() won't yield any elements anyway ;)
>
> True :)
>>
>>
>>> pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new(
>>> - "Group filter based on group identifier ('group:GROUP'), group
>>> type ('type:<vm|ct|host>'), or regex ('regex:RE').")
>>> + "Group filter based on group identifier ('group:GROUP'), group
>>> type ('type:<vm|ct|host>'), or regex ('regex:RE'). Can be inverted
>>> by adding 'exclude:' before.")
>>> .format(&ApiStringFormat::VerifyFn(verify_group_filter))
>>> - .type_text("<type:<vm|ct|host>|group:GROUP|regex:RE>")
>>> +
>>> .type_text("[<exclude:|include:>]<type:<vm|ct|host>|group:GROUP|regex:RE>")
>>> .schema();
>>> pub const GROUP_FILTER_LIST_SCHEMA: Schema =
>>> diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
>>> index 2f9385a7..1e2953c4 100644
>>> --- a/src/api2/tape/backup.rs
>>> +++ b/src/api2/tape/backup.rs
>>> @@ -9,9 +9,9 @@ use proxmox_schema::api;
>>> use proxmox_sys::{task_log, task_warn, WorkerTaskContext};
>>> use pbs_api_types::{
>>> - print_ns_and_snapshot, print_store_and_ns, Authid, GroupFilter,
>>> MediaPoolConfig, Operation,
>>> - TapeBackupJobConfig, TapeBackupJobSetup, TapeBackupJobStatus,
>>> Userid, JOB_ID_SCHEMA,
>>> - PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT, PRIV_TAPE_WRITE,
>>> UPID_SCHEMA,
>>> + apply_filters, print_ns_and_snapshot, print_store_and_ns,
>>> split_by_include_exclude, Authid,
>>> + MediaPoolConfig, Operation, TapeBackupJobConfig,
>>> TapeBackupJobSetup, TapeBackupJobStatus,
>>> + Userid, JOB_ID_SCHEMA, PRIV_DATASTORE_READ, PRIV_TAPE_AUDIT,
>>> PRIV_TAPE_WRITE, UPID_SCHEMA,
>>> };
>>> use pbs_config::CachedUserInfo;
>>> @@ -411,31 +411,23 @@ fn backup_worker(
>>> group_list.sort_unstable_by(|a, b| a.group().cmp(b.group()));
>>> - let (group_list, group_count) = if let Some(group_filters) =
>>> &setup.group_filter {
>>> - let filter_fn = |group: &BackupGroup, group_filters:
>>> &[GroupFilter]| {
>>> - group_filters.iter().any(|filter| group.matches(filter))
>>> - };
>>> + let (include_filter, exclude_filter) =
>>> split_by_include_exclude(setup.group_filter.clone());
>>> - let group_count_full = group_list.len();
>>> - let list: Vec<BackupGroup> = group_list
>>> - .into_iter()
>>> - .filter(|group| filter_fn(group, group_filters))
>>> - .collect();
>>> - let group_count = list.len();
>>> - task_log!(
>>> - worker,
>>> - "found {} groups (out of {} total)",
>>> - group_count,
>>> - group_count_full
>>> - );
>>> - (list, group_count)
>>> - } else {
>>> - let group_count = group_list.len();
>>> - task_log!(worker, "found {} groups", group_count);
>>> - (group_list, group_count)
>>> - };
>>> + let group_count_full = group_list.len();
>>> + // if there are only exclude filter, inculude everything
>>> + let group_list: Vec<BackupGroup> = group_list
>>> + .into_iter()
>>> + .filter(|group| apply_filters(group.group(),
>>> &include_filter, &exclude_filter))
>>> + .collect();
>>> +
>>> + task_log!(
>>> + worker,
>>> + "found {} groups (out of {} total)",
>>> + group_list.len(),
>>> + group_count_full
>>> + );
>>> - let mut progress = StoreProgress::new(group_count as u64);
>>> + let mut progress = StoreProgress::new(group_list.len() as u64);
>>> let latest_only = setup.latest_only.unwrap_or(false);
>>> diff --git a/src/server/pull.rs b/src/server/pull.rs
>>> index 3b71c156..e458008b 100644
>>> --- a/src/server/pull.rs
>>> +++ b/src/server/pull.rs
>>> @@ -15,9 +15,10 @@ use proxmox_sys::{task_log, task_warn};
>>> use serde_json::json;
>>> use pbs_api_types::{
>>> - print_store_and_ns, Authid, BackupDir, BackupGroup,
>>> BackupNamespace, CryptMode, GroupFilter,
>>> - GroupListItem, Operation, RateLimitConfig, Remote,
>>> SnapshotListItem, MAX_NAMESPACE_DEPTH,
>>> - PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_READ,
>>> + apply_filters, print_store_and_ns, split_by_include_exclude,
>>> Authid, BackupDir, BackupGroup,
>>> + BackupNamespace, CryptMode, GroupFilter, GroupListItem,
>>> Operation, RateLimitConfig, Remote,
>>> + SnapshotListItem, MAX_NAMESPACE_DEPTH, PRIV_DATASTORE_AUDIT,
>>> PRIV_DATASTORE_BACKUP,
>>> + PRIV_DATASTORE_READ,
>>> };
>>> use pbs_client::{BackupReader, BackupRepository, HttpClient,
>>> RemoteChunkReader};
>>> use pbs_config::CachedUserInfo;
>>> @@ -486,7 +487,8 @@ pub(crate) struct PullParameters {
>>> /// How many levels of sub-namespaces to pull (0 == no
>>> recursion, None == maximum recursion)
>>> max_depth: Option<usize>,
>>> /// Filters for reducing the pull scope
>>> - group_filter: Option<Vec<GroupFilter>>,
>>> + include_filter: Vec<GroupFilter>,
>>> + exclude_filter: Vec<GroupFilter>,
>>
>> Nit: better add a second doc comment for exclude_filter, otherwise
>> `cargo doc` won't pick that up
>
> Do I understand correctly? If I follow your earlier suggesting and
> filter inplace, this parameter gets obsolete?
Ah yes, sorry for the slight confusion, I wrote this comment before the
other one. This parameter would indeed become obsolete then.
--
- Lukas
More information about the pbs-devel
mailing list