[pbs-devel] [PATCH proxmox-backup v6] fix #4380: check if file is excluded before running `stat()`

Gabriel Goller g.goller at proxmox.com
Tue Aug 22 16:07:33 CEST 2023


Submitted a new version.

On 8/22/23 15:04, Wolfgang Bumiller wrote:
> On Mon, Aug 21, 2023 at 03:08:26PM +0200, Gabriel Goller wrote:
>> Passed a closure with the `stat()` function call to `matches()`. This
>> will traverse through all patterns and try to match using the path only, if a
>> `file_mode` is needed, it will run the closure. This means that if we exclude
>> a file with the `MatchType::ANY_FILE_TYPE`, we will skip it without running
>> `stat()` on it. As we updated the `matches()` function, we also updated all the
>> invocations of it.
>> Added `pathpatterns` crate to local overrides in cargo.toml.
>>
>> Signed-off-by: Gabriel Goller <g.goller at proxmox.com>
>> ---
>>
>> changes v5:
>>   - updated all invocations of `matches()`
>>
>> changes v4:
>>   - match only by path and exclude the matched files, the run `stat()` and
>>     match again, this time using the `file_mode`. This will match everything
>>     twice in the worst case, which is not optimal.
>> changes v3:
>>   - checking for `read` and `execute` permissions before entering directory,
>>     doesn't work because there are a lot of side-effects (executed by
>>     different user, AppArmor, SELinux, ...).
>> changes v2:
>>   - checking for excluded files with `matches()` before executing `stat()`,
>>     this doesn't work because we get the file_mode from `stat()` and don't
>>     want to ignore it when matching.
>>
>>
>>   Cargo.toml                      |  5 +++--
>>   pbs-client/src/catalog_shell.rs |  8 +++----
>>   pbs-client/src/pxar/create.rs   | 38 ++++++++++++++++++++-------------
>>   pbs-client/src/pxar/extract.rs  | 10 +++++----
>>   pbs-datastore/src/catalog.rs    |  6 +++---
>>   5 files changed, 39 insertions(+), 28 deletions(-)
>>
>> diff --git a/Cargo.toml b/Cargo.toml
>> index 5cbae1b8..560794a4 100644
>> --- a/Cargo.toml
>> +++ b/Cargo.toml
>> @@ -264,8 +264,9 @@ proxmox-rrd.workspace = true
>>   #proxmox-sortable-macro = { path = "../proxmox/proxmox-sortable-macro" }
>>   #proxmox-human-byte = { path = "../proxmox/proxmox-human-byte" }
>>   
>> -#proxmox-apt = { path = "../proxmox/proxmox-apt" }
>> -#proxmox-openid = { path = "../proxmox/proxmox-openid" }
>> +#proxmox-apt = { path = "../proxmox-apt" }
>> +#proxmox-openid = { path = "../proxmox-openid-rs" }
>> +#pathpatterns = {path = "../pathpatterns" }
>>   
>>   #pxar = { path = "../pxar" }
>>   
>> diff --git a/pbs-client/src/catalog_shell.rs b/pbs-client/src/catalog_shell.rs
>> index b8aaf8cb..f53b3cc5 100644
>> --- a/pbs-client/src/catalog_shell.rs
>> +++ b/pbs-client/src/catalog_shell.rs
>> @@ -1138,14 +1138,14 @@ impl<'a> ExtractorState<'a> {
>>       pub async fn handle_entry(&mut self, entry: catalog::DirEntry) -> Result<(), Error> {
>>           let match_result = self.match_list.matches(&self.path, entry.get_file_mode());
>>           let did_match = match match_result {
>> -            Some(MatchType::Include) => true,
>> -            Some(MatchType::Exclude) => false,
>> -            None => self.matches,
>> +            Ok(Some(MatchType::Include)) => true,
>> +            Ok(Some(MatchType::Exclude)) => false,
>> +            _ => self.matches,
>>           };
>>   
>>           match (did_match, &entry.attr) {
>>               (_, DirEntryAttribute::Directory { .. }) => {
>> -                self.handle_new_directory(entry, match_result).await?;
>> +                self.handle_new_directory(entry, match_result?).await?;
>>               }
>>               (true, DirEntryAttribute::File { .. }) => {
>>                   self.dir_stack.push(PathStackEntry::new(entry));
>> diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
>> index 2577cf98..2d516cfa 100644
>> --- a/pbs-client/src/pxar/create.rs
>> +++ b/pbs-client/src/pxar/create.rs
>> @@ -21,7 +21,6 @@ use pxar::Metadata;
>>   
>>   use proxmox_io::vec;
>>   use proxmox_lang::c_str;
>> -use proxmox_sys::error::SysError;
>>   use proxmox_sys::fs::{self, acl, xattr};
>>   
>>   use pbs_datastore::catalog::BackupCatalogWriter;
>> @@ -420,7 +419,7 @@ impl Archiver {
>>           for file in dir.iter() {
>>               let file = file?;
>>   
>> -            let file_name = file.file_name().to_owned();
>> +            let file_name = file.file_name();
>>               let file_name_bytes = file_name.to_bytes();
>>               if file_name_bytes == b"." || file_name_bytes == b".." {
>>                   continue;
>> @@ -434,25 +433,34 @@ impl Archiver {
>>               assert_single_path_component(os_file_name)?;
>>               let full_path = self.path.join(os_file_name);
>>   
>> -            let stat = match nix::sys::stat::fstatat(
>> +            let match_path = PathBuf::from("/").join(full_path.clone());
>> +
>> +            let mut stat_results: Option<FileStat> = None;
>> +
>> +            let get_file_mode = || match nix::sys::stat::fstatat(
> You don't need that 'match' here if your cases are basically a no-op.
>
> You *could* attach the `stat failed on ...` context with the file name
> here... but this will make it a bit more tedious to check for `ENOENT`
> as we'd need to use `.downcast()` on the anyhow::Error.
>
> So either that, or we'll need to duplicate the context line down below
> where we do the final `let stat = results.unwrap_or_else(get_file_mode)?`.
>
>>                   dir_fd,
>> -                file_name.as_c_str(),
>> +                file_name.to_owned().as_c_str(),
>>                   nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW,
>>               ) {
>> -                Ok(stat) => stat,
>> -                Err(ref err) if err.not_found() => continue,
> Because the thing is, we still need to handle this case - just noticed
> that this was removed entirely, which is of course not what we want :-)
> If the file gets removed between listing it from the directory and us
> trying to `stat` it, we should just act as if it had never existed.
>
>> -                Err(err) => return Err(err).context(format!("stat failed on {:?}", full_path)),
>> +                Ok(stat) => Ok(stat),
>> +                Err(e) => Err(e),
>>               };
>> -
>> -            let match_path = PathBuf::from("/").join(full_path.clone());
>> -            if self
>> -                .patterns
>> -                .matches(match_path.as_os_str().as_bytes(), Some(stat.st_mode))
>> -                == Some(MatchType::Exclude)
>> +            if Some(MatchType::Exclude)
>> +                == self
>> +                    .patterns
>> +                    .matches(match_path.as_os_str().as_bytes(), || {
>> +                        Ok::<_, Errno>(match &stat_results {
>> +                            Some(result) => result.st_mode,
>> +                            None => stat_results.insert(get_file_mode()?).st_mode,
>> +                        })
>> +                    })
> ... it will instead need to be checked here ^
> Basically, we will need to swap the `if` for a `match` after all, with
> the cases
>      Ok(Some(MatchType::Exclude)) => continue,
>      Ok(_) => (),
>      Err(err) if err.not_found() => continue,
>      err => return err.with_context(...),
> or the final 2 cases when using *not* not changing the `get_file_mode`
> error to `anyhow::Error` would be:
>      Err(err) => match err.downcast::<io::Error>() {
>          Some(err) if err.not_found() => continue,
>          _ => return Err(err),
>      }
>
>
> I hope I didn't miss anything now.
>
>> +                    .with_context(|| format!("stat failed on {full_path:?}"))?
>>               {
>>                   continue;
>>               }
>>   
>> +            let stat = stat_results.map(Ok).unwrap_or_else(get_file_mode)?;
> If the context line is not already in the closure, we'd need to
> duplicate it here.
>
>> +
>>               self.entry_counter += 1;
>>               if self.entry_counter > self.entry_limit {
>>                   bail!(
>> @@ -462,7 +470,7 @@ impl Archiver {
>>               }
>>   
>>               file_list.push(FileListEntry {
>> -                name: file_name,
>> +                name: file_name.to_owned(),
>>                   path: full_path,
>>                   stat,
>>               });
>> @@ -533,7 +541,7 @@ impl Archiver {
>>           let match_path = PathBuf::from("/").join(self.path.clone());
>>           if self
>>               .patterns
>> -            .matches(match_path.as_os_str().as_bytes(), Some(stat.st_mode))
>> +            .matches(match_path.as_os_str().as_bytes(), stat.st_mode)?
>>               == Some(MatchType::Exclude)
>>           {
>>               return Ok(());
>> diff --git a/pbs-client/src/pxar/extract.rs b/pbs-client/src/pxar/extract.rs
>> index 4eb6fb90..e24a1560 100644
>> --- a/pbs-client/src/pxar/extract.rs
>> +++ b/pbs-client/src/pxar/extract.rs
>> @@ -251,22 +251,24 @@ where
>>   
>>           self.extractor.set_path(entry.path().as_os_str().to_owned());
>>   
>> +        // We can `unwrap()` safely here because we get a `Result<_, std::convert::Infallible>`
>>           let match_result = self.match_list.matches(
>>               entry.path().as_os_str().as_bytes(),
>> -            Some(metadata.file_type() as u32),
>> -        );
>> +            metadata.file_type() as u32
>> +        ).unwrap();
>>   
>>           let did_match = match match_result {
>>               Some(MatchType::Include) => true,
>>               Some(MatchType::Exclude) => false,
>> -            None => self.state.current_match,
>> +            _ => self.state.current_match,
>>           };
>>   
>>           let extract_res = match (did_match, entry.kind()) {
>>               (_, EntryKind::Directory) => {
>>                   self.callback(entry.path());
>>   
>> -                let create = self.state.current_match && match_result != Some(MatchType::Exclude);
>> +                let create =
>> +                    self.state.current_match && match_result != Some(MatchType::Exclude);
>>                   let res = self
>>                       .extractor
>>                       .enter_directory(file_name_os.to_owned(), metadata.clone(), create)
>> diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs
>> index 11c14b64..86e20c92 100644
>> --- a/pbs-datastore/src/catalog.rs
>> +++ b/pbs-datastore/src/catalog.rs
>> @@ -678,9 +678,9 @@ impl<R: Read + Seek> CatalogReader<R> {
>>               }
>>               file_path.extend(&e.name);
>>               match match_list.matches(&file_path, e.get_file_mode()) {
>> -                Some(MatchType::Exclude) => continue,
>> -                Some(MatchType::Include) => callback(file_path)?,
>> -                None => (),
>> +                Ok(Some(MatchType::Exclude)) => continue,
>> +                Ok(Some(MatchType::Include)) => callback(file_path)?,
>> +                _ => (),





More information about the pbs-devel mailing list