[pbs-devel] [PATCH v2 proxmox-backup 18/20] pxar/extract: add sequential variant to extract_sub_dir

Stefan Reiter s.reiter at proxmox.com
Wed Mar 24 16:18:25 CET 2021

For streaming pxar files directly from a restore source and extracting
them on the fly, we cannot create an Accessor, and instead have to live
with a sequential Decoder. This supports only the aio::Decoder variant,
since the functions are async anyway.

The original functionality remains in place, the new function is
labelled with a _seq suffix. The recursive function actually doing the
work is changed to take an EitherEntry enum variant that can contain
either an Accessor (recursive operation) or a Decoder (linear
operation).

If the _seq variant is given a decoder whose current position points to
a file, it will only extract/encode this file; if it points to a
directory, it will instead extract until it leaves the directory it
started in.

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
 src/pxar/extract.rs | 214 +++++++++++++++++++++++++++++++++-----------
 src/pxar/mod.rs     |   5 +-
 2 files changed, 164 insertions(+), 55 deletions(-)

diff --git a/src/pxar/extract.rs b/src/pxar/extract.rs
index 952e2d20..4370898c 100644
--- a/src/pxar/extract.rs
+++ b/src/pxar/extract.rs
@@ -17,8 +17,9 @@ use nix::sys::stat::Mode;
 use pathpatterns::{MatchEntry, MatchList, MatchType};
 use pxar::format::Device;
-use pxar::Metadata;
+use pxar::{Entry, Metadata, EntryKind};
 use pxar::accessor::aio::{Accessor, FileContents, FileEntry};
+use pxar::decoder::aio::Decoder;
 use proxmox::c_result;
 use proxmox::tools::{
@@ -93,8 +94,6 @@ where
     let mut err_path_stack = vec![OsString::from("/")];
     let mut current_match = options.extract_match_default;
     while let Some(entry) = decoder.next() {
-        use pxar::EntryKind;
         let entry = entry.map_err(|err| format_err!("error reading pxar archive: {}", err))?;
         let file_name_os = entry.file_name();
@@ -552,7 +551,6 @@ where
     T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
     W: tokio::io::AsyncWrite + Unpin + Send + 'static,
-    use pxar::EntryKind;
     Box::pin(async move {
         let metadata = file.entry().metadata();
         let path = file.entry().path().strip_prefix(&prefix)?.to_path_buf();
@@ -612,6 +610,57 @@ where
+enum EitherEntry<
+    'a,
+    S: pxar::decoder::SeqRead + Unpin + Send + 'static,
+    T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
+> {
+    Entry(Entry, &'a mut Decoder<S>),
+    FileEntry(FileEntry<T>, &'a mut Accessor<T>),
+// These types are never constructed, but we need some concrete type fulfilling S and T from
+// EitherEntry so rust is happy with its use in async fns
+type BogusSeqRead = pxar::decoder::sync::StandardReader<std::io::Empty>;
+type BogusReadAt = pxar::accessor::sync::FileRefReader<Arc<std::fs::File>>;
+fn get_extractor<DEST>(destination: DEST, metadata: Metadata) -> Result<Extractor, Error>
+    DEST: AsRef<Path>
+    create_path(
+        &destination,
+        None,
+        Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
+    )
+    .map_err(|err| {
+        format_err!(
+            "error creating directory {:?}: {}",
+            destination.as_ref(),
+            err
+        )
+    })?;
+    let dir = Dir::open(
+        destination.as_ref(),
+        OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
+        Mode::empty(),
+    )
+    .map_err(|err| {
+        format_err!(
+            "unable to open target directory {:?}: {}",
+            destination.as_ref(),
+            err,
+        )
+    })?;
+    Ok(Extractor::new(
+        dir,
+        metadata,
+        false,
+        Flags::DEFAULT,
+    ))
 pub async fn extract_sub_dir<T, DEST, PATH>(
     destination: DEST,
@@ -626,47 +675,83 @@ where
     let root = decoder.open_root().await?;
-    create_path(
-        &destination,
-        None,
-        Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
-    )
-    .map_err(|err| format_err!("error creating directory {:?}: {}", destination.as_ref(), err))?;
-    let dir = Dir::open(
-        destination.as_ref(),
-        OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
-        Mode::empty(),
-    )
-    .map_err(|err| format_err!("unable to open target directory {:?}: {}", destination.as_ref(), err,))?;
-    let mut extractor =  Extractor::new(
-        dir,
+    let mut extractor = get_extractor(
+        destination,
-        false,
-        Flags::DEFAULT,
-    );
+    )?;
     let file = root
-        .lookup(&path).await?
-        .ok_or(format_err!("error opening '{:?}'", path.as_ref()))?;
+        .lookup(&path)
+        .await?
+        .ok_or_else(|| format_err!("error opening '{:?}'", path.as_ref()))?;
-    recurse_files_extractor(&mut extractor, &mut decoder, file, verbose).await
+    let entry: EitherEntry<BogusSeqRead, T> = EitherEntry::FileEntry(file, &mut decoder);
+    do_extract_sub_dir(&mut extractor, entry, verbose).await
-fn recurse_files_extractor<'a, T>(
+pub async fn extract_sub_dir_seq<S, DEST>(
+    destination: DEST,
+    mut decoder: Decoder<S>,
+    verbose: bool,
+) -> Result<(), Error>
+    S: pxar::decoder::SeqRead + Unpin + Send + 'static,
+    DEST: AsRef<Path>,
+    decoder.enable_goodbye_entries(true);
+    let root = match decoder.next().await {
+        Some(Ok(root)) => root,
+        Some(Err(err)) => bail!("error getting root entry from pxar: {}", err),
+        None => bail!("cannot extract empty archive"),
+    };
+    let mut extractor = get_extractor(destination, root.metadata().clone())?;
+    let mut dir_level = 0;
+    let mut next = Some(Ok(root));
+    while let Some(file) = next {
+        match file {
+            Ok(file) => {
+                match file.kind() {
+                    EntryKind::Directory => dir_level += 1,
+                    EntryKind::GoodbyeTable => dir_level -= 1,
+                    _ => {}
+                }
+                let path = file.path().to_owned();
+                let entry: EitherEntry<S, BogusReadAt> = EitherEntry::Entry(file, &mut decoder);
+                if let Err(err) = do_extract_sub_dir(&mut extractor, entry, verbose).await {
+                    eprintln!("error extracting {}: {}", path.display(), err);
+                }
+                // only extract until we leave the directory we started in,
+                // and exit immediately if 'root' is a file and not a folder
+                if dir_level == 0 {
+                    break;
+                }
+            }
+            Err(err) => bail!("error in decoder: {}", err),
+        }
+        next = decoder.next().await;
+    }
+    Ok(())
+fn do_extract_sub_dir<'a, S, T>(
     extractor: &'a mut Extractor,
-    decoder: &'a mut Accessor<T>,
-    file: FileEntry<T>,
+    file: EitherEntry<'a, S, T>,
     verbose: bool,
 ) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>>
+    S: pxar::decoder::SeqRead + Unpin + Send,
     T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
-    use pxar::EntryKind;
     Box::pin(async move {
-        let metadata = file.entry().metadata();
-        let file_name_os = file.file_name();
+        let (metadata, file_name_os, path, kind) = match file {
+            EitherEntry::Entry(ref e, _) => (e.metadata(), e.file_name(), e.path(), e.kind()),
+            EitherEntry::FileEntry(ref fe, _) => (fe.metadata(), fe.file_name(), fe.path(), fe.kind()),
+        };
         // safety check: a file entry in an archive must never contain slashes:
         if file_name_os.as_bytes().contains(&b'/') {
@@ -676,28 +761,32 @@ where
         let file_name = CString::new(file_name_os.as_bytes())
             .map_err(|_| format_err!("encountered file name with null-bytes"))?;
-        if verbose {
-            eprintln!("extracting: {}", file.path().display());
+        if verbose && !matches!(kind, EntryKind::GoodbyeTable) {
+            eprintln!("extracting: {}", path.display());
-        match file.kind() {
+        match kind {
             EntryKind::Directory => {
                     .enter_directory(file_name_os.to_owned(), metadata.clone(), true)
                     .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
-                let dir = file.enter_directory().await?;
-                let mut readdir = dir.read_dir();
-                while let Some(entry) = readdir.next().await {
-                    let entry = entry?.decode_entry().await?;
-                    let filename = entry.path().to_path_buf();
+                // for EitherEntry::Entry we detect directory end with GoodbyeTable
+                if let EitherEntry::FileEntry(file, a) = file {
+                    let dir = file.enter_directory().await?;
+                    let mut readdir = dir.read_dir();
+                    while let Some(entry) = readdir.next().await {
+                        let entry = entry?.decode_entry().await?;
+                        let filename = entry.path().to_path_buf();
-                    // log errors and continue
-                    if let Err(err) = recurse_files_extractor(extractor, decoder, entry, verbose).await {
-                        eprintln!("error extracting {:?}: {}", filename.display(), err);
+                        // log errors and continue
+                        let entry: EitherEntry<BogusSeqRead, T> = EitherEntry::FileEntry(entry, a);
+                        if let Err(err) = do_extract_sub_dir(extractor, entry, verbose).await {
+                            eprintln!("error extracting {}: {}", filename.display(), err);
+                        }
+                    extractor.leave_directory()?;
-                extractor.leave_directory()?;
             EntryKind::Symlink(link) => {
                 extractor.extract_symlink(&file_name, metadata, link.as_ref())?;
@@ -720,17 +809,34 @@ where
                     extractor.extract_special(&file_name, metadata, 0)?;
-            EntryKind::File { size, .. } => extractor.async_extract_file(
-                &file_name,
-                metadata,
-                *size,
-                &mut file.contents().await.map_err(|_| {
-                    format_err!("found regular file entry without contents in archive")
-                })?,
-            ).await?,
-            EntryKind::GoodbyeTable => {}, // ignore
+            EntryKind::File { size, .. } => {
+                extractor
+                    .async_extract_file(
+                        &file_name,
+                        metadata,
+                        *size,
+                        &mut match file {
+                            EitherEntry::Entry(_, dec) => Box::new(match dec.contents() {
+                                Some(con) => con,
+                                None => bail!("file without contents found"),
+                            })
+                                as Box<dyn tokio::io::AsyncRead + Unpin + Send>,
+                            EitherEntry::FileEntry(ref fe, _) => {
+                                Box::new(fe.contents().await.map_err(|err| {
+                                    format_err!("file with bad contents found: {}", err)
+                                })?)
+                                    as Box<dyn tokio::io::AsyncRead + Unpin + Send>
+                            }
+                        },
+                    )
+                    .await?
+            }
+            EntryKind::GoodbyeTable => {
+                if let EitherEntry::Entry(_, _) = file {
+                    extractor.leave_directory()?;
+                }
+            }
diff --git a/src/pxar/mod.rs b/src/pxar/mod.rs
index d1302962..0045effc 100644
--- a/src/pxar/mod.rs
+++ b/src/pxar/mod.rs
@@ -59,7 +59,10 @@ mod flags;
 pub use flags::Flags;
 pub use create::{create_archive, PxarCreateOptions};
-pub use extract::{create_zip, extract_archive, extract_sub_dir, ErrorHandler, PxarExtractOptions};
+pub use extract::{
+    create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq,
+    ErrorHandler, PxarExtractOptions,
 /// The format requires to build sorted directory lookup tables in
 /// memory, so we restrict the number of allowed entries to limit

More information about the pbs-devel mailing list