[pbs-devel] [PATCH v2 proxmox-backup 18/20] pxar/extract: add sequential variant to extract_sub_dir
Stefan Reiter
s.reiter at proxmox.com
Wed Mar 24 16:18:25 CET 2021
For streaming pxar files directly from a restore source and extracting
them on the fly, we cannot create an Accessor, and instead have to live
with a sequential Decoder. This supports only the aio::Decoder variant,
since the functions are async anyway.
The original functionality remains in place, the new function is
labelled with a _seq suffix. The recursive function actually doing the
work is changed to take an EitherEntry enum variant that can contain
either an Accessor (recursive operation) or a Decoder (linear
operation).
If the _seq variant is given a decoder whose current position points
to a file, it will only extract/encode this file; if it points to a
directory, it will instead extract until it leaves the directory it
started in.
Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
src/pxar/extract.rs | 214 +++++++++++++++++++++++++++++++++-----------
src/pxar/mod.rs | 5 +-
2 files changed, 164 insertions(+), 55 deletions(-)
diff --git a/src/pxar/extract.rs b/src/pxar/extract.rs
index 952e2d20..4370898c 100644
--- a/src/pxar/extract.rs
+++ b/src/pxar/extract.rs
@@ -17,8 +17,9 @@ use nix::sys::stat::Mode;
use pathpatterns::{MatchEntry, MatchList, MatchType};
use pxar::format::Device;
-use pxar::Metadata;
+use pxar::{Entry, Metadata, EntryKind};
use pxar::accessor::aio::{Accessor, FileContents, FileEntry};
+use pxar::decoder::aio::Decoder;
use proxmox::c_result;
use proxmox::tools::{
@@ -93,8 +94,6 @@ where
let mut err_path_stack = vec![OsString::from("/")];
let mut current_match = options.extract_match_default;
while let Some(entry) = decoder.next() {
- use pxar::EntryKind;
-
let entry = entry.map_err(|err| format_err!("error reading pxar archive: {}", err))?;
let file_name_os = entry.file_name();
@@ -552,7 +551,6 @@ where
T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
W: tokio::io::AsyncWrite + Unpin + Send + 'static,
{
- use pxar::EntryKind;
Box::pin(async move {
let metadata = file.entry().metadata();
let path = file.entry().path().strip_prefix(&prefix)?.to_path_buf();
@@ -612,6 +610,57 @@ where
})
}
+enum EitherEntry<
+ 'a,
+ S: pxar::decoder::SeqRead + Unpin + Send + 'static,
+ T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
+> {
+ Entry(Entry, &'a mut Decoder<S>),
+ FileEntry(FileEntry<T>, &'a mut Accessor<T>),
+}
+
+// These types are never constructed, but we need some concrete type fulfilling S and T from
+// EitherEntry so rust is happy with its use in async fns
+type BogusSeqRead = pxar::decoder::sync::StandardReader<std::io::Empty>;
+type BogusReadAt = pxar::accessor::sync::FileRefReader<Arc<std::fs::File>>;
+
+fn get_extractor<DEST>(destination: DEST, metadata: Metadata) -> Result<Extractor, Error>
+where
+ DEST: AsRef<Path>
+{
+ create_path(
+ &destination,
+ None,
+ Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
+ )
+ .map_err(|err| {
+ format_err!(
+ "error creating directory {:?}: {}",
+ destination.as_ref(),
+ err
+ )
+ })?;
+
+ let dir = Dir::open(
+ destination.as_ref(),
+ OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
+ Mode::empty(),
+ )
+ .map_err(|err| {
+ format_err!(
+ "unable to open target directory {:?}: {}",
+ destination.as_ref(),
+ err,
+ )
+ })?;
+
+ Ok(Extractor::new(
+ dir,
+ metadata,
+ false,
+ Flags::DEFAULT,
+ ))
+}
pub async fn extract_sub_dir<T, DEST, PATH>(
destination: DEST,
@@ -626,47 +675,83 @@ where
{
let root = decoder.open_root().await?;
- create_path(
- &destination,
- None,
- Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
- )
- .map_err(|err| format_err!("error creating directory {:?}: {}", destination.as_ref(), err))?;
-
- let dir = Dir::open(
- destination.as_ref(),
- OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
- Mode::empty(),
- )
- .map_err(|err| format_err!("unable to open target directory {:?}: {}", destination.as_ref(), err,))?;
-
- let mut extractor = Extractor::new(
- dir,
+ let mut extractor = get_extractor(
+ destination,
root.lookup_self().await?.entry().metadata().clone(),
- false,
- Flags::DEFAULT,
- );
+ )?;
let file = root
- .lookup(&path).await?
- .ok_or(format_err!("error opening '{:?}'", path.as_ref()))?;
+ .lookup(&path)
+ .await?
+ .ok_or_else(|| format_err!("error opening '{:?}'", path.as_ref()))?;
- recurse_files_extractor(&mut extractor, &mut decoder, file, verbose).await
+ let entry: EitherEntry<BogusSeqRead, T> = EitherEntry::FileEntry(file, &mut decoder);
+ do_extract_sub_dir(&mut extractor, entry, verbose).await
}
-fn recurse_files_extractor<'a, T>(
+pub async fn extract_sub_dir_seq<S, DEST>(
+ destination: DEST,
+ mut decoder: Decoder<S>,
+ verbose: bool,
+) -> Result<(), Error>
+where
+ S: pxar::decoder::SeqRead + Unpin + Send + 'static,
+ DEST: AsRef<Path>,
+{
+ decoder.enable_goodbye_entries(true);
+ let root = match decoder.next().await {
+ Some(Ok(root)) => root,
+ Some(Err(err)) => bail!("error getting root entry from pxar: {}", err),
+ None => bail!("cannot extract empty archive"),
+ };
+
+ let mut extractor = get_extractor(destination, root.metadata().clone())?;
+ let mut dir_level = 0;
+
+ let mut next = Some(Ok(root));
+ while let Some(file) = next {
+ match file {
+ Ok(file) => {
+ match file.kind() {
+ EntryKind::Directory => dir_level += 1,
+ EntryKind::GoodbyeTable => dir_level -= 1,
+ _ => {}
+ }
+
+ let path = file.path().to_owned();
+ let entry: EitherEntry<S, BogusReadAt> = EitherEntry::Entry(file, &mut decoder);
+ if let Err(err) = do_extract_sub_dir(&mut extractor, entry, verbose).await {
+ eprintln!("error extracting {}: {}", path.display(), err);
+ }
+
+ // only extract until we leave the directory we started in,
+ // and exit immediately if 'root' is a file and not a folder
+ if dir_level == 0 {
+ break;
+ }
+ }
+ Err(err) => bail!("error in decoder: {}", err),
+ }
+ next = decoder.next().await;
+ }
+
+ Ok(())
+}
+
+fn do_extract_sub_dir<'a, S, T>(
extractor: &'a mut Extractor,
- decoder: &'a mut Accessor<T>,
- file: FileEntry<T>,
+ file: EitherEntry<'a, S, T>,
verbose: bool,
) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>>
where
+ S: pxar::decoder::SeqRead + Unpin + Send,
T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
{
- use pxar::EntryKind;
Box::pin(async move {
- let metadata = file.entry().metadata();
- let file_name_os = file.file_name();
+ let (metadata, file_name_os, path, kind) = match file {
+ EitherEntry::Entry(ref e, _) => (e.metadata(), e.file_name(), e.path(), e.kind()),
+ EitherEntry::FileEntry(ref fe, _) => (fe.metadata(), fe.file_name(), fe.path(), fe.kind()),
+ };
// safety check: a file entry in an archive must never contain slashes:
if file_name_os.as_bytes().contains(&b'/') {
@@ -676,28 +761,32 @@ where
let file_name = CString::new(file_name_os.as_bytes())
.map_err(|_| format_err!("encountered file name with null-bytes"))?;
- if verbose {
- eprintln!("extracting: {}", file.path().display());
+ if verbose && !matches!(kind, EntryKind::GoodbyeTable) {
+ eprintln!("extracting: {}", path.display());
}
- match file.kind() {
+ match kind {
EntryKind::Directory => {
extractor
.enter_directory(file_name_os.to_owned(), metadata.clone(), true)
.map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
- let dir = file.enter_directory().await?;
- let mut readdir = dir.read_dir();
- while let Some(entry) = readdir.next().await {
- let entry = entry?.decode_entry().await?;
- let filename = entry.path().to_path_buf();
+ // for EitherEntry::Entry we detect directory end with GoodbyeTable
+ if let EitherEntry::FileEntry(file, a) = file {
+ let dir = file.enter_directory().await?;
+ let mut readdir = dir.read_dir();
+ while let Some(entry) = readdir.next().await {
+ let entry = entry?.decode_entry().await?;
+ let filename = entry.path().to_path_buf();
- // log errors and continue
- if let Err(err) = recurse_files_extractor(extractor, decoder, entry, verbose).await {
- eprintln!("error extracting {:?}: {}", filename.display(), err);
+ // log errors and continue
+ let entry: EitherEntry<BogusSeqRead, T> = EitherEntry::FileEntry(entry, a);
+ if let Err(err) = do_extract_sub_dir(extractor, entry, verbose).await {
+ eprintln!("error extracting {}: {}", filename.display(), err);
+ }
}
+ extractor.leave_directory()?;
}
- extractor.leave_directory()?;
}
EntryKind::Symlink(link) => {
extractor.extract_symlink(&file_name, metadata, link.as_ref())?;
@@ -720,17 +809,34 @@ where
extractor.extract_special(&file_name, metadata, 0)?;
}
}
- EntryKind::File { size, .. } => extractor.async_extract_file(
- &file_name,
- metadata,
- *size,
- &mut file.contents().await.map_err(|_| {
- format_err!("found regular file entry without contents in archive")
- })?,
- ).await?,
- EntryKind::GoodbyeTable => {}, // ignore
+ EntryKind::File { size, .. } => {
+ extractor
+ .async_extract_file(
+ &file_name,
+ metadata,
+ *size,
+ &mut match file {
+ EitherEntry::Entry(_, dec) => Box::new(match dec.contents() {
+ Some(con) => con,
+ None => bail!("file without contents found"),
+ })
+ as Box<dyn tokio::io::AsyncRead + Unpin + Send>,
+ EitherEntry::FileEntry(ref fe, _) => {
+ Box::new(fe.contents().await.map_err(|err| {
+ format_err!("file with bad contents found: {}", err)
+ })?)
+ as Box<dyn tokio::io::AsyncRead + Unpin + Send>
+ }
+ },
+ )
+ .await?
+ }
+ EntryKind::GoodbyeTable => {
+ if let EitherEntry::Entry(_, _) = file {
+ extractor.leave_directory()?;
+ }
+ }
}
Ok(())
})
}
-
diff --git a/src/pxar/mod.rs b/src/pxar/mod.rs
index d1302962..0045effc 100644
--- a/src/pxar/mod.rs
+++ b/src/pxar/mod.rs
@@ -59,7 +59,10 @@ mod flags;
pub use flags::Flags;
pub use create::{create_archive, PxarCreateOptions};
-pub use extract::{create_zip, extract_archive, extract_sub_dir, ErrorHandler, PxarExtractOptions};
+pub use extract::{
+ create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq,
+ ErrorHandler, PxarExtractOptions,
+};
/// The format requires to build sorted directory lookup tables in
/// memory, so we restrict the number of allowed entries to limit
--
2.20.1
More information about the pbs-devel
mailing list