[pbs-devel] [PATCH v3 proxmox-backup 18/20] pxar/extract: add sequential variant of extract_sub_dir
Stefan Reiter
s.reiter at proxmox.com
Wed Mar 31 12:22:00 CEST 2021
extract_sub_dir_seq, together with seq_files_extractor, allow extracting
files from a pxar Decoder, along with the existing option for an
Accessor. To facilitate code re-use, some helper functions are extracted
in the process.
Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
v3:
* basically a do-over, no more bogus types
src/pxar/extract.rs | 316 ++++++++++++++++++++++++++++++--------------
src/pxar/mod.rs | 5 +-
2 files changed, 224 insertions(+), 97 deletions(-)
diff --git a/src/pxar/extract.rs b/src/pxar/extract.rs
index 952e2d20..8f85c441 100644
--- a/src/pxar/extract.rs
+++ b/src/pxar/extract.rs
@@ -16,9 +16,10 @@ use nix::fcntl::OFlag;
use nix::sys::stat::Mode;
use pathpatterns::{MatchEntry, MatchList, MatchType};
-use pxar::format::Device;
-use pxar::Metadata;
use pxar::accessor::aio::{Accessor, FileContents, FileEntry};
+use pxar::decoder::aio::Decoder;
+use pxar::format::Device;
+use pxar::{Entry, EntryKind, Metadata};
use proxmox::c_result;
use proxmox::tools::{
@@ -93,8 +94,6 @@ where
let mut err_path_stack = vec![OsString::from("/")];
let mut current_match = options.extract_match_default;
while let Some(entry) = decoder.next() {
- use pxar::EntryKind;
-
let entry = entry.map_err(|err| format_err!("error reading pxar archive: {}", err))?;
let file_name_os = entry.file_name();
@@ -552,7 +551,6 @@ where
T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
W: tokio::io::AsyncWrite + Unpin + Send + 'static,
{
- use pxar::EntryKind;
Box::pin(async move {
let metadata = file.entry().metadata();
let path = file.entry().path().strip_prefix(&prefix)?.to_path_buf();
@@ -612,10 +610,42 @@ where
})
}
+fn get_extractor<DEST>(destination: DEST, metadata: Metadata) -> Result<Extractor, Error>
+where
+ DEST: AsRef<Path>,
+{
+ create_path(
+ &destination,
+ None,
+ Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
+ )
+ .map_err(|err| {
+ format_err!(
+ "error creating directory {:?}: {}",
+ destination.as_ref(),
+ err
+ )
+ })?;
+
+ let dir = Dir::open(
+ destination.as_ref(),
+ OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
+ Mode::empty(),
+ )
+ .map_err(|err| {
+ format_err!(
+ "unable to open target directory {:?}: {}",
+ destination.as_ref(),
+ err,
+ )
+ })?;
+
+ Ok(Extractor::new(dir, metadata, false, Flags::DEFAULT))
+}
pub async fn extract_sub_dir<T, DEST, PATH>(
destination: DEST,
- mut decoder: Accessor<T>,
+ decoder: Accessor<T>,
path: PATH,
verbose: bool,
) -> Result<(), Error>
@@ -626,111 +656,205 @@ where
{
let root = decoder.open_root().await?;
- create_path(
- &destination,
- None,
- Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
- )
- .map_err(|err| format_err!("error creating directory {:?}: {}", destination.as_ref(), err))?;
-
- let dir = Dir::open(
- destination.as_ref(),
- OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
- Mode::empty(),
- )
- .map_err(|err| format_err!("unable to open target directory {:?}: {}", destination.as_ref(), err,))?;
-
- let mut extractor = Extractor::new(
- dir,
+ let mut extractor = get_extractor(
+ destination,
root.lookup_self().await?.entry().metadata().clone(),
- false,
- Flags::DEFAULT,
- );
+ )?;
let file = root
- .lookup(&path).await?
+ .lookup(&path)
+ .await?
.ok_or(format_err!("error opening '{:?}'", path.as_ref()))?;
- recurse_files_extractor(&mut extractor, &mut decoder, file, verbose).await
+ recurse_files_extractor(&mut extractor, file, verbose).await
}
-fn recurse_files_extractor<'a, T>(
+pub async fn extract_sub_dir_seq<S, DEST>(
+ destination: DEST,
+ mut decoder: Decoder<S>,
+ verbose: bool,
+) -> Result<(), Error>
+where
+ S: pxar::decoder::SeqRead + Unpin + Send + 'static,
+ DEST: AsRef<Path>,
+{
+ decoder.enable_goodbye_entries(true);
+ let root = match decoder.next().await {
+ Some(Ok(root)) => root,
+ Some(Err(err)) => bail!("error getting root entry from pxar: {}", err),
+ None => bail!("cannot extract empty archive"),
+ };
+
+ let mut extractor = get_extractor(destination, root.metadata().clone())?;
+
+ if let Err(err) = seq_files_extractor(&mut extractor, decoder, verbose).await {
+ eprintln!("error extracting pxar archive: {}", err);
+ }
+
+ Ok(())
+}
+
+fn extract_special(
+ extractor: &mut Extractor,
+ entry: &Entry,
+ file_name: &CStr,
+) -> Result<(), Error> {
+ let metadata = entry.metadata();
+ match entry.kind() {
+ EntryKind::Symlink(link) => {
+ extractor.extract_symlink(file_name, metadata, link.as_ref())?;
+ }
+ EntryKind::Hardlink(link) => {
+ extractor.extract_hardlink(file_name, link.as_os_str())?;
+ }
+ EntryKind::Device(dev) => {
+ if extractor.contains_flags(Flags::WITH_DEVICE_NODES) {
+ extractor.extract_device(file_name, metadata, dev)?;
+ }
+ }
+ EntryKind::Fifo => {
+ if extractor.contains_flags(Flags::WITH_FIFOS) {
+ extractor.extract_special(file_name, metadata, 0)?;
+ }
+ }
+ EntryKind::Socket => {
+ if extractor.contains_flags(Flags::WITH_SOCKETS) {
+ extractor.extract_special(file_name, metadata, 0)?;
+ }
+ }
+ _ => bail!("extract_special used with unsupported entry kind"),
+ }
+ Ok(())
+}
+
+fn get_filename(entry: &Entry) -> Result<(OsString, CString), Error> {
+ let file_name_os = entry.file_name().to_owned();
+
+ // safety check: a file entry in an archive must never contain slashes:
+ if file_name_os.as_bytes().contains(&b'/') {
+ bail!("archive file entry contains slashes, which is invalid and a security concern");
+ }
+
+ let file_name = CString::new(file_name_os.as_bytes())
+ .map_err(|_| format_err!("encountered file name with null-bytes"))?;
+
+ Ok((file_name_os, file_name))
+}
+
+async fn recurse_files_extractor<'a, T>(
extractor: &'a mut Extractor,
- decoder: &'a mut Accessor<T>,
file: FileEntry<T>,
verbose: bool,
-) -> Pin<Box<dyn Future<Output = Result<(), Error>> + Send + 'a>>
+) -> Result<(), Error>
where
T: Clone + pxar::accessor::ReadAt + Unpin + Send + Sync + 'static,
{
- use pxar::EntryKind;
- Box::pin(async move {
- let metadata = file.entry().metadata();
- let file_name_os = file.file_name();
+ let entry = file.entry();
+ let metadata = entry.metadata();
+ let (file_name_os, file_name) = get_filename(entry)?;
- // safety check: a file entry in an archive must never contain slashes:
- if file_name_os.as_bytes().contains(&b'/') {
- bail!("archive file entry contains slashes, which is invalid and a security concern");
+ if verbose {
+ eprintln!("extracting: {}", file.path().display());
+ }
+
+ match file.kind() {
+ EntryKind::Directory => {
+ extractor
+ .enter_directory(file_name_os.to_owned(), metadata.clone(), true)
+ .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
+
+ let dir = file.enter_directory().await?;
+ let mut seq_decoder = dir.decode_full().await?;
+ seq_decoder.enable_goodbye_entries(true);
+ seq_files_extractor(extractor, seq_decoder, verbose).await?;
+ extractor.leave_directory()?;
}
-
- let file_name = CString::new(file_name_os.as_bytes())
- .map_err(|_| format_err!("encountered file name with null-bytes"))?;
-
- if verbose {
- eprintln!("extracting: {}", file.path().display());
+ EntryKind::File { size, .. } => {
+ extractor
+ .async_extract_file(
+ &file_name,
+ metadata,
+ *size,
+ &mut file.contents().await.map_err(|_| {
+ format_err!("found regular file entry without contents in archive")
+ })?,
+ )
+ .await?
}
-
- match file.kind() {
- EntryKind::Directory => {
- extractor
- .enter_directory(file_name_os.to_owned(), metadata.clone(), true)
- .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
-
- let dir = file.enter_directory().await?;
- let mut readdir = dir.read_dir();
- while let Some(entry) = readdir.next().await {
- let entry = entry?.decode_entry().await?;
- let filename = entry.path().to_path_buf();
-
- // log errors and continue
- if let Err(err) = recurse_files_extractor(extractor, decoder, entry, verbose).await {
- eprintln!("error extracting {:?}: {}", filename.display(), err);
- }
- }
- extractor.leave_directory()?;
- }
- EntryKind::Symlink(link) => {
- extractor.extract_symlink(&file_name, metadata, link.as_ref())?;
- }
- EntryKind::Hardlink(link) => {
- extractor.extract_hardlink(&file_name, link.as_os_str())?;
- }
- EntryKind::Device(dev) => {
- if extractor.contains_flags(Flags::WITH_DEVICE_NODES) {
- extractor.extract_device(&file_name, metadata, dev)?;
- }
- }
- EntryKind::Fifo => {
- if extractor.contains_flags(Flags::WITH_FIFOS) {
- extractor.extract_special(&file_name, metadata, 0)?;
- }
- }
- EntryKind::Socket => {
- if extractor.contains_flags(Flags::WITH_SOCKETS) {
- extractor.extract_special(&file_name, metadata, 0)?;
- }
- }
- EntryKind::File { size, .. } => extractor.async_extract_file(
- &file_name,
- metadata,
- *size,
- &mut file.contents().await.map_err(|_| {
- format_err!("found regular file entry without contents in archive")
- })?,
- ).await?,
- EntryKind::GoodbyeTable => {}, // ignore
- }
- Ok(())
- })
+ EntryKind::GoodbyeTable => {} // ignore
+ _ => extract_special(extractor, entry, &file_name)?,
+ }
+ Ok(())
}
+async fn seq_files_extractor<'a, T>(
+ extractor: &'a mut Extractor,
+ mut decoder: pxar::decoder::aio::Decoder<T>,
+ verbose: bool,
+) -> Result<(), Error>
+where
+ T: pxar::decoder::SeqRead,
+{
+ let mut dir_level = 0;
+ loop {
+ let entry = match decoder.next().await {
+ Some(entry) => entry?,
+ None => return Ok(()),
+ };
+
+ let metadata = entry.metadata();
+ let (file_name_os, file_name) = get_filename(&entry)?;
+
+ if verbose && !matches!(entry.kind(), EntryKind::GoodbyeTable) {
+ eprintln!("extracting: {}", entry.path().display());
+ }
+
+ if let Err(err) = async {
+ match entry.kind() {
+ EntryKind::Directory => {
+ dir_level += 1;
+ extractor
+ .enter_directory(file_name_os.to_owned(), metadata.clone(), true)
+ .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
+ }
+ EntryKind::File { size, .. } => {
+ extractor
+ .async_extract_file(
+ &file_name,
+ metadata,
+ *size,
+ &mut decoder.contents().ok_or_else(|| {
+ format_err!("found regular file entry without contents in archive")
+ })?,
+ )
+ .await?
+ }
+ EntryKind::GoodbyeTable => {
+ dir_level -= 1;
+ extractor.leave_directory()?;
+ }
+ _ => extract_special(extractor, &entry, &file_name)?,
+ }
+ Ok(()) as Result<(), Error>
+ }
+ .await
+ {
+ let display = entry.path().display().to_string();
+ eprintln!(
+ "error extracting {}: {}",
+ if matches!(entry.kind(), EntryKind::GoodbyeTable) {
+ "<directory>"
+ } else {
+ &display
+ },
+ err
+ );
+ }
+
+ if dir_level < 0 {
+ // we've encountered one Goodbye more then Directory, meaning we've left the dir we
+ // started in - exit early, otherwise the extractor might panic
+ return Ok(());
+ }
+ }
+}
diff --git a/src/pxar/mod.rs b/src/pxar/mod.rs
index d1302962..13eb9bd4 100644
--- a/src/pxar/mod.rs
+++ b/src/pxar/mod.rs
@@ -59,7 +59,10 @@ mod flags;
pub use flags::Flags;
pub use create::{create_archive, PxarCreateOptions};
-pub use extract::{create_zip, extract_archive, extract_sub_dir, ErrorHandler, PxarExtractOptions};
+pub use extract::{
+ create_zip, extract_archive, extract_sub_dir, extract_sub_dir_seq, ErrorHandler,
+ PxarExtractOptions,
+};
/// The format requires to build sorted directory lookup tables in
/// memory, so we restrict the number of allowed entries to limit
--
2.20.1
More information about the pbs-devel
mailing list