[pbs-devel] [PATCH v3 pxar 07/58] decoder/accessor: add optional payload input stream
Christian Ebner
c.ebner at proxmox.com
Thu Mar 28 13:36:16 CET 2024
Implement an optional redirection to read the payload for regular files
from a different input stream.
This allows to decode split stream archives.
Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 2:
- pass the payload input on decoder/accessor instantiation in order to
avoid possible adding/removing during decoding/accessing.
- major refactoring
examples/apxar.rs | 2 +-
src/accessor/aio.rs | 10 ++--
src/accessor/mod.rs | 61 ++++++++++++++++++++++---
src/accessor/sync.rs | 8 ++--
src/decoder/aio.rs | 14 ++++--
src/decoder/mod.rs | 106 +++++++++++++++++++++++++++++++++++++++----
src/decoder/sync.rs | 15 ++++--
src/lib.rs | 3 ++
8 files changed, 184 insertions(+), 35 deletions(-)
diff --git a/examples/apxar.rs b/examples/apxar.rs
index 0c62242..d5eb04e 100644
--- a/examples/apxar.rs
+++ b/examples/apxar.rs
@@ -9,7 +9,7 @@ async fn main() {
.await
.expect("failed to open file");
- let mut reader = Decoder::from_tokio(file)
+ let mut reader = Decoder::from_tokio(file, None)
.await
.expect("failed to open pxar archive contents");
diff --git a/src/accessor/aio.rs b/src/accessor/aio.rs
index 98d7755..0ebb921 100644
--- a/src/accessor/aio.rs
+++ b/src/accessor/aio.rs
@@ -39,7 +39,7 @@ impl<T: FileExt> Accessor<FileReader<T>> {
/// by a blocking file.
#[inline]
pub async fn from_file_and_size(input: T, size: u64) -> io::Result<Self> {
- Accessor::new(FileReader::new(input), size).await
+ Accessor::new(FileReader::new(input), size, None).await
}
}
@@ -75,7 +75,7 @@ where
input: T,
size: u64,
) -> io::Result<Accessor<FileRefReader<T>>> {
- Accessor::new(FileRefReader::new(input), size).await
+ Accessor::new(FileRefReader::new(input), size, None).await
}
}
@@ -85,9 +85,11 @@ impl<T: ReadAt> Accessor<T> {
///
/// Note that the `input`'s `SeqRead` implementation must always return `Poll::Ready` and is
/// not allowed to use the `Waker`, as this will cause a `panic!`.
- pub async fn new(input: T, size: u64) -> io::Result<Self> {
+ /// Optionally take the file payloads from the provided input stream rather than the regular
+ /// pxar stream.
+ pub async fn new(input: T, size: u64, payload_input: Option<T>) -> io::Result<Self> {
Ok(Self {
- inner: accessor::AccessorImpl::new(input, size).await?,
+ inner: accessor::AccessorImpl::new(input, size, payload_input).await?,
})
}
diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs
index 6a2de73..4789595 100644
--- a/src/accessor/mod.rs
+++ b/src/accessor/mod.rs
@@ -182,10 +182,11 @@ pub(crate) struct AccessorImpl<T> {
input: T,
size: u64,
caches: Arc<Caches>,
+ payload_input: Option<T>,
}
impl<T: ReadAt> AccessorImpl<T> {
- pub async fn new(input: T, size: u64) -> io::Result<Self> {
+ pub async fn new(input: T, size: u64, payload_input: Option<T>) -> io::Result<Self> {
if size < (size_of::<GoodbyeItem>() as u64) {
io_bail!("too small to contain a pxar archive");
}
@@ -194,6 +195,7 @@ impl<T: ReadAt> AccessorImpl<T> {
input,
size,
caches: Arc::new(Caches::default()),
+ payload_input,
})
}
@@ -207,6 +209,9 @@ impl<T: ReadAt> AccessorImpl<T> {
self.size,
"/".into(),
Arc::clone(&self.caches),
+ self.payload_input
+ .as_ref()
+ .map(|input| input as &dyn ReadAt),
)
.await
}
@@ -228,7 +233,13 @@ async fn get_decoder<T: ReadAt>(
entry_range: Range<u64>,
path: PathBuf,
) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
- DecoderImpl::new_full(SeqReadAtAdapter::new(input, entry_range), path, true).await
+ DecoderImpl::new_full(
+ SeqReadAtAdapter::new(input, entry_range.clone()),
+ path,
+ true,
+ None,
+ )
+ .await
}
// NOTE: This performs the Decoder::read_next_item() behavior! Keep in mind when changing!
@@ -263,6 +274,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
self.size,
"/".into(),
Arc::clone(&self.caches),
+ self.payload_input.clone(),
)
.await
}
@@ -274,6 +286,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
offset,
"/".into(),
Arc::clone(&self.caches),
+ self.payload_input.clone(),
)
.await
}
@@ -293,17 +306,23 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
.next()
.await
.ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??;
+
Ok(FileEntryImpl {
input: self.input.clone(),
entry,
entry_range_info: entry_range_info.clone(),
caches: Arc::clone(&self.caches),
+ payload_input: self.payload_input.clone(),
})
}
/// Allow opening arbitrary contents from a specific range.
pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContentsImpl<T> {
- FileContentsImpl::new(self.input.clone(), range)
+ if let Some(payload_input) = &self.payload_input {
+ FileContentsImpl::new(payload_input.clone(), range)
+ } else {
+ FileContentsImpl::new(self.input.clone(), range)
+ }
}
/// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will
@@ -326,9 +345,12 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
let link_offset = entry_file_offset - link_offset;
- let (mut decoder, entry_offset) =
- get_decoder_at_filename(self.input.clone(), link_offset..self.size, PathBuf::new())
- .await?;
+ let (mut decoder, entry_offset) = get_decoder_at_filename(
+ self.input.clone(),
+ link_offset..self.size,
+ PathBuf::new(),
+ )
+ .await?;
let entry = decoder
.next()
@@ -342,6 +364,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
EntryKind::File {
offset: Some(offset),
size,
+ ..
} => {
let meta_size = offset - link_offset;
let entry_end = link_offset + meta_size + size;
@@ -353,6 +376,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
entry_range: entry_offset..entry_end,
},
caches: Arc::clone(&self.caches),
+ payload_input: self.payload_input.clone(),
})
}
_ => io_bail!("hardlink does not point to a regular file"),
@@ -369,6 +393,7 @@ pub(crate) struct DirectoryImpl<T> {
table: Arc<[GoodbyeItem]>,
path: PathBuf,
caches: Arc<Caches>,
+ payload_input: Option<T>,
}
impl<T: Clone + ReadAt> DirectoryImpl<T> {
@@ -378,6 +403,7 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
end_offset: u64,
path: PathBuf,
caches: Arc<Caches>,
+ payload_input: Option<T>,
) -> io::Result<DirectoryImpl<T>> {
let tail = Self::read_tail_entry(&input, end_offset).await?;
@@ -407,6 +433,7 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone),
path,
caches,
+ payload_input,
};
// sanity check:
@@ -533,6 +560,7 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
entry_range: self.entry_range(),
},
caches: Arc::clone(&self.caches),
+ payload_input: self.payload_input.clone(),
})
}
@@ -685,6 +713,7 @@ pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
entry: Entry,
entry_range_info: EntryRangeInfo,
caches: Arc<Caches>,
+ payload_input: Option<T>,
}
impl<T: Clone + ReadAt> FileEntryImpl<T> {
@@ -698,6 +727,7 @@ impl<T: Clone + ReadAt> FileEntryImpl<T> {
self.entry_range_info.entry_range.end,
self.entry.path.clone(),
Arc::clone(&self.caches),
+ self.payload_input.clone(),
)
.await
}
@@ -711,14 +741,30 @@ impl<T: Clone + ReadAt> FileEntryImpl<T> {
EntryKind::File {
size,
offset: Some(offset),
+ payload_offset: None,
} => Ok(Some(offset..(offset + size))),
+ // Payload offset beats regular offset if some
+ EntryKind::File {
+ size,
+ offset: Some(_offset),
+ payload_offset: Some(payload_offset),
+ } => {
+ let start_offset = payload_offset + size_of::<format::Header>() as u64;
+ Ok(Some(start_offset..start_offset + size))
+ }
_ => Ok(None),
}
}
pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> {
match self.content_range()? {
- Some(range) => Ok(FileContentsImpl::new(self.input.clone(), range)),
+ Some(range) => {
+ if let Some(ref payload_input) = self.payload_input {
+ Ok(FileContentsImpl::new(payload_input.clone(), range))
+ } else {
+ Ok(FileContentsImpl::new(self.input.clone(), range))
+ }
+ }
None => io_bail!("not a file"),
}
}
@@ -808,6 +854,7 @@ impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
entry,
entry_range_info: self.entry_range_info.clone(),
caches: Arc::clone(&self.caches),
+ payload_input: self.dir.payload_input.clone(),
})
}
diff --git a/src/accessor/sync.rs b/src/accessor/sync.rs
index a777152..6150a18 100644
--- a/src/accessor/sync.rs
+++ b/src/accessor/sync.rs
@@ -31,7 +31,7 @@ impl<T: FileExt> Accessor<FileReader<T>> {
/// Decode a `pxar` archive from a standard file implementing `FileExt`.
#[inline]
pub fn from_file_and_size(input: T, size: u64) -> io::Result<Self> {
- Accessor::new(FileReader::new(input), size)
+ Accessor::new(FileReader::new(input), size, None)
}
}
@@ -64,7 +64,7 @@ where
{
/// Open an `Arc` or `Rc` of `File`.
pub fn from_file_ref_and_size(input: T, size: u64) -> io::Result<Accessor<FileRefReader<T>>> {
- Accessor::new(FileRefReader::new(input), size)
+ Accessor::new(FileRefReader::new(input), size, None)
}
}
@@ -74,9 +74,9 @@ impl<T: ReadAt> Accessor<T> {
///
/// Note that the `input`'s `SeqRead` implementation must always return `Poll::Ready` and is
/// not allowed to use the `Waker`, as this will cause a `panic!`.
- pub fn new(input: T, size: u64) -> io::Result<Self> {
+ pub fn new(input: T, size: u64, payload_input: Option<T>) -> io::Result<Self> {
Ok(Self {
- inner: poll_result_once(accessor::AccessorImpl::new(input, size))?,
+ inner: poll_result_once(accessor::AccessorImpl::new(input, size, payload_input))?,
})
}
diff --git a/src/decoder/aio.rs b/src/decoder/aio.rs
index 4de8c6f..bb032cf 100644
--- a/src/decoder/aio.rs
+++ b/src/decoder/aio.rs
@@ -20,8 +20,12 @@ pub struct Decoder<T> {
impl<T: tokio::io::AsyncRead> Decoder<TokioReader<T>> {
/// Decode a `pxar` archive from a `tokio::io::AsyncRead` input.
#[inline]
- pub async fn from_tokio(input: T) -> io::Result<Self> {
- Decoder::new(TokioReader::new(input)).await
+ pub async fn from_tokio(input: T, payload_input: Option<T>) -> io::Result<Self> {
+ Decoder::new(
+ TokioReader::new(input),
+ payload_input.map(|payload_input| TokioReader::new(payload_input)),
+ )
+ .await
}
}
@@ -30,15 +34,15 @@ impl Decoder<TokioReader<tokio::fs::File>> {
/// Decode a `pxar` archive from a `tokio::io::AsyncRead` input.
#[inline]
pub async fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
- Decoder::from_tokio(tokio::fs::File::open(path.as_ref()).await?).await
+ Decoder::from_tokio(tokio::fs::File::open(path.as_ref()).await?, None).await
}
}
impl<T: SeqRead> Decoder<T> {
/// Create an async decoder from an input implementing our internal read interface.
- pub async fn new(input: T) -> io::Result<Self> {
+ pub async fn new(input: T, payload_input: Option<T>) -> io::Result<Self> {
Ok(Self {
- inner: decoder::DecoderImpl::new(input).await?,
+ inner: decoder::DecoderImpl::new(input, payload_input).await?,
})
}
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index f439327..8cc4877 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -157,6 +157,10 @@ pub(crate) struct DecoderImpl<T> {
state: State,
with_goodbye_tables: bool,
+ // Payload of regular files might be provided by a different reader
+ payload_input: Option<T>,
+ payload_consumed: u64,
+
/// The random access code uses decoders for sub-ranges which may not end in a `PAYLOAD` for
/// entries like FIFOs or sockets, so there we explicitly allow an item to terminate with EOF.
eof_after_entry: bool,
@@ -167,6 +171,8 @@ enum State {
Default,
InPayload {
offset: u64,
+ size: u64,
+ payload_ref: bool,
},
/// file entries with no data (fifo, socket)
@@ -195,8 +201,8 @@ pub(crate) enum ItemResult {
}
impl<I: SeqRead> DecoderImpl<I> {
- pub async fn new(input: I) -> io::Result<Self> {
- Self::new_full(input, "/".into(), false).await
+ pub async fn new(input: I, payload_input: Option<I>) -> io::Result<Self> {
+ Self::new_full(input, "/".into(), false, payload_input).await
}
pub(crate) fn input(&self) -> &I {
@@ -207,6 +213,7 @@ impl<I: SeqRead> DecoderImpl<I> {
input: I,
path: PathBuf,
eof_after_entry: bool,
+ payload_input: Option<I>,
) -> io::Result<Self> {
let this = DecoderImpl {
input,
@@ -219,6 +226,8 @@ impl<I: SeqRead> DecoderImpl<I> {
path_lengths: Vec::new(),
state: State::Begin,
with_goodbye_tables: false,
+ payload_input,
+ payload_consumed: 0,
eof_after_entry,
};
@@ -242,9 +251,18 @@ impl<I: SeqRead> DecoderImpl<I> {
// hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
self.read_next_item().await?;
}
- State::InPayload { offset } => {
- // We need to skip the current payload first.
- self.skip_entry(offset).await?;
+ State::InPayload {
+ offset,
+ payload_ref,
+ ..
+ } => {
+ if payload_ref {
+ // Update consumed payload as given by the offset referenced by the content reader
+ self.payload_consumed += offset;
+ } else if self.payload_input.is_none() {
+ // Skip remaining payload of current entry in regular stream
+ self.skip_entry(offset).await?;
+ }
self.read_next_item().await?;
}
State::InGoodbyeTable => {
@@ -308,11 +326,19 @@ impl<I: SeqRead> DecoderImpl<I> {
}
pub fn content_reader(&mut self) -> Option<Contents<I>> {
- if let State::InPayload { offset } = &mut self.state {
+ if let State::InPayload {
+ offset,
+ size,
+ payload_ref,
+ } = &mut self.state
+ {
+ if *payload_ref && self.payload_input.is_none() {
+ return None;
+ }
Some(Contents::new(
- &mut self.input,
+ self.payload_input.as_mut().unwrap_or(&mut self.input),
offset,
- self.current_header.content_size(),
+ *size,
))
} else {
None
@@ -531,8 +557,60 @@ impl<I: SeqRead> DecoderImpl<I> {
self.entry.kind = EntryKind::File {
size: self.current_header.content_size(),
offset,
+ payload_offset: None,
+ };
+ self.state = State::InPayload {
+ offset: 0,
+ size: self.current_header.content_size(),
+ payload_ref: false,
+ };
+ return Ok(ItemResult::Entry);
+ }
+ format::PXAR_PAYLOAD_REF => {
+ let offset = seq_read_position(&mut self.input).await.transpose()?;
+ let payload_ref = self.read_payload_ref().await?;
+
+ if let Some(payload_input) = self.payload_input.as_mut() {
+ if seq_read_position(payload_input)
+ .await
+ .transpose()?
+ .is_none()
+ {
+ // Skip payload padding for injected chunks in sequential decoder
+ let to_skip = payload_ref.offset - self.payload_consumed;
+ self.skip_payload(to_skip).await?;
+ }
+ }
+
+ if let Some(payload_input) = self.payload_input.as_mut() {
+ let header: u64 = seq_read_entry(payload_input).await?;
+ if header != format::PXAR_PAYLOAD {
+ io_bail!(
+ "unexpected header in payload input: expected {} , got {header}",
+ format::PXAR_PAYLOAD,
+ );
+ }
+ let size: u64 = seq_read_entry(payload_input).await?;
+ self.payload_consumed += size_of::<Header>() as u64;
+
+ if size != payload_ref.size + size_of::<Header>() as u64 {
+ io_bail!(
+ "encountered payload size mismatch: got {}, expected {size}",
+ payload_ref.size
+ );
+ }
+ }
+
+ self.entry.kind = EntryKind::File {
+ size: payload_ref.size,
+ offset,
+ payload_offset: Some(payload_ref.offset),
+ };
+ self.state = State::InPayload {
+ offset: 0,
+ size: payload_ref.size,
+ payload_ref: true,
};
- self.state = State::InPayload { offset: 0 };
return Ok(ItemResult::Entry);
}
format::PXAR_FILENAME | format::PXAR_GOODBYE => {
@@ -567,6 +645,16 @@ impl<I: SeqRead> DecoderImpl<I> {
Self::skip(&mut self.input, len).await
}
+ async fn skip_payload(&mut self, length: u64) -> io::Result<()> {
+ if let Some(payload_input) = self.payload_input.as_mut() {
+ Self::skip(payload_input, length as usize).await?;
+ self.payload_consumed += length;
+ } else {
+ io_bail!("skip payload called, but got no payload input");
+ }
+ Ok(())
+ }
+
async fn skip(input: &mut I, len: usize) -> io::Result<()> {
let mut len = len;
let scratch = scratch_buffer();
diff --git a/src/decoder/sync.rs b/src/decoder/sync.rs
index 5597a03..caa8bcd 100644
--- a/src/decoder/sync.rs
+++ b/src/decoder/sync.rs
@@ -25,8 +25,11 @@ pub struct Decoder<T> {
impl<T: io::Read> Decoder<StandardReader<T>> {
/// Decode a `pxar` archive from a regular `std::io::Read` input.
#[inline]
- pub fn from_std(input: T) -> io::Result<Self> {
- Decoder::new(StandardReader::new(input))
+ pub fn from_std(input: T, payload_input: Option<T>) -> io::Result<Self> {
+ Decoder::new(
+ StandardReader::new(input),
+ payload_input.map(|payload_input| StandardReader::new(payload_input)),
+ )
}
/// Get a direct reference to the reader contained inside the contained [`StandardReader`].
@@ -38,7 +41,7 @@ impl<T: io::Read> Decoder<StandardReader<T>> {
impl Decoder<StandardReader<std::fs::File>> {
/// Convenience shortcut for `File::open` followed by `Accessor::from_file`.
pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
- Self::from_std(std::fs::File::open(path.as_ref())?)
+ Self::from_std(std::fs::File::open(path.as_ref())?, None)
}
}
@@ -47,9 +50,11 @@ impl<T: SeqRead> Decoder<T> {
///
/// Note that the `input`'s `SeqRead` implementation must always return `Poll::Ready` and is
/// not allowed to use the `Waker`, as this will cause a `panic!`.
- pub fn new(input: T) -> io::Result<Self> {
+ /// The optional payload input must be used to restore regular file payloads for payload references
+ /// encountered within the archive.
+ pub fn new(input: T, payload_input: Option<T>) -> io::Result<Self> {
Ok(Self {
- inner: poll_result_once(decoder::DecoderImpl::new(input))?,
+ inner: poll_result_once(decoder::DecoderImpl::new(input, payload_input))?,
})
}
diff --git a/src/lib.rs b/src/lib.rs
index 210c4b1..ef81a85 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -364,6 +364,9 @@ pub enum EntryKind {
/// The file's byte offset inside the archive, if available.
offset: Option<u64>,
+
+ /// The file's byte offset inside the payload stream, if available.
+ payload_offset: Option<u64>,
},
/// Directory entry. When iterating through an archive, the contents follow next.
--
2.39.2
More information about the pbs-devel
mailing list