[pbs-devel] [PATCH v8 pxar 15/69] format/encoder/decoder: new pxar entry type `Prelude`

Christian Ebner c.ebner at proxmox.com
Tue May 28 11:42:09 CEST 2024


Introduces a new pxar format entry type `Prelude` and the associated
encoder and decoder methods.
A prelude starts with the header marker `PXAR_PRELUDE`, followed by raw
byte content, used to store additional metadata associated with the
pxar archive, e.g. command line arguments passed on archive creation.

The prelude's content has no fixed encoding format but is stored as
a raw, arbitrary byte slice. A prelude entry is encoded right after
a pxar format version entry, both being encoded in the metadata
archive in case of an archive with dedicated payload output.

The prelude is not backwards compatible with pxar format version 1.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 7:
- no changes

changes since version 6:
- no changes

 examples/mk-format-hashes.rs |  1 +
 src/accessor/mod.rs          | 12 ++++++++++++
 src/decoder/mod.rs           | 31 ++++++++++++++++++++++++++++++-
 src/encoder/aio.rs           | 18 +++++++++++++++---
 src/encoder/mod.rs           | 26 ++++++++++++++++++++++++++
 src/encoder/sync.rs          | 15 ++++++++++++---
 src/format/mod.rs            | 26 ++++++++++++++++++++++++++
 src/lib.rs                   |  3 +++
 tests/simple/fs.rs           |  1 +
 9 files changed, 126 insertions(+), 7 deletions(-)

diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
index e5d69b1..e998760 100644
--- a/examples/mk-format-hashes.rs
+++ b/examples/mk-format-hashes.rs
@@ -16,6 +16,7 @@ const CONSTANTS: &[(&str, &str, &str)] = &[
         "PXAR_ENTRY_V1",
         "__PROXMOX_FORMAT_ENTRY__",
     ),
+    ("", "PXAR_PRELUDE", "__PROXMOX_FORMAT_PRELUDE__"),
     ("", "PXAR_FILENAME", "__PROXMOX_FORMAT_FILENAME__"),
     ("", "PXAR_SYMLINK", "__PROXMOX_FORMAT_SYMLINK__"),
     ("", "PXAR_DEVICE", "__PROXMOX_FORMAT_DEVICE__"),
diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs
index faab430..e4bf3f9 100644
--- a/src/accessor/mod.rs
+++ b/src/accessor/mod.rs
@@ -310,6 +310,12 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
                 .next()
                 .await
                 .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+            if let EntryKind::Prelude(_) = entry.kind() {
+                entry = decoder.next().await.ok_or_else(|| {
+                    io_format_err!("unexpected EOF while decoding directory entry")
+                })??;
+            }
         }
 
         Ok(FileEntryImpl {
@@ -547,6 +553,12 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
                 .next()
                 .await
                 .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+            if let EntryKind::Prelude(_) = entry.kind() {
+                entry = decoder.next().await.ok_or_else(|| {
+                    io_format_err!("unexpected EOF while decoding directory entry")
+                })??;
+            }
         }
 
         Ok((entry, decoder))
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index 43c83ae..1a1be35 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -176,6 +176,7 @@ pub(crate) struct DecoderImpl<T> {
 #[derive(Clone, PartialEq)]
 enum State {
     Begin,
+    Prelude,
     Root,
     Default,
     InPayload {
@@ -264,10 +265,25 @@ impl<I: SeqRead> DecoderImpl<I> {
                 State::Eof => return Ok(None),
                 State::Begin => {
                     let entry = self.read_next_entry().await.map(Some);
+                    // If the first entry is of kind Version, next must be Prelude or Directory
                     if let Ok(Some(ref entry)) = entry {
                         if let EntryKind::Version(version) = entry.kind() {
                             self.version = version.clone();
-                            self.state = State::Root;
+                            self.state = State::Prelude;
+                        }
+                    }
+                    return entry;
+                }
+                State::Prelude => {
+                    let entry = self.read_next_entry().await.map(Some);
+                    if let Ok(Some(ref entry)) = entry {
+                        match entry.kind() {
+                            EntryKind::Prelude(_) => self.state = State::Root,
+                            EntryKind::Directory => self.state = State::InDirectory,
+                            _ => io_bail!(
+                                "expected directory or prelude entry, got entry kind {:?}",
+                                entry.kind()
+                            ),
                         }
                     }
                     return entry;
@@ -433,6 +449,14 @@ impl<I: SeqRead> DecoderImpl<I> {
             self.current_header = header;
             self.entry.kind = EntryKind::Version(self.read_format_version().await?);
 
+            Ok(Some(self.entry.take()))
+        } else if header.htype == format::PXAR_PRELUDE {
+            if previous_state != State::Prelude {
+                io_bail!("Got format version entry at unexpected position");
+            }
+            self.current_header = header;
+            self.entry.kind = EntryKind::Prelude(self.read_prelude().await?);
+
             Ok(Some(self.entry.take()))
         } else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 {
             if header.htype == format::PXAR_ENTRY {
@@ -799,6 +823,11 @@ impl<I: SeqRead> DecoderImpl<I> {
             version => io_bail!("unexpected pxar format version {version}"),
         }
     }
+
+    async fn read_prelude(&mut self) -> io::Result<format::Prelude> {
+        let data = self.read_entry_as_bytes().await?;
+        Ok(format::Prelude { data })
+    }
 }
 
 /// Reader for file contents inside a pxar archive.
diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs
index 46856b0..8973402 100644
--- a/src/encoder/aio.rs
+++ b/src/encoder/aio.rs
@@ -24,8 +24,14 @@ impl<'a, T: tokio::io::AsyncWrite + 'a> Encoder<'a, TokioWriter<T>> {
     pub async fn from_tokio(
         output: PxarVariant<T, T>,
         metadata: &Metadata,
+        prelude: Option<&[u8]>,
     ) -> io::Result<Encoder<'a, TokioWriter<T>>> {
-        Encoder::new(output.wrap(|output| TokioWriter::new(output)), metadata).await
+        Encoder::new(
+            output.wrap(|output| TokioWriter::new(output)),
+            metadata,
+            prelude,
+        )
+        .await
     }
 }
 
@@ -41,6 +47,7 @@ impl<'a> Encoder<'a, TokioWriter<tokio::fs::File>> {
                 tokio::fs::File::create(path.as_ref()).await?,
             )),
             metadata,
+            None,
         )
         .await
     }
@@ -48,10 +55,14 @@ impl<'a> Encoder<'a, TokioWriter<tokio::fs::File>> {
 
 impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     /// Create an asynchronous encoder for an output implementing our internal write interface.
-    pub async fn new(output: PxarVariant<T, T>, metadata: &Metadata) -> io::Result<Encoder<'a, T>> {
+    pub async fn new(
+        output: PxarVariant<T, T>,
+        metadata: &Metadata,
+        prelude: Option<&[u8]>,
+    ) -> io::Result<Encoder<'a, T>> {
         let output = output.wrap_multi(|output| output.into(), |payload_output| payload_output);
         Ok(Self {
-            inner: encoder::EncoderImpl::new(output, metadata).await?,
+            inner: encoder::EncoderImpl::new(output, metadata, prelude).await?,
         })
     }
 
@@ -326,6 +337,7 @@ mod test {
             let mut encoder = Encoder::new(
                 crate::PxarVariant::Unified(DummyOutput),
                 &Metadata::dir_builder(0o700).build(),
+                None,
             )
             .await
             .unwrap();
diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
index 4bed040..a309c0f 100644
--- a/src/encoder/mod.rs
+++ b/src/encoder/mod.rs
@@ -346,6 +346,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
     pub async fn new(
         mut output: PxarVariant<EncoderOutput<'a, T>, T>,
         metadata: &Metadata,
+        prelude: Option<&[u8]>,
     ) -> io::Result<EncoderImpl<'a, T>> {
         if !metadata.is_dir() {
             io_bail!("directory metadata must contain the directory mode flag");
@@ -372,6 +373,9 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         };
 
         this.encode_format_version().await?;
+        if let Some(prelude) = prelude {
+            this.encode_prelude(prelude).await?;
+        }
         this.encode_metadata(metadata).await?;
         let state = this.state_mut()?;
         state.files_offset = state.position();
@@ -773,6 +777,28 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         Ok(())
     }
 
+    async fn encode_prelude(&mut self, prelude: &[u8]) -> io::Result<()> {
+        if self.version == FormatVersion::Version1 {
+            io_bail!("encoding prelude not supported in pxar format version 1");
+        }
+
+        let (mut output, state) = self.output_state()?;
+        if state.write_position != (size_of::<u64>() + size_of::<format::Header>()) as u64 {
+            io_bail!(
+                "prelude must be encoded following the version header, current position {}",
+                state.write_position,
+            );
+        }
+
+        seq_write_pxar_entry(
+            output.archive_mut(),
+            format::PXAR_PRELUDE,
+            prelude,
+            &mut state.write_position,
+        )
+        .await
+    }
+
     async fn encode_format_version(&mut self) -> io::Result<()> {
         let version_bytes = match self.version {
             format::FormatVersion::Version1 => return Ok(()),
diff --git a/src/encoder/sync.rs b/src/encoder/sync.rs
index 5aa8d69..3cfa03b 100644
--- a/src/encoder/sync.rs
+++ b/src/encoder/sync.rs
@@ -28,7 +28,11 @@ impl<'a, T: io::Write + 'a> Encoder<'a, StandardWriter<T>> {
     /// Encode a `pxar` archive into a regular `std::io::Write` output.
     #[inline]
     pub fn from_std(output: T, metadata: &Metadata) -> io::Result<Encoder<'a, StandardWriter<T>>> {
-        Encoder::new(PxarVariant::Unified(StandardWriter::new(output)), metadata)
+        Encoder::new(
+            PxarVariant::Unified(StandardWriter::new(output)),
+            metadata,
+            None,
+        )
     }
 }
 
@@ -41,6 +45,7 @@ impl<'a> Encoder<'a, StandardWriter<std::fs::File>> {
         Encoder::new(
             PxarVariant::Unified(StandardWriter::new(std::fs::File::create(path.as_ref())?)),
             metadata,
+            None,
         )
     }
 }
@@ -52,7 +57,11 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     /// not allowed to use the `Waker`, as this will cause a `panic!`.
     // Optionally attach a dedicated writer to redirect the payloads of regular files to a separate
     // output.
-    pub fn new(output: PxarVariant<T, T>, metadata: &Metadata) -> io::Result<Self> {
+    pub fn new(
+        output: PxarVariant<T, T>,
+        metadata: &Metadata,
+        prelude: Option<&[u8]>,
+    ) -> io::Result<Self> {
         let output = match output {
             PxarVariant::Unified(output) => PxarVariant::Unified(output.into()),
             PxarVariant::Split(output, payload_output) => {
@@ -61,7 +70,7 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
         };
 
         Ok(Self {
-            inner: poll_result_once(encoder::EncoderImpl::new(output, metadata))?,
+            inner: poll_result_once(encoder::EncoderImpl::new(output, metadata, prelude))?,
         })
     }
 
diff --git a/src/format/mod.rs b/src/format/mod.rs
index 9b66fe2..73b06cd 100644
--- a/src/format/mod.rs
+++ b/src/format/mod.rs
@@ -87,6 +87,7 @@ pub const PXAR_FORMAT_VERSION: u64 = 0x730f6c75df16a40d;
 pub const PXAR_ENTRY: u64 = 0xd5956474e588acef;
 /// Previous version of the entry struct
 pub const PXAR_ENTRY_V1: u64 = 0x11da850a1c1cceff;
+pub const PXAR_PRELUDE: u64 = 0xe309d79d9f7b771b;
 pub const PXAR_FILENAME: u64 = 0x16701121063917b3;
 pub const PXAR_SYMLINK: u64 = 0x27f971e7dbf5dc5f;
 pub const PXAR_DEVICE: u64 = 0x9fc9e906586d5ce9;
@@ -147,6 +148,7 @@ impl Header {
     #[inline]
     pub fn max_content_size(&self) -> u64 {
         match self.htype {
+            PXAR_PRELUDE => u64::MAX - (size_of::<Self>() as u64),
             // + null-termination
             PXAR_FILENAME => crate::util::MAX_FILENAME_LEN + 1,
             // + null-termination
@@ -190,6 +192,7 @@ impl Display for Header {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let readable = match self.htype {
             PXAR_FORMAT_VERSION => "FORMAT_VERSION",
+            PXAR_PRELUDE => "PRELUDE",
             PXAR_FILENAME => "FILENAME",
             PXAR_SYMLINK => "SYMLINK",
             PXAR_HARDLINK => "HARDLINK",
@@ -694,6 +697,29 @@ impl Device {
     }
 }
 
+#[derive(Clone, Debug)]
+pub struct Prelude {
+    pub data: Vec<u8>,
+}
+
+impl Prelude {
+    pub fn as_os_str(&self) -> &OsStr {
+        self.as_ref()
+    }
+}
+
+impl AsRef<[u8]> for Prelude {
+    fn as_ref(&self) -> &[u8] {
+        &self.data
+    }
+}
+
+impl AsRef<OsStr> for Prelude {
+    fn as_ref(&self) -> &OsStr {
+        OsStr::from_bytes(&self.data[..self.data.len().max(1) - 1])
+    }
+}
+
 #[cfg(all(test, target_os = "linux"))]
 #[test]
 fn test_linux_devices() {
diff --git a/src/lib.rs b/src/lib.rs
index 7e5b48f..e0c5498 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -345,6 +345,9 @@ pub enum EntryKind {
     /// Pxar file format version
     Version(format::FormatVersion),
 
+    /// Pxar prelude blob
+    Prelude(format::Prelude),
+
     /// Symbolic links.
     Symlink(format::Symlink),
 
diff --git a/tests/simple/fs.rs b/tests/simple/fs.rs
index 8a8c607..96fcee9 100644
--- a/tests/simple/fs.rs
+++ b/tests/simple/fs.rs
@@ -230,6 +230,7 @@ impl Entry {
                 };
             match item.kind() {
                 PxarEntryKind::Version(_) => continue,
+                PxarEntryKind::Prelude(_) => continue,
                 PxarEntryKind::GoodbyeTable => break,
                 PxarEntryKind::File { size, .. } => {
                     let mut data = Vec::new();
-- 
2.39.2





More information about the pbs-devel mailing list