[pbs-devel] [PATCH v5 pxar 14/62] format/encoder/decoder: new pxar entry type `Prelude`

Christian Ebner c.ebner at proxmox.com
Tue May 7 17:51:56 CEST 2024


Introduces a new pxar format entry type `Prelude` and the associated
encoder and decoder methods.
A prelude starts with header marker `PXAR_PRELUDE` followed by raw
byte content, used to store additional metadata associated with the
pxar archive, e.g. command line arguments passed on archive creation.

The prelude's content has no fixed encoding format but is stored as
a raw, arbitrary byte slice. A prelude entry is encoded right after
the pxar format version entry; in case of an archive with dedicated
payload output, both are encoded in the metadata archive.

The prelude is not backwards compatible with pxar format version 1.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 4:
- fix decoder state: must be in `InDirectory` after reading the root
  directory entry, when reading the optional prelude

 examples/mk-format-hashes.rs |  1 +
 src/accessor/mod.rs          | 12 ++++++++++++
 src/decoder/mod.rs           | 31 ++++++++++++++++++++++++++++++-
 src/encoder/aio.rs           | 19 ++++++++++++++-----
 src/encoder/mod.rs           | 26 ++++++++++++++++++++++++++
 src/encoder/sync.rs          | 11 +++++++++--
 src/format/mod.rs            | 26 ++++++++++++++++++++++++++
 src/lib.rs                   |  3 +++
 tests/simple/fs.rs           |  1 +
 9 files changed, 122 insertions(+), 8 deletions(-)

diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
index e5d69b1..e998760 100644
--- a/examples/mk-format-hashes.rs
+++ b/examples/mk-format-hashes.rs
@@ -16,6 +16,7 @@ const CONSTANTS: &[(&str, &str, &str)] = &[
         "PXAR_ENTRY_V1",
         "__PROXMOX_FORMAT_ENTRY__",
     ),
+    ("", "PXAR_PRELUDE", "__PROXMOX_FORMAT_PRELUDE__"),
     ("", "PXAR_FILENAME", "__PROXMOX_FORMAT_FILENAME__"),
     ("", "PXAR_SYMLINK", "__PROXMOX_FORMAT_SYMLINK__"),
     ("", "PXAR_DEVICE", "__PROXMOX_FORMAT_DEVICE__"),
diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs
index 6441baa..a746868 100644
--- a/src/accessor/mod.rs
+++ b/src/accessor/mod.rs
@@ -317,6 +317,12 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
                 .next()
                 .await
                 .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+            if let EntryKind::Prelude(_) = entry.kind() {
+                entry = decoder.next().await.ok_or_else(|| {
+                    io_format_err!("unexpected EOF while decoding directory entry")
+                })??;
+            }
         }
 
         Ok(FileEntryImpl {
@@ -564,6 +570,12 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
                 .next()
                 .await
                 .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+            if let EntryKind::Prelude(_) = entry.kind() {
+                entry = decoder.next().await.ok_or_else(|| {
+                    io_format_err!("unexpected EOF while decoding directory entry")
+                })??;
+            }
         }
 
         Ok((entry, decoder))
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index 305ecf1..21dc208 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -177,6 +177,7 @@ pub(crate) struct DecoderImpl<T> {
 #[derive(Clone, PartialEq)]
 enum State {
     Begin,
+    Prelude,
     Root,
     Default,
     InPayload {
@@ -267,10 +268,25 @@ impl<I: SeqRead> DecoderImpl<I> {
                 State::Eof => return Ok(None),
                 State::Begin => {
                     let entry = self.read_next_entry().await.map(Some);
+                    // If the first entry is of kind Version, next must be Prelude or Directory
                     if let Ok(Some(ref entry)) = entry {
                         if let EntryKind::Version(version) = entry.kind() {
                             self.version = version.clone();
-                            self.state = State::Root;
+                            self.state = State::Prelude;
+                        }
+                    }
+                    return entry;
+                }
+                State::Prelude => {
+                    let entry = self.read_next_entry().await.map(Some);
+                    if let Ok(Some(ref entry)) = entry {
+                        match entry.kind() {
+                            EntryKind::Prelude(_) => self.state = State::Root,
+                            EntryKind::Directory => self.state = State::InDirectory,
+                            _ => io_bail!(
+                                "expected directory or prelude entry, got entry kind {:?}",
+                                entry.kind()
+                            ),
                         }
                     }
                     return entry;
@@ -432,6 +448,14 @@ impl<I: SeqRead> DecoderImpl<I> {
             self.current_header = header;
             self.entry.kind = EntryKind::Version(self.read_format_version().await?);
 
+            Ok(Some(self.entry.take()))
+        } else if header.htype == format::PXAR_PRELUDE {
+            if previous_state != State::Prelude {
+                io_bail!("Got prelude entry at unexpected position");
+            }
+            self.current_header = header;
+            self.entry.kind = EntryKind::Prelude(self.read_prelude().await?);
+
             Ok(Some(self.entry.take()))
         } else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 {
             if header.htype == format::PXAR_ENTRY {
@@ -794,6 +818,11 @@ impl<I: SeqRead> DecoderImpl<I> {
             version => io_bail!("unexpected pxar format version {version}"),
         }
     }
+
+    async fn read_prelude(&mut self) -> io::Result<format::Prelude> {
+        let data = self.read_entry_as_bytes().await?;
+        Ok(format::Prelude { data })
+    }
 }
 
 /// Reader for file contents inside a pxar archive.
diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs
index e385457..19055ad 100644
--- a/src/encoder/aio.rs
+++ b/src/encoder/aio.rs
@@ -25,11 +25,13 @@ impl<'a, T: tokio::io::AsyncWrite + 'a> Encoder<'a, TokioWriter<T>> {
         output: T,
         metadata: &Metadata,
         payload_output: Option<T>,
+        prelude: Option<&[u8]>,
     ) -> io::Result<Encoder<'a, TokioWriter<T>>> {
         Encoder::new(
             TokioWriter::new(output),
             metadata,
             payload_output.map(|payload_output| TokioWriter::new(payload_output)),
+            prelude,
         )
         .await
     }
@@ -46,6 +48,7 @@ impl<'a> Encoder<'a, TokioWriter<tokio::fs::File>> {
             TokioWriter::new(tokio::fs::File::create(path.as_ref()).await?),
             metadata,
             None,
+            None,
         )
         .await
     }
@@ -57,9 +60,11 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
         output: T,
         metadata: &Metadata,
         payload_output: Option<T>,
+        prelude: Option<&[u8]>,
     ) -> io::Result<Encoder<'a, T>> {
         Ok(Self {
-            inner: encoder::EncoderImpl::new(output.into(), metadata, payload_output).await?,
+            inner: encoder::EncoderImpl::new(output.into(), metadata, payload_output, prelude)
+                .await?,
         })
     }
 
@@ -331,10 +336,14 @@ mod test {
     /// Assert that `Encoder` is `Send`
     fn send_test() {
         let test = async {
-            let mut encoder =
-                Encoder::new(DummyOutput, &Metadata::dir_builder(0o700).build(), None)
-                    .await
-                    .unwrap();
+            let mut encoder = Encoder::new(
+                DummyOutput,
+                &Metadata::dir_builder(0o700).build(),
+                None,
+                None,
+            )
+            .await
+            .unwrap();
             {
                 encoder
                     .create_directory("baba", &Metadata::dir_builder(0o700).build())
diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
index 906ef62..b785ebc 100644
--- a/src/encoder/mod.rs
+++ b/src/encoder/mod.rs
@@ -348,6 +348,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         output: EncoderOutput<'a, T>,
         metadata: &Metadata,
         mut payload_output: Option<T>,
+        prelude: Option<&[u8]>,
     ) -> io::Result<EncoderImpl<'a, T>> {
         if !metadata.is_dir() {
             io_bail!("directory metadata must contain the directory mode flag");
@@ -375,6 +376,9 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         };
 
         this.encode_format_version().await?;
+        if let Some(prelude) = prelude {
+            this.encode_prelude(prelude).await?;
+        }
         this.encode_metadata(metadata).await?;
         let state = this.state_mut()?;
         state.files_offset = state.position();
@@ -777,6 +781,28 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         Ok(())
     }
 
+    async fn encode_prelude(&mut self, prelude: &[u8]) -> io::Result<()> {
+        if self.version == FormatVersion::Version1 {
+            io_bail!("encoding prelude not supported in pxar format version 1");
+        }
+
+        let (output, state) = self.output_state()?;
+        if state.write_position != (size_of::<u64>() + size_of::<format::Header>()) as u64 {
+            io_bail!(
+                "prelude must be encoded following the version header, current position {}",
+                state.write_position,
+            );
+        }
+
+        seq_write_pxar_entry(
+            output,
+            format::PXAR_PRELUDE,
+            prelude,
+            &mut state.write_position,
+        )
+        .await
+    }
+
     async fn encode_format_version(&mut self) -> io::Result<()> {
         let version_bytes = match self.version {
             format::FormatVersion::Version1 => return Ok(()),
diff --git a/src/encoder/sync.rs b/src/encoder/sync.rs
index bc6430a..ffed47b 100644
--- a/src/encoder/sync.rs
+++ b/src/encoder/sync.rs
@@ -28,7 +28,7 @@ impl<'a, T: io::Write + 'a> Encoder<'a, StandardWriter<T>> {
     /// Encode a `pxar` archive into a regular `std::io::Write` output.
     #[inline]
     pub fn from_std(output: T, metadata: &Metadata) -> io::Result<Encoder<'a, StandardWriter<T>>> {
-        Encoder::new(StandardWriter::new(output), metadata, None)
+        Encoder::new(StandardWriter::new(output), metadata, None, None)
     }
 }
 
@@ -42,6 +42,7 @@ impl<'a> Encoder<'a, StandardWriter<std::fs::File>> {
             StandardWriter::new(std::fs::File::create(path.as_ref())?),
             metadata,
             None,
+            None,
         )
     }
 }
@@ -53,12 +54,18 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     /// not allowed to use the `Waker`, as this will cause a `panic!`.
     // Optionally attach a dedicated writer to redirect the payloads of regular files to a separate
     // output.
-    pub fn new(output: T, metadata: &Metadata, payload_output: Option<T>) -> io::Result<Self> {
+    pub fn new(
+        output: T,
+        metadata: &Metadata,
+        payload_output: Option<T>,
+        prelude: Option<&[u8]>,
+    ) -> io::Result<Self> {
         Ok(Self {
             inner: poll_result_once(encoder::EncoderImpl::new(
                 output.into(),
                 metadata,
                 payload_output,
+                prelude,
             ))?,
         })
     }
diff --git a/src/format/mod.rs b/src/format/mod.rs
index 9b66fe2..73b06cd 100644
--- a/src/format/mod.rs
+++ b/src/format/mod.rs
@@ -87,6 +87,7 @@ pub const PXAR_FORMAT_VERSION: u64 = 0x730f6c75df16a40d;
 pub const PXAR_ENTRY: u64 = 0xd5956474e588acef;
 /// Previous version of the entry struct
 pub const PXAR_ENTRY_V1: u64 = 0x11da850a1c1cceff;
+pub const PXAR_PRELUDE: u64 = 0xe309d79d9f7b771b;
 pub const PXAR_FILENAME: u64 = 0x16701121063917b3;
 pub const PXAR_SYMLINK: u64 = 0x27f971e7dbf5dc5f;
 pub const PXAR_DEVICE: u64 = 0x9fc9e906586d5ce9;
@@ -147,6 +148,7 @@ impl Header {
     #[inline]
     pub fn max_content_size(&self) -> u64 {
         match self.htype {
+            PXAR_PRELUDE => u64::MAX - (size_of::<Self>() as u64),
             // + null-termination
             PXAR_FILENAME => crate::util::MAX_FILENAME_LEN + 1,
             // + null-termination
@@ -190,6 +192,7 @@ impl Display for Header {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let readable = match self.htype {
             PXAR_FORMAT_VERSION => "FORMAT_VERSION",
+            PXAR_PRELUDE => "PRELUDE",
             PXAR_FILENAME => "FILENAME",
             PXAR_SYMLINK => "SYMLINK",
             PXAR_HARDLINK => "HARDLINK",
@@ -694,6 +697,29 @@ impl Device {
     }
 }
 
+#[derive(Clone, Debug)]
+pub struct Prelude {
+    pub data: Vec<u8>,
+}
+
+impl Prelude {
+    pub fn as_os_str(&self) -> &OsStr {
+        self.as_ref()
+    }
+}
+
+impl AsRef<[u8]> for Prelude {
+    fn as_ref(&self) -> &[u8] {
+        &self.data
+    }
+}
+
+impl AsRef<OsStr> for Prelude {
+    fn as_ref(&self) -> &OsStr {
+        OsStr::from_bytes(&self.data)
+    }
+}
+
 #[cfg(all(test, target_os = "linux"))]
 #[test]
 fn test_linux_devices() {
diff --git a/src/lib.rs b/src/lib.rs
index a87b5ac..16d69f8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -345,6 +345,9 @@ pub enum EntryKind {
     /// Pxar file format version
     Version(format::FormatVersion),
 
+    /// Pxar prelude blob
+    Prelude(format::Prelude),
+
     /// Symbolic links.
     Symlink(format::Symlink),
 
diff --git a/tests/simple/fs.rs b/tests/simple/fs.rs
index 8a8c607..96fcee9 100644
--- a/tests/simple/fs.rs
+++ b/tests/simple/fs.rs
@@ -230,6 +230,7 @@ impl Entry {
                 };
             match item.kind() {
                 PxarEntryKind::Version(_) => continue,
+                PxarEntryKind::Prelude(_) => continue,
                 PxarEntryKind::GoodbyeTable => break,
                 PxarEntryKind::File { size, .. } => {
                     let mut data = Vec::new();
-- 
2.39.2





More information about the pbs-devel mailing list