[pbs-devel] [PATCH v3 pxar 13/58] format: add pxar format version entry

Christian Ebner c.ebner at proxmox.com
Thu Mar 28 13:36:22 CET 2024


Adds an additional entry type at the start of each pxar archive
signaling the encoding format version. If not present, the default
version 1 is assumed.

This allows detecting the pxar encoding version early on, so tools can
switch mode or bail on incompatible encoder/decoder functionality.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 2:
- not present in previous version

 examples/mk-format-hashes.rs |  5 +++++
 src/decoder/mod.rs           | 29 ++++++++++++++++++++++++++--
 src/encoder/mod.rs           | 37 +++++++++++++++++++++++++++++++++---
 src/format/mod.rs            | 11 +++++++++++
 src/lib.rs                   |  3 +++
 5 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
index 35cff99..e5d69b1 100644
--- a/examples/mk-format-hashes.rs
+++ b/examples/mk-format-hashes.rs
@@ -1,6 +1,11 @@
 use pxar::format::hash_filename;
 
 const CONSTANTS: &[(&str, &str, &str)] = &[
+    (
+        "Pxar format version entry, fallback to version 1 if not present",
+        "PXAR_FORMAT_VERSION",
+        "__PROXMOX_FORMAT_VERSION__",
+    ),
     (
         "Beginning of an entry (current version).",
         "PXAR_ENTRY",
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index 00d9abf..5b2fafb 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -17,7 +17,7 @@ use std::task::{Context, Poll};
 
 use endian_trait::Endian;
 
-use crate::format::{self, Header};
+use crate::format::{self, FormatVersion, Header};
 use crate::util::{self, io_err_other};
 use crate::{Entry, EntryKind, Metadata};
 
@@ -164,6 +164,8 @@ pub(crate) struct DecoderImpl<T> {
     /// The random access code uses decoders for sub-ranges which may not end in a `PAYLOAD` for
     /// entries like FIFOs or sockets, so there we explicitly allow an item to terminate with EOF.
     eof_after_entry: bool,
+    /// The format version as determined by the format version header
+    version: format::FormatVersion,
 }
 
 enum State {
@@ -242,6 +244,7 @@ impl<I: SeqRead> DecoderImpl<I> {
             payload_input,
             payload_consumed,
             eof_after_entry,
+            version: FormatVersion::default(),
         };
 
         // this.read_next_entry().await?;
@@ -258,7 +261,16 @@ impl<I: SeqRead> DecoderImpl<I> {
         loop {
             match self.state {
                 State::Eof => return Ok(None),
-                State::Begin => return self.read_next_entry().await.map(Some),
+                State::Begin => {
+                    let entry = self.read_next_entry().await.map(Some);
+                    if let Ok(Some(ref entry)) = entry {
+                        if let EntryKind::Version(version) = entry.kind() {
+                            self.version = version.clone();
+                            return self.read_next_entry().await.map(Some);
+                        }
+                    }
+                    return entry;
+                }
                 State::Default => {
                     // we completely finished an entry, so now we're going "up" in the directory
                     // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
@@ -412,6 +424,11 @@ impl<I: SeqRead> DecoderImpl<I> {
             self.entry.metadata = Metadata::default();
             self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
 
+            Ok(Some(self.entry.take()))
+        } else if header.htype == format::PXAR_FORMAT_VERSION {
+            self.current_header = header;
+            self.entry.kind = EntryKind::Version(self.read_format_version().await?);
+
             Ok(Some(self.entry.take()))
         } else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 {
             if header.htype == format::PXAR_ENTRY {
@@ -777,6 +794,14 @@ impl<I: SeqRead> DecoderImpl<I> {
 
         seq_read_entry(&mut self.input).await
     }
+
+    async fn read_format_version(&mut self) -> io::Result<format::FormatVersion> {
+        match seq_read_entry(&mut self.input).await? { // version number read from the input
+            1u64 => Ok(format::FormatVersion::Version1),
+            2u64 => Ok(format::FormatVersion::Version2),
+            _ => io_bail!("unexpected pxar format version"), // any other value is rejected
+        }
+    }
 }
 
 /// Reader for file contents inside a pxar archive.
diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
index 88c0ed5..9270153 100644
--- a/src/encoder/mod.rs
+++ b/src/encoder/mod.rs
@@ -17,7 +17,7 @@ use endian_trait::Endian;
 
 use crate::binary_tree_array;
 use crate::decoder::{self, SeqRead};
-use crate::format::{self, GoodbyeItem, PayloadRef};
+use crate::format::{self, FormatVersion, GoodbyeItem, PayloadRef};
 use crate::Metadata;
 
 pub mod aio;
@@ -307,6 +307,8 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
     /// Since only the "current" entry can be actively writing files, we share the file copy
     /// buffer.
     file_copy_buffer: Arc<Mutex<Vec<u8>>>,
+    /// Pxar format version to encode
+    version: format::FormatVersion,
 }
 
 impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
@@ -320,11 +322,14 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         }
 
         let mut state = EncoderState::default();
-        if let Some(payload_output) = payload_output.as_mut() {
+        let version = if let Some(payload_output) = payload_output.as_mut() {
             let header = format::Header::with_content_size(format::PXAR_PAYLOAD_START_MARKER, 0);
             header.check_header_size()?;
             seq_write_struct(payload_output, header, &mut state.payload_write_position).await?;
-        }
+            format::FormatVersion::Version2
+        } else {
+            format::FormatVersion::default()
+        };
 
         let mut this = Self {
             output,
@@ -334,8 +339,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
             file_copy_buffer: Arc::new(Mutex::new(unsafe {
                 crate::util::vec_new_uninitialized(1024 * 1024)
             })),
+            version,
         };
 
+        this.encode_format_version().await?;
         this.encode_metadata(metadata).await?;
         let state = this.state_mut()?;
         state.files_offset = state.position();
@@ -522,6 +529,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         file_size: u64,
         payload_offset: PayloadOffset,
     ) -> io::Result<()> {
+        if self.version == FormatVersion::Version1 {
+            io_bail!("payload references not supported pxar format version 1");
+        }
+
         if self.payload_output.as_mut().is_none() {
             io_bail!("unable to add payload reference");
         }
@@ -729,6 +740,26 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         Ok(())
     }
 
+    /// Writes the format version entry at the archive start; nothing is written for version 1.
+    async fn encode_format_version(&mut self) -> io::Result<()> {
+        let version_bytes = match self.version {
+            format::FormatVersion::Version1 => return Ok(()),
+            format::FormatVersion::Version2 => 2u64.to_le_bytes(),
+        };
+
+        let (output, state) = self.output_state()?;
+        if state.write_position != 0 {
+            io_bail!("pxar format version must be encoded at the beginning of an archive");
+        }
+        seq_write_pxar_entry(
+            output,
+            format::PXAR_FORMAT_VERSION,
+            &version_bytes,
+            &mut state.write_position,
+        )
+        .await
+    }
+
     async fn encode_metadata(&mut self, metadata: &Metadata) -> io::Result<()> {
         let (output, state) = self.output_state()?;
         seq_write_pxar_struct_entry(
diff --git a/src/format/mod.rs b/src/format/mod.rs
index a672d19..2bf33c9 100644
--- a/src/format/mod.rs
+++ b/src/format/mod.rs
@@ -6,6 +6,7 @@
 //! item data.
 //!
 //! An archive contains items in the following order:
+//!  * `FORMAT_VERSION`     -- (optional for v1), version of encoding format
 //!  * `ENTRY`              -- containing general stat() data and related bits
 //!   * `XATTR`             -- one extended attribute
 //!   * ...                 -- more of these when there are multiple defined
@@ -80,6 +81,8 @@ pub mod mode {
 }
 
 // Generated by `cargo run --example mk-format-hashes`
+/// Pxar format version entry, fallback to version 1 if not present
+pub const PXAR_FORMAT_VERSION: u64 = 0x730f6c75df16a40d;
 /// Beginning of an entry (current version).
 pub const PXAR_ENTRY: u64 = 0xd5956474e588acef;
 /// Previous version of the entry struct
@@ -186,6 +189,7 @@ impl Header {
 impl Display for Header {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let readable = match self.htype {
+            PXAR_FORMAT_VERSION => "FORMAT_VERSION",
             PXAR_FILENAME => "FILENAME",
             PXAR_SYMLINK => "SYMLINK",
             PXAR_HARDLINK => "HARDLINK",
@@ -551,6 +555,13 @@ impl From<&std::fs::Metadata> for Stat {
     }
 }
 
+#[derive(Clone, Debug, Default, PartialEq)]
+pub enum FormatVersion { // pxar archive encoding format version
+    #[default] // assumed when an archive carries no format version entry
+    Version1,
+    Version2,
+}
+
 #[derive(Clone, Debug)]
 pub struct Filename {
     pub name: Vec<u8>,
diff --git a/src/lib.rs b/src/lib.rs
index ef81a85..a87b5ac 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -342,6 +342,9 @@ impl Acl {
 /// Identifies whether the entry is a file, symlink, directory, etc.
 #[derive(Clone, Debug)]
 pub enum EntryKind {
+    /// Pxar file format version
+    Version(format::FormatVersion),
+
     /// Symbolic links.
     Symlink(format::Symlink),
 
-- 
2.39.2





More information about the pbs-devel mailing list