[pbs-devel] [PATCH v6 pxar 8/29] fix #3174: add pxar format version entry

Christian Ebner c.ebner at proxmox.com
Thu Jan 25 14:25:47 CET 2024


Adds an additional entry type at the start of each pxar archive
signaling the encoding format version. If not present, the default
version 1 is assumed.

This allows to early on detect the pxar encoding version, allowing tools
to switch mode or bail on non compatible encoder/decoder functionality.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
Changes since v5:
- pxar archives in version 2 are prefixed by an entry kind version,
  allowing encoders/decoders to early on detect the used format version.

 examples/mk-format-hashes.rs |  5 +++++
 src/decoder/mod.rs           | 32 +++++++++++++++++++++++++++++++-
 src/encoder/aio.rs           | 23 ++++++++++++++++-------
 src/encoder/mod.rs           | 32 ++++++++++++++++++++++++++++++++
 src/encoder/sync.rs          |  7 ++++---
 src/format/mod.rs            | 11 ++++++++++-
 src/lib.rs                   |  3 +++
 7 files changed, 101 insertions(+), 12 deletions(-)

diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
index 7fb938d..8a43d81 100644
--- a/examples/mk-format-hashes.rs
+++ b/examples/mk-format-hashes.rs
@@ -1,6 +1,11 @@
 use pxar::format::hash_filename;
 
 const CONSTANTS: &[(&str, &str, &str)] = &[
+    (
+        "Pxar format version entry, fallback to version 1 if not present",
+        "PXAR_FORMAT_VERSION",
+        "__PROXMOX_FORMAT_VERSION__",
+    ),
     (
         "Beginning of an entry (current version).",
         "PXAR_ENTRY",
diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
index a0f322e..ec2f079 100644
--- a/src/decoder/mod.rs
+++ b/src/decoder/mod.rs
@@ -160,6 +160,7 @@ pub(crate) struct DecoderImpl<T> {
     /// The random access code uses decoders for sub-ranges which may not end in a `PAYLOAD` for
     /// entries like FIFOs or sockets, so there we explicitly allow an item to terminate with EOF.
     eof_after_entry: bool,
+    version: format::FormatVersion,
 }
 
 enum State {
@@ -220,6 +221,7 @@ impl<I: SeqRead> DecoderImpl<I> {
             state: State::Begin,
             with_goodbye_tables: false,
             eof_after_entry,
+            version: format::FormatVersion::default(),
         };
 
         // this.read_next_entry().await?;
@@ -236,7 +238,16 @@ impl<I: SeqRead> DecoderImpl<I> {
         loop {
             match self.state {
                 State::Eof => return Ok(None),
-                State::Begin => return self.read_next_entry().await.map(Some),
+                State::Begin => {
+                    let entry = self.read_next_entry().await.map(Some);
+                    if let Ok(Some(ref entry)) = entry {
+                        if let EntryKind::Version(version) = entry.kind() {
+                            self.version = version.clone();
+                            return self.read_next_entry().await.map(Some);
+                        }
+                    }
+                    return entry;
+                }
                 State::Default => {
                     // we completely finished an entry, so now we're going "up" in the directory
                     // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
@@ -277,6 +288,9 @@ impl<I: SeqRead> DecoderImpl<I> {
             match self.current_header.htype {
                 format::PXAR_FILENAME => return self.handle_file_entry().await,
                 format::PXAR_APPENDIX_REF => {
+                    if self.version == format::FormatVersion::Version1 {
+                        io_bail!("appendix reference not supported in pxar format version 1");
+                    }
                     self.state = State::Default;
                     return self.handle_appendix_ref_entry().await;
                 }
@@ -296,6 +310,9 @@ impl<I: SeqRead> DecoderImpl<I> {
                     }
                 }
                 format::PXAR_APPENDIX => {
+                    if self.version == format::FormatVersion::Version1 {
+                        io_bail!("appendix not supported in pxar format version 1");
+                    }
                     self.state = State::Default;
                     return Ok(Some(self.entry.take()));
                 }
@@ -397,6 +414,11 @@ impl<I: SeqRead> DecoderImpl<I> {
             self.entry.metadata = Metadata::default();
             self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
 
+            Ok(Some(self.entry.take()))
+        } else if header.htype == format::PXAR_FORMAT_VERSION {
+            self.current_header = header;
+            self.entry.kind = EntryKind::Version(self.read_format_version().await?);
+
             Ok(Some(self.entry.take()))
         } else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 {
             if header.htype == format::PXAR_ENTRY {
@@ -696,6 +718,14 @@ impl<I: SeqRead> DecoderImpl<I> {
     async fn read_quota_project_id(&mut self) -> io::Result<format::QuotaProjectId> {
         self.read_simple_entry("quota project id").await
     }
+
+    async fn read_format_version(&mut self) -> io::Result<format::FormatVersion> {
+        match seq_read_entry(&mut self.input).await? {
+            1u64 => Ok(format::FormatVersion::Version1),
+            2u64 => Ok(format::FormatVersion::Version2),
+            _ => io_bail!("unexpected pxar format version"),
+        }
+    }
 }
 
 /// Reader for file contents inside a pxar archive.
diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs
index 5a833c5..8f586b6 100644
--- a/src/encoder/aio.rs
+++ b/src/encoder/aio.rs
@@ -24,8 +24,9 @@ impl<'a, T: tokio::io::AsyncWrite + 'a> Encoder<'a, TokioWriter<T>> {
     pub async fn from_tokio(
         output: T,
         metadata: &Metadata,
+        version: format::FormatVersion,
     ) -> io::Result<Encoder<'a, TokioWriter<T>>> {
-        Encoder::new(TokioWriter::new(output), metadata).await
+        Encoder::new(TokioWriter::new(output), metadata, version).await
     }
 }
 
@@ -46,9 +47,13 @@ impl<'a> Encoder<'a, TokioWriter<tokio::fs::File>> {
 
 impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     /// Create an asynchronous encoder for an output implementing our internal write interface.
-    pub async fn new(output: T, metadata: &Metadata) -> io::Result<Encoder<'a, T>> {
+    pub async fn new(
+        output: T,
+        metadata: &Metadata,
+        version: format::FormatVersion,
+    ) -> io::Result<Encoder<'a, T>> {
         Ok(Self {
-            inner: encoder::EncoderImpl::new(output.into(), metadata).await?,
+            inner: encoder::EncoderImpl::new(output.into(), metadata, version).await?,
         })
     }
 
@@ -299,7 +304,7 @@ mod test {
     use std::task::{Context, Poll};
 
     use super::Encoder;
-    use crate::Metadata;
+    use crate::{format, Metadata};
 
     struct DummyOutput;
 
@@ -321,9 +326,13 @@ mod test {
     /// Assert that `Encoder` is `Send`
     fn send_test() {
         let test = async {
-            let mut encoder = Encoder::new(DummyOutput, &Metadata::dir_builder(0o700).build())
-                .await
-                .unwrap();
+            let mut encoder = Encoder::new(
+                DummyOutput,
+                &Metadata::dir_builder(0o700).build(),
+                format::FormatVersion::default(),
+            )
+            .await
+            .unwrap();
             {
                 let mut dir = encoder
                     .create_directory("baba", &Metadata::dir_builder(0o700).build())
diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
index c33b2c3..7e18dc8 100644
--- a/src/encoder/mod.rs
+++ b/src/encoder/mod.rs
@@ -247,6 +247,7 @@ pub async fn encoded_size(filename: &std::ffi::CStr, metadata: &Metadata) -> io:
         file_copy_buffer: Arc::new(Mutex::new(unsafe {
             crate::util::vec_new_uninitialized(1024 * 1024)
         })),
+        version: format::FormatVersion::default(),
     };
 
     this.start_file_do(Some(metadata), filename.to_bytes())
@@ -356,6 +357,8 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
     /// Since only the "current" entry can be actively writing files, we share the file copy
     /// buffer.
     file_copy_buffer: Arc<Mutex<Vec<u8>>>,
+    /// Pxar format version to encode
+    version: format::FormatVersion,
 }
 
 impl<'a, T: SeqWrite + 'a> Drop for EncoderImpl<'a, T> {
@@ -377,6 +380,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
     pub async fn new(
         output: EncoderOutput<'a, T>,
         metadata: &Metadata,
+        version: format::FormatVersion,
     ) -> io::Result<EncoderImpl<'a, T>> {
         if !metadata.is_dir() {
             io_bail!("directory metadata must contain the directory mode flag");
@@ -389,8 +393,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
             file_copy_buffer: Arc::new(Mutex::new(unsafe {
                 crate::util::vec_new_uninitialized(1024 * 1024)
             })),
+            version,
         };
 
+        this.encode_format_version().await?;
         this.encode_metadata(metadata).await?;
         this.state.files_offset = this.position();
 
@@ -509,6 +515,9 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         appendix_ref_offset: AppendixRefOffset,
         file_size: u64,
     ) -> io::Result<()> {
+        if self.version == format::FormatVersion::Version1 {
+            io_bail!("unable to add appendix reference for pxar format version 1");
+        }
         self.check()?;
 
         let offset = self.position();
@@ -544,6 +553,9 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         &mut self,
         full_size: AppendixRefOffset,
     ) -> io::Result<AppendixStartOffset> {
+        if self.version == format::FormatVersion::Version1 {
+            io_bail!("unable to add appendix for pxar format version 1");
+        }
         self.check()?;
 
         let data = &full_size.raw().to_le_bytes().to_vec();
@@ -740,6 +752,7 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
             parent: Some(&mut self.state),
             finished: false,
             file_copy_buffer,
+            version: self.version.clone(),
         })
     }
 
@@ -755,6 +768,25 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         Ok(())
     }
 
+    async fn encode_format_version(&mut self) -> io::Result<()> {
+		if self.state.write_position != 0 {
+			io_bail!("pxar format version must be encoded at the beginning of an archive");
+		}
+
+		let version_bytes = match self.version {
+			format::FormatVersion::Version1 => return Ok(()),
+			format::FormatVersion::Version2 => 2u64.to_le_bytes(),
+		};
+
+        seq_write_pxar_entry(
+            self.output.as_mut(),
+            format::PXAR_FORMAT_VERSION,
+            &version_bytes,
+            &mut self.state.write_position,
+        )
+        .await
+    }
+
     async fn encode_metadata(&mut self, metadata: &Metadata) -> io::Result<()> {
         seq_write_pxar_struct_entry(
             self.output.as_mut(),
diff --git a/src/encoder/sync.rs b/src/encoder/sync.rs
index 5ede554..8516662 100644
--- a/src/encoder/sync.rs
+++ b/src/encoder/sync.rs
@@ -28,7 +28,7 @@ impl<'a, T: io::Write + 'a> Encoder<'a, StandardWriter<T>> {
     /// Encode a `pxar` archive into a regular `std::io::Write` output.
     #[inline]
     pub fn from_std(output: T, metadata: &Metadata) -> io::Result<Encoder<'a, StandardWriter<T>>> {
-        Encoder::new(StandardWriter::new(output), metadata)
+        Encoder::new(StandardWriter::new(output), metadata, format::FormatVersion::default())
     }
 }
 
@@ -41,6 +41,7 @@ impl<'a> Encoder<'a, StandardWriter<std::fs::File>> {
         Encoder::new(
             StandardWriter::new(std::fs::File::create(path.as_ref())?),
             metadata,
+            format::FormatVersion::default(),
         )
     }
 }
@@ -50,9 +51,9 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     ///
     /// Note that the `output`'s `SeqWrite` implementation must always return `Poll::Ready` and is
     /// not allowed to use the `Waker`, as this will cause a `panic!`.
-    pub fn new(output: T, metadata: &Metadata) -> io::Result<Self> {
+    pub fn new(output: T, metadata: &Metadata, version: format::FormatVersion) -> io::Result<Self> {
         Ok(Self {
-            inner: poll_result_once(encoder::EncoderImpl::new(output.into(), metadata))?,
+            inner: poll_result_once(encoder::EncoderImpl::new(output.into(), metadata, version))?,
         })
     }
 
diff --git a/src/format/mod.rs b/src/format/mod.rs
index 8016ab1..5154a0a 100644
--- a/src/format/mod.rs
+++ b/src/format/mod.rs
@@ -44,7 +44,7 @@
 //!   * final goodbye table
 //!   * `APPENDIX_TAIL`     -- marks the end of an archive containing a APPENDIX section
 
-use std::cmp::Ordering;
+use std::cmp::{Ordering, PartialEq};
 use std::ffi::{CStr, OsStr};
 use std::fmt;
 use std::fmt::Display;
@@ -88,6 +88,8 @@ pub mod mode {
 }
 
 // Generated by `cargo run --example mk-format-hashes`
+/// Pxar format version entry, fallback to version 1 if not present
+pub const PXAR_FORMAT_VERSION: u64 = 0x730f6c75df16a40d;
 /// Beginning of an entry (current version).
 pub const PXAR_ENTRY: u64 = 0xd5956474e588acef;
 /// Previous version of the entry struct
@@ -561,6 +563,13 @@ pub struct Filename {
     pub name: Vec<u8>,
 }
 
+#[derive(Clone, Debug, Default, PartialEq)]
+pub enum FormatVersion {
+    #[default]
+    Version1,
+    Version2,
+}
+
 #[derive(Clone, Debug)]
 pub struct Symlink {
     pub data: Vec<u8>,
diff --git a/src/lib.rs b/src/lib.rs
index 035f995..a08055e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -342,6 +342,9 @@ impl Acl {
 /// Identifies whether the entry is a file, symlink, directory, etc.
 #[derive(Clone, Debug)]
 pub enum EntryKind {
+    /// Pxar file format version
+    Version(format::FormatVersion),
+
     /// Symbolic links.
     Symlink(format::Symlink),
 
-- 
2.39.2





More information about the pbs-devel mailing list