[pbs-devel] [PATCH v8 pxar 14/69] format/encoder/decoder: new pxar entry type `Version`

Fabian Grünbichler f.gruenbichler at proxmox.com
Mon Jun 3 13:25:58 CEST 2024


On May 28, 2024 11:42 am, Christian Ebner wrote:
> Introduces a new pxar format entry type `Version` and the associated
> encoder and decoder methods. The format version entry is only allowed
> once, as the first entry of the pxar archive, marked with a
> `PXAR_FORMAT_VERSION` header followed by the encoded version number.
> If not present, the default format version 1 is assumed as encoding
> format for the archive.
> 
> The entry allows to early detect incompatibility with an encoded
> archive and bail or switch mode based on the encountered version.
> 
> The format version entry is not backwards compatible to pxar format
> version 1.
> 
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>

it seems there are still some checks missing for this: if I build
patched PBS versions with an added Version 3 but lacking decode support,
I get the expected error in a lot of places, but can, for example, use the
`pxar-file-download` endpoint to download files from the archive, even
though the server should choke on the archive since it's using an
unknown value in the version header/entry.

ideally they'd be added here in pxar (since who knows, the next format
break might be entirely transparent to PBS :))

> ---
> changes since version 7:
> - no changes
> 
> changes since version 6:
> - no changes
> 
>  examples/mk-format-hashes.rs |  5 +++++
>  src/accessor/mod.rs          | 21 ++++++++++++++++++--
>  src/decoder/mod.rs           | 37 ++++++++++++++++++++++++++++++++++--
>  src/encoder/mod.rs           | 37 +++++++++++++++++++++++++++++++++---
>  src/format/mod.rs            | 11 +++++++++++
>  src/lib.rs                   |  3 +++
>  tests/simple/fs.rs           |  1 +
>  7 files changed, 108 insertions(+), 7 deletions(-)
> 
> diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
> index 35cff99..e5d69b1 100644
> --- a/examples/mk-format-hashes.rs
> +++ b/examples/mk-format-hashes.rs
> @@ -1,6 +1,11 @@
>  use pxar::format::hash_filename;
>  
>  const CONSTANTS: &[(&str, &str, &str)] = &[
> +    (
> +        "Pxar format version entry, fallback to version 1 if not present",
> +        "PXAR_FORMAT_VERSION",
> +        "__PROXMOX_FORMAT_VERSION__",
> +    ),
>      (
>          "Beginning of an entry (current version).",
>          "PXAR_ENTRY",
> diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs
> index c3a5e14..faab430 100644
> --- a/src/accessor/mod.rs
> +++ b/src/accessor/mod.rs
> @@ -299,11 +299,19 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
>              PathBuf::new(),
>          )
>          .await?;
> -        let entry = decoder
> +        let mut entry = decoder
>              .next()
>              .await
>              .ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??;
>  
> +        // Skip over possible Version and Prelude before the root entry of type Directory
> +        if let EntryKind::Version(_) = entry.kind() {
> +            entry = decoder
> +                .next()
> +                .await
> +                .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
> +        }
> +
>          Ok(FileEntryImpl {
>              input: self.input.clone(),
>              entry,
> @@ -528,10 +536,19 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
>          file_name: Option<&Path>,
>      ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> {
>          let mut decoder = self.get_decoder(entry_range, file_name).await?;
> -        let entry = decoder
> +        let mut entry = decoder
>              .next()
>              .await
>              .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
> +
> +        // Skip over possible Version and Prelude before the root entry of type Directory
> +        if let EntryKind::Version(_) = entry.kind() {
> +            entry = decoder
> +                .next()
> +                .await
> +                .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
> +        }
> +
>          Ok((entry, decoder))
>      }
>  
> diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
> index 19b1b5c..43c83ae 100644
> --- a/src/decoder/mod.rs
> +++ b/src/decoder/mod.rs
> @@ -18,7 +18,7 @@ use std::task::{Context, Poll};
>  
>  use endian_trait::Endian;
>  
> -use crate::format::{self, Header};
> +use crate::format::{self, FormatVersion, Header};
>  use crate::util::{self, io_err_other};
>  use crate::{Entry, EntryKind, Metadata, PxarVariant};
>  
> @@ -169,10 +169,14 @@ pub(crate) struct DecoderImpl<T> {
>      /// The random access code uses decoders for sub-ranges which may not end in a `PAYLOAD` for
>      /// entries like FIFOs or sockets, so there we explicitly allow an item to terminate with EOF.
>      eof_after_entry: bool,
> +    /// The format version as determined by the format version header
> +    version: format::FormatVersion,
>  }
>  
> +#[derive(Clone, PartialEq)]
>  enum State {
>      Begin,
> +    Root,
>      Default,
>      InPayload {
>          offset: u64,
> @@ -245,6 +249,7 @@ impl<I: SeqRead> DecoderImpl<I> {
>              with_goodbye_tables: false,
>              payload_consumed,
>              eof_after_entry,
> +            version: FormatVersion::default(),
>          })
>      }
>  
> @@ -257,7 +262,19 @@ impl<I: SeqRead> DecoderImpl<I> {
>          loop {
>              match self.state {
>                  State::Eof => return Ok(None),
> -                State::Begin => return self.read_next_entry().await.map(Some),
> +                State::Begin => {
> +                    let entry = self.read_next_entry().await.map(Some);
> +                    if let Ok(Some(ref entry)) = entry {
> +                        if let EntryKind::Version(version) = entry.kind() {
> +                            self.version = version.clone();
> +                            self.state = State::Root;
> +                        }
> +                    }
> +                    return entry;
> +                }
> +                State::Root => {
> +                    return self.read_next_entry().await.map(Some);
> +                }
>                  State::Default => {
>                      // we completely finished an entry, so now we're going "up" in the directory
>                      // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
> @@ -388,6 +405,7 @@ impl<I: SeqRead> DecoderImpl<I> {
>      }
>  
>      async fn read_next_entry_or_eof(&mut self) -> io::Result<Option<Entry>> {
> +        let previous_state = self.state.clone();
>          self.state = State::Default;
>          self.entry.clear_data();
>  
> @@ -407,6 +425,14 @@ impl<I: SeqRead> DecoderImpl<I> {
>              self.entry.metadata = Metadata::default();
>              self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
>  
> +            Ok(Some(self.entry.take()))
> +        } else if header.htype == format::PXAR_FORMAT_VERSION {
> +            if previous_state != State::Begin {
> +                io_bail!("Got format version entry at unexpected position");
> +            }
> +            self.current_header = header;
> +            self.entry.kind = EntryKind::Version(self.read_format_version().await?);
> +
>              Ok(Some(self.entry.take()))
>          } else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 {
>              if header.htype == format::PXAR_ENTRY {
> @@ -766,6 +792,13 @@ impl<I: SeqRead> DecoderImpl<I> {
>          self.current_header.check_header_size()?;
>          seq_read_entry(self.input.archive_mut()).await
>      }
> +
> +    async fn read_format_version(&mut self) -> io::Result<format::FormatVersion> {
> +        match seq_read_entry(self.input.archive_mut()).await? {
> +            2u64 => Ok(format::FormatVersion::Version2),
> +            version => io_bail!("unexpected pxar format version {version}"),

nit/follow-up: I'd maybe strip the "pxar" here and in the other
format-related bail messages? no other errors here include it, and I
think it's pretty much always implied by the context.

also see below

> +        }
> +    }
>  }
>  
>  /// Reader for file contents inside a pxar archive.
> diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
> index b579e18..4bed040 100644
> --- a/src/encoder/mod.rs
> +++ b/src/encoder/mod.rs
> @@ -17,7 +17,7 @@ use endian_trait::Endian;
>  
>  use crate::binary_tree_array;
>  use crate::decoder::{self, SeqRead};
> -use crate::format::{self, GoodbyeItem, PayloadRef};
> +use crate::format::{self, FormatVersion, GoodbyeItem, PayloadRef};
>  use crate::{Metadata, PxarVariant};
>  
>  pub mod aio;
> @@ -326,6 +326,8 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
>      /// Since only the "current" entry can be actively writing files, we share the file copy
>      /// buffer.
>      file_copy_buffer: Arc<Mutex<Vec<u8>>>,
> +    /// Pxar format version to encode
> +    version: format::FormatVersion,
>  }
>  
>  impl<'a, T: SeqWrite + 'a> Drop for EncoderImpl<'a, T> {
> @@ -350,11 +352,14 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
>          }
>  
>          let mut state = EncoderState::default();
> -        if let Some(payload_output) = output.payload_mut() {
> +        let version = if let Some(payload_output) = output.payload_mut() {
>              let header = format::Header::with_content_size(format::PXAR_PAYLOAD_START_MARKER, 0);
>              header.check_header_size()?;
>              seq_write_struct(payload_output, header, &mut state.payload_write_position).await?;
> -        }
> +            format::FormatVersion::Version2
> +        } else {
> +            format::FormatVersion::Version1
> +        };
>  
>          let mut this = Self {
>              output,
> @@ -363,8 +368,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
>              file_copy_buffer: Arc::new(Mutex::new(unsafe {
>                  crate::util::vec_new_uninitialized(1024 * 1024)
>              })),
> +            version,
>          };
>  
> +        this.encode_format_version().await?;
>          this.encode_metadata(metadata).await?;
>          let state = this.state_mut()?;
>          state.files_offset = state.position();
> @@ -547,6 +554,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
>          file_size: u64,
>          payload_offset: PayloadOffset,
>      ) -> io::Result<LinkOffset> {
> +        if self.version == FormatVersion::Version1 {
> +            io_bail!("payload references not supported pxar format version 1");
> +        }
> +
>          if self.output.payload().is_none() {
>              io_bail!("unable to add payload reference");
>          }
> @@ -762,6 +773,26 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
>          Ok(())
>      }
>  
> +    async fn encode_format_version(&mut self) -> io::Result<()> {
> +        let version_bytes = match self.version {
> +            format::FormatVersion::Version1 => return Ok(()),
> +            format::FormatVersion::Version2 => 2u64.to_le_bytes(),

nit: the representation here and on the read side are still pretty far
apart, it might be nice to move them next to each other into a
pair of serialize/deserialize..

> +        };
> +
> +        let (mut output, state) = self.output_state()?;
> +        if state.write_position != 0 {
> +            io_bail!("pxar format version must be encoded at the beginning of an archive");
> +        }
> +
> +        seq_write_pxar_entry(
> +            output.archive_mut(),
> +            format::PXAR_FORMAT_VERSION,
> +            &version_bytes,
> +            &mut state.write_position,
> +        )
> +        .await
> +    }
> +
>      async fn encode_metadata(&mut self, metadata: &Metadata) -> io::Result<()> {
>          let (mut output, state) = self.output_state()?;
>          seq_write_pxar_struct_entry(
> diff --git a/src/format/mod.rs b/src/format/mod.rs
> index 6519bfc..9b66fe2 100644
> --- a/src/format/mod.rs
> +++ b/src/format/mod.rs
> @@ -6,6 +6,7 @@
>  //! item data.
>  //!
>  //! An archive contains items in the following order:
> +//!  * `FORMAT_VERSION`     -- (optional for v1), version of encoding format
>  //!  * `ENTRY`              -- containing general stat() data and related bits
>  //!   * `XATTR`             -- one extended attribute
>  //!   * ...                 -- more of these when there are multiple defined
> @@ -80,6 +81,8 @@ pub mod mode {
>  }
>  
>  // Generated by `cargo run --example mk-format-hashes`
> +/// Pxar format version entry, fallback to version 1 if not present
> +pub const PXAR_FORMAT_VERSION: u64 = 0x730f6c75df16a40d;
>  /// Beginning of an entry (current version).
>  pub const PXAR_ENTRY: u64 = 0xd5956474e588acef;
>  /// Previous version of the entry struct
> @@ -186,6 +189,7 @@ impl Header {
>  impl Display for Header {
>      fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
>          let readable = match self.htype {
> +            PXAR_FORMAT_VERSION => "FORMAT_VERSION",
>              PXAR_FILENAME => "FILENAME",
>              PXAR_SYMLINK => "SYMLINK",
>              PXAR_HARDLINK => "HARDLINK",
> @@ -551,6 +555,13 @@ impl From<&std::fs::Metadata> for Stat {
>      }
>  }
>  
> +#[derive(Clone, Debug, Default, PartialEq)]
> +pub enum FormatVersion {
> +    #[default]
> +    Version1,
> +    Version2,
> +}
> +
>  #[derive(Clone, Debug)]
>  pub struct Filename {
>      pub name: Vec<u8>,
> diff --git a/src/lib.rs b/src/lib.rs
> index bafdfe4..7e5b48f 100644
> --- a/src/lib.rs
> +++ b/src/lib.rs
> @@ -342,6 +342,9 @@ impl Acl {
>  /// Identifies whether the entry is a file, symlink, directory, etc.
>  #[derive(Clone, Debug)]
>  pub enum EntryKind {
> +    /// Pxar file format version
> +    Version(format::FormatVersion),
> +
>      /// Symbolic links.
>      Symlink(format::Symlink),
>  
> diff --git a/tests/simple/fs.rs b/tests/simple/fs.rs
> index 4284805..8a8c607 100644
> --- a/tests/simple/fs.rs
> +++ b/tests/simple/fs.rs
> @@ -229,6 +229,7 @@ impl Entry {
>                      })?))
>                  };
>              match item.kind() {
> +                PxarEntryKind::Version(_) => continue,
>                  PxarEntryKind::GoodbyeTable => break,
>                  PxarEntryKind::File { size, .. } => {
>                      let mut data = Vec::new();
> -- 
> 2.39.2
> 
> 
> 
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
> 
> 
> 




More information about the pbs-devel mailing list