[pbs-devel] [PATCH v2 stable-2 pxar 1/1] format/decoder/accessor: backport pxar entry type `Version`

Fabian Grünbichler f.gruenbichler at proxmox.com
Thu Jun 6 10:21:42 CEST 2024


On June 5, 2024 5:41 pm, Christian Ebner wrote:
> Backports the pxar format entry type `Version` and the associated
> decoder methods. The format version entry is expected once as the
> first entry of the pxar archive, marked with a `PXAR_FORMAT_VERSION`
> header followed by the encoded version number for archives with
> format version 2 or higher.
> If not present, the default format version 1 is assumed as encoding
> format for the archive.
> 
> The entry allows to early detect and bail if an incompatible archive
> version is encountered.
> 
> The format version entry is not backwards compatible to pxar format
> version 1.
> 
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> Note:
> 
> This patch is intended to be applied on a dedicated branch to be forked
> from previous master commit 675ecff32fbeff0973eaea016c4b8f3877015adb
> 
>  examples/mk-format-hashes.rs |  5 +++++
>  src/accessor/mod.rs          | 28 ++++++++++++++++++++++++++--
>  src/decoder/mod.rs           | 28 ++++++++++++++++++++++++++--
>  src/format/mod.rs            | 19 +++++++++++++++++++
>  src/lib.rs                   |  3 +++
>  tests/simple/fs.rs           |  1 +
>  6 files changed, 80 insertions(+), 4 deletions(-)
> 
> diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
> index 6e00654..afd0924 100644
> --- a/examples/mk-format-hashes.rs
> +++ b/examples/mk-format-hashes.rs
> @@ -1,6 +1,11 @@
>  use pxar::format::hash_filename;
>  
>  const CONSTANTS: &[(&str, &str, &str)] = &[
> +    (
> +        "Pxar format version entry, fallback to version 1 if not present",
> +        "PXAR_FORMAT_VERSION",
> +        "__PROXMOX_FORMAT_VERSION__",
> +    ),
>      (
>          "Beginning of an entry (current version).",
>          "PXAR_ENTRY",
> diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs
> index 6a2de73..73d79e1 100644
> --- a/src/accessor/mod.rs
> +++ b/src/accessor/mod.rs
> @@ -17,7 +17,7 @@ use endian_trait::Endian;
>  
>  use crate::binary_tree_array;
>  use crate::decoder::{self, DecoderImpl};
> -use crate::format::{self, GoodbyeItem};
> +use crate::format::{self, FormatVersion, GoodbyeItem};
>  use crate::util;
>  use crate::{Entry, EntryKind};
>  
> @@ -185,11 +185,23 @@ pub(crate) struct AccessorImpl<T> {
>  }
>  
>  impl<T: ReadAt> AccessorImpl<T> {
> -    pub async fn new(input: T, size: u64) -> io::Result<Self> {
> +    pub async fn new(mut input: T, size: u64) -> io::Result<Self> {
>          if size < (size_of::<GoodbyeItem>() as u64) {
>              io_bail!("too small to contain a pxar archive");
>          }
>  
> +        let header: format::Header = read_entry_at(&mut input, 0).await?;
> +        header.check_header_size()?;
> +
> +        if header.htype == format::PXAR_FORMAT_VERSION {
> +            let version: u64 = read_entry_at(
> +                &mut input,
> +                size_of::<format::Header>() as u64,
> +            )
> +            .await?;
> +            FormatVersion::deserialize(version)?;
> +        }

is there some other way to construct the AccessorImpl? if not, wouldn't
this check here be enough and the ones below can actually never
trigger/happen? see below as well, I think the deserialize could just be
an io_bail

> +
>          Ok(Self {
>              input,
>              size,
> @@ -293,6 +305,12 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
>              .next()
>              .await
>              .ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??;
> +
> +        if let EntryKind::Version(_) = entry.kind() {
> +            // client is incompatible with any format version entry (version 1 is never encoded)
> +            io_bail!("got format version not compatible with this client.");
> +        }

since no encoded version can be deserialized by the stable-2 parser,
this cannot happen since the deserializer would have bailed before?

> +
>          Ok(FileEntryImpl {
>              input: self.input.clone(),
>              entry,
> @@ -516,6 +534,12 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
>              .next()
>              .await
>              .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
> +
> +        if let EntryKind::Version(_) = entry.kind() {
> +            // client is incompatible with any format version entry (version 1 is never encoded)
> +            io_bail!("got format version not compatible with this client.");
> +        }

same here

> +
>          Ok((entry, decoder))
>      }
>  
> diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
> index d1fb911..c6eae9f 100644
> --- a/src/decoder/mod.rs
> +++ b/src/decoder/mod.rs
> @@ -17,7 +17,7 @@ use std::task::{Context, Poll};
>  
>  use endian_trait::Endian;
>  
> -use crate::format::{self, Header};
> +use crate::format::{self, FormatVersion, Header};
>  use crate::util::{self, io_err_other};
>  use crate::{Entry, EntryKind, Metadata};
>  
> @@ -162,6 +162,7 @@ pub(crate) struct DecoderImpl<T> {
>      eof_after_entry: bool,
>  }
>  
> +#[derive(Clone, PartialEq)]
>  enum State {
>      Begin,
>      Default,
> @@ -236,7 +237,16 @@ impl<I: SeqRead> DecoderImpl<I> {
>          loop {
>              match self.state {
>                  State::Eof => return Ok(None),
> -                State::Begin => return self.read_next_entry().await.map(Some),
> +                State::Begin => {
> +                    let entry = self.read_next_entry().await.map(Some);
> +                    if let Ok(Some(ref entry)) = entry {
> +                        if let EntryKind::Version(_) = entry.kind() {
> +                            // client is incompatible with any format version entry (version 1 is never encoded)
> +                            io_bail!("got format version not compatible with this client.");

do we want to include the version here? but see below, I think we can
skip this altogether since we never ever will encounter a valid Version
entry..

> +                        }
> +                    }
> +                    return entry;
> +                }
>                  State::Default => {
>                      // we completely finished an entry, so now we're going "up" in the directory
>                      // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
> @@ -354,6 +364,7 @@ impl<I: SeqRead> DecoderImpl<I> {
>      }
>  
>      async fn read_next_entry_or_eof(&mut self) -> io::Result<Option<Entry>> {
> +        let previous_state = self.state.clone();
>          self.state = State::Default;
>          self.entry.clear_data();
>  
> @@ -373,6 +384,14 @@ impl<I: SeqRead> DecoderImpl<I> {
>              self.entry.metadata = Metadata::default();
>              self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
>  
> +            Ok(Some(self.entry.take()))
> +        } else if header.htype == format::PXAR_FORMAT_VERSION {
> +            if previous_state != State::Begin {
> +                io_bail!("Got format version entry at unexpected position");
> +            }

technically any position is unexpected, so we could drop this check
here..

> +            self.current_header = header;
> +            self.entry.kind = EntryKind::Version(self.read_format_version().await?);

we can skip this, since there can never be a valid Version entry, and
just inline read_format_version as a single call to seq_read_entry
followed by bailing?

> +
>              Ok(Some(self.entry.take()))
>          } else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 {
>              if header.htype == format::PXAR_ENTRY {
> @@ -661,6 +680,11 @@ impl<I: SeqRead> DecoderImpl<I> {
>      async fn read_quota_project_id(&mut self) -> io::Result<format::QuotaProjectId> {
>          self.read_simple_entry("quota project id").await
>      }
> +
> +    async fn read_format_version(&mut self) -> io::Result<format::FormatVersion> {
> +        let version: u64 = seq_read_entry(&mut self.input).await?;
> +        FormatVersion::deserialize(version)
> +    }
>  }
>  
>  /// Reader for file contents inside a pxar archive.
> diff --git a/src/format/mod.rs b/src/format/mod.rs
> index bfea9f6..2e21635 100644
> --- a/src/format/mod.rs
> +++ b/src/format/mod.rs
> @@ -6,6 +6,7 @@
>  //! item data.
>  //!
>  //! An archive contains items in the following order:
> +//!  * `FORMAT_VERSION`     -- (optional for v1), version of encoding format
>  //!  * `ENTRY`              -- containing general stat() data and related bits
>  //!   * `XATTR`             -- one extended attribute
>  //!   * ...                 -- more of these when there are multiple defined
> @@ -79,6 +80,8 @@ pub mod mode {
>  }
>  
>  // Generated by `cargo run --example mk-format-hashes`
> +/// Pxar format version entry, fallback to version 1 if not present
> +pub const PXAR_FORMAT_VERSION: u64 = 0x730f6c75df16a40d;
>  /// Beginning of an entry (current version).
>  pub const PXAR_ENTRY: u64 = 0xd5956474e588acef;
>  /// Previous version of the entry struct
> @@ -177,6 +180,7 @@ impl Header {
>  impl Display for Header {
>      fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
>          let readable = match self.htype {
> +            PXAR_FORMAT_VERSION => "FORMAT_VERSION",
>              PXAR_FILENAME => "FILENAME",
>              PXAR_SYMLINK => "SYMLINK",
>              PXAR_HARDLINK => "HARDLINK",
> @@ -540,6 +544,21 @@ impl From<&std::fs::Metadata> for Stat {
>      }
>  }
>  
> +#[derive(Clone, Debug, Default, PartialEq)]
> +pub enum FormatVersion {
> +    #[default]
> +    Version1,
> +}
> +
> +impl FormatVersion {
> +    pub fn deserialize(version: u64) -> Result<FormatVersion, io::Error> {
> +        match version {
> +            1u64 => Ok(FormatVersion::Version1),

the 1u64 here is wrong, right? it can't ever be encoded that way.. so
this can go straight to io_bail!, or we can even skip the deserialize
altogether and just inline that bail above in `read_format_version`

> +            version => io_bail!("incompatible format version {version}")
> +        }
> +    }
> +}
> +
>  #[derive(Clone, Debug)]
>  pub struct Filename {
>      pub name: Vec<u8>,
> diff --git a/src/lib.rs b/src/lib.rs
> index 210c4b1..b63d43c 100644
> --- a/src/lib.rs
> +++ b/src/lib.rs
> @@ -342,6 +342,9 @@ impl Acl {
>  /// Identifies whether the entry is a file, symlink, directory, etc.
>  #[derive(Clone, Debug)]
>  pub enum EntryKind {
> +    /// Pxar file format version
> +    Version(format::FormatVersion),
> +

if we never construct such an entry, since it is always considered
invalid, we can skip this?

>      /// Symbolic links.
>      Symlink(format::Symlink),
>  
> diff --git a/tests/simple/fs.rs b/tests/simple/fs.rs
> index 9a89c4d..fd13e65 100644
> --- a/tests/simple/fs.rs
> +++ b/tests/simple/fs.rs
> @@ -229,6 +229,7 @@ impl Entry {
>                      })?))
>                  };
>              match item.kind() {
> +                PxarEntryKind::Version(_) => continue,

and as a result, this?

>                  PxarEntryKind::GoodbyeTable => break,
>                  PxarEntryKind::File { size, .. } => {
>                      let mut data = Vec::new();
> -- 
> 2.30.2
> 
> 
> 
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
> 
> 
> 




More information about the pbs-devel mailing list