[pbs-devel] [PATCH v3 pxar 13/58] format: add pxar format version entry
Fabian Grünbichler
f.gruenbichler at proxmox.com
Wed Apr 3 13:41:41 CEST 2024
On March 28, 2024 1:36 pm, Christian Ebner wrote:
> Adds an additional entry type at the start of each pxar archive
> signaling the encoding format version. If not present, the default
> version 1 is assumed.
>
> This allows to early on detect the pxar encoding version, allowing tools
> to switch mode or bail on non compatible encoder/decoder functionality.
>
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> changes since version 2:
> - not present in previous version
>
> examples/mk-format-hashes.rs | 5 +++++
> src/decoder/mod.rs | 29 ++++++++++++++++++++++++++--
> src/encoder/mod.rs | 37 +++++++++++++++++++++++++++++++++---
> src/format/mod.rs | 11 +++++++++++
> src/lib.rs | 3 +++
> 5 files changed, 80 insertions(+), 5 deletions(-)
>
> diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
> index 35cff99..e5d69b1 100644
> --- a/examples/mk-format-hashes.rs
> +++ b/examples/mk-format-hashes.rs
> @@ -1,6 +1,11 @@
> use pxar::format::hash_filename;
>
> const CONSTANTS: &[(&str, &str, &str)] = &[
> + (
> + "Pxar format version entry, fallback to version 1 if not present",
> + "PXAR_FORMAT_VERSION",
> + "__PROXMOX_FORMAT_VERSION__",
> + ),
> (
> "Beginning of an entry (current version).",
> "PXAR_ENTRY",
> diff --git a/src/decoder/mod.rs b/src/decoder/mod.rs
> index 00d9abf..5b2fafb 100644
> --- a/src/decoder/mod.rs
> +++ b/src/decoder/mod.rs
> @@ -17,7 +17,7 @@ use std::task::{Context, Poll};
>
> use endian_trait::Endian;
>
> -use crate::format::{self, Header};
> +use crate::format::{self, FormatVersion, Header};
> use crate::util::{self, io_err_other};
> use crate::{Entry, EntryKind, Metadata};
>
> @@ -164,6 +164,8 @@ pub(crate) struct DecoderImpl<T> {
> /// The random access code uses decoders for sub-ranges which may not end in a `PAYLOAD` for
> /// entries like FIFOs or sockets, so there we explicitly allow an item to terminate with EOF.
> eof_after_entry: bool,
> + /// The format version as determined by the format version header
> + version: format::FormatVersion,
> }
>
> enum State {
> @@ -242,6 +244,7 @@ impl<I: SeqRead> DecoderImpl<I> {
> payload_input,
> payload_consumed,
> eof_after_entry,
> + version: FormatVersion::default(),
> };
>
> // this.read_next_entry().await?;
> @@ -258,7 +261,16 @@ impl<I: SeqRead> DecoderImpl<I> {
> loop {
> match self.state {
> State::Eof => return Ok(None),
> - State::Begin => return self.read_next_entry().await.map(Some),
> + State::Begin => {
> + let entry = self.read_next_entry().await.map(Some);
> + if let Ok(Some(ref entry)) = entry {
> + if let EntryKind::Version(version) = entry.kind() {
> + self.version = version.clone();
> + return self.read_next_entry().await.map(Some);
> + }
> + }
> + return entry;
I'm a bit unsure here: if we want to enforce the order, wouldn't it be
cleaner to transition to a new state here rather than adding more nested
ifs over time? ;)
> + }
> State::Default => {
> // we completely finished an entry, so now we're going "up" in the directory
> // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
> @@ -412,6 +424,11 @@ impl<I: SeqRead> DecoderImpl<I> {
> self.entry.metadata = Metadata::default();
> self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
>
> + Ok(Some(self.entry.take()))
> + } else if header.htype == format::PXAR_FORMAT_VERSION {
> + self.current_header = header;
> + self.entry.kind = EntryKind::Version(self.read_format_version().await?);
> +
> Ok(Some(self.entry.take()))
> } else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 {
> if header.htype == format::PXAR_ENTRY {
> @@ -777,6 +794,14 @@ impl<I: SeqRead> DecoderImpl<I> {
>
> seq_read_entry(&mut self.input).await
> }
> +
> + async fn read_format_version(&mut self) -> io::Result<format::FormatVersion> {
> + match seq_read_entry(&mut self.input).await? {
> + 1u64 => Ok(format::FormatVersion::Version1),
this should never happen though, right? (the encoder never writes a version entry for version 1)
> + 2u64 => Ok(format::FormatVersion::Version2),
also this (continued below)
> + _ => io_bail!("unexpected pxar format version"),
this should maybe include the value? ;)
> + }
> + }
> }
>
> /// Reader for file contents inside a pxar archive.
> diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
> index 88c0ed5..9270153 100644
> --- a/src/encoder/mod.rs
> +++ b/src/encoder/mod.rs
> @@ -17,7 +17,7 @@ use endian_trait::Endian;
>
> use crate::binary_tree_array;
> use crate::decoder::{self, SeqRead};
> -use crate::format::{self, GoodbyeItem, PayloadRef};
> +use crate::format::{self, FormatVersion, GoodbyeItem, PayloadRef};
> use crate::Metadata;
>
> pub mod aio;
> @@ -307,6 +307,8 @@ pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
> /// Since only the "current" entry can be actively writing files, we share the file copy
> /// buffer.
> file_copy_buffer: Arc<Mutex<Vec<u8>>>,
> + /// Pxar format version to encode
> + version: format::FormatVersion,
> }
>
> impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> @@ -320,11 +322,14 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> }
>
> let mut state = EncoderState::default();
> - if let Some(payload_output) = payload_output.as_mut() {
> + let version = if let Some(payload_output) = payload_output.as_mut() {
> let header = format::Header::with_content_size(format::PXAR_PAYLOAD_START_MARKER, 0);
> header.check_header_size()?;
> seq_write_struct(payload_output, header, &mut state.payload_write_position).await?;
> - }
> + format::FormatVersion::Version2
> + } else {
> + format::FormatVersion::default()
Shouldn't this be Version1 instead of default()? They are the same
*now*, but that might not be the case forever.
> + };
>
> let mut this = Self {
> output,
> @@ -334,8 +339,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> file_copy_buffer: Arc::new(Mutex::new(unsafe {
> crate::util::vec_new_uninitialized(1024 * 1024)
> })),
> + version,
> };
>
> + this.encode_format_version().await?;
> this.encode_metadata(metadata).await?;
> let state = this.state_mut()?;
> state.files_offset = state.position();
> @@ -522,6 +529,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> file_size: u64,
> payload_offset: PayloadOffset,
> ) -> io::Result<()> {
> + if self.version == FormatVersion::Version1 {
> + io_bail!("payload references not supported pxar format version 1");
> + }
> +
> if self.payload_output.as_mut().is_none() {
> io_bail!("unable to add payload reference");
> }
> @@ -729,6 +740,26 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
> Ok(())
> }
>
> + async fn encode_format_version(&mut self) -> io::Result<()> {
> + let version_bytes = match self.version {
> + format::FormatVersion::Version1 => return Ok(()),
> + format::FormatVersion::Version2 => 2u64.to_le_bytes(),
(continued from above) and this here should maybe go together?
> + };
> +
> + let (output, state) = self.output_state()?;
> + if state.write_position != 0 {
> + io_bail!("pxar format version must be encoded at the beginning of an archive");
should this also be enforced while decoding?
should we also encode a/the version of the payload archive?
> + }
> +
> + seq_write_pxar_entry(
> + output,
> + format::PXAR_FORMAT_VERSION,
> + &version_bytes,
> + &mut state.write_position,
> + )
> + .await
> + }
> +
> async fn encode_metadata(&mut self, metadata: &Metadata) -> io::Result<()> {
> let (output, state) = self.output_state()?;
> seq_write_pxar_struct_entry(
> diff --git a/src/format/mod.rs b/src/format/mod.rs
> index a672d19..2bf33c9 100644
> --- a/src/format/mod.rs
> +++ b/src/format/mod.rs
> @@ -6,6 +6,7 @@
> //! item data.
> //!
> //! An archive contains items in the following order:
> +//! * `FORMAT_VERSION` -- (optional for v1), version of encoding format
> //! * `ENTRY` -- containing general stat() data and related bits
> //! * `XATTR` -- one extended attribute
> //! * ... -- more of these when there are multiple defined
> @@ -80,6 +81,8 @@ pub mod mode {
> }
>
> // Generated by `cargo run --example mk-format-hashes`
> +/// Pxar format version entry, fallback to version 1 if not present
> +pub const PXAR_FORMAT_VERSION: u64 = 0x730f6c75df16a40d;
> /// Beginning of an entry (current version).
> pub const PXAR_ENTRY: u64 = 0xd5956474e588acef;
> /// Previous version of the entry struct
> @@ -186,6 +189,7 @@ impl Header {
> impl Display for Header {
> fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
> let readable = match self.htype {
> + PXAR_FORMAT_VERSION => "FORMAT_VERSION",
> PXAR_FILENAME => "FILENAME",
> PXAR_SYMLINK => "SYMLINK",
> PXAR_HARDLINK => "HARDLINK",
> @@ -551,6 +555,13 @@ impl From<&std::fs::Metadata> for Stat {
> }
> }
>
> +#[derive(Clone, Debug, Default, PartialEq)]
> +pub enum FormatVersion {
> + #[default]
> + Version1,
> + Version2,
> +}
> +
> #[derive(Clone, Debug)]
> pub struct Filename {
> pub name: Vec<u8>,
> diff --git a/src/lib.rs b/src/lib.rs
> index ef81a85..a87b5ac 100644
> --- a/src/lib.rs
> +++ b/src/lib.rs
> @@ -342,6 +342,9 @@ impl Acl {
> /// Identifies whether the entry is a file, symlink, directory, etc.
> #[derive(Clone, Debug)]
> pub enum EntryKind {
> + /// Pxar file format version
> + Version(format::FormatVersion),
> +
> /// Symbolic links.
> Symlink(format::Symlink),
>
> --
> 2.39.2
>
>
>
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>
>
>
More information about the pbs-devel mailing list