[pbs-devel] [PATCH v4 pxar 7/26] fix #3174: enc/dec: impl PXAR_APPENDIX_TAIL entrytype

Christian Ebner c.ebner at proxmox.com
Thu Nov 9 19:45:55 CET 2023


The PXAR_APPENDIX_TAIL entry marks pxar archives containing an appendix
section. It has the same size as a goodbye tail marker item, so that
archives with and without such a section can be easily read and
distinguished.

This also implements the accessor used by e.g. the FUSE implementation
to perform random I/O on the archive. The accessor reads the last entry
and stores the appendix offset if needed, in order to recalculate the
actual file payload offset within the archive when encountering an
appendix reference entry in the archive.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
Changes since v3:
- correctly propagate appendix_offset to all accessor impls which depend
  on it

Changes since v2:
- fix get_cursor for files located in the appendix

Changes since v1:
- adapt to custom type for appendix start offset

 examples/mk-format-hashes.rs |   5 ++
 examples/pxarcmd.rs          |   4 +-
 src/accessor/mod.rs          | 132 +++++++++++++++++++++++++++++------
 src/encoder/aio.rs           |   9 ++-
 src/encoder/mod.rs           |  19 ++++-
 src/encoder/sync.rs          |   7 +-
 src/format/mod.rs            |   4 ++
 7 files changed, 152 insertions(+), 28 deletions(-)

diff --git a/examples/mk-format-hashes.rs b/examples/mk-format-hashes.rs
index f068edd..7fb938d 100644
--- a/examples/mk-format-hashes.rs
+++ b/examples/mk-format-hashes.rs
@@ -57,6 +57,11 @@ const CONSTANTS: &[(&str, &str, &str)] = &[
         "PXAR_GOODBYE_TAIL_MARKER",
         "__PROXMOX_FORMAT_PXAR_GOODBYE_TAIL_MARKER__",
     ),
+    (
+        "Marks the end of an archive containing an appendix section",
+        "PXAR_APPENDIX_TAIL",
+        "__PROXMOX_FORMAT_APPENDIX_TAIL__",
+    ),
 ];
 
 fn main() {
diff --git a/examples/pxarcmd.rs b/examples/pxarcmd.rs
index e0c779d..c7848cc 100644
--- a/examples/pxarcmd.rs
+++ b/examples/pxarcmd.rs
@@ -105,7 +105,7 @@ fn cmd_create(mut args: std::env::ArgsOs) -> Result<(), Error> {
 
     let mut encoder = Encoder::create(file, &meta)?;
     add_directory(&mut encoder, dir, &dir_path, &mut HashMap::new())?;
-    encoder.finish()?;
+    encoder.finish(None)?;
 
     Ok(())
 }
@@ -145,7 +145,7 @@ fn add_directory<'a, T: SeqWrite + 'a>(
                 root_path,
                 &mut *hardlinks,
             )?;
-            dir.finish()?;
+            dir.finish(None)?;
         } else if file_type.is_symlink() {
             todo!("symlink handling");
         } else if file_type.is_file() {
diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs
index 6a2de73..dd5b559 100644
--- a/src/accessor/mod.rs
+++ b/src/accessor/mod.rs
@@ -182,6 +182,7 @@ pub(crate) struct AccessorImpl<T> {
     input: T,
     size: u64,
     caches: Arc<Caches>,
+    appendix_offset: Option<u64>,
 }
 
 impl<T: ReadAt> AccessorImpl<T> {
@@ -190,10 +191,22 @@ impl<T: ReadAt> AccessorImpl<T> {
             io_bail!("too small to contain a pxar archive");
         }
 
+        let tail_offset = size - (size_of::<GoodbyeItem>() as u64);
+        let tail: GoodbyeItem = read_entry_at(&input, tail_offset).await?;
+
+        let (appendix_offset, size) = if tail.hash == format::PXAR_APPENDIX_TAIL {
+            (Some(tail.offset), size - 40)
+        } else if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
+            io_bail!("no goodbye tail marker found");
+        } else {
+            (None, size)
+        };
+
         Ok(Self {
             input,
             size,
             caches: Arc::new(Caches::default()),
+            appendix_offset,
         })
     }
 
@@ -207,6 +220,7 @@ impl<T: ReadAt> AccessorImpl<T> {
             self.size,
             "/".into(),
             Arc::clone(&self.caches),
+            self.appendix_offset,
         )
         .await
     }
@@ -263,6 +277,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
             self.size,
             "/".into(),
             Arc::clone(&self.caches),
+            self.appendix_offset,
         )
         .await
     }
@@ -274,6 +289,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
             offset,
             "/".into(),
             Arc::clone(&self.caches),
+            self.appendix_offset,
         )
         .await
     }
@@ -298,6 +314,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
             entry,
             entry_range_info: entry_range_info.clone(),
             caches: Arc::clone(&self.caches),
+            appendix_offset: self.appendix_offset,
         })
     }
 
@@ -353,6 +370,7 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
                         entry_range: entry_offset..entry_end,
                     },
                     caches: Arc::clone(&self.caches),
+                    appendix_offset: self.appendix_offset,
                 })
             }
             _ => io_bail!("hardlink does not point to a regular file"),
@@ -369,6 +387,7 @@ pub(crate) struct DirectoryImpl<T> {
     table: Arc<[GoodbyeItem]>,
     path: PathBuf,
     caches: Arc<Caches>,
+    appendix_offset: Option<u64>,
 }
 
 impl<T: Clone + ReadAt> DirectoryImpl<T> {
@@ -378,6 +397,7 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
         end_offset: u64,
         path: PathBuf,
         caches: Arc<Caches>,
+        appendix_offset: Option<u64>,
     ) -> io::Result<DirectoryImpl<T>> {
         let tail = Self::read_tail_entry(&input, end_offset).await?;
 
@@ -407,6 +427,7 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
             table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone),
             path,
             caches,
+            appendix_offset,
         };
 
         // sanity check:
@@ -516,6 +537,32 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
             .next()
             .await
             .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+        if let EntryKind::AppendixRef {
+            appendix_offset,
+            file_size,
+        } = entry.kind()
+        {
+            let appendix_start = match self.appendix_offset {
+                Some(appendix_start) => appendix_start,
+                None => io_bail!("missing required appendix start offset information"),
+            };
+
+            let name = file_name.ok_or_else(|| io_format_err!("missing required filename"))?;
+            let c_string = std::ffi::CString::new(name.as_os_str().as_bytes())?;
+            let start =
+                appendix_start + appendix_offset + 16 + c_string.as_bytes_with_nul().len() as u64;
+            let end = start + file_size;
+            decoder = self.get_decoder(start..end, file_name).await?;
+
+            let entry = decoder
+                .next()
+                .await
+                .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+            return Ok((entry, decoder));
+        }
+
         Ok((entry, decoder))
     }
 
@@ -533,6 +580,7 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
                 entry_range: self.entry_range(),
             },
             caches: Arc::clone(&self.caches),
+            appendix_offset: self.appendix_offset,
         })
     }
 
@@ -616,36 +664,76 @@ impl<T: Clone + ReadAt> DirectoryImpl<T> {
         }
 
         let file_ofs = self.goodbye_ofs - file_goodbye_ofs;
-        let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
 
-        let entry_range = Range {
-            start: entry_ofs,
-            end: file_ofs + entry.size,
+        let mut head: format::Header = read_entry_at(&self.input, file_ofs).await?;
+        let (file_name, entry_range_info) = if head.htype == format::PXAR_APPENDIX_REF {
+            let appendix_start = match self.appendix_offset {
+                Some(appendix_start) => appendix_start,
+                None => io_bail!("missing required appendix start offset information"),
+            };
+            let bytes = read_exact_data_at(
+                &self.input,
+                head.content_size() as usize,
+                file_ofs + (size_of_val(&head) as u64),
+            )
+            .await?;
+            let appendix_offset = u64::from_le_bytes(bytes[0..8].try_into().unwrap());
+            let offset = appendix_start + appendix_offset;
+            let size = u64::from_le_bytes(bytes[8..16].try_into().unwrap());
+
+            head = read_entry_at(&self.input, offset).await?;
+            let (file_name, entry_ofs) = self.read_filename_entry(head, offset).await?;
+
+            let c_string = std::ffi::CString::new(file_name.as_os_str().as_bytes())?;
+            let start = offset + 16 + c_string.as_bytes_with_nul().len() as u64;
+            if start + size < start {
+                io_bail!(
+                    "bad file: invalid entry ranges for {:?}: \
+                     start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
+                    file_name,
+                    entry_ofs,
+                    offset,
+                    size,
+                );
+            }
+            (file_name, EntryRangeInfo {
+                filename_header_offset: Some(offset),
+                entry_range: Range {
+                    start,
+                    end: start + size,
+                },
+            })
+        } else {
+            let (file_name, entry_ofs) = self.read_filename_entry(head, file_ofs).await?;
+            if file_ofs + entry.size < entry_ofs {
+                io_bail!(
+                    "bad file: invalid entry ranges for {:?}: \
+                     start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
+                    file_name,
+                    entry_ofs,
+                    file_ofs,
+                    entry.size,
+                );
+            }
+            (file_name, EntryRangeInfo {
+                filename_header_offset: Some(file_ofs),
+                entry_range: Range {
+                    start: entry_ofs,
+                    end: file_ofs + entry.size,
+                },
+            })
         };
-        if entry_range.end < entry_range.start {
-            io_bail!(
-                "bad file: invalid entry ranges for {:?}: \
-                 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
-                file_name,
-                entry_ofs,
-                file_ofs,
-                entry.size,
-            );
-        }
 
         Ok(DirEntryImpl {
             dir: self,
             file_name,
-            entry_range_info: EntryRangeInfo {
-                filename_header_offset: Some(file_ofs),
-                entry_range,
-            },
+            entry_range_info,
             caches: Arc::clone(&self.caches),
+            appendix_offset: self.appendix_offset,
         })
     }
 
-    async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
-        let head: format::Header = read_entry_at(&self.input, file_ofs).await?;
+    async fn read_filename_entry(&self, head: format::Header, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
         if head.htype != format::PXAR_FILENAME {
             io_bail!("expected PXAR_FILENAME header, found: {}", head);
         }
@@ -685,6 +773,7 @@ pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
     entry: Entry,
     entry_range_info: EntryRangeInfo,
     caches: Arc<Caches>,
+    appendix_offset: Option<u64>,
 }
 
 impl<T: Clone + ReadAt> FileEntryImpl<T> {
@@ -698,6 +787,7 @@ impl<T: Clone + ReadAt> FileEntryImpl<T> {
             self.entry_range_info.entry_range.end,
             self.entry.path.clone(),
             Arc::clone(&self.caches),
+            self.appendix_offset,
         )
         .await
     }
@@ -787,6 +877,7 @@ pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> {
     file_name: PathBuf,
     entry_range_info: EntryRangeInfo,
     caches: Arc<Caches>,
+    appendix_offset: Option<u64>,
 }
 
 impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
@@ -808,6 +899,7 @@ impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
             entry,
             entry_range_info: self.entry_range_info.clone(),
             caches: Arc::clone(&self.caches),
+            appendix_offset: self.appendix_offset,
         })
     }
 
diff --git a/src/encoder/aio.rs b/src/encoder/aio.rs
index 7379940..5a833c5 100644
--- a/src/encoder/aio.rs
+++ b/src/encoder/aio.rs
@@ -108,8 +108,11 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     }
 
     /// Finish this directory. This is mandatory, otherwise the `Drop` handler will `panic!`.
-    pub async fn finish(self) -> io::Result<()> {
-        self.inner.finish().await
+    pub async fn finish(
+        self,
+        appendix_tail: Option<(AppendixStartOffset, AppendixRefOffset)>,
+    ) -> io::Result<()> {
+        self.inner.finish(appendix_tail).await
     }
 
     /// Add size to encoders position and return new position.
@@ -330,7 +333,7 @@ mod test {
                     .await
                     .unwrap();
             }
-            encoder.finish().await.unwrap();
+            encoder.finish(None).await.unwrap();
         };
 
         fn test_send<T: Send>(_: T) {}
diff --git a/src/encoder/mod.rs b/src/encoder/mod.rs
index 14a8262..48a7972 100644
--- a/src/encoder/mod.rs
+++ b/src/encoder/mod.rs
@@ -872,7 +872,10 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         .await
     }
 
-    pub async fn finish(mut self) -> io::Result<()> {
+    pub async fn finish(
+        mut self,
+        appendix_tail: Option<(AppendixStartOffset, AppendixRefOffset)>,
+    ) -> io::Result<()> {
         let tail_bytes = self.finish_goodbye_table().await?;
         seq_write_pxar_entry(
             self.output.as_mut(),
@@ -882,6 +885,20 @@ impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
         )
         .await?;
 
+        if let Some((appendix_start_offset, size)) = appendix_tail {
+            let mut appendix_tail = Vec::new();
+            appendix_tail.append(&mut format::PXAR_APPENDIX_TAIL.to_le_bytes().to_vec());
+            appendix_tail.append(&mut appendix_start_offset.raw().to_le_bytes().to_vec());
+            appendix_tail.append(&mut size.raw().to_le_bytes().to_vec());
+            seq_write_pxar_entry(
+                self.output.as_mut(),
+                format::PXAR_GOODBYE,
+                &appendix_tail,
+                &mut self.state.write_position,
+            )
+            .await?;
+        }
+
         if let EncoderOutput::Owned(output) = &mut self.output {
             flush(output).await?;
         }
diff --git a/src/encoder/sync.rs b/src/encoder/sync.rs
index 48e4c96..af5cc29 100644
--- a/src/encoder/sync.rs
+++ b/src/encoder/sync.rs
@@ -106,8 +106,11 @@ impl<'a, T: SeqWrite + 'a> Encoder<'a, T> {
     }
 
     /// Finish this directory. This is mandatory, otherwise the `Drop` handler will `panic!`.
-    pub fn finish(self) -> io::Result<()> {
-        poll_result_once(self.inner.finish())
+    pub fn finish(
+        self,
+        appendix_tail: Option<(AppendixStartOffset, AppendixRefOffset)>,
+    ) -> io::Result<()> {
+        poll_result_once(self.inner.finish(appendix_tail))
     }
 
     /// Add size to encoders position and return new position.
diff --git a/src/format/mod.rs b/src/format/mod.rs
index 8254df9..8016ab1 100644
--- a/src/format/mod.rs
+++ b/src/format/mod.rs
@@ -41,6 +41,8 @@
 //! are appended. They are NOT guaranteed to follow the full pxar structure and should only be
 //! used to extract the file payloads by given offset.
 //!   * `APPENDIX`          -- pxar archive fragments containing file payloads
+//!   * final goodbye table
+//!   * `APPENDIX_TAIL`     -- marks the end of an archive containing an APPENDIX section
 
 use std::cmp::Ordering;
 use std::ffi::{CStr, OsStr};
@@ -113,6 +115,8 @@ pub const PXAR_APPENDIX_REF: u64 = 0x849b4a17e0234f8e;
 pub const PXAR_GOODBYE: u64 = 0x2fec4fa642d5731d;
 /// The end marker used in the GOODBYE object
 pub const PXAR_GOODBYE_TAIL_MARKER: u64 = 0xef5eed5b753e1555;
+/// Marks the end of an archive containing an appendix section
+pub const PXAR_APPENDIX_TAIL: u64 = 0x5b1b9abb7ae454f1;
 
 #[derive(Debug, Endian)]
 #[repr(C)]
-- 
2.39.2






More information about the pbs-devel mailing list