[pbs-devel] [PATCH v5 proxmox-backup 14/28] fix #3174: catalog: add specialized Archive entry

Christian Ebner c.ebner at proxmox.com
Wed Nov 15 16:47:59 CET 2023


Introduces a specialized pxar directory entry type, Archive,
which extends the regular directory entry by storing an additional
optional appendix start offset.

The archive entry type is only used for the topmost entries in the
catalog, replacing the currently used directory entry. If this entry
was created by reusing pxar file entries in an appendix section,
the appendix start offset is present and can be used to easily locate
the referenced file entries and calculate their position within the
appendix section, in order to access them from the catalog shell.

Since the catalog might contain multiple archives, each archive entry
stores its individual appendix start offset.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
Changes since version 4:
- no changes

Changes since version 3:
- no changes

Changes since version 2:
- Make sure DirEntryAttribute::Archive is not flagged as a leaf node

Changes since version 1:
- This reworks the Appendix Offset impl of version 1 completely

 pbs-client/src/catalog_shell.rs |   1 +
 pbs-datastore/src/catalog.rs    | 152 +++++++++++++++++++++++++++++++-
 2 files changed, 150 insertions(+), 3 deletions(-)

diff --git a/pbs-client/src/catalog_shell.rs b/pbs-client/src/catalog_shell.rs
index 99416d2f..7deb9d9a 100644
--- a/pbs-client/src/catalog_shell.rs
+++ b/pbs-client/src/catalog_shell.rs
@@ -1144,6 +1144,7 @@ impl<'a> ExtractorState<'a> {
         };
 
         match (did_match, &entry.attr) {
+            (_, DirEntryAttribute::Archive { .. }) |
             (_, DirEntryAttribute::Directory { .. }) => {
                 self.handle_new_directory(entry, match_result?).await?;
             }
diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs
index 8ae7c661..220313c6 100644
--- a/pbs-datastore/src/catalog.rs
+++ b/pbs-datastore/src/catalog.rs
@@ -18,6 +18,11 @@ use crate::file_formats::{PROXMOX_CATALOG_FILE_MAGIC_1_0, PROXMOX_CATALOG_FILE_M
 /// A file list catalog simply stores a directory tree. Such catalogs may be used as index to do a
 /// fast search for files.
 pub trait BackupCatalogWriter {
+    fn start_archive(&mut self, name: &CStr) -> Result<(), Error>;
+    fn end_archive(
+        &mut self,
+        appendix: Option<pxar::encoder::AppendixStartOffset>,
+    ) -> Result<(), Error>;
     fn start_directory(&mut self, name: &CStr) -> Result<(), Error>;
     fn end_directory(&mut self) -> Result<(), Error>;
     fn add_file(
@@ -50,6 +55,7 @@ pub enum CatalogEntryType {
     Directory = b'd',
     File = b'f',
     AppendixRef = b'r',
+    Archive = b'a',
     Symlink = b'l',
     Hardlink = b'h',
     BlockDevice = b'b',
@@ -66,6 +72,7 @@ impl TryFrom<u8> for CatalogEntryType {
             b'd' => CatalogEntryType::Directory,
             b'f' => CatalogEntryType::File,
             b'r' => CatalogEntryType::AppendixRef,
+            b'a' => CatalogEntryType::Archive,
             b'l' => CatalogEntryType::Symlink,
             b'h' => CatalogEntryType::Hardlink,
             b'b' => CatalogEntryType::BlockDevice,
@@ -83,6 +90,7 @@ impl From<&DirEntryAttribute> for CatalogEntryType {
             DirEntryAttribute::Directory { .. } => CatalogEntryType::Directory,
             DirEntryAttribute::File { .. } => CatalogEntryType::File,
             DirEntryAttribute::AppendixRef { .. } => CatalogEntryType::AppendixRef,
+            DirEntryAttribute::Archive { .. } => CatalogEntryType::Archive,
             DirEntryAttribute::Symlink => CatalogEntryType::Symlink,
             DirEntryAttribute::Hardlink => CatalogEntryType::Hardlink,
             DirEntryAttribute::BlockDevice => CatalogEntryType::BlockDevice,
@@ -121,10 +129,22 @@ impl AppendixRefOffset {
     }
 }
 
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub struct AppendixStartOffset {
+    offset: u64,
+}
+
+impl AppendixStartOffset {
+    pub fn raw(&self) -> u64 {
+        self.offset
+    }
+}
+
 #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
 pub enum Offset {
     FileOffset { offset: u64 },
     AppendixRefOffset { offset: u64 },
+    AppendixStartOffset { offset: u64 },
 }
 
 /// Represents a named directory entry
@@ -160,6 +180,10 @@ pub enum DirEntryAttribute {
         ctime: i64,
         appendix_ref_offset: AppendixRefOffset,
     },
+    Archive {
+        start: u64,
+        appendix_offset: AppendixStartOffset,
+    },
     Symlink,
     Hardlink,
     BlockDevice,
@@ -236,6 +260,13 @@ impl DirEntry {
                     },
                 }
             }
+            (CatalogEntryType::Archive, Some(Offset::AppendixStartOffset { offset })) => DirEntry {
+                name,
+                attr: DirEntryAttribute::Archive {
+                    start,
+                    appendix_offset: AppendixStartOffset { offset },
+                },
+            },
             _ => panic!("unexpected parameters '{etype}' and '{offset:?}'"),
         }
     }
@@ -246,6 +277,7 @@ impl DirEntry {
             DirEntryAttribute::Directory { .. } => pxar::mode::IFDIR,
             DirEntryAttribute::File { .. } => pxar::mode::IFREG,
             DirEntryAttribute::AppendixRef { .. } => pxar::mode::IFREG,
+            DirEntryAttribute::Archive { .. } => pxar::mode::IFDIR,
             DirEntryAttribute::Symlink => pxar::mode::IFLNK,
             DirEntryAttribute::Hardlink => return None,
             DirEntryAttribute::BlockDevice => pxar::mode::IFBLK,
@@ -258,6 +290,12 @@ impl DirEntry {
     /// Check if DirEntry is a directory
     pub fn is_directory(&self) -> bool {
         matches!(self.attr, DirEntryAttribute::Directory { .. })
+            || matches!(self.attr, DirEntryAttribute::Archive { .. })
+    }
+
+    /// Check if DirEntry is an archive
+    pub fn is_archive(&self) -> bool {
+        matches!(self.attr, DirEntryAttribute::Archive { .. })
     }
 
     /// Check if DirEntry is a symlink
@@ -285,6 +323,20 @@ impl DirInfo {
 
     fn encode_entry<W: Write>(writer: &mut W, entry: &DirEntry, pos: u64) -> Result<(), Error> {
         match entry {
+            DirEntry {
+                name,
+                attr:
+                    DirEntryAttribute::Archive {
+                        start,
+                        appendix_offset,
+                    },
+            } => {
+                writer.write_all(&[CatalogEntryType::Archive as u8])?;
+                catalog_encode_u64(writer, name.len() as u64)?;
+                writer.write_all(name)?;
+                catalog_encode_u64(writer, appendix_offset.raw())?;
+                catalog_encode_u64(writer, pos - start)?;
+            }
             DirEntry {
                 name,
                 attr: DirEntryAttribute::Directory { start },
@@ -427,6 +479,19 @@ impl DirInfo {
             cursor.read_exact(name)?;
 
             let cont = match etype {
+                CatalogEntryType::Archive => {
+                    let offset = catalog_decode_u64(&mut cursor)?;
+                    let start = catalog_decode_u64(&mut cursor)?;
+                    callback(
+                        etype,
+                        name,
+                        start,
+                        0,
+                        0,
+                        0,
+                        Some(Offset::AppendixStartOffset { offset }),
+                    )?
+                }
                 CatalogEntryType::Directory => {
                     let offset = catalog_decode_u64(&mut cursor)?;
                     callback(etype, name, offset, 0, 0, 0, None)?
@@ -533,6 +598,51 @@ impl<W: Write> CatalogWriter<W> {
 }
 
 impl<W: Write> BackupCatalogWriter for CatalogWriter<W> {
+    fn start_archive(&mut self, name: &CStr) -> Result<(), Error> {
+        let new = DirInfo::new(name.to_owned());
+        self.dirstack.push(new);
+        Ok(())
+    }
+
+    fn end_archive(
+        &mut self,
+        appendix: Option<pxar::encoder::AppendixStartOffset>,
+    ) -> Result<(), Error> {
+        let (start, name) = match self.dirstack.pop() {
+            Some(dir) => {
+                let start = self.pos;
+                let (name, data) = dir.encode(start)?;
+                self.write_all(&data)?;
+                (start, name)
+            }
+            None => {
+                bail!("got unexpected end_directory level 0");
+            }
+        };
+
+        let current = self
+            .dirstack
+            .last_mut()
+            .ok_or_else(|| format_err!("outside root"))?;
+        let name = name.to_bytes().to_vec();
+        let appendix_offset = if let Some(appendix) = appendix {
+            AppendixStartOffset {
+                offset: appendix.raw(),
+            }
+        } else {
+            AppendixStartOffset { offset: 0 }
+        };
+        current.entries.push(DirEntry {
+            name,
+            attr: DirEntryAttribute::Archive {
+                start,
+                appendix_offset,
+            },
+        });
+
+        Ok(())
+    }
+
     fn start_directory(&mut self, name: &CStr) -> Result<(), Error> {
         let new = DirInfo::new(name.to_owned());
         self.dirstack.push(new);
@@ -746,10 +856,33 @@ impl<R: Read + Seek> CatalogReader<R> {
         })
     }
 
+    pub fn appendix_offset(
+        &mut self,
+        archive_name: &[u8],
+    ) -> Result<Option<AppendixStartOffset>, Error> {
+        let root = self.root()?;
+        let dir_entry = self.lookup(&root, archive_name)?.unwrap();
+        if let DirEntry {
+            attr: DirEntryAttribute::Archive {
+                appendix_offset, ..
+            },
+            ..
+        } = dir_entry
+        {
+            if appendix_offset.raw() != 0 {
+                return Ok(Some(appendix_offset));
+            } else {
+                return Ok(None);
+            }
+        }
+        Ok(None)
+    }
+
     /// Read all directory entries
     pub fn read_dir(&mut self, parent: &DirEntry) -> Result<Vec<DirEntry>, Error> {
         let start = match parent.attr {
             DirEntryAttribute::Directory { start } => start,
+            DirEntryAttribute::Archive { start, .. } => start,
             _ => bail!("parent is not a directory - internal error"),
         };
 
@@ -813,6 +946,7 @@ impl<R: Read + Seek> CatalogReader<R> {
     ) -> Result<Option<DirEntry>, Error> {
         let start = match parent.attr {
             DirEntryAttribute::Directory { start } => start,
+            DirEntryAttribute::Archive { start, .. } => start,
             _ => bail!("parent is not a directory - internal error"),
         };
 
@@ -822,7 +956,7 @@ impl<R: Read + Seek> CatalogReader<R> {
         DirInfo::parse(
             &data,
             self.magic,
-            |etype, name, offset, size, mtime, ctime, link_offset| {
+            |etype, name, offset, size, mtime, ctime, archive_offset| {
                 if name != filename {
                     return Ok(true);
                 }
@@ -834,7 +968,7 @@ impl<R: Read + Seek> CatalogReader<R> {
                     size,
                     mtime,
                     ctime,
-                    link_offset,
+                    archive_offset,
                 );
                 item = Some(entry);
                 Ok(false) // stop parsing
@@ -868,6 +1002,14 @@ impl<R: Read + Seek> CatalogReader<R> {
                 path.push(name);
 
                 match etype {
+                    CatalogEntryType::Archive => {
+                        log::info!("{} {:?}", etype, path);
+                        if offset > start {
+                            bail!("got wrong archive offset ({} > {})", offset, start);
+                        }
+                        let pos = start - offset;
+                        self.dump_dir(&path, pos)?;
+                    }
                     CatalogEntryType::Directory => {
                         log::info!("{} {:?}", etype, path);
                         if offset > start {
@@ -1208,7 +1350,11 @@ impl ArchiveEntry {
                 Some(entry_type) => CatalogEntryType::from(entry_type).to_string(),
                 None => "v".to_owned(),
             },
-            leaf: !matches!(entry_type, None | Some(DirEntryAttribute::Directory { .. })),
+            leaf: !matches!(
+                entry_type,
+                None | Some(DirEntryAttribute::Directory { .. })
+                    | Some(DirEntryAttribute::Archive { .. })
+            ),
             size,
             mtime: match entry_type {
                 Some(DirEntryAttribute::File { mtime, .. }) => Some(*mtime),
-- 
2.39.2






More information about the pbs-devel mailing list