[pbs-devel] [RFC v2 proxmox-backup 11/23] fix #3174: catalog: introduce extended format v2

Christian Ebner c.ebner at proxmox.com
Mon Oct 9 13:51:27 CEST 2023


Increments the catalog file format to version 2. The new catalog
format introduces an extension to file entries in order to store the
additional ctime and pxar archive file offsets, needed for
metadata-based file change detection and pxar file entry reuse in the
pxar appendix section.

ctime is introduced in order to also detect file status changes since
the last backup run, e.g. updated extended attributes, which do not
update mtime.

Including the pxar archive file offset allows calculating two values:
the size of the file entry in the archive, needed for re-indexing the
chunks containing this entry, and the offset relative to the appendix
start in the pxar archive, needed for calculating the number of bytes
to skip over during sequential decoding while restoring the pxar
archive.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
Changes since version 1:
not present in previous version

 pbs-client/src/pxar/create.rs                 |  31 +-
 pbs-datastore/src/catalog.rs                  | 323 ++++++++++++++----
 pbs-datastore/src/file_formats.rs             |   3 +
 .../src/proxmox_restore_daemon/api.rs         |   6 +-
 4 files changed, 282 insertions(+), 81 deletions(-)

diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
index e7053d9e..a2338218 100644
--- a/pbs-client/src/pxar/create.rs
+++ b/pbs-client/src/pxar/create.rs
@@ -390,12 +390,6 @@ impl Archiver {
         patterns_count: usize,
     ) -> Result<(), Error> {
         let content = generate_pxar_excludes_cli(&self.patterns[..patterns_count]);
-        if let Some(ref catalog) = self.catalog {
-            catalog
-                .lock()
-                .unwrap()
-                .add_file(file_name, content.len() as u64, 0)?;
-        }
 
         let mut metadata = Metadata::default();
         metadata.stat.mode = pxar::format::mode::IFREG | 0o600;
@@ -405,6 +399,14 @@ impl Archiver {
             .await?;
         file.write_all(&content).await?;
 
+        if let Some(ref catalog) = self.catalog {
+            let link_offset = file.file_offset();
+            catalog
+                .lock()
+                .unwrap()
+                .add_file(file_name, content.len() as u64, 0, 0, link_offset)?;
+        }
+
         Ok(())
     }
 
@@ -572,17 +574,20 @@ impl Archiver {
                 }
 
                 let file_size = stat.st_size as u64;
-                if let Some(ref catalog) = self.catalog {
-                    catalog
-                        .lock()
-                        .unwrap()
-                        .add_file(c_file_name, file_size, stat.st_mtime)?;
-                }
-
                 let offset: LinkOffset = self
                     .add_regular_file(encoder, fd, file_name, &metadata, file_size)
                     .await?;
 
+                if let Some(ref catalog) = self.catalog {
+                    catalog.lock().unwrap().add_file(
+                        c_file_name,
+                        file_size,
+                        stat.st_mtime,
+                        stat.st_ctime,
+                        offset,
+                    )?;
+                }
+
                 if stat.st_nlink > 1 {
                     self.hardlinks
                         .insert(link_info, (self.path.clone(), offset));
diff --git a/pbs-datastore/src/catalog.rs b/pbs-datastore/src/catalog.rs
index 86e20c92..c4d1a4de 100644
--- a/pbs-datastore/src/catalog.rs
+++ b/pbs-datastore/src/catalog.rs
@@ -11,7 +11,7 @@ use pathpatterns::{MatchList, MatchType};
 use proxmox_io::ReadExt;
 use proxmox_schema::api;
 
-use crate::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
+use crate::file_formats::{PROXMOX_CATALOG_FILE_MAGIC_1_0, PROXMOX_CATALOG_FILE_MAGIC_2_0};
 
 /// Trait for writing file list catalogs.
 ///
@@ -20,7 +20,14 @@ use crate::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
 pub trait BackupCatalogWriter {
     fn start_directory(&mut self, name: &CStr) -> Result<(), Error>;
     fn end_directory(&mut self) -> Result<(), Error>;
-    fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error>;
+    fn add_file(
+        &mut self,
+        name: &CStr,
+        size: u64,
+        mtime: i64,
+        ctime: i64,
+        file_offset: pxar::encoder::LinkOffset,
+    ) -> Result<(), Error>;
     fn add_symlink(&mut self, name: &CStr) -> Result<(), Error>;
     fn add_hardlink(&mut self, name: &CStr) -> Result<(), Error>;
     fn add_block_device(&mut self, name: &CStr) -> Result<(), Error>;
@@ -81,6 +88,21 @@ impl fmt::Display for CatalogEntryType {
     }
 }
 
+#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Ord, PartialOrd)]
+pub struct FileOffset {
+    offset: u64,
+}
+
+impl FileOffset {
+    pub fn raw(&self) -> u64 {
+        self.offset
+    }
+}
+#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
+pub enum Offset {
+    FileOffset { offset: u64 },
+}
+
 /// Represents a named directory entry
 ///
 /// The ``attr`` property contain the exact type with type specific
@@ -91,11 +113,23 @@ pub struct DirEntry {
     pub attr: DirEntryAttribute,
 }
 
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct CatalogV2Extension {
+    pub ctime: i64,
+    pub file_offset: FileOffset,
+}
+
 /// Used to specific additional attributes inside DirEntry
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub enum DirEntryAttribute {
-    Directory { start: u64 },
-    File { size: u64, mtime: i64 },
+    Directory {
+        start: u64,
+    },
+    File {
+        size: u64,
+        mtime: i64,
+        extension: Option<CatalogV2Extension>,
+    },
     Symlink,
     Hardlink,
     BlockDevice,
@@ -105,40 +139,63 @@ pub enum DirEntryAttribute {
 }
 
 impl DirEntry {
-    fn new(etype: CatalogEntryType, name: Vec<u8>, start: u64, size: u64, mtime: i64) -> Self {
-        match etype {
-            CatalogEntryType::Directory => DirEntry {
+    fn new(
+        etype: CatalogEntryType,
+        name: Vec<u8>,
+        start: u64,
+        size: u64,
+        mtime: i64,
+        ctime: i64,
+        offset: Option<Offset>,
+    ) -> Self {
+        match (etype, offset) {
+            (CatalogEntryType::Directory, None) => DirEntry {
                 name,
                 attr: DirEntryAttribute::Directory { start },
             },
-            CatalogEntryType::File => DirEntry {
-                name,
-                attr: DirEntryAttribute::File { size, mtime },
-            },
-            CatalogEntryType::Symlink => DirEntry {
+            (CatalogEntryType::File, offset) => {
+                let extension = if let Some(Offset::FileOffset { offset }) = offset {
+                    Some(CatalogV2Extension {
+                        ctime,
+                        file_offset: FileOffset { offset },
+                    })
+                } else {
+                    None
+                };
+                DirEntry {
+                    name,
+                    attr: DirEntryAttribute::File {
+                        size,
+                        mtime,
+                        extension,
+                    },
+                }
+            }
+            (CatalogEntryType::Symlink, None) => DirEntry {
                 name,
                 attr: DirEntryAttribute::Symlink,
             },
-            CatalogEntryType::Hardlink => DirEntry {
+            (CatalogEntryType::Hardlink, None) => DirEntry {
                 name,
                 attr: DirEntryAttribute::Hardlink,
             },
-            CatalogEntryType::BlockDevice => DirEntry {
+            (CatalogEntryType::BlockDevice, None) => DirEntry {
                 name,
                 attr: DirEntryAttribute::BlockDevice,
             },
-            CatalogEntryType::CharDevice => DirEntry {
+            (CatalogEntryType::CharDevice, None) => DirEntry {
                 name,
                 attr: DirEntryAttribute::CharDevice,
             },
-            CatalogEntryType::Fifo => DirEntry {
+            (CatalogEntryType::Fifo, None) => DirEntry {
                 name,
                 attr: DirEntryAttribute::Fifo,
             },
-            CatalogEntryType::Socket => DirEntry {
+            (CatalogEntryType::Socket, None) => DirEntry {
                 name,
                 attr: DirEntryAttribute::Socket,
             },
+            _ => panic!("unexpected parameters '{etype}' and '{offset:?}'"),
         }
     }
 
@@ -197,13 +254,22 @@ impl DirInfo {
             }
             DirEntry {
                 name,
-                attr: DirEntryAttribute::File { size, mtime },
+                attr:
+                    DirEntryAttribute::File {
+                        size,
+                        mtime,
+                        extension,
+                    },
             } => {
                 writer.write_all(&[CatalogEntryType::File as u8])?;
                 catalog_encode_u64(writer, name.len() as u64)?;
                 writer.write_all(name)?;
                 catalog_encode_u64(writer, *size)?;
                 catalog_encode_i64(writer, *mtime)?;
+                if let Some(CatalogV2Extension { ctime, file_offset }) = extension {
+                    catalog_encode_i64(writer, *ctime)?;
+                    catalog_encode_u64(writer, file_offset.raw())?;
+                }
             }
             DirEntry {
                 name,
@@ -271,8 +337,11 @@ impl DirInfo {
         Ok((self.name, data))
     }
 
-    fn parse<C: FnMut(CatalogEntryType, &[u8], u64, u64, i64) -> Result<bool, Error>>(
+    fn parse<
+        C: FnMut(CatalogEntryType, &[u8], u64, u64, i64, i64, Option<Offset>) -> Result<bool, Error>,
+    >(
         data: &[u8],
+        catalog_version: Option<[u8; 8]>,
         mut callback: C,
     ) -> Result<(), Error> {
         let mut cursor = data;
@@ -300,14 +369,28 @@ impl DirInfo {
             let cont = match etype {
                 CatalogEntryType::Directory => {
                     let offset = catalog_decode_u64(&mut cursor)?;
-                    callback(etype, name, offset, 0, 0)?
+                    callback(etype, name, offset, 0, 0, 0, None)?
                 }
                 CatalogEntryType::File => {
                     let size = catalog_decode_u64(&mut cursor)?;
                     let mtime = catalog_decode_i64(&mut cursor)?;
-                    callback(etype, name, 0, size, mtime)?
+                    let (ctime, offset) = if let Some(version) = catalog_version {
+                        let mut ctime = 0;
+                        let mut offset = None;
+                        if version == PROXMOX_CATALOG_FILE_MAGIC_2_0 {
+                            ctime = catalog_decode_i64(&mut cursor)?;
+                            let file_offset = catalog_decode_u64(&mut cursor)?;
+                            offset = Some(Offset::FileOffset {
+                                offset: file_offset,
+                            })
+                        }
+                        (ctime, offset)
+                    } else {
+                        (0, None)
+                    };
+                    callback(etype, name, 0, size, mtime, ctime, offset)?
                 }
-                _ => callback(etype, name, 0, 0, 0)?,
+                _ => callback(etype, name, 0, 0, 0, 0, None)?,
             };
             if !cont {
                 return Ok(());
@@ -342,7 +425,7 @@ impl<W: Write> CatalogWriter<W> {
             dirstack: vec![DirInfo::new_rootdir()],
             pos: 0,
         };
-        me.write_all(&PROXMOX_CATALOG_FILE_MAGIC_1_0)?;
+        me.write_all(&PROXMOX_CATALOG_FILE_MAGIC_2_0)?;
         Ok(me)
     }
 
@@ -407,15 +490,29 @@ impl<W: Write> BackupCatalogWriter for CatalogWriter<W> {
         Ok(())
     }
 
-    fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error> {
+    fn add_file(
+        &mut self,
+        name: &CStr,
+        size: u64,
+        mtime: i64,
+        ctime: i64,
+        file_offset: pxar::encoder::LinkOffset,
+    ) -> Result<(), Error> {
         let dir = self
             .dirstack
             .last_mut()
             .ok_or_else(|| format_err!("outside root"))?;
         let name = name.to_bytes().to_vec();
+        let file_offset = FileOffset {
+            offset: file_offset.raw(),
+        };
         dir.entries.push(DirEntry {
             name,
-            attr: DirEntryAttribute::File { size, mtime },
+            attr: DirEntryAttribute::File {
+                size,
+                mtime,
+                extension: Some(CatalogV2Extension { ctime, file_offset }),
+            },
         });
         Ok(())
     }
@@ -502,12 +599,16 @@ impl<W: Write> BackupCatalogWriter for CatalogWriter<W> {
 /// Read Catalog files
 pub struct CatalogReader<R> {
     reader: R,
+    magic: Option<[u8; 8]>,
 }
 
 impl<R: Read + Seek> CatalogReader<R> {
     /// Create a new CatalogReader instance
     pub fn new(reader: R) -> Self {
-        Self { reader }
+        Self {
+            reader,
+            magic: None,
+        }
     }
 
     /// Print whole catalog to stdout
@@ -528,8 +629,11 @@ impl<R: Read + Seek> CatalogReader<R> {
         self.reader.seek(SeekFrom::Start(0))?;
         let mut magic = [0u8; 8];
         self.reader.read_exact(&mut magic)?;
-        if magic != PROXMOX_CATALOG_FILE_MAGIC_1_0 {
-            bail!("got unexpected magic number for catalog");
+        match magic {
+            PROXMOX_CATALOG_FILE_MAGIC_1_0 | PROXMOX_CATALOG_FILE_MAGIC_2_0 => {
+                self.magic = Some(magic)
+            }
+            _ => bail!("got unexpected magic number for catalog"),
         }
         self.reader.seek(SeekFrom::End(-8))?;
         let start = unsafe { self.reader.read_le_value::<u64>()? };
@@ -550,11 +654,23 @@ impl<R: Read + Seek> CatalogReader<R> {
 
         let mut entry_list = Vec::new();
 
-        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
-            let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
-            entry_list.push(entry);
-            Ok(true)
-        })?;
+        DirInfo::parse(
+            &data,
+            self.magic,
+            |etype, name, offset, size, mtime, ctime, link_offset| {
+                let entry = DirEntry::new(
+                    etype,
+                    name.to_vec(),
+                    start - offset,
+                    size,
+                    mtime,
+                    ctime,
+                    link_offset,
+                );
+                entry_list.push(entry);
+                Ok(true)
+            },
+        )?;
 
         Ok(entry_list)
     }
@@ -600,15 +716,27 @@ impl<R: Read + Seek> CatalogReader<R> {
         let data = self.read_raw_dirinfo_block(start)?;
 
         let mut item = None;
-        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
-            if name != filename {
-                return Ok(true);
-            }
+        DirInfo::parse(
+            &data,
+            self.magic,
+            |etype, name, offset, size, mtime, ctime, link_offset| {
+                if name != filename {
+                    return Ok(true);
+                }
 
-            let entry = DirEntry::new(etype, name.to_vec(), start - offset, size, mtime);
-            item = Some(entry);
-            Ok(false) // stop parsing
-        })?;
+                let entry = DirEntry::new(
+                    etype,
+                    name.to_vec(),
+                    start - offset,
+                    size,
+                    mtime,
+                    ctime,
+                    link_offset,
+                );
+                item = Some(entry);
+                Ok(false) // stop parsing
+            },
+        )?;
 
         Ok(item)
     }
@@ -628,35 +756,51 @@ impl<R: Read + Seek> CatalogReader<R> {
     pub fn dump_dir(&mut self, prefix: &std::path::Path, start: u64) -> Result<(), Error> {
         let data = self.read_raw_dirinfo_block(start)?;
 
-        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
-            let mut path = std::path::PathBuf::from(prefix);
-            let name: &OsStr = OsStrExt::from_bytes(name);
-            path.push(name);
-
-            match etype {
-                CatalogEntryType::Directory => {
-                    log::info!("{} {:?}", etype, path);
-                    if offset > start {
-                        bail!("got wrong directory offset ({} > {})", offset, start);
+        DirInfo::parse(
+            &data,
+            self.magic,
+            |etype, name, offset, size, mtime, ctime, link_offset| {
+                let mut path = std::path::PathBuf::from(prefix);
+                let name: &OsStr = OsStrExt::from_bytes(name);
+                path.push(name);
+
+                match etype {
+                    CatalogEntryType::Directory => {
+                        log::info!("{} {:?}", etype, path);
+                        if offset > start {
+                            bail!("got wrong directory offset ({} > {})", offset, start);
+                        }
+                        let pos = start - offset;
+                        self.dump_dir(&path, pos)?;
                     }
-                    let pos = start - offset;
-                    self.dump_dir(&path, pos)?;
-                }
-                CatalogEntryType::File => {
-                    let mut mtime_string = mtime.to_string();
-                    if let Ok(s) = proxmox_time::strftime_local("%FT%TZ", mtime) {
-                        mtime_string = s;
+                    CatalogEntryType::File => {
+                        let mut mtime_string = mtime.to_string();
+                        let mut ctime_string = ctime.to_string();
+                        if let Ok(s) = proxmox_time::strftime_local("%FT%TZ", mtime) {
+                            mtime_string = s;
+                        }
+                        if let Ok(s) = proxmox_time::strftime_local("%FT%TZ", ctime) {
+                            ctime_string = s;
+                        }
+
+                        log::info!(
+                            "{} {:?} {} {} {} {:?}",
+                            etype,
+                            path,
+                            size,
+                            mtime_string,
+                            ctime_string,
+                            link_offset
+                        );
+                    }
+                    _ => {
+                        log::info!("{} {:?}", etype, path);
                     }
-
-                    log::info!("{} {:?} {} {}", etype, path, size, mtime_string,);
-                }
-                _ => {
-                    log::info!("{} {:?}", etype, path);
                 }
-            }
 
-            Ok(true)
-        })
+                Ok(true)
+            },
+        )
     }
 
     /// Finds all entries matching the given match patterns and calls the
@@ -705,9 +849,24 @@ impl<R: Read + Seek> CatalogReader<R> {
             components.push(b'/');
             components.extend(&direntry.name);
             let mut entry = ArchiveEntry::new(&components, Some(&direntry.attr));
-            if let DirEntryAttribute::File { size, mtime } = direntry.attr {
+            if let DirEntryAttribute::File {
+                size,
+                mtime,
+                extension,
+            } = direntry.attr
+            {
                 entry.size = size.into();
                 entry.mtime = mtime.into();
+                entry.ctime = None;
+                entry.file_offset = None;
+                if let Some(CatalogV2Extension {
+                    ctime,
+                    file_offset: FileOffset { offset },
+                }) = extension
+                {
+                    entry.ctime = ctime.into();
+                    entry.file_offset = offset.into();
+                }
             }
             res.push(entry);
         }
@@ -916,6 +1075,12 @@ pub struct ArchiveEntry {
     /// The file "last modified" time stamp, if entry_type is 'f' (file)
     #[serde(skip_serializing_if = "Option::is_none")]
     pub mtime: Option<i64>,
+    /// The file "last status change" time stamp, if entry_type is 'f' (file)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ctime: Option<i64>,
+    /// The file archive offset, if entry_type is 'f' (file)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_offset: Option<u64>,
 }
 
 impl ArchiveEntry {
@@ -946,6 +1111,30 @@ impl ArchiveEntry {
                 Some(DirEntryAttribute::File { mtime, .. }) => Some(*mtime),
                 _ => None,
             },
+            ctime: match entry_type {
+                Some(DirEntryAttribute::File { extension, .. }) => {
+                    if let Some(CatalogV2Extension { ctime, .. }) = extension {
+                        Some(*ctime)
+                    } else {
+                        None
+                    }
+                }
+                _ => None,
+            },
+            file_offset: match entry_type {
+                Some(DirEntryAttribute::File { extension, .. }) => {
+                    if let Some(CatalogV2Extension {
+                        file_offset: FileOffset { offset },
+                        ..
+                    }) = extension
+                    {
+                        Some(*offset)
+                    } else {
+                        None
+                    }
+                }
+                _ => None,
+            },
         }
     }
 }
diff --git a/pbs-datastore/src/file_formats.rs b/pbs-datastore/src/file_formats.rs
index 73d67e20..4181f0f1 100644
--- a/pbs-datastore/src/file_formats.rs
+++ b/pbs-datastore/src/file_formats.rs
@@ -5,6 +5,9 @@ use endian_trait::Endian;
 // openssl::sha::sha256(b"Proxmox Backup Catalog file v1.0")[0..8]
 pub const PROXMOX_CATALOG_FILE_MAGIC_1_0: [u8; 8] = [145, 253, 96, 249, 196, 103, 88, 213];
 
+// openssl::sha::sha256(b"Proxmox Backup Catalog file v2.0")[0..8]
+pub const PROXMOX_CATALOG_FILE_MAGIC_2_0: [u8; 8] = [204, 223, 24, 211, 187, 125, 183, 226];
+
 // openssl::sha::sha256(b"Proxmox Backup uncompressed blob v1.0")[0..8]
 pub const UNCOMPRESSED_BLOB_MAGIC_1_0: [u8; 8] = [66, 171, 56, 7, 190, 131, 112, 161];
 
diff --git a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
index c4e97d33..fb12befa 100644
--- a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
+++ b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
@@ -24,7 +24,7 @@ use proxmox_sys::fs::read_subdir;
 
 use pbs_api_types::file_restore::{FileRestoreFormat, RestoreDaemonStatus};
 use pbs_client::pxar::{create_archive, Flags, PxarCreateOptions, ENCODER_MAX_ENTRIES};
-use pbs_datastore::catalog::{ArchiveEntry, DirEntryAttribute};
+use pbs_datastore::catalog::{ArchiveEntry, CatalogV2Extension, DirEntryAttribute, FileOffset};
 use pbs_tools::json::required_string_param;
 
 use pxar::encoder::aio::TokioWriter;
@@ -109,6 +109,10 @@ fn get_dir_entry(path: &Path) -> Result<DirEntryAttribute, Error> {
         libc::S_IFREG => DirEntryAttribute::File {
             size: stat.st_size as u64,
             mtime: stat.st_mtime,
+            extension: Some(CatalogV2Extension {
+                ctime: stat.st_ctime,
+                file_offset: FileOffset::default(),
+            }),
         },
         libc::S_IFDIR => DirEntryAttribute::Directory { start: 0 },
         _ => bail!("unsupported file type: {}", stat.st_mode),
-- 
2.39.2






More information about the pbs-devel mailing list