[pbs-devel] [PATCH v6 proxmox-backup 49/65] pxar: create: keep track of reused chunks and files

Christian Ebner c.ebner at proxmox.com
Tue May 14 12:34:05 CEST 2024


Track and log reused or re-encoded files, as well as the reused chunks
and their padding.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
 pbs-client/src/pxar/create.rs | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
index b2932c973..b03bd5a17 100644
--- a/pbs-client/src/pxar/create.rs
+++ b/pbs-client/src/pxar/create.rs
@@ -141,6 +141,17 @@ struct HardLinkInfo {
     st_ino: u64,
 }
 
+#[derive(Default)]
+struct ReuseStats {
+    files_reused_count: u64,
+    files_hardlink_count: u64,
+    files_reencoded_count: u64,
+    total_injected_count: u64,
+    partial_chunks_count: u64,
+    total_injected_size: u64,
+    total_reused_payload_size: u64,
+}
+
 struct Archiver {
     feature_flags: Flags,
     fs_feature_flags: Flags,
@@ -164,6 +175,7 @@ struct Archiver {
     cached_range: Range<u64>,
     cached_last_chunk: Option<ReusableDynamicEntry>,
     caching_enabled: bool,
+    reuse_stats: ReuseStats,
 }
 
 type Encoder<'a, T> = pxar::encoder::aio::Encoder<'a, T>;
@@ -273,6 +285,7 @@ where
         cached_last_chunk: None,
         cached_hardlinks: HashSet::new(),
         caching_enabled: false,
+        reuse_stats: ReuseStats::default(),
     };
 
     archiver
@@ -802,15 +815,22 @@ impl Archiver {
                 }
 
                 let offset: LinkOffset = if let Some(payload_offset) = payload_offset {
+                    self.reuse_stats.total_reused_payload_size +=
+                        file_size + size_of::<pxar::format::Header>() as u64;
+                    self.reuse_stats.files_reused_count += 1;
+
                     encoder
                         .add_payload_ref(metadata, file_name, file_size, payload_offset)
                         .await?
                 } else {
+                    self.reuse_stats.files_reencoded_count += 1;
+
                     self.add_regular_file(encoder, fd, file_name, metadata, file_size)
                         .await?
                 };
 
                 if stat.st_nlink > 1 {
+                    self.reuse_stats.files_hardlink_count += 1;
                     self.hardlinks
                         .insert(link_info, (self.path.clone(), offset));
                 }
@@ -1147,6 +1167,13 @@ impl Archiver {
                     HumanByte::from(chunk.padding),
                     HumanByte::from(chunk.size()),
                 );
+                self.reuse_stats.total_injected_size += chunk.size();
+                self.reuse_stats.total_injected_count += 1;
+
+                if chunk.padding > 0 {
+                    self.reuse_stats.partial_chunks_count += 1;
+                }
+
                 size = size.add(chunk.size());
                 chunk_list.push(chunk.clone());
             }
-- 
2.39.2





More information about the pbs-devel mailing list