[pbs-devel] [RFC proxmox-backup 19/20] fix #3174: archiver: reuse files with unchanged metadata
Christian Ebner
c.ebner at proxmox.com
Fri Sep 22 09:16:20 CEST 2023
During pxar archive encoding, check regular files against their
previous backup catalog's metadata, if present.
For files with unchanged metadata whose size exceeds a given
threshold, mark the entries as appendix references in the pxar
archive instead of re-encoding them, and append the chunks
containing the file payload to the appendix.
Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
pbs-client/src/pxar/create.rs | 149 +++++++++++++++++++++-
src/tape/file_formats/snapshot_archive.rs | 2 +-
2 files changed, 147 insertions(+), 4 deletions(-)
diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
index d6afc465..cb9af26f 100644
--- a/pbs-client/src/pxar/create.rs
+++ b/pbs-client/src/pxar/create.rs
@@ -24,7 +24,7 @@ use proxmox_io::vec;
use proxmox_lang::c_str;
use proxmox_sys::fs::{self, acl, xattr};
-use pbs_datastore::catalog::{BackupCatalogWriter, CatalogReader};
+use pbs_datastore::catalog::{BackupCatalogWriter, CatalogReader, DirEntryAttribute};
use pbs_datastore::dynamic_index::{DynamicEntry, DynamicIndexReader};
use crate::inject_reused_chunks::InjectChunks;
@@ -32,6 +32,8 @@ use crate::pxar::metadata::errno_is_unsupported;
use crate::pxar::tools::assert_single_path_component;
use crate::pxar::Flags;
+const MAX_FILE_SIZE: u64 = 128;
+
/// Pxar options for creating a pxar archive/stream
#[derive(Default)]
pub struct PxarCreateOptions {
@@ -218,7 +220,14 @@ where
archiver
.archive_dir_contents(&mut encoder, source_dir, true)
.await?;
- encoder.finish().await?;
+
+ if archiver.inject.1.len() > 0 {
+ let (appendix_offset, appendix_size) = archiver.add_appendix(&mut encoder).await?;
+ encoder.finish(Some((appendix_offset, appendix_size))).await?;
+ } else {
+ encoder.finish(None).await?;
+ }
+
Ok(())
}
@@ -529,6 +538,132 @@ impl Archiver {
Ok(())
}
+ async fn add_appendix<T: SeqWrite + Send>(
+ &mut self,
+ encoder: &mut Encoder<'_, T>,
+ ) -> Result<(LinkOffset, u64), Error> {
+ let total = self
+ .inject
+ .1
+ .iter()
+ .fold(0, |sum, inject| sum + inject.end());
+ let appendix_offset = encoder.add_appendix(total).await?;
+ let mut boundaries = self.forced_boundaries.lock().unwrap();
+ let mut position = encoder.position_add(0);
+
+ // Inject reused chunks in patches of 128 to not exceed upload post req size limit
+ for injects in self.inject.1.chunks(128) {
+ let size = injects
+ .iter()
+ .fold(0, |sum, inject| sum + inject.end() as usize);
+ let inject_chunks = InjectChunks {
+ boundary: position,
+ chunks: injects.to_vec(),
+ size,
+ };
+ boundaries.push_back(inject_chunks);
+ position = encoder.position_add(size as u64);
+ }
+
+ Ok((appendix_offset, total))
+ }
+
+ async fn reuse_if_metadata_unchanged<T: SeqWrite + Send>(
+ &mut self,
+ encoder: &mut Encoder<'_, T>,
+ c_file_name: &CStr,
+ metadata: &Metadata,
+ stat: &FileStat,
+ ) -> Result<bool, Error> {
+ let prev_ref = match self.previous_ref {
+ None => return Ok(false),
+ Some(ref mut prev_ref) => prev_ref
+ };
+
+ let path = Path::new(prev_ref.archive_name.as_str()).join(self.path.clone());
+ let catalog_entry = prev_ref
+ .catalog
+ .lookup_recursive(path.as_os_str().as_bytes())?;
+
+ match catalog_entry.attr {
+ DirEntryAttribute::File {
+ size,
+ mtime,
+ link_offset,
+ } => {
+ let file_size = stat.st_size as u64;
+ if mtime == stat.st_mtime && size == file_size {
+ if let Some(ref catalog) = self.catalog {
+ catalog.lock().unwrap().add_file(
+ c_file_name,
+ file_size,
+ stat.st_mtime,
+ link_offset,
+ )?;
+ }
+
+ // Filename header
+ let mut metadata_bytes = std::mem::size_of::<pxar::format::Header>();
+ // Filename payload
+ metadata_bytes += std::mem::size_of_val(c_file_name);
+ // Metadata with headers and payloads
+ metadata_bytes += metadata.calculate_byte_len();
+ // Payload header
+ metadata_bytes += std::mem::size_of::<pxar::format::Header>();
+
+ let metadata_bytes = u64::try_from(metadata_bytes)?;
+ let chunk_start_offset = link_offset.raw();
+ let start = chunk_start_offset;
+ let end = chunk_start_offset + metadata_bytes + file_size;
+ let (indices, total_size, padding_start) =
+ prev_ref.index.indices(start, end)?;
+
+ let mut appendix_offset = self.inject.0 as u64 + padding_start;
+
+ if let (Some(current_end), Some(new_start)) =
+ (self.inject.1.last(), indices.first())
+ {
+ if new_start.digest() == current_end.digest() {
+ // Already got that chunk, do not append it again and correct
+ // appendix_offset to be relative to chunk before this one
+ appendix_offset -= new_start.end();
+ if indices.len() > 1 {
+ // Append all following chunks
+ self.inject.0 += indices[1..]
+ .iter()
+ .fold(0, |sum, index| sum + index.end() as usize);
+ self.inject.1.extend_from_slice(&indices[1..]);
+ }
+ }
+ } else {
+ self.inject.0 += total_size;
+ self.inject.1.extend_from_slice(&indices);
+ }
+
+ let file_name: &Path = OsStr::from_bytes(c_file_name.to_bytes()).as_ref();
+ let _offset = self
+ .add_appendix_ref(
+ encoder,
+ file_name,
+ &metadata,
+ appendix_offset,
+ file_size,
+ )
+ .await?;
+
+ return Ok(true);
+ }
+ }
+ DirEntryAttribute::Hardlink => {
+ // Catalog contains a hardlink, but the hard link was not present in the current
+ // pxar archive. So be sure to reencode this file instead of reusing it.
+ return Ok(false)
+ }
+ _ => println!("Unexpected attribute type, expected 'File' or 'Hardlink'"),
+ }
+ Ok(false)
+ }
+
async fn add_entry<T: SeqWrite + Send>(
&mut self,
encoder: &mut Encoder<'_, T>,
@@ -595,6 +730,14 @@ impl Archiver {
}
let file_size = stat.st_size as u64;
+ if file_size > MAX_FILE_SIZE
+ && self
+ .reuse_if_metadata_unchanged(encoder, c_file_name, &metadata, stat)
+ .await?
+ {
+ return Ok(());
+ }
+
let offset: LinkOffset = self
.add_regular_file(encoder, fd, file_name, &metadata, file_size)
.await?;
@@ -712,7 +855,7 @@ impl Archiver {
self.fs_feature_flags = old_fs_feature_flags;
self.current_st_dev = old_st_dev;
- encoder.finish().await?;
+ encoder.finish(None).await?;
result
}
diff --git a/src/tape/file_formats/snapshot_archive.rs b/src/tape/file_formats/snapshot_archive.rs
index 252384b5..4bbf4727 100644
--- a/src/tape/file_formats/snapshot_archive.rs
+++ b/src/tape/file_formats/snapshot_archive.rs
@@ -88,7 +88,7 @@ pub fn tape_write_snapshot_archive<'a>(
proxmox_lang::io_bail!("file '{}' shrunk while reading", filename);
}
}
- encoder.finish()?;
+ encoder.finish(None)?;
Ok(())
});
--
2.39.2
More information about the pbs-devel
mailing list