[pbs-devel] [PATCH v6 proxmox-backup 22/29] fix #3174: client: Add detection mode to backup creation

Christian Ebner c.ebner at proxmox.com
Thu Jan 25 14:26:01 CET 2024


Introduces the `change-detection-mode` parameter to change file
encoding behavior.

When set to `metadata`, the catalog for the previous backup run and
the corresponding index file are fetched from the server and used as
reference during pxar archive creation.
This allows the archiver to skip encoding the file payloads for
unchanged regular files and instead reference their existing chunks,
which are included in the new backup's index file, creating a pxar
archive with an appendix section that contains the payloads as a
concatenation of chunks.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
Changes since v5:
- fix formatting using `cargo fmt`

 proxmox-backup-client/src/main.rs | 123 ++++++++++++++++++++++++++++--
 1 file changed, 115 insertions(+), 8 deletions(-)

diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs
index a087b43b..470399e8 100644
--- a/proxmox-backup-client/src/main.rs
+++ b/proxmox-backup-client/src/main.rs
@@ -1,5 +1,6 @@
 use std::collections::{HashSet, VecDeque};
 use std::io::{self, Read, Seek, SeekFrom, Write};
+use std::os::unix::fs::OpenOptionsExt;
 use std::path::{Path, PathBuf};
 use std::pin::Pin;
 use std::sync::{Arc, Mutex};
@@ -43,10 +44,10 @@ use pbs_client::tools::{
     CHUNK_SIZE_SCHEMA, REPO_URL_SCHEMA,
 };
 use pbs_client::{
-    delete_ticket_info, parse_backup_specification, view_task_result, BackupReader,
-    BackupRepository, BackupSpecificationType, BackupStats, BackupWriter, ChunkStream,
-    FixedChunkStream, HttpClient, PxarBackupStream, RemoteChunkReader, UploadOptions,
-    BACKUP_SOURCE_SCHEMA,
+    delete_ticket_info, parse_backup_detection_mode_specification, parse_backup_specification,
+    view_task_result, BackupDetectionMode, BackupReader, BackupRepository, BackupSpecificationType,
+    BackupStats, BackupWriter, ChunkStream, FixedChunkStream, HttpClient, PxarBackupStream,
+    RemoteChunkReader, UploadOptions, BACKUP_DETECTION_MODE_SPEC, BACKUP_SOURCE_SCHEMA,
 };
 use pbs_datastore::catalog::{CatalogReader, CatalogWriter};
 use pbs_datastore::chunk_store::verify_chunk_size;
@@ -666,6 +667,10 @@ fn spawn_catalog_upload(
                schema: TRAFFIC_CONTROL_BURST_SCHEMA,
                optional: true,
            },
+           "change-detection-mode": {
+               schema: BACKUP_DETECTION_MODE_SPEC,
+               optional: true,
+           },
            "exclude": {
                type: Array,
                description: "List of paths or patterns for matching files to exclude.",
@@ -849,7 +854,20 @@ async fn create_backup(
 
     let backup_time = backup_time_opt.unwrap_or_else(epoch_i64);
 
-    let client = connect_rate_limited(&repo, rate_limit)?;
+    let cd_mode = param["change-detection-mode"].as_str().unwrap_or("data");
+    let detection_mode = parse_backup_detection_mode_specification(cd_mode)?;
+
+    let client = connect_rate_limited(&repo, rate_limit.clone())?;
+    let backup_group = BackupGroup::new(backup_type, backup_id);
+
+    let previous_snapshot = if let BackupDetectionMode::Metadata(_) = detection_mode {
+        api_datastore_latest_snapshot(&client, &repo.store(), &backup_ns, backup_group)
+            .await
+            .ok()
+    } else {
+        None
+    };
+
     record_repository(&repo);
 
     let snapshot = BackupDir::from((backup_type, backup_id.to_owned(), backup_time));
@@ -959,8 +977,8 @@ async fn create_backup(
         log::info!("{} {} '{}' to '{}' as {}", what, desc, file, repo, target);
     };
 
-    for (backup_type, filename, target, size) in upload_list {
-        match (backup_type, dry_run) {
+    for (backup_spec_type, filename, target, size) in upload_list {
+        match (backup_spec_type, dry_run) {
             // dry-run
             (BackupSpecificationType::CONFIG, true) => log_file("config file", &filename, &target),
             (BackupSpecificationType::LOGFILE, true) => log_file("log file", &filename, &target),
@@ -1006,12 +1024,52 @@ async fn create_backup(
 
                 log_file("directory", &filename, &target);
 
+                let known_chunks = Arc::new(Mutex::new(HashSet::new()));
+                let mut previous_ref = None;
+
+                if let BackupDetectionMode::Metadata(ref archives) = detection_mode {
+                    if archives.is_empty() || archives.contains(&target) {
+                        if let Some(ref manifest) = previous_manifest {
+                            let reference_catalog = download_reference_catalog(
+                                &repo,
+                                previous_snapshot.as_ref().unwrap(),
+                                &backup_ns,
+                                crypt_config.clone(),
+                            )
+                            .await;
+
+                            if let Ok(reference_catalog) = reference_catalog {
+                                let reference_index = client
+                                    .download_previous_dynamic_index(
+                                        &target,
+                                        &manifest,
+                                        known_chunks.clone(),
+                                    )
+                                    .await;
+
+                                if let Ok(reference_index) = reference_index {
+                                    log::info!(
+                                        "Using previous catalog as metadata reference for '{target}'"
+                                    );
+                                    previous_ref = Some(pbs_client::pxar::PxarPrevRef {
+                                        index: reference_index,
+                                        catalog: reference_catalog,
+                                        archive_name: target.clone(),
+                                    });
+                                }
+                            }
+                        } else {
+                            log::info!("No previous manifest, fallback to regular mode");
+                        };
+                    }
+                }
+
                 let pxar_options = pbs_client::pxar::PxarCreateOptions {
                     device_set: devices.clone(),
                     patterns: pattern_list.clone(),
                     entries_max: entries_max as usize,
                     skip_lost_and_found,
-                    previous_ref: None,
+                    previous_ref,
                     archive_name: Some(std::ffi::CString::new(target.as_str())?),
                 };
 
@@ -1112,6 +1170,55 @@ async fn create_backup(
     Ok(Value::Null)
 }
 
+async fn download_reference_catalog(
+    repo: &BackupRepository,
+    previous_snapshot: &BackupDir,
+    backup_ns: &BackupNamespace,
+    crypt_config: Option<Arc<CryptConfig>>,
+) -> Result<CatalogReader<std::fs::File>, Error> {
+    let http_reader_client = connect(&repo)?;
+    let backup_reader = BackupReader::start(
+        &http_reader_client,
+        crypt_config.clone(),
+        repo.store(),
+        &backup_ns,
+        &previous_snapshot,
+        true,
+    )
+    .await?;
+
+    let (manifest, _) = backup_reader.download_manifest().await?;
+    manifest.check_fingerprint(crypt_config.as_ref().map(Arc::as_ref))?;
+
+    let index = backup_reader
+        .download_dynamic_index(&manifest, CATALOG_NAME)
+        .await?;
+    let most_used = index.find_most_used_chunks(8);
+    let file_info = manifest.lookup_file_info(CATALOG_NAME)?;
+
+    let chunk_reader = RemoteChunkReader::new(
+        backup_reader,
+        crypt_config.clone(),
+        file_info.chunk_crypt_mode(),
+        most_used,
+    );
+
+    let mut reader = BufferedDynamicReader::new(index, chunk_reader);
+
+    let mut catalogfile = std::fs::OpenOptions::new()
+        .write(true)
+        .read(true)
+        .custom_flags(libc::O_TMPFILE)
+        .open("/tmp")?;
+
+    std::io::copy(&mut reader, &mut catalogfile)
+        .map_err(|err| format_err!("failed to download reference catalog - {}", err))?;
+
+    catalogfile.seek(SeekFrom::Start(0))?;
+
+    Ok(CatalogReader::new(catalogfile))
+}
+
 async fn dump_image<W: Write>(
     client: Arc<BackupReader>,
     crypt_config: Option<Arc<CryptConfig>>,
-- 
2.39.2





More information about the pbs-devel mailing list