[pbs-devel] [RFC proxmox-backup 20/20] fix #3174: client: Add incremental flag to backup creation

Christian Ebner c.ebner at proxmox.com
Fri Sep 22 09:16:21 CEST 2023


When set, the catalog for the previous backup run and the corresponding
index file are fetched from the server and used as a reference during
pxar archive creation.
This allows the archiver to skip encoding the file payloads of unchanged
regular files and instead reference their existing chunks, which are
included in the new backup's index file. The result is a pxar archive
with an appendix section containing the payloads as a concatenation of
chunks.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
 proxmox-backup-client/src/main.rs | 107 ++++++++++++++++++++++++++++--
 1 file changed, 103 insertions(+), 4 deletions(-)

diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs
index 5945ae5d..90c73a55 100644
--- a/proxmox-backup-client/src/main.rs
+++ b/proxmox-backup-client/src/main.rs
@@ -1,5 +1,6 @@
 use std::collections::{HashSet, VecDeque};
 use std::io::{self, Read, Seek, SeekFrom, Write};
+use std::os::unix::fs::OpenOptionsExt;
 use std::path::{Path, PathBuf};
 use std::pin::Pin;
 use std::sync::{Arc, Mutex};
@@ -687,6 +688,12 @@ fn spawn_catalog_upload(
                optional: true,
                default: false,
            },
+           "incremental": {
+               type: Boolean,
+               description: "Only read files modified since last full-backup.",
+               optional: true,
+               default: false,
+           },
        }
    }
 )]
@@ -696,6 +703,7 @@ async fn create_backup(
     all_file_systems: bool,
     skip_lost_and_found: bool,
     dry_run: bool,
+    incremental: bool,
     _info: &ApiMethod,
     _rpcenv: &mut dyn RpcEnvironment,
 ) -> Result<Value, Error> {
@@ -849,7 +857,17 @@ async fn create_backup(
 
     let backup_time = backup_time_opt.unwrap_or_else(epoch_i64);
 
-    let client = connect_rate_limited(&repo, rate_limit)?;
+    let client = connect_rate_limited(&repo, rate_limit.clone())?;
+    let backup_group = BackupGroup::new(backup_type, backup_id);
+
+    let previous_snapshot = if incremental {
+        let snapshot =
+            api_datastore_latest_snapshot(&client, &repo.store(), &backup_ns, backup_group).await?;
+        Some(snapshot)
+    } else {
+        None
+    };
+
     record_repository(&repo);
 
     let snapshot = BackupDir::from((backup_type, backup_id.to_owned(), backup_time));
@@ -959,8 +977,8 @@ async fn create_backup(
         log::info!("{} {} '{}' to '{}' as {}", what, desc, file, repo, target);
     };
 
-    for (backup_type, filename, target, size) in upload_list {
-        match (backup_type, dry_run) {
+    for (backup_spec_type, filename, target, size) in upload_list {
+        match (backup_spec_type, dry_run) {
             // dry-run
             (BackupSpecificationType::CONFIG, true) => log_file("config file", &filename, &target),
             (BackupSpecificationType::LOGFILE, true) => log_file("log file", &filename, &target),
@@ -1010,12 +1028,44 @@ async fn create_backup(
                     .unwrap()
                     .start_directory(std::ffi::CString::new(target.as_str())?.as_c_str())?;
 
+                let known_chunks = Arc::new(Mutex::new(HashSet::new()));
+                let previous_ref = if incremental {
+                    match previous_manifest {
+                        None => None,
+                        Some(ref manifest) => {
+                            let reference_index = client
+                                .download_previous_dynamic_index(
+                                    &target,
+                                    &manifest,
+                                    known_chunks.clone(),
+                                )
+                                .await?;
+
+                            let reference_catalog = download_reference_catalog(
+                                &repo,
+                                previous_snapshot.as_ref().unwrap(),
+                                &backup_ns,
+                                crypt_config.clone(),
+                            )
+                            .await?;
+
+                            Some(pbs_client::pxar::PxarPrevRef {
+                                index: reference_index,
+                                catalog: reference_catalog,
+                                archive_name: target.clone(),
+                            })
+                        }
+                    }
+                } else {
+                    None
+                };
+
                 let pxar_options = pbs_client::pxar::PxarCreateOptions {
                     device_set: devices.clone(),
                     patterns: pattern_list.clone(),
                     entries_max: entries_max as usize,
                     skip_lost_and_found,
-                    previous_ref: None,
+                    previous_ref,
                 };
 
                 let upload_options = UploadOptions {
@@ -1116,6 +1166,55 @@ async fn create_backup(
     Ok(Value::Null)
 }
 
+/// Downloads the catalog of `previous_snapshot` into an anonymous temporary file.
+///
+/// A reader session is started on `repo`/`backup_ns`, the manifest fingerprint is checked
+/// against `crypt_config`, and the catalog contents are copied into the temporary file.
+/// Returns a [`CatalogReader`] over that file, rewound to offset 0.
+async fn download_reference_catalog(
+    repo: &BackupRepository,
+    previous_snapshot: &BackupDir,
+    backup_ns: &BackupNamespace,
+    crypt_config: Option<Arc<CryptConfig>>,
+) -> Result<CatalogReader<std::fs::File>, Error> {
+    // The parameters are already references; pass them on without re-borrowing.
+    let backup_reader = BackupReader::start(
+        connect(repo)?,
+        crypt_config.clone(),
+        repo.store(),
+        backup_ns,
+        previous_snapshot,
+        true,
+    )
+    .await?;
+
+    let (manifest, _) = backup_reader.download_manifest().await?;
+    manifest.check_fingerprint(crypt_config.as_ref().map(Arc::as_ref))?;
+
+    let index = backup_reader
+        .download_dynamic_index(&manifest, CATALOG_NAME)
+        .await?;
+    let most_used = index.find_most_used_chunks(8);
+    let crypt_mode = manifest.lookup_file_info(CATALOG_NAME)?.chunk_crypt_mode();
+    // Last use of `crypt_config`, so it is moved rather than cloned once more.
+    let chunk_reader = RemoteChunkReader::new(backup_reader, crypt_config, crypt_mode, most_used);
+    let mut reader = BufferedDynamicReader::new(index, chunk_reader);
+
+    // O_TMPFILE yields an unnamed file inside /tmp (see open(2)); nothing to unlink later.
+    let mut catalogfile = std::fs::OpenOptions::new()
+        .write(true)
+        .read(true)
+        .custom_flags(libc::O_TMPFILE)
+        .open("/tmp")?;
+
+    std::io::copy(&mut reader, &mut catalogfile)
+        .map_err(|err| format_err!("failed to download reference catalog - {}", err))?;
+    // Rewind so the returned reader starts at the beginning of the catalog.
+    catalogfile.seek(SeekFrom::Start(0))?;
+
+    Ok(CatalogReader::new(catalogfile))
+}
+
 async fn dump_image<W: Write>(
     client: Arc<BackupReader>,
     crypt_config: Option<Arc<CryptConfig>>,
-- 
2.39.2






More information about the pbs-devel mailing list