[pbs-devel] [PATCH proxmox-backup v3 1/3] partial fix #3701: sync job: pull: add transfer-last parameter

Stefan Hanreich s.hanreich at proxmox.com
Thu Apr 13 17:41:54 CEST 2023


Specifying the transfer-last parameter limits the amount of backups
that get synced via the pull command/sync job. The parameter specifies
how many of the N latest backups should get pulled/synced. All other
backups will get skipped.

This is particularly useful in situations where the sync target has
less disk space than the source. Syncing all backups from the source
is not possible if there is not enough disk space on the target.
Additionally this can be used for limiting the amount of data
transferred, reducing load on the network.

The newest backup will always get re-synced, regardless of the setting
of the transfer-last parameter.

Signed-off-by: Stefan Hanreich <s.hanreich at proxmox.com>
---
 pbs-api-types/src/jobs.rs         | 11 +++++
 src/api2/config/sync.rs           |  9 ++++
 src/api2/pull.rs                  | 10 ++++-
 src/bin/proxmox-backup-manager.rs | 11 ++++-
 src/server/pull.rs                | 68 +++++++++++++++++++++++--------
 5 files changed, 91 insertions(+), 18 deletions(-)

diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs
index cf7618c4..b9f57719 100644
--- a/pbs-api-types/src/jobs.rs
+++ b/pbs-api-types/src/jobs.rs
@@ -444,6 +444,11 @@ pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new(
 pub const GROUP_FILTER_LIST_SCHEMA: Schema =
     ArraySchema::new("List of group filters.", &GROUP_FILTER_SCHEMA).schema();
 
+pub const TRANSFER_LAST_SCHEMA: Schema =
+    IntegerSchema::new("The maximum amount of snapshots to be transferred (per group).")
+        .minimum(1)
+        .schema();
+
 #[api(
     properties: {
         id: {
@@ -493,6 +498,10 @@ pub const GROUP_FILTER_LIST_SCHEMA: Schema =
             schema: GROUP_FILTER_LIST_SCHEMA,
             optional: true,
         },
+        "transfer-last": {
+            schema: TRANSFER_LAST_SCHEMA,
+            optional: true,
+        },
     }
 )]
 #[derive(Serialize, Deserialize, Clone, Updater, PartialEq)]
@@ -522,6 +531,8 @@ pub struct SyncJobConfig {
     pub group_filter: Option<Vec<GroupFilter>>,
     #[serde(flatten)]
     pub limit: RateLimitConfig,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub transfer_last: Option<usize>,
 }
 
 impl SyncJobConfig {
diff --git a/src/api2/config/sync.rs b/src/api2/config/sync.rs
index bd7373df..01e5f2ce 100644
--- a/src/api2/config/sync.rs
+++ b/src/api2/config/sync.rs
@@ -215,6 +215,8 @@ pub enum DeletableProperty {
     RemoteNs,
     /// Delete the max_depth property,
     MaxDepth,
+    /// Delete the transfer_last property,
+    TransferLast,
 }
 
 #[api(
@@ -309,6 +311,9 @@ pub fn update_sync_job(
                 DeletableProperty::MaxDepth => {
                     data.max_depth = None;
                 }
+                DeletableProperty::TransferLast => {
+                    data.transfer_last = None;
+                }
             }
         }
     }
@@ -343,6 +348,9 @@ pub fn update_sync_job(
     if let Some(group_filter) = update.group_filter {
         data.group_filter = Some(group_filter);
     }
+    if let Some(transfer_last) = update.transfer_last {
+        data.transfer_last = Some(transfer_last);
+    }
 
     if update.limit.rate_in.is_some() {
         data.limit.rate_in = update.limit.rate_in;
@@ -507,6 +515,7 @@ acl:1:/remote/remote1/remotestore1:write at pbs:RemoteSyncOperator
         group_filter: None,
         schedule: None,
         limit: pbs_api_types::RateLimitConfig::default(), // no limit
+        transfer_last: None,
     };
 
     // should work without ACLs
diff --git a/src/api2/pull.rs b/src/api2/pull.rs
index b2473ec8..daeba7cf 100644
--- a/src/api2/pull.rs
+++ b/src/api2/pull.rs
@@ -10,6 +10,7 @@ use pbs_api_types::{
     Authid, BackupNamespace, GroupFilter, RateLimitConfig, SyncJobConfig, DATASTORE_SCHEMA,
     GROUP_FILTER_LIST_SCHEMA, NS_MAX_DEPTH_REDUCED_SCHEMA, PRIV_DATASTORE_BACKUP,
     PRIV_DATASTORE_PRUNE, PRIV_REMOTE_READ, REMOTE_ID_SCHEMA, REMOVE_VANISHED_BACKUPS_SCHEMA,
+    TRANSFER_LAST_SCHEMA,
 };
 use pbs_config::CachedUserInfo;
 use proxmox_rest_server::WorkerTask;
@@ -76,6 +77,7 @@ impl TryFrom<&SyncJobConfig> for PullParameters {
             sync_job.max_depth,
             sync_job.group_filter.clone(),
             sync_job.limit.clone(),
+            sync_job.transfer_last,
         )
     }
 }
@@ -201,7 +203,11 @@ pub fn do_sync_job(
             limit: {
                 type: RateLimitConfig,
                 flatten: true,
-            }
+            },
+            "transfer-last": {
+                schema: TRANSFER_LAST_SCHEMA,
+                optional: true,
+            },
         },
     },
     access: {
@@ -225,6 +231,7 @@ async fn pull(
     max_depth: Option<usize>,
     group_filter: Option<Vec<GroupFilter>>,
     limit: RateLimitConfig,
+    transfer_last: Option<usize>,
     rpcenv: &mut dyn RpcEnvironment,
 ) -> Result<String, Error> {
     let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
@@ -257,6 +264,7 @@ async fn pull(
         max_depth,
         group_filter,
         limit,
+        transfer_last,
     )?;
     let client = pull_params.client().await?;
 
diff --git a/src/bin/proxmox-backup-manager.rs b/src/bin/proxmox-backup-manager.rs
index 740fdc49..b4cb6cb3 100644
--- a/src/bin/proxmox-backup-manager.rs
+++ b/src/bin/proxmox-backup-manager.rs
@@ -13,7 +13,7 @@ use pbs_api_types::percent_encoding::percent_encode_component;
 use pbs_api_types::{
     BackupNamespace, GroupFilter, RateLimitConfig, SyncJobConfig, DATASTORE_SCHEMA,
     GROUP_FILTER_LIST_SCHEMA, IGNORE_VERIFIED_BACKUPS_SCHEMA, NS_MAX_DEPTH_SCHEMA,
-    REMOTE_ID_SCHEMA, REMOVE_VANISHED_BACKUPS_SCHEMA, UPID_SCHEMA,
+    REMOTE_ID_SCHEMA, REMOVE_VANISHED_BACKUPS_SCHEMA, TRANSFER_LAST_SCHEMA, UPID_SCHEMA,
     VERIFICATION_OUTDATED_AFTER_SCHEMA,
 };
 use pbs_client::{display_task_log, view_task_result};
@@ -272,6 +272,10 @@ fn task_mgmt_cli() -> CommandLineInterface {
                 schema: OUTPUT_FORMAT,
                 optional: true,
             },
+            "transfer-last": {
+                schema: TRANSFER_LAST_SCHEMA,
+                optional: true,
+            },
         }
    }
 )]
@@ -287,6 +291,7 @@ async fn pull_datastore(
     max_depth: Option<usize>,
     group_filter: Option<Vec<GroupFilter>>,
     limit: RateLimitConfig,
+    transfer_last: Option<usize>,
     param: Value,
 ) -> Result<Value, Error> {
     let output_format = get_output_format(&param);
@@ -319,6 +324,10 @@ async fn pull_datastore(
         args["remove-vanished"] = Value::from(remove_vanished);
     }
 
+    if transfer_last.is_some() {
+        args["transfer-last"] = json!(transfer_last)
+    }
+
     let mut limit_json = json!(limit);
     let limit_map = limit_json
         .as_object_mut()
diff --git a/src/server/pull.rs b/src/server/pull.rs
index 65eedf2c..37058f9b 100644
--- a/src/server/pull.rs
+++ b/src/server/pull.rs
@@ -59,6 +59,8 @@ pub(crate) struct PullParameters {
     group_filter: Option<Vec<GroupFilter>>,
     /// Rate limits for all transfers from `remote`
     limit: RateLimitConfig,
+    /// How many snapshots should be transferred at most (taking the newest N snapshots)
+    transfer_last: Option<usize>,
 }
 
 impl PullParameters {
@@ -78,6 +80,7 @@ impl PullParameters {
         max_depth: Option<usize>,
         group_filter: Option<Vec<GroupFilter>>,
         limit: RateLimitConfig,
+        transfer_last: Option<usize>,
     ) -> Result<Self, Error> {
         let store = DataStore::lookup_datastore(store, Some(Operation::Write))?;
 
@@ -109,6 +112,7 @@ impl PullParameters {
             max_depth,
             group_filter,
             limit,
+            transfer_last,
         })
     }
 
@@ -537,15 +541,24 @@ async fn pull_snapshot_from(
     Ok(())
 }
 
+enum SkipReason {
+    AlreadySynced,
+    TransferLast,
+}
+
 struct SkipInfo {
     oldest: i64,
     newest: i64,
-    count: u64,
+    already_synced_count: u64,
+    transfer_last_count: u64,
 }
 
 impl SkipInfo {
-    fn update(&mut self, backup_time: i64) {
-        self.count += 1;
+    fn update(&mut self, backup_time: i64, skip_reason: SkipReason) {
+        match skip_reason {
+            SkipReason::AlreadySynced => self.already_synced_count += 1,
+            SkipReason::TransferLast => self.transfer_last_count += 1,
+        }
 
         if backup_time < self.oldest {
             self.oldest = backup_time;
@@ -556,8 +569,13 @@ impl SkipInfo {
         }
     }
 
+    fn count(&self) -> u64 {
+        self.already_synced_count
+            .saturating_add(self.transfer_last_count)
+    }
+
     fn affected(&self) -> Result<String, Error> {
-        match self.count {
+        match self.count() {
             0 => Ok(String::new()),
             1 => Ok(proxmox_time::epoch_to_rfc3339_utc(self.oldest)?),
             _ => Ok(format!(
@@ -574,12 +592,23 @@ impl std::fmt::Display for SkipInfo {
         write!(
             f,
             "skipped: {} snapshot(s) ({}) older than the newest local snapshot",
-            self.count,
+            self.count(),
             self.affected().map_err(|_| std::fmt::Error)?
         )
     }
 }
 
+impl Default for SkipInfo {
+    fn default() -> Self {
+        SkipInfo {
+            oldest: i64::MAX,
+            newest: i64::MIN,
+            already_synced_count: 0,
+            transfer_last_count: 0,
+        }
+    }
+}
+
 /// Pulls a group according to `params`.
 ///
 /// Pulling a group consists of the following steps:
@@ -632,6 +661,7 @@ async fn pull_group(
     let fingerprint = client.fingerprint();
 
     let last_sync = params.store.last_successful_backup(&target_ns, group)?;
+    let last_sync_time = last_sync.unwrap_or(i64::MIN);
 
     let mut remote_snapshots = std::collections::HashSet::new();
 
@@ -640,11 +670,14 @@ async fn pull_group(
 
     progress.group_snapshots = list.len() as u64;
 
-    let mut skip_info = SkipInfo {
-        oldest: i64::MAX,
-        newest: i64::MIN,
-        count: 0,
-    };
+    let mut skip_info = SkipInfo::default();
+
+    let total_amount = list.len();
+
+    let cutoff = params
+        .transfer_last
+        .map(|count| total_amount.saturating_sub(count))
+        .unwrap_or_default();
 
     for (pos, item) in list.into_iter().enumerate() {
         let snapshot = item.backup;
@@ -661,11 +694,14 @@ async fn pull_group(
 
         remote_snapshots.insert(snapshot.time);
 
-        if let Some(last_sync_time) = last_sync {
-            if last_sync_time > snapshot.time {
-                skip_info.update(snapshot.time);
-                continue;
-            }
+        if last_sync_time > snapshot.time {
+            skip_info.update(snapshot.time, SkipReason::AlreadySynced);
+            continue;
+        }
+
+        if pos < cutoff && last_sync_time != snapshot.time {
+            skip_info.update(snapshot.time, SkipReason::TransferLast);
+            continue;
         }
 
         // get updated auth_info (new tickets)
@@ -725,7 +761,7 @@ async fn pull_group(
         }
     }
 
-    if skip_info.count > 0 {
+    if skip_info.count() > 0 {
         task_log!(worker, "{}", skip_info);
     }
 
-- 
2.30.2





More information about the pbs-devel mailing list