[pbs-devel] [PATCH proxmox-backup v3 1/3] partial fix #3701: sync job: pull: add transfer-last parameter
Fabian Grünbichler
f.gruenbichler at proxmox.com
Mon Apr 17 10:56:13 CEST 2023
On April 13, 2023 5:41 pm, Stefan Hanreich wrote:
> Specifying the transfer-last parameter limits the amount of backups
> that get synced via the pull command/sync job. The parameter specifies
> how many of the N latest backups should get pulled/synced. All other
> backups will get skipped.
>
> This is particularly useful in situations where the sync target has
> less disk space than the source. Syncing all backups from the source
> is not possible if there is not enough disk space on the target.
> Additionally this can be used for limiting the amount of data
> transferred, reducing load on the network.
>
> The newest backup will always get re-synced, regardless of the setting
> of the transfer-last parameter.
we had some off-list discussion about the logging issue, so mostly
rehashing this here..
>
> Signed-off-by: Stefan Hanreich <s.hanreich at proxmox.com>
> ---
> pbs-api-types/src/jobs.rs | 11 +++++
> src/api2/config/sync.rs | 9 ++++
> src/api2/pull.rs | 10 ++++-
> src/bin/proxmox-backup-manager.rs | 11 ++++-
> src/server/pull.rs | 68 +++++++++++++++++++++++--------
> 5 files changed, 91 insertions(+), 18 deletions(-)
>
> diff --git a/pbs-api-types/src/jobs.rs b/pbs-api-types/src/jobs.rs
> index cf7618c4..b9f57719 100644
> --- a/pbs-api-types/src/jobs.rs
> +++ b/pbs-api-types/src/jobs.rs
> @@ -444,6 +444,11 @@ pub const GROUP_FILTER_SCHEMA: Schema = StringSchema::new(
> pub const GROUP_FILTER_LIST_SCHEMA: Schema =
> ArraySchema::new("List of group filters.", &GROUP_FILTER_SCHEMA).schema();
>
> +pub const TRANSFER_LAST_SCHEMA: Schema =
> + IntegerSchema::new("The maximum amount of snapshots to be transferred (per group).")
nit: the description could also be read as "only transfer X snapshots in
this sync invocation". maybe we can find some phrasing that conveys both
- if there are more snapshots, they are skipped
- the count starts at the end, not at the beginning (implied by "last"
in the name)
maybe something like
"Limit transfer to last N snapshots (per group), skipping others."
> + .minimum(1)
> + .schema();
> +
> #[api(
> properties: {
> id: {
> @@ -493,6 +498,10 @@ pub const GROUP_FILTER_LIST_SCHEMA: Schema =
> schema: GROUP_FILTER_LIST_SCHEMA,
> optional: true,
> },
> + "transfer-last": {
> + schema: TRANSFER_LAST_SCHEMA,
> + optional: true,
> + },
> }
> )]
> #[derive(Serialize, Deserialize, Clone, Updater, PartialEq)]
> @@ -522,6 +531,8 @@ pub struct SyncJobConfig {
> pub group_filter: Option<Vec<GroupFilter>>,
> #[serde(flatten)]
> pub limit: RateLimitConfig,
> + #[serde(skip_serializing_if = "Option::is_none")]
> + pub transfer_last: Option<usize>,
> }
>
> impl SyncJobConfig {
> diff --git a/src/api2/config/sync.rs b/src/api2/config/sync.rs
> index bd7373df..01e5f2ce 100644
> --- a/src/api2/config/sync.rs
> +++ b/src/api2/config/sync.rs
> @@ -215,6 +215,8 @@ pub enum DeletableProperty {
> RemoteNs,
> /// Delete the max_depth property,
> MaxDepth,
> + /// Delete the transfer_last property,
> + TransferLast,
> }
>
> #[api(
> @@ -309,6 +311,9 @@ pub fn update_sync_job(
> DeletableProperty::MaxDepth => {
> data.max_depth = None;
> }
> + DeletableProperty::TransferLast => {
> + data.transfer_last = None;
> + }
> }
> }
> }
> @@ -343,6 +348,9 @@ pub fn update_sync_job(
> if let Some(group_filter) = update.group_filter {
> data.group_filter = Some(group_filter);
> }
> + if let Some(transfer_last) = update.transfer_last {
> + data.transfer_last = Some(transfer_last);
> + }
>
> if update.limit.rate_in.is_some() {
> data.limit.rate_in = update.limit.rate_in;
> @@ -507,6 +515,7 @@ acl:1:/remote/remote1/remotestore1:write at pbs:RemoteSyncOperator
> group_filter: None,
> schedule: None,
> limit: pbs_api_types::RateLimitConfig::default(), // no limit
> + transfer_last: None,
> };
>
> // should work without ACLs
> diff --git a/src/api2/pull.rs b/src/api2/pull.rs
> index b2473ec8..daeba7cf 100644
> --- a/src/api2/pull.rs
> +++ b/src/api2/pull.rs
> @@ -10,6 +10,7 @@ use pbs_api_types::{
> Authid, BackupNamespace, GroupFilter, RateLimitConfig, SyncJobConfig, DATASTORE_SCHEMA,
> GROUP_FILTER_LIST_SCHEMA, NS_MAX_DEPTH_REDUCED_SCHEMA, PRIV_DATASTORE_BACKUP,
> PRIV_DATASTORE_PRUNE, PRIV_REMOTE_READ, REMOTE_ID_SCHEMA, REMOVE_VANISHED_BACKUPS_SCHEMA,
> + TRANSFER_LAST_SCHEMA,
> };
> use pbs_config::CachedUserInfo;
> use proxmox_rest_server::WorkerTask;
> @@ -76,6 +77,7 @@ impl TryFrom<&SyncJobConfig> for PullParameters {
> sync_job.max_depth,
> sync_job.group_filter.clone(),
> sync_job.limit.clone(),
> + sync_job.transfer_last,
> )
> }
> }
> @@ -201,7 +203,11 @@ pub fn do_sync_job(
> limit: {
> type: RateLimitConfig,
> flatten: true,
> - }
> + },
> + "transfer-last": {
> + schema: TRANSFER_LAST_SCHEMA,
> + optional: true,
> + },
> },
> },
> access: {
> @@ -225,6 +231,7 @@ async fn pull(
> max_depth: Option<usize>,
> group_filter: Option<Vec<GroupFilter>>,
> limit: RateLimitConfig,
> + transfer_last: Option<usize>,
> rpcenv: &mut dyn RpcEnvironment,
> ) -> Result<String, Error> {
> let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> @@ -257,6 +264,7 @@ async fn pull(
> max_depth,
> group_filter,
> limit,
> + transfer_last,
> )?;
> let client = pull_params.client().await?;
>
> diff --git a/src/bin/proxmox-backup-manager.rs b/src/bin/proxmox-backup-manager.rs
> index 740fdc49..b4cb6cb3 100644
> --- a/src/bin/proxmox-backup-manager.rs
> +++ b/src/bin/proxmox-backup-manager.rs
> @@ -13,7 +13,7 @@ use pbs_api_types::percent_encoding::percent_encode_component;
> use pbs_api_types::{
> BackupNamespace, GroupFilter, RateLimitConfig, SyncJobConfig, DATASTORE_SCHEMA,
> GROUP_FILTER_LIST_SCHEMA, IGNORE_VERIFIED_BACKUPS_SCHEMA, NS_MAX_DEPTH_SCHEMA,
> - REMOTE_ID_SCHEMA, REMOVE_VANISHED_BACKUPS_SCHEMA, UPID_SCHEMA,
> + REMOTE_ID_SCHEMA, REMOVE_VANISHED_BACKUPS_SCHEMA, TRANSFER_LAST_SCHEMA, UPID_SCHEMA,
> VERIFICATION_OUTDATED_AFTER_SCHEMA,
> };
> use pbs_client::{display_task_log, view_task_result};
> @@ -272,6 +272,10 @@ fn task_mgmt_cli() -> CommandLineInterface {
> schema: OUTPUT_FORMAT,
> optional: true,
> },
> + "transfer-last": {
> + schema: TRANSFER_LAST_SCHEMA,
> + optional: true,
> + },
> }
> }
> )]
> @@ -287,6 +291,7 @@ async fn pull_datastore(
> max_depth: Option<usize>,
> group_filter: Option<Vec<GroupFilter>>,
> limit: RateLimitConfig,
> + transfer_last: Option<usize>,
> param: Value,
> ) -> Result<Value, Error> {
> let output_format = get_output_format(&param);
> @@ -319,6 +324,10 @@ async fn pull_datastore(
> args["remove-vanished"] = Value::from(remove_vanished);
> }
>
> + if transfer_last.is_some() {
> + args["transfer-last"] = json!(transfer_last)
> + }
> +
> let mut limit_json = json!(limit);
> let limit_map = limit_json
> .as_object_mut()
> diff --git a/src/server/pull.rs b/src/server/pull.rs
> index 65eedf2c..37058f9b 100644
> --- a/src/server/pull.rs
> +++ b/src/server/pull.rs
> @@ -59,6 +59,8 @@ pub(crate) struct PullParameters {
> group_filter: Option<Vec<GroupFilter>>,
> /// Rate limits for all transfers from `remote`
> limit: RateLimitConfig,
> + /// How many snapshots should be transferred at most (taking the newest N snapshots)
> + transfer_last: Option<usize>,
> }
>
> impl PullParameters {
> @@ -78,6 +80,7 @@ impl PullParameters {
> max_depth: Option<usize>,
> group_filter: Option<Vec<GroupFilter>>,
> limit: RateLimitConfig,
> + transfer_last: Option<usize>,
> ) -> Result<Self, Error> {
> let store = DataStore::lookup_datastore(store, Some(Operation::Write))?;
>
> @@ -109,6 +112,7 @@ impl PullParameters {
> max_depth,
> group_filter,
> limit,
> + transfer_last,
> })
> }
>
> @@ -537,15 +541,24 @@ async fn pull_snapshot_from(
> Ok(())
> }
>
> +enum SkipReason {
> + AlreadySynced,
> + TransferLast,
> +}
> +
this could be kept, but
> struct SkipInfo {
> oldest: i64,
> newest: i64,
> - count: u64,
> + already_synced_count: u64,
> + transfer_last_count: u64,
> }
instead of this, we could just store the reason here.
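for example, a rough sketch of what I mean (field name just for
illustration):

    struct SkipInfo {
        oldest: i64,
        newest: i64,
        count: u64,
        skip_reason: SkipReason,
    }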
>
> impl SkipInfo {
> - fn update(&mut self, backup_time: i64) {
> - self.count += 1;
> + fn update(&mut self, backup_time: i64, skip_reason: SkipReason) {
> + match skip_reason {
> + SkipReason::AlreadySynced => self.already_synced_count += 1,
> + SkipReason::TransferLast => self.transfer_last_count += 1,
> + }
this would then not be needed
>
> if backup_time < self.oldest {
> self.oldest = backup_time;
> @@ -556,8 +569,13 @@ impl SkipInfo {
> }
> }
>
> + fn count(&self) -> u64 {
> + self.already_synced_count
> + .saturating_add(self.transfer_last_count)
> + }
> +
neither would this
> fn affected(&self) -> Result<String, Error> {
> - match self.count {
> + match self.count() {
or this
> 0 => Ok(String::new()),
> 1 => Ok(proxmox_time::epoch_to_rfc3339_utc(self.oldest)?),
> _ => Ok(format!(
> @@ -574,12 +592,23 @@ impl std::fmt::Display for SkipInfo {
> write!(
> f,
> "skipped: {} snapshot(s) ({}) older than the newest local snapshot",
> - self.count,
> + self.count(),
here, only the format string would need to be conditionalized based on
the `reason`..
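something along these lines (sketch only, exact message wording up for
debate):

    impl std::fmt::Display for SkipInfo {
        fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
            let reason = match self.skip_reason {
                SkipReason::AlreadySynced => "older than the newest local snapshot",
                SkipReason::TransferLast => "due to transfer-last",
            };
            write!(
                f,
                "skipped: {} snapshot(s) ({}) - {}",
                self.count,
                self.affected().map_err(|_| std::fmt::Error)?,
                reason,
            )
        }
    }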
> self.affected().map_err(|_| std::fmt::Error)?
> )
> }
> }
>
> +impl Default for SkipInfo {
> + fn default() -> Self {
> + SkipInfo {
> + oldest: i64::MAX,
> + newest: i64::MIN,
> + already_synced_count: 0,
> + transfer_last_count: 0,
> + }
> + }
> +}
> +
not sure whether this is needed.. probably we'd rather have a
constructor taking a `SkipReason`?
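i.e., something like

    impl SkipInfo {
        fn new(skip_reason: SkipReason) -> Self {
            SkipInfo {
                oldest: i64::MAX,
                newest: i64::MIN,
                count: 0,
                skip_reason,
            }
        }
    }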
> /// Pulls a group according to `params`.
> ///
> /// Pulling a group consists of the following steps:
> @@ -632,6 +661,7 @@ async fn pull_group(
> let fingerprint = client.fingerprint();
>
> let last_sync = params.store.last_successful_backup(&target_ns, group)?;
> + let last_sync_time = last_sync.unwrap_or(i64::MIN);
>
> let mut remote_snapshots = std::collections::HashSet::new();
>
> @@ -640,11 +670,14 @@ async fn pull_group(
>
> progress.group_snapshots = list.len() as u64;
>
> - let mut skip_info = SkipInfo {
> - oldest: i64::MAX,
> - newest: i64::MIN,
> - count: 0,
> - };
> + let mut skip_info = SkipInfo::default();
here, we would then construct 2 `SkipInfo`s, one for "old" snapshots,
one for "new, but skipped by transfer-last".
> +
> + let total_amount = list.len();
> +
> + let cutoff = params
> + .transfer_last
> + .map(|count| total_amount.saturating_sub(count))
> + .unwrap_or_default();
>
> for (pos, item) in list.into_iter().enumerate() {
> let snapshot = item.backup;
> @@ -661,11 +694,14 @@ async fn pull_group(
>
> remote_snapshots.insert(snapshot.time);
>
> - if let Some(last_sync_time) = last_sync {
> - if last_sync_time > snapshot.time {
> - skip_info.update(snapshot.time);
> - continue;
> - }
> + if last_sync_time > snapshot.time {
> + skip_info.update(snapshot.time, SkipReason::AlreadySynced);
> + continue;
> + }
> +
> + if pos < cutoff && last_sync_time != snapshot.time {
> + skip_info.update(snapshot.time, SkipReason::TransferLast);
> + continue;
> }
>
and ideally here, we could find good criteria for printing the
corresponding SkipInfo at the point where we switch from skipping to
transferring for both cases..
maybe

    else if info.count > 0 {
        print info;
        clear info; // prevents taking this if again
    }

for the first one and

    else if pos >= cutoff && other_info.count > 0 {
        ..
    }

for the second one would be enough?
that way the sequence of logs would be:
- skipped old (happens in most jobs, except for first run)
- re-sync of last synced snapshot (if it still exists on source)
- skipped because of transfer-last (if set and skips something)
- sync of new snapshots (if they exist)
which matches the chronological order of the snapshots referenced by the
log messages, and would be nicer than
- re-sync last synced snapshot
- sync new snapshots
- print skip info of old snapshots
- print skip info of transfer-last snapshots
in my opinion.
in any case, tracking the two skip infos separately allows us to
- give more information (end of regular skip, start of transfer-last
skip; there can be a snapshot in between that is re-synced after all)
- not handle newlines in a weird way
- save a line in the log, since we can just print each on one line,
instead of the combined one + two lines for the counts
if we ever add more criteria on the snapshot level that cause skipping,
we can still re-evaluate and possibly have some kind of map/vec for the
counts, and move the reason to the key/index, but for two kinds there's
no reason not to keep it simple for now IMHO.
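putting the above together, the relevant part of the loop could then look
roughly like this (untested sketch; `reset()` would be a hypothetical
helper clearing count/oldest/newest):

    for (pos, item) in list.into_iter().enumerate() {
        // ... existing checks and remote_snapshots bookkeeping ...

        if last_sync_time > snapshot.time {
            already_synced_skip_info.update(snapshot.time);
            continue;
        } else if already_synced_skip_info.count > 0 {
            task_log!(worker, "{}", already_synced_skip_info);
            already_synced_skip_info.reset(); // prevents taking this branch again
        }

        if pos < cutoff && last_sync_time != snapshot.time {
            transfer_last_skip_info.update(snapshot.time);
            continue;
        } else if pos >= cutoff && transfer_last_skip_info.count > 0 {
            task_log!(worker, "{}", transfer_last_skip_info);
            transfer_last_skip_info.reset();
        }

        // ... pull the snapshot ...
    }

    // the counts can still be non-zero here if the group ended while
    // skipping, so both infos would need a final print after the loop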
> // get updated auth_info (new tickets)
> @@ -725,7 +761,7 @@ async fn pull_group(
> }
> }
>
> - if skip_info.count > 0 {
> + if skip_info.count() > 0 {
> task_log!(worker, "{}", skip_info);
> }
>
> --
> 2.30.2
>
>