[pve-devel] [PATCH v2 storage 05/13] Add prune_backups to storage API
Fabian Ebner
f.ebner at proxmox.com
Mon Jun 29 11:43:24 CEST 2020
On 6/15/20 2:21 PM, Thomas Lamprecht wrote:
> Am 6/10/20 um 1:23 PM schrieb Fabian Ebner:
>> Implement it for generic storages supporting backups
>> (i.e. directory-based storages) and add a wrapper for PBS.
>>
>> Signed-off-by: Fabian Ebner <f.ebner at proxmox.com>
>> ---
>>
>> Changes in v2:
>> * Return actual volid in PBS using the new print_volid helper
>> * Split out prune_mark_backup_group and move it to Storage.pm
>> Like this it can be nicely reused from vzdump
>> * Use // {} trick to avoid some defined-ness checks for $archive_info
>
> in general looks OK, great! A few nits and notes inline.
>
>>
>> PVE/Storage.pm | 103 +++++++++++++++-
>> PVE/Storage/PBSPlugin.pm | 50 ++++++++
>> PVE/Storage/Plugin.pm | 57 +++++++++
>> test/prune_backups_test.pm | 237 +++++++++++++++++++++++++++++++++++++
>> test/run_plugin_tests.pl | 1 +
>> 5 files changed, 446 insertions(+), 2 deletions(-)
>> create mode 100644 test/prune_backups_test.pm
>>
>> diff --git a/PVE/Storage.pm b/PVE/Storage.pm
>> index a459572..cac67bb 100755
>> --- a/PVE/Storage.pm
>> +++ b/PVE/Storage.pm
>> @@ -40,11 +40,11 @@ use PVE::Storage::DRBDPlugin;
>> use PVE::Storage::PBSPlugin;
>>
>> # Storage API version. Icrement it on changes in storage API interface.
>> -use constant APIVER => 5;
>> +use constant APIVER => 6;
>> # Age is the number of versions we're backward compatible with.
>> # This is like having 'current=APIVER' and age='APIAGE' in libtool,
>> # see https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html
>> -use constant APIAGE => 4;
>> +use constant APIAGE => 5;
>>
>> # load standard plugins
>> PVE::Storage::DirPlugin->register();
>> @@ -1536,6 +1536,105 @@ sub extract_vzdump_config {
>> }
>> }
>>
>> +sub prune_backups {
>> + my ($cfg, $storeid, $opts_override, $vmid, $dryrun) = @_;
>> +
>> + my $scfg = storage_config($cfg, $storeid);
>> + die "storage '$storeid' does not support backups\n" if !$scfg->{content}->{backup};
>> +
>> + my $prune_opts = $opts_override;
>> + if (!defined($prune_opts)) {
>> + die "no prune-backups options configured for storage '$storeid'\n"
>> + if !defined($scfg->{'prune-backups'});
>> + $prune_opts = PVE::JSONSchema::parse_property_string('prune-backups', $scfg->{'prune-backups'});
>> + }
>> +
>> + my $all_zero = 1;
>> + foreach my $opt (keys %{$prune_opts}) {
>> + $all_zero = 0 if defined($prune_opts->{$opt}) && $prune_opts->{$opt} > 0;
>> + }
>
> nit, you use the hash values directly:
>
> for my $opt (values %$prune_opts) {
> $all_zero = 0 if defined($opt) && $opt > 0;
> }
>
Ack.
>> + die "at least one keep-option must be set and positive\n" if $all_zero;
>> +
>> + my $plugin = PVE::Storage::Plugin->lookup($scfg->{type});
>> + return $plugin->prune_backups($scfg, $storeid, $prune_opts, $vmid, $dryrun);
>> +}
>> +
>> +my $prune_mark = sub {
>> + my ($prune_entries, $keep_count, $id_func) = @_;
>> +
>> + return if !defined($keep_count);
>> +
>> + my $already_included = {};
>> +
>> + # determine time covered by previous selections
>> + foreach my $prune_entry (@{$prune_entries}) {
>> + my $mark = $prune_entry->{mark};
>> + if (defined($mark) && $mark eq 'keep') {
>> + my $id = $id_func->($prune_entry->{ctime});
>> + $already_included->{$id} = 1;
>> + }
>> + }
>> +
>> + my $newly_included = {};
>> +
>> + foreach my $prune_entry (@{$prune_entries}) {
>> + next if defined($prune_entry->{mark});
>> +
>> + my $id = $id_func->($prune_entry->{ctime});
>> + next if $already_included->{$id};
>> +
>> + if (!$newly_included->{$id}) {
>> + last if scalar(keys %{$newly_included}) >= $keep_count;
>> + $newly_included->{$id} = 1;
>> + $prune_entry->{mark} = 'keep';
>> + } else {
>> + $prune_entry->{mark} = 'remove';
>> + }
>> + }
>> +};
>> +
>> +sub prune_mark_backup_group {
>> + my ($backup_group, $prune_opts) = @_;
>> +
>> + my @prune_list = sort { $b->{ctime} <=> $a->{ctime} } @{$backup_group};
>> +
>> + $prune_mark->(\@prune_list, $prune_opts->{'keep-last'}, sub {
>> + my ($ctime) = @_;
>> + return $ctime;
>> + });
>> + $prune_mark->(\@prune_list, $prune_opts->{'keep-hourly'}, sub {
>> + my ($ctime) = @_;
>> + my (undef, undef, $hour, $day, $month, $year) = localtime($ctime);
>> + return "$hour/$day/$month/$year";
>> + });
>> + $prune_mark->(\@prune_list, $prune_opts->{'keep-daily'}, sub {
>> + my ($ctime) = @_;
>> + my (undef, undef, undef, $day, $month, $year) = localtime($ctime);
>> + return "$day/$month/$year";
>> + });
>> + $prune_mark->(\@prune_list, $prune_opts->{'keep-weekly'}, sub {
>> + my ($ctime) = @_;
>> + my ($sec, $min, $hour, $day, $month, $year) = localtime($ctime);
>> + my $iso_week = int(strftime("%V", $sec, $min, $hour, $day, $month - 1, $year - 1900));
>> + my $iso_week_year = int(strftime("%G", $sec, $min, $hour, $day, $month - 1, $year - 1900));
>> + return "$iso_week/$iso_week_year";
>> + });
>> + $prune_mark->(\@prune_list, $prune_opts->{'keep-monthly'}, sub {
>> + my ($ctime) = @_;
>> + my (undef, undef, undef, undef, $month, $year) = localtime($ctime);
>> + return "$month/$year";
>> + });
>> + $prune_mark->(\@prune_list, $prune_opts->{'keep-yearly'}, sub {
>> + my ($ctime) = @_;
>> + my $year = (localtime($ctime))[5];
>> + return "$year";
>> + });
>> +
>> + foreach my $prune_entry (@prune_list) {
>> + $prune_entry->{mark} //= 'remove';
>> + }
>> +}
>> +
>> sub volume_export {
>> my ($cfg, $fh, $volid, $format, $snapshot, $base_snapshot, $with_snapshots) = @_;
>>
>> diff --git a/PVE/Storage/PBSPlugin.pm b/PVE/Storage/PBSPlugin.pm
>> index f029e55..576cc8b 100644
>> --- a/PVE/Storage/PBSPlugin.pm
>> +++ b/PVE/Storage/PBSPlugin.pm
>> @@ -181,6 +181,56 @@ sub extract_vzdump_config {
>> return $config;
>> }
>>
>> +sub prune_backups {
>> + my ($class, $scfg, $storeid, $prune_opts, $vmid, $dryrun) = @_;
>> +
>> + my $backups = $class->list_volumes($storeid, $scfg, $vmid, ['backup']);
>> +
>> + my $backup_groups = {};
>> + foreach my $backup (@{$backups}) {
>> + (my $guest_type = $backup->{format}) =~ s/^pbs-//;
>> + my $backup_group = "$guest_type/$backup->{vmid}";
>> + $backup_groups->{$backup_group} = 1;
>> + }
>> +
>> + my @param;
>> + foreach my $prune_option (keys %{$prune_opts}) {
>> + push @param, "--$prune_option";
>> + push @param, "$prune_opts->{$prune_option}";
>> + }
>> +
>> + push @param, '--dry-run' if $dryrun;
>> + push @param, '--output-format=json';
>> +
>> + my @prune_list;
>> +
>> + foreach my $backup_group (keys %{$backup_groups}) {
>> + my $json_str;
>> + my $outfunc = sub { $json_str .= "$_[0]\n" };
>> +
>> + run_raw_client_cmd($scfg, $storeid, 'prune', [ $backup_group, @param ],
>> + outfunc => $outfunc, errmsg => 'proxmox-backup-client failed');
>> +
>> + my $res = decode_json($json_str);
>> + foreach my $backup (@{$res}) {
>> + my $type = $backup->{'backup-type'};
>> + my $vmid = $backup->{'backup-id'};
>> + my $ctime = $backup->{'backup-time'};
>> + my $volid = print_volid($storeid, $type, $vmid, $ctime);
>> + my $prune_entry = {
>> + volid => $volid,
>> + type => $type eq 'vm' ? 'qemu' : 'lxc',
>> + ctime => $ctime,
>> + vmid => $vmid,
>> + mark => $backup->{keep} ? 'keep' : 'remove',
>> + };
>> + push @prune_list, $prune_entry;
>> + }
>> + }
>> +
>> + return @prune_list;
>> +}
>> +
>> sub on_add_hook {
>> my ($class, $storeid, $scfg, %param) = @_;
>>
>> diff --git a/PVE/Storage/Plugin.pm b/PVE/Storage/Plugin.pm
>> index 2351ec8..1008e3a 100644
>> --- a/PVE/Storage/Plugin.pm
>> +++ b/PVE/Storage/Plugin.pm
>> @@ -1166,6 +1166,63 @@ sub check_connection {
>> return 1;
>> }
>>
>> +sub prune_backups {
>> + my ($class, $scfg, $storeid, $prune_opts, $vmid, $dryrun) = @_;
>> +
>> + my $backups = $class->list_volumes($storeid, $scfg, $vmid, ['backup']);
>> +
>> + my $backup_groups = {};
>> + my @prune_list;
>> + foreach my $backup (@{$backups}) {
>> + my $volid = $backup->{volid};
>> + my $backup_vmid = $backup->{vmid};
>> +
>> + my $prune_entry = {
>> + volid => $volid,
>> + vmid => $backup_vmid,
>> + ctime => $backup->{ctime},
>> + };
>> +
>> + my $archive_info = eval { PVE::Storage::archive_info($volid) } // {};
>> +
>> + my $type = $archive_info->{type} // 'unknown';
>> + $prune_entry->{type} = $type;
>> +
>> + if ($archive_info->{is_std_name}) {
>> + $prune_entry->{ctime} = $archive_info->{ctime};
>> + my $group = "$type/$backup_vmid";
>> + push @{$backup_groups->{$group}}, $prune_entry;
>> + } else {
>> + # ignore backups that don't use the standard naming scheme
>> + $prune_entry->{mark} = 'protected';
>> + }
>> +
>> + push @prune_list, $prune_entry;
>> + }
>> +
>> + foreach my $backup_group (values %{$backup_groups}) {
>> + PVE::Storage::prune_mark_backup_group($backup_group, $prune_opts);
>> + }
>> +
>> + if (!$dryrun) {
>> + foreach my $prune_entry (@prune_list) {
>> + next if $prune_entry->{mark} ne 'remove';
>> +
>> + my $volid = $prune_entry->{volid};
>> + eval {
>> + my (undef, $volname) = parse_volume_id($volid);
>> + my $archive_path = $class->filesystem_path($scfg, $volname);
>> + PVE::Storage::archive_remove($archive_path);
>> + };
>> + if (my $err = $@) {
>> + warn "error when removing backup '$volid' - $err\n";
>> + }
>> + }
>> + }
>> +
>> + return @prune_list;
>> +}
>> +
>> # Import/Export interface:
>> # Any path based storage is assumed to support 'raw' and 'tar' streams, so
>> # the default implementations will return this if $scfg->{path} is set,
>> diff --git a/test/prune_backups_test.pm b/test/prune_backups_test.pm
>> new file mode 100644
>> index 0000000..1ce1b30
>> --- /dev/null
>> +++ b/test/prune_backups_test.pm
>> @@ -0,0 +1,237 @@
>> +package PVE::Storage::TestPruneBackups;
>> +
>> +use strict;
>> +use warnings;
>> +
>> +use lib qw(..);
>> +
>> +use PVE::Storage;
>> +use Test::More;
>> +use Test::MockModule;
>> +
>> +my $storeid = 'BackTest123';
>> +my @vmids = (1234, 9001);
>> +
>> +# only includes the information needed for prune_backups
>> +my $mocked_backups_list = [];
>> +
>> +foreach my $vmid (@vmids) {
>> + push @{$mocked_backups_list},
>> + (
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2018_05_26-11_18_21.tar.zst",
>> + 'ctime' => 1527333501,
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_18_21.tar.zst",
>> + 'ctime' => 1577791101,
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_19_21.tar.zst",
>> + 'ctime' => 1577787561,
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2020_01_01-11_18_21.tar.zst",
>> + 'ctime' => 1577877501,
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2020_01_01-12_18_21.tar.zst",
>> + 'ctime' => 1577881101,
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-lxc-$vmid-2020_01_01-12_18_21.tar.zst",
>> + 'ctime' => 1577881101,
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-$vmid-renamed.tar.zst",
>> + 'ctime' => 1234,
>> + 'vmid' => $vmid,
>> + },
>> + );
>> +};
>> +my $mock_plugin = Test::MockModule->new('PVE::Storage::Plugin');
>> +$mock_plugin->redefine(list_volumes => sub {
>> + my ($class, $storeid, $scfg, $vmid, $content_types) = @_;
>> +
>> + return $mocked_backups_list if !defined($vmid);
>> +
>> + my @list = grep { $_->{vmid} eq $vmid } @{$mocked_backups_list};
>> + return \@list;
>> +});
>> +
>> +sub generate_expected {
>> + my ($vmids, $marks) = @_;
>> +
>> + my @expected;
>> + foreach my $vmid (@{$vmids}) {
>> + push @expected,
>> + (
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2018_05_26-11_18_21.tar.zst",
>> + 'type' => 'qemu',
>> + 'ctime' => 1527333501,
>
> maybe we could have a $time variable with some start value (the highest used one) and declare it
> in the tests like, for example:
>
> ctime => $time - (12 * 60 * 60),
>
> To make it slightly clearer what different backups times we have, just and idea though.
>
Yes, makes most sense for backups that are relatively close together,
but even the difference between "2018_05_26-11_18_21" and
"2020_01_01-12_18_21" should become more readable when expressed as such
a calculation.
>> + 'mark' => $marks->[0],
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_18_21.tar.zst",
>> + 'type' => 'qemu',
>> + 'ctime' => 1577791101,
>> + 'mark' => $marks->[1],
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_19_21.tar.zst",
>> + 'type' => 'qemu',
>> + 'ctime' => 1577791161,
>> + 'mark' => $marks->[2],
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2020_01_01-11_18_21.tar.zst",
>> + 'type' => 'qemu',
>> + 'ctime' => 1577877501,
>> + 'mark' => $marks->[3],
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2020_01_01-12_18_21.tar.zst",
>> + 'type' => 'qemu',
>> + 'ctime' => 1577881101,
>> + 'mark' => $marks->[4],
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-lxc-$vmid-2020_01_01-12_18_21.tar.zst",
>> + 'type' => 'lxc',
>> + 'ctime' => 1577881101,
>> + 'mark' => $marks->[5],
>> + 'vmid' => $vmid,
>> + },
>> + {
>> + 'volid' => "$storeid:backup/vzdump-$vmid-renamed.tar.zst",
>> + 'type' => 'unknown',
>> + 'ctime' => 1234,
>> + 'mark' => 'protected',
>> + 'vmid' => $vmid,
>> + },
>> + );
>> + }
>> + my @sorted = sort { $a->{volid} cmp $b->{volid} } @expected;
>> + return \@sorted;
>> +}
>> +
>> +# an array of test cases, each test is comprised of the following keys:
>> +# description => to identify a single test
>> +# vmid => VMID or undef for all
>> +# prune_opts => override options from storage configuration
>> +# expected => what prune_backups should return
>> +#
>> +# most of them are created further below
>> +my $tests = [
>> + {
>> + description => 'last=3, multiple VMs',
>> + prune_opts => {
>> + 'keep-last' => 3,
>> + },
>> + expected => generate_expected(\@vmids, ['remove', 'remove', 'keep', 'keep', 'keep', 'keep']),
>
> Hmm, maybe we want to annotate the protected for the return value, even if it's internally
> the same as keep. Would allow annotating that in the webinterface/API for dry-run.
>
The 'protected' mark is returned by prune_backups and in the API. It's
not passed along to the generate_expected helper, because the mark for
the irregular backup is always 'protected', so I hard-coded it inside
the helper.
> Also, with a mix of CT and VM backups of the same vmid the type is ignored it seems?
> Could be unexpected - I mean we don't really support that case, so can be OK too -
> just wondering if you thought about this.
>
Just as with PBS a backup group is determined by type+ID, so if there
are backups for an LXC and a VM with the same ID, prune will treat them
as two groups not affecting each other.
There is currently no way to specify the full group when calling
prune_backups (only the ID can be passed along), but as you said, it's
not supported, and if it ever /is/ needed, that $vmid parameter could be
extended.
>> + },
>> + {
>> + description => 'weekly=2, one VM',
>> + vmid => $vmids[0],
>> + prune_opts => {
>> + 'keep-weekly' => 2,
>> + },
>> + expected => generate_expected([$vmids[0]], ['keep', 'remove', 'remove', 'remove', 'keep', 'keep']),
>> + },
>> + {
>> + description => 'daily=weekly=monthly=1, multiple VMs',
>> + prune_opts => {
>> + 'keep-daily' => 1,
>> + 'keep-weekly' => 1,
>> + 'keep-monthly' => 1,
>> + },
>> + expected => generate_expected(\@vmids, ['keep', 'remove', 'keep', 'remove', 'keep', 'keep']),
>> + },
>> + {
>> + description => 'hourly=4, one VM',
>> + vmid => $vmids[0],
>> + prune_opts => {
>> + 'keep-hourly' => 4,
>> + },
>> + expected => generate_expected([$vmids[0]], ['keep', 'remove', 'keep', 'keep', 'keep', 'keep']),
>> + },
>> + {
>> + description => 'yearly=2, multiple VMs',
>> + prune_opts => {
>> + 'keep-yearly' => 2,
>> + },
>> + expected => generate_expected(\@vmids, ['remove', 'remove', 'keep', 'remove', 'keep', 'keep']),
>> + },
>> + {
>> + description => 'last=2,hourly=2 one VM',
>> + vmid => $vmids[0],
>> + prune_opts => {
>> + 'keep-last' => 2,
>> + 'keep-hourly' => 2,
>> + },
>> + expected => generate_expected([$vmids[0]], ['keep', 'remove', 'keep', 'keep', 'keep', 'keep']),
>> + },
>> + {
>> + description => 'last=1,monthly=2, multiple VMs',
>> + prune_opts => {
>> + 'keep-last' => 1,
>> + 'keep-monthly' => 2,
>> + },
>> + expected => generate_expected(\@vmids, ['keep', 'remove', 'keep', 'remove', 'keep', 'keep']),
>> + },
>> + {
>> + description => 'monthly=3, one VM',
>> + vmid => $vmids[0],
>> + prune_opts => {
>> + 'keep-monthly' => 3,
>> + },
>> + expected => generate_expected([$vmids[0]], ['keep', 'remove', 'keep', 'remove', 'keep', 'keep']),
>> + },
>> + {
>> + description => 'last=daily=weekly=1, multiple VMs',
>> + prune_opts => {
>> + 'keep-last' => 1,
>> + 'keep-daily' => 1,
>> + 'keep-weekly' => 1,
>> + },
>> + expected => generate_expected(\@vmids, ['keep', 'remove', 'keep', 'remove', 'keep', 'keep']),
>> + },
>> + {
>> + description => 'daily=2, one VM',
>> + vmid => $vmids[0],
>> + prune_opts => {
>> + 'keep-daily' => 2,
>> + },
>> + expected => generate_expected([$vmids[0]], ['remove', 'remove', 'keep', 'remove', 'keep', 'keep']),
>> + },
>> +];
>> +
>> +plan tests => scalar @$tests;
>> +
>> +for my $tt (@$tests) {
>> +
>> + my $got = eval {
>> + my @list = PVE::Storage::Plugin->prune_backups($tt->{scfg}, $storeid, $tt->{prune_opts}, $tt->{vmid}, 1);
>> + my @sorted = sort { $a->{volid} cmp $b->{volid} } @list;
>> + return \@sorted;
>> + };
>> + $got = $@ if $@;
>> +
>> + is_deeply($got, $tt->{expected}, $tt->{description}) || diag(explain($got));
>> +}
>> +
>> +done_testing();
>> +
>> +1;
>> diff --git a/test/run_plugin_tests.pl b/test/run_plugin_tests.pl
>> index 54322bb..d33429a 100755
>> --- a/test/run_plugin_tests.pl
>> +++ b/test/run_plugin_tests.pl
>> @@ -16,6 +16,7 @@ my $res = $harness->runtests(
>> "path_to_volume_id_test.pm",
>> "get_subdir_test.pm",
>> "filesystem_path_test.pm",
>> + "prune_backups_test.pm",
>> );
>>
>> exit -1 if !$res || $res->{failed} || $res->{parse_errors};
>>
>
More information about the pve-devel
mailing list