[pve-devel] [PATCH storage 4/6] Add prune_backups to storage API
Fabian Grünbichler
f.gruenbichler at proxmox.com
Tue Jul 7 08:38:19 CEST 2020
On June 4, 2020 11:08 am, Fabian Ebner wrote:
> Implement it for generic storages supporting backups (i.e.
> directory-based storages) and add a wrapper for PBS.
>
> Signed-off-by: Fabian Ebner <f.ebner at proxmox.com>
> ---
> PVE/Storage.pm | 27 ++++-
> PVE/Storage/PBSPlugin.pm | 50 ++++++++
> PVE/Storage/Plugin.pm | 128 ++++++++++++++++++++
> test/prune_backups_test.pm | 237 +++++++++++++++++++++++++++++++++++++
> test/run_plugin_tests.pl | 1 +
> 5 files changed, 441 insertions(+), 2 deletions(-)
> create mode 100644 test/prune_backups_test.pm
>
> diff --git a/PVE/Storage.pm b/PVE/Storage.pm
> index 07a4f53..e1d0d02 100755
> --- a/PVE/Storage.pm
> +++ b/PVE/Storage.pm
> @@ -40,11 +40,11 @@ use PVE::Storage::DRBDPlugin;
> use PVE::Storage::PBSPlugin;
>
> # Storage API version. Icrement it on changes in storage API interface.
> -use constant APIVER => 5;
> +use constant APIVER => 6;
> # Age is the number of versions we're backward compatible with.
> # This is like having 'current=APIVER' and age='APIAGE' in libtool,
> # see https://www.gnu.org/software/libtool/manual/html_node/Libtool-versioning.html
> -use constant APIAGE => 4;
> +use constant APIAGE => 5;
>
> # load standard plugins
> PVE::Storage::DirPlugin->register();
> @@ -1522,6 +1522,29 @@ sub extract_vzdump_config {
> }
> }
>
> +sub prune_backups {
> + my ($cfg, $storeid, $opts_override, $vmid, $dryrun) = @_;
s/opts_override/opts/
s/prune_opts/opts/
or maybe $keep? since we only have either, and they both represent the
same options..
> +
> + my $scfg = storage_config($cfg, $storeid);
> + die "storage '$storeid' does not support backups\n" if !$scfg->{content}->{backup};
> +
> + my $prune_opts = $opts_override;
not needed then
> + if (!defined($prune_opts)) {
> + die "no prune-backups options configured for storage '$storeid'\n"
> + if !defined($scfg->{'prune-backups'});
> + $prune_opts = PVE::JSONSchema::parse_property_string('prune-backups', $scfg->{'prune-backups'});
> + }
> +
> + my $all_zero = 1;
> + foreach my $opt (keys %{$prune_opts}) {
> + $all_zero = 0 if defined($prune_opts->{$opt}) && $prune_opts->{$opt} > 0;
> + }
> + die "at least one keep-option must be set and positive\n" if $all_zero;
if we switch to integer:
die ...
if !grep { defined($opts->{$_}) && $opts->{$_} } keys %$opts;
or we could use the new format validator functionality to ensure this
directly in parse_property_string ;)
> +
> + my $plugin = PVE::Storage::Plugin->lookup($scfg->{type});
> + return $plugin->prune_backups($scfg, $storeid, $prune_opts, $vmid, $dryrun);
> +}
> +
> sub volume_export {
> my ($cfg, $fh, $volid, $format, $snapshot, $base_snapshot, $with_snapshots) = @_;
>
> diff --git a/PVE/Storage/PBSPlugin.pm b/PVE/Storage/PBSPlugin.pm
> index fba4b2b..f37bd66 100644
> --- a/PVE/Storage/PBSPlugin.pm
> +++ b/PVE/Storage/PBSPlugin.pm
> @@ -173,6 +173,56 @@ sub extract_vzdump_config {
> return $config;
> }
>
> +sub prune_backups {
> + my ($class, $scfg, $storeid, $prune_opts, $vmid, $dryrun) = @_;
> +
> + my $backups = $class->list_volumes($storeid, $scfg, $vmid, ['backup']);
> +
> + my $backup_groups = {};
> + foreach my $backup (@{$backups}) {
> + (my $guest_type = $backup->{format}) =~ s/^pbs-//;
> + my $backup_group = "$guest_type/$backup->{vmid}";
> + $backup_groups->{$backup_group} = 1;
> + }
> +
> + my @param;
> + foreach my $prune_option (keys %{$prune_opts}) {
> + push @param, "--$prune_option";
> + push @param, "$prune_opts->{$prune_option}";
> + }
> +
> + push @param, '--dry-run' if $dryrun;
> + push @param, '--output-format=json';
> +
> + my @prune_list;
why not a reference? would make extending this easier in the future wrt
return values
> +
> + foreach my $backup_group (keys %{$backup_groups}) {
> + my $json_str;
> + my $outfunc = sub { $json_str .= "$_[0]\n" };
a simple
sub { $json_str .= shift }
should be enough, since we don't actually care about the new lines?
> +
> + run_raw_client_cmd($scfg, $storeid, 'prune', [ $backup_group, @param ],
> + outfunc => $outfunc, errmsg => 'proxmox-backup-client failed');
error handling here
> +
> + my $res = decode_json($json_str);
and here would be nice (at least to get better error messages, possibly
to warn and continue with pruning other backup groups and die at the
end?)
> + foreach my $backup (@{$res}) {
> + my $type = $backup->{'backup-type'};
> + my $vmid = $backup->{'backup-id'};
> + my $ctime = $backup->{'backup-time'};
> + my $time_str = localtime($ctime);
> + my $prune_entry = {
> + volid => "$type/$vmid-$time_str",
> + type => $type eq 'vm' ? 'qemu' : 'lxc',
> + ctime => $ctime,
> + vmid => $vmid,
> + mark => $backup->{keep} ? 'keep' : 'remove',
depending on how stable this interface is, it might make sense to ensure
all the expected keys are actually defined..
> + };
> + push @prune_list, $prune_entry;
> + }
> + }
> +
> + return @prune_list;
> +}
> +
> sub on_add_hook {
> my ($class, $storeid, $scfg, %param) = @_;
>
> diff --git a/PVE/Storage/Plugin.pm b/PVE/Storage/Plugin.pm
> index 1ba1b99..a11bba5 100644
> --- a/PVE/Storage/Plugin.pm
> +++ b/PVE/Storage/Plugin.pm
> @@ -8,6 +8,7 @@ use File::chdir;
> use File::Path;
> use File::Basename;
> use File::stat qw();
> +use POSIX qw(strftime);
>
> use PVE::Tools qw(run_command);
> use PVE::JSONSchema qw(get_standard_option register_standard_option);
> @@ -1167,6 +1168,133 @@ sub check_connection {
> return 1;
> }
>
> +my $prune_backups_mark = sub {
> + my ($prune_entries, $keep_count, $id_func) = @_;
> +
> + return if !defined($keep_count);
defined is not needed? currently it's >= 1 anyway, and even if we switch
to >= 0 we'd return on 0 here
> +
> + my $already_included = {};
> +
> + # determine time covered by previous selections
> + foreach my $prune_entry (@{$prune_entries}) {
> + my $mark = $prune_entry->{mark};
> + if (defined($mark) && $mark eq 'keep') {
> + my $id = $id_func->($prune_entry->{ctime});
> + $already_included->{$id} = 1;
> + }
> + }
since $prune_entries needs to be sorted already for all of this to work,
couldn't we combine the two loops? like so:
diff --git a/PVE/Storage/Plugin.pm b/PVE/Storage/Plugin.pm
index d47e837..8d5d24d 100644
--- a/PVE/Storage/Plugin.pm
+++ b/PVE/Storage/Plugin.pm
@@ -1173,22 +1173,18 @@ my $prune_backups_mark = sub {
return if !defined($keep_count);
my $already_included = {};
+ my $newly_included = {};
# determine time covered by previous selections
foreach my $prune_entry (@{$prune_entries}) {
my $mark = $prune_entry->{mark};
+ my $id = $id_func->($prune_entry->{ctime});
+
if (defined($mark) && $mark eq 'keep') {
- my $id = $id_func->($prune_entry->{ctime});
$already_included->{$id} = 1;
}
- }
- my $newly_included = {};
-
- foreach my $prune_entry (@{$prune_entries}) {
- next if defined($prune_entry->{mark});
-
- my $id = $id_func->($prune_entry->{ctime});
+ next if defined($mark);
next if $already_included->{$id};
if (!$newly_included->{$id}) {
> +
> + my $newly_included = {};
> +
> + foreach my $prune_entry (@{$prune_entries}) {
> + next if defined($prune_entry->{mark});
> +
> + my $id = $id_func->($prune_entry->{ctime});
> + next if $already_included->{$id};
> +
> + if (!$newly_included->{$id}) {
> + last if scalar(keys %{$newly_included}) >= $keep_count;
shouldn't this one be at the start of the loop body?
> + $newly_included->{$id} = 1;
> + $prune_entry->{mark} = 'keep';
> + } else {
> + $prune_entry->{mark} = 'remove';
> + }
> + }
> +};
> +
> +sub prune_backups {
> + my ($class, $scfg, $storeid, $prune_opts, $vmid, $dryrun) = @_;
> +
> + my $backups = $class->list_volumes($storeid, $scfg, $vmid, ['backup']);
> +
> + my $backup_groups = {};
> + my @prune_list;
> + foreach my $backup (@{$backups}) {
> + my $volid = $backup->{volid};
> + my $backup_vmid = $backup->{vmid};
> +
> + my $prune_entry = {
> + volid => $volid,
> + vmid => $backup_vmid,
> + ctime => $backup->{ctime},
> + };
> +
> + my $archive_info = eval { PVE::Storage::archive_info($volid) };
> +
> + my $type = defined($archive_info) ? $archive_info->{type} : 'unknown';
> + $prune_entry->{type} = $type;
> +
> + if (defined($archive_info) && $archive_info->{is_std_name}) {
> + $prune_entry->{ctime} = $archive_info->{ctime};
> + my $group = "$type/$backup_vmid";
> + push @{$backup_groups->{$group}}, $prune_entry;
> + } else {
> + # ignore backups that don't use the standard naming scheme
> + $prune_entry->{mark} = 'protected';
> + }
> +
> + push @prune_list, $prune_entry;
> + }
> +
> + foreach my $backup_group (values %{$backup_groups}) {
> + my @prune_list_for_group = sort { $b->{ctime} <=> $a->{ctime} }
> + @{$backup_group};
> +
> + $prune_backups_mark->(\@prune_list_for_group, $prune_opts->{'keep-last'}, sub {
> + my ($ctime) = @_;
> + return $ctime;
> + });
> + $prune_backups_mark->(\@prune_list_for_group, $prune_opts->{'keep-hourly'}, sub {
> + my ($ctime) = @_;
> + my (undef, undef, $hour, $day, $month, $year) = localtime($ctime);
> + return "$hour/$day/$month/$year";
> + });
> + $prune_backups_mark->(\@prune_list_for_group, $prune_opts->{'keep-daily'}, sub {
> + my ($ctime) = @_;
> + my (undef, undef, undef, $day, $month, $year) = localtime($ctime);
> + return "$day/$month/$year";
> + });
> + $prune_backups_mark->(\@prune_list_for_group, $prune_opts->{'keep-weekly'}, sub {
> + my ($ctime) = @_;
> + my ($sec, $min, $hour, $day, $month, $year) = localtime($ctime);
> + my $iso_week = int(strftime("%V", $sec, $min, $hour, $day, $month - 1, $year - 1900));
> + my $iso_week_year = int(strftime("%G", $sec, $min, $hour, $day, $month - 1, $year - 1900));
> + return "$iso_week/$iso_week_year";
> + });
> + $prune_backups_mark->(\@prune_list_for_group, $prune_opts->{'keep-monthly'}, sub {
> + my ($ctime) = @_;
> + my (undef, undef, undef, undef, $month, $year) = localtime($ctime);
> + return "$month/$year";
> + });
> + $prune_backups_mark->(\@prune_list_for_group, $prune_opts->{'keep-yearly'}, sub {
> + my ($ctime) = @_;
> + my $year = (localtime($ctime))[5];
> + return "$year";
this is broken. e.g., see this map of volids to $ctime to $year:
'local:backup/vzdump-lxc-12345-1900_01_01-01_01_01.tar' => {
'ctime' => 946684861,
'year' => '100'
},
'local:backup/vzdump-lxc-12345-1950_01_01-01_01_01.tar' => {
'ctime' => 2524608061,
'year' => '150'
},
'local:backup/vzdump-lxc-12345-2000_01_01-01_01_01.tar' => {
'ctime' => 946684861,
'year' => '100'
},
'local:backup/vzdump-lxc-12345-2050_01_01-01_01_01.tar' => {
'ctime' => 2524608061,
'year' => '150'
},
a) we also don't want to implement buggy behaviour that bites us in 30
years
b) people might think they are clever and add a backup with a timestamp
long in the future thinking that will make it safe, and it actually does
the opposite
(the bug is in archive_info, and probably needs fixing there. also a
great example why adding extreme cases to test cases is important, that
would have hopefully caught this just by virtue of getting sorted wrong)
> + });
> + }
> +
> + foreach my $prune_entry (@prune_list) {
> + $prune_entry->{mark} = 'remove' if !defined($prune_entry->{mark});
> + }
> +
> + if (!$dryrun) {
> + foreach my $prune_entry (@prune_list) {
> + next if $prune_entry->{mark} ne 'remove';
> +
> + my $volid = $prune_entry->{volid};
> + eval {
> + my (undef, $volname) = parse_volume_id($volid);
> + $class->free_image($storeid, $scfg, $volname);
why not PVE::Storage::archive_remove() to remove the log file as well?
> + };
> + if (my $err = $@) {
> + warn "error when removing backup '$volid' - $err\n";
> + }
> + }
> + }
> +
> + return @prune_list;
> +}
> +
> # Import/Export interface:
> # Any path based storage is assumed to support 'raw' and 'tar' streams, so
> # the default implementations will return this if $scfg->{path} is set,
> diff --git a/test/prune_backups_test.pm b/test/prune_backups_test.pm
> new file mode 100644
> index 0000000..1ce1b30
> --- /dev/null
> +++ b/test/prune_backups_test.pm
> @@ -0,0 +1,237 @@
> +package PVE::Storage::TestPruneBackups;
> +
> +use strict;
> +use warnings;
> +
> +use lib qw(..);
> +
> +use PVE::Storage;
> +use Test::More;
> +use Test::MockModule;
> +
> +my $storeid = 'BackTest123';
> +my @vmids = (1234, 9001);
> +
> +# only includes the information needed for prune_backups
> +my $mocked_backups_list = [];
> +
> +foreach my $vmid (@vmids) {
> + push @{$mocked_backups_list},
> + (
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2018_05_26-11_18_21.tar.zst",
> + 'ctime' => 1527333501,
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_18_21.tar.zst",
> + 'ctime' => 1577791101,
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_19_21.tar.zst",
> + 'ctime' => 1577787561,
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2020_01_01-11_18_21.tar.zst",
> + 'ctime' => 1577877501,
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2020_01_01-12_18_21.tar.zst",
> + 'ctime' => 1577881101,
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-lxc-$vmid-2020_01_01-12_18_21.tar.zst",
> + 'ctime' => 1577881101,
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-$vmid-renamed.tar.zst",
> + 'ctime' => 1234,
> + 'vmid' => $vmid,
> + },
> + );
> +};
> +my $mock_plugin = Test::MockModule->new('PVE::Storage::Plugin');
> +$mock_plugin->redefine(list_volumes => sub {
> + my ($class, $storeid, $scfg, $vmid, $content_types) = @_;
> +
> + return $mocked_backups_list if !defined($vmid);
> +
> + my @list = grep { $_->{vmid} eq $vmid } @{$mocked_backups_list};
> + return \@list;
> +});
> +
> +sub generate_expected {
> + my ($vmids, $marks) = @_;
> +
> + my @expected;
> + foreach my $vmid (@{$vmids}) {
> + push @expected,
> + (
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2018_05_26-11_18_21.tar.zst",
> + 'type' => 'qemu',
> + 'ctime' => 1527333501,
> + 'mark' => $marks->[0],
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_18_21.tar.zst",
> + 'type' => 'qemu',
> + 'ctime' => 1577791101,
> + 'mark' => $marks->[1],
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2019_12_31-11_19_21.tar.zst",
> + 'type' => 'qemu',
> + 'ctime' => 1577791161,
> + 'mark' => $marks->[2],
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2020_01_01-11_18_21.tar.zst",
> + 'type' => 'qemu',
> + 'ctime' => 1577877501,
> + 'mark' => $marks->[3],
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-qemu-$vmid-2020_01_01-12_18_21.tar.zst",
> + 'type' => 'qemu',
> + 'ctime' => 1577881101,
> + 'mark' => $marks->[4],
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-lxc-$vmid-2020_01_01-12_18_21.tar.zst",
> + 'type' => 'lxc',
> + 'ctime' => 1577881101,
> + 'mark' => $marks->[5],
> + 'vmid' => $vmid,
> + },
> + {
> + 'volid' => "$storeid:backup/vzdump-$vmid-renamed.tar.zst",
> + 'type' => 'unknown',
> + 'ctime' => 1234,
> + 'mark' => 'protected',
> + 'vmid' => $vmid,
> + },
> + );
> + }
> + my @sorted = sort { $a->{volid} cmp $b->{volid} } @expected;
> + return \@sorted;
> +}
> +
> +# an array of test cases, each test is comprised of the following keys:
> +# description => to identify a single test
> +# vmid => VMID or undef for all
> +# prune_opts => override options from storage configuration
> +# expected => what prune_backups should return
> +#
> +# most of them are created further below
> +my $tests = [
> + {
> + description => 'last=3, multiple VMs',
> + prune_opts => {
> + 'keep-last' => 3,
> + },
> + expected => generate_expected(\@vmids, ['remove', 'remove', 'keep', 'keep', 'keep', 'keep']),
> + },
> + {
> + description => 'weekly=2, one VM',
> + vmid => $vmids[0],
> + prune_opts => {
> + 'keep-weekly' => 2,
> + },
> + expected => generate_expected([$vmids[0]], ['keep', 'remove', 'remove', 'remove', 'keep', 'keep']),
> + },
> + {
> + description => 'daily=weekly=monthly=1, multiple VMs',
> + prune_opts => {
> + 'keep-daily' => 1,
> + 'keep-weekly' => 1,
> + 'keep-monthly' => 1,
> + },
> + expected => generate_expected(\@vmids, ['keep', 'remove', 'keep', 'remove', 'keep', 'keep']),
> + },
> + {
> + description => 'hourly=4, one VM',
> + vmid => $vmids[0],
> + prune_opts => {
> + 'keep-hourly' => 4,
> + },
> + expected => generate_expected([$vmids[0]], ['keep', 'remove', 'keep', 'keep', 'keep', 'keep']),
> + },
> + {
> + description => 'yearly=2, multiple VMs',
> + prune_opts => {
> + 'keep-yearly' => 2,
> + },
> + expected => generate_expected(\@vmids, ['remove', 'remove', 'keep', 'remove', 'keep', 'keep']),
> + },
> + {
> + description => 'last=2,hourly=2 one VM',
> + vmid => $vmids[0],
> + prune_opts => {
> + 'keep-last' => 2,
> + 'keep-hourly' => 2,
> + },
> + expected => generate_expected([$vmids[0]], ['keep', 'remove', 'keep', 'keep', 'keep', 'keep']),
> + },
> + {
> + description => 'last=1,monthly=2, multiple VMs',
> + prune_opts => {
> + 'keep-last' => 1,
> + 'keep-monthly' => 2,
> + },
> + expected => generate_expected(\@vmids, ['keep', 'remove', 'keep', 'remove', 'keep', 'keep']),
> + },
> + {
> + description => 'monthly=3, one VM',
> + vmid => $vmids[0],
> + prune_opts => {
> + 'keep-monthly' => 3,
> + },
> + expected => generate_expected([$vmids[0]], ['keep', 'remove', 'keep', 'remove', 'keep', 'keep']),
> + },
> + {
> + description => 'last=daily=weekly=1, multiple VMs',
> + prune_opts => {
> + 'keep-last' => 1,
> + 'keep-daily' => 1,
> + 'keep-weekly' => 1,
> + },
> + expected => generate_expected(\@vmids, ['keep', 'remove', 'keep', 'remove', 'keep', 'keep']),
> + },
> + {
> + description => 'daily=2, one VM',
> + vmid => $vmids[0],
> + prune_opts => {
> + 'keep-daily' => 2,
> + },
> + expected => generate_expected([$vmids[0]], ['remove', 'remove', 'keep', 'remove', 'keep', 'keep']),
> + },
> +];
> +
> +plan tests => scalar @$tests;
> +
> +for my $tt (@$tests) {
> +
> + my $got = eval {
> + my @list = PVE::Storage::Plugin->prune_backups($tt->{scfg}, $storeid, $tt->{prune_opts}, $tt->{vmid}, 1);
> + my @sorted = sort { $a->{volid} cmp $b->{volid} } @list;
> + return \@sorted;
> + };
> + $got = $@ if $@;
> +
> + is_deeply($got, $tt->{expected}, $tt->{description}) || diag(explain($got));
> +}
> +
> +done_testing();
> +
> +1;
> diff --git a/test/run_plugin_tests.pl b/test/run_plugin_tests.pl
> index 54322bb..d33429a 100755
> --- a/test/run_plugin_tests.pl
> +++ b/test/run_plugin_tests.pl
> @@ -16,6 +16,7 @@ my $res = $harness->runtests(
> "path_to_volume_id_test.pm",
> "get_subdir_test.pm",
> "filesystem_path_test.pm",
> + "prune_backups_test.pm",
> );
>
> exit -1 if !$res || $res->{failed} || $res->{parse_errors};
> --
> 2.20.1
>
>
> _______________________________________________
> pve-devel mailing list
> pve-devel at pve.proxmox.com
> https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
>
>
More information about the pve-devel
mailing list