[pve-devel] [PATCH v4 qemu-server 7/9] fix #5440: vzdump: better cleanup fleecing images after hard errors

Fiona Ebner f.ebner at proxmox.com
Mon Nov 11 14:54:34 CET 2024


By recording the allocated fleecing images in the VM config, they
are not immediately orphaned, should a hard error occur during
backup that prevents cleanup.

Removal of left-over images is attempted during the next backup run.

Suggested-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
---

Changes in v4:
* detach left-over fleecing images from a running VM before attempting
  cleanup
* more logging

 PVE/QemuConfig.pm        | 68 ++++++++++++++++++++++++++++++++++++++++
 PVE/VZDump/QemuServer.pm | 36 ++++++++++++++++-----
 2 files changed, 97 insertions(+), 7 deletions(-)

diff --git a/PVE/QemuConfig.pm b/PVE/QemuConfig.pm
index 8e8a7828..3084f831 100644
--- a/PVE/QemuConfig.pm
+++ b/PVE/QemuConfig.pm
@@ -13,6 +13,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer;
 use PVE::QemuServer::Machine;
 use PVE::QemuServer::Memory qw(get_current_memory);
+use PVE::RESTEnvironment qw(log_warn);
 use PVE::Storage;
 use PVE::Tools;
 use PVE::Format qw(render_bytes render_duration);
@@ -573,4 +574,71 @@ sub has_cloudinit {
     return $found;
 }
 
+# Caller is expected to deal with volumes from an already existing 'fleecing-images' entry in the
+# configuration first.
+sub record_fleecing_images {
+    my ($vmid, $volids) = @_;
+
+    return if scalar($volids->@*) == 0;
+
+    PVE::QemuConfig->lock_config($vmid, sub {
+	my $conf = PVE::QemuConfig->load_config($vmid);
+	$conf->{'fleecing-images'} = join(',', $volids->@*);
+	PVE::QemuConfig->write_config($vmid, $conf);
+    });
+}
+
+sub cleanup_fleecing_images {
+    my ($vmid, $storecfg, $log_func) = @_;
+
+    if (!$log_func) {
+	$log_func = sub {
+	    my ($level, $line) = @_;
+	    chomp($line);
+	    if ($level eq 'info') {
+		print "$line\n";
+	    } else {
+		log_warn($line);
+	    }
+	};
+    }
+
+    my $volids = [];
+    my $failed = [];
+
+    # detach any left-overs from a running VM
+    if (PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+	my $block_info = mon_cmd($vmid, "query-block");
+	for my $info ($block_info->@*) {
+	    my $device_id = $info->{device};
+	    next if $device_id !~ m/-fleecing$/;
+
+	    $log_func->('info', "detaching (old) fleecing image for '$device_id'");
+	    $device_id =~ s/^drive-//; # re-added by qemu_drivedel()
+	    eval { PVE::QemuServer::qemu_drivedel($vmid, $device_id) };
+	    $log_func->('warn', "error detaching (old) fleecing image '$device_id' - $@") if $@;
+	}
+    }
+
+    PVE::QemuConfig->lock_config($vmid, sub {
+	my $conf = PVE::QemuConfig->load_config($vmid);
+	if ($conf->{'fleecing-images'}) {
+	    $volids = [PVE::Tools::split_list($conf->{'fleecing-images'})];
+	    delete $conf->{'fleecing-images'};
+	    PVE::QemuConfig->write_config($vmid, $conf);
+	}
+    });
+
+    for my $volid ($volids->@*) {
+	$log_func->('info', "removing (old) fleecing image '$volid'");
+	eval { PVE::Storage::vdisk_free($storecfg, $volid); };
+	if (my $err = $@) {
+	    $log_func->('warn', "error removing fleecing image '$volid' - $err");
+	    push $failed->@*, $volid;
+	}
+    }
+
+    record_fleecing_images($vmid, $failed);
+}
+
 1;
diff --git a/PVE/VZDump/QemuServer.pm b/PVE/VZDump/QemuServer.pm
index 17f63568..240e1e95 100644
--- a/PVE/VZDump/QemuServer.pm
+++ b/PVE/VZDump/QemuServer.pm
@@ -530,15 +530,25 @@ sub get_and_check_pbs_encryption_config {
     die "internal error - unhandled case for getting & checking PBS encryption ($keyfile, $master_keyfile)!";
 }
 
+# Helper is intended to be called from allocate_fleecing_images() only. Otherwise, fleecing volids
+# have already been recorded in the configuration and PVE::QemuConfig::cleanup_fleecing_images()
+# should be used instead.
 my sub cleanup_fleecing_images {
-    my ($self, $disks) = @_;
+    my ($self, $vmid, $disks) = @_;
+
+    my $failed = [];
 
     for my $di ($disks->@*) {
 	if (my $volid = $di->{'fleece-volid'}) {
 	    eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
-	    $self->log('warn', "error removing fleecing image '$volid' - $@") if $@;
+	    if (my $err = $@) {
+		$self->log('warn', "error removing fleecing image '$volid' - $err");
+		push $failed->@*, $volid;
+	    }
 	}
     }
+
+    PVE::QemuConfig::record_fleecing_images($vmid, $failed);
 }
 
 my sub allocate_fleecing_images {
@@ -546,8 +556,7 @@ my sub allocate_fleecing_images {
 
     die "internal error - no fleecing storage specified\n" if !$fleecing_storeid;
 
-    # TODO what about potential left-over images from a failed attempt? Just
-    # auto-remove? While unlikely, could conflict with manually created image from user...
+    my $fleece_volids = [];
 
     eval {
 	my $n = 0; # counter for fleecing image names
@@ -564,6 +573,8 @@ my sub allocate_fleecing_images {
 		$di->{'fleece-volid'} = PVE::Storage::vdisk_alloc(
 		    $self->{storecfg}, $fleecing_storeid, $vmid, $format, $name, $size);
 
+		push $fleece_volids->@*, $di->{'fleece-volid'};
+
 		$n++;
 	    } else {
 		die "implement me (type '$di->{type}')";
@@ -571,9 +582,11 @@ my sub allocate_fleecing_images {
 	}
     };
     if (my $err = $@) {
-	cleanup_fleecing_images($self, $disks);
+	cleanup_fleecing_images($self, $vmid, $disks);
 	die $err;
     }
+
+    PVE::QemuConfig::record_fleecing_images($vmid, $fleece_volids);
 }
 
 my sub detach_fleecing_images {
@@ -633,6 +646,13 @@ my sub check_and_prepare_fleecing {
 	$use_fleecing = 0;
     }
 
+    # clean up potential left-overs from a previous attempt
+    eval {
+	PVE::QemuConfig::cleanup_fleecing_images(
+	    $vmid, $self->{storecfg}, sub { $self->log($_[0], $_[1]); });
+    };
+    $self->log('warn', "attempt to clean up left-over fleecing images failed - $@") if $@;
+
     if ($use_fleecing) {
 	my ($default_format, $valid_formats) = PVE::Storage::storage_default_format(
 	    $self->{storecfg}, $fleecing_opts->{storage});
@@ -794,7 +814,8 @@ sub archive_pbs {
 
     if ($use_fleecing) {
 	detach_fleecing_images($task->{disks}, $vmid);
-	cleanup_fleecing_images($self, $task->{disks});
+	PVE::QemuConfig::cleanup_fleecing_images(
+	    $vmid, $self->{storecfg}, sub { $self->log($_[0], $_[1]); });
     }
 
     die $err if $err;
@@ -994,7 +1015,8 @@ sub archive_vma {
 
     if ($use_fleecing) {
 	detach_fleecing_images($task->{disks}, $vmid);
-	cleanup_fleecing_images($self, $task->{disks});
+	PVE::QemuConfig::cleanup_fleecing_images(
+	    $vmid, $self->{storecfg}, sub { $self->log($_[0], $_[1]); });
     }
 
     if ($err) {
-- 
2.39.5





More information about the pve-devel mailing list