[pve-devel] [PATCH v2 qemu-server 1/4] add qemu_drive_mirror_monitor completion modes

Mira Limbeck m.limbeck at proxmox.com
Tue Mar 17 20:56:09 CET 2020


With QEMU 4.2 we encountered a problem with unix sockets and SSH socket
forwarding for drive-mirror. It seems that, for some reason, the socket
gets reopened again and again after it closes. This can be worked around
by specifying 'block-job-cancel' instead of 'block-job-complete' when
we're not interested in swapping the disks again from NBD to their
original protocol. This is always the case when we use drive-mirror for
live-migrating a VM.

qemu_drive_mirror is used for migration and for clone_disk. All in all
we have 3 cases to handle: the 'skip' case, which skips the completion
of the job; the 'wait' case, which was the default before and still is
when $completion is undefined; and the new 'wait_noswap' case, which is
used for live migration.
If 'wait_noswap' is specified, we issue a 'block-job-cancel' once the block
job is in 'ready' state. This completes the block job without swapping the
disks.

clone_disk always uses 'block-job-cancel' via the qemu_blockjobs_cancel
sub.

Signed-off-by: Mira Limbeck <m.limbeck at proxmox.com>
---
v2:
 - introduced this patch

 PVE/API2/Qemu.pm   |  3 ++-
 PVE/QemuMigrate.pm |  4 ++--
 PVE/QemuServer.pm  | 20 +++++++++++++++-----
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index d8d3f3e..21a0eae 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -2938,6 +2938,7 @@ __PACKAGE__->register_method({
 		    foreach my $opt (keys %$drives) {
 			my $drive = $drives->{$opt};
 			my $skipcomplete = ($total_jobs != $i); # finish after last drive
+			my $completion = $skipcomplete ? 'skip' : 'wait';
 
 			my $src_sid = PVE::Storage::parse_volume_id($drive->{file});
 			my $storage_list = [ $src_sid ];
@@ -2946,7 +2947,7 @@ __PACKAGE__->register_method({
 
 			my $newdrive = PVE::QemuServer::clone_disk($storecfg, $vmid, $running, $opt, $drive, $snapname,
 								   $newid, $storage, $format, $fullclone->{$opt}, $newvollist,
-								   $jobs, $skipcomplete, $oldconf->{agent}, $clonelimit);
+								   $jobs, $completion, $oldconf->{agent}, $clonelimit);
 
 			$newconf->{$opt} = PVE::QemuServer::print_drive($newdrive);
 
diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index 44e4c57..10c0ff2 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -703,7 +703,7 @@ sub phase2 {
 	    my $bwlimit = PVE::Storage::get_bandwidth_limit('migration', [$source_sid, $target_sid], $opt_bwlimit);
 
 	    $self->log('info', "$drive: start migration to $nbd_uri");
-	    PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 1, undef, $bwlimit);
+	    PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit);
 	}
     }
 
@@ -968,7 +968,7 @@ sub phase3_cleanup {
 
     if ($self->{storage_migration}) {
 	# finish block-job
-	eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}); };
+	eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}, 'wait_noswap'); };
 
 	if (my $err = $@) {
 	    eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $self->{storage_migration_jobs}) };
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index b2ff515..429ec05 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -6521,7 +6521,7 @@ sub qemu_img_format {
 }
 
 sub qemu_drive_mirror {
-    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $skipcomplete, $qga, $bwlimit) = @_;
+    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit) = @_;
 
     $jobs = {} if !$jobs;
 
@@ -6563,11 +6563,13 @@ sub qemu_drive_mirror {
 	die "mirroring error: $err\n";
     }
 
-    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $skipcomplete, $qga);
+    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
 }
 
 sub qemu_drive_mirror_monitor {
-    my ($vmid, $vmiddst, $jobs, $skipcomplete, $qga) = @_;
+    my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
+
+    $completion //= 'wait'; # same semantic as with 'skipcomplete' before
 
     eval {
 	my $err_complete = 0;
@@ -6612,7 +6614,7 @@ sub qemu_drive_mirror_monitor {
 
 	    if ($readycounter == scalar(keys %$jobs)) {
 		print "all mirroring jobs are ready \n";
-		last if $skipcomplete; #do the complete later
+		last if $completion eq 'skip'; #do the complete later
 
 		if ($vmiddst && $vmiddst != $vmid) {
 		    my $agent_running = $qga && qga_check_running($vmid);
@@ -6642,7 +6644,15 @@ sub qemu_drive_mirror_monitor {
 			# try to switch the disk if source and destination are on the same guest
 			print "$job: Completing block job...\n";
 
-			eval { mon_cmd($vmid, "block-job-complete", device => $job) };
+			my $op;
+			if ($completion eq 'wait') {
+			    $op = 'block-job-complete';
+			} elsif ($completion eq 'wait_noswap') {
+			    $op = 'block-job-cancel';
+			} else {
+			    die "invalid completion value: $completion\n";
+			}
+			eval { mon_cmd($vmid, $op, device => $job) };
 			if ($@ =~ m/cannot be completed/) {
 			    print "$job: Block job cannot be completed, try again.\n";
 			    $err_complete++;
-- 
2.20.1





More information about the pve-devel mailing list