[pve-devel] [PATCH 2/6] qemu_drive_mirror : handle multiple jobs

Wolfgang Bumiller w.bumiller at proxmox.com
Thu Nov 10 13:21:00 CET 2016


On Tue, Nov 08, 2016 at 04:29:30AM +0100, Alexandre Derumier wrote:
> we can now run multiple drive_mirror jobs in parallel.
> 
> block-job-complete can be skipped if we want to add more mirror jobs later.
> 
> also add support for NBD URIs to qemu_drive_mirror
> 
> Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
> ---
>  PVE/QemuServer.pm | 171 +++++++++++++++++++++++++++++++++++++++---------------
>  1 file changed, 123 insertions(+), 48 deletions(-)
> 
> diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
> index 54edd96..e989670 100644
> --- a/PVE/QemuServer.pm
> +++ b/PVE/QemuServer.pm
> @@ -5824,91 +5824,165 @@ sub qemu_img_format {
>  }
>  
>  sub qemu_drive_mirror {
> -    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized) = @_;
> +    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $skipcomplete) = @_;
>  
> -    my $storecfg = PVE::Storage::config();
> -    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
> +    $jobs = {} if !$jobs;
> +
> +    my $qemu_target;
> +    my $format;
>  
> -    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
> +    if($dst_volid =~ /^nbd:(localhost|[\d\.]+|\[[\d\.:a-fA-F]+\]):(\d+)/) {
> +	$qemu_target = $dst_volid;
> +	my $server = $1;
> +	my $port = $2;
> +	$format = "nbd";
> +	die "can't connect remote nbd server $server:$port" if !PVE::Network::tcp_ping($server, $port, 2);

I'm not all too happy about this check here as it is a TOCTOU race.
(I'm not happy about some of the other uses of it either, but some of
those are only for status queries, iow. to display information, where
it's fine.)

However, in this case, if broken/missing connections still cannot be
caught (as in my previous tests), then this check only hides 99.99% of
the cases while still wrongly deleting disks in the remaining 0.01%,
which is unacceptable behavior.
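
If anything, once the mirror has started, the job's own error state
should be the authoritative signal, not a ping taken beforehand.
Something along these lines (untested sketch; note the assumption that
'io-status' is only meaningful when the job was started with an
explicit on-error policy such as 'stop'):

    my $stats = vm_mon_cmd($vmid, "query-block-jobs");
    foreach my $stat (@$stats) {
        next if $stat->{device} ne "drive-$drive";
        # anything other than 'ok' means the connection broke mid-mirror,
        # so bail out here instead of going on to delete the source disk
        die "drive-$drive: mirror hit an I/O error\n"
            if defined($stat->{'io-status'}) && $stat->{'io-status'} ne 'ok';
    }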

> +    } else {
> +
> +	my $storecfg = PVE::Storage::config();
> +	my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
> +
> +	my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
>  
> -    my $format = qemu_img_format($dst_scfg, $dst_volname);
> +	$format = qemu_img_format($dst_scfg, $dst_volname);
>  
> -    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
> +	my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
>  
> -    my $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
> +	$qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
> +    }
>  
>      my $opts = { timeout => 10, device => "drive-$drive", mode => "existing", sync => "full", target => $qemu_target };
>      $opts->{format} = $format if $format;
>  
> -    print "drive mirror is starting (scanning bitmap) : this step can take some minutes/hours, depend of disk size and storage speed\n";
> +    print "drive mirror is starting for drive-$drive\n";
>  
> -    my $finish_job = sub {
> -	while (1) {
> -	    my $stats = vm_mon_cmd($vmid, "query-block-jobs");
> -	    my $stat = @$stats[0];
> -	    last if !$stat;
> -	    sleep 1;
> -	}
> -    };
> +    eval { vm_mon_cmd($vmid, "drive-mirror", %$opts); }; # if a job is already running for this device, this throws an error
> +    if (my $err = $@) {
> +	eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };

$jobs is still empty at this point. The assignment below should be moved
up.

> +	die "mirroring error: $err";
> +    }
> +
> +    $jobs->{"drive-$drive"} = {};
This one ^
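
I.e. register the job before trying to start it, so that on failure the
cancel call also covers everything started so far, roughly:

    $jobs->{"drive-$drive"} = {};
    eval { vm_mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "mirroring error: $err";
    }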

> +
> +    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $skipcomplete);
> +}
> +
> +sub qemu_drive_mirror_monitor {
> +    my ($vmid, $vmiddst, $jobs, $skipcomplete) = @_;
>  
>      eval {
> -    vm_mon_cmd($vmid, "drive-mirror", %$opts);
> +
> +	my $err_complete = 0;
> +
>  	while (1) {
> +	    die "storage migration timed out\n" if $err_complete > 300;
> +
>  	    my $stats = vm_mon_cmd($vmid, "query-block-jobs");
> -	    my $stat = @$stats[0];
> -	    die "mirroring job seem to have die. Maybe do you have bad sectors?" if !$stat;
> -	    die "error job is not mirroring" if $stat->{type} ne "mirror";
>  
> -	    my $busy = $stat->{busy};
> -	    my $ready = $stat->{ready};
> +	    my $running_mirror_jobs = {};
> +	    foreach my $stat (@$stats) {
> +		next if $stat->{type} ne 'mirror';
> +		$running_mirror_jobs->{$stat->{device}} = $stat;
> +	    }
>  
> -	    if (my $total = $stat->{len}) {
> -		my $transferred = $stat->{offset} || 0;
> -		my $remaining = $total - $transferred;
> -		my $percent = sprintf "%.2f", ($transferred * 100 / $total);
> +	    my $readycounter = 0;
>  
> -		print "transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n";
> -	    }
> +	    foreach my $job (keys %$jobs) {
> +
> +	        if(defined($jobs->{$job}->{complete}) && !defined($running_mirror_jobs->{$job})) {
> +		    print "$job : finished\n";
> +		    delete $jobs->{$job};
> +		    next;
> +		}
> +
> +		die "$job: mirroring has been cancelled. Maybe do you have bad sectors?" if !defined($running_mirror_jobs->{$job});
>  
> +		my $busy = $running_mirror_jobs->{$job}->{busy};
> +		my $ready = $running_mirror_jobs->{$job}->{ready};
> +		if (my $total = $running_mirror_jobs->{$job}->{len}) {
> +		    my $transferred = $running_mirror_jobs->{$job}->{offset} || 0;
> +		    my $remaining = $total - $transferred;
> +		    my $percent = sprintf "%.2f", ($transferred * 100 / $total);
>  
> -	    if ($stat->{ready} eq 'true') {
> +		    print "$job: transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n";
> +		}
>  
> -		last if $vmiddst != $vmid;
> +		$readycounter++ if $running_mirror_jobs->{$job}->{ready} eq 'true';
> +	    }
>  
> -		# try to switch the disk if source and destination are on the same guest
> -		eval { vm_mon_cmd($vmid, "block-job-complete", device => "drive-$drive") };
> -		if (!$@) {
> -		    &$finish_job();
> +	    last if scalar(keys %$jobs) == 0;
> +
> +	    if ($readycounter == scalar(keys %$jobs)) {
> +		print "all mirroring jobs are ready \n";
> +		last if $skipcomplete; #do the complete later
> +
> +		if ($vmiddst && $vmiddst != $vmid) {
> +		    # if we clone a disk for a new target vm, we don't switch the disk
> +		    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);
>  		    last;
> +		} else {
> +
> +		    foreach my $job (keys %$jobs) {
> +			# try to switch the disk if source and destination are on the same guest
> +			print "$job : Try to complete block job\n";
> +
> +			eval { vm_mon_cmd($vmid, "block-job-complete", device => $job) };
> +			if ($@ =~ m/cannot be completed/) {
> +			    print "$job : block job cannot be complete. Try again \n";
> +			    $err_complete++;
> +			}else {
> +			    print "$job : complete ok : flushing pending writes\n";
> +			    $jobs->{$job}->{complete} = 1;
> +			}
> +		    }
>  		}
> -		die $@ if $@ !~ m/cannot be completed/;
>  	    }
>  	    sleep 1;
>  	}
> -
> -
>      };
>      my $err = $@;
>  
> -    my $cancel_job = sub {
> -	vm_mon_cmd($vmid, "block-job-cancel", device => "drive-$drive");
> -	&$finish_job();
> -    };
> -
>      if ($err) {
> -	eval { &$cancel_job(); };
> +	eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
>  	die "mirroring error: $err";
>      }
>  
> -    if ($vmiddst != $vmid) {
> -	# if we clone a disk for a new target vm, we don't switch the disk
> -	&$cancel_job(); # so we call block-job-cancel
> +}
> +
> +sub qemu_blockjobs_cancel {
> +    my ($vmid, $jobs) = @_;
> +
> +    foreach my $job (keys %$jobs) {
> +	print "$job: try to cancel block job\n";
> +	eval { vm_mon_cmd($vmid, "block-job-cancel", device => $job); };
> +	$jobs->{$job}->{cancel} = 1;
> +    }
> +
> +    while (1) {
> +	my $stats = vm_mon_cmd($vmid, "query-block-jobs");
> +
> +	my $running_jobs = {};
> +	foreach my $stat (@$stats) {
> +	    $running_jobs->{$stat->{device}} = $stat;
> +	}
> +
> +	foreach my $job (keys %$jobs) {
> +
> +	    if(defined($jobs->{$job}->{cancel}) && !defined($running_jobs->{$job})) {
> +		print "$job : finished\n";
> +		delete $jobs->{$job};
> +	    }
> +	}
> +
> +	last if scalar(keys %$jobs) == 0;
> +
> +	sleep 1;
>      }
>  }
>  
>  sub clone_disk {
>      my ($storecfg, $vmid, $running, $drivename, $drive, $snapname,
> -	$newvmid, $storage, $format, $full, $newvollist) = @_;
> +	$newvmid, $storage, $format, $full, $newvollist, $jobs, $skipcomplete) = @_;
>  
>      my $newvolid;
>  
> @@ -5917,6 +5991,7 @@ sub clone_disk {
>  	$newvolid = PVE::Storage::vdisk_clone($storecfg,  $drive->{file}, $newvmid, $snapname);
>  	push @$newvollist, $newvolid;
>      } else {
> +
>  	my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
>  	$storeid = $storage if $storage;
>  
> @@ -5949,7 +6024,7 @@ sub clone_disk {
>  		    if $drive->{iothread};
>  	    }
>  
> -	    qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit);
> +	    qemu_drive_mirror($vmid, $drivename, $newvolid, $newvmid, $sparseinit, $jobs, $skipcomplete);
>  	}
>      }
>  
> -- 
> 2.1.4
