[pve-devel] [PATCH 1/7] split qemu_drive_mirror_monitor from qemu_drive_mirror.

Wolfgang Bumiller w.bumiller at proxmox.com
Thu Oct 20 09:43:44 CEST 2016


On Thu, Oct 20, 2016 at 02:35:10AM +0200, Alexandre Derumier wrote:
> we can use multiple drive_mirror in parallel
> 
> Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
> ---
>  PVE/QemuServer.pm | 98 +++++++++++++++++++++++++++++++++----------------------
>  1 file changed, 59 insertions(+), 39 deletions(-)
> 
> diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
> index 728110f..a5aa4c7 100644
> --- a/PVE/QemuServer.pm
> +++ b/PVE/QemuServer.pm
> @@ -5817,70 +5817,90 @@ sub qemu_drive_mirror {
>      my $opts = { timeout => 10, device => "drive-$drive", mode => "existing", sync => "full", target => $qemu_target };
>      $opts->{format} = $format if $format;
>  
> -    print "drive mirror is starting (scanning bitmap) : this step can take some minutes/hours, depend of disk size and storage speed\n";
> +    print "drive mirror is starting for drive-$drive\n";
>  
> -    my $finish_job = sub {
> -	while (1) {
> -	    my $stats = vm_mon_cmd($vmid, "query-block-jobs");
> -	    my $stat = @$stats[0];
> -	    last if !$stat;
> -	    sleep 1;
> +    vm_mon_cmd($vmid, "drive-mirror", %$opts);

This is now outside an eval block, so the cancel_job() error path that
previously covered it no longer applies. (Note that it looks like the
error cases of vm_mon_cmd() *should* not need a cancel_job() call, but
it's better to be safe than sorry.)

> +
> +}
> +
> +sub qemu_drive_mirror_monitor {
> +    my ($vmid, $vmiddst, $skipcomplete) = @_;
> +
> +    my $err_complete = 0;
> +    my $last_nb_running_jobs = 0;
> +    my @drives = ();
> +
> +    my $cancel_job = sub {
> +	my $drives = @_;
> +	foreach my $drive (@drives) {
> +	    vm_mon_cmd($vmid, "block-job-cancel", device => $drive);
>  	}
>      };
>  
>      eval {
> -    vm_mon_cmd($vmid, "drive-mirror", %$opts);
>  	while (1) {
>  	    my $stats = vm_mon_cmd($vmid, "query-block-jobs");
> -	    my $stat = @$stats[0];
> -	    die "mirroring job seem to have die. Maybe do you have bad sectors?" if !$stat;
> -	    die "error job is not mirroring" if $stat->{type} ne "mirror";
>  
> -	    my $busy = $stat->{busy};
> -	    my $ready = $stat->{ready};
> +	    die "too much complete error, migration can't finish" if $err_complete > 300;

I don't know what this error message is trying to tell me, but from the
code below it looks like a 5 minute timeout. Maybe mention that in the
message ;-)
(Maybe rename the $err_complete counter to $job_complete_retries or
something.)

> +	    die "Some mirroring jobs seem to be aborded. Maybe do you have bad sectors?" if @$stats < $last_nb_running_jobs;
>  
> -	    if (my $total = $stat->{len}) {
> -		my $transferred = $stat->{offset} || 0;
> -		my $remaining = $total - $transferred;
> -		my $percent = sprintf "%.2f", ($transferred * 100 / $total);
> +	    last if @$stats == 0 && $last_nb_running_jobs == 0;  #no more block-job running
> +	    my $readycounter = 0;
> +	    @drives = ();
> +	    $last_nb_running_jobs = @$stats;
>  
> -		print "transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n";
> -	    }
> +	    foreach my $stat (@$stats) {
> +		die "error job is not mirroring" if $stat->{type} ne "mirror";
>  
> +		my $busy = $stat->{busy};
> +		my $ready = $stat->{ready};
> +		push @drives, $stat->{device};
> +		if (my $total = $stat->{len}) {
> +		    my $transferred = $stat->{offset} || 0;
> +		    my $remaining = $total - $transferred;
> +		    my $percent = sprintf "%.2f", ($transferred * 100 / $total);
>  
> -	    if ($stat->{ready} eq 'true') {
> +		    print "$stat->{device} transferred: $transferred bytes remaining: $remaining bytes total: $total bytes progression: $percent % busy: $busy ready: $ready \n";
> +		}
> +
> +		$readycounter++ if $stat->{ready} eq 'true';
> +	    }
>  
> -		last if $vmiddst != $vmid;
> +	    if ($readycounter == @$stats) {
> +		print "all drives are ready \n";
> +		last if $skipcomplete; #do the complete later
>  
> -		# try to switch the disk if source and destination are on the same guest
> -		eval { vm_mon_cmd($vmid, "block-job-complete", device => "drive-$drive") };
> -		if (!$@) {
> -		    &$finish_job();
> -		    last;
> +		if ($vmiddst && $vmiddst != $vmid) {
> +		    # if we clone a disk for a new target vm, we don't switch the disk
> +		    eval { &$cancel_job(@drives); };
> +		    $last_nb_running_jobs = 0;
> +		} else {
> +		    foreach my $drive (@drives) {
> +			# try to switch the disk if source and destination are on the same guest
> +			print "Try to complete block job for drive $drive \n";
> +
> +			eval { vm_mon_cmd($vmid, "block-job-complete", device => $drive) };
> +			if ($@ =~ m/cannot be completed/) {
> +			    print "block job cannot be complete for drive $drive. Try again \n";
> +			    $err_complete++;
> +			}else {
> +			    print "complete ok for drive $drive \n";
> +			    $last_nb_running_jobs--;
> +			}
> +		    }
>  		}
> -		die $@ if $@ !~ m/cannot be completed/;
>  	    }
> +	    last if $last_nb_running_jobs == 0;
>  	    sleep 1;
>  	}
> -
> -
>      };
>      my $err = $@;
>  
> -    my $cancel_job = sub {
> -	vm_mon_cmd($vmid, "block-job-cancel", device => "drive-$drive");
> -	&$finish_job();
> -    };
> -
>      if ($err) {
> -	eval { &$cancel_job(); };
> +	eval { &$cancel_job(@drives); };
>  	die "mirroring error: $err";
>      }
>  
> -    if ($vmiddst != $vmid) {
> -	# if we clone a disk for a new target vm, we don't switch the disk
> -	&$cancel_job(); # so we call block-job-cancel
> -    }
>  }
>  
>  sub clone_disk {
> -- 
> 2.1.4
> 
> _______________________________________________
> pve-devel mailing list
> pve-devel at pve.proxmox.com
> http://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel




More information about the pve-devel mailing list