[pve-devel] [RFC qemu-server 3/3] migrate: add live-migration of replicated disks

Fabian Grünbichler f.gruenbichler at proxmox.com
Thu Jul 18 14:43:48 CEST 2019


with incremental drive-mirror and dirty-bitmap tracking.

1.) get replicated disks that are currently referenced by the running VM
2.) add a block-dirty-bitmap to each of them
3.) replicate ALL replicated disks
4.) pass bitmaps from 2) to drive-mirror for disks from 1)
5.) skip replicated disks when cleaning up volumes on either source or
target

The added error handling just removes the bitmaps if an error occurs at
any point after step 2), except when the handover to the target node has
already happened, since in that case the bitmaps are cleaned up together
with the source VM.

Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
---

Notes:
    tested with single and multiple replicated disks, with and without (other)
    - local, replicated, unreferenced disks
    - local, non-replicated referenced disks
    - local, non-replicated unreferenced disks
    - shared disks
    
    and with switching targetstorage (obviously the replicated disks don't switch
    storage in that case, but the others do as before ;))
    
    one thing to note is that since we add the bitmaps before making the
    replication (snapshots), they will contain some writes that have already been
    part of the replication triggered in 3. AFAICT this should not be a
    problem, but we could also play it safer and do a
    
    - freeze
    - add bitmaps
    - start replication (which will unfreeze after taking the snapshots)
    
    sequence?
    
    the bitmap info is stored in the same hash as the other live local disk
    migration stuff, even though it comes from a different source and at a
    different time. I initially had it in a separate hash in $self->{bitmaps}; both
    variants are equally inelegant IMHO :P

 PVE/QemuMigrate.pm | 53 ++++++++++++++++++++++++++++++++++++++++++++--
 PVE/QemuServer.pm  |  7 ++++++
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index 52f065e..f12446a 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -446,9 +446,32 @@ sub sync_disks {
 	my $rep_cfg = PVE::ReplicationConfig->new();
 
 	if (my $jobcfg = $rep_cfg->find_local_replication_job($vmid, $self->{node})) {
-	    die "can't live migrate VM with replicated volumes\n" if $self->{running};
 	    my $start_time = time();
 	    my $logfunc = sub { my ($msg) = @_;  $self->log('info', $msg); };
+
+	    if ($self->{running}) {
+		my $live_replicatable_volumes = {};
+		PVE::QemuServer::foreach_drive($conf, sub {
+		    my ($ds, $drive) = @_;
+
+		    my $volid = $drive->{file};
+		    $live_replicatable_volumes->{$ds} = $volid
+			if defined($replicatable_volumes->{$volid});
+		});
+		foreach my $drive (keys %$live_replicatable_volumes) {
+		    my $volid = $live_replicatable_volumes->{$drive};
+
+		    my $bitmap = "repl_$drive";
+
+		    # start tracking before replication to get full delta + a few duplicates
+		    $self->log('info', "$drive: start tracking writes using block-dirty-bitmap '$bitmap'");
+		    PVE::QemuServer::vm_mon_cmd($vmid, 'block-dirty-bitmap-add', node => "drive-$drive", name => $bitmap);
+
+		    # other info comes from target node in phase 2
+		    $self->{target_drive}->{$drive}->{bitmap} = $bitmap;
+		}
+	    }
+
 	    $rep_volumes = PVE::Replication::run_replication(
 	       'PVE::QemuConfig', $jobcfg, $start_time, $start_time, $logfunc);
 	    $self->{replicated_volumes} = $rep_volumes;
@@ -487,6 +510,8 @@ sub cleanup_remotedisks {
     my ($self) = @_;
 
     foreach my $target_drive (keys %{$self->{target_drive}}) {
+	# don't clean up replicated disks!
+	next if defined($self->{target_drive}->{$target_drive}->{bitmap});
 
 	my $drive = PVE::QemuServer::parse_drive($target_drive, $self->{target_drive}->{$target_drive}->{volid});
 	my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
@@ -501,6 +526,16 @@ sub cleanup_remotedisks {
     }
 }
 
+sub cleanup_bitmaps { # remove every block-dirty-bitmap recorded in $self->{target_drive}
+    my ($self) = @_;
+    foreach my $drive (keys %{$self->{target_drive}}) { # keys only; a bare %hash also yields the value hashrefs
+	my $bitmap = $self->{target_drive}->{$drive}->{bitmap};
+	next if !$bitmap; # no bitmap recorded for this drive
+	$self->log('info', "$drive: removing block-dirty-bitmap '$bitmap'");
+	PVE::QemuServer::vm_mon_cmd($self->{vmid}, 'block-dirty-bitmap-remove', node => "drive-$drive", name => $bitmap);
+    }
+}
+
 sub phase1 {
     my ($self, $vmid) = @_;
 
@@ -534,6 +569,12 @@ sub phase1_cleanup {
 	    # fixme: try to remove ?
 	}
     }
+
+    eval { $self->cleanup_bitmaps() };
+    if (my $err =$@) {
+	$self->log('err', $err);
+    }
+
 }
 
 sub phase2 {
@@ -679,9 +720,10 @@ sub phase2 {
 	    my $source_sid = PVE::Storage::Plugin::parse_volume_id($conf->{$drive});
 	    my $target_sid = PVE::Storage::Plugin::parse_volume_id($target->{volid});
 	    my $bwlimit = PVE::Storage::get_bandwidth_limit('migrate', [$source_sid, $target_sid], $opt_bwlimit);
+	    my $bitmap = $target->{bitmap};
 
 	    $self->log('info', "$drive: start migration to $nbd_uri");
-	    PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 1, undef, $bwlimit);
+	    PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 1, undef, $bwlimit, $bitmap);
 	}
     }
 
@@ -898,6 +940,10 @@ sub phase2_cleanup {
 	if (my $err = $@) {
 	    $self->log('err', $err);
 	}
+	eval { $self->cleanup_bitmaps() };
+	if (my $err =$@) {
+	    $self->log('err', $err);
+	}
     }
 
     my $nodename = PVE::INotify::nodename();
@@ -1057,6 +1103,9 @@ sub phase3_cleanup {
 	my $volids = $self->{online_local_volumes};
 
 	foreach my $volid (@$volids) {
+	    # keep replicated volumes!
+	    next if $self->{replicated_volumes}->{$volid};
+
 	    eval { PVE::Storage::vdisk_free($self->{storecfg}, $volid); };
 	    if (my $err = $@) {
 		$self->log('err', "removing local copy of '$volid' failed - $err");
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 72c5d2d..fcbe54a 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -5267,11 +5267,18 @@ sub vm_start {
 		$local_volumes->{$ds} = [$volid, $storeid, $volname];
 	    });
 
+	    my $replicatable_volumes = PVE::QemuConfig->get_replicatable_volumes($storecfg, $vmid, $conf);
+
 	    my $format = undef;
 
 	    foreach my $opt (sort keys %$local_volumes) {
 
 		my ($volid, $storeid, $volname) = @{$local_volumes->{$opt}};
+		if ($replicatable_volumes->{$volid}) {
+		    # re-use existing, replicated volume with bitmap on source side
+		    $local_volumes->{$opt} = $conf->{${opt}};
+		    next;
+		}
 		my $drive = parse_drive($opt, $conf->{$opt});
 
 		#if remote storage is specified, use default format
-- 
2.20.1





More information about the pve-devel mailing list