[pve-devel] [PATCH 6/6] add live storage migration with vm migration

Alexandre Derumier aderumier at odiso.com
Wed Oct 26 10:24:01 CEST 2016


This allows migrating disks on local storage to storage on a remote node.

When the VM starts on the target node, new volumes are created and exposed through qemu's embedded nbd server.

qemu drive-mirror is launched on the source vm for each disk, with the nbd server as target.

When drive-mirror reaches 100% for one disk, we don't complete the block job yet and begin mirroring the next disk.
(The mirrors run in parallel, but we try to start them one by one to avoid storage and network overload.)

Then we live migrate the vm to the destination node (drive-mirror is still running at the same time).

When the vm is live migrated (source vm paused, target vm paused), we complete the mirror block jobs.

When this is done, we stop the source vm and resume the target vm.

Signed-off-by: Alexandre Derumier <aderumier at odiso.com>
---
 PVE/API2/Qemu.pm   |   7 ++++
 PVE/QemuMigrate.pm | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index da533df..0f40b76 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -2670,6 +2670,10 @@ __PACKAGE__->register_method({
 		description => "Allow to migrate VMs which use local devices. Only root may use this option.",
 		optional => 1,
 	    },
+	    targetstorage => get_standard_option('pve-storage-id', {
+                description => "Target storage.",
+                optional => 1,
+	    }),
 	},
     },
     returns => {
@@ -2696,6 +2700,9 @@ __PACKAGE__->register_method({
 
 	my $vmid = extract_param($param, 'vmid');
 
+	raise_param_exc({ targetstorage => "Live Storage migration can only be done online" })
+	    if !$param->{online} && $param->{targetstorage};
+
 	raise_param_exc({ force => "Only root may use this option." })
 	    if $param->{force} && $authuser ne 'root at pam';
 
diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index 22a49ef..c1eb364 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -186,8 +186,10 @@ sub prepare {
 	my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
 
 	# check if storage is available on both nodes
+	my $targetsid = $self->{opts}->{targetstorage} ? $self->{opts}->{targetstorage} : $sid;
+
 	my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $sid);
-	PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node});
+	PVE::Storage::storage_check_node($self->{storecfg}, $targetsid, $self->{node});
 
 	if ($scfg->{shared}) {
 	    # PVE::Storage::activate_storage checks this for non-shared storages
@@ -214,8 +216,6 @@ sub prepare {
 sub sync_disks {
     my ($self, $vmid) = @_;
 
-    $self->log('info', "copying disk images");
-
     my $conf = $self->{vmconf};
 
     # local volumes which have been copied
@@ -290,6 +290,7 @@ sub sync_disks {
 
 	    my ($sid, $volname) = PVE::Storage::parse_volume_id($volid);
 
+	    my $targetsid = $self->{opts}->{targetstorage} ? $self->{opts}->{targetstorage} : $sid; 
 	    # check if storage is available on both nodes
 	    my $scfg = PVE::Storage::storage_check_node($self->{storecfg}, $sid);
 	    PVE::Storage::storage_check_node($self->{storecfg}, $sid, $self->{node});
@@ -362,14 +363,17 @@ sub sync_disks {
 	    $self->log('warn', "$err");
 	}
 
-	if ($self->{running} && !$sharedvm) {
-	    die "can't do online migration - VM uses local disks\n";
+	if ($self->{running} && !$sharedvm && !$self->{opts}->{targetstorage}) {
+	    $self->{opts}->{targetstorage} = 1; #use same sid for remote local
 	}
 
 	if ($abort) {
 	    die "can't migrate VM - check log\n";
 	}
 
+	$self->{local_volumes} = $local_volumes;
+	return if $self->{running} && $self->{opts}->{targetstorage};
+
 	# additional checks for local storage
 	foreach my $volid (keys %$local_volumes) {
 	    my ($sid, $volname) = PVE::Storage::parse_volume_id($volid);
@@ -387,6 +391,8 @@ sub sync_disks {
 	    }
 	}
 
+	$self->log('info', "copying disk images");
+
 	foreach my $volid (keys %$local_volumes) {
 	    my ($sid, $volname) = PVE::Storage::parse_volume_id($volid);
 	    push @{$self->{volumes}}, $volid;
@@ -396,6 +402,24 @@ sub sync_disks {
     die "Failed to sync data - $@" if $@;
 }
 
+sub cleanup_remotedisks { # issue 'pvesm free' for every volume recorded in $self->{target_drive}
+    my ($self) = @_; # $self: migration object; reads {target_drive} and {rem_ssh}, may set {errors}
+
+    foreach my $target_drive (keys %{$self->{target_drive}}) { # keys are drive names, values hold {volid}
+
+	my $drive = PVE::QemuServer::parse_drive($target_drive, $self->{target_drive}->{$target_drive}->{volid});
+	my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file}); # split the volume id into storage id and volume name
+
+	my $cmd = [@{$self->{rem_ssh}}, 'pvesm', 'free', "$storeid:$volname"]; # rem_ssh is presumably the ssh prefix for the target node - confirm
+
+	eval{ PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => sub {}) }; # output callbacks are no-ops
+	if (my $err = $@) { # best effort: a failed free is logged, not rethrown
+	    $self->log('err', $err);
+	    $self->{errors} = 1; # flag the failure on $self; the loop continues with the next drive
+	}
+    }
+}
+
 sub phase1 {
     my ($self, $vmid) = @_;
 
@@ -452,7 +476,7 @@ sub phase2 {
 	$spice_ticket = $res->{ticket};
     }
 
-    push @$cmd , 'qm', 'start', $vmid, '--skiplock', '--migratedfrom', $nodename;
+    push @$cmd , 'qm', 'start', $vmid, '--skiplock', '--migratedfrom', $nodename, '--targetstorage', $self->{opts}->{targetstorage};
 
     # we use TCP only for unsecure migrations as TCP ssh forward tunnels often
     # did appeared to late (they are hard, if not impossible, to check for)
@@ -496,6 +520,16 @@ sub phase2 {
         elsif ($line =~ m/^spice listens on port (\d+)$/) {
 	    $spice_port = int($1);
 	}
+        elsif ($line =~ m/^storage migration listens on nbd:(localhost|[\d\.]+|\[[\d\.:a-fA-F]+\]):(\d+):exportname=(\S+) volume:(\S+)$/) {
+	    my $volid = $4;
+	    my $nbd_uri = "nbd:$1:$2:exportname=$3";	
+	    my $targetdrive = $3;
+	    $targetdrive =~ s/drive-//g;
+
+	    $self->{target_drive}->{$targetdrive}->{volid} = $volid;
+	    $self->{target_drive}->{$targetdrive}->{nbd_uri} = $nbd_uri;
+
+	}
     }, errfunc => sub {
 	my $line = shift;
 	$self->log('info', $line);
@@ -540,6 +574,19 @@ sub phase2 {
     }
 
     my $start = time();
+
+    if ($self->{opts}->{targetstorage}) {
+	$self->{storage_migration} = 1;
+	$self->{storage_migration_jobs} = {};
+	$self->log('info', "starting storage migration");
+
+	die "the number of destination local disk is not equal to number of source local disk" if (scalar(keys %{$self->{target_drive}}) != scalar(keys %{$self->{local_volumes}}));
+	foreach my $drive (keys %{$self->{target_drive}}){
+	    $self->log('info', "$drive: start migration to to $self->{target_drive}->{$drive}->{nbd_uri}");
+	    PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $self->{target_drive}->{$drive}->{nbd_uri}, $vmid, undef, $self->{storage_migration_jobs}, 1);
+	}
+    }
+
     $self->log('info', "starting online/live migration on $ruri");
     $self->{livemigration} = 1;
 
@@ -739,6 +786,19 @@ sub phase2_cleanup {
     }
 
     # cleanup ressources on target host
+    if ( $self->{storage_migration} ) {
+
+	eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid) };
+	if (my $err = $@) {
+	    $self->log('err', $err);
+	}
+
+	eval { PVE::QemuMigrate::cleanup_remotedisks($self) };
+	if (my $err = $@) {
+	    $self->log('err', $err);
+	}
+    }
+
     my $nodename = PVE::INotify::nodename();
  
     my $cmd = [@{$self->{rem_ssh}}, 'qm', 'stop', $vmid, '--skiplock', '--migratedfrom', $nodename];
@@ -755,6 +815,9 @@ sub phase2_cleanup {
 	    $self->{errors} = 1;
 	}
     }
+
+
+
 }
 
 sub phase3 {
@@ -780,6 +843,25 @@ sub phase3_cleanup {
     my $conf = $self->{vmconf};
     return if $self->{phase2errors};
 
+    if ($self->{storage_migration}) {
+
+	eval { PVE::QemuServer::qemu_drive_mirror_monitor($vmid, undef, $self->{storage_migration_jobs}); }; #finish block-job
+
+	if (my $err = $@) {
+	    #what do we do ? abort migration ?
+	    eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid) };
+	    eval { PVE::QemuMigrate::cleanup_remotedisks($self) };
+
+	} else {
+
+	    foreach my $target_drive (keys %{$self->{target_drive}}) {
+		my $drive = PVE::QemuServer::parse_drive($target_drive, $self->{target_drive}->{$target_drive}->{volid});
+		$conf->{$target_drive} = PVE::QemuServer::print_drive($vmid, $drive);
+		PVE::QemuConfig->write_config($vmid, $conf);
+	    }
+	}
+    }
+
     # move config to remote node
     my $conffile = PVE::QemuConfig->config_file($vmid);
     my $newconffile = PVE::QemuConfig->config_file($vmid, $self->{node});
@@ -787,6 +869,7 @@ sub phase3_cleanup {
     die "Failed to move config to node '$self->{node}' - rename failed: $!\n"
         if !rename($conffile, $newconffile);
 
+
     if ($self->{livemigration}) {
 	# now that config file is move, we can resume vm on target if livemigrate
 	my $cmd = [@{$self->{rem_ssh}}, 'qm', 'resume', $vmid, '--skiplock', '--nocheck'];
@@ -834,6 +917,19 @@ sub phase3_cleanup {
 	$self->{errors} = 1;
     }
 
+    if($self->{storage_migration}) {
+	#delete source volids ?
+
+	#stop nbd server to remote vm
+	my $cmd = [@{$self->{rem_ssh}}, 'qm', 'nbdstop', $vmid];
+
+	eval{ PVE::Tools::run_command($cmd, outfunc => sub {}, errfunc => sub {}) };
+	if (my $err = $@) {
+	    $self->log('err', $err);
+	    $self->{errors} = 1;
+	}
+    }
+
     # clear migrate lock
     my $cmd = [ @{$self->{rem_ssh}}, 'qm', 'unlock', $vmid ];
     $self->cmd_logerr($cmd, errmsg => "failed to clear migrate lock");
-- 
2.1.4




More information about the pve-devel mailing list