[PATCH qemu-server 1/1] qemu: add offline migration from dead node
Alexandre Derumier
alexandre.derumier at groupe-cyllene.com
Mon Mar 24 12:15:29 CET 2025
Verify that the source node is really dead (corosync membership, ping and ssh),
then move the VM config file to the target node directly inside /etc/pve.
Signed-off-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
---
PVE/API2/Qemu.pm | 56 ++++++++++++++++++++++++++++++++++++++++++++----
1 file changed, 52 insertions(+), 4 deletions(-)
diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index 156b1c7b..58c454a6 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -4764,6 +4764,9 @@ __PACKAGE__->register_method({
description => "Target node.",
completion => \&PVE::Cluster::complete_migration_target,
}),
+ deadnode => get_standard_option('pve-node', {
+ description => "Dead source node.",
+ }),
online => {
type => 'boolean',
description => "Use online/live migration if VM is running. Ignored if VM is stopped.",
@@ -4813,8 +4816,9 @@ __PACKAGE__->register_method({
my $authuser = $rpcenv->get_user();
my $target = extract_param($param, 'target');
+ my $deadnode = extract_param($param, 'deadnode');
- my $localnode = PVE::INotify::nodename();
+ my $localnode = $deadnode ? $deadnode : PVE::INotify::nodename();
raise_param_exc({ target => "target is local node."}) if $target eq $localnode;
PVE::Cluster::check_cfs_quorum();
@@ -4835,14 +4839,43 @@ __PACKAGE__->register_method({
raise_param_exc({ migration_network => "Only root may use this option." })
if $param->{migration_network} && $authuser ne 'root@pam';
+ raise_param_exc({ deadnode => "Only root may use this option." })
+ if $param->{deadnode} && $authuser ne 'root@pam';
+
# test if VM exists
- my $conf = PVE::QemuConfig->load_config($vmid);
+ my $conf = $deadnode ? PVE::QemuConfig->load_config($vmid, $deadnode) : PVE::QemuConfig->load_config($vmid);
# try to detect errors early
PVE::QemuConfig->check_lock($conf);
- if (PVE::QemuServer::check_running($vmid)) {
+ if ($deadnode) {
+ die "Can't do online migration of a dead node.\n" if $param->{online};
+ my $members = PVE::Cluster::get_members();
+ die "The deadnode $deadnode seem to be alive" if $members->{$deadnode} && $members->{$deadnode}->{online};
+
+ print "test if deadnode $deadnode respond to ping\n";
+ eval {
+ PVE::Tools::run_command("/usr/bin/ping -c 1 $members->{$deadnode}->{ip}");
+ };
+ if(!$@){
+ die "error: ping to target $deadnode is still working. Node seem to be alive.";
+ }
+
+ #make an extra ssh connection to double check that it's not just a corosync crash
+ my $sshinfo = PVE::SSHInfo::get_ssh_info($deadnode);
+ my $sshcmd = PVE::SSHInfo::ssh_info_to_command($sshinfo);
+ push @$sshcmd, 'hostname';
+ print "test if deadnode $deadnode respond to ssh\n";
+ eval {
+ PVE::Tools::run_command($sshcmd, timeout => 1);
+ };
+ if(!$@){
+ die "error: ssh connection to target $deadnode is still working. Node seem to be alive.";
+ }
+
+
+ } elsif (PVE::QemuServer::check_running($vmid)) {
die "can't migrate running VM without --online\n" if !$param->{online};
my $repl_conf = PVE::ReplicationConfig->new();
@@ -4881,7 +4914,22 @@ __PACKAGE__->register_method({
PVE::QemuServer::check_storage_availability($storecfg, $conf, $target);
}
- if (PVE::HA::Config::vm_is_ha_managed($vmid) && $rpcenv->{type} ne 'ha') {
+ if ($deadnode) {
+ my $realcmd = sub {
+ my $config_fn = PVE::QemuConfig->config_file($vmid, $deadnode);
+ my $new_config_fn = PVE::QemuConfig->config_file($vmid, $target);
+
+ rename($config_fn, $new_config_fn)
+ or die "failed to move config file to node '$target': $!\n";
+ };
+
+ my $worker = sub {
+ return PVE::GuestHelpers::guest_migration_lock($vmid, 10, $realcmd);
+ };
+
+ return $rpcenv->fork_worker('qmigrate', $vmid, $authuser, $worker);
+
+ } elsif (PVE::HA::Config::vm_is_ha_managed($vmid) && $rpcenv->{type} ne 'ha') {
my $hacmd = sub {
my $upid = shift;
--
2.39.5
More information about the pve-devel
mailing list