[pve-devel] [PATCH qemu-server v2 8/13] fix #5180: migrate: integrate helper for live-migrating conntrack info
Christoph Heiss
c.heiss at proxmox.com
Thu Apr 24 13:19:28 CEST 2025
Fixes #5180 [0].
This implements the following for live migration:
a) the dbus-vmstate helper is started on the target side, together with the VM
b) the dbus-vmstate helper is started on the source side
c) everything is cleaned up properly in any case (see the example below)
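For instance, on a successful migration, the source stops the helper on
the target through the new dbus-vmstate API endpoint, equivalent to the
following manual call (with <target> and <vmid> as placeholders):

    pvesh create /nodes/<target>/qemu/<vmid>/dbus-vmstate --action stop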
It is currently off by default and must be enabled via the optional
`with-conntrack-state` migration parameter.
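For example, a live migration with conntrack state migration enabled
could then be started like this (assuming `qm migrate` passes the new
parameter through to the migrate API):

    qm migrate <vmid> <target> --online --with-conntrack-state 1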
The conntrack entry migration is done in such a way that it can
soft-fail without impacting the actual migration, i.e. it is considered
"best-effort".
A failed conntrack entry migration does not have any real impact on
functionality, other than possibly exhibiting the problems outlined in
the issue [0].
For remote migrations, only a warning is logged for now. Cross-cluster
migration has stricter requirements and is not a "one-size-fits-all"
case. E.g. the most prominent issue arises if the network segmentation
is different, which would make the conntrack entries useless or require
careful rewriting.
[0] https://bugzilla.proxmox.com/show_bug.cgi?id=5180
Signed-off-by: Christoph Heiss <c.heiss at proxmox.com>
---
Changes v1 -> v2:
* rebased on latest master
* change to systemctl start/stop for starting/stopping the
dbus-vmstate service
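For testing, the helper instance can also be inspected manually while a
migration is running. Note that the busctl call below only queries the
current primary owner of org.qemu.VMState1, while the helper functions
in DBusVMState.pm also walk any queued owners:

    systemctl status pve-dbus-vmstate@<vmid>
    busctl --system get-property org.qemu.VMState1 \
        /org/qemu/VMState1 org.qemu.VMState1 Id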
PVE/API2/Qemu.pm | 72 ++++++++++++++++++++
PVE/CLI/qm.pm | 5 ++
PVE/QemuMigrate.pm | 64 ++++++++++++++++++
PVE/QemuServer.pm | 6 ++
PVE/QemuServer/DBusVMState.pm | 120 ++++++++++++++++++++++++++++++++++
PVE/QemuServer/Makefile | 1 +
6 files changed, 268 insertions(+)
create mode 100644 PVE/QemuServer/DBusVMState.pm
diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index 626cce45..4d3ed5b6 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -41,6 +41,7 @@ use PVE::QemuServer::QMPHelpers;
use PVE::QemuServer::RNG;
use PVE::QemuServer::USB;
use PVE::QemuServer::Virtiofs qw(max_virtiofs);
+use PVE::QemuServer::DBusVMState;
use PVE::QemuMigrate;
use PVE::RPCEnvironment;
use PVE::AccessControl;
@@ -3145,6 +3146,12 @@ __PACKAGE__->register_method({
default => 'max(30, vm memory in GiB)',
optional => 1,
},
+ 'with-conntrack-state' => {
+ type => 'boolean',
+ optional => 1,
+ default => 0,
+ description => 'Whether to migrate conntrack entries for running VMs.',
+ }
},
},
returns => {
@@ -3175,6 +3182,7 @@ __PACKAGE__->register_method({
my $migration_network = $get_root_param->('migration_network');
my $targetstorage = $get_root_param->('targetstorage');
my $force_cpu = $get_root_param->('force-cpu');
+ my $with_conntrack_state = $get_root_param->('with-conntrack-state');
my $storagemap;
@@ -3246,6 +3254,7 @@ __PACKAGE__->register_method({
nbd_proto_version => $nbd_protocol_version,
replicated_volumes => $replicated_volumes,
offline_volumes => $offline_volumes,
+ with_conntrack_state => $with_conntrack_state,
};
my $params = {
@@ -4792,6 +4801,11 @@ __PACKAGE__->register_method({
type => 'object',
description => "Object of mapped resources with additional information such if they're live migratable.",
},
+ 'has-dbus-vmstate' => {
+ type => 'boolean',
+ description => 'Whether the VM host supports migrating additional VM state, '
+ . 'such as conntrack entries.',
+ }
},
},
code => sub {
@@ -4860,6 +4874,7 @@ __PACKAGE__->register_method({
$res->{local_resources} = $local_resources;
$res->{'mapped-resources'} = [ sort keys $mapped_resources->%* ];
$res->{'mapped-resource-info'} = $mapped_resources;
+ $res->{'has-dbus-vmstate'} = 1;
return $res;
@@ -4921,6 +4936,12 @@ __PACKAGE__->register_method({
minimum => '0',
default => 'migrate limit from datacenter or storage config',
},
+ 'with-conntrack-state' => {
+ type => 'boolean',
+ optional => 1,
+ default => 0,
+ description => 'Whether to migrate conntrack entries for running VMs.',
+ }
},
},
returns => {
@@ -4976,6 +4997,7 @@ __PACKAGE__->register_method({
} else {
warn "VM isn't running. Doing offline migration instead.\n" if $param->{online};
$param->{online} = 0;
+ $param->{'with-conntrack-state'} = 0;
}
my $storecfg = PVE::Storage::config();
@@ -6263,6 +6285,7 @@ __PACKAGE__->register_method({
warn $@ if $@;
}
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($state->{vmid});
PVE::QemuServer::destroy_vm($state->{storecfg}, $state->{vmid}, 1);
}
@@ -6436,4 +6459,53 @@ __PACKAGE__->register_method({
return { socket => $socket };
}});
+__PACKAGE__->register_method({
+ name => 'dbus_vmstate',
+ path => '{vmid}/dbus-vmstate',
+ method => 'POST',
+ proxyto => 'node',
+ description => 'Start or stop the dbus-vmstate helper for the given VM.',
+ permissions => {
+ check => ['perm', '/vms/{vmid}', [ 'VM.Migrate' ]],
+ },
+ parameters => {
+ additionalProperties => 0,
+ properties => {
+ node => get_standard_option('pve-node'),
+ vmid => get_standard_option('pve-vmid', { completion => \&PVE::QemuServer::complete_vmid }),
+ action => {
+ type => 'string',
+ enum => [qw(start stop)],
+ description => 'Action to perform on the DBus VMState helper.',
+ optional => 0,
+ },
+ },
+ },
+ returns => {
+ type => 'null',
+ },
+ code => sub {
+ my ($param) = @_;
+ my ($node, $vmid, $action) = $param->@{qw(node vmid action)};
+
+ my $nodename = PVE::INotify::nodename();
+ if ($node ne 'localhost' && $node ne $nodename) {
+ raise_param_exc({ node => "node needs to be 'localhost' or local hostname '$nodename'" });
+ }
+
+ if (!PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+ raise_param_exc({ node => "VM $vmid not running locally on node '$nodename'" });
+ }
+
+ if ($action eq 'start') {
+ syslog('info', "starting dbus-vmstate helper for VM $vmid\n");
+ PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+ } elsif ($action eq 'stop') {
+ syslog('info', "stopping dbus-vmstate helper for VM $vmid\n");
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+ } else {
+ die "unknown action $action\n";
+ }
+ }});
+
1;
diff --git a/PVE/CLI/qm.pm b/PVE/CLI/qm.pm
index 3e3a4c91..32c7629c 100755
--- a/PVE/CLI/qm.pm
+++ b/PVE/CLI/qm.pm
@@ -36,6 +36,7 @@ use PVE::QemuServer::Agent qw(agent_available);
use PVE::QemuServer::ImportDisk;
use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::QMPHelpers;
+use PVE::QemuServer::DBusVMState;
use PVE::QemuServer;
use PVE::CLIHandler;
@@ -965,6 +966,10 @@ __PACKAGE__->register_method({
# vm was shutdown from inside the guest or crashed, doing api cleanup
PVE::QemuServer::vm_stop_cleanup($storecfg, $vmid, $conf, 0, 0, 1);
}
+
+ # ensure that no dbus-vmstate helper is left running in any case
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+
PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-stop');
$restart = eval { PVE::QemuServer::clear_reboot_request($vmid) };
diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
index b7bf2aa3..7665c398 100644
--- a/PVE/QemuMigrate.pm
+++ b/PVE/QemuMigrate.pm
@@ -32,6 +32,7 @@ use PVE::QemuServer::Machine;
use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::Memory qw(get_current_memory);
use PVE::QemuServer::QMPHelpers;
+use PVE::QemuServer::DBusVMState;
use PVE::QemuServer;
use PVE::AbstractMigrate;
@@ -230,6 +231,21 @@ sub prepare {
# Do not treat a suspended VM as paused, as it might wake up
# during migration and remain paused after migration finishes.
$self->{vm_was_paused} = 1 if PVE::QemuServer::vm_is_paused($vmid, 0);
+
+ if ($self->{opts}->{'with-conntrack-state'}) {
+ if ($self->{opts}->{remote}) {
+ # shouldn't be reached in normal circumstances anyway, as we prevent it
+ # at the API level
+ $self->log('warn', 'conntrack state migration not supported for remote migrations, '
+ . 'active connections might get dropped');
+ $self->{opts}->{'with-conntrack-state'} = 0;
+ } else {
+ PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+ }
+ } else {
+ $self->log('warn', 'conntrack state migration not supported or enabled, '
+ . 'active connections might get dropped');
+ }
}
my ($loc_res, $mapped_res, $missing_mappings_by_node) = PVE::QemuServer::check_local_resources($conf, $running, 1);
@@ -873,6 +889,14 @@ sub phase1_cleanup {
if (my $err =$@) {
$self->log('err', $err);
}
+
+ if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+ # if the VM is running, that means we also tried to migrate additional
+ # state via our dbus-vmstate helper
+ # we only need to stop it locally; on the target, the VM cleanup
+ # will handle it
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+ }
}
sub phase2_start_local_cluster {
@@ -919,6 +943,10 @@ sub phase2_start_local_cluster {
push @$cmd, '--targetstorage', ($self->{opts}->{targetstorage} // '1');
}
+ if ($self->{opts}->{'with-conntrack-state'}) {
+ push @$cmd, '--with-conntrack-state';
+ }
+
my $spice_port;
my $input = "nbd_protocol_version: $migrate->{nbd_proto_version}\n";
@@ -1472,6 +1500,13 @@ sub phase2_cleanup {
$self->log('err', $err);
}
+ if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+ # if the VM is running, that means we also tried to migrate additional
+ # state via our dbus-vmstate helper
+ # we only need to stop it locally; on the target, the VM cleanup
+ # will handle it
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+ }
if ($self->{tunnel}) {
eval { PVE::Tunnel::finish_tunnel($self->{tunnel}); };
@@ -1594,6 +1629,35 @@ sub phase3_cleanup {
$self->log('info', "skipping guest fstrim, because VM is paused");
}
}
+
+ if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+ # if the VM is running, that means we also migrated additional
+ # state via our dbus-vmstate helper
+ $self->log('info', 'stopping migration dbus-vmstate helpers');
+
+ # first locally
+ my $num = PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+ if (defined($num)) {
+ my $plural = $num != 1 ? "entries" : "entry";
+ $self->log('info', "migrated $num conntrack state $plural");
+ }
+
+ # .. and then remote
+ my $targetnode = $self->{node};
+ eval {
+ # FIXME: introduce proper way to call API methods on another node?
+ # See also e.g. pve-network/src/PVE/API2/Network/SDN.pm, which
+ # does something similar.
+ PVE::Tools::run_command([
+ 'pvesh', 'create',
+ "/nodes/$targetnode/qemu/$vmid/dbus-vmstate",
+ '--action', 'stop',
+ ]);
+ };
+ if (my $err = $@) {
+ $self->log('warn', "failed to stop dbus-vmstate on $targetnode: $err\n");
+ }
+ }
}
# close tunnel on successful migration, on error phase2_cleanup closed it
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 577959a4..7ff87667 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -65,6 +65,7 @@ use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd
use PVE::QemuServer::RNG qw(parse_rng print_rng_device_commandline print_rng_object_commandline);
use PVE::QemuServer::USB;
use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd);
+use PVE::QemuServer::DBusVMState;
my $have_sdn;
eval {
@@ -5610,6 +5611,7 @@ sub vm_start {
# replicated_volumes => which volids should be re-used with bitmaps for nbd migration
# offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
# contained in config
+# with_conntrack_state => whether to start the dbus-vmstate helper for conntrack state migration
sub vm_start_nolock {
my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
@@ -6011,6 +6013,10 @@ sub vm_start_nolock {
}
}
+ # conntrack migration is only supported for intra-cluster migrations
+ if ($migrate_opts->{with_conntrack_state} && !$migrate_opts->{remote_node}) {
+ PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+ }
} else {
mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
if !$statefile && $conf->{balloon};
diff --git a/PVE/QemuServer/DBusVMState.pm b/PVE/QemuServer/DBusVMState.pm
new file mode 100644
index 00000000..5b3a79b0
--- /dev/null
+++ b/PVE/QemuServer/DBusVMState.pm
@@ -0,0 +1,120 @@
+package PVE::QemuServer::DBusVMState;
+
+use strict;
+use warnings;
+
+use PVE::SafeSyslog;
+use PVE::Systemd;
+use PVE::Tools;
+
+use constant {
+ DBUS_VMSTATE_EXE => '/usr/libexec/qemu-server/dbus-vmstate',
+};
+
+# Retrieves a property of an object on a specific interface.
+# In contrast to accessing the property directly by using $obj->Property, this
+# actually respects the owner of the object and thus can be used for interfaces
+# which might have multiple (queued) owners on the DBus.
+my sub dbus_get_property {
+ my ($obj, $interface, $name) = @_;
+
+ my $con = $obj->{service}->get_bus()->get_connection();
+
+ my $call = $con->make_method_call_message(
+ $obj->{service}->get_service_name(),
+ $obj->{object_path},
+ 'org.freedesktop.DBus.Properties',
+ 'Get',
+ );
+
+ $call->set_destination($obj->get_service()->get_owner_name());
+ $call->append_args_list($interface, $name);
+
+ my @reply = $con->send_with_reply_and_block($call, 10 * 1000)->get_args_list();
+ return $reply[0];
+}
+
+# Starts the dbus-vmstate helper D-Bus service daemon and adds the needed
+# object to the appropriate QEMU instance for the specified VM.
+sub qemu_add_dbus_vmstate {
+ my ($vmid) = @_;
+
+ if (!PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+ die "VM $vmid must be running locally\n";
+ }
+
+ # In case a leftover instance from a previous run is still running, stop it.
+ # Otherwise we run into errors, as a systemd service instance is unique.
+ if (defined(qemu_del_dbus_vmstate($vmid, quiet => 1))) {
+ warn "stopped previously running dbus-vmstate helper for VM $vmid\n";
+ }
+
+ # Start the actual service, which will then register itself with QEMU.
+ eval { PVE::Tools::run_command(['systemctl', 'start', "pve-dbus-vmstate\@$vmid"]) };
+ if (my $err = $@) {
+ die "failed to start DBus VMState service for VM $vmid: $err\n";
+ }
+}
+
+# Stops the dbus-vmstate helper D-Bus service daemon and removes the associated
+# object from QEMU for the specified VM.
+#
+# Returns the number of migrated conntrack entries, or undef in case of error.
+sub qemu_del_dbus_vmstate {
+ my ($vmid, %params) = @_;
+
+ my $num_entries = undef;
+ my $dbus = Net::DBus->system();
+ my $dbus_obj = $dbus->get_bus_object();
+
+ my $owners = eval { $dbus_obj->ListQueuedOwners('org.qemu.VMState1') };
+ if (my $err = $@) {
+ syslog('warn', "failed to retrieve org.qemu.VMState1 owners: $err\n")
+ if !$params{quiet};
+ return undef;
+ }
+
+ # Iterate through all name owners for 'org.qemu.VMState1' and compare
+ # the ID. If we found the corresponding one for $vmid, retrieve the
+ # `NumMigratedEntries` property and call the `Quit()` method on it.
+ # Any D-Bus interaction might die/croak, so try to be careful here and
+ # swallow any hard errors.
+ foreach my $owner (@$owners) {
+ my $service = eval { Net::DBus::RemoteService->new($dbus, $owner, 'org.qemu.VMState1') };
+ if (my $err = $@) {
+ syslog('warn', "failed to get org.qemu.VMState1 service from D-Bus $owner: $err\n")
+ if !$params{quiet};
+ next;
+ }
+
+ my $object = eval { $service->get_object('/org/qemu/VMState1') };
+ if (my $err = $@) {
+ syslog('warn', "failed to get /org/qemu/VMState1 object from D-Bus $owner: $err\n")
+ if !$params{quiet};
+ next;
+ }
+
+ my $id = eval { dbus_get_property($object, 'org.qemu.VMState1', 'Id') };
+ if (defined($id) && $id eq "pve-vmstate-$vmid") {
+ my $helperobj = eval { $service->get_object('/org/qemu/VMState1', 'com.proxmox.VMStateHelper') };
+ if (my $err = $@) {
+ syslog('warn', "found dbus-vmstate helper, but does not implement com.proxmox.VMStateHelper? ($err)\n")
+ if !$params{quiet};
+ last;
+ }
+
+ $num_entries = eval { dbus_get_property($object, 'com.proxmox.VMStateHelper', 'NumMigratedEntries') };
+ eval { $object->Quit() };
+ if (my $err = $@) {
+ syslog('warn', "failed to call quit on dbus-vmstate for VM $vmid: $err\n")
+ if !$params{quiet};
+ }
+
+ last;
+ }
+ }
+
+ return $num_entries;
+}
+
+1;
diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile
index 8bcd484e..7fd850d0 100644
--- a/PVE/QemuServer/Makefile
+++ b/PVE/QemuServer/Makefile
@@ -4,6 +4,7 @@ SOURCES=PCI.pm \
Memory.pm \
ImportDisk.pm \
Cloudinit.pm \
+ DBusVMState.pm \
Agent.pm \
Helpers.pm \
Monitor.pm \
--
2.49.0