[pve-devel] [PATCH qemu-server v3 08/14] fix #5180: migrate: integrate helper for live-migrating conntrack info
Christoph Heiss
c.heiss at proxmox.com
Thu Jul 3 13:54:10 CEST 2025
Fixes #5180 [0].
This implements for live-migration:
a) the dbus-vmstate is started on the target side, together with the VM
b) the dbus-vmstate helper is started on the source side
c) everything is cleaned up properly, in any case
It is currently off-by-default and must be enabled via the optional
`with-conntrack-state` migration parameter.
The conntrack entry migration is done in such a way that it can
soft-fail without impacting the actual migration, i.e. it is treated
as "best-effort".
A failed conntrack entry migration does not have any real impact on
functionality, other than that the problems outlined in the issue [0]
might still occur.
For remote migrations, only a warning is thrown for now. Cross-cluster
migration has stricter requirements and is not a "one-size-fits-all".
E.g. the most prominent issue arises if the network segmentation is
different, which would make the conntrack entries useless or require
careful rewriting.
[0] https://bugzilla.proxmox.com/show_bug.cgi?id=5180
Signed-off-by: Christoph Heiss <c.heiss at proxmox.com>
---
Changes v1 -> v2:
* rebased on latest master
* change to systemctl start/stop for starting/stopping the
dbus-vmstate service
Changes v2 -> v3:
* formatted using perltidy
* slightly adjusted warning message for disabled case for better
readability
src/PVE/API2/Qemu.pm | 75 ++++++++++++++++++
src/PVE/CLI/qm.pm | 5 ++
src/PVE/QemuMigrate.pm | 73 +++++++++++++++++
src/PVE/QemuServer.pm | 6 ++
src/PVE/QemuServer/DBusVMState.pm | 125 ++++++++++++++++++++++++++++++
src/PVE/QemuServer/Makefile | 1 +
6 files changed, 285 insertions(+)
create mode 100644 src/PVE/QemuServer/DBusVMState.pm
diff --git a/src/PVE/API2/Qemu.pm b/src/PVE/API2/Qemu.pm
index 6565ce71..020f1815 100644
--- a/src/PVE/API2/Qemu.pm
+++ b/src/PVE/API2/Qemu.pm
@@ -46,6 +46,7 @@ use PVE::QemuServer::RNG;
use PVE::QemuServer::RunState;
use PVE::QemuServer::USB;
use PVE::QemuServer::Virtiofs qw(max_virtiofs);
+use PVE::QemuServer::DBusVMState;
use PVE::QemuMigrate;
use PVE::QemuMigrate::Helpers;
use PVE::RPCEnvironment;
@@ -3371,6 +3372,12 @@ __PACKAGE__->register_method({
default => 'max(30, vm memory in GiB)',
optional => 1,
},
+ 'with-conntrack-state' => {
+ type => 'boolean',
+ optional => 1,
+ default => 0,
+ description => 'Whether to migrate conntrack entries for running VMs.',
+ },
},
},
returns => {
@@ -3401,6 +3408,7 @@ __PACKAGE__->register_method({
my $migration_network = $get_root_param->('migration_network');
my $targetstorage = $get_root_param->('targetstorage');
my $force_cpu = $get_root_param->('force-cpu');
+ my $with_conntrack_state = $get_root_param->('with-conntrack-state');
my $storagemap;
@@ -3479,6 +3487,7 @@ __PACKAGE__->register_method({
nbd_proto_version => $nbd_protocol_version,
replicated_volumes => $replicated_volumes,
offline_volumes => $offline_volumes,
+ with_conntrack_state => $with_conntrack_state,
};
my $params = {
@@ -5143,6 +5152,11 @@ __PACKAGE__->register_method({
description =>
"Object of mapped resources with additional information such if they're live migratable.",
},
+ 'has-dbus-vmstate' => {
+ type => 'boolean',
+ description => 'Whether the VM host supports migrating additional VM state, '
+ . 'such as conntrack entries.',
+ },
},
},
code => sub {
@@ -5211,6 +5225,7 @@ __PACKAGE__->register_method({
$res->{local_resources} = $local_resources;
$res->{'mapped-resources'} = [sort keys $mapped_resources->%*];
$res->{'mapped-resource-info'} = $mapped_resources;
+ $res->{'has-dbus-vmstate'} = 1;
return $res;
@@ -5283,6 +5298,12 @@ __PACKAGE__->register_method({
minimum => '0',
default => 'migrate limit from datacenter or storage config',
},
+ 'with-conntrack-state' => {
+ type => 'boolean',
+ optional => 1,
+ default => 0,
+ description => 'Whether to migrate conntrack entries for running VMs.',
+ },
},
},
returns => {
@@ -5339,6 +5360,7 @@ __PACKAGE__->register_method({
} else {
warn "VM isn't running. Doing offline migration instead.\n" if $param->{online};
$param->{online} = 0;
+ $param->{'with-conntrack-state'} = 0;
}
my $storecfg = PVE::Storage::config();
@@ -6682,6 +6704,7 @@ __PACKAGE__->register_method({
warn $@ if $@;
}
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($state->{vmid});
PVE::QemuServer::destroy_vm($state->{storecfg}, $state->{vmid}, 1);
}
@@ -6865,4 +6888,56 @@ __PACKAGE__->register_method({
},
});
+__PACKAGE__->register_method({
+ name => 'dbus_vmstate',
+ path => '{vmid}/dbus-vmstate',
+ method => 'POST',
+ proxyto => 'node',
+ description => 'Stop the dbus-vmstate helper for the given VM if running.',
+ permissions => {
+ check => ['perm', '/vms/{vmid}', ['VM.Migrate']],
+ },
+ parameters => {
+ additionalProperties => 0,
+ properties => {
+ node => get_standard_option('pve-node'),
+ vmid =>
+ get_standard_option('pve-vmid', { completion => \&PVE::QemuServer::complete_vmid }),
+ action => {
+ type => 'string',
+ enum => [qw(start stop)],
+ description => 'Action to perform on the DBus VMState helper.',
+ optional => 0,
+ },
+ },
+ },
+ returns => {
+ type => 'null',
+ },
+ code => sub {
+ my ($param) = @_;
+ my ($node, $vmid, $action) = $param->@{qw(node vmid action)};
+
+ my $nodename = PVE::INotify::nodename();
+ if ($node ne 'localhost' && $node ne $nodename) {
+ raise_param_exc(
+ { node => "node needs to be 'localhost' or local hostname '$nodename'" });
+ }
+
+ if (!PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+ raise_param_exc({ node => "VM $vmid not running locally on node '$nodename'" });
+ }
+
+ if ($action eq 'start') {
+ syslog('info', "starting dbus-vmstate helper for VM $vmid\n");
+ PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+ } elsif ($action eq 'stop') {
+ syslog('info', "stopping dbus-vmstate helper for VM $vmid\n");
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+ } else {
+ die "unknown action $action\n";
+ }
+ },
+});
+
1;
diff --git a/src/PVE/CLI/qm.pm b/src/PVE/CLI/qm.pm
index f3e9a702..3e778ad1 100755
--- a/src/PVE/CLI/qm.pm
+++ b/src/PVE/CLI/qm.pm
@@ -37,6 +37,7 @@ use PVE::QemuServer::ImportDisk;
use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::QMPHelpers;
use PVE::QemuServer::RunState;
+use PVE::QemuServer::DBusVMState;
use PVE::QemuServer;
use PVE::CLIHandler;
@@ -1054,6 +1055,10 @@ __PACKAGE__->register_method({
# vm was shutdown from inside the guest or crashed, doing api cleanup
PVE::QemuServer::vm_stop_cleanup($storecfg, $vmid, $conf, 0, 0, 1);
}
+
+ # ensure that no dbus-vmstate helper is left running in any case
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+
PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-stop');
$restart = eval { PVE::QemuServer::clear_reboot_request($vmid) };
diff --git a/src/PVE/QemuMigrate.pm b/src/PVE/QemuMigrate.pm
index f46bdf40..a6922198 100644
--- a/src/PVE/QemuMigrate.pm
+++ b/src/PVE/QemuMigrate.pm
@@ -35,6 +35,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::Memory qw(get_current_memory);
use PVE::QemuServer::Network;
use PVE::QemuServer::QMPHelpers;
+use PVE::QemuServer::DBusVMState;
use PVE::QemuServer;
use PVE::AbstractMigrate;
@@ -238,6 +239,27 @@ sub prepare {
# Do not treat a suspended VM as paused, as it might wake up
# during migration and remain paused after migration finishes.
$self->{vm_was_paused} = 1 if PVE::QemuServer::vm_is_paused($vmid, 0);
+
+ if ($self->{opts}->{'with-conntrack-state'}) {
+ if ($self->{opts}->{remote}) {
+ # shouldn't be reached in normal circumstances anyway, as we prevent it on
+ # an API level
+ $self->log(
+ 'warn',
+ 'conntrack state migration not supported for remote migrations, '
+ . 'active connections might get dropped',
+ );
+ $self->{opts}->{'with-conntrack-state'} = 0;
+ } else {
+ PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+ }
+ } else {
+ $self->log(
+ 'warn',
+ 'conntrack state migration not supported or disabled, '
+ . 'active connections might get dropped',
+ );
+ }
}
my ($loc_res, $mapped_res, $missing_mappings_by_node) =
@@ -920,6 +942,14 @@ sub phase1_cleanup {
if (my $err = $@) {
$self->log('err', $err);
}
+
+ if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+ # if the VM is running, that means we also tried to migrate additional
+ # state via our dbus-vmstate helper
+ # only need to locally stop it, on the target the VM cleanup will
+ # handle it
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+ }
}
sub phase2_start_local_cluster {
@@ -966,6 +996,10 @@ sub phase2_start_local_cluster {
push @$cmd, '--targetstorage', ($self->{opts}->{targetstorage} // '1');
}
+ if ($self->{opts}->{'with-conntrack-state'}) {
+ push @$cmd, '--with-conntrack-state';
+ }
+
my $spice_port;
my $input = "nbd_protocol_version: $migrate->{nbd_proto_version}\n";
@@ -1560,6 +1594,14 @@ sub phase2_cleanup {
$self->log('err', $err);
}
+ if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+ # if the VM is running, that means we also tried to migrate additional
+ # state via our dbus-vmstate helper
+ # only need to locally stop it, on the target the VM cleanup will
+ # handle it
+ PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+ }
+
if ($self->{tunnel}) {
eval { PVE::Tunnel::finish_tunnel($self->{tunnel}); };
if (my $err = $@) {
@@ -1683,6 +1725,37 @@ sub phase3_cleanup {
$self->log('info', "skipping guest fstrim, because VM is paused");
}
}
+
+ if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+ # if the VM is running, that means we also migrated additional
+ # state via our dbus-vmstate helper
+ $self->log('info', 'stopping migration dbus-vmstate helpers');
+
+ # first locally
+ my $num = PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+ if (defined($num)) {
+ my $plural = $num != 1 ? "entries" : "entry";
+ $self->log('info', "migrated $num conntrack state $plural");
+ }
+
+ # .. and then remote
+ my $targetnode = $self->{node};
+ eval {
+ # FIXME: introduce proper way to call API methods on another node?
+ # See also e.g. pve-network/src/PVE/API2/Network/SDN.pm, which
+ # does something similar.
+ PVE::Tools::run_command([
+ 'pvesh',
+ 'create',
+ "/nodes/$targetnode/qemu/$vmid/dbus-vmstate",
+ '--action',
+ 'stop',
+ ]);
+ };
+ if (my $err = $@) {
+ $self->log('warn', "failed to stop dbus-vmstate on $targetnode: $err\n");
+ }
+ }
}
# close tunnel on successful migration, on error phase2_cleanup closed it
diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm
index e7c98520..9f1219a9 100644
--- a/src/PVE/QemuServer.pm
+++ b/src/PVE/QemuServer.pm
@@ -86,6 +86,7 @@ use PVE::QemuServer::RunState;
use PVE::QemuServer::StateFile;
use PVE::QemuServer::USB;
use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd);
+use PVE::QemuServer::DBusVMState;
my $have_ha_config;
eval {
@@ -5413,6 +5414,7 @@ sub vm_start {
# replicated_volumes => which volids should be re-used with bitmaps for nbd migration
# offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
# contained in config
+# with_conntrack_state => whether to start the dbus-vmstate helper for conntrack state migration
sub vm_start_nolock {
my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
@@ -5783,6 +5785,10 @@ sub vm_start_nolock {
}
}
+ # conntrack migration is only supported for intra-cluster migrations
+ if ($migrate_opts->{with_conntrack_state} && !$migrate_opts->{remote_node}) {
+ PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+ }
} else {
mon_cmd($vmid, "balloon", value => $conf->{balloon} * 1024 * 1024)
if !$statefile && $conf->{balloon};
diff --git a/src/PVE/QemuServer/DBusVMState.pm b/src/PVE/QemuServer/DBusVMState.pm
new file mode 100644
index 00000000..36ff8168
--- /dev/null
+++ b/src/PVE/QemuServer/DBusVMState.pm
@@ -0,0 +1,125 @@
+package PVE::QemuServer::DBusVMState;
+
+use strict;
+use warnings;
+
+use PVE::SafeSyslog;
+use PVE::Systemd;
+use PVE::Tools;
+
+use constant {
+ DBUS_VMSTATE_EXE => '/usr/libexec/qemu-server/dbus-vmstate',
+};
+
+# Retrieves a property from an object from a specific interface name.
+# In contrast to accessing the property directly by using $obj->Property, this
+# actually respects the owner of the object and thus can be used for interfaces
+# with might have multiple (queued) owners on the DBus.
+my sub dbus_get_property {
+ my ($obj, $interface, $name) = @_;
+
+ my $con = $obj->{service}->get_bus()->get_connection();
+
+ my $call = $con->make_method_call_message(
+ $obj->{service}->get_service_name(),
+ $obj->{object_path},
+ 'org.freedesktop.DBus.Properties',
+ 'Get',
+ );
+
+ $call->set_destination($obj->get_service()->get_owner_name());
+ $call->append_args_list($interface, $name);
+
+ my @reply = $con->send_with_reply_and_block($call, 10 * 1000)->get_args_list();
+ return $reply[0];
+}
+
+# Starts the dbus-vmstate helper D-Bus service daemon and adds the needed
+# object to the appropriate QEMU instance for the specified VM.
+sub qemu_add_dbus_vmstate {
+ my ($vmid) = @_;
+
+ if (!PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+ die "VM $vmid must be running locally\n";
+ }
+
+ # In case some leftover, previous instance is running, stop it. Otherwise
+ # we run into errors, as a systemd service instance is unique.
+ if (defined(qemu_del_dbus_vmstate($vmid, quiet => 1))) {
+ warn "stopped previously running dbus-vmstate helper for VM $vmid\n";
+ }
+
+ # Start the actual service, which will then register itself with QEMU.
+ eval { PVE::Tools::run_command(['systemctl', 'start', "pve-dbus-vmstate\@$vmid"]) };
+ if (my $err = $@) {
+ die "failed to start DBus VMState service for VM $vmid: $err\n";
+ }
+}
+
+# Stops the dbus-vmstate helper D-Bus service daemon and removes the associated
+# object from QEMU for the specified VM.
+#
+# Returns the number of migrated conntrack entries, or undef in case of error.
+sub qemu_del_dbus_vmstate {
+ my ($vmid, %params) = @_;
+
+ my $num_entries = undef;
+ my $dbus = Net::DBus->system();
+ my $dbus_obj = $dbus->get_bus_object();
+
+ my $owners = eval { $dbus_obj->ListQueuedOwners('org.qemu.VMState1') };
+ if (my $err = $@) {
+ syslog('warn', "failed to retrieve org.qemu.VMState1 owners: $err\n")
+ if !$params{quiet};
+ return undef;
+ }
+
+ # Iterate through all name owners for 'org.qemu.VMState1' and compare
+ # the ID. If we found the corresponding one for $vmid, retrieve the
+ # `NumMigratedEntries` property and call the `Quit()` method on it.
+ # Any D-Bus interaction might die/croak, so try to be careful here and
+ # swallow any hard errors.
+ foreach my $owner (@$owners) {
+ my $service = eval { Net::DBus::RemoteService->new($dbus, $owner, 'org.qemu.VMState1') };
+ if (my $err = $@) {
+ syslog('warn', "failed to get org.qemu.VMState1 service from D-Bus $owner: $err\n")
+ if !$params{quiet};
+ next;
+ }
+
+ my $object = eval { $service->get_object('/org/qemu/VMState1') };
+ if (my $err = $@) {
+ syslog('warn', "failed to get /org/qemu/VMState1 object from D-Bus $owner: $err\n")
+ if !$params{quiet};
+ next;
+ }
+
+ my $id = eval { dbus_get_property($object, 'org.qemu.VMState1', 'Id') };
+ if (defined($id) && $id eq "pve-vmstate-$vmid") {
+ my $helperobj =
+ eval { $service->get_object('/org/qemu/VMState1', 'com.proxmox.VMStateHelper') };
+ if (my $err = $@) {
+ syslog(
+ 'warn',
+ "found dbus-vmstate helper, but does not implement com.proxmox.VMStateHelper? ($err)\n",
+ ) if !$params{quiet};
+ last;
+ }
+
+ $num_entries = eval {
+ dbus_get_property($object, 'com.proxmox.VMStateHelper', 'NumMigratedEntries');
+ };
+ eval { $object->Quit() };
+ if (my $err = $@) {
+ syslog('warn', "failed to call quit on dbus-vmstate for VM $vmid: $err\n")
+ if !$params{quiet};
+ }
+
+ last;
+ }
+ }
+
+ return $num_entries;
+}
+
+1;
diff --git a/src/PVE/QemuServer/Makefile b/src/PVE/QemuServer/Makefile
index ca30a0ad..23c136bc 100644
--- a/src/PVE/QemuServer/Makefile
+++ b/src/PVE/QemuServer/Makefile
@@ -8,6 +8,7 @@ SOURCES=Agent.pm \
CGroup.pm \
Cloudinit.pm \
CPUConfig.pm \
+ DBusVMState.pm \
Drive.pm \
Helpers.pm \
ImportDisk.pm \
--
2.49.0
More information about the pve-devel
mailing list