[pve-devel] [PATCH qemu-server v3 08/14] fix #5180: migrate: integrate helper for live-migrating conntrack info

Christoph Heiss c.heiss at proxmox.com
Thu Jul 3 13:54:10 CEST 2025


Fixes #5180 [0].

For live migration, this implements the following:
a) the dbus-vmstate helper is started on the target side, together with the VM
b) the dbus-vmstate helper is started on the source side
c) everything is cleaned up properly, in any case

It is currently off by default and must be enabled via the optional
`with-conntrack-state` migration parameter.
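
For illustration, a live migration with this enabled could be triggered
through the migration API roughly like this (hypothetical invocation;
`with-conntrack-state` is added by this patch, `target` and `online` are
the existing migrate parameters):

    pvesh create /nodes/<source-node>/qemu/<vmid>/migrate \
        --target <target-node> --online 1 --with-conntrack-state 1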

The conntrack entry migration is done in such a way that it can
soft-fail without impacting the actual migration, i.e. it is treated as
"best-effort".

A failed conntrack entry migration does not have any real impact on
functionality, other than possibly exhibiting the problems outlined in
the issue [0].

For remote migrations, only a warning is emitted for now. Cross-cluster
migration has stricter requirements and is not a "one-size-fits-all"
case. E.g. the most prominent issue is differing network segmentation,
which would make the conntrack entries useless or require careful
rewriting.

[0] https://bugzilla.proxmox.com/show_bug.cgi?id=5180

Signed-off-by: Christoph Heiss <c.heiss at proxmox.com>
---
Changes v1 -> v2:
  * rebased on latest master
  * change to systemctl start/stop for starting/stopping the
    dbus-vmstate service (see the example below the changelog)

Changes v2 -> v3:
  * formatted using perltidy
  * slightly adjusted the warning message for the disabled case for
    better readability
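
As a side note on the systemctl-based handling mentioned in the v1 -> v2
changelog: the helper runs as a systemd template instance (unit name as
used by qemu_add_dbus_vmstate() below). For manual inspection on a node,
something like the following should work (illustrative only; the bus
name is the one queried in PVE/QemuServer/DBusVMState.pm):

    systemctl status pve-dbus-vmstate@<vmid>
    busctl --system tree org.qemu.VMState1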

 src/PVE/API2/Qemu.pm              |  75 ++++++++++++++++++
 src/PVE/CLI/qm.pm                 |   5 ++
 src/PVE/QemuMigrate.pm            |  73 +++++++++++++++++
 src/PVE/QemuServer.pm             |   6 ++
 src/PVE/QemuServer/DBusVMState.pm | 125 ++++++++++++++++++++++++++++++
 src/PVE/QemuServer/Makefile       |   1 +
 6 files changed, 285 insertions(+)
 create mode 100644 src/PVE/QemuServer/DBusVMState.pm

diff --git a/src/PVE/API2/Qemu.pm b/src/PVE/API2/Qemu.pm
index 6565ce71..020f1815 100644
--- a/src/PVE/API2/Qemu.pm
+++ b/src/PVE/API2/Qemu.pm
@@ -46,6 +46,7 @@ use PVE::QemuServer::RNG;
 use PVE::QemuServer::RunState;
 use PVE::QemuServer::USB;
 use PVE::QemuServer::Virtiofs qw(max_virtiofs);
+use PVE::QemuServer::DBusVMState;
 use PVE::QemuMigrate;
 use PVE::QemuMigrate::Helpers;
 use PVE::RPCEnvironment;
@@ -3371,6 +3372,12 @@ __PACKAGE__->register_method({
                 default => 'max(30, vm memory in GiB)',
                 optional => 1,
             },
+            'with-conntrack-state' => {
+                type => 'boolean',
+                optional => 1,
+                default => 0,
+                description => 'Whether to migrate conntrack entries for running VMs.',
+            },
         },
     },
     returns => {
@@ -3401,6 +3408,7 @@ __PACKAGE__->register_method({
         my $migration_network = $get_root_param->('migration_network');
         my $targetstorage = $get_root_param->('targetstorage');
         my $force_cpu = $get_root_param->('force-cpu');
+        my $with_conntrack_state = $get_root_param->('with-conntrack-state');
 
         my $storagemap;
 
@@ -3479,6 +3487,7 @@ __PACKAGE__->register_method({
                     nbd_proto_version => $nbd_protocol_version,
                     replicated_volumes => $replicated_volumes,
                     offline_volumes => $offline_volumes,
+                    with_conntrack_state => $with_conntrack_state,
                 };
 
                 my $params = {
@@ -5143,6 +5152,11 @@ __PACKAGE__->register_method({
                 description =>
                     "Object of mapped resources with additional information such if they're live migratable.",
             },
+            'has-dbus-vmstate' => {
+                type => 'boolean',
+                description => 'Whether the VM host supports migrating additional VM state, '
+                    . 'such as conntrack entries.',
+            },
         },
     },
     code => sub {
@@ -5211,6 +5225,7 @@ __PACKAGE__->register_method({
         $res->{local_resources} = $local_resources;
         $res->{'mapped-resources'} = [sort keys $mapped_resources->%*];
         $res->{'mapped-resource-info'} = $mapped_resources;
+        $res->{'has-dbus-vmstate'} = 1;
 
         return $res;
 
@@ -5283,6 +5298,12 @@ __PACKAGE__->register_method({
                 minimum => '0',
                 default => 'migrate limit from datacenter or storage config',
             },
+            'with-conntrack-state' => {
+                type => 'boolean',
+                optional => 1,
+                default => 0,
+                description => 'Whether to migrate conntrack entries for running VMs.',
+            },
         },
     },
     returns => {
@@ -5339,6 +5360,7 @@ __PACKAGE__->register_method({
         } else {
             warn "VM isn't running. Doing offline migration instead.\n" if $param->{online};
             $param->{online} = 0;
+            $param->{'with-conntrack-state'} = 0;
         }
 
         my $storecfg = PVE::Storage::config();
@@ -6682,6 +6704,7 @@ __PACKAGE__->register_method({
                             warn $@ if $@;
                         }
 
+                        PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($state->{vmid});
                         PVE::QemuServer::destroy_vm($state->{storecfg}, $state->{vmid}, 1);
                     }
 
@@ -6865,4 +6888,56 @@ __PACKAGE__->register_method({
     },
 });
 
+__PACKAGE__->register_method({
+    name => 'dbus_vmstate',
+    path => '{vmid}/dbus-vmstate',
+    method => 'POST',
+    proxyto => 'node',
+    description => 'Start or stop the dbus-vmstate helper for the given VM.',
+    permissions => {
+        check => ['perm', '/vms/{vmid}', ['VM.Migrate']],
+    },
+    parameters => {
+        additionalProperties => 0,
+        properties => {
+            node => get_standard_option('pve-node'),
+            vmid =>
+                get_standard_option('pve-vmid', { completion => \&PVE::QemuServer::complete_vmid }),
+            action => {
+                type => 'string',
+                enum => [qw(start stop)],
+                description => 'Action to perform on the DBus VMState helper.',
+                optional => 0,
+            },
+        },
+    },
+    returns => {
+        type => 'null',
+    },
+    code => sub {
+        my ($param) = @_;
+        my ($node, $vmid, $action) = $param->@{qw(node vmid action)};
+
+        my $nodename = PVE::INotify::nodename();
+        if ($node ne 'localhost' && $node ne $nodename) {
+            raise_param_exc(
+                { node => "node needs to be 'localhost' or local hostname '$nodename'" });
+        }
+
+        if (!PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+            raise_param_exc({ node => "VM $vmid not running locally on node '$nodename'" });
+        }
+
+        if ($action eq 'start') {
+            syslog('info', "starting dbus-vmstate helper for VM $vmid\n");
+            PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+        } elsif ($action eq 'stop') {
+            syslog('info', "stopping dbus-vmstate helper for VM $vmid\n");
+            PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+        } else {
+            die "unknown action $action\n";
+        }
+    },
+});
+
 1;
diff --git a/src/PVE/CLI/qm.pm b/src/PVE/CLI/qm.pm
index f3e9a702..3e778ad1 100755
--- a/src/PVE/CLI/qm.pm
+++ b/src/PVE/CLI/qm.pm
@@ -37,6 +37,7 @@ use PVE::QemuServer::ImportDisk;
 use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::QMPHelpers;
 use PVE::QemuServer::RunState;
+use PVE::QemuServer::DBusVMState;
 use PVE::QemuServer;
 
 use PVE::CLIHandler;
@@ -1054,6 +1055,10 @@ __PACKAGE__->register_method({
                     # vm was shutdown from inside the guest or crashed, doing api cleanup
                     PVE::QemuServer::vm_stop_cleanup($storecfg, $vmid, $conf, 0, 0, 1);
                 }
+
+                # ensure that no dbus-vmstate helper is left running in any case
+                PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+
                 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-stop');
 
                 $restart = eval { PVE::QemuServer::clear_reboot_request($vmid) };
diff --git a/src/PVE/QemuMigrate.pm b/src/PVE/QemuMigrate.pm
index f46bdf40..a6922198 100644
--- a/src/PVE/QemuMigrate.pm
+++ b/src/PVE/QemuMigrate.pm
@@ -35,6 +35,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::Memory qw(get_current_memory);
 use PVE::QemuServer::Network;
 use PVE::QemuServer::QMPHelpers;
+use PVE::QemuServer::DBusVMState;
 use PVE::QemuServer;
 
 use PVE::AbstractMigrate;
@@ -238,6 +239,27 @@ sub prepare {
         # Do not treat a suspended VM as paused, as it might wake up
         # during migration and remain paused after migration finishes.
         $self->{vm_was_paused} = 1 if PVE::QemuServer::vm_is_paused($vmid, 0);
+
+        if ($self->{opts}->{'with-conntrack-state'}) {
+            if ($self->{opts}->{remote}) {
+                # shouldn't be reached in normal circumstances anyway, as we prevent it on
+                # an API level
+                $self->log(
+                    'warn',
+                    'conntrack state migration not supported for remote migrations, '
+                        . 'active connections might get dropped',
+                );
+                $self->{opts}->{'with-conntrack-state'} = 0;
+            } else {
+                PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+            }
+        } else {
+            $self->log(
+                'warn',
+                'conntrack state migration not supported or disabled, '
+                    . 'active connections might get dropped',
+            );
+        }
     }
 
     my ($loc_res, $mapped_res, $missing_mappings_by_node) =
@@ -920,6 +942,14 @@ sub phase1_cleanup {
     if (my $err = $@) {
         $self->log('err', $err);
     }
+
+    if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+        # if the VM is running, that means we also tried to migrate additional
+        # state via our dbus-vmstate helper
+        # only need to locally stop it, on the target the VM cleanup will
+        # handle it
+        PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+    }
 }
 
 sub phase2_start_local_cluster {
@@ -966,6 +996,10 @@ sub phase2_start_local_cluster {
         push @$cmd, '--targetstorage', ($self->{opts}->{targetstorage} // '1');
     }
 
+    if ($self->{opts}->{'with-conntrack-state'}) {
+        push @$cmd, '--with-conntrack-state';
+    }
+
     my $spice_port;
     my $input = "nbd_protocol_version: $migrate->{nbd_proto_version}\n";
 
@@ -1560,6 +1594,14 @@ sub phase2_cleanup {
         $self->log('err', $err);
     }
 
+    if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+        # if the VM is running, that means we also tried to migrate additional
+        # state via our dbus-vmstate helper
+        # only need to locally stop it, on the target the VM cleanup will
+        # handle it
+        PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+    }
+
     if ($self->{tunnel}) {
         eval { PVE::Tunnel::finish_tunnel($self->{tunnel}); };
         if (my $err = $@) {
@@ -1683,6 +1725,37 @@ sub phase3_cleanup {
                 $self->log('info', "skipping guest fstrim, because VM is paused");
             }
         }
+
+        if ($self->{running} && $self->{opts}->{'with-conntrack-state'}) {
+            # if the VM is running, that means we also migrated additional
+            # state via our dbus-vmstate helper
+            $self->log('info', 'stopping migration dbus-vmstate helpers');
+
+            # first locally
+            my $num = PVE::QemuServer::DBusVMState::qemu_del_dbus_vmstate($vmid);
+            if (defined($num)) {
+                my $plural = $num != 1 ? "entries" : "entry";
+                $self->log('info', "migrated $num conntrack state $plural");
+            }
+
+            # .. and then remote
+            my $targetnode = $self->{node};
+            eval {
+                # FIXME: introduce proper way to call API methods on another node?
+                # See also e.g. pve-network/src/PVE/API2/Network/SDN.pm, which
+                # does something similar.
+                PVE::Tools::run_command([
+                    'pvesh',
+                    'create',
+                    "/nodes/$targetnode/qemu/$vmid/dbus-vmstate",
+                    '--action',
+                    'stop',
+                ]);
+            };
+            if (my $err = $@) {
+                $self->log('warn', "failed to stop dbus-vmstate on $targetnode: $err\n");
+            }
+        }
     }
 
     # close tunnel on successful migration, on error phase2_cleanup closed it
diff --git a/src/PVE/QemuServer.pm b/src/PVE/QemuServer.pm
index e7c98520..9f1219a9 100644
--- a/src/PVE/QemuServer.pm
+++ b/src/PVE/QemuServer.pm
@@ -86,6 +86,7 @@ use PVE::QemuServer::RunState;
 use PVE::QemuServer::StateFile;
 use PVE::QemuServer::USB;
 use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd);
+use PVE::QemuServer::DBusVMState;
 
 my $have_ha_config;
 eval {
@@ -5413,6 +5414,7 @@ sub vm_start {
 #   replicated_volumes => which volids should be re-used with bitmaps for nbd migration
 #   offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
 #       contained in config
+#   with_conntrack_state => whether to start the dbus-vmstate helper for conntrack state migration
 sub vm_start_nolock {
     my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
 
@@ -5783,6 +5785,10 @@ sub vm_start_nolock {
             }
         }
 
+        # conntrack migration is only supported for intra-cluster migrations
+        if ($migrate_opts->{with_conntrack_state} && !$migrate_opts->{remote_node}) {
+            PVE::QemuServer::DBusVMState::qemu_add_dbus_vmstate($vmid);
+        }
     } else {
         mon_cmd($vmid, "balloon", value => $conf->{balloon} * 1024 * 1024)
             if !$statefile && $conf->{balloon};
diff --git a/src/PVE/QemuServer/DBusVMState.pm b/src/PVE/QemuServer/DBusVMState.pm
new file mode 100644
index 00000000..36ff8168
--- /dev/null
+++ b/src/PVE/QemuServer/DBusVMState.pm
@@ -0,0 +1,125 @@
+package PVE::QemuServer::DBusVMState;
+
+use strict;
+use warnings;
+
+use PVE::SafeSyslog;
+use PVE::Systemd;
+use PVE::Tools;
+
+use constant {
+    DBUS_VMSTATE_EXE => '/usr/libexec/qemu-server/dbus-vmstate',
+};
+
+# Retrieves a property from an object on a specific interface.
+# In contrast to accessing the property directly by using $obj->Property, this
+# actually respects the owner of the object and thus can be used for interfaces
+# which might have multiple (queued) owners on the DBus.
+my sub dbus_get_property {
+    my ($obj, $interface, $name) = @_;
+
+    my $con = $obj->{service}->get_bus()->get_connection();
+
+    my $call = $con->make_method_call_message(
+        $obj->{service}->get_service_name(),
+        $obj->{object_path},
+        'org.freedesktop.DBus.Properties',
+        'Get',
+    );
+
+    $call->set_destination($obj->get_service()->get_owner_name());
+    $call->append_args_list($interface, $name);
+
+    my @reply = $con->send_with_reply_and_block($call, 10 * 1000)->get_args_list();
+    return $reply[0];
+}
+
+# Starts the dbus-vmstate helper D-Bus service daemon and adds the needed
+# object to the appropriate QEMU instance for the specified VM.
+sub qemu_add_dbus_vmstate {
+    my ($vmid) = @_;
+
+    if (!PVE::QemuServer::Helpers::vm_running_locally($vmid)) {
+        die "VM $vmid must be running locally\n";
+    }
+
+    # In case some leftover, previous instance is running, stop it. Otherwise
+    # we run into errors, as a systemd service instance is unique.
+    if (defined(qemu_del_dbus_vmstate($vmid, quiet => 1))) {
+        warn "stopped previously running dbus-vmstate helper for VM $vmid\n";
+    }
+
+    # Start the actual service, which will then register itself with QEMU.
+    eval { PVE::Tools::run_command(['systemctl', 'start', "pve-dbus-vmstate\@$vmid"]) };
+    if (my $err = $@) {
+        die "failed to start DBus VMState service for VM $vmid: $err\n";
+    }
+}
+
+# Stops the dbus-vmstate helper D-Bus service daemon and removes the associated
+# object from QEMU for the specified VM.
+#
+# Returns the number of migrated conntrack entries, or undef in case of error.
+sub qemu_del_dbus_vmstate {
+    my ($vmid, %params) = @_;
+
+    my $num_entries = undef;
+    my $dbus = Net::DBus->system();
+    my $dbus_obj = $dbus->get_bus_object();
+
+    my $owners = eval { $dbus_obj->ListQueuedOwners('org.qemu.VMState1') };
+    if (my $err = $@) {
+        syslog('warn', "failed to retrieve org.qemu.VMState1 owners: $err\n")
+            if !$params{quiet};
+        return undef;
+    }
+
+    # Iterate through all name owners for 'org.qemu.VMState1' and compare
+    # the ID. If we found the corresponding one for $vmid, retrieve the
+    # `NumMigratedEntries` property and call the `Quit()` method on it.
+    # Any D-Bus interaction might die/croak, so try to be careful here and
+    # swallow any hard errors.
+    foreach my $owner (@$owners) {
+        my $service = eval { Net::DBus::RemoteService->new($dbus, $owner, 'org.qemu.VMState1') };
+        if (my $err = $@) {
+            syslog('warn', "failed to get org.qemu.VMState1 service from D-Bus $owner: $err\n")
+                if !$params{quiet};
+            next;
+        }
+
+        my $object = eval { $service->get_object('/org/qemu/VMState1') };
+        if (my $err = $@) {
+            syslog('warn', "failed to get /org/qemu/VMState1 object from D-Bus $owner: $err\n")
+                if !$params{quiet};
+            next;
+        }
+
+        my $id = eval { dbus_get_property($object, 'org.qemu.VMState1', 'Id') };
+        if (defined($id) && $id eq "pve-vmstate-$vmid") {
+            my $helperobj =
+                eval { $service->get_object('/org/qemu/VMState1', 'com.proxmox.VMStateHelper') };
+            if (my $err = $@) {
+                syslog(
+                    'warn',
+                    "found dbus-vmstate helper, but does not implement com.proxmox.VMStateHelper? ($err)\n",
+                ) if !$params{quiet};
+                last;
+            }
+
+            $num_entries = eval {
+                dbus_get_property($object, 'com.proxmox.VMStateHelper', 'NumMigratedEntries');
+            };
+            eval { $object->Quit() };
+            if (my $err = $@) {
+                syslog('warn', "failed to call quit on dbus-vmstate for VM $vmid: $err\n")
+                    if !$params{quiet};
+            }
+
+            last;
+        }
+    }
+
+    return $num_entries;
+}
+
+1;
diff --git a/src/PVE/QemuServer/Makefile b/src/PVE/QemuServer/Makefile
index ca30a0ad..23c136bc 100644
--- a/src/PVE/QemuServer/Makefile
+++ b/src/PVE/QemuServer/Makefile
@@ -8,6 +8,7 @@ SOURCES=Agent.pm	\
 	CGroup.pm	\
 	Cloudinit.pm	\
 	CPUConfig.pm	\
+	DBusVMState.pm	\
 	Drive.pm	\
 	Helpers.pm	\
 	ImportDisk.pm	\
-- 
2.49.0