[pve-devel] [PATCH qemu-server v2 34/49] block job: add blockdev mirror
Fiona Ebner
f.ebner at proxmox.com
Tue Jul 1 17:40:54 CEST 2025
With blockdev-mirror, it is possible to change the aio setting on the
fly. This is useful for migrations between storages where one allows
io_uring as the default and the other doesn't.
The node below the top throttle node needs to be replaced so that the
limits stay intact and that the top node still has the drive ID as the
node name. That node is not necessarily a format node. For example, it
could also be a zeroinit node from an earlier mirror operation. So
query QEMU itself.
QEMU automatically drops nodes after mirror only if they were
implicitly added, i.e. not explicitly added via blockdev-add. Since a
previous mirror target is explicitly added (and not just implicitly as
the child of a top throttle node), it is necessary to detach the
appropriate block node after mirror.
Already mock blockdev_mirror in the tests.
Co-developed-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
---
Changes in v2:
* Use assert parameter for get_node_name_below_throttle().
* Use attach() helper with the new 'no-throttle' option.
* More compact POD.
src/PVE/QemuServer/BlockJob.pm | 141 ++++++++++++++++++++++
src/test/MigrationTest/QemuMigrateMock.pm | 8 ++
2 files changed, 149 insertions(+)
diff --git a/src/PVE/QemuServer/BlockJob.pm b/src/PVE/QemuServer/BlockJob.pm
index 68d0431f..9131780e 100644
--- a/src/PVE/QemuServer/BlockJob.pm
+++ b/src/PVE/QemuServer/BlockJob.pm
@@ -4,12 +4,14 @@ use strict;
use warnings;
use JSON;
+use Storable qw(dclone);
use PVE::Format qw(render_duration render_bytes);
use PVE::RESTEnvironment qw(log_warn);
use PVE::Storage;
use PVE::QemuServer::Agent qw(qga_check_running);
+use PVE::QemuServer::Blockdev;
use PVE::QemuServer::Drive qw(checked_volume_format);
use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::RunState;
@@ -187,10 +189,17 @@ sub qemu_drive_mirror_monitor {
print "$job_id: Completing block job...\n";
my $completion_command;
+ # For blockdev, need to detach appropriate node. QEMU will only drop it if
+ # it was implicitly added (e.g. as the child of a top throttle node), but
+ # not if it was explicitly added via blockdev-add (e.g. as a previous mirror
+ # target).
+ my $detach_node_name;
if ($completion eq 'complete') {
$completion_command = 'block-job-complete';
+ $detach_node_name = $jobs->{$job_id}->{'source-node-name'};
} elsif ($completion eq 'cancel') {
$completion_command = 'block-job-cancel';
+ $detach_node_name = $jobs->{$job_id}->{'target-node-name'};
} else {
die "invalid completion value: $completion\n";
}
@@ -202,6 +211,9 @@ sub qemu_drive_mirror_monitor {
} elsif ($err) {
die "$job_id: block job cannot be completed - $err\n";
} else {
+ $jobs->{$job_id}->{'detach-node-name'} = $detach_node_name
+ if $detach_node_name;
+
print "$job_id: Completed successfully.\n";
$jobs->{$job_id}->{complete} = 1;
}
@@ -347,6 +359,135 @@ sub qemu_drive_mirror_switch_to_active_mode {
}
}
+=pod
+
+=head3 blockdev_mirror
+
+ blockdev_mirror($source, $dest, $jobs, $completion, $options)
+
+Mirrors the volume of a running VM specified by C<$source> to destination C<$dest>.
+
+=over
+
+=item C<$source>: The source information consists of:
+
+=over
+
+=item C<< $source->{vmid} >>: The ID of the running VM the source volume belongs to.
+
+=item C<< $source->{drive} >>: The drive configuration of the source volume as currently attached to
+the VM.
+
+=item C<< $source->{bitmap} >>: (optional) Use incremental mirroring based on the specified bitmap.
+
+=back
+
+=item C<$dest>: The destination information consists of:
+
+=over
+
+=item C<< $dest->{volid} >>: The volume ID of the target volume.
+
+=item C<< $dest->{vmid} >>: (optional) The ID of the VM the target volume belongs to. Defaults to
+C<< $source->{vmid} >>.
+
+=item C<< $dest->{'zero-initialized'} >>: (optional) True, if the target volume is zero-initialized.
+
+=back
+
+=item C<$jobs>: (optional) Other jobs in the transaction when multiple volumes should be mirrored.
+All jobs must be ready before completion can happen.
+
+=item C<$completion>: Completion mode, default is C<complete>:
+
+=over
+
+=item C<complete>: Wait until all jobs are ready, block-job-complete them (default). This means
+switching the original drive to use the new target.
+
+=item C<cancel>: Wait until all jobs are ready, block-job-cancel them. This means not switching the
+original drive to use the new target.
+
+=item C<skip>: Wait until all jobs are ready, return with block jobs in ready state.
+
+=item C<auto>: Wait until all jobs disappear, only use for jobs which complete automatically.
+
+=back
+
+=item C<$options>: Further options:
+
+=over
+
+=item C<< $options->{'guest-agent'} >>: If the guest agent is configured for the VM. It will be used
+to freeze and thaw the filesystems for consistency when the target belongs to a different VM.
+
+=item C<< $options->{'bwlimit'} >>: The bandwidth limit to use for the mirroring operation, in
+KiB/s.
+
+=back
+
+=back
+
+=cut
+
+sub blockdev_mirror {
+    my ($source, $dest, $jobs, $completion, $options) = @_;
+
+    my $vmid = $source->{vmid};
+
+    my $drive_id = PVE::QemuServer::Drive::get_drive_id($source->{drive});
+    my $device_id = "drive-$drive_id";
+
+    my $storecfg = PVE::Storage::config();
+
+    # Need to replace the node below the top node. This is not necessarily a format node, for
+    # example, it can also be a zeroinit node by a previous mirror! So query QEMU itself.
+    my $source_node_name =
+        PVE::QemuServer::Blockdev::get_node_name_below_throttle($vmid, $device_id, 1);
+
+    # Copy original drive config (aio, cache, discard, ...):
+    my $dest_drive = dclone($source->{drive});
+    delete($dest_drive->{format}); # cannot use the source's format
+    $dest_drive->{file} = $dest->{volid};
+
+    # Mirror happens below the throttle filter, so if the target is for the same VM, it will end up
+    # below the source's throttle filter, which is inserted for the drive device.
+    my $attach_dest_opts = { 'no-throttle' => 1 };
+    $attach_dest_opts->{'zero-initialized'} = 1 if $dest->{'zero-initialized'};
+
+    # Note that if 'aio' is not explicitly set, i.e. default, it can change if source and target
+    # don't both allow or both not allow 'io_uring' as the default.
+    my $target_node_name =
+        PVE::QemuServer::Blockdev::attach($storecfg, $vmid, $dest_drive, $attach_dest_opts);
+
+    $jobs = {} if !$jobs;
+    my $jobid = "mirror-$drive_id";
+    $jobs->{$jobid} = {
+        'source-node-name' => $source_node_name,
+        'target-node-name' => $target_node_name,
+    };
+
+    my $qmp_opts = common_mirror_qmp_options(
+        $device_id, $target_node_name, $source->{bitmap}, $options->{bwlimit},
+    );
+
+    $qmp_opts->{'job-id'} = "$jobid";
+    $qmp_opts->{replaces} = "$source_node_name";
+
+    # if a job already runs for this device we get an error, catch it for cleanup
+    eval { mon_cmd($vmid, "blockdev-mirror", $qmp_opts->%*); };
+    if (my $err = $@) { # keep the original error, the cleanup evals below reset $@
+        eval { qemu_blockjobs_cancel($vmid, $jobs) };
+        log_warn("unable to cancel block jobs - $@") if $@; # only warn if cleanup failed
+        eval { PVE::QemuServer::Blockdev::detach($vmid, $target_node_name); };
+        log_warn("unable to delete blockdev '$target_node_name' - $@") if $@;
+        die "error starting blockdev mirror - $err";
+    }
+    qemu_drive_mirror_monitor(
+        $vmid, $dest->{vmid}, $jobs, $completion, $options->{'guest-agent'}, 'mirror',
+    );
+}
+
sub mirror {
my ($source, $dest, $jobs, $completion, $options) = @_;
diff --git a/src/test/MigrationTest/QemuMigrateMock.pm b/src/test/MigrationTest/QemuMigrateMock.pm
index 25a4f9b2..c52df84b 100644
--- a/src/test/MigrationTest/QemuMigrateMock.pm
+++ b/src/test/MigrationTest/QemuMigrateMock.pm
@@ -9,6 +9,7 @@ use Test::MockModule;
use MigrationTest::Shared;
use PVE::API2::Qemu;
+use PVE::QemuServer::Drive;
use PVE::Storage;
use PVE::Tools qw(file_set_contents file_get_contents);
@@ -167,6 +168,13 @@ $qemu_server_blockjob_module->mock(
common_mirror_mock($vmid, $drive_id);
},
+    blockdev_mirror => sub {
+        # Takes the same arguments as the real blockdev_mirror(), only $source is used here.
+        my ($source, $dest, $jobs, $completion, $options) = @_;
+        my $vmid = $source->{vmid};
+        my $drive_id = PVE::QemuServer::Drive::get_drive_id($source->{drive});
+        return common_mirror_mock($vmid, $drive_id);
+    },
qemu_drive_mirror_monitor => sub {
my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
--
2.47.2
More information about the pve-devel
mailing list