[pve-devel] [PATCH qemu-server 17/31] block job: add blockdev mirror

Fabian Grünbichler f.gruenbichler at proxmox.com
Mon Jun 30 12:15:02 CEST 2025


On June 27, 2025 5:57 pm, Fiona Ebner wrote:
> With blockdev-mirror, it is possible to change the aio setting on the
> fly and this is useful for migrations between storages where one wants
> to use io_uring by default and the other doesn't.
> 
> The node below the top throttle node needs to be replaced so that the
> limits stay intact and that the top node still has the drive ID as the
> node name. That node is not necessarily a format node. For example, it
> could also be a zeroinit node from an earlier mirror operation. So
> query QEMU itself.
> 
> QEMU automatically drops nodes after mirror only if they were
> implicitly added, i.e. not explicitly added via blockdev-add. Since a
> previous mirror target is explicitly added (and not just implicitly as
> the child of a top throttle node), it is necessary to detach the
> appropriate block node after mirror.
> 
> Already mock blockdev_mirror in the tests.
> 
> Co-developed-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
> Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
> ---
> 
> NOTE: Changes since last series:
> * Query QEMU for file child.
> * Remove appropriate node after mirror.
> * Delete format property from cloned drive hash for destination.
> 
>  src/PVE/QemuServer/BlockJob.pm            | 176 ++++++++++++++++++++++
>  src/test/MigrationTest/QemuMigrateMock.pm |   8 +
>  2 files changed, 184 insertions(+)
> 
> diff --git a/src/PVE/QemuServer/BlockJob.pm b/src/PVE/QemuServer/BlockJob.pm
> index 68d0431f..212d6a4f 100644
> --- a/src/PVE/QemuServer/BlockJob.pm
> +++ b/src/PVE/QemuServer/BlockJob.pm
> @@ -4,12 +4,14 @@ use strict;
>  use warnings;
>  
>  use JSON;
> +use Storable qw(dclone);
>  
>  use PVE::Format qw(render_duration render_bytes);
>  use PVE::RESTEnvironment qw(log_warn);
>  use PVE::Storage;
>  
>  use PVE::QemuServer::Agent qw(qga_check_running);
> +use PVE::QemuServer::Blockdev;
>  use PVE::QemuServer::Drive qw(checked_volume_format);
>  use PVE::QemuServer::Monitor qw(mon_cmd);
>  use PVE::QemuServer::RunState;
> @@ -187,10 +189,17 @@ sub qemu_drive_mirror_monitor {
>                          print "$job_id: Completing block job...\n";
>  
>                          my $completion_command;
> +                        # For blockdev, need to detach appropriate node. QEMU will only drop it if
> +                        # it was implicitly added (e.g. as the child of a top throttle node), but
> +                        # not if it was explicitly added via blockdev-add (e.g. as a previous mirror
> +                        # target).
> +                        my $detach_node_name;
>                          if ($completion eq 'complete') {
>                              $completion_command = 'block-job-complete';
> +                            $detach_node_name = $jobs->{$job_id}->{'source-node-name'};
>                          } elsif ($completion eq 'cancel') {
>                              $completion_command = 'block-job-cancel';
> +                            $detach_node_name = $jobs->{$job_id}->{'target-node-name'};
>                          } else {
>                              die "invalid completion value: $completion\n";
>                          }
> @@ -202,6 +211,9 @@ sub qemu_drive_mirror_monitor {
>                          } elsif ($err) {
>                              die "$job_id: block job cannot be completed - $err\n";
>                          } else {
> +                            $jobs->{$job_id}->{'detach-node-name'} = $detach_node_name
> +                                if $detach_node_name;
> +
>                              print "$job_id: Completed successfully.\n";
>                              $jobs->{$job_id}->{complete} = 1;
>                          }
> @@ -347,6 +359,170 @@ sub qemu_drive_mirror_switch_to_active_mode {
>      }
>  }
>  
> +=pod
> +
> +=head3 blockdev_mirror
> +
> +    blockdev_mirror($source, $dest, $jobs, $completion, $options)
> +
> +Mirrors the volume of a running VM specified by C<$source> to destination C<$dest>.
> +
> +=over
> +
> +=item C<$source>
> +
> +The source information consists of:
> +
> +=over
> +
> +=item C<< $source->{vmid} >>
> +
> +The ID of the running VM the source volume belongs to.
> +
> +=item C<< $source->{drive} >>
> +
> +The drive configuration of the source volume as currently attached to the VM.
> +
> +=item C<< $source->{bitmap} >>
> +
> +(optional) Use incremental mirroring based on the specified bitmap.
> +
> +=back
> +
> +=item C<$dest>
> +
> +The destination information consists of:
> +
> +=over
> +
> +=item C<< $dest->{volid} >>
> +
> +The volume ID of the target volume.
> +
> +=item C<< $dest->{vmid} >>
> +
> +(optional) The ID of the VM the target volume belongs to. Defaults to C<< $source->{vmid} >>.
> +
> +=item C<< $dest->{'zero-initialized'} >>
> +
> +(optional) True, if the target volume is zero-initialized.
> +
> +=back
> +
> +=item C<$jobs>
> +
> +(optional) Other jobs in the transaction when multiple volumes should be mirrored. All jobs must be
> +ready before completion can happen.
> +
> +=item C<$completion>
> +
> +Completion mode, default is C<complete>:
> +
> +=over
> +
> +=item C<complete>
> +
> +Wait until all jobs are ready, block-job-complete them (default). This means switching the original
> +drive to use the new target.
> +
> +=item C<cancel>
> +
> +Wait until all jobs are ready, block-job-cancel them. This means not switching the original drive
> +to use the new target.
> +
> +=item C<skip>
> +
> +Wait until all jobs are ready, return with block jobs in ready state.
> +
> +=item C<auto>
> +
> +Wait until all jobs disappear, only use for jobs which complete automatically.
> +
> +=back
> +
> +=item C<$options>
> +
> +Further options:
> +
> +=over
> +
> +=item C<< $options->{'guest-agent'} >>
> +
> +If the guest agent is configured for the VM. It will be used to freeze and thaw the filesystems for
> +consistency when the target belongs to a different VM.
> +
> +=item C<< $options->{'bwlimit'} >>
> +
> +The bandwidth limit to use for the mirroring operation, in KiB/s.
> +
> +=back
> +
> +=back
> +
> +=cut
> +
> +sub blockdev_mirror {
> +    my ($source, $dest, $jobs, $completion, $options) = @_;
> +
> +    my $vmid = $source->{vmid};
> +
> +    my $drive_id = PVE::QemuServer::Drive::get_drive_id($source->{drive});
> +    my $device_id = "drive-$drive_id";
> +
> +    my $storecfg = PVE::Storage::config();
> +
> +    # Need to replace the node below the top node. This is not necessarily a format node, for
> +    # example, it can also be a zeroinit node by a previous mirror! So query QEMU itself.
> +    my $child_info = mon_cmd($vmid, 'block-node-query-file-child', 'node-name' => $device_id);
> +    my $source_node_name = $child_info->{'node-name'};

Isn't this semantically equivalent to get_node_name_below_throttle? That
helper does a few more checks and is slightly more expensive, but
validating that the top node is a throttle node, as expected, might be a
good thing here as well.

Depending on how we see things, we might want to add an `$assert`
parameter to that helper, though, for call sites that only occur in a
blockdev context - to avoid the fallback in case the top node is not a
throttle group, and die instead?

> +
> +    # Copy original drive config (aio, cache, discard, ...):
> +    my $dest_drive = dclone($source->{drive});
> +    delete($dest_drive->{format}); # cannot use the source's format
> +    $dest_drive->{file} = $dest->{volid};
> +
> +    my $generate_blockdev_opts = {};
> +    $generate_blockdev_opts->{'zero-initialized'} = 1 if $dest->{'zero-initialized'};
> +
> +    # Note that if 'aio' is not explicitly set, i.e. default, it can change if source and target
> +    # don't both allow or both not allow 'io_uring' as the default.
> +    my $target_drive_blockdev = PVE::QemuServer::Blockdev::generate_drive_blockdev(
> +        $storecfg, $dest_drive, $generate_blockdev_opts,
> +    );
> +    # Top node is the throttle group, must use the file child.
> +    my $target_blockdev = $target_drive_blockdev->{file};

Should we have an option for generate_drive_blockdev to skip the
throttle group/top node? Then we could just use Blockdev::attach here,

at least if we make that return the top-level node name or blockdev.

> +
> +    PVE::QemuServer::Monitor::mon_cmd($vmid, 'blockdev-add', $target_blockdev->%*);
> +    my $target_node_name = $target_blockdev->{'node-name'};
> +
> +    $jobs = {} if !$jobs;
> +    my $jobid = "mirror-$drive_id";
> +    $jobs->{$jobid} = {
> +        'source-node-name' => $source_node_name,
> +        'target-node-name' => $target_node_name,
> +    };
> +
> +    my $qmp_opts = common_mirror_qmp_options(
> +        $device_id, $target_node_name, $source->{bitmap}, $options->{bwlimit},
> +    );
> +
> +    $qmp_opts->{'job-id'} = "$jobid";
> +    $qmp_opts->{replaces} = "$source_node_name";
> +
> +    # if a job already runs for this device we get an error, catch it for cleanup
> +    eval { mon_cmd($vmid, "blockdev-mirror", $qmp_opts->%*); };
> +    if (my $err = $@) {
> +        eval { qemu_blockjobs_cancel($vmid, $jobs) };
> +        log_warn("unable to cancel block jobs - $@");
> +        eval { PVE::QemuServer::Blockdev::detach($vmid, $target_node_name); };
> +        log_warn("unable to delete blockdev '$target_node_name' - $@");
> +        die "error starting blockdev mirrror - $err";
> +    }
> +    qemu_drive_mirror_monitor(
> +        $vmid, $dest->{vmid}, $jobs, $completion, $options->{'guest-agent'}, 'mirror',
> +    );
> +}
> +
>  sub mirror {
>      my ($source, $dest, $jobs, $completion, $options) = @_;
>  
> diff --git a/src/test/MigrationTest/QemuMigrateMock.pm b/src/test/MigrationTest/QemuMigrateMock.pm
> index 25a4f9b2..c52df84b 100644
> --- a/src/test/MigrationTest/QemuMigrateMock.pm
> +++ b/src/test/MigrationTest/QemuMigrateMock.pm
> @@ -9,6 +9,7 @@ use Test::MockModule;
>  use MigrationTest::Shared;
>  
>  use PVE::API2::Qemu;
> +use PVE::QemuServer::Drive;
>  use PVE::Storage;
>  use PVE::Tools qw(file_set_contents file_get_contents);
>  
> @@ -167,6 +168,13 @@ $qemu_server_blockjob_module->mock(
>  
>          common_mirror_mock($vmid, $drive_id);
>      },
> +    blockdev_mirror => sub {
> +        my ($source, $dest, $jobs, $completion, $options) = @_;
> +
> +        my $drive_id = PVE::QemuServer::Drive::get_drive_id($source->{drive});
> +
> +        common_mirror_mock($source->{vmid}, $drive_id);
> +    },
>      qemu_drive_mirror_monitor => sub {
>          my ($vmid, $vmiddst, $jobs, $completion, $qga) = @_;
>  
> -- 
> 2.47.2
> 
> 
> 
> _______________________________________________
> pve-devel mailing list
> pve-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
> 
> 
> 




More information about the pve-devel mailing list