[pve-devel] [PATCH v3 qemu-server 08/11] blockdev: convert drive_mirror to blockdev_mirror
Fabian Grünbichler
f.gruenbichler at proxmox.com
Wed Jan 8 16:19:40 CET 2025
> Alexandre Derumier via pve-devel <pve-devel at lists.proxmox.com> wrote on 16.12.2024 10:12 CET:
> Signed-off-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
> ---
> PVE/QemuMigrate.pm | 2 +-
> PVE/QemuServer.pm | 106 +++++++++++++++++++++++++++++++++++----------
> 2 files changed, 83 insertions(+), 25 deletions(-)
>
> diff --git a/PVE/QemuMigrate.pm b/PVE/QemuMigrate.pm
> index ed5ede30..88627ce4 100644
> --- a/PVE/QemuMigrate.pm
> +++ b/PVE/QemuMigrate.pm
> @@ -1134,7 +1134,7 @@ sub phase2 {
> my $bitmap = $target->{bitmap};
>
> $self->log('info', "$drive: start migration to $nbd_uri");
> - PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit, $bitmap);
> + PVE::QemuServer::qemu_drive_mirror($vmid, $drive, $source_drive, $nbd_uri, $vmid, undef, $self->{storage_migration_jobs}, 'skip', undef, $bwlimit, $bitmap);
> }
>
> if (PVE::QemuServer::QMPHelpers::runs_at_least_qemu_version($vmid, 8, 2)) {
> diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
> index 6bebb906..3d7c41ee 100644
> --- a/PVE/QemuServer.pm
> +++ b/PVE/QemuServer.pm
> @@ -8184,59 +8184,85 @@ sub qemu_img_convert {
> }
>
> sub qemu_drive_mirror {
> - my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;
> + my ($vmid, $driveid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;
the $driveid is contained in $drive (in the form of index and interface). this would still be a breaking change since $drive before was the $driveid, and now it's the parsed drive ;)
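for reference, it can be derived from the parsed drive, roughly like this (assuming the parsed hash keeps the 'interface' and 'index' keys that parse_drive sets):

    my $driveid = $drive->{interface} . $drive->{index};
    my $deviceid = "drive-$driveid";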
>
> $jobs = {} if !$jobs;
> + my $deviceid = "drive-$driveid";
> + my $dst_format;
> + my $dst_path = $dst_volid;
> + my $jobid = "mirror-$deviceid";
> + $jobs->{$jobid} = {};
>
> - my $qemu_target;
> - my $format;
> - $jobs->{"drive-$drive"} = {};
> + my $storecfg = PVE::Storage::config();
>
> if ($dst_volid =~ /^nbd:/) {
> - $qemu_target = $dst_volid;
> - $format = "nbd";
> + $dst_format = "nbd";
> } else {
> - my $storecfg = PVE::Storage::config();
> -
> - $format = checked_volume_format($storecfg, $dst_volid);
> -
> - my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
> -
> - $qemu_target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
> + $dst_format = checked_volume_format($storecfg, $dst_volid);
> + $dst_path = PVE::Storage::path($storecfg, $dst_volid);
> + }
> +
> + # copy original drive config (aio,cache,discard,...)
> + my $dst_drive = dclone($drive);
> + $dst_drive->{format} = $dst_format;
> + $dst_drive->{file} = $dst_path;
> + $dst_drive->{zeroinit} = 1 if $is_zero_initialized;
> + # improve: if the target storage doesn't support aio=io_uring, change it to the
> + # default 'native' and remove clone_disk_check_io_uring()
> +
> + #add new block device
> + my $nodes = get_blockdev_nodes($vmid);
> +
> + my $target_fmt_nodename = get_blockdev_nextid("fmt-$deviceid", $nodes);
> + my $target_file_nodename = get_blockdev_nextid("file-$deviceid", $nodes);
> + my $target_file_blockdev = generate_file_blockdev($storecfg, $dst_drive, $target_file_nodename);
> + my $target_nodename = undef;
> +
> + if ($dst_format eq 'nbd') {
> + # nbd targets don't have a format node
> + $target_nodename = $target_file_nodename;
> + PVE::QemuServer::Monitor::mon_cmd($vmid, 'blockdev-add', %$target_file_blockdev);
> + } else {
> + $target_nodename = $target_fmt_nodename;
> + my $target_fmt_blockdev = generate_format_blockdev($storecfg, $dst_drive, $target_fmt_nodename, $target_file_blockdev);
> + PVE::QemuServer::Monitor::mon_cmd($vmid, 'blockdev-add', %$target_fmt_blockdev);
> }
>
> + # we replace the original src_fmt node in the blockdev graph
> + my $src_fmt_nodename = find_fmt_nodename_drive($storecfg, $vmid, $drive, $nodes);
> my $opts = {
> + 'job-id' => $jobid,
> timeout => 10,
> - device => "drive-$drive",
> - mode => "existing",
> + device => $deviceid,
> + replaces => $src_fmt_nodename,
> sync => "full",
> - target => $qemu_target,
> + target => $target_nodename,
> 'auto-dismiss' => JSON::false,
> };
> - $opts->{format} = $format if $format;
>
> if (defined($src_bitmap)) {
> $opts->{sync} = 'incremental';
> - $opts->{bitmap} = $src_bitmap;
> + $opts->{bitmap} = $src_bitmap; ## FIXME: how to handle the bitmap? special proxmox patch?
> print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
> }
>
> if (defined($bwlimit)) {
> $opts->{speed} = $bwlimit * 1024;
> - print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
> + print "drive mirror is starting for $deviceid with bandwidth limit: ${bwlimit} KB/s\n";
> } else {
> - print "drive mirror is starting for drive-$drive\n";
> + print "drive mirror is starting for $deviceid\n";
> }
>
> # if a job already runs for this device we get an error, catch it for cleanup
> - eval { mon_cmd($vmid, "drive-mirror", %$opts); };
> + eval { mon_cmd($vmid, "blockdev-mirror", %$opts); };
> +
> if (my $err = $@) {
> eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
> + #FIXME: delete blockdev after job cancel
wouldn't we also need to keep track of the device IDs and pass those to the monitor invocation below? if the block job fails or gets canceled, we also need cleanup there..
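something along these lines might work - remember the added node names in $jobs so both this error path and the monitor loop can drop them again (untested sketch, 'target-nodes' is a made-up key):

    $jobs->{$jobid} = { 'target-nodes' => [ $target_nodename ] };

    # on error or cancel, delete the now-dangling target nodes again
    for my $node ($jobs->{$jobid}->{'target-nodes'}->@*) {
        eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => $node); };
        warn "failed to remove block node '$node': $@\n" if $@;
    }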
> warn "$@\n" if $@;
> die "mirroring error: $err\n";
> }
> -
> - qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
> + qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga, 'mirror');
> }
>
> # $completion can be either
> @@ -8595,7 +8621,7 @@ sub clone_disk {
>
> my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
> if ($use_drive_mirror) {
> - qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
> + qemu_drive_mirror($vmid, $src_drivename, $drive, $newvolid, $newvmid, $sparseinit, $jobs,
> $completion, $qga, $bwlimit);
> } else {
> if ($dst_drivename eq 'efidisk0') {
> @@ -9130,6 +9156,38 @@ sub delete_ifaces_ipams_ips {
> }
> }
>
> +sub find_fmt_nodename_drive {
> + my ($storecfg, $vmid, $drive, $nodes) = @_;
> +
> + my $volid = $drive->{file};
> + my $format = checked_volume_format($storecfg, $volid);
$format is not used?
> + my $path = PVE::Storage::path($storecfg, $volid);
is this guaranteed to be stable? also across versions? and including external storage plugins?
> +
> + my $node = find_blockdev_node($nodes, $path, 'fmt');
that one is only added in a later patch.. but I don't think lookups by path are a good idea, we should probably have a deterministic node naming concept instead? e.g., encode the drive + snapshot name?
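e.g. something like the following (sketch only - note that QEMU restricts node names to at most 31 characters from a limited character set, so long snapshot names would need truncation or hashing):

    sub blockdev_node_name {
        my ($type, $driveid, $snapname) = @_;
        # e.g. 'fmt-scsi0' or 'file-scsi0-snap1'
        my $name = "$type-$driveid";
        $name .= "-$snapname" if defined($snapname);
        die "block node name '$name' too long\n" if length($name) > 31;
        return $name;
    }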
> + return $node->{'node-name'};
> +}
> +
> +sub get_blockdev_nextid {
> + my ($nodename, $nodes) = @_;
> + my $version = 0;
> + for my $nodeid (keys %$nodes) {
> + if ($nodeid =~ m/^$nodename-(\d+)$/) {
> + my $current_version = $1;
> + $version = $current_version if $current_version >= $version;
> + }
> + }
> + $version++;
> + return "$nodename-$version";
since we shouldn't ever have more than one job for a drive running (right?), couldn't we just have a deterministic name for this? that would also simplify cleanup, including cleanup of a failed cleanup ;)
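i.e. something like (sketch):

    my $target_nodename = "mirror-$deviceid";
    # best-effort removal of a leftover node from an earlier failed job, so a
    # retry doesn't fail with a duplicate node name
    eval { mon_cmd($vmid, 'blockdev-del', 'node-name' => $target_nodename); };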
> +}
> +
> +sub get_blockdev_nodes {
> + my ($vmid) = @_;
> +
> + my $nodes = PVE::QemuServer::Monitor::mon_cmd($vmid, "query-named-block-nodes");
> + $nodes = { map { $_->{'node-name'} => $_ } $nodes->@* };
> + return $nodes;
> +}
> +
> sub encode_json_ordered {
> return JSON->new->canonical->allow_nonref->encode( $_[0] );
> }
> --
> 2.39.5