[pve-devel] [PATCH qemu-server 20/22] blockdev: add helpers to generate blockdev commandline
Fabian Grünbichler
f.gruenbichler at proxmox.com
Fri Jun 13 13:35:59 CEST 2025
On June 12, 2025 4:02 pm, Fiona Ebner wrote:
> The drive device and node structure is:
> front-end device {ide-hd,scsi-hd,virtio-blk-pci} (id=$drive_id)
> - throttle node (node-name=$drive_id)
should we prefix this is well to not "use" the basic name (e.g., in case
in the future a new "topmost" node comes along)?
> - format node (node-name=f$encoded-info)
> - file node (node-name=e$encoded-info)
>
> The node-name can only be 31 characters long and needs to start with a
> letter. The throttle node will stay inserted below the front-end
> device. The other nodes might change however, because of drive
> mirroring and similar. There currently are no good helpers to
> query/walk the block graph via QMP, x-debug-query-block-graph is
> experimental and for debugging only. Therefore, necessary information
> is encoded in the node name to be able to find it again. In
> particular, this is the volume ID, the drive ID and optionally a
> snapshot name. As long as the configuration file matches with the
> running instance, this is enough to find the correct node for
> block operations like mirror and resize.
>
> The 'snapshot' option, for QEMU's snapshot mode, i.e. writes are only
> temporary, is not yet supported.
>
> Originally-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
> [FE: split up patch
> expand commit message
> explicitly test for drivers with aio setting
> improve readonly handling
> improve CD-ROM handling
> fix failure for storage named 'nbd' by always using full regex
> improve node name generation
> fail when drive->{snapshot} is set]
> Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
> ---
>
> If we do not get around to implement the 'snapshot' option in time for
> PVE 9, we can still fall back to the legacy drive for that (and warn
> users that not all operations might work with such drives).
>
> PVE/QemuServer/Blockdev.pm | 172 ++++++++++++++++++++++++++++++++++++-
> PVE/QemuServer/Makefile | 1 +
> 2 files changed, 172 insertions(+), 1 deletion(-)
>
> diff --git a/PVE/QemuServer/Blockdev.pm b/PVE/QemuServer/Blockdev.pm
> index b1150141..2d3760f0 100644
> --- a/PVE/QemuServer/Blockdev.pm
> +++ b/PVE/QemuServer/Blockdev.pm
> @@ -3,7 +3,42 @@ package PVE::QemuServer::Blockdev;
> use strict;
> use warnings;
>
> -use PVE::QemuServer::Drive;
> +use Digest::SHA;
> +use Fcntl qw(S_ISBLK S_ISCHR);
> +use File::stat;
> +use JSON;
> +
> +use PVE::Storage;
> +
> +use PVE::QemuServer::Drive qw(drive_is_cdrom);
> +
> +my sub get_node_name {
> + my ($type, $drive_id, $volid, $snap) = @_;
> +
> + my $info = "drive=$drive_id,";
> + $info .= "snap=$snap," if defined($snap);
> + $info .= "volid=$volid";
> +
> + my $hash = substr(Digest::SHA::sha256_hex($info), 0, 30);
> +
> + my $prefix = "";
> + if ($type eq 'fmt') {
> + $prefix = 'f';
> + } elsif ($type eq 'file') {
> + $prefix = 'e';
> + } else {
> + die "unknown node type '$type'";
> + }
> + # node-name must start with an alphabetical character
> + return "${prefix}${hash}";
> +}
> +
> +my sub read_only_json_option {
> + my ($drive, $options) = @_;
> +
> + return JSON::true if $drive->{ro} || drive_is_cdrom($drive) || $options->{'read-only'};
> + return JSON::false;
should we maybe have a generic jsonify-helper instead? this is only
called twice, but we have (counting this as well) three call sites here
that basically do
foo ? JSON::true : JSON::false
which could become `json_bool(foo)`
with a few more in PVE::VZDump::QemuServer and other qemu-server
modules..
we could still have a drive_is_ro helper in any case ;)
> +}
>
> sub generate_throttle_group {
> my ($drive) = @_;
> @@ -41,4 +76,139 @@ sub generate_throttle_group {
> };
> }
>
> +sub generate_blockdev_drive_cache {
> + my ($drive, $scfg) = @_;
> +
> + my $cache_direct = PVE::QemuServer::Drive::drive_uses_cache_direct($drive, $scfg);
> + my $cache = {};
> + $cache->{direct} = $cache_direct ? JSON::true : JSON::false;
> + $cache->{'no-flush'} = $drive->{cache} && $drive->{cache} eq 'unsafe' ? JSON::true : JSON::false;
> + return $cache;
> +}
> +
> +sub generate_file_blockdev {
> + my ($storecfg, $drive, $options) = @_;
> +
> + my $blockdev = {};
> + my $scfg = undef;
> +
> + die "generate_file_blockdev called without volid/path\n" if !$drive->{file};
> + die "generate_file_blockdev called with 'none'\n" if $drive->{file} eq 'none';
> + # FIXME use overlay and new config option to define storage for temp write device
> + die "'snapshot' option is not yet supported for '-blockdev'\n" if $drive->{snapshot};
> +
> + my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive);
> +
> + if ($drive->{file} eq 'cdrom') {
> + my $path = PVE::QemuServer::Drive::get_iso_path($storecfg, $drive->{file});
> + $blockdev = { driver => 'host_cdrom', filename => $path };
> + } elsif ($drive->{file} =~ m|^/|) {
> + my $path = $drive->{file};
> + # The 'file' driver only works for regular files. The check below is taken from
> + # block/file-posix.c:hdev_probe_device() in QEMU. To detect CD-ROM host devices, QEMU issues
> + # an ioctl, while the code here relies on the media=cdrom flag instead.
> + my $st = File::stat::stat($path) or die "stat for '$path' failed - $!\n";
> + my $driver = 'file';
> + if (S_ISCHR($st->mode) || S_ISBLK($st->mode)) {
> + $driver = drive_is_cdrom($drive) ? 'host_cdrom' : 'host_device';
> + }
> + $blockdev = { driver => $driver, filename => $path };
> + } else {
> + my $volid = $drive->{file};
> + my ($storeid) = PVE::Storage::parse_volume_id($volid);
> +
> + my $vtype = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[0];
> + die "$drive_id: explicit media parameter is required for iso images\n"
> + if !defined($drive->{media}) && defined($vtype) && $vtype eq 'iso';
> +
> + my $storage_opts = { hints => {} };
> + $storage_opts->{hints}->{'efi-disk'} = 1 if $drive->{interface} eq 'efidisk';
> + $storage_opts->{'snapshot-name'} = $options->{'snapshot-name'}
> + if defined($options->{'snapshot-name'});
> + $blockdev = PVE::Storage::qemu_blockdev_options($storecfg, $volid, $storage_opts);
> + $scfg = PVE::Storage::storage_config($storecfg, $storeid);
> + }
> +
> + $blockdev->{cache} = generate_blockdev_drive_cache($drive, $scfg);
> +
> + my $driver = $blockdev->{driver};
> + # only certain drivers have the aio setting
> + if ($driver eq 'file' || $driver eq 'host_cdrom' || $driver eq 'host_device') {
> + $blockdev->{aio} = PVE::QemuServer::Drive::aio_cmdline_option(
> + $scfg, $drive, $blockdev->{cache}->{direct});
> + }
> +
> + if (!drive_is_cdrom($drive)) {
> + $blockdev->{discard} = $drive->{discard} && $drive->{discard} eq 'on' ? 'unmap' : 'ignore';
> + $blockdev->{'detect-zeroes'} = PVE::QemuServer::Drive::detect_zeroes_cmdline_option($drive);
> + }
> +
> + $blockdev->{'node-name'} = get_node_name(
> + 'file', $drive_id, $drive->{file}, $options->{'snapshot-name'});
> +
> + $blockdev->{'read-only'} = read_only_json_option($drive, $options);
> +
> + return $blockdev;
> +}
> +
> +sub generate_format_blockdev {
> + my ($storecfg, $drive, $child, $options) = @_;
> +
> + die "generate_format_blockdev called without volid/path\n" if !$drive->{file};
> + die "generate_format_blockdev called with 'none'\n" if $drive->{file} eq 'none';
> +
> + my $scfg;
> + my $format;
> + my $volid = $drive->{file};
> + my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive);
> + my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
> +
> + # For PVE-managed volumes, use the format from the storage layer and prevent overrides via the
> + # drive's 'format' option. For unmanaged volumes, fallback to 'raw' to avoid auto-detection by
> + # QEMU.
> + if ($storeid) {
> + $scfg = PVE::Storage::storage_config($storecfg, $storeid);
> + $format = PVE::QemuServer::Drive::checked_volume_format($storecfg, $volid);
> + if ($drive->{format} && $drive->{format} ne $format) {
> + die "drive '$drive->{interface}$drive->{index}' - volume '$volid'"
> + ." - 'format=$drive->{format}' option different from storage format '$format'\n";
> + }
> + } else {
> + $format = $drive->{format} // 'raw';
> + }
> +
> + # define cache option on both format && file node like libvirt does
> + my $cache = generate_blockdev_drive_cache($drive, $scfg);
> +
> + my $node_name = get_node_name('fmt', $drive_id, $drive->{file}, $options->{'snapshot-name'});
> +
> + return {
> + 'node-name' => "$node_name",
> + driver => "$format",
> + file => $child,
> + cache => $cache,
> + 'read-only' => read_only_json_option($drive, $options),
> + };
> +}
> +
> +sub generate_drive_blockdev {
> + my ($storecfg, $drive, $options) = @_;
> +
> + my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive);
> +
> + die "generate_drive_blockdev called without volid/path\n" if !$drive->{file};
> + die "generate_drive_blockdev called with 'none'\n" if $drive->{file} eq 'none';
> +
> + my $child = generate_file_blockdev($storecfg, $drive, $options);
> + $child = generate_format_blockdev($storecfg, $drive, $child, $options);
> +
> + # this is the top filter entry point, use $drive-drive_id as nodename
> + return {
> + driver => "throttle",
> + 'node-name' => "drive-$drive_id",
> + 'throttle-group' => "throttle-drive-$drive_id",
> + file => $child,
> + };
> +}
> +
> 1;
> diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile
> index 8054a93b..e3671e5b 100644
> --- a/PVE/QemuServer/Makefile
> +++ b/PVE/QemuServer/Makefile
> @@ -12,6 +12,7 @@ SOURCES=PCI.pm \
> MetaInfo.pm \
> CPUConfig.pm \
> CGroup.pm \
> + Blockdev.pm \
already added by the previous patch ;)
should we sort this list?
> Drive.pm \
> QMPHelpers.pm \
> StateFile.pm \
> --
> 2.39.5
>
>
>
> _______________________________________________
> pve-devel mailing list
> pve-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
>
>
>
More information about the pve-devel
mailing list