[pve-devel] [PATCH qemu-server 20/22] blockdev: add helpers to generate blockdev commandline

Fabian Grünbichler f.gruenbichler at proxmox.com
Fri Jun 13 13:35:59 CEST 2025


On June 12, 2025 4:02 pm, Fiona Ebner wrote:
> The drive device and node structure is:
> front-end device {ide-hd,scsi-hd,virtio-blk-pci} (id=$drive_id)
>  - throttle node (node-name=$drive_id)

should we prefix this is well to not "use" the basic name (e.g., in case
in the future a new "topmost" node comes along)?

>   - format node  (node-name=f$encoded-info)
>    - file node   (node-name=e$encoded-info)
> 
> The node-name can only be 31 characters long and needs to start with a
> letter. The throttle node will stay inserted below the front-end
> device. The other nodes might change however, because of drive
> mirroring and similar. There currently are no good helpers to
> query/walk the block graph via QMP, x-debug-query-block-graph is
> experimental and for debugging only. Therefore, necessary information
> is encoded in the node name to be able to find it again. In
> particular, this is the volume ID, the drive ID and optionally a
> snapshot name. As long as the configuration file matches with the
> running instance, this is enough to find the correct node for
> block operations like mirror and resize.
> 
> The 'snapshot' option, for QEMU's snapshot mode, i.e. writes are only
> temporary, is not yet supported.
> 
> Originally-by: Alexandre Derumier <alexandre.derumier at groupe-cyllene.com>
> [FE: split up patch
>      expand commit message
>      explicitly test for drivers with aio setting
>      improve readonly handling
>      improve CD-ROM handling
>      fix failure for storage named 'nbd' by always using full regex
>      improve node name generation
>      fail when drive->{snapshot} is set]
> Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
> ---
> 
> If we do not get around to implement the 'snapshot' option in time for
> PVE 9, we can still fall back to the legacy drive for that (and warn
> users that not all operations might work with such drives).
> 
>  PVE/QemuServer/Blockdev.pm | 172 ++++++++++++++++++++++++++++++++++++-
>  PVE/QemuServer/Makefile    |   1 +
>  2 files changed, 172 insertions(+), 1 deletion(-)
> 
> diff --git a/PVE/QemuServer/Blockdev.pm b/PVE/QemuServer/Blockdev.pm
> index b1150141..2d3760f0 100644
> --- a/PVE/QemuServer/Blockdev.pm
> +++ b/PVE/QemuServer/Blockdev.pm
> @@ -3,7 +3,42 @@ package PVE::QemuServer::Blockdev;
>  use strict;
>  use warnings;
>  
> -use PVE::QemuServer::Drive;
> +use Digest::SHA;
> +use Fcntl qw(S_ISBLK S_ISCHR);
> +use File::stat;
> +use JSON;
> +
> +use PVE::Storage;
> +
> +use PVE::QemuServer::Drive qw(drive_is_cdrom);
> +
> +my sub get_node_name {
> +    my ($type, $drive_id, $volid, $snap) = @_;
> +
> +    my $info = "drive=$drive_id,";
> +    $info .= "snap=$snap," if defined($snap);
> +    $info .= "volid=$volid";
> +
> +    my $hash = substr(Digest::SHA::sha256_hex($info), 0, 30);
> +
> +    my $prefix = "";
> +    if ($type eq 'fmt') {
> +	$prefix = 'f';
> +    } elsif ($type eq 'file') {
> +	$prefix = 'e';
> +    } else {
> +	die "unknown node type '$type'";
> +    }
> +    # node-name must start with an alphabetical character
> +    return "${prefix}${hash}";
> +}
> +
> +my sub read_only_json_option {
> +    my ($drive, $options) = @_;
> +
> +    return JSON::true if $drive->{ro} || drive_is_cdrom($drive) || $options->{'read-only'};
> +    return JSON::false;

should we maybe have a generic jsonify-helper instead? this is only
called twice, but we have (counting this as well) three call sites here
that basically do

foo ? JSON::true : JSON::false

which could become `json_bool(foo)`

with a few more in PVE::VZDump::QemuServer and other qemu-server
modules..

we could still have a drive_is_ro helper in any case ;)

> +}
>  
>  sub generate_throttle_group {
>      my ($drive) = @_;
> @@ -41,4 +76,139 @@ sub generate_throttle_group {
>      };
>  }
>  
> +sub generate_blockdev_drive_cache {
> +    my ($drive, $scfg) = @_;
> +
> +    my $cache_direct = PVE::QemuServer::Drive::drive_uses_cache_direct($drive, $scfg);
> +    my $cache = {};
> +    $cache->{direct} = $cache_direct ? JSON::true : JSON::false;
> +    $cache->{'no-flush'} = $drive->{cache} && $drive->{cache} eq 'unsafe' ? JSON::true : JSON::false;
> +    return $cache;
> +}
> +
> +sub generate_file_blockdev {
> +    my ($storecfg, $drive, $options) = @_;
> +
> +    my $blockdev = {};
> +    my $scfg = undef;
> +
> +    die "generate_file_blockdev called without volid/path\n" if !$drive->{file};
> +    die "generate_file_blockdev called with 'none'\n" if $drive->{file} eq 'none';
> +    # FIXME use overlay and new config option to define storage for temp write device
> +    die "'snapshot' option is not yet supported for '-blockdev'\n" if $drive->{snapshot};
> +
> +    my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive);
> +
> +    if ($drive->{file} eq 'cdrom') {
> +	my $path = PVE::QemuServer::Drive::get_iso_path($storecfg, $drive->{file});
> +	$blockdev = { driver => 'host_cdrom', filename => $path };
> +    } elsif ($drive->{file} =~ m|^/|) {
> +	my $path = $drive->{file};
> +	# The 'file' driver only works for regular files. The check below is taken from
> +	# block/file-posix.c:hdev_probe_device() in QEMU. To detect CD-ROM host devices, QEMU issues
> +	# an ioctl, while the code here relies on the media=cdrom flag instead.
> +	my $st = File::stat::stat($path) or die "stat for '$path' failed - $!\n";
> +	my $driver = 'file';
> +	if (S_ISCHR($st->mode) || S_ISBLK($st->mode)) {
> +	    $driver = drive_is_cdrom($drive) ? 'host_cdrom' : 'host_device';
> +	}
> +	$blockdev = { driver => $driver, filename => $path };
> +    } else {
> +	my $volid = $drive->{file};
> +	my ($storeid) = PVE::Storage::parse_volume_id($volid);
> +
> +	my $vtype = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[0];
> +	die "$drive_id: explicit media parameter is required for iso images\n"
> +	    if !defined($drive->{media}) && defined($vtype) && $vtype eq 'iso';
> +
> +	my $storage_opts = { hints => {} };
> +	$storage_opts->{hints}->{'efi-disk'} = 1 if $drive->{interface} eq 'efidisk';
> +	$storage_opts->{'snapshot-name'} = $options->{'snapshot-name'}
> +	    if defined($options->{'snapshot-name'});
> +	$blockdev = PVE::Storage::qemu_blockdev_options($storecfg, $volid, $storage_opts);
> +	$scfg = PVE::Storage::storage_config($storecfg, $storeid);
> +    }
> +
> +    $blockdev->{cache} = generate_blockdev_drive_cache($drive, $scfg);
> +
> +    my $driver = $blockdev->{driver};
> +    # only certain drivers have the aio setting
> +    if ($driver eq 'file' || $driver eq 'host_cdrom' || $driver eq 'host_device') {
> +	$blockdev->{aio} = PVE::QemuServer::Drive::aio_cmdline_option(
> +	    $scfg, $drive, $blockdev->{cache}->{direct});
> +    }
> +
> +    if (!drive_is_cdrom($drive)) {
> +	$blockdev->{discard} = $drive->{discard} && $drive->{discard} eq 'on' ? 'unmap' : 'ignore';
> +	$blockdev->{'detect-zeroes'} = PVE::QemuServer::Drive::detect_zeroes_cmdline_option($drive);
> +    }
> +
> +    $blockdev->{'node-name'} = get_node_name(
> +	'file', $drive_id, $drive->{file}, $options->{'snapshot-name'});
> +
> +    $blockdev->{'read-only'} = read_only_json_option($drive, $options);
> +
> +    return $blockdev;
> +}
> +
> +sub generate_format_blockdev {
> +    my ($storecfg, $drive, $child, $options) = @_;
> +
> +    die "generate_format_blockdev called without volid/path\n" if !$drive->{file};
> +    die "generate_format_blockdev called with 'none'\n" if $drive->{file} eq 'none';
> +
> +    my $scfg;
> +    my $format;
> +    my $volid = $drive->{file};
> +    my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive);
> +    my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
> +
> +    # For PVE-managed volumes, use the format from the storage layer and prevent overrides via the
> +    # drive's 'format' option. For unmanaged volumes, fallback to 'raw' to avoid auto-detection by
> +    # QEMU.
> +    if ($storeid) {
> +	$scfg = PVE::Storage::storage_config($storecfg, $storeid);
> +	$format = PVE::QemuServer::Drive::checked_volume_format($storecfg, $volid);
> +	if ($drive->{format} && $drive->{format} ne $format) {
> +	    die "drive '$drive->{interface}$drive->{index}' - volume '$volid'"
> +		." - 'format=$drive->{format}' option different from storage format '$format'\n";
> +	}
> +    } else {
> +	$format = $drive->{format} // 'raw';
> +    }
> +
> +    # define cache option on both format && file node like libvirt does
> +    my $cache = generate_blockdev_drive_cache($drive, $scfg);
> +
> +    my $node_name = get_node_name('fmt', $drive_id, $drive->{file}, $options->{'snapshot-name'});
> +
> +    return {
> +	'node-name' => "$node_name",
> +	driver => "$format",
> +	file => $child,
> +	cache => $cache,
> +	'read-only' => read_only_json_option($drive, $options),
> +    };
> +}
> +
> +sub generate_drive_blockdev {
> +    my ($storecfg, $drive, $options) = @_;
> +
> +    my $drive_id = PVE::QemuServer::Drive::get_drive_id($drive);
> +
> +    die "generate_drive_blockdev called without volid/path\n" if !$drive->{file};
> +    die "generate_drive_blockdev called with 'none'\n" if $drive->{file} eq 'none';
> +
> +    my $child = generate_file_blockdev($storecfg, $drive, $options);
> +    $child = generate_format_blockdev($storecfg, $drive, $child, $options);
> +
> +    # this is the top filter entry point, use $drive-drive_id as nodename
> +    return {
> +	driver => "throttle",
> +	'node-name' => "drive-$drive_id",
> +	'throttle-group' => "throttle-drive-$drive_id",
> +	file => $child,
> +    };
> +}
> +
>  1;
> diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile
> index 8054a93b..e3671e5b 100644
> --- a/PVE/QemuServer/Makefile
> +++ b/PVE/QemuServer/Makefile
> @@ -12,6 +12,7 @@ SOURCES=PCI.pm		\
>  	MetaInfo.pm	\
>  	CPUConfig.pm	\
>  	CGroup.pm	\
> +	Blockdev.pm	\

already added by the previous patch ;)

should we sort this list?

>  	Drive.pm	\
>  	QMPHelpers.pm	\
>  	StateFile.pm	\
> -- 
> 2.39.5
> 
> 
> 
> _______________________________________________
> pve-devel mailing list
> pve-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
> 
> 
> 




More information about the pve-devel mailing list