[pve-devel] [PATCH qemu-server 2/3] fix #3784: Parameter for guest vIOMMU & machine as property-string

Dominik Csapak d.csapak at proxmox.com
Mon Oct 24 16:19:23 CEST 2022


high level comments first:

while it seems to work (just tested if there are iommu groups in the vm),
i'm missing some reasons for the decisions made here:

e.g. i guess we want to enable 'intremap' and that implies
'kernel-irqchip' cannot be 'full', but why do we want 'split' here?

also, why cache-mode=on?

when i look at the (rather sparse) documentation here:
https://wiki.qemu.org/Features/VT-d

there are some options we probably want to set, e.g. device-iotbl
for the vioummu device, but also maybe iommu_platform on the hostpci
devices?

i didn't read all of their docs, but when we add such a feature
it would be good to have the default options chosen carefully
and explain why we chose them, so that we can look that info
up later, or argue for a change (when necessary)

some comments inline:

On 9/21/22 11:07, Markus Frank wrote:
> vIOMMU enables the option to passthrough pci devices to guest-vms
> in guest-vms for nested Virtualisation.
> 
> Signed-off-by: Markus Frank <m.frank at proxmox.com>
> ---
> Changed the machine parameter to allow multiple machine-specific
> parameters via property_string, but also allow old configs (via
> default_key)
> 
>   PVE/API2/Qemu.pm          |  7 ++---
>   PVE/QemuConfig.pm         |  3 ++-
>   PVE/QemuServer.pm         | 55 ++++++++++++++++++++++++++++++++++++---
>   PVE/QemuServer/Machine.pm |  6 +++--
>   4 files changed, 62 insertions(+), 9 deletions(-)
> 
> diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
> index 3ec31c2..fe94c74 100644
> --- a/PVE/API2/Qemu.pm
> +++ b/PVE/API2/Qemu.pm
> @@ -970,12 +970,13 @@ __PACKAGE__->register_method({
>   		    if ((!defined($conf->{vmgenid}) || $conf->{vmgenid} eq '1') && $arch ne 'aarch64') {
>   			$conf->{vmgenid} = PVE::QemuServer::generate_uuid();
>   		    }
> -
> -		    my $machine = $conf->{machine};
> +		    my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
> +		    my $machine = $machine_conf->{type};
>   		    if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) {
>   			# always pin Windows' machine version on create, they get to easily confused
>   			if (PVE::QemuServer::windows_version($conf->{ostype})) {
> -			    $conf->{machine} = PVE::QemuServer::windows_get_pinned_machine_version($machine);
> +			    $machine_conf->{type} = PVE::QemuServer::windows_get_pinned_machine_version($machine);
> +			    $conf->{machine} = print_property_string($machine_conf);

normally we give the format to 'print_property_string' so that's missing here

>   			}
>   		    }
>   
> diff --git a/PVE/QemuConfig.pm b/PVE/QemuConfig.pm
> index 482c7ab..f8155c4 100644
> --- a/PVE/QemuConfig.pm
> +++ b/PVE/QemuConfig.pm
> @@ -433,7 +433,8 @@ sub __snapshot_rollback_hook {
>   	} else {
>   	    # Note: old code did not store 'machine', so we try to be smart
>   	    # and guess the snapshot was generated with kvm 1.4 (pc-i440fx-1.4).
> -	    $data->{forcemachine} = $conf->{machine} || 'pc-i440fx-1.4';
> +	    my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
> +	    $data->{forcemachine} = $machine_conf->{type} || 'pc-i440fx-1.4';
>   
>   	    # we remove the 'machine' configuration if not explicitly specified
>   	    # in the original config.
> diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
> index c706653..b9f74dd 100644
> --- a/PVE/QemuServer.pm
> +++ b/PVE/QemuServer.pm
> @@ -111,6 +111,24 @@ PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
>       optional => 1,
>   });
>   
> +my $machine_fmt = {
> +    type => {
> +	default_key => 1,
> +	type => 'string',
> +	description => "Specifies the Qemu machine type.",
> +	pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
> +	format_description => "type",
> +	maxLength => 40,
> +	optional => 1,
> +    },

you should be able to reuse the 'pve-qemu-machine' standard option here by making use
of the second parameter like this:

type => get_standard_option('pve-qemu-machine', {
     default-_key => 1,
     ...
})

> +    viommu => {
> +	type => 'boolean',
> +	description => "enable guest vIOMMU (needs kvm to be enabled and q35 to be set as machine)",
> +	default => 0,
> +	optional => 1,
> +    },
> +};
> +
>   PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
>   	description => "Specifies the Qemu machine type.",
>   	type => 'string',
> @@ -627,7 +645,12 @@ EODESCR
>   	pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
>   	format_description => 'QEMU -cpu parameter'
>       },
> -    machine => get_standard_option('pve-qemu-machine'),
> +    machine => {
> +	description => "Specifies the Qemu machine type.",
> +	type => 'string',
> +	optional => 1,
> +	format => $machine_fmt,
> +    },
>       arch => {
>   	description => "Virtual processor architecture. Defaults to the host.",
>   	optional => 1,
> @@ -2095,6 +2118,16 @@ sub parse_watchdog {
>       return $res;
>   }
>   
> +sub parse_machine {
> +    my ($value) = @_;
> +
> +    return if !$value;
> +
> +    my $res = eval { parse_property_string($machine_fmt, $value) };
> +    warn $@ if $@;
> +    return $res;
> +}
> +
>   sub parse_guest_agent {
>       my ($conf) = @_;
>   
> @@ -2166,8 +2199,9 @@ sub qemu_created_version_fixups {
>       # check if we need to apply some handling for VMs that always use the latest machine version but
>       # had a machine version transition happen that affected HW such that, e.g., an OS config change
>       # would be required (we do not want to pin machine version for non-windows OS type)
> +    my $machine_conf = parse_machine($conf->{machine});
>       if (
> -	(!defined($conf->{machine}) || $conf->{machine} =~ m/^(?:pc|q35|virt)$/) # non-versioned machine
> +	(!defined($machine_conf->{type}) || $machine_conf->{type} =~ m/^(?:pc|q35|virt)$/) # non-versioned machine
>   	&& (!defined($meta->{'creation-qemu'}) || !min_version($meta->{'creation-qemu'}, 6, 1)) # created before 6.1
>   	&& (!$forced_vers || min_version($forced_vers, 6, 1)) # handle snapshot-rollback/migrations
>   	&& min_version($kvmver, 6, 1) # only need to apply the change since 6.1
> @@ -3267,7 +3301,8 @@ sub windows_get_pinned_machine_version {
>   sub get_vm_machine {
>       my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;
>   
> -    my $machine = $forcemachine || $conf->{machine};
> +    my $machine_conf = parse_machine($conf->{machine});
> +    my $machine = $forcemachine || $machine_conf->{type};
>   
>       if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) {
>   	$kvmversion //= kvm_user_version();
> @@ -3482,6 +3517,8 @@ sub config_to_command {
>       my $kvm = $conf->{kvm};
>       my $nodename = nodename();
>   
> +    my $machine_conf = parse_machine($conf->{machine});
> +
>       my $arch = get_vm_arch($conf);
>       my $kvm_binary = get_command_for_arch($arch);
>       my $kvmver = kvm_user_version($kvm_binary);
> @@ -3535,6 +3572,14 @@ sub config_to_command {
>       my $use_old_bios_files = undef;
>       ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
>   
> +    if ($machine_conf->{viommu} && (!$kvm || !$q35)) {
> +        die "to use vIOMMU please enable kvm and set the machine type to q35";
> +    }


i don't know how feasible it is, but wouldn't it be better to also
check that when setting in the api? that way most users cannot
set this combination in the first place

> +
> +    if ($machine_conf->{viommu}) {
> +        push @$devices, '-device', 'intel-iommu,intremap=on,caching-mode=on';
> +    }
> +
>       push @$cmd, $kvm_binary;
>   
>       push @$cmd, '-id', $vmid;
> @@ -4080,6 +4125,10 @@ sub config_to_command {
>       }
>       push @$machineFlags, "type=${machine_type_min}";
>   
> +    if ($machine_conf->{viommu}) {
> +        push @$machineFlags, 'kernel-irqchip=split';
> +    }
> +
>       push @$cmd, @$devices;
>       push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
>       push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
> diff --git a/PVE/QemuServer/Machine.pm b/PVE/QemuServer/Machine.pm
> index d9429ed..33f9a64 100644
> --- a/PVE/QemuServer/Machine.pm
> +++ b/PVE/QemuServer/Machine.pm
> @@ -15,7 +15,8 @@ our $PVE_MACHINE_VERSION = {
>   sub machine_type_is_q35 {
>       my ($conf) = @_;
>   
> -    return $conf->{machine} && ($conf->{machine} =~ m/q35/) ? 1 : 0;
> +    my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
> +    return $machine_conf->{type} && ($machine_conf->{type} =~ m/q35/) ? 1 : 0;
>   }
>   
>   sub current_from_query_machines {
> @@ -120,7 +121,8 @@ sub qemu_machine_pxe {
>   
>       my $machine =  get_current_qemu_machine($vmid);
>   
> -    if ($conf->{machine} && $conf->{machine} =~ m/\.pxe$/) {
> +    my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
> +    if ($conf->{machine} && $machine_conf->{type} =~ m/\.pxe$/) {

i guess you meant 'if ($machine_conf->{type}' here instead of 'if ($conf->{machine}' ?

>   	$machine .= '.pxe';
>       }
>   






More information about the pve-devel mailing list