[pve-devel] [PATCH qemu-server v4 2/5] fix #3784: Parameter for guest vIOMMU & machine as property-string

Markus Frank m.frank at proxmox.com
Fri Nov 25 15:08:54 CET 2022


vIOMMU makes it possible to pass through PCI devices to L2 VMs
running inside L1 VMs via nested virtualisation.

QEMU-Parameters:
https://www.qemu.org/docs/master/system/qemu-manpage.html
https://wiki.qemu.org/Features/VT-d

-machine ...,kernel-irqchip=split:

"split" is required because of intremap, see below.


-device intel-iommu:

* caching-mode=on:

"It is required for -device vfio-pci to work with the VT-d device, because host
assigned devices requires to setup the DMA mapping on the host before guest DMA
starts."

* intremap=on:

"This enables interrupt remapping feature. It's required to enable complete
x2apic. Currently it only supports kvm kernel-irqchip modes off or split, while
full kernel-irqchip is not yet supported."


Signed-off-by: Markus Frank <m.frank at proxmox.com>
---

for dmar on virtio-devices:

* device-iotlb

"This enables device-iotlb capability for the emulated VT-d device. So far
virtio/vhost should be the only real user for this parameter, paired with
ats=on configured for the device."

* disable-legacy=on,disable-modern=off,iommu_platform=on,ats=on:

I did not find any good documentation.
Maybe someone can explain these parameters and how to use them right.
When I tried them with virtio-net-pci, I got about 4-9 times lower transfer
speed when sending than without them.

However, these parameters do not seem to be necessary for passing through
assigned devices, so I would say "dmar for virtio" should be its own
separate feature.

v4:
* added kvm/q35 checks in API
* reused pve-qemu-machine

v3:
* replaced old machine type with property-string with viommu-parameter

v2:
* moved viommu-parameter inside of machine_fmt and added it to the new
parameter machine_properties
new Config -> machine_properties: viommu=1,etc
* check if kvm and q35 are set


 PVE/API2/Qemu.pm          | 21 ++++++++++++---
 PVE/QemuConfig.pm         |  3 ++-
 PVE/QemuServer.pm         | 55 ++++++++++++++++++++++++++++++++++++---
 PVE/QemuServer/Machine.pm |  6 +++--
 4 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index badfc37..5268e56 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -979,13 +979,19 @@ __PACKAGE__->register_method({
 			$conf->{vmgenid} = PVE::QemuServer::generate_uuid();
 		    }
 
-		    my $machine = $conf->{machine};
+		    my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
+		    my $machine = $machine_conf->{type};
 		    if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) {
 			# always pin Windows' machine version on create, they get to easily confused
-			if (PVE::QemuServer::Helpers::windows_version($conf->{ostype})) {
-			    $conf->{machine} = PVE::QemuServer::windows_get_pinned_machine_version($machine);
+			if (PVE::QemuServer::windows_version($conf->{ostype})) {
+			    $machine_conf->{type} = PVE::QemuServer::windows_get_pinned_machine_version($machine);
+			    $conf->{machine} = PVE::QemuServer::print_machine($machine_conf);
 			}
 		    }
+		    my $q35 = $machine_conf->{type} && ($machine_conf->{type} =~ m/q35/) ? 1 : 0;
+		    if ((!$conf->{kvm} || !$q35) && $machine_conf->{viommu}) {
+			die "to use vIOMMU please enable kvm and set the machine type to q35\n"
+		    }
 
 		    PVE::QemuConfig->write_config($vmid, $conf);
 
@@ -1770,7 +1776,14 @@ my $update_vm_api  = sub {
 		} elsif ($opt eq 'tags') {
 		    assert_tag_permissions($vmid, $conf->{$opt}, $param->{$opt}, $rpcenv, $authuser);
 		    $conf->{pending}->{$opt} = PVE::GuestHelpers::get_unique_tags($param->{$opt});
-		} else {
+		} elsif ($opt eq 'machine') {
+		    my $machine_conf = PVE::QemuServer::parse_machine($param->{$opt});
+		    my $q35 = $machine_conf->{type} && ($machine_conf->{type} =~ m/q35/) ? 1 : 0;
+		    if ((!$conf->{kvm} || !$q35) && $machine_conf->{viommu}) {
+			die "to use vIOMMU please enable kvm and set the machine type to q35\n"
+		    }
+		    $conf->{pending}->{$opt} = $param->{$opt};
+		}else {
 		    $conf->{pending}->{$opt} = $param->{$opt};
 
 		    if ($opt eq 'boot') {
diff --git a/PVE/QemuConfig.pm b/PVE/QemuConfig.pm
index 051382c..7c998ef 100644
--- a/PVE/QemuConfig.pm
+++ b/PVE/QemuConfig.pm
@@ -433,7 +433,8 @@ sub __snapshot_rollback_hook {
 	} else {
 	    # Note: old code did not store 'machine', so we try to be smart
 	    # and guess the snapshot was generated with kvm 1.4 (pc-i440fx-1.4).
-	    $data->{forcemachine} = $conf->{machine} || 'pc-i440fx-1.4';
+	    my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
+	    $data->{forcemachine} = $machine_conf->{type} || 'pc-i440fx-1.4';
 
 	    # we remove the 'machine' configuration if not explicitly specified
 	    # in the original config.
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index a746b3d..05eb757 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -123,6 +123,19 @@ PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
 	optional => 1,
 });
 
+my $machine_fmt = {
+    type => get_standard_option('pve-qemu-machine', {
+	default_key => 1,
+	format_description => "pve-qemu-machine-type",
+    }),
+    viommu => {
+	type => 'boolean',
+	description => "enable guest vIOMMU (needs kvm to be enabled and q35 to be set as machine)",
+	default => 0,
+	optional => 1,
+    },
+};
+
 # FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
 my $nodename_cache;
 sub nodename {
@@ -625,7 +638,12 @@ EODESCR
 	pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
 	format_description => 'QEMU -cpu parameter'
     },
-    machine => get_standard_option('pve-qemu-machine'),
+    machine => {
+	description => "Specifies the Qemu machine type.",
+	type => 'string',
+	optional => 1,
+	format => $machine_fmt,
+    },
     arch => {
 	description => "Virtual processor architecture. Defaults to the host.",
 	optional => 1,
@@ -2128,6 +2146,21 @@ sub parse_watchdog {
     return $res;
 }
 
+sub parse_machine {
+    my ($value) = @_;
+
+    return if !$value;
+
+    my $res = eval { parse_property_string($machine_fmt, $value) };
+    die $@ if $@;
+    return $res;
+}
+
+sub print_machine {
+    my ($machine_conf) = @_;
+    return PVE::JSONSchema::print_property_string($machine_conf, $machine_fmt);
+}
+
 sub parse_guest_agent {
     my ($conf) = @_;
 
@@ -2199,8 +2232,9 @@ sub qemu_created_version_fixups {
     # check if we need to apply some handling for VMs that always use the latest machine version but
     # had a machine version transition happen that affected HW such that, e.g., an OS config change
     # would be required (we do not want to pin machine version for non-windows OS type)
+    my $machine_conf = parse_machine($conf->{machine});
     if (
-	(!defined($conf->{machine}) || $conf->{machine} =~ m/^(?:pc|q35|virt)$/) # non-versioned machine
+	(!defined($machine_conf->{type}) || $machine_conf->{type} =~ m/^(?:pc|q35|virt)$/) # non-versioned machine
 	&& (!defined($meta->{'creation-qemu'}) || !min_version($meta->{'creation-qemu'}, 6, 1)) # created before 6.1
 	&& (!$forced_vers || min_version($forced_vers, 6, 1)) # handle snapshot-rollback/migrations
 	&& min_version($kvmver, 6, 1) # only need to apply the change since 6.1
@@ -3327,7 +3361,8 @@ sub windows_get_pinned_machine_version {
 sub get_vm_machine {
     my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;
 
-    my $machine = $forcemachine || $conf->{machine};
+    my $machine_conf = parse_machine($conf->{machine});
+    my $machine = $forcemachine || $machine_conf->{type};
 
     if (!$machine || $machine =~ m/^(?:pc|q35|virt)$/) {
 	$kvmversion //= kvm_user_version();
@@ -3526,6 +3561,8 @@ sub config_to_command {
     my $kvm = $conf->{kvm};
     my $nodename = nodename();
 
+    my $machine_conf = parse_machine($conf->{machine});
+
     my $arch = get_vm_arch($conf);
     my $kvm_binary = get_command_for_arch($arch);
     my $kvmver = kvm_user_version($kvm_binary);
@@ -3579,6 +3616,14 @@ sub config_to_command {
     my $use_old_bios_files = undef;
     ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
 
+    if ($machine_conf->{viommu} && (!$kvm || !$q35)) {
+        die "to use vIOMMU please enable kvm and set the machine type to q35\n";
+    }
+
+    if ($machine_conf->{viommu}) {
+	push @$devices, '-device', "intel-iommu,intremap=on,caching-mode=on";
+    }
+
     my $cmd = [];
     if ($conf->{affinity}) {
 	push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
@@ -4132,6 +4177,10 @@ sub config_to_command {
     }
     push @$machineFlags, "type=${machine_type_min}";
 
+    if ($machine_conf->{viommu}) {
+	push @$machineFlags, 'kernel-irqchip=split';
+    }
+
     push @$cmd, @$devices;
     push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
     push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
diff --git a/PVE/QemuServer/Machine.pm b/PVE/QemuServer/Machine.pm
index d9429ed..bfbde59 100644
--- a/PVE/QemuServer/Machine.pm
+++ b/PVE/QemuServer/Machine.pm
@@ -15,7 +15,8 @@ our $PVE_MACHINE_VERSION = {
 sub machine_type_is_q35 {
     my ($conf) = @_;
 
-    return $conf->{machine} && ($conf->{machine} =~ m/q35/) ? 1 : 0;
+    my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
+    return $machine_conf->{type} && ($machine_conf->{type} =~ m/q35/) ? 1 : 0;
 }
 
 sub current_from_query_machines {
@@ -120,7 +121,8 @@ sub qemu_machine_pxe {
 
     my $machine =  get_current_qemu_machine($vmid);
 
-    if ($conf->{machine} && $conf->{machine} =~ m/\.pxe$/) {
+    my $machine_conf = PVE::QemuServer::parse_machine($conf->{machine});
+    if ($machine_conf->{type} && $machine_conf->{type} =~ m/\.pxe$/) {
 	$machine .= '.pxe';
     }
 
-- 
2.30.2






More information about the pve-devel mailing list