[pve-devel] [PATCH qemu-server v2 5/5] add mediated devices support

Dominik Csapak d.csapak at proxmox.com
Wed Nov 14 10:44:28 CET 2018


With this, we are able to create and use mediated devices,
which include Intel GVT-g (aka KVMGT) and Nvidia vGPUs, and probably more
types of devices in the future.

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
changes from v1:
* improved cleanup loop
* fixed error message for not available instances

 PVE/QemuServer.pm     | 54 ++++++++++++++++++++++++++++++++++++++++++++++-----
 PVE/QemuServer/PCI.pm | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 101 insertions(+), 5 deletions(-)

diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 0d169da..3a756df 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -1245,6 +1245,17 @@ EODESCR
 	optional => 1,
 	default => 0,
     },
+    'mdev' => {
+	type => 'string',
+        format_description => 'string',
+	pattern => '[^/\.:]+',
+	optional => 1,
+	description => <<EODESCR
+The type of mediated device to use.
+An instance of this type will be created on startup of the VM and
+will be cleaned up when the VM stops.
+EODESCR
+    }
 };
 PVE::JSONSchema::register_format('pve-qm-hostpci', $hostpci_fmt);
 
@@ -3540,6 +3551,15 @@ sub config_to_command {
 	}
 	my $pcidevices = $d->{pciid};
 	my $multifunction = 1 if @$pcidevices > 1;
+	my $sysfspath;
+	if ($d->{mdev} && scalar(@$pcidevices) == 1) {
+	    my $id = $pcidevices->[0]->{id};
+	    my $function = $pcidevices->[0]->{function};
+	    my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $i);
+	    $sysfspath = "$pcisysfs/devices/0000:$id.$function/$uuid";
+	} elsif ($d->{mdev}) {
+	    warn "ignoring mediated device with multifunction device\n";
+	}
 
 	my $j=0;
         foreach my $pcidevice (@$pcidevices) {
@@ -3548,7 +3568,13 @@ sub config_to_command {
 	    $id .= ".$j" if $multifunction;
 	    my $addr = $pciaddr;
 	    $addr .= ".$j" if $multifunction;
-	    my $devicestr = "vfio-pci,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr";
+	    my $devicestr = "vfio-pci";
+	    if ($sysfspath) {
+		$devicestr .= ",sysfsdev=$sysfspath";
+	    } else {
+		$devicestr .= ",host=$pcidevice->{id}.$pcidevice->{function}";
+	    }
+	    $devicestr .= ",id=$id$addr";
 
 	    if($j == 0){
 		$devicestr .= "$rombar$xvga";
@@ -5143,10 +5169,16 @@ sub vm_start {
 		my $info = PVE::QemuServer::PCI::pci_device_info("0000:$pciid");
 		die "IOMMU not present\n" if !PVE::QemuServer::PCI::check_iommu_support();
 		die "no pci device info for device '$pciid'\n" if !$info;
-		die "can't unbind/bind pci group to vfio '$pciid'\n"
-		    if !PVE::QemuServer::PCI::pci_dev_group_bind_to_vfio($pciid);
-		die "can't reset pci device '$pciid'\n"
-		    if $info->{has_fl_reset} and !PVE::QemuServer::PCI::pci_dev_reset($info);
+
+		if ($d->{mdev}) {
+		    my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $i);
+		    PVE::QemuServer::PCI::pci_create_mdev_device($pciid, $uuid, $d->{mdev});
+		} else {
+		    die "can't unbind/bind pci group to vfio '$pciid'\n"
+			if !PVE::QemuServer::PCI::pci_dev_group_bind_to_vfio($pciid);
+		    die "can't reset pci device '$pciid'\n"
+			if $info->{has_fl_reset} and !PVE::QemuServer::PCI::pci_dev_reset($info);
+		}
 	  }
         }
 
@@ -5386,6 +5418,18 @@ sub vm_stop_cleanup {
 	    unlink "/var/run/qemu-server/${vmid}.$ext";
 	}
 
+	foreach my $key (keys %$conf) {
+	    next if $key !~ m/^hostpci(\d+)$/;
+	    my $hostpciindex = $1;
+	    my $d = parse_hostpci($conf->{$key});
+	    my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $hostpciindex);
+
+	    foreach my $pci (@{$d->{pciid}}) {
+		my $pciid = $pci->{id} . "." . $pci->{function};
+		PVE::QemuServer::PCI::pci_cleanup_mdev_device($pciid, $uuid);
+	    }
+	}
+
 	vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
     };
     warn $@ if $@; # avoid errors - just warn
diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm
index 57d2f5c..19aebd7 100644
--- a/PVE/QemuServer/PCI.pm
+++ b/PVE/QemuServer/PCI.pm
@@ -303,4 +303,56 @@ sub pci_dev_group_bind_to_vfio {
     return 1;
 }
 
+sub pci_create_mdev_device {
+    my ($pciid, $uuid, $type) = @_;
+    # creates mdev instance $uuid of $type on $pciid (reuses a matching one), dies on error
+    my $basedir = "$pcisysfs/devices/0000:$pciid";
+    my $mdev_dir = "$basedir/mdev_supported_types";
+
+    die "pci device '$pciid' does not support mediated devices \n"
+	if !-d $mdev_dir;
+
+    die "pci device '$pciid' has no type '$type'\n"
+	if !-d "$mdev_dir/$type";
+
+    if (-d "$basedir/$uuid") {
+	# it already exists, checking type (an unreadable link counts as a mismatch)
+	my $typelink = readlink("$basedir/$uuid/mdev_type") // '';
+	my ($existingtype) = $typelink =~ m|/([^/]+)$|;
+	die "mdev instance '$uuid' already exists, but type is not '$type'\n"
+	    if !defined($existingtype) || $type ne $existingtype;
+
+	# instance exists, so use it but warn the user
+	warn "mdev instance '$uuid' already existed, using it.\n";
+	return undef;
+    }
+    # an unreadable or non-numeric counter is treated as 'no instance available'
+    my $instances = file_read_firstline("$mdev_dir/$type/available_instances") // '';
+    my ($avail) = $instances =~ m/^(\d+)$/;
+    die "pci device '$pciid' has no available instances of '$type'\n"
+	if !defined($avail) || $avail < 1;
+
+    die "could not create '$type' for pci device '$pciid'\n"
+	if !file_write("$mdev_dir/$type/create", $uuid);
+
+    return undef;
+}
+
+sub pci_cleanup_mdev_device {
+    my ($pciid, $uuid) = @_;
+    # removes mdev instance $uuid from pci device $pciid; returns true on success
+    my $basedir = "$pcisysfs/devices/0000:$pciid/$uuid";
+    return 1 if !-e $basedir; # no cleanup necessary if the instance does not exist
+    return file_write("$basedir/remove", "1");
+}
+
+# Builds the UUID-shaped name of a mediated device: the hostpci index is the
+# first group and the vmid the last group, both as zero-padded decimals, so
+# the same (vmid, index) pair always yields the same name.
+sub generate_mdev_uuid {
+    my ($vmid, $hostpciindex) = @_;
+
+    return sprintf("%08d-0000-0000-0000-%012d", $hostpciindex, $vmid);
+}
+
 1;
-- 
2.11.0





More information about the pve-devel mailing list