[pve-devel] [PATCH qemu-server 2/2] add mediated devices support
Dominik Csapak
d.csapak at proxmox.com
Tue Nov 13 15:18:16 CET 2018
with this, we are able to create and use mediated devices,
which include Intel GVT-g (aka KVMGT) and Nvidia vGPUs, and probably more
types of devices in the future
Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
PVE/QemuServer.pm | 54 ++++++++++++++++++++++++++++++++++++++++++++++-----
PVE/QemuServer/PCI.pm | 52 +++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 101 insertions(+), 5 deletions(-)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 71ef81c..ccd6211 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -1231,6 +1231,17 @@ EODESCR
optional => 1,
default => 0,
},
+ 'mdev' => {
+ type => 'string',
+ format_description => 'string',
+ pattern => '[^/\.:]+',
+ optional => 1,
+ description => <<EODESCR
+The type of mediated device to use.
+An instance of this type will be created on startup of the VM and
+will be cleaned up when the VM stops.
+EODESCR
+ }
};
PVE::JSONSchema::register_format('pve-qm-hostpci', $hostpci_fmt);
@@ -3401,6 +3412,15 @@ sub config_to_command {
}
my $pcidevices = $d->{pciid};
my $multifunction = 1 if @$pcidevices > 1;
+ my $sysfspath;
+ if ($d->{mdev} && scalar(@$pcidevices) == 1) {
+ my $id = $pcidevices->[0]->{id};
+ my $function = $pcidevices->[0]->{function};
+ my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $i);
+ $sysfspath = "$pcisysfs/devices/0000:$id.$function/$uuid";
+ } elsif ($d->{mdev}) {
+ warn "ignoring mediated device with multifunction device\n";
+ }
my $j=0;
foreach my $pcidevice (@$pcidevices) {
@@ -3409,7 +3429,13 @@ sub config_to_command {
$id .= ".$j" if $multifunction;
my $addr = $pciaddr;
$addr .= ".$j" if $multifunction;
- my $devicestr = "vfio-pci,host=$pcidevice->{id}.$pcidevice->{function},id=$id$addr";
+ my $devicestr = "vfio-pci";
+ if ($sysfspath) {
+ $devicestr .= ",sysfsdev=$sysfspath";
+ } else {
+ $devicestr .= ",host=$pcidevice->{id}.$pcidevice->{function}";
+ }
+ $devicestr .= ",id=$id$addr";
if($j == 0){
$devicestr .= "$rombar$xvga";
@@ -5027,10 +5053,16 @@ sub vm_start {
my $info = PVE::QemuServer::PCI::pci_device_info("0000:$pciid");
die "IOMMU not present\n" if !PVE::QemuServer::PCI::check_iommu_support();
die "no pci device info for device '$pciid'\n" if !$info;
- die "can't unbind/bind pci group to vfio '$pciid'\n"
- if !PVE::QemuServer::PCI::pci_dev_group_bind_to_vfio($pciid);
- die "can't reset pci device '$pciid'\n"
- if $info->{has_fl_reset} and !PVE::QemuServer::PCI::pci_dev_reset($info);
+
+ if ($d->{mdev}) {
+ my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $i);
+ PVE::QemuServer::PCI::pci_create_mdev_device($pciid, $uuid, $d->{mdev});
+ } else {
+ die "can't unbind/bind pci group to vfio '$pciid'\n"
+ if !PVE::QemuServer::PCI::pci_dev_group_bind_to_vfio($pciid);
+ die "can't reset pci device '$pciid'\n"
+ if $info->{has_fl_reset} and !PVE::QemuServer::PCI::pci_dev_reset($info);
+ }
}
}
@@ -5270,6 +5302,18 @@ sub vm_stop_cleanup {
unlink "/var/run/qemu-server/${vmid}.$ext";
}
+ foreach my $key (keys %$conf) {
+ next if $key !~ m/^hostpci(\d+)$/;
+ my $hostpciindex = $1;
+ my $d = parse_hostpci($conf->{$key});
+
+ foreach my $pci (@{$d->{pciid}}) {
+ my $pciid = $pci->{id} . "." . $pci->{function};
+ my $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $hostpciindex);
+ PVE::QemuServer::PCI::pci_cleanup_mdev_device($pciid, $uuid);
+ }
+ }
+
vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
};
warn $@ if $@; # avoid errors - just warn
diff --git a/PVE/QemuServer/PCI.pm b/PVE/QemuServer/PCI.pm
index ab534f4..c6ef78f 100644
--- a/PVE/QemuServer/PCI.pm
+++ b/PVE/QemuServer/PCI.pm
@@ -292,4 +292,56 @@ sub pci_dev_group_bind_to_vfio {
return 1;
}
+# Create a mediated device (mdev) instance on a host pci device.
+#
+# Parameters:
+#   $pciid - pci address without domain; it is appended to
+#            "$pcisysfs/devices/0000:" to locate the device in sysfs
+#   $uuid  - name of the instance to create (written to the type's
+#            'create' file)
+#   $type  - name of a directory below the device's 'mdev_supported_types'
+#
+# If an instance named $uuid already exists below the device and its
+# 'mdev_type' link points to $type, it is reused and a warning is emitted.
+#
+# Dies if the device has no 'mdev_supported_types' directory, if $type is
+# not listed there, if an existing instance has a different (or unreadable)
+# type, if 'available_instances' does not report at least one free instance,
+# or if writing to the 'create' file fails.
+#
+# Returns nothing meaningful on success.
+sub pci_create_mdev_device {
+    my ($pciid, $uuid, $type) = @_;
+
+    my $basedir = "$pcisysfs/devices/0000:$pciid";
+    my $mdev_dir = "$basedir/mdev_supported_types";
+
+    die "pci device '$pciid' does not support mediated devices \n"
+        if !-d $mdev_dir;
+
+    die "pci device '$pciid' has no type '$type'\n"
+        if !-d "$mdev_dir/$type";
+
+    if (-d "$basedir/$uuid") {
+        # it already exists, checking type
+        my $typelink = readlink("$basedir/$uuid/mdev_type");
+        die "cannot read type of existing mdev instance '$uuid' - $!\n"
+            if !defined($typelink);
+
+        # the type name is the last path component of the link target
+        my ($existingtype) = $typelink =~ m|/([^/]+)$|;
+        die "mdev instance '$uuid' already exists, but type is not '$type'\n"
+            if !defined($existingtype) || $type ne $existingtype;
+
+        # instance exists, so use it but warn the user
+        warn "mdev instance '$uuid' already existed, using it.\n";
+        return;
+    }
+
+    my $instances = file_read_firstline("$mdev_dir/$type/available_instances");
+    # treat an unreadable or non-numeric value like "no instances left"
+    # instead of comparing undef numerically
+    my ($avail) = defined($instances) ? $instances =~ m/^(\d+)$/ : ();
+    die "pci device '$pciid' has no available instances of '$type'\n"
+        if !defined($avail) || $avail < 1;
+
+    die "could not create '$type' for pci devices '$pciid'\n"
+        if !file_write("$mdev_dir/$type/create", $uuid);
+
+    return;
+}
+
+# Remove the mediated device instance $uuid from the pci device $pciid by
+# writing "1" to the instance's 'remove' file in sysfs.
+#
+# Returns 1 when there is no such instance (nothing to clean up), otherwise
+# the return value of file_write().
+sub pci_cleanup_mdev_device {
+    my ($pciid, $uuid) = @_;
+
+    my $basedir = "$pcisysfs/devices/0000:$pciid/$uuid";
+
+    # vm_stop_cleanup calls this for every function of every hostpci entry,
+    # including plain passthrough devices that never had an mdev instance;
+    # do not try to write below a directory that does not exist
+    return 1 if !-e $basedir;
+
+    return file_write("$basedir/remove", "1");
+}
+
+# Build the uuid-shaped name used for a VM's mediated device instance.
+# The first group is the hostpci index zero-padded to 8 digits, the last
+# group is the vmid zero-padded to 12 digits, and the three groups in
+# between are always '0000'.
+sub generate_mdev_uuid {
+    my ($vmid, $hostpciindex) = @_;
+
+    my @groups = (
+        sprintf("%08d", $hostpciindex),
+        ('0000') x 3,
+        sprintf("%012d", $vmid),
+    );
+
+    return join('-', @groups);
+}
+
1;
--
2.11.0
More information about the pve-devel
mailing list