[pve-devel] [RFC PATCH common] SysFSTools: mdev: retrieve Nvidia vGPU description from nvidia-smi
Christoph Heiss
c.heiss at proxmox.com
Mon Oct 28 12:31:11 CET 2024
This calls `nvidia-smi` to retrieve vGPU type properties and parses
them into a property string - much like the old vGPU mdev interface
presented them as description directly.
Unfortunately, `nvidia-smi` does not support any machine-readable
output format for the `vgpu` subcommand, so we're basically stuck with
parsing the human-readable output.
The result is cached in /var/tmp, so that subsequent invocations do not
need to call `nvidia-smi` and parse the whole output again, as suggested
by Dominik off-list.
The final description for the devices is a proper property string and
looks something like this:
class=NVS,framebuffer-size=24576MiB,license=GRID-Virtual-Apps-3.0,max-instances=1,max-instances-per-vm=1,max-resolution=1280x1024,num-heads=1,fps-limit=60FPS
Signed-off-by: Christoph Heiss <c.heiss at proxmox.com>
---
Sending this as RFC for now, to see if the current approach is
acceptable.
There is also `/usr/share/nvidia/vgpu/vgpuConfig.xml`, which contains
information about all the available profiles, but it's missing some
important (runtime) information unfortunately - such as frame rate
limit.
And FWIW, these properties could also be retrieved without going through
nvidia-smi using the NVML API directly [0], the same API nvidia-smi uses
anyway under the hood.
But that would require either using something like e.g. DynaLoader in
perl [1] or calling it from Rust using e.g. the nvml-wrapper-sys [2] and
wrapping it using perlmod.
Both ways would be a bit involved of course, but also a lot more
future-proof than parsing the human-readable output from `nvidia-smi`.
If preferred I'd be happy to re-write it in some way or another.
[0] https://docs.nvidia.com/deploy/nvml-api/group__nvmlVgpu.html#group__nvmlVgpu
[1] https://perldoc.perl.org/DynaLoader
[2] https://docs.rs/nvml-wrapper-sys/0.8.0/nvml_wrapper_sys/index.html
src/PVE/SysFSTools.pm | 99 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 98 insertions(+), 1 deletion(-)
diff --git a/src/PVE/SysFSTools.pm b/src/PVE/SysFSTools.pm
index 0bde6d7..fc6282d 100644
--- a/src/PVE/SysFSTools.pm
+++ b/src/PVE/SysFSTools.pm
@@ -4,8 +4,10 @@ use strict;
use warnings;
use IO::File;
+use JSON qw(decode_json encode_json);
use PVE::Tools qw(file_read_firstline dir_glob_foreach);
+use PVE::JSONSchema;
my $pcisysfs = "/sys/bus/pci";
my $domainregex = "[a-f0-9]{4,}";
@@ -145,6 +147,98 @@ sub lspci {
return $devices;
}
+# Runs `nvidia-smi vgpu --creatable --verbose` and parses its human-readable output.
+# Returns { <hex()-decoded vGPU type ID> => <property string> }, or undef if the command fails.
+my sub nvidia_parse_vgpu_config_from_smi {
+    # generic properties whose values will be taken as-is
+    my $generic_propmap = {
+        'Class' => 'class',
+        'Max Instances' => 'max-instances',
+        'Max Instances Per VM' => 'max-instances-per-vm',
+        'FB Memory' => 'framebuffer-size',
+        'Frame Rate Limit' => 'fps-limit',
+        'Display Heads' => 'num-heads',
+        'Placement Size' => 'placement-size',
+        'GRID License' => 'license',
+    };
+
+    my $prop_schema = {
+        'class' => {},
+        'max-instances' => {},
+        'max-instances-per-vm' => {},
+        'framebuffer-size' => {},
+        'num-heads' => {},
+        'max-resolution' => {},
+        'license' => {},
+        'fps-limit' => { optional => 1 },
+        'placement-size' => { optional => 1 },
+    };
+
+    my $configs = {};
+    my $cur_id; # ID of the vGPU type whose properties are currently being read
+
+    my $command = ['nvidia-smi', 'vgpu', '--creatable', '--verbose'];
+    my $parsefn = sub {
+        my ($line) = @_;
+        return if $line =~ m/^GPU/;
+
+        my @parts = split(':', $line);
+        return if scalar(@parts) != 2;
+        my ($key, $value) = @parts;
+
+        $key =~ s/^\s+|\s+$//g; # trim whitespace from start and end
+        $value =~ s/\s+//g; # trim all whitespace
+        $value =~ s/,/-/g; # replace any commas with dashes
+
+        if ($key eq 'vGPU Type ID') {
+            $cur_id = hex($value);
+        }
+        # ignore properties seen before the first type ID instead of keying them by undef
+        return if !defined($cur_id);
+        if (defined($generic_propmap->{$key}) && $value ne 'N/A') {
+            $configs->{$cur_id}->{$generic_propmap->{$key}} = $value;
+        }
+
+        # `nvidia-smi` prints these keys/values in a deterministic order,
+        # so the order they appear in can be relied upon.
+        if ($key eq 'Maximum X Resolution') {
+            $configs->{$cur_id}->{'max-resolution'} = $value;
+        } elsif ($key eq 'Maximum Y Resolution') {
+            $configs->{$cur_id}->{'max-resolution'} .= "x$value";
+        }
+    };
+
+    eval { PVE::Tools::run_command($command, outfunc => $parsefn) };
+    if (my $err = $@) {
+        warn "failed to run nvidia-smi: $err\n";
+        return undef;
+    }
+
+    for my $k (keys %$configs) {
+        $configs->{$k} = PVE::JSONSchema::print_property_string($configs->{$k}, $prop_schema);
+    }
+    return $configs;
+}
+
+# Returns { <vGPU type ID> => <property string> }, preferring the JSON cache file.
+my sub nvidia_parse_vgpu_config_cached {
+    my $cachefile = '/var/tmp/pve-nvidia-vgpu-configs.json';
+
+    # First try the cache. A `return` inside eval {} only leaves the eval block,
+    # so the decoded value has to be captured and returned explicitly.
+    my $cached = eval { decode_json(PVE::Tools::file_get_contents($cachefile)) };
+    return $cached if ref($cached) eq 'HASH';
+
+    # Otherwise, go the slow path and parse it from nvidia-smi
+    my $configs = nvidia_parse_vgpu_config_from_smi();
+    return {} if !defined($configs);
+
+    # .. and cache it; a failed cache write only costs time later, so just warn
+    eval { PVE::Tools::file_set_contents($cachefile, encode_json($configs)) };
+    warn "failed to cache vGPU configs: $@" if $@;
+    return $configs;
+}
+
#
# return format:
# [
@@ -152,6 +246,7 @@ sub lspci {
# type => 'FooType_1',
# description => "a longer description with custom format\nand newlines",
# available => 5,
+# name => "human-readable name of mdev/vGPU"
# },
# ...
# ]
@@ -188,6 +283,8 @@ sub get_mdev_types {
});
} elsif (-f $nvidia_path) {
my $creatable = PVE::Tools::file_get_contents($nvidia_path);
+ my $configs = nvidia_parse_vgpu_config_cached();
+
for my $line (split("\n", $creatable)) {
next if $line =~ m/^ID/; # header
next if $line !~ m/^(.*?)\s*:\s*(.*)$/;
@@ -196,7 +293,7 @@ sub get_mdev_types {
push $types->@*, {
type => "nvidia-$id", # backwards compatibility
- description => "", # TODO, read from xml/nvidia-smi ?
+ description => $configs->{$id} || '',
available => 1,
name => $name,
}
--
2.46.0
More information about the pve-devel
mailing list