[pve-devel] [PATCH v3 container 1/1] Add device passthrough

Filip Schauer f.schauer at proxmox.com
Mon Nov 13 11:33:10 CET 2023


Patch v4 available:

https://lists.proxmox.com/pipermail/pve-devel/2023-November/060022.html

On 10/11/2023 11:44, Wolfgang Bumiller wrote:
> On Tue, Nov 07, 2023 at 02:46:42PM +0100, Filip Schauer wrote:
>> Add a dev[n] argument to the container config to pass devices through to
>> a container. A device can be passed by its path. Additionally the access
>> mode, uid and gid can be specified through their respective properties.
>>
>> Signed-off-by: Filip Schauer <f.schauer at proxmox.com>
>> ---
>>   src/PVE/LXC.pm            | 29 +++++++++++++-
>>   src/PVE/LXC/Config.pm     | 79 +++++++++++++++++++++++++++++++++++++++
>>   src/PVE/LXC/Tools.pm      | 23 +++++++++---
>>   src/lxc-pve-autodev-hook  | 14 ++++++-
>>   src/lxc-pve-prestart-hook | 63 +++++++++++++++++++++++++++++++
>>   5 files changed, 201 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
>> index 7ec816b..162f093 100644
>> --- a/src/PVE/LXC.pm
>> +++ b/src/PVE/LXC.pm
>> @@ -5,7 +5,7 @@ use warnings;
>>   
>>   use Cwd qw();
>>   use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED EEXIST);
>> -use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY);
>> +use Fcntl qw(O_RDONLY O_WRONLY O_NOFOLLOW O_DIRECTORY :mode);
>>   use File::Path;
>>   use File::Spec;
>>   use IO::Poll qw(POLLIN POLLHUP);
>> @@ -639,6 +639,33 @@ sub update_lxc_config {
>>   	$raw .= "lxc.mount.auto = sys:mixed\n";
>>       }
>>   
>> +    PVE::LXC::Config->foreach_passthrough_device($conf, sub {
>> +	my ($key, $device) = @_;
>> +
>> +	die "Path is not defined for passthrough device $key"
>> +	    unless (defined($device->{path}));
>> +
>> +	my $absolute_path = $device->{path};
>> +
>> +	die "Device $absolute_path does not exist\n"
>> +		unless (-e $absolute_path);
>> +
>> +	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
>> +
>> +	die "Could not get mode of device $absolute_path\n"
>> +	    if $mode !~ /^([\d]+)$/;
>> +	$mode = $1;
>> +
>> +	die "Could not get device ID of $absolute_path\n"
>> +	    if $rdev !~ /^([\d]+)$/;
>> +	$rdev = $1;
> ^ The above 2 checks don't seem necessary. `stat()` cannot return
> different types of values there.
> Just check them for definedness once together.
>
>> +
>> +	my $major = PVE::Tools::dev_t_major($rdev);
>> +	my $minor = PVE::Tools::dev_t_minor($rdev);
>> +	my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
>> +	$raw .= "lxc.cgroup2.devices.allow = $device_type_char $major:$minor rw\n";
>> +    });
>> +
>>       # WARNING: DO NOT REMOVE this without making sure that loop device nodes
>>       # cannot be exposed to the container with r/w access (cgroup perms).
>>       # When this is enabled mounts will still remain in the monitor's namespace
>> diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
>> index 56e1f10..b4fa147 100644
>> --- a/src/PVE/LXC/Config.pm
>> +++ b/src/PVE/LXC/Config.pm
>> @@ -29,6 +29,7 @@ mkdir $lockdir;
>>   mkdir "/etc/pve/nodes/$nodename/lxc";
>>   my $MAX_MOUNT_POINTS = 256;
>>   my $MAX_UNUSED_DISKS = $MAX_MOUNT_POINTS;
>> +my $MAX_DEVICES = 256;
>>   
>>   # BEGIN implemented abstract methods from PVE::AbstractConfig
>>   
>> @@ -908,6 +909,57 @@ for (my $i = 0; $i < $MAX_UNUSED_DISKS; $i++) {
>>       }
>>   }
>>   
>> +PVE::JSONSchema::register_format('pve-lxc-dev-string', \&verify_lxc_dev_string);
>> +sub verify_lxc_dev_string {
>> +    my ($dev, $noerr) = @_;
>> +
>> +    if (
>> +	$dev =~ m@/\.\.?/@ ||
>> +	$dev =~ m@/\.\.?$@ ||
>> +	$dev !~ m!^/dev/!
>> +    ) {
>> +	return undef if $noerr;
>> +	die "$dev is not a valid device path\n";
>> +    }
>> +
>> +    return $dev;
>> +}
>> +
>> +my $dev_desc = {
>> +    path => {
>> +	optional => 1,
>> +	type => 'string',
>> +	default_key => 1,
>> +	format => 'pve-lxc-dev-string',
>> +	format_description => 'Path',
>> +	description => 'Device to pass through to the container',
>> +	verbose_description => 'Path to the device to pass through to the container',
>> +    },
>> +    mode => {
>> +	optional => 1,
>> +	type => 'integer',
>> +	description => 'Access mode (in octal) to be set on the device node',
>> +    },
>> +    uid => {
>> +	optional => 1,
>> +	type => 'integer',
>> +	description => 'User ID to be assigned to the device node',
>> +    },
>> +    gid => {
>> +	optional => 1,
>> +	type => 'integer',
>> +	description => 'Group ID to be assigned to the device node',
>> +    },
>> +};
>> +
>> +for (my $i = 0; $i < $MAX_DEVICES; $i++) {
>> +    $confdesc->{"dev$i"} = {
>> +	optional => 1,
>> +	type => 'string', format => $dev_desc,
>> +	description => "Device to pass through to the container",
>> +    }
>> +}
>> +
>>   sub parse_pct_config {
>>       my ($filename, $raw, $strict) = @_;
>>   
>> @@ -1255,6 +1307,21 @@ sub parse_volume {
>>       return;
>>   }
>>   
>> +sub parse_device {
>> +    my ($class, $device_string, $noerr) = @_;
>> +
>> +    my $res;
>> +    eval { $res = PVE::JSONSchema::parse_property_string($dev_desc, $device_string) };
>> +    if ($@) {
>> +	return undef if $noerr;
>> +	die $@;
>> +    }
>> +
>> +    die "Path has to be defined" unless (defined($res->{path}));
>> +
>> +    return $res;
>> +}
>> +
>>   sub print_volume {
>>       my ($class, $key, $volume) = @_;
>>   
>> @@ -1762,4 +1829,16 @@ sub get_derived_property {
>>       }
>>   }
>>   
>> +sub foreach_passthrough_device {
>> +    my ($class, $conf, $func, @param) = @_;
>> +
>> +    foreach my $key (keys %$conf) {
>> +	next if $key !~ m/^dev(\d+)$/;
>> +
>> +	my $device = $class->parse_device($conf->{$key});
>> +
>> +	$func->($key, $device, @param);
>> +    }
>> +}
>> +
>>   1;
>> diff --git a/src/PVE/LXC/Tools.pm b/src/PVE/LXC/Tools.pm
>> index 62cdbc1..c9fb834 100644
>> --- a/src/PVE/LXC/Tools.pm
>> +++ b/src/PVE/LXC/Tools.pm
>> @@ -81,10 +81,9 @@ sub lxc_hook($$&) {
>>       $code->($ct_name, $common_vars, $namespaces, $args);
>>   }
>>   
>> -sub for_current_devices($&) {
>> -    my ($vmid, $code) = @_;
>> +sub for_devices {
>> +    my ($devlist_file, $vmid, $code) = @_;
>>   
>> -    my $devlist_file = "/var/lib/lxc/$vmid/devices";
>>       my $fd;
>>   
>>       if (! open $fd, '<', $devlist_file) {
>> @@ -93,8 +92,8 @@ sub for_current_devices($&) {
>>       }
>>   
>>       while (defined(my $line = <$fd>)) {
>> -	if ($line !~ m@^(b):(\d+):(\d+):/dev/(\S+)\s*$@) {
>> -	    warn "invalid .pve-devices entry: $line\n";
>> +	if ($line !~ m@^(b|c):(\d+):(\d+):/dev/(\S+)\s*$@) {
>> +	    warn "invalid $devlist_file entry: $line\n";
>>   	    next;
>>   	}
>>   
>> @@ -117,6 +116,20 @@ sub for_current_devices($&) {
>>       close $fd;
>>   }
>>   
>> +sub for_current_devices($&) {
>> +    my ($vmid, $code) = @_;
>> +
>> +    my $devlist_file = "/var/lib/lxc/$vmid/devices";
>> +    for_devices($devlist_file, $vmid, $code);
>> +}
>> +
>> +sub for_passthrough_devices($&) {
>> +    my ($vmid, $code) = @_;
>> +
>> +    my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough_devices";
>> +    for_devices($passthrough_devlist_file, $vmid, $code);
>> +}
>> +
>>   sub cgroup_do_write($$) {
>>       my ($path, $value) = @_;
>>       my $fd;
>> diff --git a/src/lxc-pve-autodev-hook b/src/lxc-pve-autodev-hook
>> index 3c45949..cd2b92a 100755
>> --- a/src/lxc-pve-autodev-hook
>> +++ b/src/lxc-pve-autodev-hook
>> @@ -3,8 +3,9 @@
>>   use strict;
>>   use warnings;
>>   
>> -use File::Path;
>>   use File::Basename;
>> +use File::Path;
>> +use File::Touch;
>>   
>>   use PVE::LXC::Tools;
>>   use PVE::Tools;
>> @@ -14,6 +15,17 @@ PVE::LXC::Tools::lxc_hook('autodev', 'lxc', sub {
>>   
>>       my $root = $vars->{ROOTFS_MOUNT};
>>   
>> +    PVE::LXC::Tools::for_passthrough_devices($vmid, sub {
>> +	my ($type, $major, $minor, $dev) = @_;
>> +
>> +	my $rel_devpath = "/dev/$dev";
>> +	my $rel_dir = dirname($rel_devpath);
>> +	File::Path::mkpath("$root/$rel_dir");
>> +	touch("$root/dev/$dev");
> ^ Hint: `touch` is not a syscall. This does an `utimensat()` to update
> the file mtimes. This isn't really necessary, and an easy way to just
> create a file and do nothing if it already exists is to just use mknod
> with S_IFREG for the file type in the mode bits and zero for the device
> type:
>      PVE::Tools::mknod("$root/dev/$dev", S_IFREG, 0);
>
>
>> +
>> +	PVE::Tools::mount("/var/lib/lxc/$vmid/passthrough/dev/$dev", "$root/dev/$dev", 0, 4096, 0);
> Don't use magic numbers. Please place the MS_BIND constant in PVE::Tools
> instead.
>
> Also die() if mount fails.
>
>> +    });
>> +
>>       PVE::LXC::Tools::for_current_devices($vmid, sub {
>>   	my ($type, $major, $minor, $dev) = @_;
>>   
>> diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
>> index 936d0bf..68c7cec 100755
>> --- a/src/lxc-pve-prestart-hook
>> +++ b/src/lxc-pve-prestart-hook
>> @@ -6,6 +6,7 @@ use strict;
>>   use warnings;
>>   
>>   use Fcntl qw(O_DIRECTORY :mode);
>> +use File::Basename;
>>   use File::Path;
>>   use POSIX;
>>   
>> @@ -118,6 +119,55 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
>>   
>>       PVE::LXC::Config->foreach_volume($conf, $setup_mountpoint);
>>   
>> +    # Device passthrough
>> +    my $passthrough_devlist_file = "/var/lib/lxc/$vmid/passthrough_devices";
>> +    unlink $passthrough_devlist_file;
>> +    my $passthrough_devices = [];
>> +
>> +    my $passthrough_dir = "/var/lib/lxc/$vmid/passthrough";
>> +    File::Path::make_path($passthrough_dir);
>> +    PVE::Tools::mount("none", $passthrough_dir, "tmpfs", 0, "size=8k");
> ^ checking that `mount` succeeded is particularly important here
>
>> +
>> +    my $setup_passthrough_device = sub {
>> +	my ($key, $device) = @_;
>> +
>> +	my $absolute_path = $device->{path};
>> +	my ($mode, $rdev) = (stat($absolute_path))[2, 6];
>> +
>> +	die "Could not get mode of device $absolute_path\n"
>> +	    if $mode !~ /^([\d]+)$/;
>> +	$mode = $1;
>> +
>> +	die "Could not get device ID of $absolute_path\n"
>> +	    if $rdev !~ /^([\d]+)$/;
>> +	$rdev = $1;
> ^ As above for the 2 checks with the stat() returned values.
>
>> +
>> +	my $passthrough_device_path = "$passthrough_dir$absolute_path";
> ^ The above concatenation would probably be more readable with `.`
> instead of quotes ($passthrough_dir . $absolute_path), or with `{}`
> around the names.
>
>> +
>> +	die "passthrough device path is invalid\n"
>> +	    if $passthrough_device_path !~ /^([\w\/]+)$/;
>> +	$passthrough_device_path = $1;
> ^ Why this check? Both portions should be fine - $passthrough_dir is
> trivial and $absolute_path comes from the parse function after
> verification (and is already used in `stat()` so definitely contains a
> valid path we also already used.
>
>> +
>> +	File::Path::make_path(dirname($passthrough_device_path));
>> +	PVE::Tools::mknod($passthrough_device_path, $mode, $rdev);
> ^ `mknod` should be checked for errors.
>
>> +
>> +	# Use chmod because umask could mess with the access mode on mknod
>> +	my $passthrough_mode = 0660;
>> +	$passthrough_mode = oct($device->{mode}) if (defined($device->{mode}));
>> +	chmod $passthrough_mode, $passthrough_device_path;
> ^ should be checked for errors.
>
>> +
>> +	# Set uid and gid of the device node
>> +	my $uid = $rootuid;
>> +	my $gid = $rootgid;
>> +	$uid += $device->{uid} if (defined($device->{uid}));
>> +	$gid += $device->{gid} if (defined($device->{gid}));
> ^ id mappings can constist of multiple ranges.
> You get the mapping from `parse_id_maps` (the 1st return value in the
> line fetching `$root*id`).
> It's an array of tuples of the form:
>      [ "g" or "u", <ns id>, <host id>, <length> ]
> Make a helper to map ids using that array (see [1]).
> $device->{*id} is from the perspective of the container and you want to
> map from that to the host.
>
> [1] https://git.proxmox.com/?p=pve-lxc-syscalld.git;a=blob;f=src/process/id_map.rs;h=d61d489da7af7418908fd8d85f1660c435b7c73e;hb=refs/heads/master#l26
>
>> +	chown $uid, $gid, $passthrough_device_path;
> ^ check for errors
>
>> +
>> +	push @$passthrough_devices, $absolute_path;
>      push @$passthrough_devices, [$absolute_path, $mode, $rdev];
> to avoid the extra stat below...
>
>> +    };
>> +
>> +    PVE::LXC::Config->foreach_passthrough_device($conf, $setup_passthrough_device);
>> +
>>       my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
>>       $lxc_setup->pre_start_hook();
>>   
>> @@ -140,6 +190,19 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
>>   	}
>>   	PVE::Tools::file_set_contents($devlist_file, $devlist);
>>       }
>> +
>> +    if (@$passthrough_devices) {
>> +	my $devlist = '';
>> +	foreach my $dev (@$passthrough_devices) {
>> +	    my ($mode, $rdev) = (stat($dev))[2, 6];
>> +	    next if !$mode || !$rdev;
> ...and then resolve the tuple via:
>      ($dev, my $mode, my $rdev) = @$dev;
>
>> +	    my $major = PVE::Tools::dev_t_major($rdev);
>> +	    my $minor = PVE::Tools::dev_t_minor($rdev);
>> +	    my $device_type_char = S_ISBLK($mode) ? 'b' : 'c';
>> +	    $devlist .= "$device_type_char:$major:$minor:$dev\n";
>> +	}
>> +	PVE::Tools::file_set_contents($passthrough_devlist_file, $devlist);
>> +    }
>>   });
>>   
>>   # Leftover cgroups prevent lxc from starting without any useful information
>> -- 
>> 2.39.2





More information about the pve-devel mailing list