[pve-devel] [RFC common v3 10/34] env: add module with helpers to run a Perl subroutine in a user namespace

Thomas Lamprecht t.lamprecht at proxmox.com
Mon Nov 11 19:33:02 CET 2024


Am 07.11.24 um 17:51 schrieb Fiona Ebner:
> The first use case is running the container backup subroutine for
> external providers inside a user namespace. That allows them to see
> the filesystem to back-up from the containers perspective and also
> improves security because of isolation.
> 
> Copied and adapted the relevant parts from the pve-buildpkg
> repository.
> 
> Originally-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
> [FE: add $idmap parameter, drop $aux_groups parameter]
> Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
> ---
> 
> New in v3.
> 
>  src/Makefile   |   1 +
>  src/PVE/Env.pm | 136 +++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 137 insertions(+)
>  create mode 100644 src/PVE/Env.pm
> 
> diff --git a/src/Makefile b/src/Makefile
> index 2d8bdc4..dba26e3 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -15,6 +15,7 @@ LIB_SOURCES = \
>  	Certificate.pm \
>  	CpuSet.pm \
>  	Daemon.pm \
> +	Env.pm \
>  	Exception.pm \
>  	Format.pm \
>  	INotify.pm \
> diff --git a/src/PVE/Env.pm b/src/PVE/Env.pm
> new file mode 100644
> index 0000000..e11bec0
> --- /dev/null
> +++ b/src/PVE/Env.pm
> @@ -0,0 +1,136 @@
> +package PVE::Env;

Can this module and its name be more specific to doing stuff with/in namespaces?

e.g. PVE::Namespaces or PVE::Sys::Namespaces (there might be other stuff that might
fit well in a future libproxmox-sys-perl and Proxmox::Sys::* respectively, so
maybe that module path would be better?)

I'd also make all subs private if they are not really intended to be used
outside this module.

If the more general fork/wait-child helpers are needed elsewhere, or deemed
to be useful, then they could go in their own module, e.g. PVE::Sys::Process.

> +
> +use strict;
> +use warnings;
> +
> +use Fcntl qw(O_WRONLY);
> +use POSIX qw(EINTR);
> +use Socket;
> +
> +require qw(syscall.ph);
> +
> +use constant {CLONE_NEWNS   => 0x00020000,
> +              CLONE_NEWUSER => 0x10000000};
> +
> +sub unshare($) {
> +    my ($flags) = @_;
> +    return 0 == syscall(272, $flags);
> +}
> +
> +sub __set_id_map($$$) {
> +    my ($pid, $what, $value) = @_;
> +    sysopen(my $fd, "/proc/$pid/${what}_map", O_WRONLY)
> +	or die "failed to open child process' ${what}_map\n";
> +    my $rc = syswrite($fd, $value);
> +    if (!$rc || $rc != length($value)) {
> +	die "failed to set sub$what: $!\n";
> +    }
> +    close($fd);
> +}
> +
> +sub set_id_map($$) {
> +    my ($pid, $id_map) = @_;
> +
> +    my $gid_map = '';
> +    my $uid_map = '';
> +
> +    for my $map ($id_map->@*) {
> +	my ($type, $ct, $host, $length) = $map->@*;
> +
> +	$gid_map .= "$ct $host $length\n" if $type eq 'g';
> +	$uid_map .= "$ct $host $length\n" if $type eq 'u';
> +    }
> +
> +    __set_id_map($pid, 'gid', $gid_map) if $gid_map;
> +    __set_id_map($pid, 'uid', $uid_map) if $uid_map;
> +}
> +
> +sub wait_for_child($;$) {
> +    my ($pid, $noerr) = @_;
> +    my $interrupts = 0;
> +    while (waitpid($pid, 0) != $pid) {
> +	if ($! == EINTR) {
> +	    warn "interrupted...\n";
> +	    kill(($interrupts > 3 ? 9 : 15), $pid);
> +	    $interrupts++;
> +	}
> +    }
> +    my $status = POSIX::WEXITSTATUS($?);
> +    return $status if $noerr;
> +
> +    if ($? == -1) {
> +	die "failed to execute\n";
> +    } elsif (POSIX::WIFSIGNALED($?)) {
> +	my $sig = POSIX::WTERMSIG($?);
> +	die "got signal $sig\n";
> +    } elsif ($status != 0) {
> +	warn "exit code $status\n";
> +    }
> +    return $status;
> +}
> +
> +sub forked(&%) {

FWIW, there's a "forked" method in test/lock_file.pl that this might replace too,
if it stays public.

> +    my ($code, %opts) = @_;
> +
> +    pipe(my $except_r, my $except_w) or die "pipe: $!\n";
> +
> +    my $pid = fork();
> +    die "fork failed: $!\n" if !defined($pid);
> +
> +    if ($pid == 0) {
> +	close($except_r);
> +	eval { $code->() };
> +	if ($@) {
> +	    print {$except_w} $@;
> +	    $except_w->flush();
> +	    POSIX::_exit(1);
> +	}
> +	POSIX::_exit(0);
> +    }
> +    close($except_w);
> +
> +    my $err;
> +    if (my $afterfork = $opts{afterfork}) {
> +	eval { $afterfork->($pid); };
> +	if ($err = $@) {
> +	    kill(15, $pid);
> +	    $opts{noerr} = 1;
> +	}
> +    }
> +    if (!$err) {
> +	$err = do { local $/ = undef; <$except_r> };
> +    }
> +    my $rv = wait_for_child($pid, $opts{noerr});
> +    die $err if $err;
> +    die "an unknown error occurred\n" if $rv != 0;
> +    return $rv;
> +}
> +
> +sub run_in_userns(&;$) {
> +    my ($code, $id_map) = @_;
> +    socketpair(my $sp, my $sc, AF_UNIX, SOCK_STREAM, PF_UNSPEC)
> +	or die "socketpair: $!\n";
> +    forked(sub {
> +	close($sp);
> +	unshare(CLONE_NEWUSER|CLONE_NEWNS) or die "unshare(NEWUSER|NEWNS): $!\n";
> +	syswrite($sc, "1\n") == 2 or die "write: $!\n";
> +	shutdown($sc, 1);
> +	my $two = <$sc>;
> +	die "failed to sync with parent process\n" if $two ne "2\n";
> +	close($sc);
> +	$! = undef;
> +	($(, $)) = (0, 0); die "$!\n" if $!;
> +	($<, $>) = (0, 0); die "$!\n" if $!;
> +	$code->();
> +    }, afterfork => sub {
> +	my ($pid) = @_;
> +	close($sc);
> +	my $one = <$sp>;
> +	die "failed to sync with userprocess\n" if $one ne "1\n";
> +	set_id_map($pid, $id_map);
> +	syswrite($sp, "2\n") == 2 or die "write: $!\n";
> +	close($sp);
> +    });
> +}
> +
> +1;





More information about the pve-devel mailing list