[pve-devel] [PATCH container 6/8] prestart-hook: use staged mountpoints on newer kernels

Wolfgang Bumiller w.bumiller at proxmox.com
Fri Nov 8 11:06:07 CET 2019


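On kernels that provide the new mount API (detected by probing
fsopen() and checking for ENOSYS), stage each mount point first
and then move it into the container hierarchy. This way we
operate on defined paths in the monitor namespace
(/run/pve/mountpoint/{rootfs,mp0,mp1,...}) while performing the
mount, and can use `move_mount()` without passing the
MOVE_MOUNT_T_SYMLINKS flag when putting the hierarchy in place.
Older kernels keep using the previous direct-mount code path.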

Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
---
 src/lxc-pve-prestart-hook | 79 +++++++++++++++++++++++++++++++++------
 1 file changed, 68 insertions(+), 11 deletions(-)

diff --git a/src/lxc-pve-prestart-hook b/src/lxc-pve-prestart-hook
index c0965ab..d01c202 100755
--- a/src/lxc-pve-prestart-hook
+++ b/src/lxc-pve-prestart-hook
@@ -5,9 +5,10 @@ package lxc_pve_prestart_hook;
 use strict;
 use warnings;
 
-use POSIX;
+use Errno qw(ENOSYS);
+use Fcntl qw(O_DIRECTORY :mode);
 use File::Path;
-use Fcntl ':mode';
+use POSIX;
 
 use PVE::Cluster;
 use PVE::LXC::Config;
@@ -15,7 +16,8 @@ use PVE::LXC::Setup;
 use PVE::LXC::Tools;
 use PVE::LXC;
 use PVE::Storage;
-use PVE::Tools;
+use PVE::Syscall qw(:fsmount);
+use PVE::Tools qw(AT_FDCWD O_PATH);
 
 PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
     my ($vmid, $vars, undef, undef) = @_;
@@ -51,19 +53,74 @@ PVE::LXC::Tools::lxc_hook('pre-start', 'lxc', sub {
 
     my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
 
-    my $setup_mountpoint = sub {
-	my ($ms, $mountpoint) = @_;
-
-	#return if $ms eq 'rootfs';
-	my (undef, undef, $dev) = PVE::LXC::mountpoint_mount($mountpoint, $rootdir, $storage_cfg, undef, $rootuid, $rootgid);
-	push @$devices, $dev if $dev && $mountpoint->{quota};
-    };
-
     # Unmount first when the user mounted the container with "pct mount".
     eval {
 	PVE::Tools::run_command(['umount', '--recursive', $rootdir], outfunc => sub {}, errfunc => sub {});
     };
 
+    if (PVE::Tools::fsopen(0, 0)) {
+	die "kernel behaved unexpectedly: fsopen(NULL, 0) did not fail!\n";
+    }
+
+    my $setup_mountpoint;
+    if ($! == ENOSYS) {
+	# Legacy mode for old kernels:
+	$setup_mountpoint = sub {
+	    my ($opt, $mountpoint) = @_;
+
+	    my (undef, undef, $dev) = PVE::LXC::mountpoint_mount(
+		$mountpoint,
+		$rootdir,
+		$storage_cfg,
+		undef,
+		$rootuid,
+		$rootgid,
+	    );
+	    push @$devices, $dev if $dev && $mountpoint->{quota};
+	};
+    } else {
+	# With newer kernels we stage mount points and then use move_mount().
+	my $rootdir_fd = undef;
+	$setup_mountpoint = sub {
+	    my ($opt, $mountpoint) = @_;
+
+	    my $dir = PVE::LXC::get_staging_mount_path($opt);
+	    my (undef, undef, $dev, $mount_fd) = PVE::LXC::mountpoint_stage(
+		$mountpoint,
+		$dir,
+		$storage_cfg,
+		undef,
+		$rootuid,
+		$rootgid,
+	    );
+
+	    my ($dfd, $ddir);
+	    if ($rootdir_fd) {
+		$dfd = fileno($rootdir_fd);
+		$ddir = './' . $mountpoint->{mp};
+	    } else {
+		# Assert that 'rootfs' is the first one:
+		die "foreach_mount() error\n" if $opt ne 'rootfs';
+
+		# $rootdir is not controlled by the container, so we can use it directly
+		$dfd = AT_FDCWD;
+		$ddir = $rootdir;
+	    }
+
+	    # NOTE: when we have openat2() available, even better: use that with fd-as-chroot mode
+	    # and MOVE_MOUNT_T_EMPTY_PATH...
+	    PVE::Tools::move_mount(fileno($mount_fd), '', $dfd, $ddir, &MOVE_MOUNT_F_EMPTY_PATH)
+		or die "failed to move '$opt' into container hierarchy: $!\n";
+
+	    # From now on we mount inside our rootfs:
+	    if (!$rootdir_fd) {
+		$rootdir_fd = $mount_fd;
+	    }
+
+	    push @$devices, $dev if $dev && $mountpoint->{quota};
+	};
+    }
+
     PVE::LXC::Config->foreach_mountpoint($conf, $setup_mountpoint);
 
     my $lxc_setup = PVE::LXC::Setup->new($conf, $rootdir);
-- 
2.20.1





More information about the pve-devel mailing list