[pve-devel] [PATCH container 1/3] extract apparmor profile & namespace switch to its own helper

Filip Schauer f.schauer at proxmox.com
Mon Dec 16 18:21:30 CET 2024

Signed-off-by: Filip Schauer <f.schauer at proxmox.com>
 src/PVE/LXC.pm | 71 +++++++++++++++++++++++++++++---------------------
 1 file changed, 41 insertions(+), 30 deletions(-)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index e78e365..12a4378 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -1982,15 +1982,42 @@ sub __mountpoint_mount {
     die "unsupported storage";
+my $enter_mnt_ns_and_change_aa_profile = sub {
+    my ($mnt_ns, $aa_profile, $code_pre_changeprofile) = @_;
+    # Grab a file descriptor to our apparmor label file so we can change the profile
+    sysopen(my $aa_fd, "/proc/self/attr/current", O_WRONLY)
+	or die "failed to open '/proc/self/attr/current' for writing: $!\n";
+    # But switch namespaces first, to make sure the namespace switches aren't blocked by
+    # apparmor.
+    PVE::Tools::setns(fileno($mnt_ns), PVE::Tools::CLONE_NEWNS);
+    chdir('/')
+	or die "failed to change root directory within mount namespace: $!\n";
+    $code_pre_changeprofile->() if defined($code_pre_changeprofile);
+    # Now switch our apparmor profile:
+    my $data = "changeprofile $aa_profile";
+    my $data_written = syswrite($aa_fd, $data, length($data));
+    if (!defined($data_written) || $data_written != length($data)) {
+	die "failed to change apparmor profile: $!\n";
+    }
+    # Check errors on close as well:
+    close($aa_fd)
+	or die "failed to change apparmor profile (close() failed): $!\n";
 sub mountpoint_hotplug :prototype($$$$$) {
     my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_;
     my (undef, $root_uid, $root_gid) = PVE::LXC::parse_id_maps($conf);
-    # We do the rest in a fork with an unshared mount namespace, because:
-    #  -) change our papparmor profile to that of /usr/bin/lxc-start
-    #  -) we're now going to 'stage' # the mountpoint, then grab it, then move into the
-    #     container's namespace, then mount it.
+    # We do the rest in a fork with an unshared mount namespace:
+    #  -) change our apparmor profile to 'pve-container-mounthotplug', which is the
+    #     '/usr/bin/lxc-start' profile with move_mount privileges on every mount.
+    #  -) we're now going to 'stage' the mountpoint, then grab it, then move into the container's
+    #     namespace, then mount it.
     PVE::Tools::run_fork(sub {
 	# Pin the container pid longer, we also need to get its monitor/parent:
@@ -2003,32 +2030,16 @@ sub mountpoint_hotplug :prototype($$$$$) {
 	my $ct_mnt_ns = $get_container_namespace->($vmid, $ct_pid, 'mnt');
 	my $monitor_mnt_ns = $get_container_namespace->($vmid, $monitor_pid, 'mnt');
-	# Grab a file descriptor to our apparmor label file so we can change into the 'lxc-start'
-	# profile to lower our privileges to the same level we have in the start hook:
-	sysopen(my $aa_fd, "/proc/self/attr/current", O_WRONLY)
-	    or die "failed to open '/proc/self/attr/current' for writing: $!\n";
-	# But switch namespaces first, to make sure the namespace switches aren't blocked by
-	# apparmor.
-	# Change into the monitor's mount namespace. We "pin" the mount into the monitor's
-	# namespace for it to remain active there since the container will be able to unmount
-	# hotplugged mount points and thereby potentially free up loop devices, which is a security
-	# concern.
-	PVE::Tools::setns(fileno($monitor_mnt_ns), PVE::Tools::CLONE_NEWNS);
-	chdir('/')
-	    or die "failed to change root directory within the monitor's mount namespace: $!\n";
-	my $dir = get_staging_mount_path($opt);
-	# Now switch our apparmor profile before mounting:
-	my $data = 'changeprofile pve-container-mounthotplug';
-	my $data_written = syswrite($aa_fd, $data, length($data));
-	if (!defined($data_written) || $data_written != length($data)) {
-	    die "failed to change apparmor profile: $!\n";
-	}
-	# Check errors on close as well:
-	close($aa_fd)
-	    or die "failed to change apparmor profile (close() failed): $!\n";
+	# -) Change into the monitor's mount namespace. We "pin" the mount into the monitor's
+	#    namespace for it to remain active there since the container will be able to unmount
+	#    hotplugged mount points and thereby potentially free up loop devices, which is a
+	#    security concern.
+	# -) Prepare the staging directory.
+	# -) Switch to the 'pve-container-mounthotplug' apparmor profile.
+	my $dir;
+	$enter_mnt_ns_and_change_aa_profile->($monitor_mnt_ns, "pve-container-mounthotplug", sub {
+	    $dir = get_staging_mount_path($opt);
+	});
 	my $mount_fd = mountpoint_stage($mp, $dir, $storage_cfg, undef, $root_uid, $root_gid);

More information about the pve-devel mailing list