[pve-devel] [PATCH v2 container 9/9] implement mountpoint hotplugging

Wolfgang Bumiller w.bumiller at proxmox.com
Wed Nov 13 10:33:19 CET 2019


Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
---
Changes to v1:
  Use the new can_use_new_mount_api() to prevent mp hotplug attempts on
  older kernels.

 src/PVE/LXC.pm        | 44 +++++++++++++++++++++++++++++++++++++++++++
 src/PVE/LXC/Config.pm | 28 ++++++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index 77b1a43..c022355 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -1625,6 +1625,50 @@ sub __mountpoint_mount {
     die "unsupported storage";
 }
 
+# Hot-plug mount point $mp (config key $opt) into the running container $vmid.
+# Takes five scalars: $vmid, $conf, $opt, $mp, $storage_cfg. Errors are raised via die().
+sub mountpoint_hotplug($$$$$) {
+    my ($vmid, $conf, $opt, $mp, $storage_cfg) = @_;
+
+    my (undef, $rootuid, $rootgid) = PVE::LXC::parse_id_maps($conf);
+
+    # We do the rest in a fork with an unshared mount namespace, since we're now going to 'stage'
+    # the mountpoint, then grab it, then move into the container's namespace, then mount it.
+
+    PVE::Tools::run_fork(sub {
+	# Pin the container pid longer, we also need to get its monitor/parent:
+	my ($ct_pid, $ct_pidfd) = open_lxc_pid($vmid)
+	    or die "failed to open pidfd of container $vmid\'s init process\n";
+
+	my ($monitor_pid, $monitor_pidfd) = open_ppid($ct_pid)
+	    or die "failed to open pidfd of container $vmid\'s monitor process\n";
+
+	my $ct_mnt_ns = $get_container_namespace->($vmid, $ct_pid, 'mnt');
+	my $monitor_mnt_ns = $get_container_namespace->($vmid, $monitor_pid, 'mnt');
+
+	# Change into the monitor's mount namespace. We "pin" the mount into the monitor's
+	# namespace for it to remain active there since the container will be able to unmount
+	# hotplugged mount points and thereby potentially free up loop devices, which is a security
+	# concern.
+	PVE::Tools::setns(fileno($monitor_mnt_ns), PVE::Tools::CLONE_NEWNS);
+	chdir('/')
+	    or die "failed to change root directory within the monitor's mount namespace: $!\n";
+
+	my $dir = get_staging_mount_path($opt);
+	my $mount_fd = mountpoint_stage($mp, $dir, $storage_cfg, undef, $rootuid, $rootgid);
+
+	# Enter the container's mount namespace so the staged mount can be moved to its final path.
+	PVE::Tools::setns(fileno($ct_mnt_ns), PVE::Tools::CLONE_NEWNS);
+	chdir('/')
+	    or die "failed to change root directory within the container's mount namespace: $!\n";
+
+	# NOTE(review): results of setns()/move_mount() are not checked - confirm they die on error.
+	PVE::Tools::move_mount(
+	    fileno($mount_fd), "", &AT_FDCWD, $mp->{mp}, &MOVE_MOUNT_F_EMPTY_PATH,
+	);
+    });
+}
+
 # Create a directory in the mountpoint staging tempfs.
 sub get_staging_mount_path($) {
     my ($opt) = @_;
diff --git a/src/PVE/LXC/Config.pm b/src/PVE/LXC/Config.pm
index 44d7f93..ecf60ef 100644
--- a/src/PVE/LXC/Config.pm
+++ b/src/PVE/LXC/Config.pm
@@ -1217,6 +1217,14 @@ sub vmconfig_hotplug_pending {
 		if (!$hotplug_memory_done) { # don't call twice if both opts are passed
 		    $hotplug_memory->($conf->{pending}->{memory}, $conf->{pending}->{swap});
 		}
+	    } elsif ($opt =~ m/^mp(\d+)$/) {
+		if (!PVE::LXC::Tools::can_use_new_mount_api()) {
+		    die "skip\n";
+		}
+
+		$class->vmconfig_apply_pending_mountpoint($vmid, $conf, $opt, $storecfg, 1);
+		# vmconfig_apply_pending_mountpoint modifies the value if it creates a new disk
+		$value = $conf->{pending}->{$opt};
 	    } else {
 		die "skip\n"; # skip non-hotpluggable
 	    }
@@ -1306,14 +1314,32 @@ sub vmconfig_apply_pending_mountpoint {
     my $old = $conf->{$opt};
     if ($mp->{type} eq 'volume') {
 	if ($mp->{volume} =~ $PVE::LXC::NEW_DISK_RE) {
+	    my $original_value = $conf->{pending}->{$opt};
 	    my $vollist = PVE::LXC::create_disks(
 		$storecfg,
 		$vmid,
-		{ $opt => $conf->{pending}->{$opt} },
+		{ $opt => $original_value },
 		$conf,
 		1,
 	    );
+	    if ($running) {
+		# Re-parse mount point:
+		my $mp = $class->parse_ct_mountpoint($conf->{pending}->{$opt});
+		eval {
+		    PVE::LXC::mountpoint_hotplug($vmid, $conf, $opt, $mp, $storecfg);
+		};
+		my $err = $@;
+		if ($err) {
+		    PVE::LXC::destroy_disks($storecfg, $vollist);
+		    # The pending-changes code collects errors but keeps on looping through further
+		    # pending changes, so unroll the change in $conf as well if destroy_disks()
+		    # didn't die().
+		    $conf->{pending}->{$opt} = $original_value;
+		    die $err;
+		}
+	    }
 	} else {
+	    die "skip\n" if $running; # TODO: "changing" mount points
 	    $rescan_volume->($storecfg, $mp);
 	    $conf->{pending}->{$opt} = $class->print_ct_mountpoint($mp);
 	}
-- 
2.20.1





More information about the pve-devel mailing list