[pve-devel] [PATCH container 1/8] implement "staged mountpoints"

Wolfgang Bumiller w.bumiller at proxmox.com
Fri Nov 8 11:06:02 CET 2019


Staging a mount point requires the new kernel mount API and
will mount the volume at a fixed path, then use open_tree()
to "pick it up" into a file descriptor.

For most of our volumes we wouldn't need the temp directory,
but some things cannot be handled with _only_ the new API
(like single-step read-only bind mounts). Additionally, the
'mount' command figures out file systems automatically and
has a bunch of helpers we'd need to reimplement, so instead
we go through our usual mount code and then pick up the result.

This can then be used to implement mount point hotplugging,
as with the open file descriptor we can move into the
container's namespace and issue a `move_mount()` to put the
mount point in place in the running container.

Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
---
 src/PVE/LXC.pm | 47 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 4 deletions(-)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index cdf6d64..b5a97b8 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -11,7 +11,7 @@ use File::Path;
 use File::Spec;
 use Cwd qw();
 use Fcntl qw(O_RDONLY O_NOFOLLOW O_DIRECTORY);
-use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED);
+use Errno qw(ELOOP ENOTDIR EROFS ECONNREFUSED ENOSYS);
 use IO::Socket::UNIX;
 
 use PVE::Exception qw(raise_perm_exc);
@@ -19,12 +19,12 @@ use PVE::Storage;
 use PVE::SafeSyslog;
 use PVE::INotify;
 use PVE::JSONSchema qw(get_standard_option);
-use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full O_PATH);
+use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full O_PATH AT_FDCWD);
 use PVE::CpuSet;
 use PVE::Network;
 use PVE::AccessControl;
 use PVE::ProcFSTools;
-use PVE::Syscall;
+use PVE::Syscall qw(:fsmount);
 use PVE::LXC::Config;
 use PVE::GuestHelpers;
 
@@ -1397,9 +1397,44 @@ sub __mount_prepare_rootdir {
     return ($rootdir, $mount_path, $mpfd, $parentfd, $last_dir);
 }
 
-# use $rootdir = undef to just return the corresponding mount path
+# Pass $rootdir = undef to just return the corresponding mount path.
 sub mountpoint_mount {
     my ($mountpoint, $rootdir, $storage_cfg, $snapname, $rootuid, $rootgid) = @_;
+    return __mountpoint_mount($mountpoint, $rootdir, $storage_cfg, $snapname, $rootuid, $rootgid, undef);
+}
+
+# Mount $mountpoint at $stage_dir and pick the mount up with open_tree(). Returns the fd, or
+# ($path, $loop, $dev, $fd) in list context. Returns undef if mounting fails with ENOSYS.
+sub mountpoint_stage {
+    my ($mountpoint, $stage_dir, $storage_cfg, $snapname, $rootuid, $rootgid) = @_;
+    my ($path, $loop, $dev) =
+	__mountpoint_mount($mountpoint, $stage_dir, $storage_cfg, $snapname, $rootuid, $rootgid, 1);
+    if (!defined($path)) {
+	return undef if $! == ENOSYS;
+	die "failed to mount subvolume: $!\n";
+    }
+
+    my $fd = PVE::Tools::open_tree(&AT_FDCWD, $stage_dir, &OPEN_TREE_CLOEXEC | &OPEN_TREE_CLONE)
+	or die "open_tree() on mount point failed: $!\n";
+
+    return wantarray ? ($path, $loop, $dev, $fd) : $fd;
+}
+
+# If $stage_mount is set, $rootdir is treated as a temporary path to "stage" the file system. The
+#   caller can then open a file descriptor to it which can be used with the `move_mount` syscall.
+#   Note that if the kernel does not support the new mount API, this will not perform any action
+#   and will return `undef` with $! = ENOSYS.
+sub __mountpoint_mount {
+    my ($mountpoint, $rootdir, $storage_cfg, $snapname, $rootuid, $rootgid, $stage_mount) = @_;
+
+    if (defined($stage_mount)) {
+	# Test the kernel:
+	my $fd = PVE::Tools::fsopen("ext4", &FSOPEN_CLOEXEC)
+	    or return undef;
+	# If we get here, the kernel is new enough to support our syscalls required for staging
+	# mount points.
+	close($fd);
+    }
 
     my $volid = $mountpoint->{volume};
     my $mount = $mountpoint->{mp};
@@ -1418,6 +1453,10 @@ sub mountpoint_mount {
 	($rootdir, $mount_path, $mpfd, $parentfd, $last_dir) =
 	    __mount_prepare_rootdir($rootdir, $mount, $rootuid, $rootgid);
     }
+
+    if (defined($stage_mount)) {
+	$mount_path = $rootdir;
+    }
     
     my ($storage, $volname) = PVE::Storage::parse_volume_id($volid, 1);
 
-- 
2.20.1





More information about the pve-devel mailing list