[pve-devel] [PATCH qemu-server v9 5/7] fix #1027: virtio-fs support

Markus Frank m.frank at proxmox.com
Fri Mar 1 09:05:24 CET 2024


add support for sharing directories with a guest vm

virtio-fs needs virtiofsd to be started.
In order to start virtiofsd as a process (despite being a daemon it is does not
run in the background), a double-fork is used.

virtiofsd should close itself together with qemu.

There are the parameters dirid and the optional parameters direct-io, cache and
writeback. Additionally the xattr & acl parameter overwrite the directory
mapping settings for xattr & acl.

The dirid gets mapped to the path on the current node and is also used as a
mount tag (name used to mount the device on the guest).

example config:
```
virtiofs0: foo,direct-io=1,cache=always,acl=1
virtiofs1: dirid=bar,cache=never,xattr=1,writeback=1
```

For information on the optional parameters see the coherent doc patch
and the official gitlab README:
https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md

Also add a permission check for virtiofs directory access.

Signed-off-by: Markus Frank <m.frank at proxmox.com>
---
 PVE/API2/Qemu.pm           |  39 ++++++-
 PVE/QemuServer.pm          |  19 +++-
 PVE/QemuServer/Makefile    |   3 +-
 PVE/QemuServer/Memory.pm   |  34 ++++--
 PVE/QemuServer/Virtiofs.pm | 212 +++++++++++++++++++++++++++++++++++++
 5 files changed, 296 insertions(+), 11 deletions(-)
 create mode 100644 PVE/QemuServer/Virtiofs.pm

diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index cdc8f7a..20edbfb 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -648,6 +648,32 @@ my sub check_vm_create_hostpci_perm {
     return 1;
 };
 
+my sub check_dir_perm {
+    my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_;
+
+    return 1 if $authuser eq 'root at pam';
+
+    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
+
+    my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $value);
+    $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
+
+    return 1;
+};
+
+my sub check_vm_create_dir_perm {
+    my ($rpcenv, $authuser, $vmid, $pool, $param) = @_;
+
+    return 1 if $authuser eq 'root at pam';
+
+    for my $opt (keys %{$param}) {
+	next if $opt !~ m/^virtiofs\d+$/;
+	check_dir_perm($rpcenv, $authuser, $vmid, $pool, $opt, $param->{$opt});
+    }
+
+    return 1;
+};
+
 my $check_vm_modify_config_perm = sub {
     my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;
 
@@ -658,7 +684,7 @@ my $check_vm_modify_config_perm = sub {
 	# else, as there the permission can be value dependend
 	next if PVE::QemuServer::is_valid_drivename($opt);
 	next if $opt eq 'cdrom';
-	next if $opt =~ m/^(?:unused|serial|usb|hostpci)\d+$/;
+	next if $opt =~ m/^(?:unused|serial|usb|hostpci|virtiofs)\d+$/;
 	next if $opt eq 'tags';
 
 
@@ -954,6 +980,7 @@ __PACKAGE__->register_method({
 	    &$check_vm_create_serial_perm($rpcenv, $authuser, $vmid, $pool, $param);
 	    check_vm_create_usb_perm($rpcenv, $authuser, $vmid, $pool, $param);
 	    check_vm_create_hostpci_perm($rpcenv, $authuser, $vmid, $pool, $param);
+	    check_vm_create_dir_perm($rpcenv, $authuser, $vmid, $pool, $param);
 
 	    PVE::QemuServer::check_bridge_access($rpcenv, $authuser, $param);
 	    &$check_cpu_model_access($rpcenv, $authuser, $param);
@@ -1828,6 +1855,10 @@ my $update_vm_api  = sub {
 		    check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
 		    PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
 		    PVE::QemuConfig->write_config($vmid, $conf);
+		} elsif ($opt =~ m/^virtiofs\d$/) {
+		    check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
+		    PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
+		    PVE::QemuConfig->write_config($vmid, $conf);
 		} elsif ($opt eq 'tags') {
 		    assert_tag_permissions($vmid, $val, '', $rpcenv, $authuser);
 		    delete $conf->{$opt};
@@ -1914,6 +1945,12 @@ my $update_vm_api  = sub {
 		    }
 		    check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
 		    $conf->{pending}->{$opt} = $param->{$opt};
+		} elsif ($opt =~ m/^virtiofs\d$/) {
+		    if (my $oldvalue = $conf->{$opt}) {
+			check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $oldvalue);
+		    }
+		    check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
+		    $conf->{pending}->{$opt} = $param->{$opt};
 		} elsif ($opt eq 'tags') {
 		    assert_tag_permissions($vmid, $conf->{$opt}, $param->{$opt}, $rpcenv, $authuser);
 		    $conf->{pending}->{$opt} = PVE::GuestHelpers::get_unique_tags($param->{$opt});
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index b45507a..06182bf 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -34,6 +34,7 @@ use PVE::DataCenterConfig;
 use PVE::Exception qw(raise raise_param_exc);
 use PVE::Format qw(render_duration render_bytes);
 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
+use PVE::Mapping::Dir;
 use PVE::Mapping::PCI;
 use PVE::Mapping::USB;
 use PVE::INotify;
@@ -60,6 +61,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
 use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
 use PVE::QemuServer::USB;
+use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd);
 
 my $have_sdn;
 eval {
@@ -959,6 +961,10 @@ my $netdesc = {
 
 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
 
+for (my $i = 0; $i < max_virtiofs(); $i++)  {
+    $confdesc->{"virtiofs$i"} = get_standard_option('pve-qm-virtiofs');
+}
+
 my $ipconfig_fmt = {
     ip => {
 	type => 'string',
@@ -3808,7 +3814,7 @@ sub config_to_command {
     }
 
     PVE::QemuServer::Memory::config(
-	$conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
+	$conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd, $machineFlags);
 
     push @$cmd, '-S' if $conf->{freeze};
 
@@ -4084,6 +4090,8 @@ sub config_to_command {
     }
     push @$machineFlags, "type=${machine_type_min}";
 
+    PVE::QemuServer::Virtiofs::config($conf, $vmid, $devices, $machineFlags);
+
     push @$cmd, @$devices;
     push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
     push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
@@ -5865,6 +5873,8 @@ sub vm_start_nolock {
 	PVE::Tools::run_fork sub {
 	    PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
 
+	    my $virtiofs_sockets = start_all_virtiofsd($conf, $vmid);
+
 	    my $tpmpid;
 	    if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
 		# start the TPM emulator so QEMU can connect on start
@@ -5877,8 +5887,10 @@ sub vm_start_nolock {
 		    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
 		    kill 'TERM', $tpmpid;
 		}
+		PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);
 		die "QEMU exited with code $exitcode\n";
 	    }
+	    PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);
 	};
     };
 
@@ -6504,7 +6516,10 @@ sub check_mapping_access {
 	    } else {
 		die "either 'host' or 'mapping' must be set.\n";
 	    }
-       }
+	} elsif ($opt =~ m/^virtiofs\d$/) {
+	    my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+	    $rpcenv->check_full($user, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
+	}
    }
 };
 
diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile
index ac26e56..d1bf8bb 100644
--- a/PVE/QemuServer/Makefile
+++ b/PVE/QemuServer/Makefile
@@ -11,7 +11,8 @@ SOURCES=PCI.pm		\
 	CPUConfig.pm	\
 	CGroup.pm	\
 	Drive.pm	\
-	QMPHelpers.pm
+	QMPHelpers.pm	\
+	Virtiofs.pm
 
 .PHONY: install
 install: ${SOURCES}
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index f365f2d..691f9b3 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -10,6 +10,7 @@ use PVE::Exception qw(raise raise_param_exc);
 use PVE::QemuServer::Helpers qw(parse_number_sets);
 use PVE::QemuServer::Monitor qw(mon_cmd);
 use PVE::QemuServer::QMPHelpers qw(qemu_devicedel qemu_objectdel);
+use PVE::QemuServer::Virtiofs;
 
 use base qw(Exporter);
 
@@ -336,7 +337,7 @@ sub qemu_memdevices_list {
 }
 
 sub config {
-    my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd) = @_;
+    my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd, $machine_flags) = @_;
 
     my $memory = get_current_memory($conf->{memory});
     my $static_memory = 0;
@@ -367,6 +368,16 @@ sub config {
 
     die "numa needs to be enabled to use hugepages" if $conf->{hugepages} && !$conf->{numa};
 
+    my $virtiofs_enabled = 0;
+    for (my $i = 0; $i < PVE::QemuServer::Virtiofs::max_virtiofs(); $i++) {
+	my $opt = "virtiofs$i";
+	next if !$conf->{$opt};
+	my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+	if ($virtiofs) {
+	    $virtiofs_enabled = 1;
+	}
+    }
+
     if ($conf->{numa}) {
 
 	my $numa_totalmemory = undef;
@@ -379,7 +390,8 @@ sub config {
 	    my $numa_memory = $numa->{memory};
 	    $numa_totalmemory += $numa_memory;
 
-	    my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
+	    my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+	    my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
 
 	    # cpus
 	    my $cpulists = $numa->{cpus};
@@ -404,7 +416,7 @@ sub config {
 	    }
 
 	    push @$cmd, '-object', $mem_object;
-	    push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+	    push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
 	}
 
 	die "total memory for NUMA nodes must be equal to vm static memory\n"
@@ -418,15 +430,21 @@ sub config {
 		die "host NUMA node$i doesn't exist\n"
 		    if !host_numanode_exists($i) && $conf->{hugepages};
 
-		my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
-		push @$cmd, '-object', $mem_object;
-
 		my $cpus = ($cores * $i);
 		$cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
 
-		push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+		my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+		my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
+		push @$cmd, '-object', $mem_object;
+		push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
 	    }
 	}
+    } elsif ($virtiofs_enabled) {
+	# kvm: '-machine memory-backend' and '-numa memdev' properties are
+	# mutually exclusive
+	push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem'
+	    .",size=$conf->{memory}M,share=on";
+	push @$machine_flags, 'memory-backend=virtiofs-mem';
     }
 
     if ($hotplug) {
@@ -453,6 +471,8 @@ sub print_mem_object {
 	my $path = hugepages_mount_path($hugepages_size);
 
 	return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
+    } elsif ($id =~ m/^virtiofs-mem/) {
+	return "memory-backend-memfd,id=$id,size=${size}M,share=on";
     } else {
 	return "memory-backend-ram,id=$id,size=${size}M";
     }
diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm
new file mode 100644
index 0000000..4c59348
--- /dev/null
+++ b/PVE/QemuServer/Virtiofs.pm
@@ -0,0 +1,212 @@
+package PVE::QemuServer::Virtiofs;
+
+use strict;
+use warnings;
+
+use Fcntl;
+use IO::Socket::UNIX;
+use POSIX;
+use PVE::JSONSchema qw(get_standard_option parse_property_string);
+use PVE::Mapping::Dir;
+
+use base qw(Exporter);
+
+our @EXPORT_OK = qw(
+max_virtiofs
+assert_virtiofs_config
+start_virtiofsd
+start_all_virtiofsd
+virtiofs_qemu_param
+close_sockets
+);
+
+my $MAX_VIRTIOFS = 10;
+my $socket_path_root = "/run/qemu-server/virtiofsd";
+
+my $virtiofs_fmt = {
+    'dirid' => {
+	type => 'string',
+	default_key => 1,
+	description => "Mapping identifier of the directory mapping to be"
+	    ." shared with the guest. Also used as a mount tag inside the VM.",
+	format_description => 'mapping-id',
+	format => 'pve-configid',
+    },
+    'cache' => {
+	type => 'string',
+	description => "The caching policy the file system should use (auto, always, never).",
+	enum => [qw(auto always never)],
+	default => "auto",
+	optional => 1,
+    },
+    'direct-io' => {
+	type => 'boolean',
+	description => "Honor the O_DIRECT flag passed down by guest applications.",
+	default => 0,
+	optional => 1,
+    },
+    writeback => {
+	type => 'boolean',
+	description => "Enable writeback cache. If enabled, writes may be cached"
+	    ." in the guest until the file is closed or an fsync is performed.",
+	default => 0,
+	optional => 1,
+    },
+    xattr => {
+	type => 'boolean',
+	description => "Overwrite the xattr option from mapping and explicitly"
+	    ." enable/disable support for extended attributes for the VM.",
+	default => "use value from mapping",
+	optional => 1,
+    },
+    acl => {
+	type => 'boolean',
+	description => "Overwrite the acl option from mapping and explicitly"
+	    ." enable/disable support for posix ACLs (enabled acl implies xattr)"
+	    ." for the VM.",
+	default => "use value from mapping",
+	optional => 1,
+    },
+};
+PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
+
+my $virtiofsdesc = {
+    optional => 1,
+    type => 'string', format => $virtiofs_fmt,
+    description => "share a directory between host and guest",
+};
+PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
+
+sub max_virtiofs {
+    return $MAX_VIRTIOFS;
+}
+
+sub assert_virtiofs_config {
+    my ($conf, $virtiofs) = @_;
+    my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+    my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+
+    my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};
+    if ($acl && PVE::QemuServer::windows_version($conf->{ostype})) {
+	log_warn(
+	    "Please disable ACLs for virtiofs on Windows VMs, otherwise"
+	    ." the virtiofs shared directory cannot be mounted."
+	);
+    }
+
+    if (!$node_list || scalar($node_list->@*) != 1) {
+	die "virtiofs needs exactly one mapping for this node\n";
+    }
+
+    eval PVE::Mapping::Dir::assert_valid($node_list->[0]);
+    if (my $err = $@) {
+	die "Directory Mapping invalid: $err\n";
+    }
+}
+
+sub config {
+    my ($conf, $vmid, $devices, $machine_flags) = @_;
+    for (my $i = 0; $i < max_virtiofs(); $i++) {
+	my $opt = "virtiofs$i";
+
+	next if !$conf->{$opt};
+	my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+	next if !$virtiofs;
+
+	assert_virtiofs_config($conf, $virtiofs);
+
+	push @$devices, '-chardev', "socket,id=virtfs$i,path=$socket_path_root/vm$vmid-fs$i";
+
+	# queue-size is set 1024 because of bug with Windows guests:
+	# https://bugzilla.redhat.com/show_bug.cgi?id=1873088
+	# 1024 is also always used in the virtiofs documentations:
+	# https://gitlab.com/virtio-fs/virtiofsd#examples
+	push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
+	    .",chardev=virtfs$i,tag=$virtiofs->{dirid}";
+    }
+}
+
+sub start_all_virtiofsd {
+    my ($conf, $vmid) = @_;
+    my $virtiofs_sockets = [];
+    for (my $i = 0; $i < max_virtiofs(); $i++) {
+	my $opt = "virtiofs$i";
+
+	next if !$conf->{$opt};
+	my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+	next if !$virtiofs;
+
+	my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs);
+	push @$virtiofs_sockets, $virtiofs_socket;
+    }
+    return $virtiofs_sockets;
+}
+
+sub close_sockets {
+    my @sockets = @_;
+    for my $socket (@sockets) {
+	shutdown($socket, 2);
+	close($socket);
+    }
+}
+
+sub start_virtiofsd {
+    my ($vmid, $fsid, $virtiofs) = @_;
+
+    my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+    my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+
+    # Default to dir config xattr & acl settings
+    my $xattr = $virtiofs->{xattr} // $dir_cfg->{xattr};
+    my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};
+
+    my $node_cfg = $node_list->[0];
+    my $path = $node_cfg->{path};
+    mkdir $socket_path_root;
+    my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
+    unlink($socket_path);
+    my $socket = IO::Socket::UNIX->new(
+	Type => SOCK_STREAM,
+	Local => $socket_path,
+	Listen => 1,
+    ) or die "cannot create socket - $!\n";
+
+    my $flags = fcntl($socket, F_GETFD, 0)
+	or die "failed to get file descriptor flags: $!\n";
+    fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
+	or die "failed to remove FD_CLOEXEC from file descriptor\n";
+
+    my $fd = $socket->fileno();
+
+    my $virtiofsd_bin = '/usr/libexec/virtiofsd';
+
+    my $pid = fork();
+    if ($pid == 0) {
+	setsid();
+	$0 = "task pve-vm$vmid-virtiofs$fsid";
+	my $pid2 = fork();
+	if ($pid2 == 0) {
+	    my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
+	    push @$cmd, '--xattr' if $xattr;
+	    push @$cmd, '--posix-acl' if $acl;
+	    push @$cmd, '--announce-submounts' if ($node_cfg->{submounts});
+	    push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'});
+	    push @$cmd, "--cache=$virtiofs->{'cache'}" if ($virtiofs->{'cache'});
+	    push @$cmd, "--writeback" if ($virtiofs->{'writeback'});
+	    push @$cmd, '--syslog';
+	    exec(@$cmd);
+	} elsif (!defined($pid2)) {
+	    die "could not fork to start virtiofsd\n";
+	} else {
+	    POSIX::_exit(0);
+	}
+    } elsif (!defined($pid)) {
+	die "could not fork to start virtiofsd\n";
+    } else {
+	waitpid($pid, 0);
+    }
+
+    # return socket to keep it alive,
+    # so that QEMU will wait for virtiofsd to start
+    return $socket;
+}
-- 
2.39.2





More information about the pve-devel mailing list