[pve-devel] [PATCH qemu-server v12 5/12] fix #1027: virtio-fs support
Markus Frank
m.frank at proxmox.com
Fri Nov 15 15:00:30 CET 2024
add support for sharing directories with a guest vm.
virtio-fs needs virtiofsd to be started.
In order to start virtiofsd as a process (despite being a daemon it is
does not run in the background), a double-fork is used.
virtiofsd should close itself together with QEMU.
There are the parameters dirid and the optional parameters direct-io,
cache and writeback. Additionally the xattr & acl parameter overwrite
the directory mapping settings for xattr & acl.
The dirid gets mapped to the path on the current node and is also used
as a mount tag (name used to mount the device on the guest).
example config:
```
virtiofs0: foo,direct-io=1,cache=always,acl=1
virtiofs1: dirid=bar,cache=never,xattr=1,writeback=1
```
For information on the optional parameters see the coherent doc patch
and the official gitlab README:
https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md
Also add a permission check for virtiofs directory access.
Signed-off-by: Markus Frank <m.frank at proxmox.com>
---
PVE/API2/Qemu.pm | 40 ++++++-
PVE/QemuServer.pm | 22 +++-
PVE/QemuServer/Makefile | 3 +-
PVE/QemuServer/Memory.pm | 23 ++--
PVE/QemuServer/Virtiofs.pm | 227 +++++++++++++++++++++++++++++++++++++
5 files changed, 304 insertions(+), 11 deletions(-)
create mode 100644 PVE/QemuServer/Virtiofs.pm
diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index 1c3cb271..a369a32b 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -37,6 +37,7 @@ use PVE::QemuServer::Memory qw(get_current_memory);
use PVE::QemuServer::PCI;
use PVE::QemuServer::QMPHelpers;
use PVE::QemuServer::USB;
+use PVE::QemuServer::Virtiofs;
use PVE::QemuMigrate;
use PVE::RPCEnvironment;
use PVE::AccessControl;
@@ -721,6 +722,32 @@ my sub check_vm_create_hostpci_perm {
return 1;
};
+my sub check_dir_perm {
+ my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_;
+
+ return 1 if $authuser eq 'root at pam';
+
+ $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
+
+ my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $value);
+ $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
+
+ return 1;
+};
+
+my sub check_vm_create_dir_perm {
+ my ($rpcenv, $authuser, $vmid, $pool, $param) = @_;
+
+ return 1 if $authuser eq 'root at pam';
+
+ for my $opt (keys %{$param}) {
+ next if $opt !~ m/^virtiofs\d+$/;
+ check_dir_perm($rpcenv, $authuser, $vmid, $pool, $opt, $param->{$opt});
+ }
+
+ return 1;
+};
+
my $check_vm_modify_config_perm = sub {
my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;
@@ -731,7 +758,7 @@ my $check_vm_modify_config_perm = sub {
# else, as there the permission can be value dependend
next if PVE::QemuServer::is_valid_drivename($opt);
next if $opt eq 'cdrom';
- next if $opt =~ m/^(?:unused|serial|usb|hostpci)\d+$/;
+ next if $opt =~ m/^(?:unused|serial|usb|hostpci|virtiofs)\d+$/;
next if $opt eq 'tags';
@@ -1025,6 +1052,7 @@ __PACKAGE__->register_method({
&$check_vm_create_serial_perm($rpcenv, $authuser, $vmid, $pool, $param);
check_vm_create_usb_perm($rpcenv, $authuser, $vmid, $pool, $param);
check_vm_create_hostpci_perm($rpcenv, $authuser, $vmid, $pool, $param);
+ check_vm_create_dir_perm($rpcenv, $authuser, $vmid, $pool, $param);
PVE::QemuServer::check_bridge_access($rpcenv, $authuser, $param);
&$check_cpu_model_access($rpcenv, $authuser, $param);
@@ -1919,6 +1947,10 @@ my $update_vm_api = sub {
check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
PVE::QemuConfig->write_config($vmid, $conf);
+ } elsif ($opt =~ m/^virtiofs\d$/) {
+ check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
+ PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
+ PVE::QemuConfig->write_config($vmid, $conf);
} elsif ($opt eq 'tags') {
assert_tag_permissions($vmid, $val, '', $rpcenv, $authuser);
delete $conf->{$opt};
@@ -2008,6 +2040,12 @@ my $update_vm_api = sub {
}
check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
$conf->{pending}->{$opt} = $param->{$opt};
+ } elsif ($opt =~ m/^virtiofs\d$/) {
+ if (my $oldvalue = $conf->{$opt}) {
+ check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $oldvalue);
+ }
+ check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
+ $conf->{pending}->{$opt} = $param->{$opt};
} elsif ($opt eq 'tags') {
assert_tag_permissions($vmid, $conf->{$opt}, $param->{$opt}, $rpcenv, $authuser);
$conf->{pending}->{$opt} = PVE::GuestHelpers::get_unique_tags($param->{$opt});
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index cb1e0b82..015d4676 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -35,6 +35,7 @@ use PVE::Exception qw(raise raise_param_exc);
use PVE::Format qw(render_duration render_bytes);
use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
use PVE::HA::Config;
+use PVE::Mapping::Dir;
use PVE::Mapping::PCI;
use PVE::Mapping::USB;
use PVE::INotify;
@@ -62,6 +63,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
use PVE::QemuServer::USB;
+use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd);
my $have_sdn;
eval {
@@ -957,6 +959,10 @@ my $netdesc = {
PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
+for (my $i = 0; $i < max_virtiofs(); $i++) {
+ $confdesc->{"virtiofs$i"} = get_standard_option('pve-qm-virtiofs');
+}
+
my $ipconfig_fmt = {
ip => {
type => 'string',
@@ -3872,8 +3878,11 @@ sub config_to_command {
push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
}
+ my $virtiofs_enabled = PVE::QemuServer::Virtiofs::virtiofs_enabled($conf);
+
PVE::QemuServer::Memory::config(
- $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
+ $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd,
+ $machineFlags, $virtiofs_enabled);
push @$cmd, '-S' if $conf->{freeze};
@@ -4160,6 +4169,8 @@ sub config_to_command {
}
}
+ PVE::QemuServer::Virtiofs::config($conf, $vmid, $devices);
+
push @$cmd, @$devices;
push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
@@ -5945,6 +5956,8 @@ sub vm_start_nolock {
PVE::Tools::run_fork sub {
PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
+ my $virtiofs_sockets = start_all_virtiofsd($conf, $vmid);
+
my $tpmpid;
if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
# start the TPM emulator so QEMU can connect on start
@@ -5957,8 +5970,10 @@ sub vm_start_nolock {
warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
kill 'TERM', $tpmpid;
}
+ PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);
die "QEMU exited with code $exitcode\n";
}
+ PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);
};
};
@@ -6616,7 +6631,10 @@ sub check_mapping_access {
} else {
die "either 'host' or 'mapping' must be set.\n";
}
- }
+ } elsif ($opt =~ m/^virtiofs\d$/) {
+ my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ $rpcenv->check_full($user, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
+ }
}
};
diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile
index ac26e56f..d1bf8bb8 100644
--- a/PVE/QemuServer/Makefile
+++ b/PVE/QemuServer/Makefile
@@ -11,7 +11,8 @@ SOURCES=PCI.pm \
CPUConfig.pm \
CGroup.pm \
Drive.pm \
- QMPHelpers.pm
+ QMPHelpers.pm \
+ Virtiofs.pm
.PHONY: install
install: ${SOURCES}
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index f365f2d1..490ba378 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -336,7 +336,7 @@ sub qemu_memdevices_list {
}
sub config {
- my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd) = @_;
+ my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd, $machine_flags, $virtiofs_enabled) = @_;
my $memory = get_current_memory($conf->{memory});
my $static_memory = 0;
@@ -379,7 +379,8 @@ sub config {
my $numa_memory = $numa->{memory};
$numa_totalmemory += $numa_memory;
- my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
+ my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+ my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
# cpus
my $cpulists = $numa->{cpus};
@@ -404,7 +405,7 @@ sub config {
}
push @$cmd, '-object', $mem_object;
- push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+ push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
}
die "total memory for NUMA nodes must be equal to vm static memory\n"
@@ -418,15 +419,21 @@ sub config {
die "host NUMA node$i doesn't exist\n"
if !host_numanode_exists($i) && $conf->{hugepages};
- my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
- push @$cmd, '-object', $mem_object;
-
my $cpus = ($cores * $i);
$cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
- push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+ my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+ my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
+ push @$cmd, '-object', $mem_object;
+ push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
}
}
+ } elsif ($virtiofs_enabled) {
+ # kvm: '-machine memory-backend' and '-numa memdev' properties are
+ # mutually exclusive
+ push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem'
+ .",size=$conf->{memory}M,share=on";
+ push @$machine_flags, 'memory-backend=virtiofs-mem';
}
if ($hotplug) {
@@ -453,6 +460,8 @@ sub print_mem_object {
my $path = hugepages_mount_path($hugepages_size);
return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
+ } elsif ($id =~ m/^virtiofs-mem/) {
+ return "memory-backend-memfd,id=$id,size=${size}M,share=on";
} else {
return "memory-backend-ram,id=$id,size=${size}M";
}
diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm
new file mode 100644
index 00000000..bdd9fafd
--- /dev/null
+++ b/PVE/QemuServer/Virtiofs.pm
@@ -0,0 +1,227 @@
+package PVE::QemuServer::Virtiofs;
+
+use strict;
+use warnings;
+
+use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC);
+use IO::Socket::UNIX;
+use POSIX;
+use Socket qw(SOCK_STREAM);
+
+use PVE::JSONSchema qw(get_standard_option parse_property_string);
+use PVE::Mapping::Dir;
+use PVE::RESTEnvironment qw(log_warn);
+
+use base qw(Exporter);
+
+our @EXPORT_OK = qw(
+max_virtiofs
+start_all_virtiofsd
+);
+
+my $MAX_VIRTIOFS = 10;
+my $socket_path_root = "/run/qemu-server/virtiofsd";
+
+my $virtiofs_fmt = {
+ 'dirid' => {
+ type => 'string',
+ default_key => 1,
+ description => "Mapping identifier of the directory mapping to be shared with the guest."
+ ." Also used as a mount tag inside the VM.",
+ format_description => 'mapping-id',
+ format => 'pve-configid',
+ },
+ 'cache' => {
+ type => 'string',
+ description => "The caching policy the file system should use (auto, always, never).",
+ enum => [qw(auto always never)],
+ default => "auto",
+ optional => 1,
+ },
+ 'direct-io' => {
+ type => 'boolean',
+ description => "Honor the O_DIRECT flag passed down by guest applications.",
+ default => 0,
+ optional => 1,
+ },
+ writeback => {
+ type => 'boolean',
+ description => "Enable writeback cache. If enabled, writes may be cached in the guest until"
+ ." the file is closed or an fsync is performed.",
+ default => 0,
+ optional => 1,
+ },
+ xattr => {
+ type => 'boolean',
+ description => "Overwrite the xattr option from mapping and explicitly enable/disable"
+ ." support for extended attributes for the VM.",
+ default => "use value from mapping",
+ optional => 1,
+ },
+ acl => {
+ type => 'boolean',
+ description => "Overwrite the acl option from mapping and explicitly enable/disable support"
+ ." for posix ACLs (enabled acl implies xattr) for the VM.",
+ default => "use value from mapping",
+ optional => 1,
+ },
+};
+PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
+
+my $virtiofsdesc = {
+ optional => 1,
+ type => 'string', format => $virtiofs_fmt,
+ description => "Configuration for sharing a directory between host and guest using Virtio-fs.",
+};
+PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
+
+sub max_virtiofs {
+ return $MAX_VIRTIOFS;
+}
+
+sub assert_virtiofs_config {
+ my ($conf, $virtiofs) = @_;
+
+ my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+ my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+
+ my $acl = $virtiofs->{acl} // $dir_cfg->{acl};
+ if ($acl && PVE::QemuServer::Helpers::windows_version($conf->{ostype})) {
+ log_warn(
+ "Please disable ACLs for virtiofs on Windows VMs, otherwise"
+ ." the virtiofs shared directory cannot be mounted."
+ );
+ }
+
+ if (!$node_list || scalar($node_list->@*) != 1) {
+ die "virtiofs needs exactly one mapping for this node\n";
+ }
+
+ eval { PVE::Mapping::Dir::assert_valid($node_list->[0]) };
+ die "directory mapping invalid: $@\n" if $@;
+}
+
+sub config {
+ my ($conf, $vmid, $devices) = @_;
+
+ for (my $i = 0; $i < max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+
+ next if !$conf->{$opt};
+ my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ next if !$virtiofs;
+
+ assert_virtiofs_config($conf, $virtiofs);
+
+ push @$devices, '-chardev', "socket,id=virtiofs$i,path=$socket_path_root/vm$vmid-fs$i";
+
+ # queue-size is set 1024 because of bug with Windows guests:
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1873088
+ # 1024 is also always used in the virtiofs documentations:
+ # https://gitlab.com/virtio-fs/virtiofsd#examples
+ push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
+ .",chardev=virtiofs$i,tag=$virtiofs->{dirid}";
+ }
+}
+
+sub virtiofs_enabled {
+ my ($conf) = @_;
+
+ my $virtiofs_enabled = 0;
+ for (my $i = 0; $i < max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+ next if !$conf->{$opt};
+ my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ if ($virtiofs) {
+ $virtiofs_enabled = 1;
+ last;
+ }
+ }
+ return $virtiofs_enabled;
+}
+
+sub start_all_virtiofsd {
+ my ($conf, $vmid) = @_;
+ my $virtiofs_sockets = [];
+ for (my $i = 0; $i < max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+
+ next if !$conf->{$opt};
+ my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ next if !$virtiofs;
+
+ my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs);
+ push @$virtiofs_sockets, $virtiofs_socket;
+ }
+ return $virtiofs_sockets;
+}
+
+sub start_virtiofsd {
+ my ($vmid, $fsid, $virtiofs) = @_;
+
+ mkdir $socket_path_root;
+ my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
+ unlink($socket_path);
+ my $socket = IO::Socket::UNIX->new(
+ Type => SOCK_STREAM,
+ Local => $socket_path,
+ Listen => 1,
+ ) or die "cannot create socket - $!\n";
+
+ my $flags = fcntl($socket, F_GETFD, 0)
+ or die "failed to get file descriptor flags: $!\n";
+ fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
+ or die "failed to remove FD_CLOEXEC from file descriptor\n";
+
+ my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+ my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+ my $node_cfg = $node_list->[0];
+
+ my $virtiofsd_bin = '/usr/libexec/virtiofsd';
+ my $fd = $socket->fileno();
+ my $path = $node_cfg->{path};
+
+ # Default to dir config xattr & acl settings
+ my $xattr = $virtiofs->{xattr} // $dir_cfg->{xattr};
+ my $acl = $virtiofs->{acl} // $dir_cfg->{acl};
+
+ my $could_not_fork_err = "could not fork to start virtiofsd\n";
+ my $pid = fork();
+ if ($pid == 0) {
+ setsid();
+ $0 = "task pve-vm$vmid-virtiofs$fsid";
+ my $pid2 = fork();
+ if ($pid2 == 0) {
+ my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
+ push @$cmd, '--xattr' if $xattr;
+ push @$cmd, '--posix-acl' if $acl;
+ push @$cmd, '--announce-submounts' if ($node_cfg->{submounts});
+ push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'});
+ push @$cmd, '--cache='.$virtiofs->{cache} if ($virtiofs->{cache});
+ push @$cmd, '--writeback' if ($virtiofs->{'writeback'});
+ push @$cmd, '--syslog';
+ exec(@$cmd);
+ } elsif (!defined($pid2)) {
+ die $could_not_fork_err;
+ } else {
+ POSIX::_exit(0);
+ }
+ } elsif (!defined($pid)) {
+ die $could_not_fork_err;
+ } else {
+ waitpid($pid, 0);
+ }
+
+ # return socket to keep it alive,
+ # so that QEMU will wait for virtiofsd to start
+ return $socket;
+}
+
+sub close_sockets {
+ my @sockets = @_;
+ for my $socket (@sockets) {
+ shutdown($socket, 2);
+ close($socket);
+ }
+}
+1;
--
2.39.5
More information about the pve-devel
mailing list