[pve-devel] [PATCH qemu-server v10 5/11] fix #1027: virtio-fs support
Markus Frank
m.frank at proxmox.com
Tue May 14 12:54:36 CEST 2024
add support for sharing directories with a guest vm
virtio-fs needs virtiofsd to be started.
In order to start virtiofsd as a process (despite being a daemon it is does not
run in the background), a double-fork is used.
virtiofsd should close itself together with qemu.
There are the parameters dirid and the optional parameters direct-io, cache and
writeback. Additionally the xattr & acl parameter overwrite the directory
mapping settings for xattr & acl.
The dirid gets mapped to the path on the current node and is also used as a
mount tag (name used to mount the device on the guest).
example config:
```
virtiofs0: foo,direct-io=1,cache=always,acl=1
virtiofs1: dirid=bar,cache=never,xattr=1,writeback=1
```
For information on the optional parameters see the coherent doc patch
and the official gitlab README:
https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md
Also add a permission check for virtiofs directory access.
Signed-off-by: Markus Frank <m.frank at proxmox.com>
---
PVE/API2/Qemu.pm | 39 ++++++-
PVE/QemuServer.pm | 19 +++-
PVE/QemuServer/Makefile | 3 +-
PVE/QemuServer/Memory.pm | 34 ++++--
PVE/QemuServer/Virtiofs.pm | 212 +++++++++++++++++++++++++++++++++++++
5 files changed, 296 insertions(+), 11 deletions(-)
create mode 100644 PVE/QemuServer/Virtiofs.pm
diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index 2a349c8..5d97896 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -695,6 +695,32 @@ my sub check_vm_create_hostpci_perm {
return 1;
};
+my sub check_dir_perm {
+ my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_;
+
+ return 1 if $authuser eq 'root at pam';
+
+ $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
+
+ my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $value);
+ $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
+
+ return 1;
+};
+
+my sub check_vm_create_dir_perm {
+ my ($rpcenv, $authuser, $vmid, $pool, $param) = @_;
+
+ return 1 if $authuser eq 'root at pam';
+
+ for my $opt (keys %{$param}) {
+ next if $opt !~ m/^virtiofs\d+$/;
+ check_dir_perm($rpcenv, $authuser, $vmid, $pool, $opt, $param->{$opt});
+ }
+
+ return 1;
+};
+
my $check_vm_modify_config_perm = sub {
my ($rpcenv, $authuser, $vmid, $pool, $key_list) = @_;
@@ -705,7 +731,7 @@ my $check_vm_modify_config_perm = sub {
# else, as there the permission can be value dependend
next if PVE::QemuServer::is_valid_drivename($opt);
next if $opt eq 'cdrom';
- next if $opt =~ m/^(?:unused|serial|usb|hostpci)\d+$/;
+ next if $opt =~ m/^(?:unused|serial|usb|hostpci|virtiofs)\d+$/;
next if $opt eq 'tags';
@@ -999,6 +1025,7 @@ __PACKAGE__->register_method({
&$check_vm_create_serial_perm($rpcenv, $authuser, $vmid, $pool, $param);
check_vm_create_usb_perm($rpcenv, $authuser, $vmid, $pool, $param);
check_vm_create_hostpci_perm($rpcenv, $authuser, $vmid, $pool, $param);
+ check_vm_create_dir_perm($rpcenv, $authuser, $vmid, $pool, $param);
PVE::QemuServer::check_bridge_access($rpcenv, $authuser, $param);
&$check_cpu_model_access($rpcenv, $authuser, $param);
@@ -1899,6 +1926,10 @@ my $update_vm_api = sub {
check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
PVE::QemuConfig->write_config($vmid, $conf);
+ } elsif ($opt =~ m/^virtiofs\d$/) {
+ check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $val);
+ PVE::QemuConfig->add_to_pending_delete($conf, $opt, $force);
+ PVE::QemuConfig->write_config($vmid, $conf);
} elsif ($opt eq 'tags') {
assert_tag_permissions($vmid, $val, '', $rpcenv, $authuser);
delete $conf->{$opt};
@@ -1987,6 +2018,12 @@ my $update_vm_api = sub {
}
check_hostpci_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
$conf->{pending}->{$opt} = $param->{$opt};
+ } elsif ($opt =~ m/^virtiofs\d$/) {
+ if (my $oldvalue = $conf->{$opt}) {
+ check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $oldvalue);
+ }
+ check_dir_perm($rpcenv, $authuser, $vmid, undef, $opt, $param->{$opt});
+ $conf->{pending}->{$opt} = $param->{$opt};
} elsif ($opt eq 'tags') {
assert_tag_permissions($vmid, $conf->{$opt}, $param->{$opt}, $rpcenv, $authuser);
$conf->{pending}->{$opt} = PVE::GuestHelpers::get_unique_tags($param->{$opt});
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 9032d29..a0600d7 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -35,6 +35,7 @@ use PVE::Exception qw(raise raise_param_exc);
use PVE::Format qw(render_duration render_bytes);
use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
use PVE::HA::Config;
+use PVE::Mapping::Dir;
use PVE::Mapping::PCI;
use PVE::Mapping::USB;
use PVE::INotify;
@@ -61,6 +62,7 @@ use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
use PVE::QemuServer::USB;
+use PVE::QemuServer::Virtiofs qw(max_virtiofs start_all_virtiofsd);
my $have_sdn;
eval {
@@ -956,6 +958,10 @@ my $netdesc = {
PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
+for (my $i = 0; $i < max_virtiofs(); $i++) {
+ $confdesc->{"virtiofs$i"} = get_standard_option('pve-qm-virtiofs');
+}
+
my $ipconfig_fmt = {
ip => {
type => 'string',
@@ -3807,7 +3813,7 @@ sub config_to_command {
}
PVE::QemuServer::Memory::config(
- $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
+ $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd, $machineFlags);
push @$cmd, '-S' if $conf->{freeze};
@@ -4094,6 +4100,8 @@ sub config_to_command {
}
}
+ PVE::QemuServer::Virtiofs::config($conf, $vmid, $devices, $machineFlags);
+
push @$cmd, @$devices;
push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
@@ -5880,6 +5888,8 @@ sub vm_start_nolock {
PVE::Tools::run_fork sub {
PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
+ my $virtiofs_sockets = start_all_virtiofsd($conf, $vmid);
+
my $tpmpid;
if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
# start the TPM emulator so QEMU can connect on start
@@ -5892,8 +5902,10 @@ sub vm_start_nolock {
warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
kill 'TERM', $tpmpid;
}
+ PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);
die "QEMU exited with code $exitcode\n";
}
+ PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);
};
};
@@ -6527,7 +6539,10 @@ sub check_mapping_access {
} else {
die "either 'host' or 'mapping' must be set.\n";
}
- }
+ } elsif ($opt =~ m/^virtiofs\d$/) {
+ my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ $rpcenv->check_full($user, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
+ }
}
};
diff --git a/PVE/QemuServer/Makefile b/PVE/QemuServer/Makefile
index ac26e56..d1bf8bb 100644
--- a/PVE/QemuServer/Makefile
+++ b/PVE/QemuServer/Makefile
@@ -11,7 +11,8 @@ SOURCES=PCI.pm \
CPUConfig.pm \
CGroup.pm \
Drive.pm \
- QMPHelpers.pm
+ QMPHelpers.pm \
+ Virtiofs.pm
.PHONY: install
install: ${SOURCES}
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index f365f2d..691f9b3 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -10,6 +10,7 @@ use PVE::Exception qw(raise raise_param_exc);
use PVE::QemuServer::Helpers qw(parse_number_sets);
use PVE::QemuServer::Monitor qw(mon_cmd);
use PVE::QemuServer::QMPHelpers qw(qemu_devicedel qemu_objectdel);
+use PVE::QemuServer::Virtiofs;
use base qw(Exporter);
@@ -336,7 +337,7 @@ sub qemu_memdevices_list {
}
sub config {
- my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd) = @_;
+ my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd, $machine_flags) = @_;
my $memory = get_current_memory($conf->{memory});
my $static_memory = 0;
@@ -367,6 +368,16 @@ sub config {
die "numa needs to be enabled to use hugepages" if $conf->{hugepages} && !$conf->{numa};
+ my $virtiofs_enabled = 0;
+ for (my $i = 0; $i < PVE::QemuServer::Virtiofs::max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+ next if !$conf->{$opt};
+ my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ if ($virtiofs) {
+ $virtiofs_enabled = 1;
+ }
+ }
+
if ($conf->{numa}) {
my $numa_totalmemory = undef;
@@ -379,7 +390,8 @@ sub config {
my $numa_memory = $numa->{memory};
$numa_totalmemory += $numa_memory;
- my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
+ my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+ my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
# cpus
my $cpulists = $numa->{cpus};
@@ -404,7 +416,7 @@ sub config {
}
push @$cmd, '-object', $mem_object;
- push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+ push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
}
die "total memory for NUMA nodes must be equal to vm static memory\n"
@@ -418,15 +430,21 @@ sub config {
die "host NUMA node$i doesn't exist\n"
if !host_numanode_exists($i) && $conf->{hugepages};
- my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
- push @$cmd, '-object', $mem_object;
-
my $cpus = ($cores * $i);
$cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
- push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+ my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+ my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
+ push @$cmd, '-object', $mem_object;
+ push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
}
}
+ } elsif ($virtiofs_enabled) {
+ # kvm: '-machine memory-backend' and '-numa memdev' properties are
+ # mutually exclusive
+ push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem'
+ .",size=$conf->{memory}M,share=on";
+ push @$machine_flags, 'memory-backend=virtiofs-mem';
}
if ($hotplug) {
@@ -453,6 +471,8 @@ sub print_mem_object {
my $path = hugepages_mount_path($hugepages_size);
return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
+ } elsif ($id =~ m/^virtiofs-mem/) {
+ return "memory-backend-memfd,id=$id,size=${size}M,share=on";
} else {
return "memory-backend-ram,id=$id,size=${size}M";
}
diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm
new file mode 100644
index 0000000..4c59348
--- /dev/null
+++ b/PVE/QemuServer/Virtiofs.pm
@@ -0,0 +1,212 @@
+package PVE::QemuServer::Virtiofs;
+
+use strict;
+use warnings;
+
+use Fcntl;
+use IO::Socket::UNIX;
+use POSIX;
+use PVE::JSONSchema qw(get_standard_option parse_property_string);
+use PVE::Mapping::Dir;
+
+use base qw(Exporter);
+
+our @EXPORT_OK = qw(
+max_virtiofs
+assert_virtiofs_config
+start_virtiofsd
+start_all_virtiofsd
+virtiofs_qemu_param
+close_sockets
+);
+
+my $MAX_VIRTIOFS = 10;
+my $socket_path_root = "/run/qemu-server/virtiofsd";
+
+my $virtiofs_fmt = {
+ 'dirid' => {
+ type => 'string',
+ default_key => 1,
+ description => "Mapping identifier of the directory mapping to be"
+ ." shared with the guest. Also used as a mount tag inside the VM.",
+ format_description => 'mapping-id',
+ format => 'pve-configid',
+ },
+ 'cache' => {
+ type => 'string',
+ description => "The caching policy the file system should use (auto, always, never).",
+ enum => [qw(auto always never)],
+ default => "auto",
+ optional => 1,
+ },
+ 'direct-io' => {
+ type => 'boolean',
+ description => "Honor the O_DIRECT flag passed down by guest applications.",
+ default => 0,
+ optional => 1,
+ },
+ writeback => {
+ type => 'boolean',
+ description => "Enable writeback cache. If enabled, writes may be cached"
+ ." in the guest until the file is closed or an fsync is performed.",
+ default => 0,
+ optional => 1,
+ },
+ xattr => {
+ type => 'boolean',
+ description => "Overwrite the xattr option from mapping and explicitly"
+ ." enable/disable support for extended attributes for the VM.",
+ default => "use value from mapping",
+ optional => 1,
+ },
+ acl => {
+ type => 'boolean',
+ description => "Overwrite the acl option from mapping and explicitly"
+ ." enable/disable support for posix ACLs (enabled acl implies xattr)"
+ ." for the VM.",
+ default => "use value from mapping",
+ optional => 1,
+ },
+};
+PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
+
+my $virtiofsdesc = {
+ optional => 1,
+ type => 'string', format => $virtiofs_fmt,
+ description => "share a directory between host and guest",
+};
+PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
+
+sub max_virtiofs {
+ return $MAX_VIRTIOFS;
+}
+
+sub assert_virtiofs_config {
+ my ($conf, $virtiofs) = @_;
+ my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+ my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+
+ my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};
+ if ($acl && PVE::QemuServer::windows_version($conf->{ostype})) {
+ log_warn(
+ "Please disable ACLs for virtiofs on Windows VMs, otherwise"
+ ." the virtiofs shared directory cannot be mounted."
+ );
+ }
+
+ if (!$node_list || scalar($node_list->@*) != 1) {
+ die "virtiofs needs exactly one mapping for this node\n";
+ }
+
+ eval PVE::Mapping::Dir::assert_valid($node_list->[0]);
+ if (my $err = $@) {
+ die "Directory Mapping invalid: $err\n";
+ }
+}
+
+sub config {
+ my ($conf, $vmid, $devices, $machine_flags) = @_;
+ for (my $i = 0; $i < max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+
+ next if !$conf->{$opt};
+ my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ next if !$virtiofs;
+
+ assert_virtiofs_config($conf, $virtiofs);
+
+ push @$devices, '-chardev', "socket,id=virtfs$i,path=$socket_path_root/vm$vmid-fs$i";
+
+ # queue-size is set 1024 because of bug with Windows guests:
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1873088
+ # 1024 is also always used in the virtiofs documentations:
+ # https://gitlab.com/virtio-fs/virtiofsd#examples
+ push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
+ .",chardev=virtfs$i,tag=$virtiofs->{dirid}";
+ }
+}
+
+sub start_all_virtiofsd {
+ my ($conf, $vmid) = @_;
+ my $virtiofs_sockets = [];
+ for (my $i = 0; $i < max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+
+ next if !$conf->{$opt};
+ my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ next if !$virtiofs;
+
+ my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs);
+ push @$virtiofs_sockets, $virtiofs_socket;
+ }
+ return $virtiofs_sockets;
+}
+
+sub close_sockets {
+ my @sockets = @_;
+ for my $socket (@sockets) {
+ shutdown($socket, 2);
+ close($socket);
+ }
+}
+
+sub start_virtiofsd {
+ my ($vmid, $fsid, $virtiofs) = @_;
+
+ my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+ my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+
+ # Default to dir config xattr & acl settings
+ my $xattr = $virtiofs->{xattr} // $dir_cfg->{xattr};
+ my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};
+
+ my $node_cfg = $node_list->[0];
+ my $path = $node_cfg->{path};
+ mkdir $socket_path_root;
+ my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
+ unlink($socket_path);
+ my $socket = IO::Socket::UNIX->new(
+ Type => SOCK_STREAM,
+ Local => $socket_path,
+ Listen => 1,
+ ) or die "cannot create socket - $!\n";
+
+ my $flags = fcntl($socket, F_GETFD, 0)
+ or die "failed to get file descriptor flags: $!\n";
+ fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
+ or die "failed to remove FD_CLOEXEC from file descriptor\n";
+
+ my $fd = $socket->fileno();
+
+ my $virtiofsd_bin = '/usr/libexec/virtiofsd';
+
+ my $pid = fork();
+ if ($pid == 0) {
+ setsid();
+ $0 = "task pve-vm$vmid-virtiofs$fsid";
+ my $pid2 = fork();
+ if ($pid2 == 0) {
+ my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
+ push @$cmd, '--xattr' if $xattr;
+ push @$cmd, '--posix-acl' if $acl;
+ push @$cmd, '--announce-submounts' if ($node_cfg->{submounts});
+ push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'});
+ push @$cmd, "--cache=$virtiofs->{'cache'}" if ($virtiofs->{'cache'});
+ push @$cmd, "--writeback" if ($virtiofs->{'writeback'});
+ push @$cmd, '--syslog';
+ exec(@$cmd);
+ } elsif (!defined($pid2)) {
+ die "could not fork to start virtiofsd\n";
+ } else {
+ POSIX::_exit(0);
+ }
+ } elsif (!defined($pid)) {
+ die "could not fork to start virtiofsd\n";
+ } else {
+ waitpid($pid, 0);
+ }
+
+ # return socket to keep it alive,
+ # so that QEMU will wait for virtiofsd to start
+ return $socket;
+}
--
2.39.2
More information about the pve-devel
mailing list