[pve-devel] [PATCH qemu-server v10 5/11] fix #1027: virtio-fs support
Fiona Ebner
f.ebner at proxmox.com
Wed Jun 12 13:50:15 CEST 2024
Am 14.05.24 um 12:54 schrieb Markus Frank:
> add support for sharing directories with a guest vm
>
> virtio-fs needs virtiofsd to be started.
> In order to start virtiofsd as a process (despite being a daemon it is does not
> run in the background), a double-fork is used.
>
> virtiofsd should close itself together with qemu.
Nit: s/qemu/QEMU/
>
> There are the parameters dirid and the optional parameters direct-io, cache and
> writeback. Additionally the xattr & acl parameter overwrite the directory
> mapping settings for xattr & acl.
>
> The dirid gets mapped to the path on the current node and is also used as a
> mount tag (name used to mount the device on the guest).
>
> example config:
> ```
> virtiofs0: foo,direct-io=1,cache=always,acl=1
> virtiofs1: dirid=bar,cache=never,xattr=1,writeback=1
> ```
>
> For information on the optional parameters see the coherent doc patch
> and the official gitlab README:
> https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md
>
> Also add a permission check for virtiofs directory access.
>
> Signed-off-by: Markus Frank <m.frank at proxmox.com>
> ---
> PVE/API2/Qemu.pm | 39 ++++++-
> PVE/QemuServer.pm | 19 +++-
> PVE/QemuServer/Makefile | 3 +-
> PVE/QemuServer/Memory.pm | 34 ++++--
> PVE/QemuServer/Virtiofs.pm | 212 +++++++++++++++++++++++++++++++++++++
> 5 files changed, 296 insertions(+), 11 deletions(-)
> create mode 100644 PVE/QemuServer/Virtiofs.pm
>
> diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
> index 2a349c8..5d97896 100644
> --- a/PVE/API2/Qemu.pm
> +++ b/PVE/API2/Qemu.pm
> @@ -695,6 +695,32 @@ my sub check_vm_create_hostpci_perm {
> return 1;
> };
>
> +my sub check_dir_perm {
> + my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_;
> +
> + return 1 if $authuser eq 'root at pam';
> +
> + $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
> +
> + my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $value);
The "pve-qm-virtiofs" format is registered by PVE::Qemu::Virtiofs so
there should be a use statement for that module at the top.
> + $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
> +
> + return 1;
> +};
> +
---snip---
> diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
> index f365f2d..691f9b3 100644
> --- a/PVE/QemuServer/Memory.pm
> +++ b/PVE/QemuServer/Memory.pm
> @@ -10,6 +10,7 @@ use PVE::Exception qw(raise raise_param_exc);
> use PVE::QemuServer::Helpers qw(parse_number_sets);
> use PVE::QemuServer::Monitor qw(mon_cmd);
> use PVE::QemuServer::QMPHelpers qw(qemu_devicedel qemu_objectdel);
> +use PVE::QemuServer::Virtiofs;
Can't we avoid this use statement? You could check on the call site and
pass in $virtiofs_enabled as a parameter to sub config(). Introducing
that parameter and the necessary changes in config() could also be a
preparatory patch. I.e. initially, there would be no caller passing in
1, but a later patch would change that.
>
> use base qw(Exporter);
>
> @@ -336,7 +337,7 @@ sub qemu_memdevices_list {
> }
>
> sub config {
> - my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd) = @_;
> + my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd, $machine_flags) = @_;
>
> my $memory = get_current_memory($conf->{memory});
> my $static_memory = 0;
> @@ -367,6 +368,16 @@ sub config {
>
> die "numa needs to be enabled to use hugepages" if $conf->{hugepages} && !$conf->{numa};
>
> + my $virtiofs_enabled = 0;
> + for (my $i = 0; $i < PVE::QemuServer::Virtiofs::max_virtiofs(); $i++) {
> + my $opt = "virtiofs$i";
> + next if !$conf->{$opt};
> + my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> + if ($virtiofs) {
> + $virtiofs_enabled = 1;
> + }
> + }
> +
> if ($conf->{numa}) {
>
> my $numa_totalmemory = undef;
> @@ -379,7 +390,8 @@ sub config {
> my $numa_memory = $numa->{memory};
> $numa_totalmemory += $numa_memory;
>
> - my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
> + my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
> + my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
>
> # cpus
> my $cpulists = $numa->{cpus};
> @@ -404,7 +416,7 @@ sub config {
> }
>
> push @$cmd, '-object', $mem_object;
> - push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
> + push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
> }
>
> die "total memory for NUMA nodes must be equal to vm static memory\n"
> @@ -418,15 +430,21 @@ sub config {
> die "host NUMA node$i doesn't exist\n"
> if !host_numanode_exists($i) && $conf->{hugepages};
>
> - my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
> - push @$cmd, '-object', $mem_object;
> -
> my $cpus = ($cores * $i);
> $cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
>
> - push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
> + my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
> + my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
> + push @$cmd, '-object', $mem_object;
> + push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
> }
> }
> + } elsif ($virtiofs_enabled) {
> + # kvm: '-machine memory-backend' and '-numa memdev' properties are
> + # mutually exclusive
> + push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem'
> + .",size=$conf->{memory}M,share=on";
> + push @$machine_flags, 'memory-backend=virtiofs-mem';
> }
>
> if ($hotplug) {
> @@ -453,6 +471,8 @@ sub print_mem_object {
> my $path = hugepages_mount_path($hugepages_size);
>
> return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
> + } elsif ($id =~ m/^virtiofs-mem/) {
> + return "memory-backend-memfd,id=$id,size=${size}M,share=on";
> } else {
> return "memory-backend-ram,id=$id,size=${size}M";
> }
> diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm
> new file mode 100644
> index 0000000..4c59348
> --- /dev/null
> +++ b/PVE/QemuServer/Virtiofs.pm
> @@ -0,0 +1,212 @@
> +package PVE::QemuServer::Virtiofs;
> +
> +use strict;
> +use warnings;
> +
> +use Fcntl;
Should be use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC); since you use those
constants below.
> +use IO::Socket::UNIX;
> +use POSIX;
Nit: please add a blank line here.
> +use PVE::JSONSchema qw(get_standard_option parse_property_string);
> +use PVE::Mapping::Dir;
> +
> +use base qw(Exporter);
> +
> +our @EXPORT_OK = qw(
> +max_virtiofs
> +assert_virtiofs_config
> +start_virtiofsd
> +start_all_virtiofsd
> +virtiofs_qemu_param
This function does not exist?
> +close_sockets
This is too generic of a name to export. All the callers use the prefix
luckily, so this export can just be dropped.
Since you only import max_virtiofs and start_all_virtiofsd in
QemuServer.pm, why not just export those two?
> +);
> +
> +my $MAX_VIRTIOFS = 10;
> +my $socket_path_root = "/run/qemu-server/virtiofsd";
> +
> +my $virtiofs_fmt = {
> + 'dirid' => {
> + type => 'string',
> + default_key => 1,
> + description => "Mapping identifier of the directory mapping to be"
Style nit: you can go until 100 characters with your description lines
> + ." shared with the guest. Also used as a mount tag inside the VM.",
> + format_description => 'mapping-id',
> + format => 'pve-configid',
> + },
> + 'cache' => {
> + type => 'string',
> + description => "The caching policy the file system should use (auto, always, never).",
> + enum => [qw(auto always never)],
> + default => "auto",
> + optional => 1,
> + },
> + 'direct-io' => {
> + type => 'boolean',
> + description => "Honor the O_DIRECT flag passed down by guest applications.",
> + default => 0,
> + optional => 1,
> + },
> + writeback => {
> + type => 'boolean',
> + description => "Enable writeback cache. If enabled, writes may be cached"
> + ." in the guest until the file is closed or an fsync is performed.",
> + default => 0,
> + optional => 1,
> + },
> + xattr => {
> + type => 'boolean',
> + description => "Overwrite the xattr option from mapping and explicitly"
> + ." enable/disable support for extended attributes for the VM.",
> + default => "use value from mapping",
> + optional => 1,
> + },
> + acl => {
> + type => 'boolean',
> + description => "Overwrite the acl option from mapping and explicitly"
> + ." enable/disable support for posix ACLs (enabled acl implies xattr)"
> + ." for the VM.",
> + default => "use value from mapping",
> + optional => 1,
> + },
> +};
> +PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
> +
> +my $virtiofsdesc = {
> + optional => 1,
> + type => 'string', format => $virtiofs_fmt,
> + description => "share a directory between host and guest",
Maybe add "Configuration for sharing" at the beginning and "using
Virtio-fs" at the end?
> +};
> +PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
> +
> +sub max_virtiofs {
> + return $MAX_VIRTIOFS;
> +}
> +
> +sub assert_virtiofs_config {
> + my ($conf, $virtiofs) = @_;
Style nit: missing blank line, also for other functions in the module
> + my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
> + my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
> +
> + my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};
Style nit: using single quotes for 'acl' key
> + if ($acl && PVE::QemuServer::windows_version($conf->{ostype})) {
There is no use statement for PVE::QemuServer at the beginning and that
would create a cyclic dependency. However, the function actually lives
in PVE::QemuServer::Helpers, so please use it from there.
> + log_warn(
log_warn was never imported.
> + "Please disable ACLs for virtiofs on Windows VMs, otherwise"
> + ." the virtiofs shared directory cannot be mounted."
> + );
> + }
> +
> + if (!$node_list || scalar($node_list->@*) != 1) {
> + die "virtiofs needs exactly one mapping for this node\n";
> + }
> +
> + eval PVE::Mapping::Dir::assert_valid($node_list->[0]);
Missing curly braces after eval
> + if (my $err = $@) {
> + die "Directory Mapping invalid: $err\n";
Please do not capitalize "Mapping", "Directory" also doesn't need to be,
but can be fine. Could also be done as a one-liner with
die "...: $@\n" if $@;
> + }
> +}
> +
> +sub config {
> + my ($conf, $vmid, $devices, $machine_flags) = @_;
$machine_flags is not used?
> + for (my $i = 0; $i < max_virtiofs(); $i++) {
> + my $opt = "virtiofs$i";
> +
> + next if !$conf->{$opt};
> + my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> + next if !$virtiofs;
> +
> + assert_virtiofs_config($conf, $virtiofs);
> +
> + push @$devices, '-chardev', "socket,id=virtfs$i,path=$socket_path_root/vm$vmid-fs$i";
Is using "virtfs" rather than "virtiofs" for the id intentional? Would
need to be adapted below too
> +
> + # queue-size is set 1024 because of bug with Windows guests:
> + # https://bugzilla.redhat.com/show_bug.cgi?id=1873088
> + # 1024 is also always used in the virtiofs documentations:
> + # https://gitlab.com/virtio-fs/virtiofsd#examples
> + push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
> + .",chardev=virtfs$i,tag=$virtiofs->{dirid}";
> + }
> +}
> +
> +sub start_all_virtiofsd {
> + my ($conf, $vmid) = @_;
> + my $virtiofs_sockets = [];
> + for (my $i = 0; $i < max_virtiofs(); $i++) {
> + my $opt = "virtiofs$i";
> +
> + next if !$conf->{$opt};
> + my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> + next if !$virtiofs;
> +
> + my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs);
> + push @$virtiofs_sockets, $virtiofs_socket;
> + }
> + return $virtiofs_sockets;
> +}
> +
> +sub close_sockets {
> + my @sockets = @_;
> + for my $socket (@sockets) {
> + shutdown($socket, 2);
> + close($socket);
> + }
> +}
> +
> +sub start_virtiofsd {
> + my ($vmid, $fsid, $virtiofs) = @_;
> +
> + my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
> + my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
> +
> + # Default to dir config xattr & acl settings
> + my $xattr = $virtiofs->{xattr} // $dir_cfg->{xattr};
> + my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};
Style nit: using single quotes for 'acl' key
> +
> + my $node_cfg = $node_list->[0];
> + my $path = $node_cfg->{path};
Style nit: let's move the $path variable closer to where it is used,
i.e. after the $socket_path stuff ;)
> + mkdir $socket_path_root;
> + my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
> + unlink($socket_path);
> + my $socket = IO::Socket::UNIX->new(
> + Type => SOCK_STREAM,
Missing use Socket qw(SOCK_STREAM); at the top
> + Local => $socket_path,
> + Listen => 1,
> + ) or die "cannot create socket - $!\n";
> +
> + my $flags = fcntl($socket, F_GETFD, 0)
> + or die "failed to get file descriptor flags: $!\n";
> + fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
> + or die "failed to remove FD_CLOEXEC from file descriptor\n";
> +
> + my $fd = $socket->fileno();
> +
> + my $virtiofsd_bin = '/usr/libexec/virtiofsd';
> +
> + my $pid = fork();
> + if ($pid == 0) {
> + setsid();
POSIX::setsid();
> + $0 = "task pve-vm$vmid-virtiofs$fsid";
> + my $pid2 = fork();
> + if ($pid2 == 0) {
> + my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
> + push @$cmd, '--xattr' if $xattr;
> + push @$cmd, '--posix-acl' if $acl;
> + push @$cmd, '--announce-submounts' if ($node_cfg->{submounts});
Style nit: superfluous parentheses for post-if, also below
> + push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'});
> + push @$cmd, "--cache=$virtiofs->{'cache'}" if ($virtiofs->{'cache'});
> + push @$cmd, "--writeback" if ($virtiofs->{'writeback'});
> + push @$cmd, '--syslog';
> + exec(@$cmd);
> + } elsif (!defined($pid2)) {
> + die "could not fork to start virtiofsd\n";
> + } else {
> + POSIX::_exit(0);
> + }
> + } elsif (!defined($pid)) {
> + die "could not fork to start virtiofsd\n";
> + } else {
> + waitpid($pid, 0);
> + }
> +
> + # return socket to keep it alive,
> + # so that QEMU will wait for virtiofsd to start
> + return $socket;
> +}
Module should end with "1;"
More information about the pve-devel
mailing list