[pve-devel] [PATCH qemu-server v10 5/11] fix #1027: virtio-fs support

Fiona Ebner f.ebner at proxmox.com
Wed Jun 12 13:50:15 CEST 2024


Am 14.05.24 um 12:54 schrieb Markus Frank:
> add support for sharing directories with a guest vm
> 
> virtio-fs needs virtiofsd to be started.
> In order to start virtiofsd as a process (despite being a daemon it is does not
> run in the background), a double-fork is used.
> 
> virtiofsd should close itself together with qemu.

Nit: s/qemu/QEMU/

> 
> There are the parameters dirid and the optional parameters direct-io, cache and
> writeback. Additionally the xattr & acl parameter overwrite the directory
> mapping settings for xattr & acl.
> 
> The dirid gets mapped to the path on the current node and is also used as a
> mount tag (name used to mount the device on the guest).
> 
> example config:
> ```
> virtiofs0: foo,direct-io=1,cache=always,acl=1
> virtiofs1: dirid=bar,cache=never,xattr=1,writeback=1
> ```
> 
> For information on the optional parameters see the coherent doc patch
> and the official gitlab README:
> https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md
> 
> Also add a permission check for virtiofs directory access.
> 
> Signed-off-by: Markus Frank <m.frank at proxmox.com>
> ---
>  PVE/API2/Qemu.pm           |  39 ++++++-
>  PVE/QemuServer.pm          |  19 +++-
>  PVE/QemuServer/Makefile    |   3 +-
>  PVE/QemuServer/Memory.pm   |  34 ++++--
>  PVE/QemuServer/Virtiofs.pm | 212 +++++++++++++++++++++++++++++++++++++
>  5 files changed, 296 insertions(+), 11 deletions(-)
>  create mode 100644 PVE/QemuServer/Virtiofs.pm
> 
> diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
> index 2a349c8..5d97896 100644
> --- a/PVE/API2/Qemu.pm
> +++ b/PVE/API2/Qemu.pm
> @@ -695,6 +695,32 @@ my sub check_vm_create_hostpci_perm {
>      return 1;
>  };
>  
> +my sub check_dir_perm {
> +    my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_;
> +
> +    return 1 if $authuser eq 'root at pam';
> +
> +    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
> +
> +    my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $value);

The "pve-qm-virtiofs" format is registered by PVE::Qemu::Virtiofs so
there should be a use statement for that module at the top.

> +    $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
> +
> +    return 1;
> +};
> +

---snip---

> diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
> index f365f2d..691f9b3 100644
> --- a/PVE/QemuServer/Memory.pm
> +++ b/PVE/QemuServer/Memory.pm
> @@ -10,6 +10,7 @@ use PVE::Exception qw(raise raise_param_exc);
>  use PVE::QemuServer::Helpers qw(parse_number_sets);
>  use PVE::QemuServer::Monitor qw(mon_cmd);
>  use PVE::QemuServer::QMPHelpers qw(qemu_devicedel qemu_objectdel);
> +use PVE::QemuServer::Virtiofs;

Can't we avoid this use statement? You could check on the call site and
pass in $virtiofs_enabled as a parameter to sub config(). Introducing
that parameter and the necessary changes in config() could also be a
preparatory patch. I.e. initially, there would be no caller passing in
1, but a later patch would change that.

>  
>  use base qw(Exporter);
>  
> @@ -336,7 +337,7 @@ sub qemu_memdevices_list {
>  }
>  
>  sub config {
> -    my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd) = @_;
> +    my ($conf, $vmid, $sockets, $cores, $hotplug, $cmd, $machine_flags) = @_;
>  
>      my $memory = get_current_memory($conf->{memory});
>      my $static_memory = 0;
> @@ -367,6 +368,16 @@ sub config {
>  
>      die "numa needs to be enabled to use hugepages" if $conf->{hugepages} && !$conf->{numa};
>  
> +    my $virtiofs_enabled = 0;
> +    for (my $i = 0; $i < PVE::QemuServer::Virtiofs::max_virtiofs(); $i++) {
> +	my $opt = "virtiofs$i";
> +	next if !$conf->{$opt};
> +	my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +	if ($virtiofs) {
> +	    $virtiofs_enabled = 1;
> +	}
> +    }
> +
>      if ($conf->{numa}) {
>  
>  	my $numa_totalmemory = undef;
> @@ -379,7 +390,8 @@ sub config {
>  	    my $numa_memory = $numa->{memory};
>  	    $numa_totalmemory += $numa_memory;
>  
> -	    my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
> +	    my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
> +	    my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
>  
>  	    # cpus
>  	    my $cpulists = $numa->{cpus};
> @@ -404,7 +416,7 @@ sub config {
>  	    }
>  
>  	    push @$cmd, '-object', $mem_object;
> -	    push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
> +	    push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
>  	}
>  
>  	die "total memory for NUMA nodes must be equal to vm static memory\n"
> @@ -418,15 +430,21 @@ sub config {
>  		die "host NUMA node$i doesn't exist\n"
>  		    if !host_numanode_exists($i) && $conf->{hugepages};
>  
> -		my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
> -		push @$cmd, '-object', $mem_object;
> -
>  		my $cpus = ($cores * $i);
>  		$cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
>  
> -		push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
> +		my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
> +		my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
> +		push @$cmd, '-object', $mem_object;
> +		push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
>  	    }
>  	}
> +    } elsif ($virtiofs_enabled) {
> +	# kvm: '-machine memory-backend' and '-numa memdev' properties are
> +	# mutually exclusive
> +	push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem'
> +	    .",size=$conf->{memory}M,share=on";
> +	push @$machine_flags, 'memory-backend=virtiofs-mem';
>      }
>  
>      if ($hotplug) {
> @@ -453,6 +471,8 @@ sub print_mem_object {
>  	my $path = hugepages_mount_path($hugepages_size);
>  
>  	return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
> +    } elsif ($id =~ m/^virtiofs-mem/) {
> +	return "memory-backend-memfd,id=$id,size=${size}M,share=on";
>      } else {
>  	return "memory-backend-ram,id=$id,size=${size}M";
>      }
> diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm
> new file mode 100644
> index 0000000..4c59348
> --- /dev/null
> +++ b/PVE/QemuServer/Virtiofs.pm
> @@ -0,0 +1,212 @@
> +package PVE::QemuServer::Virtiofs;
> +
> +use strict;
> +use warnings;
> +
> +use Fcntl;

Should be use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC); since you use those
constants below.

> +use IO::Socket::UNIX;
> +use POSIX;

Nit: please add a blank line here.

> +use PVE::JSONSchema qw(get_standard_option parse_property_string);
> +use PVE::Mapping::Dir;
> +
> +use base qw(Exporter);
> +
> +our @EXPORT_OK = qw(
> +max_virtiofs
> +assert_virtiofs_config
> +start_virtiofsd
> +start_all_virtiofsd
> +virtiofs_qemu_param

This function does not exist?

> +close_sockets

This is too generic of a name to export. All the callers use the prefix
luckily, so this export can just be dropped.

Since you only import max_virtiofs and start_all_virtiofsd in
QemuServer.pm, why not just export those two?

> +);
> +
> +my $MAX_VIRTIOFS = 10;
> +my $socket_path_root = "/run/qemu-server/virtiofsd";
> +
> +my $virtiofs_fmt = {
> +    'dirid' => {
> +	type => 'string',
> +	default_key => 1,
> +	description => "Mapping identifier of the directory mapping to be"

Style nit: you can go until 100 characters with your description lines

> +	    ." shared with the guest. Also used as a mount tag inside the VM.",
> +	format_description => 'mapping-id',
> +	format => 'pve-configid',
> +    },
> +    'cache' => {
> +	type => 'string',
> +	description => "The caching policy the file system should use (auto, always, never).",
> +	enum => [qw(auto always never)],
> +	default => "auto",
> +	optional => 1,
> +    },
> +    'direct-io' => {
> +	type => 'boolean',
> +	description => "Honor the O_DIRECT flag passed down by guest applications.",
> +	default => 0,
> +	optional => 1,
> +    },
> +    writeback => {
> +	type => 'boolean',
> +	description => "Enable writeback cache. If enabled, writes may be cached"
> +	    ." in the guest until the file is closed or an fsync is performed.",
> +	default => 0,
> +	optional => 1,
> +    },
> +    xattr => {
> +	type => 'boolean',
> +	description => "Overwrite the xattr option from mapping and explicitly"
> +	    ." enable/disable support for extended attributes for the VM.",
> +	default => "use value from mapping",
> +	optional => 1,
> +    },
> +    acl => {
> +	type => 'boolean',
> +	description => "Overwrite the acl option from mapping and explicitly"
> +	    ." enable/disable support for posix ACLs (enabled acl implies xattr)"
> +	    ." for the VM.",
> +	default => "use value from mapping",
> +	optional => 1,
> +    },
> +};
> +PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
> +
> +my $virtiofsdesc = {
> +    optional => 1,
> +    type => 'string', format => $virtiofs_fmt,
> +    description => "share a directory between host and guest",

Maybe add "Configuration for sharing" at the beginning and "using
Virtio-fs" at the end?

> +};
> +PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
> +
> +sub max_virtiofs {
> +    return $MAX_VIRTIOFS;
> +}
> +
> +sub assert_virtiofs_config {
> +    my ($conf, $virtiofs) = @_;

Style nit: missing blank line, also for other functions in the module

> +    my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
> +    my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
> +
> +    my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};

Style nit: using single quotes for 'acl' key

> +    if ($acl && PVE::QemuServer::windows_version($conf->{ostype})) {

There is no use statement for PVE::QemuServer at the beginning and that
would create a cyclic dependency. However, the function actually lives
in PVE::QemuServer::Helpers, so please use it from there.

> +	log_warn(

log_warn was never imported.

> +	    "Please disable ACLs for virtiofs on Windows VMs, otherwise"
> +	    ." the virtiofs shared directory cannot be mounted."
> +	);
> +    }
> +
> +    if (!$node_list || scalar($node_list->@*) != 1) {
> +	die "virtiofs needs exactly one mapping for this node\n";
> +    }
> +
> +    eval PVE::Mapping::Dir::assert_valid($node_list->[0]);

Missing curly braces after eval

> +    if (my $err = $@) {
> +	die "Directory Mapping invalid: $err\n";

Please do not capitalize "Mapping", "Directory" also doesn't need to be,
but can be fine. Could also be done as a one-liner with
die "...: $@\n" if $@;

> +    }
> +}
> +
> +sub config {
> +    my ($conf, $vmid, $devices, $machine_flags) = @_;

$machine_flags is not used?

> +    for (my $i = 0; $i < max_virtiofs(); $i++) {
> +	my $opt = "virtiofs$i";
> +
> +	next if !$conf->{$opt};
> +	my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +	next if !$virtiofs;
> +
> +	assert_virtiofs_config($conf, $virtiofs);
> +
> +	push @$devices, '-chardev', "socket,id=virtfs$i,path=$socket_path_root/vm$vmid-fs$i";

Is using "virtfs" rather than "virtiofs" for the id intentional? Would
need to be adapted below too

> +
> +	# queue-size is set 1024 because of bug with Windows guests:
> +	# https://bugzilla.redhat.com/show_bug.cgi?id=1873088
> +	# 1024 is also always used in the virtiofs documentations:
> +	# https://gitlab.com/virtio-fs/virtiofsd#examples
> +	push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
> +	    .",chardev=virtfs$i,tag=$virtiofs->{dirid}";
> +    }
> +}
> +
> +sub start_all_virtiofsd {
> +    my ($conf, $vmid) = @_;
> +    my $virtiofs_sockets = [];
> +    for (my $i = 0; $i < max_virtiofs(); $i++) {
> +	my $opt = "virtiofs$i";
> +
> +	next if !$conf->{$opt};
> +	my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +	next if !$virtiofs;
> +
> +	my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs);
> +	push @$virtiofs_sockets, $virtiofs_socket;
> +    }
> +    return $virtiofs_sockets;
> +}
> +
> +sub close_sockets {
> +    my @sockets = @_;
> +    for my $socket (@sockets) {
> +	shutdown($socket, 2);
> +	close($socket);
> +    }
> +}
> +
> +sub start_virtiofsd {
> +    my ($vmid, $fsid, $virtiofs) = @_;
> +
> +    my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
> +    my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
> +
> +    # Default to dir config xattr & acl settings
> +    my $xattr = $virtiofs->{xattr} // $dir_cfg->{xattr};
> +    my $acl = $virtiofs->{'acl'} // $dir_cfg->{'acl'};

Style nit: using single quotes for 'acl' key

> +
> +    my $node_cfg = $node_list->[0];
> +    my $path = $node_cfg->{path};

Style nit: let's move the $path variable closer to where it is used,
i.e. after the $socket_path stuff ;)

> +    mkdir $socket_path_root;
> +    my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
> +    unlink($socket_path);
> +    my $socket = IO::Socket::UNIX->new(
> +	Type => SOCK_STREAM,

Missing use Socket qw(SOCK_STREAM); at the top

> +	Local => $socket_path,
> +	Listen => 1,
> +    ) or die "cannot create socket - $!\n";
> +
> +    my $flags = fcntl($socket, F_GETFD, 0)
> +	or die "failed to get file descriptor flags: $!\n";
> +    fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
> +	or die "failed to remove FD_CLOEXEC from file descriptor\n";
> +
> +    my $fd = $socket->fileno();
> +
> +    my $virtiofsd_bin = '/usr/libexec/virtiofsd';
> +
> +    my $pid = fork();
> +    if ($pid == 0) {
> +	setsid();

POSIX::setsid();

> +	$0 = "task pve-vm$vmid-virtiofs$fsid";
> +	my $pid2 = fork();
> +	if ($pid2 == 0) {
> +	    my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
> +	    push @$cmd, '--xattr' if $xattr;
> +	    push @$cmd, '--posix-acl' if $acl;
> +	    push @$cmd, '--announce-submounts' if ($node_cfg->{submounts});

Style nit: superfluous parentheses for post-if, also below

> +	    push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'});
> +	    push @$cmd, "--cache=$virtiofs->{'cache'}" if ($virtiofs->{'cache'});
> +	    push @$cmd, "--writeback" if ($virtiofs->{'writeback'});
> +	    push @$cmd, '--syslog';
> +	    exec(@$cmd);
> +	} elsif (!defined($pid2)) {
> +	    die "could not fork to start virtiofsd\n";
> +	} else {
> +	    POSIX::_exit(0);
> +	}
> +    } elsif (!defined($pid)) {
> +	die "could not fork to start virtiofsd\n";
> +    } else {
> +	waitpid($pid, 0);
> +    }
> +
> +    # return socket to keep it alive,
> +    # so that QEMU will wait for virtiofsd to start
> +    return $socket;
> +}

Module should end with "1;"




More information about the pve-devel mailing list