[pve-devel] [PATCH qemu-server v13 5/12] fix #1027: virtio-fs support

Fiona Ebner f.ebner at proxmox.com
Wed Feb 19 14:43:58 CET 2025


Looking very good! Just some nits and issue with schema description I
noticed below:

Am 22.01.25 um 11:08 schrieb Markus Frank:
> @@ -801,6 +802,32 @@ my sub check_vm_create_hostpci_perm {
>      return 1;
>  };
>  
> +my sub check_dir_perm {
> +    my ($rpcenv, $authuser, $vmid, $pool, $opt, $value) = @_;
> +
> +    return 1 if $authuser eq 'root at pam';
> +
> +    $rpcenv->check_vm_perm($authuser, $vmid, $pool, ['VM.Config.Disk']);
> +
> +    my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $value);
> +    $rpcenv->check_full($authuser, "/mapping/dir/$virtiofs->{dirid}", ['Mapping.Use']);
> +
> +    return 1;
> +};
> +
> +my sub check_vm_create_dir_perm {
> +    my ($rpcenv, $authuser, $vmid, $pool, $param) = @_;
> +
> +    return 1 if $authuser eq 'root at pam';
> +
> +    for my $opt (keys %{$param}) {

Nit: sort just to have predictable failure. Or better, iterate over the
known virtiofs keys instead like you do elsewhere.

> +	next if $opt !~ m/^virtiofs\d+$/;
> +	check_dir_perm($rpcenv, $authuser, $vmid, $pool, $opt, $param->{$opt});
> +    }
> +
> +    return 1;
> +};
> +

---snip 8<---

> @@ -3703,8 +3709,11 @@ sub config_to_command {
>  	push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
>      }
>  
> +    my $virtiofs_enabled = PVE::QemuServer::Virtiofs::virtiofs_enabled($conf);
> +
>      PVE::QemuServer::Memory::config(
> -	$conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
> +	$conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd,
> +	$machineFlags, $virtiofs_enabled);

Style nit: I'm afraid it's necessary to use one arguement per line to
comply with our style guide now

>  
>      push @$cmd, '-S' if $conf->{freeze};
>  
> @@ -3994,6 +4003,8 @@ sub config_to_command {
>  	push @$machineFlags, 'confidential-guest-support=sev0';
>      }
>  
> +    PVE::QemuServer::Virtiofs::config($conf, $vmid, $devices);
> +
>      push @$cmd, @$devices;
>      push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
>      push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
> @@ -5792,6 +5803,8 @@ sub vm_start_nolock {
>  	PVE::Tools::run_fork sub {
>  	    PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
>  
> +	    my $virtiofs_sockets = start_all_virtiofsd($conf, $vmid);
> +
>  	    my $tpmpid;
>  	    if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
>  		# start the TPM emulator so QEMU can connect on start
> @@ -5804,8 +5817,10 @@ sub vm_start_nolock {
>  		    warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
>  		    kill 'TERM', $tpmpid;
>  		}
> +		PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);
>  		die "QEMU exited with code $exitcode\n";
>  	    }
> +	    PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets);

Instead of having it twice, could be done slightly above and safeguarded
by an eval (should not be critical to fail here IMHO) like

my $exitcode = run_command($cmd, %run_params);
eval { PVE::QemuServer::Virtiofs::close_sockets(@$virtiofs_sockets); };
log_warn("closing virtiofs sockets failed - $@") if $@;
if ($exitcode) {

---snip 8<---

> @@ -418,15 +419,21 @@ sub config {
>  		die "host NUMA node$i doesn't exist\n"
>  		    if !host_numanode_exists($i) && $conf->{hugepages};
>  
> -		my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
> -		push @$cmd, '-object', $mem_object;
> -
>  		my $cpus = ($cores * $i);
>  		$cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
>  
> -		push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
> +		my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
> +		my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
> +		push @$cmd, '-object', $mem_object;
> +		push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
>  	    }
>  	}
> +    } elsif ($virtiofs_enabled) {
> +	# kvm: '-machine memory-backend' and '-numa memdev' properties are
> +	# mutually exclusive

Style nit: above comment fits within 100 characters on one line

> +	push @$cmd, '-object', 'memory-backend-memfd,id=virtiofs-mem'
> +	    .",size=$conf->{memory}M,share=on";
> +	push @$machine_flags, 'memory-backend=virtiofs-mem';
>      }
>  
>      if ($hotplug) {
> @@ -453,6 +460,8 @@ sub print_mem_object {
>  	my $path = hugepages_mount_path($hugepages_size);
>  
>  	return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
> +    } elsif ($id =~ m/^virtiofs-mem/) {
> +	return "memory-backend-memfd,id=$id,size=${size}M,share=on";
>      } else {
>  	return "memory-backend-ram,id=$id,size=${size}M";
>      }
> diff --git a/PVE/QemuServer/Virtiofs.pm b/PVE/QemuServer/Virtiofs.pm
> new file mode 100644
> index 00000000..e1aabd5a
> --- /dev/null
> +++ b/PVE/QemuServer/Virtiofs.pm
> @@ -0,0 +1,223 @@
> +package PVE::QemuServer::Virtiofs;
> +
> +use strict;
> +use warnings;
> +
> +use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC);
> +use IO::Socket::UNIX;
> +use POSIX;
> +use Socket qw(SOCK_STREAM);
> +
> +use PVE::JSONSchema qw(get_standard_option parse_property_string);

Nit: get_standard_option is not used

> +use PVE::Mapping::Dir;
> +use PVE::RESTEnvironment qw(log_warn);
> +

missing

use PVE::QemuServer::Helpers;

for the PVE::QemuServer::Helpers::windows_version() call

> +use base qw(Exporter);
> +
> +our @EXPORT_OK = qw(
> +max_virtiofs
> +start_all_virtiofsd
> +);
> +
> +my $MAX_VIRTIOFS = 10;
> +my $socket_path_root = "/run/qemu-server/virtiofsd";
> +
> +my $virtiofs_fmt = {
> +    'dirid' => {
> +	type => 'string',
> +	default_key => 1,
> +	description => "Mapping identifier of the directory mapping to be shared with the guest."
> +	    ." Also used as a mount tag inside the VM.",
> +	format_description => 'mapping-id',
> +	format => 'pve-configid',
> +    },
> +    'cache' => {
> +	type => 'string',
> +	description => "The caching policy the file system should use (auto, always, never).",
> +	enum => [qw(auto always never)],
> +	default => "auto",
> +	optional => 1,
> +    },
> +    'direct-io' => {
> +	type => 'boolean',
> +	description => "Honor the O_DIRECT flag passed down by guest applications.",
> +	default => 0,
> +	optional => 1,
> +    },
> +    writeback => {
> +	type => 'boolean',
> +	description => "Enable writeback cache. If enabled, writes may be cached in the guest until"
> +	    ." the file is closed or an fsync is performed.",
> +	default => 0,
> +	optional => 1,
> +    },
> +    'expose-xattr' => {
> +	type => 'boolean',
> +	description => "Overwrite the xattr option from mapping and explicitly enable/disable"
> +	    ." support for extended attributes for the VM.",

The setting is not VM-wide, so
s/for the VM/for this mount/

The option doesn't exist for the mapping anymore so should not mention
overwrite, right?

> +	default => 0,
> +	optional => 1,
> +    },
> +    'expose-acl' => {
> +	type => 'boolean',
> +	description => "Overwrite the acl option from mapping and explicitly enable/disable support"
> +	    ." for posix ACLs (enabled acl implies xattr) for the VM.",

s/posix/POSIX/
s/for the VM/for this mount/

The option doesn't exist for the mapping anymore so should not mention
overwrite, right?

> +	default => 0,
> +	optional => 1,
> +    },
> +};
> +PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
> +
> +my $virtiofsdesc = {
> +    optional => 1,
> +    type => 'string', format => $virtiofs_fmt,
> +    description => "Configuration for sharing a directory between host and guest using Virtio-fs.",
> +};
> +PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
> +
> +sub max_virtiofs {
> +    return $MAX_VIRTIOFS;
> +}
> +
> +sub assert_virtiofs_config {
> +    my ($conf, $virtiofs) = @_;

Nit: could also pass in just the ostype. That would also avoid potential
to confuse $conf with the virtiofs config in this context.

> +
> +    my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
> +    my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
> +
> +    my $acl = $virtiofs->{'expose-acl'};
> +    if ($acl && PVE::QemuServer::Helpers::windows_version($conf->{ostype})) {
> +	log_warn(
> +	    "Please disable ACLs for virtiofs on Windows VMs, otherwise"
> +	    ." the virtiofs shared directory cannot be mounted."
> +	);
> +    }
> +
> +    if (!$node_list || scalar($node_list->@*) != 1) {
> +	die "virtiofs needs exactly one mapping for this node\n";
> +    }
> +
> +    eval { PVE::Mapping::Dir::assert_valid($node_list->[0]) };
> +    die "directory mapping invalid: $@\n" if $@;
> +}
> +
> +sub config {
> +    my ($conf, $vmid, $devices) = @_;
> +
> +    for (my $i = 0; $i < max_virtiofs(); $i++) {
> +	my $opt = "virtiofs$i";
> +
> +	next if !$conf->{$opt};
> +	my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +	next if !$virtiofs;

Nit: I'd "die" instead of "next" since this is unexpected, i.e.
parse_property_string() was successful so there should be a result. And
I don't think you even need it. We don't check this elsewhere either,
because the only way parse_property_string() can return something false
but not die if there is a custom validator for the format and that one
returns something false but not die (which would be a bug with the
validator).

> +
> +	assert_virtiofs_config($conf, $virtiofs);
> +
> +	push @$devices, '-chardev', "socket,id=virtiofs$i,path=$socket_path_root/vm$vmid-fs$i";
> +
> +	# queue-size is set 1024 because of bug with Windows guests:
> +	# https://bugzilla.redhat.com/show_bug.cgi?id=1873088
> +	# 1024 is also always used in the virtiofs documentations:
> +	# https://gitlab.com/virtio-fs/virtiofsd#examples
> +	push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
> +	    .",chardev=virtiofs$i,tag=$virtiofs->{dirid}";
> +    }
> +}
> +
> +sub virtiofs_enabled {
> +    my ($conf) = @_;
> +
> +    my $virtiofs_enabled = 0;
> +    for (my $i = 0; $i < max_virtiofs(); $i++) {
> +	my $opt = "virtiofs$i";
> +	next if !$conf->{$opt};
> +	my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +	if ($virtiofs) {

Nit: Similar to above, if we do get a result, it will always evaluate to
true.

> +	    $virtiofs_enabled = 1;
> +	    last;
> +	}
> +    }
> +    return $virtiofs_enabled;
> +}
> +
> +sub start_all_virtiofsd {
> +    my ($conf, $vmid) = @_;
> +    my $virtiofs_sockets = [];
> +    for (my $i = 0; $i < max_virtiofs(); $i++) {
> +	my $opt = "virtiofs$i";
> +
> +	next if !$conf->{$opt};
> +	my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> +	next if !$virtiofs;

Nit: Similar to above. Or did you ever run into the situation where the
result from parse_property_string() would be false?

> +
> +	my $virtiofs_socket = start_virtiofsd($vmid, $i, $virtiofs);
> +	push @$virtiofs_sockets, $virtiofs_socket;
> +    }
> +    return $virtiofs_sockets;
> +}
> +
> +sub start_virtiofsd {
> +    my ($vmid, $fsid, $virtiofs) = @_;
> +
> +    mkdir $socket_path_root;
> +    my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
> +    unlink($socket_path);
> +    my $socket = IO::Socket::UNIX->new(
> +	Type => SOCK_STREAM,
> +	Local => $socket_path,
> +	Listen => 1,
> +    ) or die "cannot create socket - $!\n";
> +
> +    my $flags = fcntl($socket, F_GETFD, 0)
> +	or die "failed to get file descriptor flags: $!\n";
> +    fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
> +	or die "failed to remove FD_CLOEXEC from file descriptor\n";
> +
> +    my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
> +    my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
> +    my $node_cfg = $node_list->[0];

Nit: this should also check if there is exactly one entry in the result.
But since all callers require that, we can make the
find_on_current_node() function check this itself and return the single
entry directly.

> +
> +    my $virtiofsd_bin = '/usr/libexec/virtiofsd';
> +    my $fd = $socket->fileno();
> +    my $path = $node_cfg->{path};
> +
> +    my $could_not_fork_err = "could not fork to start virtiofsd\n";
> +    my $pid = fork();
> +    if ($pid == 0) {
> +	setsid();

Is this automatically imported by POSIX? I'd still be explicit here with
POSIX::setsid();

> +	$0 = "task pve-vm$vmid-virtiofs$fsid";
> +	my $pid2 = fork();
> +	if ($pid2 == 0) {
> +	    my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
> +	    push @$cmd, '--xattr' if $virtiofs->{'expose-xattr'};
> +	    push @$cmd, '--posix-acl' if $virtiofs->{'expose-acl'};
> +	    push @$cmd, '--announce-submounts' if $node_cfg->{'announce-submounts'};
> +	    push @$cmd, '--allow-direct-io' if $virtiofs->{'direct-io'};
> +	    push @$cmd, '--cache='.$virtiofs->{cache} if $virtiofs->{cache};
> +	    push @$cmd, '--writeback' if $virtiofs->{'writeback'};
> +	    push @$cmd, '--syslog';
> +	    exec(@$cmd);
> +	} elsif (!defined($pid2)) {
> +	    die $could_not_fork_err;
> +	} else {
> +	    POSIX::_exit(0);
> +	}
> +    } elsif (!defined($pid)) {
> +	die $could_not_fork_err;
> +    } else {
> +	waitpid($pid, 0);
> +    }
> +
> +    # return socket to keep it alive,
> +    # so that QEMU will wait for virtiofsd to start
> +    return $socket;
> +}
> +
> +sub close_sockets {
> +    my @sockets = @_;
> +    for my $socket (@sockets) {
> +	shutdown($socket, 2);
> +	close($socket);
> +    }
> +}

Style nit: missing blank line here

> +1;





More information about the pve-devel mailing list