[pve-devel] [PATCH qemu-server v7 4/11] feature #1027: virtio-fs support
Fabian Grünbichler
f.gruenbichler at proxmox.com
Thu Oct 5 10:56:45 CEST 2023
On August 9, 2023 10:37 am, Markus Frank wrote:
> add support for sharing directories with a guest vm
>
> virtio-fs needs virtiofsd to be started.
>
> In order to start virtiofsd as a process (despite being a daemon, it does not run
> in the background), a double-fork is used.
>
> virtiofsd should close itself together with qemu.
>
> There is the required parameter dirid
> and the optional parameters direct-io & cache.
> Additionally, the xattr & acl parameters override the
> directory mapping settings for xattr & acl.
>
> The dirid gets mapped to the path on the current node
> and is also used as a mount-tag (name used to mount the
> device on the guest).
>
> example config:
> ```
> virtiofs0: foo,direct-io=1,cache=always,acl=1
> virtiofs1: dirid=bar,cache=never,xattr=1
> ```
>
> For information on the optional parameters, see:
> https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md
>
> Signed-off-by: Markus Frank <m.frank at proxmox.com>
> ---
> I did not get virtiofsd to run with run_command without creating zombie
> processes after shutdown.
> So I replaced run_command with exec for now.
> Maybe someone can find out why this happens.
>
> PVE/QemuServer.pm | 174 ++++++++++++++++++++++++++++++++++++++-
> PVE/QemuServer/Memory.pm | 25 ++++--
> debian/control | 1 +
> 3 files changed, 193 insertions(+), 7 deletions(-)
>
> diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
> index 484bc7f..d547dd6 100644
> --- a/PVE/QemuServer.pm
> +++ b/PVE/QemuServer.pm
> @@ -43,6 +43,7 @@ use PVE::PBSClient;
> use PVE::RESTEnvironment qw(log_warn);
> use PVE::RPCEnvironment;
> use PVE::Storage;
> +use PVE::Mapping::Dir;
> use PVE::SysFSTools;
> use PVE::Systemd;
> use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
> @@ -276,6 +277,42 @@ my $rng_fmt = {
> },
> };
>
> +my $virtiofs_fmt = {
> + 'dirid' => {
> + type => 'string',
> + default_key => 1,
> + description => "Mapping identifier of the directory mapping to be"
> + ." shared with the guest. Also used as a mount tag inside the VM.",
> + format_description => 'mapping-id',
> + format => 'pve-configid',
> + },
> + 'cache' => {
> + type => 'string',
> + description => "The caching policy the file system should use"
> + ." (auto, always, never).",
> + format_description => "virtiofs-cache",
> + enum => [qw(auto always never)],
> + optional => 1,
> + },
> + 'direct-io' => {
> + type => 'boolean',
> + description => "Honor the O_DIRECT flag passed down by guest applications",
> + format_description => "virtiofs-directio",
> + optional => 1,
> + },
> + xattr => {
> + type => 'boolean',
> + description => "Enable support for extended attributes.",
> + optional => 1,
> + },
> + acl => {
> + type => 'boolean',
> + description => "Enable support for posix ACLs (implies --xattr).",
> + optional => 1,
> + },
> +};
> +PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
> +
> my $meta_info_fmt = {
> 'ctime' => {
> type => 'integer',
> @@ -840,6 +877,7 @@ while (my ($k, $v) = each %$confdesc) {
> }
>
> my $MAX_NETS = 32;
> +my $MAX_VIRTIOFS = 10;
> my $MAX_SERIAL_PORTS = 4;
> my $MAX_PARALLEL_PORTS = 3;
> my $MAX_NUMA = 8;
> @@ -984,6 +1022,21 @@ my $netdesc = {
>
> PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
>
> +my $virtiofsdesc = {
> + optional => 1,
> + type => 'string', format => $virtiofs_fmt,
> + description => "share files between host and guest",
> +};
> +PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
> +
> +sub max_virtiofs {
> + return $MAX_VIRTIOFS;
> +}
> +
> +for (my $i = 0; $i < $MAX_VIRTIOFS; $i++) {
> + $confdesc->{"virtiofs$i"} = $virtiofsdesc;
> +}
> +
> my $ipconfig_fmt = {
> ip => {
> type => 'string',
> @@ -4113,6 +4166,21 @@ sub config_to_command {
> push @$devices, '-device', $netdevicefull;
> }
>
> + my $virtiofs_enabled = 0;
> + for (my $i = 0; $i < $MAX_VIRTIOFS; $i++) {
> + my $opt = "virtiofs$i";
> +
> + next if !$conf->{$opt};
> + my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
> + next if !$virtiofs;
> +
> + push @$devices, '-chardev', "socket,id=virtfs$i,path=/var/run/virtiofsd/vm$vmid-fs$i";
> + push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
> + .",chardev=virtfs$i,tag=$virtiofs->{dirid}";
> +
> + $virtiofs_enabled = 1;
> + }
> +
> if ($conf->{ivshmem}) {
> my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
>
> @@ -4172,6 +4240,14 @@ sub config_to_command {
> }
> push @$machineFlags, "type=${machine_type_min}";
>
> + if ($virtiofs_enabled && !$conf->{numa}) {
> + # kvm: '-machine memory-backend' and '-numa memdev' properties are
> + # mutually exclusive
> + push @$devices, '-object', 'memory-backend-file,id=virtiofs-mem'
> + .",size=$conf->{memory}M,mem-path=/dev/shm,share=on";
as discussed off-list, this might be switched to memfd to avoid /dev/shm
(same further below)
> + push @$machineFlags, 'memory-backend=virtiofs-mem';
> + }
> +
> push @$cmd, @$devices;
> push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
> push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
> @@ -4198,6 +4274,85 @@ sub config_to_command {
> return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
> }
>
> +sub start_virtiofs {
> + my ($vmid, $fsid, $virtiofs) = @_;
> +
> + my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
> + my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
> +
> + if (!$node_list || scalar($node_list->@*) != 1) {
> + die "virtiofs needs exactly one mapping for this node\n";
> + }
> +
> + eval {
> + PVE::Mapping::Dir::assert_valid($node_list->[0]);
> + };
> + if (my $err = $@) {
> + die "Directory Mapping invalid: $err\n";
> + }
> +
> + my $node_cfg = $node_list->[0];
> + my $path = $node_cfg->{path};
> + my $socket_path_root = "/var/run/virtiofsd";
> + mkdir $socket_path_root;
> + my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
> + unlink($socket_path);
> + my $socket = IO::Socket::UNIX->new(
> + Type => SOCK_STREAM,
> + Local => $socket_path,
> + Listen => 1,
> + ) or die "cannot create socket - $!\n";
> +
> + my $flags = fcntl($socket, F_GETFD, 0)
> + or die "failed to get file descriptor flags: $!\n";
> + fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
> + or die "failed to remove FD_CLOEXEC from file descriptor\n";
> +
> + my $fd = $socket->fileno();
> +
> + my $virtiofsd_bin = '/usr/libexec/virtiofsd';
> +
> + my $pid = fork();
> + if ($pid == 0) {
> + setsid();
> + $0 = "task pve-vm$vmid-virtiofs$fsid";
> + for my $fd_loop (3 .. POSIX::sysconf( &POSIX::_SC_OPEN_MAX )) {
> + POSIX::close($fd_loop) if ($fd_loop != $fd);
> + }
> +
> + my $pid2 = fork();
> + if ($pid2 == 0) {
> + my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
> + push @$cmd, '--xattr' if ($virtiofs->{xattr});
> + push @$cmd, '--posix-acl' if ($virtiofs->{acl});
> +
> + # Default to dir config xattr & acl settings
> + push @$cmd, '--xattr'
> + if !defined $virtiofs->{'xattr'} && $dir_cfg->{'xattr'};
> + push @$cmd, '--posix-acl'
> + if !defined $virtiofs->{'acl'} && $dir_cfg->{'acl'};
nit: this could be a lot simpler:
my $xattr = $virtiofs->{xattr} // $dir_cfg->{xattr};
push @$cmd, '--xattr' if $xattr;
or even as a one-liner ;)
same for ACL
> +
> + push @$cmd, '--announce-submounts' if ($node_cfg->{submounts});
> + push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'});
> + push @$cmd, "--cache=$virtiofs->{'cache'}" if ($virtiofs->{'cache'});
> +
> + exec(@$cmd);
> + } elsif (!defined($pid2)) {
> + die "could not fork to start virtiofsd\n";
> + } else {
> + POSIX::_exit(0);
> + }
> + } elsif (!defined($pid)) {
> + die "could not fork to start virtiofsd\n";
> + } else {
> + waitpid($pid, 0);
> + }
> +
> + # return socket to keep it alive,
> + # so that qemu will wait for virtiofsd to start
> + return $socket;
> +}
More information about the pve-devel
mailing list