[pve-devel] [PATCH qemu-server v7 4/11] feature #1027: virtio-fs support
Markus Frank
m.frank at proxmox.com
Wed Aug 9 10:37:32 CEST 2023
add support for sharing directories with a guest vm
virtio-fs needs virtiofsd to be started.
In order to start virtiofsd as a process (despite being a daemon, it does not run
in the background), a double-fork is used.
virtiofsd should close itself together with qemu.
There is the required parameter dirid
and the optional parameters direct-io & cache.
Additionally, the xattr & acl parameters override the
directory mapping settings for xattr & acl.
The dirid gets mapped to the path on the current node
and is also used as a mount-tag (name used to mount the
device on the guest).
example config:
```
virtiofs0: foo,direct-io=1,cache=always,acl=1
virtiofs1: dirid=bar,cache=never,xattr=1
```
For information on the optional parameters, see:
https://gitlab.com/virtio-fs/virtiofsd/-/blob/main/README.md
Signed-off-by: Markus Frank <m.frank at proxmox.com>
---
I did not get virtiofsd to run with run_command without creating zombie
processes after shutdown.
So I replaced run_command with exec for now.
Maybe someone can find out why this happens.
PVE/QemuServer.pm | 174 ++++++++++++++++++++++++++++++++++++++-
PVE/QemuServer/Memory.pm | 25 ++++--
debian/control | 1 +
3 files changed, 193 insertions(+), 7 deletions(-)
diff --git a/PVE/QemuServer.pm b/PVE/QemuServer.pm
index 484bc7f..d547dd6 100644
--- a/PVE/QemuServer.pm
+++ b/PVE/QemuServer.pm
@@ -43,6 +43,7 @@ use PVE::PBSClient;
use PVE::RESTEnvironment qw(log_warn);
use PVE::RPCEnvironment;
use PVE::Storage;
+use PVE::Mapping::Dir;
use PVE::SysFSTools;
use PVE::Systemd;
use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
@@ -276,6 +277,42 @@ my $rng_fmt = {
},
};
+my $virtiofs_fmt = {
+ 'dirid' => {
+ type => 'string',
+ default_key => 1,
+ description => "Mapping identifier of the directory mapping to be"
+ ." shared with the guest. Also used as a mount tag inside the VM.",
+ format_description => 'mapping-id',
+ format => 'pve-configid',
+ },
+ 'cache' => {
+ type => 'string',
+ description => "The caching policy the file system should use"
+ ." (auto, always, never).",
+ format_description => "virtiofs-cache",
+ enum => [qw(auto always never)],
+ optional => 1,
+ },
+ 'direct-io' => {
+ type => 'boolean',
+ description => "Honor the O_DIRECT flag passed down by guest applications",
+ format_description => "virtiofs-directio",
+ optional => 1,
+ },
+ xattr => {
+ type => 'boolean',
+ description => "Enable support for extended attributes.",
+ optional => 1,
+ },
+ acl => {
+ type => 'boolean',
+ description => "Enable support for posix ACLs (implies --xattr).",
+ optional => 1,
+ },
+};
+PVE::JSONSchema::register_format('pve-qm-virtiofs', $virtiofs_fmt);
+
my $meta_info_fmt = {
'ctime' => {
type => 'integer',
@@ -840,6 +877,7 @@ while (my ($k, $v) = each %$confdesc) {
}
my $MAX_NETS = 32;
+my $MAX_VIRTIOFS = 10;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;
my $MAX_NUMA = 8;
@@ -984,6 +1022,21 @@ my $netdesc = {
PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
+my $virtiofsdesc = {
+ optional => 1,
+ type => 'string', format => $virtiofs_fmt,
+ description => "share files between host and guest",
+};
+PVE::JSONSchema::register_standard_option("pve-qm-virtiofs", $virtiofsdesc);
+
+sub max_virtiofs {
+ return $MAX_VIRTIOFS;
+}
+
+for (my $i = 0; $i < $MAX_VIRTIOFS; $i++) {
+ $confdesc->{"virtiofs$i"} = $virtiofsdesc;
+}
+
my $ipconfig_fmt = {
ip => {
type => 'string',
@@ -4113,6 +4166,21 @@ sub config_to_command {
push @$devices, '-device', $netdevicefull;
}
+ my $virtiofs_enabled = 0;
+ for (my $i = 0; $i < $MAX_VIRTIOFS; $i++) {
+ my $opt = "virtiofs$i";
+
+ next if !$conf->{$opt};
+ my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ next if !$virtiofs;
+
+ push @$devices, '-chardev', "socket,id=virtfs$i,path=/var/run/virtiofsd/vm$vmid-fs$i";
+ push @$devices, '-device', 'vhost-user-fs-pci,queue-size=1024'
+ .",chardev=virtfs$i,tag=$virtiofs->{dirid}";
+
+ $virtiofs_enabled = 1;
+ }
+
if ($conf->{ivshmem}) {
my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
@@ -4172,6 +4240,14 @@ sub config_to_command {
}
push @$machineFlags, "type=${machine_type_min}";
+ if ($virtiofs_enabled && !$conf->{numa}) {
+ # kvm: '-machine memory-backend' and '-numa memdev' properties are
+ # mutually exclusive
+ push @$devices, '-object', 'memory-backend-file,id=virtiofs-mem'
+ .",size=$conf->{memory}M,mem-path=/dev/shm,share=on";
+ push @$machineFlags, 'memory-backend=virtiofs-mem';
+ }
+
push @$cmd, @$devices;
push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
@@ -4198,6 +4274,85 @@ sub config_to_command {
return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
}
+sub start_virtiofs {
+ my ($vmid, $fsid, $virtiofs) = @_;
+
+ my $dir_cfg = PVE::Mapping::Dir::config()->{ids}->{$virtiofs->{dirid}};
+ my $node_list = PVE::Mapping::Dir::find_on_current_node($virtiofs->{dirid});
+
+ if (!$node_list || scalar($node_list->@*) != 1) {
+ die "virtiofs needs exactly one mapping for this node\n";
+ }
+
+ eval {
+ PVE::Mapping::Dir::assert_valid($node_list->[0]);
+ };
+ if (my $err = $@) {
+ die "Directory Mapping invalid: $err\n";
+ }
+
+ my $node_cfg = $node_list->[0];
+ my $path = $node_cfg->{path};
+ my $socket_path_root = "/var/run/virtiofsd";
+ mkdir $socket_path_root;
+ my $socket_path = "$socket_path_root/vm$vmid-fs$fsid";
+ unlink($socket_path);
+ my $socket = IO::Socket::UNIX->new(
+ Type => SOCK_STREAM,
+ Local => $socket_path,
+ Listen => 1,
+ ) or die "cannot create socket - $!\n";
+
+ my $flags = fcntl($socket, F_GETFD, 0)
+ or die "failed to get file descriptor flags: $!\n";
+ fcntl($socket, F_SETFD, $flags & ~FD_CLOEXEC)
+ or die "failed to remove FD_CLOEXEC from file descriptor\n";
+
+ my $fd = $socket->fileno();
+
+ my $virtiofsd_bin = '/usr/libexec/virtiofsd';
+
+ my $pid = fork();
+ if ($pid == 0) {
+ setsid();
+ $0 = "task pve-vm$vmid-virtiofs$fsid";
+ for my $fd_loop (3 .. POSIX::sysconf( &POSIX::_SC_OPEN_MAX )) {
+ POSIX::close($fd_loop) if ($fd_loop != $fd);
+ }
+
+ my $pid2 = fork();
+ if ($pid2 == 0) {
+ my $cmd = [$virtiofsd_bin, "--fd=$fd", "--shared-dir=$path"];
+ push @$cmd, '--xattr' if ($virtiofs->{xattr});
+ push @$cmd, '--posix-acl' if ($virtiofs->{acl});
+
+ # Default to dir config xattr & acl settings
+ push @$cmd, '--xattr'
+ if !defined $virtiofs->{'xattr'} && $dir_cfg->{'xattr'};
+ push @$cmd, '--posix-acl'
+ if !defined $virtiofs->{'acl'} && $dir_cfg->{'acl'};
+
+ push @$cmd, '--announce-submounts' if ($node_cfg->{submounts});
+ push @$cmd, '--allow-direct-io' if ($virtiofs->{'direct-io'});
+ push @$cmd, "--cache=$virtiofs->{'cache'}" if ($virtiofs->{'cache'});
+
+ exec(@$cmd);
+ } elsif (!defined($pid2)) {
+ die "could not fork to start virtiofsd\n";
+ } else {
+ POSIX::_exit(0);
+ }
+ } elsif (!defined($pid)) {
+ die "could not fork to start virtiofsd\n";
+ } else {
+ waitpid($pid, 0);
+ }
+
+ # return socket to keep it alive,
+ # so that qemu will wait for virtiofsd to start
+ return $socket;
+}
+
sub check_rng_source {
my ($source) = @_;
@@ -5655,7 +5810,6 @@ sub vm_start {
});
}
-
# params:
# statefile => 'tcp', 'unix' for migration or path/volid for RAM state
# skiplock => 0/1, skip checking for config lock
@@ -5918,10 +6072,23 @@ sub vm_start_nolock {
}
$systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
+
my $run_qemu = sub {
PVE::Tools::run_fork sub {
PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
+ my @virtiofs_sockets;
+ for (my $i = 0; $i < $MAX_VIRTIOFS; $i++) {
+ my $opt = "virtiofs$i";
+
+ next if !$conf->{$opt};
+ my $virtiofs = parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ next if !$virtiofs;
+
+ my $virtiofs_socket = start_virtiofs($vmid, $i, $virtiofs);
+ push @virtiofs_sockets, $virtiofs_socket;
+ }
+
my $tpmpid;
if (my $tpm = $conf->{tpmstate0}) {
# start the TPM emulator so QEMU can connect on start
@@ -5936,6 +6103,11 @@ sub vm_start_nolock {
}
die "QEMU exited with code $exitcode\n";
}
+
+ foreach my $virtiofs_socket (@virtiofs_sockets) {
+ shutdown($virtiofs_socket, 2);
+ close($virtiofs_socket);
+ }
};
};
diff --git a/PVE/QemuServer/Memory.pm b/PVE/QemuServer/Memory.pm
index 0601dd6..648bc08 100644
--- a/PVE/QemuServer/Memory.pm
+++ b/PVE/QemuServer/Memory.pm
@@ -278,6 +278,16 @@ sub config {
die "numa needs to be enabled to use hugepages" if $conf->{hugepages} && !$conf->{numa};
+ my $virtiofs_enabled = 0;
+ for (my $i = 0; $i < PVE::QemuServer::max_virtiofs(); $i++) {
+ my $opt = "virtiofs$i";
+ next if !$conf->{$opt};
+ my $virtiofs = PVE::JSONSchema::parse_property_string('pve-qm-virtiofs', $conf->{$opt});
+ if ($virtiofs) {
+ $virtiofs_enabled = 1;
+ }
+ }
+
if ($conf->{numa}) {
my $numa_totalmemory = undef;
@@ -290,7 +300,8 @@ sub config {
my $numa_memory = $numa->{memory};
$numa_totalmemory += $numa_memory;
- my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
+ my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+ my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
# cpus
my $cpulists = $numa->{cpus};
@@ -315,7 +326,7 @@ sub config {
}
push @$cmd, '-object', $mem_object;
- push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+ push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
}
die "total memory for NUMA nodes must be equal to vm static memory\n"
@@ -329,13 +340,13 @@ sub config {
die "host NUMA node$i doesn't exist\n"
if !host_numanode_exists($i) && $conf->{hugepages};
- my $mem_object = print_mem_object($conf, "ram-node$i", $numa_memory);
- push @$cmd, '-object', $mem_object;
-
my $cpus = ($cores * $i);
$cpus .= "-" . ($cpus + $cores - 1) if $cores > 1;
- push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=ram-node$i";
+ my $memdev = $virtiofs_enabled ? "virtiofs-mem$i" : "ram-node$i";
+ my $mem_object = print_mem_object($conf, $memdev, $numa_memory);
+ push @$cmd, '-object', $mem_object;
+ push @$cmd, '-numa', "node,nodeid=$i,cpus=$cpus,memdev=$memdev";
}
}
}
@@ -364,6 +375,8 @@ sub print_mem_object {
my $path = hugepages_mount_path($hugepages_size);
return "memory-backend-file,id=$id,size=${size}M,mem-path=$path,share=on,prealloc=yes";
+ } elsif ($id =~ m/^virtiofs-mem/) {
+ return "memory-backend-file,id=$id,size=${size}M,mem-path=/dev/shm,share=on";
} else {
return "memory-backend-ram,id=$id,size=${size}M";
}
diff --git a/debian/control b/debian/control
index 49f67b2..f008a9b 100644
--- a/debian/control
+++ b/debian/control
@@ -53,6 +53,7 @@ Depends: dbus,
socat,
swtpm,
swtpm-tools,
+ virtiofsd,
${misc:Depends},
${perl:Depends},
${shlibs:Depends},
--
2.39.2
More information about the pve-devel
mailing list