[pve-devel] [PATCH v2 qemu] fix #4507: add patch to automatically increase NOFILE soft limit

Fiona Ebner f.ebner at proxmox.com
Mon Feb 5 14:13:17 CET 2024


In many configurations, e.g. multiple vNICs with multiple queues or
with many Ceph OSDs, the default soft limit of 1024 is not enough.
QEMU is supposed to work fine with file descriptors >= 1024 and does
not use select() on POSIX. Bump the soft limit to the allowed hard
limit to avoid issues with the aforementioned configurations.

Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
---

Changes in v2:
    * pick v2 that was accepted upstream:
      https://lore.kernel.org/qemu-devel/ZcC3-G_COqbk8SxX@redhat.com/
    * because of that, put it into the extra/ folder rather than pve/

 ...-increase-NOFILE-soft-limit-on-POSIX.patch | 119 ++++++++++++++++++
 ...async-for-background-state-snapshots.patch |   4 +-
 ...-Add-dummy-id-command-line-parameter.patch |   4 +-
 ...E-Allow-version-code-in-machine-type.patch |   4 +-
 ...ct-stderr-to-journal-when-daemonized.patch |   6 +-
 debian/patches/series                         |   1 +
 6 files changed, 129 insertions(+), 9 deletions(-)
 create mode 100644 debian/patches/extra/0012-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch

diff --git a/debian/patches/extra/0012-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch b/debian/patches/extra/0012-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch
new file mode 100644
index 0000000..cdc1e06
--- /dev/null
+++ b/debian/patches/extra/0012-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch
@@ -0,0 +1,119 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner at proxmox.com>
+Date: Mon, 18 Dec 2023 11:13:40 +0100
+Subject: [PATCH] qemu_init: increase NOFILE soft limit on POSIX
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+In many configurations, e.g. multiple vNICs with multiple queues or
+with many Ceph OSDs, the default soft limit of 1024 is not enough.
+QEMU is supposed to work fine with file descriptors >= 1024 and does
+not use select() on POSIX. Bump the soft limit to the allowed hard
+limit to avoid issues with the aforementioned configurations.
+
+Of course the limit could be raised from the outside, but the man page
+of systemd.exec states about 'LimitNOFILE=':
+
+> Don't use.
+> [...]
+> Typically applications should increase their soft limit to the hard
+> limit on their own, if they are OK with working with file
+> descriptors above 1023,
+
+If the soft limit is already the same as the hard limit, avoid the
+superfluous setrlimit call. This can avoid a warning with a strict
+seccomp filter blocking setrlimit if NOFILE was already raised before
+executing QEMU.
+
+Buglink: https://bugzilla.proxmox.com/show_bug.cgi?id=4507
+Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
+Reviewed-by: Daniel P. Berrangé <berrange at redhat.com>
+---
+ include/sysemu/os-posix.h |  1 +
+ include/sysemu/os-win32.h |  5 +++++
+ os-posix.c                | 22 ++++++++++++++++++++++
+ softmmu/vl.c              |  2 ++
+ 4 files changed, 30 insertions(+)
+
+diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h
+index 1030d39904..edc415aff5 100644
+--- a/include/sysemu/os-posix.h
++++ b/include/sysemu/os-posix.h
+@@ -48,6 +48,7 @@ void os_setup_early_signal_handling(void);
+ void os_set_proc_name(const char *s);
+ void os_setup_signal_handling(void);
+ void os_daemonize(void);
++void os_setup_limits(void);
+ void os_setup_post(void);
+ int os_mlock(void);
+ 
+diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
+index 91aa0d7ec0..f6e23fe01e 100644
+--- a/include/sysemu/os-win32.h
++++ b/include/sysemu/os-win32.h
+@@ -129,6 +129,11 @@ static inline int os_mlock(void)
+     return -ENOSYS;
+ }
+ 
++static inline void os_setup_limits(void)
++{
++    return; /* no-op: the NOFILE soft limit is only raised on POSIX */
++}
++
+ #define fsync _commit
+ 
+ #if !defined(lseek)
+diff --git a/os-posix.c b/os-posix.c
+index cfcb96533c..0cc1d991b1 100644
+--- a/os-posix.c
++++ b/os-posix.c
+@@ -24,6 +24,7 @@
+  */
+ 
+ #include "qemu/osdep.h"
++#include <sys/resource.h>
+ #include <sys/wait.h>
+ #include <pwd.h>
+ #include <grp.h>
+@@ -286,6 +287,27 @@ void os_daemonize(void)
+     }
+ }
+ 
++void os_setup_limits(void)
++{
++    struct rlimit nofile;
++
++    if (getrlimit(RLIMIT_NOFILE, &nofile) < 0) {
++        warn_report("unable to query NOFILE limit: %s", strerror(errno));
++        return;
++    }
++
++    if (nofile.rlim_cur == nofile.rlim_max) {
++        return; /* soft limit already equals hard limit: skip setrlimit */
++    }
++
++    nofile.rlim_cur = nofile.rlim_max; /* raise soft limit to hard limit */
++
++    if (setrlimit(RLIMIT_NOFILE, &nofile) < 0) {
++        warn_report("unable to set NOFILE limit: %s", strerror(errno));
++        return; /* failure is only warned about, not treated as fatal */
++    }
++}
++
+ void os_setup_post(void)
+ {
+     int fd = 0;
+diff --git a/softmmu/vl.c b/softmmu/vl.c
+index c9e9ede237..ba6ad8a8df 100644
+--- a/softmmu/vl.c
++++ b/softmmu/vl.c
+@@ -2713,6 +2713,8 @@ void qemu_init(int argc, char **argv)
+     error_init(argv[0]);
+     qemu_init_exec_dir(argv[0]);
+ 
++    os_setup_limits();
++
+     qemu_init_arch_modules();
+ 
+     qemu_init_subsystems();
diff --git a/debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch b/debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
index d80d1a1..df12d85 100644
--- a/debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
+++ b/debian/patches/pve/0017-PVE-add-savevm-async-for-background-state-snapshots.patch
@@ -823,7 +823,7 @@ index 8073f5edf5..dc1ececc9c 100644
  DEF("daemonize", 0, QEMU_OPTION_daemonize, \
      "-daemonize      daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
 diff --git a/softmmu/vl.c b/softmmu/vl.c
-index c9e9ede237..3f2681aded 100644
+index ba6ad8a8df..ddeace306e 100644
 --- a/softmmu/vl.c
 +++ b/softmmu/vl.c
 @@ -164,6 +164,7 @@ static const char *accelerators;
@@ -847,7 +847,7 @@ index c9e9ede237..3f2681aded 100644
      }
      if (replay_mode != REPLAY_MODE_NONE) {
          replay_vmstate_init();
-@@ -3194,6 +3201,9 @@ void qemu_init(int argc, char **argv)
+@@ -3196,6 +3203,9 @@ void qemu_init(int argc, char **argv)
              case QEMU_OPTION_loadvm:
                  loadvm = optarg;
                  break;
diff --git a/debian/patches/pve/0020-PVE-Add-dummy-id-command-line-parameter.patch b/debian/patches/pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
index fee97e0..17f5de1 100644
--- a/debian/patches/pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
+++ b/debian/patches/pve/0020-PVE-Add-dummy-id-command-line-parameter.patch
@@ -28,7 +28,7 @@ index dc1ececc9c..848d2dfdd1 100644
      "-fda/-fdb file  use 'file' as floppy disk 0/1 image\n", QEMU_ARCH_ALL)
  DEF("fdb", HAS_ARG, QEMU_OPTION_fdb, "", QEMU_ARCH_ALL)
 diff --git a/softmmu/vl.c b/softmmu/vl.c
-index 3f2681aded..1a3b9cc4b8 100644
+index ddeace306e..3ee90b3b94 100644
 --- a/softmmu/vl.c
 +++ b/softmmu/vl.c
 @@ -2683,6 +2683,7 @@ void qemu_init(int argc, char **argv)
@@ -39,7 +39,7 @@ index 3f2681aded..1a3b9cc4b8 100644
  
      qemu_add_opts(&qemu_drive_opts);
      qemu_add_drive_opts(&qemu_legacy_drive_opts);
-@@ -3306,6 +3307,13 @@ void qemu_init(int argc, char **argv)
+@@ -3308,6 +3309,13 @@ void qemu_init(int argc, char **argv)
                  machine_parse_property_opt(qemu_find_opts("smp-opts"),
                                             "smp", optarg);
                  break;
diff --git a/debian/patches/pve/0025-PVE-Allow-version-code-in-machine-type.patch b/debian/patches/pve/0025-PVE-Allow-version-code-in-machine-type.patch
index 27a801a..507a5e3 100644
--- a/debian/patches/pve/0025-PVE-Allow-version-code-in-machine-type.patch
+++ b/debian/patches/pve/0025-PVE-Allow-version-code-in-machine-type.patch
@@ -72,7 +72,7 @@ index fbb61f18e4..7da3c519ba 100644
  ##
  # @query-machines:
 diff --git a/softmmu/vl.c b/softmmu/vl.c
-index 1a3b9cc4b8..e9b5f62cc3 100644
+index 3ee90b3b94..4b6d0b82fd 100644
 --- a/softmmu/vl.c
 +++ b/softmmu/vl.c
 @@ -1597,6 +1597,7 @@ static const QEMUOption *lookup_opt(int argc, char **argv,
@@ -95,7 +95,7 @@ index 1a3b9cc4b8..e9b5f62cc3 100644
      g_slist_free(machines);
      if (local_err) {
          error_append_hint(&local_err, "Use -machine help to list supported machines\n");
-@@ -3248,12 +3254,31 @@ void qemu_init(int argc, char **argv)
+@@ -3250,12 +3256,31 @@ void qemu_init(int argc, char **argv)
              case QEMU_OPTION_machine:
                  {
                      bool help;
diff --git a/debian/patches/pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch b/debian/patches/pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
index 2a5b43e..24ec761 100644
--- a/debian/patches/pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
+++ b/debian/patches/pve/0033-PVE-redirect-stderr-to-journal-when-daemonized.patch
@@ -34,10 +34,10 @@ index cbfc9a43fb..8206270272 100644
  endif
  
 diff --git a/os-posix.c b/os-posix.c
-index cfcb96533c..fb2ad87009 100644
+index 0cc1d991b1..f33d9901cf 100644
 --- a/os-posix.c
 +++ b/os-posix.c
-@@ -28,6 +28,8 @@
+@@ -29,6 +29,8 @@
  #include <pwd.h>
  #include <grp.h>
  #include <libgen.h>
@@ -46,7 +46,7 @@ index cfcb96533c..fb2ad87009 100644
  
  /* Needed early for CONFIG_BSD etc. */
  #include "net/slirp.h"
-@@ -310,9 +312,10 @@ void os_setup_post(void)
+@@ -332,9 +334,10 @@ void os_setup_post(void)
  
          dup2(fd, 0);
          dup2(fd, 1);
diff --git a/debian/patches/series b/debian/patches/series
index ee0028d..4d75ec3 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -9,6 +9,7 @@ extra/0008-target-i386-the-sgx_epc_get_section-stub-is-reachabl.patch
 extra/0009-ui-clipboard-mark-type-as-not-available-when-there-i.patch
 extra/0010-virtio-scsi-Attach-event-vq-notifier-with-no_poll.patch
 extra/0011-virtio-Re-enable-notifications-after-drain.patch
+extra/0012-qemu_init-increase-NOFILE-soft-limit-on-POSIX.patch
 bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
 bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
 bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
-- 
2.39.2





More information about the pve-devel mailing list