[pbs-devel] [PATCH proxmox v2] daemon: clean up middle process of double fork

Dominik Csapak d.csapak at proxmox.com
Tue Dec 3 12:23:49 CET 2024


so we don't leave around a zombie process when the old daemon still
needs to run, because of e.g. a running task.

Since this is mostly a cosmetic issue though, only try a clean up with a
10 second timeout, so we don't block forever. (It could happen that it
didn't exit at that point, but it's very unlikely.)

In case we do run into the timeout here, the process will not be
collected until the parent process exits and the middle process is
collected by pid 1.

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
changes from v1:
* use a timeout
* log the error

 proxmox-daemon/src/server.rs | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/proxmox-daemon/src/server.rs b/proxmox-daemon/src/server.rs
index efea9078..27153fc2 100644
--- a/proxmox-daemon/src/server.rs
+++ b/proxmox-daemon/src/server.rs
@@ -8,6 +8,7 @@ use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
 use std::panic::UnwindSafe;
 use std::path::PathBuf;
 use std::pin::{pin, Pin};
+use std::time::Duration;
 
 use anyhow::{bail, format_err, Error};
 use futures::future::{self, Either};
@@ -15,6 +16,7 @@ use nix::unistd::{fork, ForkResult};
 
 use proxmox_sys::fd::fd_change_cloexec;
 use proxmox_sys::fs::CreateOptions;
+use proxmox_sys::linux::timer;
 
 type BoxedStoreFunc = Box<dyn FnOnce() -> Result<String, Error> + UnwindSafe + Send>;
 
@@ -165,10 +167,12 @@ impl Reloader {
                 // No matter how we managed to get here, this is the time where we bail out quickly:
                 unsafe { libc::_exit(-1) }
             }
-            Ok(ForkResult::Parent { child }) => {
+            Ok(ForkResult::Parent {
+                child: middle_child,
+            }) => {
                 log::debug!(
                     "forked off a new server (first pid: {}), waiting for 2nd pid",
-                    child
+                    middle_child
                 );
                 std::mem::drop(pnew);
                 let mut pold = std::fs::File::from(pold);
@@ -211,6 +215,10 @@ impl Reloader {
                     log::error!("child vanished during reload: {}", e);
                 }
 
+                if let Err(e) = waitpid_with_timeout(middle_child, Duration::from_secs(10)) {
+                    log::error!("waitpid for middle process failed: {e}");
+                }
+
                 Ok(())
             }
             Err(e) => {
@@ -230,6 +238,27 @@ impl Reloader {
     }
 }
 
+fn waitpid_with_timeout(pid: nix::unistd::Pid, timeout: std::time::Duration) -> Result<(), Error> {
+    // unblock the timeout signal temporarily
+    let _sigblock_guard = timer::unblock_timeout_signal();
+
+    // setup a timeout timer
+    let mut timer = timer::Timer::create(
+        timer::Clock::Realtime,
+        timer::TimerEvent::ThisThreadSignal(timer::SIGTIMEOUT),
+    )?;
+
+    timer.arm(
+        timer::TimerSpec::new()
+            .value(Some(timeout))
+            .interval(Some(Duration::from_millis(10))),
+    )?;
+
+    nix::sys::wait::waitpid(pid, None)?;
+
+    Ok(())
+}
+
 fn fd_store_func(fd: RawFd) -> Result<BoxedStoreFunc, Error> {
     let fd = unsafe {
         OwnedFd::from_raw_fd(nix::fcntl::fcntl(
-- 
2.39.5





More information about the pbs-devel mailing list