[pbs-devel] [PATCH proxmox-backup v2] tools/daemon: improve reload behaviour

Dominik Csapak d.csapak at proxmox.com
Thu Dec 17 15:50:18 CET 2020


It seems that sometimes the child process' signal gets handled
before the parent process' signal. Systemd then ignores the
child's signal (finished reloading) and only afterwards goes into the
reloading state because of the parent's signal. That reload will never finish.

Instead, wait for the state to change to 'reloading' after sending
that signal in the parent, and only fork afterwards. This way
we ensure that systemd knows about the reloading before actually trying
to do it.

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
changes from v1:
* introduce wait_service_is_(not_)state
    it is a bit more generic
    has a better name
* factor the common code out into get_service_state

 src/tools/daemon.rs | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/src/tools/daemon.rs b/src/tools/daemon.rs
index 6bb4a41b..0e3a174a 100644
--- a/src/tools/daemon.rs
+++ b/src/tools/daemon.rs
@@ -291,6 +291,7 @@ where
         if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
             log::error!("failed to notify systemd about the state change: {}", e);
         }
+        wait_service_is_state(service_name, "reloading").await?;
         if let Err(e) = reloader.take().unwrap().fork_restart() {
             log::error!("error during reload: {}", e);
             let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
@@ -305,7 +306,7 @@ where
 
     // FIXME: this is a hack, replace with sd_notify_barrier when available
     if server::is_reload_request() {
-        wait_service_is_active(service_name).await?;
+        wait_service_is_not_state(service_name, "reloading").await?;
     }
 
     log::info!("daemon shut down...");
@@ -313,26 +314,36 @@ where
 }
 
 // hack, do not use if unsure!
-async fn wait_service_is_active(service: &str) -> Result<(), Error> {
+async fn get_service_state(service: &str) -> Result<String, Error> {
+    let text = match tokio::process::Command::new("systemctl")
+        .args(&["is-active", service])
+        .output()
+        .await
+    {
+        Ok(output) => match String::from_utf8(output.stdout) {
+            Ok(text) => text,
+            Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
+        },
+        Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
+    };
+
+    Ok(text.trim().trim_start().to_string())
+}
+
+async fn wait_service_is_state(service: &str, state: &str) -> Result<(), Error> {
     tokio::time::delay_for(std::time::Duration::new(1, 0)).await;
-    loop {
-        let text = match tokio::process::Command::new("systemctl")
-            .args(&["is-active", service])
-            .output()
-            .await
-        {
-            Ok(output) => match String::from_utf8(output.stdout) {
-                Ok(text) => text,
-                Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
-            },
-            Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
-        };
+    while get_service_state(service).await? != state {
+        tokio::time::delay_for(std::time::Duration::new(5, 0)).await;
+    }
+    Ok(())
+}
 
-        if text.trim().trim_start() != "reloading" {
-            return Ok(());
-        }
+async fn wait_service_is_not_state(service: &str, state: &str) -> Result<(), Error> {
+    tokio::time::delay_for(std::time::Duration::new(1, 0)).await;
+    while get_service_state(service).await? == state {
         tokio::time::delay_for(std::time::Duration::new(5, 0)).await;
     }
+    Ok(())
 }
 
 #[link(name = "systemd")]
-- 
2.20.1





More information about the pbs-devel mailing list