[pbs-devel] [PATCH proxmox-backup] tools/daemon: improve reload behaviour

Dominik Csapak d.csapak at proxmox.com
Wed Dec 16 09:12:09 CET 2020


It seems that sometimes the child process's signal gets handled
before the parent process's signal. Systemd then ignores the
child's signal (finished reloading) and only afterwards goes into the
reloading state because of the parent, which then never finishes.

Instead, wait for the state to change to 'reloading' after sending
that signal in the parent, and only fork afterwards. This way
we ensure that systemd knows about the reloading before actually trying
to do it.

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
this all goes away with systemd's notify barrier hopefully....

 src/tools/daemon.rs | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/tools/daemon.rs b/src/tools/daemon.rs
index 6bb4a41b..2aa52772 100644
--- a/src/tools/daemon.rs
+++ b/src/tools/daemon.rs
@@ -291,6 +291,7 @@ where
         if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
             log::error!("failed to notify systemd about the state change: {}", e);
         }
+        wait_service_is_active_or_reloading(service_name, true).await?;
         if let Err(e) = reloader.take().unwrap().fork_restart() {
             log::error!("error during reload: {}", e);
             let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
@@ -305,7 +306,7 @@ where
 
     // FIXME: this is a hack, replace with sd_notify_barrier when available
     if server::is_reload_request() {
-        wait_service_is_active(service_name).await?;
+        wait_service_is_active_or_reloading(service_name, false).await?;
     }
 
     log::info!("daemon shut down...");
@@ -313,7 +314,7 @@ where
 }
 
 // hack, do not use if unsure!
-async fn wait_service_is_active(service: &str) -> Result<(), Error> {
+async fn wait_service_is_active_or_reloading(service: &str, wait_for_reload: bool) -> Result<(), Error> {
     tokio::time::delay_for(std::time::Duration::new(1, 0)).await;
     loop {
         let text = match tokio::process::Command::new("systemctl")
@@ -328,7 +329,8 @@ async fn wait_service_is_active(service: &str) -> Result<(), Error> {
             Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
         };
 
-        if text.trim().trim_start() != "reloading" {
+        let is_reload = text.trim().trim_start() == "reloading";
+        if is_reload == wait_for_reload {
             return Ok(());
         }
         tokio::time::delay_for(std::time::Duration::new(5, 0)).await;
-- 
2.20.1





More information about the pbs-devel mailing list