[pbs-devel] [PATCH proxmox-backup v2] tools/daemon: improve reload behaviour

Thomas Lamprecht t.lamprecht at proxmox.com
Thu Dec 17 17:17:00 CET 2020


On 17/12/2020 15:50, Dominik Csapak wrote:
> it seems that sometimes, the child process signal gets handled
> before the parent process signal. Systemd then ignores the
> child's signal (finished reloading) and only afterwards goes into
> reloading state because of the parent. This will never finish.
> 
> Instead, wait for the state to change to 'reloading' after sending
> that signal in the parent, and only fork afterwards. This way
> we ensure that systemd knows about the reloading before actually trying
> to do it.
> 
> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
> ---
> changes from v1:
> * introduce wait_service_is_(not_)state
>     it is a bit more generic
>     has a better name
> * factor the common code out into get_service_state
> 

that seems much nicer, thanks!
@Fabian can you please redo your testing? (1.5k is maybe a bit much in light
of the changes between v1 and v2; I'm happy with 100 ^^ too)

I'm quite confident in the patch, but this is not something I'd like to touch
again soon, so better we check it a bit too much than too little :)

>  src/tools/daemon.rs | 45 ++++++++++++++++++++++++++++-----------------
>  1 file changed, 28 insertions(+), 17 deletions(-)
> 
> diff --git a/src/tools/daemon.rs b/src/tools/daemon.rs
> index 6bb4a41b..0e3a174a 100644
> --- a/src/tools/daemon.rs
> +++ b/src/tools/daemon.rs
> @@ -291,6 +291,7 @@ where
>          if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
>              log::error!("failed to notify systemd about the state change: {}", e);
>          }
> +        wait_service_is_state(service_name, "reloading").await?;
>          if let Err(e) = reloader.take().unwrap().fork_restart() {
>              log::error!("error during reload: {}", e);
>              let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
> @@ -305,7 +306,7 @@ where
>  
>      // FIXME: this is a hack, replace with sd_notify_barrier when available
>      if server::is_reload_request() {
> -        wait_service_is_active(service_name).await?;
> +        wait_service_is_not_state(service_name, "reloading").await?;
>      }
>  
>      log::info!("daemon shut down...");
> @@ -313,26 +314,36 @@ where
>  }
>  
>  // hack, do not use if unsure!
> -async fn wait_service_is_active(service: &str) -> Result<(), Error> {
> +async fn get_service_state(service: &str) -> Result<String, Error> {
> +    let text = match tokio::process::Command::new("systemctl")
> +        .args(&["is-active", service])
> +        .output()
> +        .await
> +    {
> +        Ok(output) => match String::from_utf8(output.stdout) {
> +            Ok(text) => text,
> +            Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
> +        },
> +        Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
> +    };
> +
> +    Ok(text.trim().trim_start().to_string())
> +}
> +
> +async fn wait_service_is_state(service: &str, state: &str) -> Result<(), Error> {
>      tokio::time::delay_for(std::time::Duration::new(1, 0)).await;
> -    loop {
> -        let text = match tokio::process::Command::new("systemctl")
> -            .args(&["is-active", service])
> -            .output()
> -            .await
> -        {
> -            Ok(output) => match String::from_utf8(output.stdout) {
> -                Ok(text) => text,
> -                Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
> -            },
> -            Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
> -        };
> +    while get_service_state(service).await? != state {
> +        tokio::time::delay_for(std::time::Duration::new(5, 0)).await;
> +    }
> +    Ok(())
> +}
>  
> -        if text.trim().trim_start() != "reloading" {
> -            return Ok(());
> -        }
> +async fn wait_service_is_not_state(service: &str, state: &str) -> Result<(), Error> {
> +    tokio::time::delay_for(std::time::Duration::new(1, 0)).await;
> +    while get_service_state(service).await? == state {
>          tokio::time::delay_for(std::time::Duration::new(5, 0)).await;
>      }
> +    Ok(())
>  }
>  
>  #[link(name = "systemd")]
> 





More information about the pbs-devel mailing list