[pbs-devel] [PATCH proxmox-backup] rest-server/daemon: use sd_notify_barrier for service reloading

Thomas Lamprecht t.lamprecht at proxmox.com
Thu Sep 30 10:19:59 CEST 2021


On 30.09.21 09:18, Dominik Csapak wrote:
> until now, we manually polled the systemd service state during a reload
> so that the sd_notify messages get processed in the correct order
> (RELOAD(old) -> MAINPID(old) -> READY(new))
> 
> with systemd >= 246 there is now 'sd_notify_barrier' which
> blocks until systemd processed all prior messages
> 
> with that change, the daemon does not need to know the service name anymore
> 

looks OK and much nicer; two nits inline (both can also be addressed in a follow-up)

> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
> ---
>  debian/control                                |  2 +-
>  .../examples/minimal-rest-server.rs           |  1 -
>  proxmox-rest-server/src/daemon.rs             | 61 +++++++------------
>  src/bin/proxmox-backup-api.rs                 |  1 -
>  src/bin/proxmox-backup-proxy.rs               |  1 -
>  5 files changed, 22 insertions(+), 44 deletions(-)
> 
> diff --git a/debian/control b/debian/control
> index 02b094bd..8aec88cf 100644
> --- a/debian/control
> +++ b/debian/control
> @@ -89,7 +89,7 @@ Build-Depends: debhelper (>= 12),
>   librust-zstd-0.6+default-dev,
>   libacl1-dev,
>   libfuse3-dev,
> - libsystemd-dev,
> + libsystemd-dev (>= 246-~~),
>   uuid-dev,
>   libsgutils2-dev,
>   bash-completion,
> diff --git a/proxmox-rest-server/examples/minimal-rest-server.rs b/proxmox-rest-server/examples/minimal-rest-server.rs
> index 22477039..2b6a40a1 100644
> --- a/proxmox-rest-server/examples/minimal-rest-server.rs
> +++ b/proxmox-rest-server/examples/minimal-rest-server.rs
> @@ -207,7 +207,6 @@ async fn run() -> Result<(), Error> {
>                  Ok(())
>              })
>          },
> -        "example_server",
>      ).await?;
>  
>      Ok(())
> diff --git a/proxmox-rest-server/src/daemon.rs b/proxmox-rest-server/src/daemon.rs
> index 9d48ecd2..5d59fce2 100644
> --- a/proxmox-rest-server/src/daemon.rs
> +++ b/proxmox-rest-server/src/daemon.rs
> @@ -186,6 +186,9 @@ impl Reloader {
>                  if let Err(e) = systemd_notify(SystemdNotify::MainPid(child)) {
>                      log::error!("failed to notify systemd about the new main pid: {}", e);
>                  }
> +                if let Err(e) = systemd_notify_barrier() {

maybe add a comment regarding ordering here, e.g., something like
// ensure systemd got the message about the new main PID before continuing, else it gets confused

> +                    log::error!("failed to wait on systemd-processing: {}", e);
> +                }
>  
>                  // notify child that it is now the new main process:
>                  if let Err(e) = pold.write_all(&[1u8]) {
> @@ -248,7 +251,6 @@ impl Reloadable for tokio::net::TcpListener {
>  pub async fn create_daemon<F, S>(
>      address: std::net::SocketAddr,
>      create_service: F,
> -    service_name: &str,
>  ) -> Result<(), Error>
>  where
>      F: FnOnce(tokio::net::TcpListener) -> Result<S, Error>,
> @@ -289,7 +291,10 @@ where
>          if let Err(e) = systemd_notify(SystemdNotify::Reloading) {
>              log::error!("failed to notify systemd about the state change: {}", e);
>          }
> -        wait_service_is_state(service_name, "reloading").await?;
> +        if let Err(e) = systemd_notify_barrier() {
> +            log::error!("failed to wait on systemd-processing: {}", e);
> +        }
> +
>          if let Err(e) = reloader.take().unwrap().fork_restart() {
>              log::error!("error during reload: {}", e);
>              let _ = systemd_notify(SystemdNotify::Status("error during reload".to_string()));
> @@ -302,51 +307,14 @@ where
>          future.await;
>      }
>  
> -    // FIXME: this is a hack, replace with sd_notify_barrier when available
> -    if crate::is_reload_request() {
> -        wait_service_is_not_state(service_name, "reloading").await?;
> -    }
> -
>      log::info!("daemon shut down.");
>      Ok(())
>  }
>  
> -// hack, do not use if unsure!
> -async fn get_service_state(service: &str) -> Result<String, Error> {
> -    let text = match tokio::process::Command::new("systemctl")
> -        .args(&["is-active", service])
> -        .output()
> -        .await
> -    {
> -        Ok(output) => match String::from_utf8(output.stdout) {
> -            Ok(text) => text,
> -            Err(err) => bail!("output of 'systemctl is-active' not valid UTF-8 - {}", err),
> -        },
> -        Err(err) => bail!("executing 'systemctl is-active' failed - {}", err),
> -    };
> -
> -    Ok(text.trim().trim_start().to_string())
> -}
> -
> -async fn wait_service_is_state(service: &str, state: &str) -> Result<(), Error> {
> -    tokio::time::sleep(std::time::Duration::new(1, 0)).await;
> -    while get_service_state(service).await? != state {
> -        tokio::time::sleep(std::time::Duration::new(5, 0)).await;
> -    }
> -    Ok(())
> -}
> -
> -async fn wait_service_is_not_state(service: &str, state: &str) -> Result<(), Error> {
> -    tokio::time::sleep(std::time::Duration::new(1, 0)).await;
> -    while get_service_state(service).await? == state {
> -        tokio::time::sleep(std::time::Duration::new(5, 0)).await;
> -    }
> -    Ok(())
> -}
> -
>  #[link(name = "systemd")]
>  extern "C" {
>      fn sd_notify(unset_environment: c_int, state: *const c_char) -> c_int;
> +    fn sd_notify_barrier(unset_environment: c_int, timeout: u64) -> c_int;
>  }
>  
>  /// Systemd sercice startup states (see: ``man sd_notify``)
> @@ -358,6 +326,19 @@ pub enum SystemdNotify {
>      MainPid(nix::unistd::Pid),
>  }
>  
> +/// Waits until all previously sent messages with sd_notify are processed
> +pub fn systemd_notify_barrier() -> Result<(), Error> {
> +    let rc = unsafe { sd_notify_barrier(0, u64::MAX) }; // infinite timeout
> +    if rc < 0 {
> +        bail!(
> +            "systemd_notify_barrier failed: {}",
> +            std::io::Error::from_raw_os_error(-rc),
> +        );

a single line for the above would work out nicely within our <= 100 cc limit for Rust here :)

> +    }
> +
> +    Ok(())
> +}
> +
>  /// Tells systemd the startup state of the service (see: ``man sd_notify``)
>  pub fn systemd_notify(state: SystemdNotify) -> Result<(), Error> {
>  
> diff --git a/src/bin/proxmox-backup-api.rs b/src/bin/proxmox-backup-api.rs
> index 35cfc5f0..97b7a5e8 100644
> --- a/src/bin/proxmox-backup-api.rs
> +++ b/src/bin/proxmox-backup-api.rs
> @@ -119,7 +119,6 @@ async fn run() -> Result<(), Error> {
>                      .await
>              })
>          },
> -        "proxmox-backup.service",
>      );
>  
>      proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
> diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs
> index a98d4c1f..a548b535 100644
> --- a/src/bin/proxmox-backup-proxy.rs
> +++ b/src/bin/proxmox-backup-proxy.rs
> @@ -262,7 +262,6 @@ async fn run() -> Result<(), Error> {
>                      .await
>              })
>          },
> -        "proxmox-backup-proxy.service",
>      );
>  
>      proxmox_rest_server::write_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?;
> 






More information about the pbs-devel mailing list