[pbs-devel] [PATCH proxmox-backup] daemon: add hack for sd_notify
Dominik Csapak
d.csapak at proxmox.com
Tue Nov 10 16:31:03 CET 2020
sd_notify is not synchronous, iow. it only waits until the message
reaches the queue not until it is processed by systemd
apparently, messages from different process will not be processed
in the order they enter the queue(s?) so there is a slight chance
that the setting of the MAINPID will be processed after the READY=1
from the new child (especially when multiple services are reloaded,
like it happens on package upgrade)
to mitigate this, we start a background task on the child to
send multiple READY=1 messages until the service is active
and on the parent we wait for the same without sending the message
(this is necssary for the MAINPID= message to be correctly
associated with the old MAINPID)
since there is no (AFAICS) library interface to check the unit status,
we use 'systemctl is-active <SERVICE_NAME>' to check...
on newer systemd versions, there is 'sd_notify_barrier' which would
allow us to wait for systemd to have all messages from the current
pid to be processed before acknowledging to the child, but on buster
the systemd version is to old...
Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
src/bin/proxmox-backup-api.rs | 11 +++++++++++
src/bin/proxmox-backup-proxy.rs | 11 +++++++++++
src/tools/daemon.rs | 29 +++++++++++++++++++++++++++++
3 files changed, 51 insertions(+)
diff --git a/src/bin/proxmox-backup-api.rs b/src/bin/proxmox-backup-api.rs
index 7d59717b..77daa44a 100644
--- a/src/bin/proxmox-backup-api.rs
+++ b/src/bin/proxmox-backup-api.rs
@@ -58,6 +58,8 @@ async fn run() -> Result<(), Error> {
let rest_server = RestServer::new(config);
+ const SERVICE_NAME: &str = "proxmox-backup.service";
+
// http server future:
let server = daemon::create_daemon(
([127,0,0,1], 82).into(),
@@ -76,11 +78,20 @@ async fn run() -> Result<(), Error> {
})
)
},
+ SERVICE_NAME,
);
server::write_pid(buildcfg::PROXMOX_BACKUP_API_PID_FN)?;
daemon::systemd_notify(daemon::SystemdNotify::Ready)?;
+ // dirty hack! replace with sd_notify_barrier when available
+ tokio::spawn(async move {
+ select!{
+ _ = daemon::check_service_is_active(SERVICE_NAME, true).fuse() => {},
+ _ = server::shutdown_future().fuse() => {},
+ };
+ });
+
let init_result: Result<(), Error> = try_block!({
server::register_task_control_commands(&mut commando_sock)?;
commando_sock.spawn()?;
diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs
index 04c976b5..433d355f 100644
--- a/src/bin/proxmox-backup-proxy.rs
+++ b/src/bin/proxmox-backup-proxy.rs
@@ -116,6 +116,8 @@ async fn run() -> Result<(), Error> {
let acceptor = Arc::new(acceptor.build());
+ const SERVICE_NAME: &str = "proxmox-backup-proxy.service";
+
let server = daemon::create_daemon(
([0,0,0,0,0,0,0,0], 8007).into(),
|listener, ready| {
@@ -133,11 +135,20 @@ async fn run() -> Result<(), Error> {
.map(|_| ())
)
},
+ SERVICE_NAME,
);
server::write_pid(buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)?;
daemon::systemd_notify(daemon::SystemdNotify::Ready)?;
+ // dirty hack! replace with sd_notify_barrier when available
+ tokio::spawn(async move {
+ select!{
+ _ = daemon::check_service_is_active(SERVICE_NAME, true).fuse() => {},
+ _ = server::shutdown_future().fuse() => {},
+ };
+ });
+
let init_result: Result<(), Error> = try_block!({
server::register_task_control_commands(&mut commando_sock)?;
commando_sock.spawn()?;
diff --git a/src/tools/daemon.rs b/src/tools/daemon.rs
index 249ce2ad..257247ed 100644
--- a/src/tools/daemon.rs
+++ b/src/tools/daemon.rs
@@ -260,6 +260,7 @@ impl Future for NotifyReady {
pub async fn create_daemon<F, S>(
address: std::net::SocketAddr,
create_service: F,
+ service_name: &str,
) -> Result<(), Error>
where
F: FnOnce(tokio::net::TcpListener, NotifyReady) -> Result<S, Error>,
@@ -301,10 +302,38 @@ where
if let Some(future) = finish_future {
future.await;
}
+
+ // dirty hack, replace with sd_notify_barrier when available
+ if server::is_reload_request() {
+ check_service_is_active(service_name, false).await?;
+ }
log::info!("daemon shut down...");
Ok(())
}
+pub async fn check_service_is_active(service: &str, send_ready: bool) -> Result<(), Error> {
+ for _ in 0..5 {
+ tokio::time::delay_for(std::time::Duration::new(5, 0)).await;
+ if let Ok(output) = tokio::process::Command::new("systemctl")
+ .args(&["is-active", service])
+ .output()
+ .await
+ {
+ if let Ok(text) = String::from_utf8(output.stdout) {
+ if text.trim().trim_start() == "active" {
+ return Ok(());
+ }
+ }
+ }
+
+ if send_ready {
+ systemd_notify(SystemdNotify::Ready)?;
+ }
+ }
+
+ Ok(())
+}
+
#[link(name = "systemd")]
extern "C" {
fn sd_notify(unset_environment: c_int, state: *const c_char) -> c_int;
--
2.20.1
More information about the pbs-devel
mailing list