[pbs-devel] [PATCH proxmox-backup 6/9] file-restore-daemon: watchdog: add inhibit for long downloads

Stefan Reiter s.reiter at proxmox.com
Thu May 6 17:26:21 CEST 2021


The extract API call may be active for more than the watchdog timeout,
so a simple ping is not enough.

This adds an "inhibit" API, which will stop the watchdog from expiring
as long as at least one WatchdogInhibitor instance is alive. Keep one in
the download task, so it will be dropped once the task completes (or errors).

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
 src/bin/proxmox_restore_daemon/api.rs      |  8 ++++++--
 src/bin/proxmox_restore_daemon/watchdog.rs | 24 +++++++++++++++++++++-
 2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/src/bin/proxmox_restore_daemon/api.rs b/src/bin/proxmox_restore_daemon/api.rs
index 5aeb69f3..c578b2c0 100644
--- a/src/bin/proxmox_restore_daemon/api.rs
+++ b/src/bin/proxmox_restore_daemon/api.rs
@@ -25,7 +25,7 @@ use proxmox_backup::tools::{self, fs::read_subdir, zip::zip_directory};
 
 use pxar::encoder::aio::TokioWriter;
 
-use super::{disk::ResolveResult, watchdog_remaining, watchdog_ping};
+use super::{disk::ResolveResult, watchdog_remaining, watchdog_inhibit, watchdog_ping};
 
 // NOTE: All API endpoints must have Permission::Superuser, as the configs for authentication do
 // not exist within the restore VM. Safety is guaranteed by checking a ticket via a custom ApiAuth.
@@ -248,8 +248,10 @@ fn extract(
     _info: &ApiMethod,
     _rpcenv: Box<dyn RpcEnvironment>,
 ) -> ApiResponseFuture {
-    watchdog_ping();
+    // download can take longer than watchdog timeout, inhibit until done
+    let _inhibitor = watchdog_inhibit();
     async move {
+        let _inhibitor = _inhibitor;
         let path = tools::required_string_param(&param, "path")?;
         let mut path = base64::decode(path)?;
         if let Some(b'/') = path.last() {
@@ -283,6 +285,7 @@ fn extract(
 
         if pxar {
             tokio::spawn(async move {
+                let _inhibitor = _inhibitor;
                 let result = async move {
                     // pxar always expects a directory as it's root, so to accommodate files as
                     // well we encode the parent dir with a filter only matching the target instead
@@ -340,6 +343,7 @@ fn extract(
             });
         } else {
             tokio::spawn(async move {
+                let _inhibitor = _inhibitor;
                 let result = async move {
                     if vm_path.is_dir() {
                         zip_directory(&mut writer, &vm_path).await?;
diff --git a/src/bin/proxmox_restore_daemon/watchdog.rs b/src/bin/proxmox_restore_daemon/watchdog.rs
index 399f99a7..24997809 100644
--- a/src/bin/proxmox_restore_daemon/watchdog.rs
+++ b/src/bin/proxmox_restore_daemon/watchdog.rs
@@ -4,6 +4,9 @@ use proxmox::tools::time::epoch_i64;
 
 const TIMEOUT: i64 = 600; // seconds
 static TRIGGERED: AtomicI64 = AtomicI64::new(0);
+static INHIBITORS: AtomicI64 = AtomicI64::new(0);
+
+pub struct WatchdogInhibitor {}
 
 fn handle_expired() -> ! {
     use nix::sys::reboot;
@@ -37,5 +40,24 @@ pub fn watchdog_ping() {
 
 /// Returns the remaining time before watchdog expiry in seconds
 pub fn watchdog_remaining() -> i64 {
-    TIMEOUT - (epoch_i64() - TRIGGERED.load(Ordering::Acquire))
+    if INHIBITORS.load(Ordering::Acquire) > 0 {
+        TIMEOUT
+    } else {
+        TIMEOUT - (epoch_i64() - TRIGGERED.load(Ordering::Acquire))
+    }
+}
+
+/// Returns an object that inhibits watchdog expiry for its lifetime; it will issue a ping on Drop
+pub fn watchdog_inhibit() -> WatchdogInhibitor {
+    let prev = INHIBITORS.fetch_add(1, Ordering::AcqRel);
+    log::info!("Inhibit added: {}", prev + 1);
+    WatchdogInhibitor {}
+}
+
+impl Drop for WatchdogInhibitor {
+    fn drop(&mut self) {
+        watchdog_ping();
+        let prev = INHIBITORS.fetch_sub(1, Ordering::AcqRel);
+        log::info!("Inhibit dropped: {}", prev - 1);
+    }
 }
-- 
2.20.1






More information about the pbs-devel mailing list