[pbs-devel] [RFC proxmox-backup 5/8] server: implement sanity check job

Christian Ebner c.ebner at proxmox.com
Wed Dec 13 16:38:16 CET 2023


Adds the sanity check job execution logic and implements a check that
reports every datastore whose usage level reaches or exceeds the
configured threshold (or the default threshold if none is configured).

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
 src/server/mod.rs              |   3 +
 src/server/sanity_check_job.rs | 131 +++++++++++++++++++++++++++++++++
 2 files changed, 134 insertions(+)
 create mode 100644 src/server/sanity_check_job.rs

diff --git a/src/server/mod.rs b/src/server/mod.rs
index 4e3b68ac..b3fdc281 100644
--- a/src/server/mod.rs
+++ b/src/server/mod.rs
@@ -25,6 +25,9 @@ pub use gc_job::*;
 mod realm_sync_job;
 pub use realm_sync_job::*;
 
+mod sanity_check_job;
+pub use sanity_check_job::*;
+
 mod email_notifications;
 pub use email_notifications::*;
 
diff --git a/src/server/sanity_check_job.rs b/src/server/sanity_check_job.rs
new file mode 100644
index 00000000..a68b4bfd
--- /dev/null
+++ b/src/server/sanity_check_job.rs
@@ -0,0 +1,131 @@
+use std::sync::Arc;
+
+use anyhow::{format_err, Error};
+
+use proxmox_human_byte::HumanByte;
+use proxmox_sys::{task_error, task_log};
+
+use pbs_api_types::{
+    Authid, Operation, SanityCheckJobOptions, Userid, DATASTORE_USAGE_FULL_THRESHOLD_DEFAULT,
+};
+use pbs_datastore::DataStore;
+use proxmox_rest_server::WorkerTask;
+
+use crate::server::{jobstate::Job, lookup_user_email};
+
+/// Checks the usage level of every datastore in the datastore config against a threshold.
+///
+/// The threshold is taken from `sanity_check_options.datastore_usage_full_threshold`, falling
+/// back to [`DATASTORE_USAGE_FULL_THRESHOLD_DEFAULT`] if it is unset. A datastore is reported
+/// when its usage percentage (rounded down) is greater than or equal to the threshold.
+///
+/// Per-datastore problems (failed lookup, failed status query, threshold reached) do not abort
+/// the check: each one is written to the task log of `worker` and appended to the returned
+/// list. An empty list therefore means that every datastore passed.
+///
+/// # Errors
+///
+/// Returns an error if the datastore config cannot be loaded.
+pub fn check_datastore_usage_full_threshold(
+    worker: Arc<WorkerTask>,
+    sanity_check_options: SanityCheckJobOptions,
+) -> Result<Vec<String>, Error> {
+    let (config, _digest) = pbs_config::datastore::config()?;
+    let threshold = sanity_check_options
+        .datastore_usage_full_threshold
+        .unwrap_or(DATASTORE_USAGE_FULL_THRESHOLD_DEFAULT);
+    let mut errors = Vec::new();
+
+    task_log!(
+        worker,
+        "Checking datastore usage levels with {threshold}% threshold ..."
+    );
+    for store in config.sections.keys() {
+        let datastore = match DataStore::lookup_datastore(store, Some(Operation::Read)) {
+            Ok(datastore) => datastore,
+            Err(err) => {
+                // Name the datastore so the message is actionable with several stores.
+                let msg = format!("failed to lookup datastore '{store}' - {err}");
+                task_error!(worker, "{msg}");
+                errors.push(msg);
+                continue;
+            }
+        };
+
+        let status = match proxmox_sys::fs::fs_info(&datastore.base_path()) {
+            Ok(status) => status,
+            Err(err) => {
+                let msg = format!("failed to get datastore '{store}' status - {err}");
+                task_error!(worker, "{msg}");
+                errors.push(msg);
+                continue;
+            }
+        };
+
+        // Integer arithmetic instead of floats: `29.0 / 100.0 * 100.0` truncates to 28, which
+        // could hide a datastore sitting exactly on the threshold. The u128 widening rules out
+        // overflow of `used * 100`. A total of zero is reported as 0% rather than divided by.
+        let used = if status.total == 0 {
+            0
+        } else {
+            let percent = u128::from(status.used) * 100 / u128::from(status.total);
+            // Saturate instead of wrapping if `used` is (unexpectedly) far larger than `total`.
+            u8::try_from(percent).unwrap_or(u8::MAX)
+        };
+        if used >= threshold {
+            let msg = format!(
+                "Datastore '{store}' exceeded usage threshold!\n  used {} of {} ({used}%)",
+                HumanByte::from(status.used),
+                HumanByte::from(status.total),
+            );
+            task_error!(worker, "{msg}");
+            errors.push(msg);
+        } else {
+            task_log!(
+                worker,
+                "Datastore '{store}' below usage threshold, used {} of {} ({used}%)",
+                HumanByte::from(status.used),
+                HumanByte::from(status.total),
+            );
+        }
+    }
+
+    Ok(errors)
+}
+
+/// Runs the sanity check job inside a worker task created via [`WorkerTask::new_thread`].
+///
+/// Inside the task, the job state is started, the datastore usage check is executed and the job
+/// state is finished with the outcome. The task fails if the check reported at least one
+/// problem or could not be run at all.
+///
+/// If an email address can be looked up for `sanity_check_options.notify_user` (the root user
+/// if unset), the check result is handed to `send_sanity_check_status`. A failure to send the
+/// notification is only logged and does not change the task result.
+///
+/// `schedule`, if given, is only written to the task log to record what triggered the job.
+///
+/// # Errors
+///
+/// Returns an error if the worker task cannot be created. Otherwise the value returned by
+/// [`WorkerTask::new_thread`] (the UPID string of the task) is passed through.
+pub fn do_sanity_check_job(
+    mut job: Job,
+    sanity_check_options: SanityCheckJobOptions,
+    auth_id: &Authid,
+    schedule: Option<String>,
+) -> Result<String, Error> {
+    let worker_type = job.jobtype().to_string();
+
+    // Resolve the notification address up front; the options themselves are moved into the
+    // worker closure below.
+    let notify_user = sanity_check_options
+        .notify_user
+        .as_ref()
+        .unwrap_or_else(Userid::root_userid);
+    let email = lookup_user_email(notify_user);
+
+    let upid_str = WorkerTask::new_thread(
+        &worker_type,
+        Some(job.jobname().to_string()),
+        auth_id.to_string(),
+        false,
+        move |worker| {
+            job.start(&worker.upid().to_string())?;
+
+            task_log!(worker, "sanity check job '{}'", job.jobname());
+
+            if let Some(event_str) = schedule {
+                task_log!(worker, "task triggered by schedule '{event_str}'");
+            }
+
+            let result = check_datastore_usage_full_threshold(worker.clone(), sanity_check_options);
+            // `result` is only borrowed here, it is still needed for the notification below.
+            let job_result = match &result {
+                Ok(errors) if errors.is_empty() => Ok(()),
+                Ok(_) => Err(format_err!(
+                    "sanity check failed - please check the log for details"
+                )),
+                Err(err) => {
+                    // Record the cause in the task log instead of silently dropping it.
+                    task_error!(worker, "sanity check aborted - {err}");
+                    Err(format_err!("sanity check failed - job aborted"))
+                }
+            };
+
+            let status = worker.create_state(&job_result);
+
+            if let Err(err) = job.finish(status) {
+                eprintln!("could not finish job state for {}: {err}", job.jobtype());
+            }
+
+            if let Some(email) = email {
+                task_log!(worker, "sending notification email to '{email}'");
+                if let Err(err) =
+                    crate::server::send_sanity_check_status(&email, None, job.jobname(), &result)
+                {
+                    log::error!("send sanity check notification failed: {err}");
+                }
+            }
+
+            job_result
+        },
+    )?;
+
+    Ok(upid_str)
+}
-- 
2.39.2





More information about the pbs-devel mailing list