[pbs-devel] [RFC proxmox-backup 5/8] server: implement sanity check job
Christian Ebner
c.ebner at proxmox.com
Wed Dec 13 16:38:16 CET 2023
Adds the sanity check job execution logic and implements a check that
reports datastores whose usage level reaches or exceeds the configured
threshold.
Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
src/server/mod.rs | 3 +
src/server/sanity_check_job.rs | 131 +++++++++++++++++++++++++++++++++
2 files changed, 134 insertions(+)
create mode 100644 src/server/sanity_check_job.rs
diff --git a/src/server/mod.rs b/src/server/mod.rs
index 4e3b68ac..b3fdc281 100644
--- a/src/server/mod.rs
+++ b/src/server/mod.rs
@@ -25,6 +25,9 @@ pub use gc_job::*;
mod realm_sync_job;
pub use realm_sync_job::*;
+mod sanity_check_job;
+pub use sanity_check_job::*;
+
mod email_notifications;
pub use email_notifications::*;
diff --git a/src/server/sanity_check_job.rs b/src/server/sanity_check_job.rs
new file mode 100644
index 00000000..a68b4bfd
--- /dev/null
+++ b/src/server/sanity_check_job.rs
@@ -0,0 +1,131 @@
+use std::sync::Arc;
+
+use anyhow::{format_err, Error};
+
+use proxmox_human_byte::HumanByte;
+use proxmox_sys::{task_error, task_log};
+
+use pbs_api_types::{
+ Authid, Operation, SanityCheckJobOptions, Userid, DATASTORE_USAGE_FULL_THRESHOLD_DEFAULT,
+};
+use pbs_datastore::DataStore;
+use proxmox_rest_server::WorkerTask;
+
+use crate::server::{jobstate::Job, lookup_user_email};
+
+/// Checks the usage level of every configured datastore against a threshold.
+///
+/// The threshold (in percent) is taken from
+/// `sanity_check_options.datastore_usage_full_threshold`, falling back to
+/// `DATASTORE_USAGE_FULL_THRESHOLD_DEFAULT` when it is not set. A datastore whose
+/// truncated usage percentage is greater than or equal to the threshold is reported.
+///
+/// Every finding is written to the task log of `worker` and additionally collected in
+/// the returned list. Failures that only affect a single datastore (lookup or
+/// filesystem status query) are handled the same way and do not stop the check of the
+/// remaining datastores. An empty list means no problem was found.
+///
+/// # Errors
+///
+/// Returns an error only if the datastore configuration itself cannot be loaded.
+pub fn check_datastore_usage_full_threshold(
+    worker: Arc<WorkerTask>,
+    sanity_check_options: SanityCheckJobOptions,
+) -> Result<Vec<String>, Error> {
+    let (config, _digest) = pbs_config::datastore::config()?;
+    let threshold = sanity_check_options
+        .datastore_usage_full_threshold
+        .unwrap_or(DATASTORE_USAGE_FULL_THRESHOLD_DEFAULT);
+    let mut errors = Vec::new();
+
+    task_log!(
+        worker,
+        "Checking datastore usage levels with {threshold}% threshold ..."
+    );
+    // Only the section name (the datastore name) is needed, the section data is ignored.
+    for (store, _) in &config.sections {
+        let datastore = match DataStore::lookup_datastore(store, Some(Operation::Read)) {
+            Ok(datastore) => datastore,
+            Err(err) => {
+                // Name the datastore, otherwise the message cannot be attributed.
+                let msg = format!("failed to lookup datastore '{store}' - {err}");
+                task_error!(worker, "{msg}");
+                errors.push(msg);
+                continue;
+            }
+        };
+
+        let status = match proxmox_sys::fs::fs_info(&datastore.base_path()) {
+            Ok(status) => status,
+            Err(err) => {
+                let msg = format!("failed to get datastore status for '{store}' - {err}");
+                task_error!(worker, "{msg}");
+                errors.push(msg);
+                continue;
+            }
+        };
+
+        // Float-to-integer `as` casts saturate and map NaN to 0, so a filesystem
+        // reporting a total size of 0 cannot cause a panic or wrap-around here.
+        let used = (status.used as f64 / status.total as f64 * 100f64).trunc() as u8;
+        if used >= threshold {
+            let msg = format!(
+                "Datastore '{store}' exceeded usage threshold!\n used {} of {} ({used}%)",
+                HumanByte::from(status.used),
+                HumanByte::from(status.total),
+            );
+            task_error!(worker, "{msg}");
+            errors.push(msg);
+        } else {
+            task_log!(
+                worker,
+                "Datastore '{store}' below usage threshold, used {} of {} ({used}%)",
+                HumanByte::from(status.used),
+                HumanByte::from(status.total),
+            );
+        }
+    }
+
+    Ok(errors)
+}
+
+/// Starts the sanity check job in a new worker thread.
+///
+/// The worker records the job start, runs the datastore usage check, stores the
+/// resulting job state and, if an email address can be looked up for the configured
+/// `notify_user` (falling back to the root user when none is set), sends a status
+/// notification.
+///
+/// `schedule` is only used for logging which schedule triggered the task.
+///
+/// Returns the string handed back by `WorkerTask::new_thread` (the UPID of the
+/// started worker task).
+///
+/// # Errors
+///
+/// Returns an error if the worker task cannot be created. Failures of the check
+/// itself are reported through the worker task result, not through this return value.
+pub fn do_sanity_check_job(
+    mut job: Job,
+    sanity_check_options: SanityCheckJobOptions,
+    auth_id: &Authid,
+    schedule: Option<String>,
+) -> Result<String, Error> {
+    let worker_type = job.jobtype().to_string();
+
+    // Resolve the notification address up front, before the options are moved into
+    // the worker closure.
+    let notify_user = sanity_check_options
+        .notify_user
+        .as_ref()
+        .unwrap_or_else(|| Userid::root_userid());
+    let email = lookup_user_email(notify_user);
+
+    let upid_str = WorkerTask::new_thread(
+        &worker_type,
+        Some(job.jobname().to_string()),
+        auth_id.to_string(),
+        false,
+        move |worker| {
+            job.start(&worker.upid().to_string())?;
+
+            task_log!(worker, "sanity check job '{}'", job.jobname());
+
+            if let Some(event_str) = schedule {
+                task_log!(worker, "task triggered by schedule '{event_str}'");
+            }
+
+            let result = check_datastore_usage_full_threshold(worker.clone(), sanity_check_options);
+            // Match on a reference: `result` is still needed for the notification below.
+            let job_result = match &result {
+                Ok(errors) if errors.is_empty() => Ok(()),
+                Ok(_) => Err(format_err!(
+                    "sanity check failed - please check the log for details"
+                )),
+                Err(err) => {
+                    // Record the cause in the task log instead of silently dropping it.
+                    task_error!(worker, "sanity check aborted - {err}");
+                    Err(format_err!("sanity check failed - job aborted"))
+                }
+            };
+
+            let status = worker.create_state(&job_result);
+
+            // Failing to persist the job state must not mask the actual job result.
+            if let Err(err) = job.finish(status) {
+                log::error!("could not finish job state for {}: {err}", job.jobtype());
+            }
+
+            if let Some(email) = email {
+                task_log!(worker, "sending notification email to '{email}'");
+                // Notification is best-effort: a send failure is logged but does not
+                // change the job result.
+                if let Err(err) =
+                    crate::server::send_sanity_check_status(&email, None, job.jobname(), &result)
+                {
+                    log::error!("send sanity check notification failed: {err}");
+                }
+            }
+
+            job_result
+        },
+    )?;
+
+    Ok(upid_str)
+}
--
2.39.2
More information about the pbs-devel
mailing list