[pbs-devel] [PATCH v1 proxmox-backup 04/14] add do_verification_job function to verify.rs

Dominik Csapak d.csapak at proxmox.com
Thu Oct 1 12:40:12 CEST 2020


high-level: i am not sure i would put that code here

it makes the verify code even more intertwined with the worker tasks,
which is not optimal (the verify code should have no dependency on a
worker task imho)
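
something in this direction maybe? just a rough sketch, the trait name
and methods are made up, and it reuses the imports verify.rs already
has:

pub trait TaskLog: Send + Sync {
    fn log(&self, msg: String);
    fn check_abort(&self) -> Result<(), Error>;
}

// verify code would then take the trait object instead of
// Arc<WorkerTask>, the WorkerTask wiring stays in the api/job code
pub fn verify_all_backups(
    datastore: Arc<DataStore>,
    task: Arc<dyn TaskLog>,
) -> Result<Vec<String>, Error> {
    task.check_abort()?;
    task.log(format!("verify datastore {}", datastore.name()));
    // ... existing verification logic, logging via 'task' ...
    Ok(Vec::new())
}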

some comments inline

On 9/25/20 10:43 AM, Hannes Laimer wrote:
> Signed-off-by: Hannes Laimer <h.laimer at proxmox.com>
> ---
>   src/backup/verify.rs | 91 +++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 90 insertions(+), 1 deletion(-)
> 
> diff --git a/src/backup/verify.rs b/src/backup/verify.rs
> index 1fad6187..1f54ebeb 100644
> --- a/src/backup/verify.rs
> +++ b/src/backup/verify.rs
> @@ -5,8 +5,10 @@ use std::time::Instant;
>   
>   use anyhow::{bail, format_err, Error};
>   
> -use crate::server::WorkerTask;
> +use crate::server::{WorkerTask, TaskState};
>   use crate::api2::types::*;
> +use crate::config::jobstate::Job;
> +use crate::config::verify::VerifyJobConfig;
>   
>   use super::{
>       DataStore, DataBlob, BackupGroup, BackupDir, BackupInfo, IndexFile,
> @@ -432,3 +434,90 @@ pub fn verify_all_backups(datastore: Arc<DataStore>, worker: Arc<WorkerTask>) ->
>   
>       Ok(errors)
>   }
> +
> +/// Runs a verification job.
> +pub fn do_verification_job(
> +    mut job: Job,
> +    verify_job: VerifyJobConfig,
> +    userid: &Userid,
> +    schedule: Option<String>,
> +) -> Result<String, Error> {
> +    let datastore = DataStore::lookup_datastore(&verify_job.store)?;
> +
> +    let mut backups_to_verify = BackupInfo::list_backups(&datastore.base_path())?;
> +
> +    if verify_job.ignore_verified {
> +        backups_to_verify.retain(|backup_info| {
> +            if let Ok((manifest, _)) = datastore.load_manifest(&backup_info.backup_dir) {
> +                let verify = manifest.unprotected["verify_state"].clone();
> +                if let Ok(verify) = serde_json::from_value::<SnapshotVerifyState>(verify) {
> +                    let days_since_last_verify =
> +                        (proxmox::tools::time::epoch_i64() - verify.upid.starttime) / 86400;
> +                    // if last verification failed we have to verify again since it might be fixed OR
> +                    // if outdated_after is None, verifications do not become outdated
> +                    verify.state == VerifyState::Failed || (verify_job.outdated_after.is_some()
> +                        && days_since_last_verify > verify_job.outdated_after.unwrap())

after talking with thomas a few days ago: it may not be the best idea
to try to re-verify failed backups, so maybe we can simply drop this
and only verify 'non-verified' backups (see the sketch below)
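
untested, but the retain could then shrink to something like:

backups_to_verify.retain(|backup_info| {
    match datastore.load_manifest(&backup_info.backup_dir) {
        Ok((manifest, _)) => {
            // keep only snapshots that have no verify_state yet
            let state = manifest.unprotected["verify_state"].clone();
            serde_json::from_value::<SnapshotVerifyState>(state).is_err()
        }
        // manifest could not be loaded, do not verify in that case
        Err(_) => false,
    }
});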


> +                } else { true } // was never verified, therefore we always want to verify
> +            } else { false } // manifest could not be loaded, do not verify in that case
> +        })
> +    }
> +
> +    let job_id = job.jobname().to_string();
> +    let worker_type = job.jobtype().to_string();
> +
> +    let upid_str = WorkerTask::new_thread(
> +        &worker_type,
> +        Some(job.jobname().to_string()),
> +        userid.clone(),
> +        false,
> +        move |worker| {
> +            job.start(&worker.upid().to_string())?;
> +
> +            let verified_chunks = Arc::new(Mutex::new(HashSet::with_capacity(1024 * 16)));
> +            let corrupt_chunks = Arc::new(Mutex::new(HashSet::with_capacity(64)));
> +
> +            worker.log(format!("Starting datastore verify job '{}'", job_id));
> +            if let Some(event_str) = schedule {
> +                worker.log(format!("task triggered by schedule '{}'", event_str));
> +            }
> +
> +            let mut failed_dirs: Vec<String> = Vec::new();
> +            for backup_info in backups_to_verify {
> +                match verify_backup_dir(
> +                    datastore.clone(),
> +                    &backup_info.backup_dir,
> +                    verified_chunks.clone(),
> +                    corrupt_chunks.clone(),
> +                    worker.clone(),
> +                ) {
> +                    Ok(false) => failed_dirs.push(backup_info.backup_dir.to_string()),
> +                    Err(err) => {
> +                        let endtime = proxmox::tools::time::epoch_i64();
> +                        job.finish(TaskState::Error {
> +                            message: err.to_string(),
> +                            endtime
> +                        })?;
> +                        bail!(err.to_string());

here you can use our 'try_block' macro, i would do it like so:

let result = proxmox::try_block!({
   // basically the whole 'real' worker code
   // this can use '?'/bail to bubble up errors
});

// here goes only one regular job.finish call
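
spelled out a bit more (untested, and the verification loop from the
patch moves into the block unchanged; the TaskState variants are the
ones you already construct above):

let result = proxmox::try_block!({
    job.start(&worker.upid().to_string())?;

    // ... run verify_backup_dir for each snapshot, collect failed_dirs ...

    if !failed_dirs.is_empty() {
        bail!("verification failed - please check the log for details");
    }
    Ok(())
});

// single job.finish call, no matter how the block exited
let endtime = proxmox::tools::time::epoch_i64();
let state = match &result {
    Ok(()) => TaskState::OK { endtime },
    Err(err) => TaskState::Error { message: err.to_string(), endtime },
};
job.finish(state)?;

result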

> +                    },
> +                    _ => {}
> +                }
> +            }
> +            if !failed_dirs.is_empty() {
> +                worker.log("Failed to verify following snapshots:");
> +                for dir in failed_dirs {
> +                    worker.log(format!("\t{}", dir));
> +                }
> +                let endtime = proxmox::tools::time::epoch_i64();
> +                job.finish(TaskState::Error {
> +                    message: String::from("verification failed - please check the log for details"),
> +                    endtime
> +                })?;
> +                bail!("verification failed - please check the log for details");
> +            }
> +            let endtime = proxmox::tools::time::epoch_i64();
> +            job.finish(TaskState::OK { endtime })?;
> +            Ok(())
> +        })?;
> +
> +    Ok(upid_str)
> +}
> 





