[pbs-devel] [RFC v2 proxmox-backup 35/36] test-suite: add detection mode change benchmark

Wed Mar 13 12:48:16 CET 2024

On March 5, 2024 10:27 am, Christian Ebner wrote:
> Introduces the proxmox-backup-test-suite crate intended for
> benchmarking and high-level, user-facing testing.
> 
> The initial code includes a benchmark intended for regression testing of
> the proxmox-backup-client when using different file detection modes
> during backup.
> 
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> changes since version 1:
> - no changes
> 
>  Cargo.toml                                    |   1 +
>  proxmox-backup-test-suite/Cargo.toml          |  18 ++
>  .../src/detection_mode_bench.rs               | 294 ++++++++++++++++++
>  proxmox-backup-test-suite/src/main.rs         |  17 +
>  4 files changed, 330 insertions(+)
>  create mode 100644 proxmox-backup-test-suite/Cargo.toml
>  create mode 100644 proxmox-backup-test-suite/src/detection_mode_bench.rs
>  create mode 100644 proxmox-backup-test-suite/src/main.rs
> 
> diff --git a/Cargo.toml b/Cargo.toml
> index 00dc4d86..76635b4e 100644
> --- a/Cargo.toml
> +++ b/Cargo.toml
> @@ -45,6 +45,7 @@ members = [
>      "proxmox-restore-daemon",
>  
>      "pxar-bin",
> +    "proxmox-backup-test-suite",
>  ]
>  
>  [lib]
> diff --git a/proxmox-backup-test-suite/Cargo.toml b/proxmox-backup-test-suite/Cargo.toml
> new file mode 100644
> index 00000000..3f899e9b
> --- /dev/null
> +++ b/proxmox-backup-test-suite/Cargo.toml
> @@ -0,0 +1,18 @@
> +[package]
> +name = "proxmox-backup-test-suite"
> +version = "0.1.0"
> +authors.workspace = true
> +edition.workspace = true
> +
> +[dependencies]
> +anyhow.workspace = true
> +futures.workspace = true
> +serde.workspace = true
> +serde_json.workspace = true
> +
> +pbs-client.workspace = true
> +pbs-key-config.workspace = true
> +pbs-tools.workspace = true
> +proxmox-async.workspace = true
> +proxmox-router = { workspace = true, features = ["cli"] }
> +proxmox-schema = { workspace = true, features = [ "api-macro" ] }
> diff --git a/proxmox-backup-test-suite/src/detection_mode_bench.rs b/proxmox-backup-test-suite/src/detection_mode_bench.rs
> new file mode 100644
> index 00000000..9a3c7680
> --- /dev/null
> +++ b/proxmox-backup-test-suite/src/detection_mode_bench.rs
> @@ -0,0 +1,294 @@
> +use std::path::Path;
> +use std::process::Command;
> +use std::{thread, time};
> +
> +use anyhow::{bail, format_err, Error};
> +use serde_json::Value;
> +
> +use pbs_client::{
> +    tools::{complete_repository, key_source::KEYFILE_SCHEMA, REPO_URL_SCHEMA},
> +    BACKUP_SOURCE_SCHEMA,
> +};
> +use pbs_tools::json;
> +use proxmox_router::cli::*;
> +use proxmox_schema::api;
> +
> +const DEFAULT_NUMBER_OF_RUNS: u64 = 5;
> +// Homepage https://cocodataset.org/
> +const COCO_DATASET_SRC_URL: &'static str = "http://images.cocodataset.org/zips/unlabeled2017.zip";
> +// Homepage https://kernel.org/
> +const LINUX_GIT_REPOSITORY: &'static str =
> +    "git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git";
> +const LINUX_GIT_TAG: &'static str = "v6.5.5";
> +
> +pub(crate) fn detection_mode_bench_mgtm_cli() -> CliCommandMap {
> +    let run_cmd_def = CliCommand::new(&API_METHOD_DETECTION_MODE_BENCH_RUN)
> +        .arg_param(&["backupspec"])
> +        .completion_cb("repository", complete_repository)
> +        .completion_cb("keyfile", complete_file_name);
> +
> +    let prepare_cmd_def = CliCommand::new(&API_METHOD_DETECTION_MODE_BENCH_PREPARE);
> +    CliCommandMap::new()
> +        .insert("prepare", prepare_cmd_def)
> +        .insert("run", run_cmd_def)
> +}
> +
> +#[api(
> +   input: {
> +       properties: {
> +           backupspec: {
> +               type: Array,
> +               description: "List of backup source specifications ([<label.ext>:<path>] ...)",
> +               items: {
> +                   schema: BACKUP_SOURCE_SCHEMA,
> +               }
> +           },
> +           repository: {
> +               schema: REPO_URL_SCHEMA,
> +               optional: true,
> +           },
> +           keyfile: {
> +               schema: KEYFILE_SCHEMA,
> +               optional: true,
> +           },
> +           "number-of-runs": {
> +               description: "Number of times to repeat the run",
> +               type: Integer,
> +               optional: true,
> +           },
> +       }
> +   }
> +)]
> +/// Run benchmark to compare performance for backups using different change detection modes.
> +fn detection_mode_bench_run(param: Value) -> Result<(), Error> {
> +    let mut pbc = Command::new("proxmox-backup-client");
> +    pbc.arg("backup");
> +
> +    let backupspec_list = json::required_array_param(&param, "backupspec")?;
> +    for backupspec in backupspec_list {
> +        let arg = backupspec
> +            .as_str()
> +            .ok_or_else(|| format_err!("failed to parse backupspec"))?;
> +        pbc.arg(arg);
> +    }
> +
> +    if let Some(repo) = param["repository"].as_str() {
> +        pbc.arg("--repository");
> +        pbc.arg::<&str>(repo);
> +    }
> +
> +    if let Some(keyfile) = param["keyfile"].as_str() {
> +        pbc.arg("--keyfile");
> +        pbc.arg::<&str>(keyfile);
> +    }
> +
> +    let number_of_runs = match param["number_of_runs"].as_u64() {
> +        Some(n) => n,
> +        None => DEFAULT_NUMBER_OF_RUNS,
> +    };
> +    if number_of_runs < 1 {
> +        bail!("Number of runs must be greater than 1, aborting.");
> +    }
> +
> +    // First run is an initial run to make sure all chunks are already present and to reduce
> +    // side effects caused by filesystem caches etc.
> +    let _stats_initial = do_run(&mut pbc, 1)?;

this run here

> +
> +    println!("\nStarting benchmarking backups with regular detection mode...\n");
> +    let stats_reg = do_run(&mut pbc, number_of_runs)?;
> +
> +    // Make sure to have a valid reference with catalog format version 2
> +    pbc.arg("--change-detection-mode=metadata");
> +    let _stats_initial = do_run(&mut pbc, 1)?;

and this run here

make the output a bit confusing to read, maybe they could get their own
intro line?

> +    println!("\nStarting benchmarking backups with metadata detection mode...\n");
> +    let stats_meta = do_run(&mut pbc, number_of_runs)?;
> +
> +    println!("\nCompleted benchmark with {number_of_runs} runs for each tested mode.");
> +    println!("\nCompleted regular backup with:");
> +    println!("Total runtime: {:.2} s", stats_reg.total);
> +    println!("Average: {:.2} ± {:.2} s", stats_reg.avg, stats_reg.stddev);
> +    println!("Min: {:.2} s", stats_reg.min);
> +    println!("Max: {:.2} s", stats_reg.max);
> +
> +    println!("\nCompleted metadata detection mode backup with:");
> +    println!("Total runtime: {:.2} s", stats_meta.total);
> +    println!(
> +        "Average: {:.2} ± {:.2} s",
> +        stats_meta.avg, stats_meta.stddev
> +    );
> +    println!("Min: {:.2} s", stats_meta.min);
> +    println!("Max: {:.2} s", stats_meta.max);
> +
> +    let diff_stddev =
> +        ((stats_meta.stddev * stats_meta.stddev) + (stats_reg.stddev * stats_reg.stddev)).sqrt();
> +    println!("\nDifferences (metadata based - regular):");
> +    println!(
> +        "Delta total runtime: {:.2} s ({:.2} %)",
> +        stats_meta.total - stats_reg.total,
> +        100.0 * (stats_meta.total / stats_reg.total - 1.0),
> +    );
> +    println!(
> +        "Delta average: {:.2} ± {:.2} s ({:.2} %)",
> +        stats_meta.avg - stats_reg.avg,
> +        diff_stddev,
> +        100.0 * (stats_meta.avg / stats_reg.avg - 1.0),
> +    );
> +    println!(
> +        "Delta min: {:.2} s ({:.2} %)",
> +        stats_meta.min - stats_reg.min,
> +        100.0 * (stats_meta.min / stats_reg.min - 1.0),
> +    );
> +    println!(
> +        "Delta max: {:.2} s ({:.2} %)",
> +        stats_meta.max - stats_reg.max,
> +        100.0 * (stats_meta.max / stats_reg.max - 1.0),
> +    );
> +
> +    Ok(())
> +}
> +
> +fn do_run(cmd: &mut Command, n_runs: u64) -> Result<Statistics, Error> {
> +    // Avoid collisions between consecutive snapshot timestamps
> +    thread::sleep(time::Duration::from_millis(1000));
> +    let mut timings = Vec::with_capacity(n_runs as usize);
> +    for iteration in 1..n_runs + 1 {
> +        let start = std::time::SystemTime::now();
> +        let mut child = cmd.spawn()?;
> +        let exit_code = child.wait()?;
> +        let elapsed = start.elapsed()?;
> +        timings.push(elapsed);
> +        if !exit_code.success() {
> +            bail!("Run number {iteration} of {n_runs} failed, aborting.");
> +        }
> +    }
> +
> +    Ok(statistics(timings))
> +}
> +
> +struct Statistics {
> +    total: f64,
> +    avg: f64,
> +    stddev: f64,
> +    min: f64,
> +    max: f64,
> +}
> +
> +fn statistics(timings: Vec<std::time::Duration>) -> Statistics {
> +    let total = timings
> +        .iter()
> +        .fold(0f64, |sum, time| sum + time.as_secs_f64());
> +    let avg = total / timings.len() as f64;
> +    let var = 1f64 / (timings.len() - 1) as f64
> +        * timings.iter().fold(0f64, |sq_sum, time| {
> +            let diff = time.as_secs_f64() - avg;
> +            sq_sum + diff * diff
> +        });
> +    let stddev = var.sqrt();
> +    let min = timings.iter().min().unwrap().as_secs_f64();
> +    let max = timings.iter().max().unwrap().as_secs_f64();
> +
> +    Statistics {
> +        total,
> +        avg,
> +        stddev,
> +        min,
> +        max,
> +    }
> +}
> +
> +#[api(
> +    input: {
> +        properties: {
> +            target: {
> +                description: "target path to prepare test data.",
> +            },
> +        },
> +    },
> +)]
> +/// Prepare files required for detection mode backup benchmarks.
> +fn detection_mode_bench_prepare(target: String) -> Result<(), Error> {
> +    let linux_repo_target = format!("{target}/linux");
> +    let coco_dataset_target = format!("{target}/coco");
> +    git_clone(LINUX_GIT_REPOSITORY, linux_repo_target.as_str())?;
> +    git_checkout(LINUX_GIT_TAG, linux_repo_target.as_str())?;
> +    wget_download(COCO_DATASET_SRC_URL, coco_dataset_target.as_str())?;
> +
> +    Ok(())
> +}
> +
> +fn git_clone(repo: &str, target: &str) -> Result<(), Error> {
> +    println!("Calling git clone for '{repo}'.");
> +    let target_git = format!("{target}/.git");
> +    let path = Path::new(&target_git);
> +    if let Ok(true) = path.try_exists() {
> +        println!("Target '{target}' already contains a git repository, skip.");
> +        return Ok(());
> +    }
> +
> +    let mut git = Command::new("git");
> +    git.args(["clone", repo, target]);
> +
> +    let mut child = git.spawn()?;
> +    let exit_code = child.wait()?;
> +    if exit_code.success() {
> +        println!("git clone finished with success.");
> +    } else {
> +        bail!("git clone failed for '{target}'.");
> +    }
> +
> +    Ok(())
> +}
> +
> +fn git_checkout(checkout_target: &str, target: &str) -> Result<(), Error> {
> +    println!("Calling git checkout '{checkout_target}'.");
> +    let mut git = Command::new("git");
> +    git.args(["-C", target, "checkout", checkout_target]);
> +
> +    let mut child = git.spawn()?;
> +    let exit_code = child.wait()?;
> +    if exit_code.success() {
> +        println!("git checkout finished with success.");
> +    } else {
> +        bail!("git checkout '{checkout_target}' failed for '{target}'.");
> +    }
> +    Ok(())
> +}
> +
> +fn wget_download(source_url: &str, target: &str) -> Result<(), Error> {
> +    let path = Path::new(&target);
> +    if let Ok(true) = path.try_exists() {
> +        println!("Target '{target}' already exists, skip.");
> +        return Ok(());
> +    }
> +    let zip = format!("{}/unlabeled2017.zip", target);
> +    let path = Path::new(&zip);
> +    if !path.try_exists()? {
> +        println!("Download archive using wget from '{source_url}' to '{target}'.");
> +        let mut wget = Command::new("wget");
> +        wget.args(["-P", target, source_url]);
> +
> +        let mut child = wget.spawn()?;
> +        let exit_code = child.wait()?;
> +        if exit_code.success() {
> +            println!("Download finished with success.");
> +        } else {
> +            bail!("Failed to download '{source_url}' to '{target}'.");
> +        }
> +        return Ok(());
> +    } else {
> +        println!("Target '{target}' already contains download, skip download.");
> +    }
> +
> +    let mut unzip = Command::new("unzip");
> +    unzip.args([&zip, "-d", target]);
> +
> +    let mut child = unzip.spawn()?;
> +    let exit_code = child.wait()?;
> +    if exit_code.success() {
> +        println!("Extracting zip archive finished with success.");
> +    } else {
> +        bail!("Failed to extract zip archive '{zip}' to '{target}'.");
> +    }
> +    Ok(())
> +}
> diff --git a/proxmox-backup-test-suite/src/main.rs b/proxmox-backup-test-suite/src/main.rs
> new file mode 100644
> index 00000000..0a5b436a
> --- /dev/null
> +++ b/proxmox-backup-test-suite/src/main.rs
> @@ -0,0 +1,17 @@
> +use proxmox_router::cli::*;
> +
> +mod detection_mode_bench;
> +
> +fn main() {
> +    let cmd_def = CliCommandMap::new().insert(
> +        "detection-mode-bench",
> +        detection_mode_bench::detection_mode_bench_mgtm_cli(),
> +    );
> +
> +    let rpcenv = CliEnvironment::new();
> +    run_cli_command(
> +        cmd_def,
> +        rpcenv,
> +        Some(|future| proxmox_async::runtime::main(future)),
> +    );
> +}
> -- 
> 2.39.2
> 
> 
> 
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>