[pbs-devel] [PATCH v5 proxmox-backup 1/4] add chunk inspection to pb-debug

Wolfgang Bumiller w.bumiller at proxmox.com
Fri Apr 30 11:39:38 CEST 2021


On Thu, Apr 29, 2021 at 01:00:13PM +0200, Hannes Laimer wrote:
> diff --git a/src/bin/proxmox_backup_debug/inspect.rs b/src/bin/proxmox_backup_debug/inspect.rs
> new file mode 100644
> index 00000000..dd6ee287
> --- /dev/null
> +++ b/src/bin/proxmox_backup_debug/inspect.rs
> @@ -0,0 +1,162 @@
> +use std::path::Path;
> +
> +use anyhow::{format_err, Error};
> +use proxmox::api::cli::{
> +    format_and_print_result, get_output_format, CliCommand, CliCommandMap, CommandLineInterface,
> +};
> +use proxmox::api::{api, cli::*};
> +use serde_json::{json, Value};
> +use walkdir::WalkDir;
> +
> +use proxmox_backup::backup::{
> +    load_and_decrypt_key, CryptConfig, DataBlob, DynamicIndexReader, FixedIndexReader, IndexFile,
> +};
> +
> +use crate::{get_encryption_key_password, KEYFILE_SCHEMA, PATH_SCHEMA};
> +
> +use proxmox_backup::tools::outfile_or_stdout;
> +
> +/// Decodes a blob and writes its content either to stdout or into a file
> +fn decode_blob(
> +    output_path: Option<&Path>,
> +    key_file: Option<&Path>,
> +    digest: Option<&[u8; 32]>,
> +    blob: &DataBlob,
> +) -> Result<(), Error> {
> +    let mut crypt_conf_opt = None;
> +    let crypt_conf;
> +
> +    if blob.is_encrypted() && key_file.is_some() {
> +        let (key, _created, _fingerprint) =
> +            load_and_decrypt_key(&key_file.unwrap(), &get_encryption_key_password)?;
> +        crypt_conf = CryptConfig::new(key)?;
> +        crypt_conf_opt = Some(&crypt_conf);
> +    }
> +
> +    outfile_or_stdout(output_path)?.write_all(blob.decode(crypt_conf_opt, digest)?.as_slice())?;
> +    Ok(())
> +}
> +
> +#[api(
> +    input: {
> +        properties: {
> +            chunk: {
> +                schema: PATH_SCHEMA,
> +            },
> +            "reference-filter": {
> +                schema: PATH_SCHEMA,
> +                optional: true,
> +            },
> +            "decode": {
> +                schema: PATH_SCHEMA,
> +                optional: true,
> +            },
> +            "keyfile": {
> +                schema: KEYFILE_SCHEMA,
> +                optional: true,
> +            },
> +            "output-format": {
> +                schema: OUTPUT_FORMAT,
> +                optional: true,
> +            },
> +        }
> +    }
> +)]
> +/// Inspect a chunk
> +fn inspect_chunk(
> +    chunk: String,
> +    reference_filter: Option<String>,
> +    decode: Option<String>,
> +    keyfile: Option<String>,
> +    param: Value,
> +) -> Result<(), Error> {
> +    let output_format = get_output_format(&param);
> +    let chunk_path = Path::new(&chunk);
> +    let digest_str = chunk_path.file_name().unwrap().to_str().unwrap();

^ The first unwrap() fails with an ugly error on `/`, so please don't
use unwrap there.

Also I think for a debug helper it makes sense if it can also work on copied
and renamed files that aren't literally named after their digest, so I think
`digest_{str,raw}` should in fact be `Option<>`. (In the `decode` method the
digest and its verification are already optional after all.)

You can, however, output a warning that the digest will not be verified.

(Also I wonder if an additional optional `--digest` parameter should be added?)

> +    let digest_raw = proxmox::tools::hex_to_digest(digest_str)
> +        .map_err(|e| format_err!("could not parse chunk - {}", e))?;
> +
> +    let search_path = reference_filter.as_ref().map(Path::new);
> +    let key_file_path = keyfile.as_ref().map(Path::new);
> +
> +    let (decode_output_path, to_stdout) = (
> +        decode.as_ref().map(Path::new),
> +        decode.clone().map_or(false, |p| p.eq("-")),

^ `to_stdout=true` now implies that `decode_output_path` was `Some`, so
I think you can drop that variable altogether.

> +    );
> +
> +    let mut file = std::fs::File::open(&chunk_path)
> +        .map_err(|e| format_err!("could not open chunk file - {}", e))?;
> +    let blob = DataBlob::load_from_reader(&mut file)?;

^ If you're doing cleanups, I'd still like `file` to be dropped at this point.

> +
> +    let mut referenced_by = None;
> +    if let Some(search_path) = search_path {

Try to avoid `mut` variables where they're only assigned once anyway:

    let referenced_by = if let Some(search_path) = search_path {
        ...
        Some(references)
    } else {
        None
    };

> +        let mut references = Vec::new();
> +        for entry in WalkDir::new(search_path)
> +            .follow_links(false)
> +            .into_iter()
> +            .filter_map(|e| e.ok())
> +        {
> +            let file_name = entry.file_name().to_string_lossy();

^ Since you're just throwing away the produced string and only want to check
the suffix, just do:

    use std::os::unix::ffi::OsStrExt;

    let file_name = entry.file_name().as_bytes();

and prefix the strings with a `b` => `b".fidx"`

> +            let mut index: Option<Box<dyn IndexFile>> = None;

Same pattern as above:

    let index = if file_name.ends_with(b".fidx") {
        ...
    } else if file_name.ends_with(b".didx") {
        ...
    } else {
        continue;
    };

Also note the `continue`:

> +
> +            if file_name.ends_with(".fidx") {
> +                index = match FixedIndexReader::open(entry.path()) {
> +                    Ok(index) => Some(Box::new(index)),
> +                    Err(_) => None,

You can just drop the whole `Some`/`Option` part and use

   Err(_) => continue,

here

> +                };
> +            }
> +
> +            if file_name.ends_with(".didx") {
> +                index = match DynamicIndexReader::open(entry.path()) {
> +                    Ok(index) => Some(Box::new(index)),
> +                    Err(_) => None,

and here

> +                };
> +            }
> +
> +            if let Some(index) = index {

and then drop the condition above entirely
and get rid of a level of indentation on the block below

> +                for pos in 0..index.index_count() {
> +                    if let Some(index_chunk_digest) = index.index_digest(pos) {
> +                        if digest_raw.eq(index_chunk_digest) {
> +                            references.push(entry.path().to_string_lossy().into_owned());
> +                            break;
> +                        }
> +                    }
> +                }
> +            }
> +        }
> +        referenced_by = Some(references);
> +    }
> +
> +    if decode_output_path.is_some() || to_stdout {

... because this is now: `if X || <something which implies X>`

> +        decode_blob(decode_output_path, key_file_path, Some(&digest_raw), &blob)?;
> +    }
> +
> +    let crc_status = format!(
> +        "{}({})",
> +        blob.compute_crc(),
> +        blob.verify_crc().map_or("NOK", |_| "OK")
> +    );
> +
> +    let val = match referenced_by {
> +        Some(references) => json!({
> +            "digest": digest_str,
> +            "crc": crc_status,
> +            "encryption": blob.crypt_mode()?,
> +            "referenced-by": references
> +        }),
> +        None => json!({
> +             "digest": digest_str,
> +             "crc": crc_status,
> +             "encryption": blob.crypt_mode()?,
> +        }),
> +    };
> +
> +    format_and_print_result(&val, &output_format);
> +    Ok(())
> +}
> +
> +pub fn inspect_commands() -> CommandLineInterface {
> +    let cmd_def = CliCommandMap::new().insert("chunk", CliCommand::new(&API_METHOD_INSPECT_CHUNK));

^ I think this should also get a `.arg_param(["chunk"])`.

Otherwise it feels somewhat as if you'd have to write
  $ cat --file foo
instead of just
  $ cat foo

;-)

> +
> +    cmd_def.into()
> +}
> diff --git a/src/bin/proxmox_backup_debug/mod.rs b/src/bin/proxmox_backup_debug/mod.rs
> new file mode 100644
> index 00000000..644583db
> --- /dev/null
> +++ b/src/bin/proxmox_backup_debug/mod.rs
> @@ -0,0 +1,2 @@
> +mod inspect;
> +pub use inspect::*;
> diff --git a/src/tools.rs b/src/tools.rs
> index 08af55e5..16ebf3fc 100644
> --- a/src/tools.rs
> +++ b/src/tools.rs
> @@ -6,7 +6,7 @@ use std::borrow::Borrow;
>  use std::collections::HashMap;
>  use std::hash::BuildHasher;
>  use std::fs::File;
> -use std::io::{self, BufRead, Read, Seek, SeekFrom};
> +use std::io::{self, BufRead, Read, Seek, SeekFrom, stdout, Write};
>  use std::os::unix::io::RawFd;
>  use std::path::Path;
>  
> @@ -571,3 +571,13 @@ pub fn create_run_dir() -> Result<(), Error> {
>      let _: bool = proxmox::tools::fs::create_path(PROXMOX_BACKUP_RUN_DIR_M!(), None, None)?;
>      Ok(())
>  }

Code below is fine but needs rebasing on current master.

> +
> +/// Returns either a new file, if a path is given, or stdout, if no path is given.
> +pub fn outfile_or_stdout<P: AsRef<Path>>(path: Option<P>) -> Result<Box<dyn Write>, Error> {
> +    if let Some(path) = path {
> +        let f = File::create(path)?;
> +        Ok(Box::new(f) as Box<dyn Write>)
> +    } else {
> +        Ok(Box::new(stdout()) as Box<dyn Write>)
> +    }
> +}
> -- 
> 2.20.1





More information about the pbs-devel mailing list