[pbs-devel] [PATCH proxmox-backup 1/3] add inspection of chunks

Thomas Lamprecht t.lamprecht at proxmox.com
Tue Dec 15 11:14:24 CET 2020


Still no patch version, that must be used! Use the `-vX` option to your git
format-patch or git send-email command, please really read, IIRC I'm linking that
the third time now!

https://pve.proxmox.com/wiki/Developer_Documentation#Versioned_Patches

On 15.12.20 09:16, Hannes Laimer wrote:
> Signed-off-by: Hannes Laimer <h.laimer at proxmox.com>
> ---

below this triple dash is for meta information, it is not the commit message
but should describe changes since the last version of the patch, it won't get
included in git.

> Adds possibility to inspect chunks and find indexes that reference the
> chunk. Options:
>  - chunk: path to the chunk file
>  - [opt] decode: path to a file or to stdout(-), if specified, the chunk will
>    be decoded into the specified location
>  - [opt] keyfile: path to a keyfile, needed if decode is specified and the
>    data was encrypted
>  - [opt] reference-filter: path in which indexes that reference the chunk
>    should be searched, can be a group, snapshot or the whole datastore,
>    if not specified no references will be searched
>  Makefile                                   |   3 +-
>  src/bin/proxmox-backup-recovery.rs         |  32 ++++

what are they now, recovery or inspect tools? Maybe a more generic name like:

# proxmox-backup-debug 
   inspect chunk ...
   inspect file ...
   recover index ...

I mean what is the plan for the whole CLI tool, what additional commands
could be added, I'd like to not just tape this together somehow, but have
somewhat of a design plan..

Documentation patches would be also good to have, e.g., what is the reference-filter,
full path to datastore, index, ...?

>  src/bin/proxmox_backup_recovery/inspect.rs | 195 +++++++++++++++++++++
>  src/bin/proxmox_backup_recovery/mod.rs     |   2 +
>  4 files changed, 231 insertions(+), 1 deletion(-)
>  create mode 100644 src/bin/proxmox-backup-recovery.rs
>  create mode 100644 src/bin/proxmox_backup_recovery/inspect.rs
>  create mode 100644 src/bin/proxmox_backup_recovery/mod.rs
> 
> diff --git a/Makefile b/Makefile
> index a1af9d51..a9c2a711 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -13,7 +13,8 @@ USR_BIN := \
>  
>  # Binaries usable by admins
>  USR_SBIN := \
> -	proxmox-backup-manager
> +	proxmox-backup-manager \
> +	proxmox-backup-recovery \
>  
>  # Binaries for services:
>  SERVICE_BIN := \
> diff --git a/src/bin/proxmox-backup-recovery.rs b/src/bin/proxmox-backup-recovery.rs
> new file mode 100644
> index 00000000..ae8c18d5
> --- /dev/null
> +++ b/src/bin/proxmox-backup-recovery.rs
> @@ -0,0 +1,32 @@
> +use anyhow::Error;
> +use proxmox::api::schema::{Schema, StringSchema};
> +use proxmox::api::cli::*;
> +
> +use proxmox::sys::linux::tty;
> +use proxmox_backup_recovery::*;
> +mod proxmox_backup_recovery;
> +
> +pub fn get_encryption_key_password() -> Result<Vec<u8>, Error> {
> +    tty::read_password("Encryption Key Password: ")
> +}
> +
> +pub const PATH_SCHEMA: Schema = StringSchema::new("Path to a file or a directory").schema();
> +
> +pub const KEYFILE_SCHEMA: Schema = StringSchema::new(
> +    "Path to encryption key. If the data was encrypted, this key will be used for decryption.",
> +)
> +.schema();
> +
> +fn main() {
> +    proxmox_backup::tools::setup_safe_path_env();
> +
> +    let cmd_def = CliCommandMap::new()
> +        .insert("inspect", inspect_commands());
> +
> +    let rpcenv = CliEnvironment::new();
> +    run_cli_command(
> +        cmd_def,
> +        rpcenv,
> +        Some(|future| proxmox_backup::tools::runtime::main(future)),
> +    );
> +}
> diff --git a/src/bin/proxmox_backup_recovery/inspect.rs b/src/bin/proxmox_backup_recovery/inspect.rs
> new file mode 100644
> index 00000000..6bf4a0a3
> --- /dev/null
> +++ b/src/bin/proxmox_backup_recovery/inspect.rs
> @@ -0,0 +1,195 @@
> +use std::collections::HashSet;
> +use std::fs::File;
> +use std::io::Write;
> +use std::path::Path;
> +
> +use anyhow::Error;
> +use proxmox::api::cli::{
> +    format_and_print_result, get_output_format, CliCommand, CliCommandMap, CommandLineInterface,
> +};
> +use proxmox::api::{api, cli::*, RpcEnvironment};
> +use serde_json::{json, Value};
> +use walkdir::WalkDir;
> +
> +use proxmox_backup::api2::types::SHA256_HEX_REGEX;
> +use proxmox_backup::backup::{
> +    load_and_decrypt_key, CryptConfig, DataBlob, DynamicIndexReader, FixedIndexReader, IndexFile,
> +};
> +use proxmox_backup::tools;
> +
> +use crate::{get_encryption_key_password, KEYFILE_SCHEMA, PATH_SCHEMA};
> +
> +/// Decodes a blob and writes its content either to stdout or into a file
> +fn decode_blob(
> +    output_path: Option<&Path>,
> +    key_file: Option<&Path>,
> +    digest: Option<&[u8; 32]>,
> +    blob: &DataBlob,
> +) -> Result<(), Error> {
> +    let mut crypt_conf_opt = None;
> +    let crypt_conf;
> +
> +    if blob.is_encrypted() && key_file.is_some() {
> +        let (key, _created, _fingerprint) =
> +            load_and_decrypt_key(&key_file.unwrap(), &get_encryption_key_password)?;
> +        crypt_conf = CryptConfig::new(key)?;
> +        crypt_conf_opt = Some(&crypt_conf);
> +    }
> +
> +    if output_path.is_some() {
> +        let mut file = File::create(output_path.unwrap())?;
> +        Ok(file.write_all(blob.decode(crypt_conf_opt, digest)?.as_slice())?)
> +    } else {
> +        Ok(println!(
> +            "{}",
> +            String::from_utf8_lossy(blob.decode(crypt_conf_opt, digest)?.as_slice())
> +        ))
> +    }
> +}
> +
> +#[api(
> +    input: {
> +        properties: {
> +            chunk: {
> +                schema: PATH_SCHEMA,
> +            },
> +            "reference-filter": {
> +                schema: PATH_SCHEMA,
> +                optional: true,
> +            },
> +            "decode": {
> +                schema: PATH_SCHEMA,
> +                optional: true,
> +            },
> +            "keyfile": {
> +                schema: KEYFILE_SCHEMA,
> +                optional: true,
> +            },
> +            "output-format": {
> +                schema: OUTPUT_FORMAT,
> +                optional: true,
> +            },
> +        }
> +    }
> +)]
> +/// Inspect a chunk
> +fn inspect_chunk(param: Value, _rpcenv: &mut dyn RpcEnvironment) -> Result<Value, Error> {
> +    let chunk_path = Path::new(tools::required_string_param(&param, "chunk")?);
> +    let output_format = get_output_format(&param);
> +    let digest_str = chunk_path.file_name().unwrap().to_str().unwrap();
> +
> +    if !SHA256_HEX_REGEX.is_match(digest_str) {
> +        println!("chunk filename is not valid");

at least to stderr, using eprintln! maybe also refer why is it not valid.

> +        return Ok(Value::Null);

maybe even use bail!(...), I mean, feels weird that we exit with 0 in such a case

> +    }
> +
> +    let digest_raw = proxmox::tools::hex_to_digest(digest_str)?;

or, you may even drop above REGEX check + if and just do:

let digest_raw = proxmox::tools::hex_to_digest(digest_str)
    .map_err(|e| format_err!("could not parse chunk - {}", e))?;

> +
> +    let reference_filter_param = param["reference-filter"].as_str();
> +    let decode_output_param = param["decode"].as_str();
> +    let key_file_param = param["keyfile"].as_str();
> +
> +    let mut search_path = None;
> +    let mut decode_output_path = None;
> +    let mut key_file_path = None;
> +    let mut to_stdout = false;
> +
> +    if let Some(path) = reference_filter_param {
> +        search_path = Some(Path::new(path))
> +    };

Rather avoid mutable plus three extra lines (noise) and do

let search_path = reference_filter_param.map(|path| Path::new(path)):

> +
> +    if let Some(path) = decode_output_param {
> +        to_stdout = path.eq("-");
> +        decode_output_path = Some(Path::new(path))

maybe a match could fit above better and do away with mutables

let (decode_output_path, to_stdout) = match decode_output_param {
    Some(path) if path.eq("-") => (None, true),
    Some(path) => (Some(path), false),
    None => (None, false),
};

just a line less total, but more explicit,  no hard feelings here though


> +    };
> +
> +    if let Some(path) = key_file_param {
> +        key_file_path = Some(Path::new(path))
> +    };

use map here

let key_file_path = key_file_param.map(|path| Path::new(path));

> +
> +    let mut file = std::fs::File::open(&chunk_path)?;

may map_err + format! can make this a better UX if the open fails

> +    let blob = DataBlob::load_from_reader(&mut file)?;
> +
> +    let mut referenced_by = None;
> +    if let Some(search_path) = search_path {
> +        let mut references = Vec::new();
> +        for entry in WalkDir::new(search_path)
> +            .follow_links(false)
> +            .into_iter()
> +            .filter_map(|e| e.ok())
> +        {
> +            let file_name = entry.file_name().to_string_lossy();
> +            let mut in_index = HashSet::new();

why is this defined here, but only used below, actually it should not be required anyway.


> +            let mut index: Option<Box<dyn IndexFile>> = None;
> +
> +            if file_name.ends_with(".fidx") {
> +                index = match FixedIndexReader::open(entry.path()) {
> +                    Ok(index) => Some(Box::new(index)),
> +                    Err(_) => None,
> +                };
> +            }
> +
> +            if file_name.ends_with(".didx") {
> +                index = match DynamicIndexReader::open(entry.path()) {
> +                    Ok(index) => Some(Box::new(index)),
> +                    Err(_) => None,
> +                };
> +            }
> +
> +            if let Some(index) = index {
> +                for pos in 0..index.index_count() {
> +                    if let Some(index_chunk_digest) = index.index_digest(pos) {
> +                        in_index.insert(proxmox::tools::digest_to_hex(index_chunk_digest));
> +                    }
> +                }
> +            }
> +

you could move below if hunk in the for loop's if above, avoiding the need for any hashset

> +            if in_index.contains(digest_str) {
> +                references.push(entry.path().to_string_lossy().into_owned());
> +            }
> +        }
> +        referenced_by = Some(references);
> +    }
> +
> +    if let Some(decode_output_path) = decode_output_path {
> +        if to_stdout {
> +            decode_blob(None, key_file_path, Some(&digest_raw), &blob)?;
> +        } else {
> +            decode_blob(
> +                Some(decode_output_path),
> +                key_file_path,
> +                Some(&digest_raw),
> +                &blob,
> +            )?;
> +        }
> +    }

Above 12 lines could be reduced with something like (untested):

if decode_output_path.is_some() {
    let decode_output_path =  if to_stdout { None } else { decode_output_path };

    decode_blob(decode_output_path, key_file_path, Some(&digest_raw), &blob)?;
}

> +
> +    let crc_status = format!(
> +        "{}({})",
> +        blob.compute_crc(),
> +        blob.verify_crc().map_or("NOK", |_x| "OK")

rather use just _ if unused param

> +    );
> +
> +    let val = match referenced_by {
> +        Some(references) => json!({
> +            "digest": digest_str,
> +            "crc": crc_status,
> +            "encryption": blob.crypt_mode()?,
> +            "referenced-by": references
> +        }),
> +        None => json!({
> +             "digest": digest_str,
> +             "crc": crc_status,
> +             "encryption": blob.crypt_mode()?,
> +        }),
> +    };
> +
> +    format_and_print_result(&val, &output_format);
> +    Ok(Value::Null)
> +}
> +
> +pub fn inspect_commands() -> CommandLineInterface {
> +    let cmd_def = CliCommandMap::new().insert("chunk", CliCommand::new(&API_METHOD_INSPECT_CHUNK));
> +
> +    cmd_def.into()
> +}
> diff --git a/src/bin/proxmox_backup_recovery/mod.rs b/src/bin/proxmox_backup_recovery/mod.rs
> new file mode 100644
> index 00000000..644583db
> --- /dev/null
> +++ b/src/bin/proxmox_backup_recovery/mod.rs
> @@ -0,0 +1,2 @@
> +mod inspect;
> +pub use inspect::*;
> 







More information about the pbs-devel mailing list