[pbs-devel] [PATCH v4 proxmox-backup 1/3] add chunk inspection to pb-debug

Dominik Csapak d.csapak at proxmox.com
Mon Apr 12 15:08:54 CEST 2021


comments inline

On 2/19/21 13:09, Hannes Laimer wrote:
> Adds possibility to inspect chunks and find indexes that reference the
> chunk. Options:
>   - chunk: path to the chunk file
>   - [opt] decode: path to a file or to stdout(-), if specified, the chunk will
>     be decoded into the specified location
>   - [opt] keyfile: path to a keyfile, needed if decode is specified and the
>     data was encrypted
>   - [opt] reference-filter: path in which indexes that reference the chunk
>     should be searched, can be a group, snapshot or the whole datastore,
>     if not specified no references will be searched
> 
> Signed-off-by: Hannes Laimer <h.laimer at proxmox.com>
> ---
> v4:
>   - left the two schemas here since they are quite specific to this binary
>   - output_or_stdout() directly outputs the data instead of returning
>     stdout or an open file (could not find a type that allows to properly
>     return either stdout or a file)

the type would be either
  Box<dyn std::io::Write>
  File(then we'd have to to a File::from_raw_fd to get a File for stdout)
  enum Output {
    File(File),
    Stdout,
  } (then we would have either to match everywhere on that, or
impl write on that, so that we can simply do a write_all on it)

while the function is ok, i think (like Wolfgang) a helper to
get a handle to Option<File> or Option<Box<dyn Write>> or Option<Output>
would be much better than having an output_path and a to_stdout variable

> 
>   Makefile                                |   3 +-
>   src/bin/proxmox-backup-debug.rs         |  32 +++++
>   src/bin/proxmox_backup_debug/inspect.rs | 162 ++++++++++++++++++++++++
>   src/bin/proxmox_backup_debug/mod.rs     |   2 +
>   src/tools.rs                            |  11 +-
>   5 files changed, 208 insertions(+), 2 deletions(-)
>   create mode 100644 src/bin/proxmox-backup-debug.rs
>   create mode 100644 src/bin/proxmox_backup_debug/inspect.rs
>   create mode 100644 src/bin/proxmox_backup_debug/mod.rs
> 
> diff --git a/Makefile b/Makefile
> index b2ef9d32..120e7f98 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -15,7 +15,8 @@ USR_BIN := \
>   
>   # Binaries usable by admins
>   USR_SBIN := \
> -	proxmox-backup-manager
> +	proxmox-backup-manager \
> +	proxmox-backup-debug \
>   
>   # Binaries for services:
>   SERVICE_BIN := \
> diff --git a/src/bin/proxmox-backup-debug.rs b/src/bin/proxmox-backup-debug.rs
> new file mode 100644
> index 00000000..ae747c5d
> --- /dev/null
> +++ b/src/bin/proxmox-backup-debug.rs
> @@ -0,0 +1,32 @@
> +use anyhow::Error;
> +
> +use proxmox::api::cli::*;
> +use proxmox::api::schema::{Schema, StringSchema};
> +use proxmox::sys::linux::tty;
> +
> +mod proxmox_backup_debug;
> +use proxmox_backup_debug::inspect_commands;
> +
> +pub fn get_encryption_key_password() -> Result<Vec<u8>, Error> {
> +    tty::read_password("Encryption Key Password: ")
> +}
> +
> +pub const PATH_SCHEMA: Schema = StringSchema::new("Path to a file or a directory").schema();
> +
> +pub const KEYFILE_SCHEMA: Schema = StringSchema::new(
> +    "Path to encryption key. If the data was encrypted, this key will be used for decryption.",
> +)
> +.schema();

at least this we could reuse from the proxmox_client_tools, no ?
if there is a more specific reason, please state so at least in a 
comment. (proxmox-file-restore also uses the proxmox_client_tools for 
example)

> +
> +fn main() {
> +    proxmox_backup::tools::setup_safe_path_env();
> +
> +    let cmd_def = CliCommandMap::new().insert("inspect", inspect_commands());
> +
> +    let rpcenv = CliEnvironment::new();
> +    run_cli_command(
> +        cmd_def,
> +        rpcenv,
> +        Some(|future| proxmox_backup::tools::runtime::main(future)),
> +    );
> +}
> diff --git a/src/bin/proxmox_backup_debug/inspect.rs b/src/bin/proxmox_backup_debug/inspect.rs
> new file mode 100644
> index 00000000..b211857e
> --- /dev/null
> +++ b/src/bin/proxmox_backup_debug/inspect.rs
> @@ -0,0 +1,162 @@
> +use std::path::Path;
> +
> +use anyhow::{format_err, Error};
> +use proxmox::api::cli::{
> +    format_and_print_result, get_output_format, CliCommand, CliCommandMap, CommandLineInterface,
> +};
> +use proxmox::api::{api, cli::*};
> +use serde_json::{json, Value};
> +use walkdir::WalkDir;
> +
> +use proxmox_backup::backup::{
> +    load_and_decrypt_key, CryptConfig, DataBlob, DynamicIndexReader, FixedIndexReader, IndexFile,
> +};
> +
> +use crate::{get_encryption_key_password, KEYFILE_SCHEMA, PATH_SCHEMA};
> +use proxmox_backup::tools::output_or_stdout;
> +
> +/// Decodes a blob and writes its content either to stdout or into a file
> +fn decode_blob(
> +    output_path: Option<&Path>,
> +    key_file: Option<&Path>,
> +    digest: Option<&[u8; 32]>,
> +    blob: &DataBlob,
> +) -> Result<(), Error> {
> +    let mut crypt_conf_opt = None;
> +    let crypt_conf;
> +
> +    if blob.is_encrypted() && key_file.is_some() {
> +        let (key, _created, _fingerprint) =
> +            load_and_decrypt_key(&key_file.unwrap(), &get_encryption_key_password)?;
> +        crypt_conf = CryptConfig::new(key)?;
> +        crypt_conf_opt = Some(&crypt_conf);
> +    }
> +
> +    output_or_stdout(output_path, blob.decode(crypt_conf_opt, digest)?.as_slice())
> +}
> +
> +#[api(
> +    input: {
> +        properties: {
> +            chunk: {
> +                schema: PATH_SCHEMA,
> +            },
> +            "reference-filter": {
> +                schema: PATH_SCHEMA,
> +                optional: true,
> +            },
> +            "decode": {
> +                schema: PATH_SCHEMA,
> +                optional: true,
> +            },
> +            "keyfile": {
> +                schema: KEYFILE_SCHEMA,
> +                optional: true,
> +            },
> +            "output-format": {
> +                schema: OUTPUT_FORMAT,
> +                optional: true,
> +            },
> +        }
> +    }
> +)]
> +/// Inspect a chunk
> +fn inspect_chunk(
> +    chunk: String,
> +    reference_filter: Option<String>,
> +    decode: Option<String>,
> +    keyfile: Option<String>,
> +    param: Value,
> +) -> Result<(), Error> {
> +    let output_format = get_output_format(&param);
> +
> +    let chunk_path = Path::new(&chunk);
> +    let digest_str = chunk_path.file_name().unwrap().to_str().unwrap();
> +    let digest_raw = proxmox::tools::hex_to_digest(digest_str)
> +        .map_err(|e| format_err!("could not parse chunk - {}", e))?;
> +
> +    let search_path = reference_filter.as_ref().map(Path::new);
> +    let key_file_path = keyfile.as_ref().map(Path::new);
> +
> +    let (decode_output_path, to_stdout) = (
> +        decode.as_ref().map(Path::new),
> +        decode.clone().map_or(false, |p| p.eq("-")),
> +    );
> +
> +    let mut file = std::fs::File::open(&chunk_path)
> +        .map_err(|e| format_err!("could not open chunk file - {}", e))?;
> +    let blob = DataBlob::load_from_reader(&mut file)?;
> +
> +    let mut referenced_by = None;
> +    if let Some(search_path) = search_path {
> +        let mut references = Vec::new();
> +        for entry in WalkDir::new(search_path)
> +            .follow_links(false)
> +            .into_iter()
> +            .filter_map(|e| e.ok())
> +        {
> +            let file_name = entry.file_name().to_string_lossy();
> +            let mut index: Option<Box<dyn IndexFile>> = None;
> +
> +            if file_name.ends_with(".fidx") {
> +                index = match FixedIndexReader::open(entry.path()) {
> +                    Ok(index) => Some(Box::new(index)),
> +                    Err(_) => None,
> +                };
> +            }
> +
> +            if file_name.ends_with(".didx") {
> +                index = match DynamicIndexReader::open(entry.path()) {
> +                    Ok(index) => Some(Box::new(index)),
> +                    Err(_) => None,
> +                };
> +            }
> +
> +            if let Some(index) = index {
> +                for pos in 0..index.index_count() {
> +                    if let Some(index_chunk_digest) = index.index_digest(pos) {
> +                        let str = proxmox::tools::digest_to_hex(index_chunk_digest);
> +                        if str.eq(digest_str) {

why do you compare str here?
would it not be enough to compare index_chunk_digest to digest_raw ?
i guess a comparison of to fixed sized arrays is faster than comparing 
to str ? it's at least a comparison less, and if i let this run over
my whole datastore this can easily add up

> +                            references.push(entry.path().to_string_lossy().into_owned());
> +                            break;
> +                        }
> +                    }
> +                }
> +            }
> +        }
> +        referenced_by = Some(references);
> +    }
> +
> +    if decode_output_path.is_some() || to_stdout {
> +        decode_blob(decode_output_path, key_file_path, Some(&digest_raw), &blob)?;
> +    }
> +
> +    let crc_status = format!(
> +        "{}({})",
> +        blob.compute_crc(),
> +        blob.verify_crc().map_or("NOK", |_| "OK")
> +    );
> +
> +    let val = match referenced_by {
> +        Some(references) => json!({
> +            "digest": digest_str,
> +            "crc": crc_status,
> +            "encryption": blob.crypt_mode()?,
> +            "referenced-by": references
> +        }),
> +        None => json!({
> +             "digest": digest_str,
> +             "crc": crc_status,
> +             "encryption": blob.crypt_mode()?,
> +        }),
> +    };
> +
> +    format_and_print_result(&val, &output_format);
> +    Ok(())
> +}
> +
> +pub fn inspect_commands() -> CommandLineInterface {
> +    let cmd_def = CliCommandMap::new().insert("chunk", CliCommand::new(&API_METHOD_INSPECT_CHUNK));
> +
> +    cmd_def.into()
> +}
> diff --git a/src/bin/proxmox_backup_debug/mod.rs b/src/bin/proxmox_backup_debug/mod.rs
> new file mode 100644
> index 00000000..644583db
> --- /dev/null
> +++ b/src/bin/proxmox_backup_debug/mod.rs
> @@ -0,0 +1,2 @@
> +mod inspect;
> +pub use inspect::*;
> diff --git a/src/tools.rs b/src/tools.rs
> index cc782da2..2ab8157e 100644
> --- a/src/tools.rs
> +++ b/src/tools.rs
> @@ -6,7 +6,7 @@ use std::borrow::Borrow;
>   use std::collections::HashMap;
>   use std::hash::BuildHasher;
>   use std::fs::File;
> -use std::io::{self, BufRead, Read, Seek, SeekFrom};
> +use std::io::{self, BufRead, Read, Seek, SeekFrom, stdout, Write};
>   use std::os::unix::io::RawFd;
>   use std::path::Path;
>   
> @@ -566,3 +566,12 @@ pub fn create_run_dir() -> Result<(), Error> {
>       let _: bool = proxmox::tools::fs::create_path(PROXMOX_BACKUP_RUN_DIR_M!(), None, None)?;
>       Ok(())
>   }
> +
> +/// Writes data to stdout or a file, based on whether a path is given.
> +pub fn output_or_stdout<P: AsRef<Path>>(path: Option<P>, data: &[u8]) -> Result<(), Error> {
> +    if let Some(path) = path {
> +        File::create(path)?.write_all(data).map_err(Error::new)
> +    } else {
> +        stdout().write_all(data).map_err(Error::new)
> +    }
> +}
> 






More information about the pbs-devel mailing list