[pbs-devel] [PATCH v3 proxmox-backup 3/3] add index recovery to pb-debug

Tue Feb 9 11:36:22 CET 2021

On Fri, Feb 05, 2021 at 08:58:06AM +0100, Hannes Laimer wrote:
> Adds possibility to recover data from an index file. Options:
>  - chunks: path to the directory where the chunks are saved
>  - file: the index file that should be recovered(must be either .fidx or
>    didx)
>  - [opt] keyfile: path to a keyfile, if the data was encrypted, a keyfile is
>    needed
>  - [opt] skip-crc: boolean, if true, read chunks wont be verified with their
>    crc-sum, increases the restore speed by a lot
> 
> Signed-off-by: Hannes Laimer <h.laimer at proxmox.com>
> ---
> 
>  src/bin/proxmox-backup-debug.rs         |   6 +-
>  src/bin/proxmox_backup_debug/mod.rs     |   2 +
>  src/bin/proxmox_backup_debug/recover.rs | 109 ++++++++++++++++++++++++
>  3 files changed, 115 insertions(+), 2 deletions(-)
>  create mode 100644 src/bin/proxmox_backup_debug/recover.rs
> 
> diff --git a/src/bin/proxmox-backup-debug.rs b/src/bin/proxmox-backup-debug.rs
> index 079b2bc8..8bd01a59 100644
> --- a/src/bin/proxmox-backup-debug.rs
> +++ b/src/bin/proxmox-backup-debug.rs
> @@ -22,7 +22,9 @@ pub const KEYFILE_SCHEMA: Schema = StringSchema::new(
>  fn main() {
>      proxmox_backup::tools::setup_safe_path_env();
>  
> -    let cmd_def = CliCommandMap::new().insert("inspect", inspect_commands());
> +    let cmd_def = CliCommandMap::new()
> +        .insert("inspect", inspect_commands())
> +        .insert("recover", recover_commands());
>  
>      let rpcenv = CliEnvironment::new();
>      run_cli_command(
> @@ -30,4 +32,4 @@ fn main() {
>          rpcenv,
>          Some(|future| proxmox_backup::tools::runtime::main(future)),
>      );
> -}
> +}
> \ No newline at end of file
> diff --git a/src/bin/proxmox_backup_debug/mod.rs b/src/bin/proxmox_backup_debug/mod.rs
> index 644583db..62df7754 100644
> --- a/src/bin/proxmox_backup_debug/mod.rs
> +++ b/src/bin/proxmox_backup_debug/mod.rs
> @@ -1,2 +1,4 @@
>  mod inspect;
>  pub use inspect::*;
> +mod recover;
> +pub use recover::*;
> diff --git a/src/bin/proxmox_backup_debug/recover.rs b/src/bin/proxmox_backup_debug/recover.rs
> new file mode 100644
> index 00000000..61025adf
> --- /dev/null
> +++ b/src/bin/proxmox_backup_debug/recover.rs
> @@ -0,0 +1,109 @@
> +use std::fs::File;
> +use std::io::{Read, Write};
> +use std::path::Path;
> +
> +use anyhow::{bail, format_err, Error};
> +
> +use proxmox::api::api;
> +use proxmox::api::cli::{CliCommand, CliCommandMap, CommandLineInterface};
> +use serde_json::Value;
> +
> +use proxmox_backup::backup::{
> +    load_and_decrypt_key, CryptConfig, DataBlob, DynamicIndexReader, FixedIndexReader, IndexFile,
> +};
> +use proxmox_backup::tools;
> +
> +use crate::{get_encryption_key_password, KEYFILE_SCHEMA, PATH_SCHEMA};
> +
> +use proxmox::tools::digest_to_hex;
> +
> +use std::time::Instant;
> +
> +#[api(
> +    input: {
> +        properties: {
> +            file: {
> +                schema: PATH_SCHEMA,
> +            },
> +            chunks: {
> +                schema: PATH_SCHEMA,
> +            },
> +            "keyfile": {
> +                schema: KEYFILE_SCHEMA,
> +                optional: true,
> +            },
> +            "skip-crc": {
> +                type: Boolean,
> +                optional: true,
> +                default: false,
> +                description: "Skip the crc verification, increases the restore speed immensely",
> +            }
> +        }
> +    }
> +)]
> +/// Recover a index file

'an'

Perhapse add some more information of what this actually does and
particularly what its limitations are.

> +fn recover_index(skip_crc: bool, param: Value) -> Result<Value, Error> {
> +    let start = Instant::now();
> +    let file_path = Path::new(tools::required_string_param(&param, "file")?);
> +    let chunks_path = Path::new(tools::required_string_param(&param, "chunks")?);
> +
> +    let key_file_param = param["keyfile"].as_str();
> +    let key_file_path = key_file_param.map(|path| Path::new(path));
> +
> +    let index: Box<dyn IndexFile> = match file_path.extension() {
> +        Some(ext) if ext.eq("fidx") => Box::new(
> +            FixedIndexReader::open(file_path)
> +                .map_err(|e| format_err!("could not read index - {}", e))?,
> +        ),
> +        Some(ext) if ext.eq("didx") => Box::new(
> +            DynamicIndexReader::open(file_path)
> +                .map_err(|e| format_err!("could not read index - {}", e))?,
> +        ),
> +        _ => bail!("index file must either be a .fidx or a .didx file"),
> +    };

Come to think of it, we do have magic bytes at the top of all of these
file types, so shouldn't a *debug* binary use *that* instead of the
file's *name*? ;-)

*And* perhaps warn if the name doesn't match its type...

So for this we could just have a helper `fn detect_file_type() -> FileType`,
which we could use in all the other functions of this binary as well.

> +
> +    let mut crypt_conf_opt = None;
> +    let mut crypt_conf;
> +
> +    let output_filename = file_path.file_stem().unwrap().to_str().unwrap();
> +    let output_path = Path::new(output_filename);
> +    let mut output_file = File::create(output_path)
> +        .map_err(|e| format_err!("could not create output file - {}", e))?;
> +

I feel like the code below should already exist somewhere in some
half-way reusable form. It might be worth trying to instantiate a
`LocalDataStore` and use its `read_chunk` method, but I'm not sure if it
helps much. Plus it wouldn't reuse the buffer...

> +    for pos in 0..index.index_count() {
> +        let chunk_digest = index.index_digest(pos).unwrap();
> +        let digest_str = digest_to_hex(chunk_digest);
> +        let digest_prefix = &digest_str[0..4];
> +        let chunk_path = chunks_path.join(digest_prefix).join(digest_str);
> +        let mut chunk_file = std::fs::File::open(&chunk_path)
> +            .map_err(|e| format_err!("could not open chunk file - {}", e))?;
> +
> +        let mut data = Vec::with_capacity(1024 * 1024);

...but neither do you right now ;-)
Please move this above the loop and just use `.clear()` here (which does
not deallocate).

> +        chunk_file.read_to_end(&mut data)?;
> +        let chunk_blob = DataBlob::from_raw(data)?;
> +
> +        if !skip_crc {
> +            chunk_blob.verify_crc()?;
> +        }
> +
> +        if key_file_path.is_some() && chunk_blob.is_encrypted() && crypt_conf_opt.is_none() {
> +            let (key, _created, _fingerprint) =
> +                load_and_decrypt_key(&key_file_path.unwrap(), &get_encryption_key_password)?;
> +            crypt_conf = CryptConfig::new(key)?;
> +            crypt_conf_opt = Some(&crypt_conf);
> +        }
> +
> +        output_file.write_all(
> +            chunk_blob
> +                .decode(crypt_conf_opt, Some(chunk_digest))?
> +                .as_slice(),
> +        )?;
> +    }
> +    println!("{} sec.", start.elapsed().as_secs_f32());

As a frequent user of the `time` shell command I'm wondering if this is
really something I want to always see? ;-)

> +    Ok(Value::Null)
> +}
> +
> +pub fn recover_commands() -> CommandLineInterface {
> +    let cmd_def = CliCommandMap::new().insert("index", CliCommand::new(&API_METHOD_RECOVER_INDEX));
> +    cmd_def.into()
> +}
> -- 
> 2.20.1