[pbs-devel] [PATCH v3 proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries

Fabian Grünbichler f.gruenbichler at proxmox.com
Thu Apr 4 14:54:53 CEST 2024


On March 28, 2024 1:36 pm, Christian Ebner wrote:
> The helper method allows looking up the entries of a dynamic index
> which fully cover a given offset range. Further, the helper returns
> the start padding from the start offset of the first dynamic index
> entry to the start offset of the given range, as well as the end
> padding from the end offset of the given range to the end offset of
> the last dynamic index entry.
> 
> This will be used to look up size and digest for chunks covering the
> payload range of a regular file in order to re-use found chunks by
> indexing them in the archive's index file instead of re-encoding the
> payload.
> 
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> changes since version 2:
> - moved this from the dynamic index to the pxar create as suggested
> - refactored and optimized search, going for linear search to find the
>   end entry
> - reworded commit message
> 
>  pbs-client/src/pxar/create.rs | 63 +++++++++++++++++++++++++++++++++++
>  1 file changed, 63 insertions(+)
> 
> diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
> index 2bb5a6253..e2d3954ca 100644
> --- a/pbs-client/src/pxar/create.rs
> +++ b/pbs-client/src/pxar/create.rs
> @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet};
>  use std::ffi::{CStr, CString, OsStr};
>  use std::fmt;
>  use std::io::{self, Read};
> +use std::ops::Range;
>  use std::os::unix::ffi::OsStrExt;
>  use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
>  use std::path::{Path, PathBuf};
> @@ -16,6 +17,7 @@ use nix::fcntl::OFlag;
>  use nix::sys::stat::{FileStat, Mode};
>  
>  use pathpatterns::{MatchEntry, MatchFlag, MatchList, MatchType, PatternFlag};
> +use pbs_datastore::index::IndexFile;
>  use proxmox_sys::error::SysError;
>  use pxar::encoder::{LinkOffset, SeqWrite};
>  use pxar::Metadata;
> @@ -25,6 +27,7 @@ use proxmox_lang::c_str;
>  use proxmox_sys::fs::{self, acl, xattr};
>  
>  use pbs_datastore::catalog::BackupCatalogWriter;
> +use pbs_datastore::dynamic_index::DynamicIndexReader;
>  
>  use crate::pxar::metadata::errno_is_unsupported;
>  use crate::pxar::tools::assert_single_path_component;
> @@ -791,6 +794,66 @@ impl Archiver {
>      }
>  }
>  
> +/// Dynamic Entry reusable by payload references
> +#[derive(Clone, Debug)]
> +#[repr(C)]
> +pub struct ReusableDynamicEntry {
> +    size_le: u64,
> +    digest: [u8; 32],
> +}
> +
> +impl ReusableDynamicEntry {
> +    #[inline]
> +    pub fn size(&self) -> u64 {
> +        u64::from_le(self.size_le)
> +    }
> +
> +    #[inline]
> +    pub fn digest(&self) -> [u8; 32] {
> +        self.digest
> +    }
> +}
> +
> +/// List of dynamic entries containing the data given by an offset range
> +fn lookup_dynamic_entries(
> +    index: &DynamicIndexReader,
> +    range: Range<u64>,
> +) -> Result<(Vec<ReusableDynamicEntry>, u64, u64), Error> {
> +    let end_idx = index.index_count() - 1;
> +    let chunk_end = index.chunk_end(end_idx);
> +    let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?;
> +    let mut end = start;
> +    while end < end_idx {
> +        if range.end < index.chunk_end(end) {
> +            break;
> +        }
> +        end += 1;
> +    }

this loop here

> +
> +    let offset_first = if start == 0 {
> +        0
> +    } else {
> +        index.chunk_end(start - 1)
> +    };

offset_first is prev_end, so maybe we could just name it like that from
the start?

> +
> +    let padding_start = range.start - offset_first;
> +    let padding_end = index.chunk_end(end) - range.end;
> +
> +    let mut indices = Vec::new();
> +    let mut prev_end = offset_first;
> +    for dynamic_entry in &index.index()[start..end + 1] {
> +        let size = dynamic_entry.end() - prev_end;
> +        let reusable_dynamic_entry = ReusableDynamicEntry {
> +            size_le: size.to_le(),
> +            digest: dynamic_entry.digest(),
> +        };
> +        prev_end += size;
> +        indices.push(reusable_dynamic_entry);
> +    }

and this one here could probably be combined?

> +
> +    Ok((indices, padding_start, padding_end))
> +}

e.g., the whole thing could become something like (untested ;)):

    // Suggested single-pass body for `lookup_dynamic_entries`: collects the
    // entries from `start` onwards and derives both paddings in one loop.
    // `index` and `range` are the parameters of the enclosing function.
    let end_idx = index.index_count() - 1;
    let chunk_end = index.chunk_end(end_idx);
    // presumably yields the index of the entry containing `range.start` —
    // TODO confirm against `DynamicIndexReader::binary_search`
    let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?;

    // End offset of the entry preceding `start`, or 0 when `start` is the
    // first entry; updated to the current entry's end after every push.
    let mut prev_end = if start == 0 {
        0
    } else {
        index.chunk_end(start - 1)
    };
    let padding_start = range.start - prev_end;
    // Stays 0 if the loop below runs to the last entry without breaking.
    let mut padding_end = 0;

    let mut indices = Vec::new();
    for dynamic_entry in &index.index()[start..] {
        let end = dynamic_entry.end();
        // NOTE(review): the entry for which `range.end < end` holds is not
        // pushed before the `break`, whereas the quoted patch includes it
        // (slice `start..end + 1`). `padding_end` is still measured against
        // that entry — confirm whether dropping it is intended.
        if range.end < end {
            padding_end = end - range.end;
            break;
        }

        // Entry size is the distance between consecutive end offsets.
        let reusable_dynamic_entry = ReusableDynamicEntry {
            size_le: (end - prev_end).to_le(),
            digest: dynamic_entry.digest(),
        };
        indices.push(reusable_dynamic_entry);
        prev_end = end;
    }

    Ok((indices, padding_start, padding_end))

> +
>  fn get_metadata(
>      fd: RawFd,
>      stat: &FileStat,
> -- 
> 2.39.2
> 
> 
> 
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
> 
> 
> 




More information about the pbs-devel mailing list