[pbs-devel] [PATCH v3 proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries
Fabian Grünbichler
f.gruenbichler at proxmox.com
Thu Apr 4 14:54:53 CEST 2024
On March 28, 2024 1:36 pm, Christian Ebner wrote:
> The helper method allows to lookup the entries of a dynamic index
> which fully cover a given offset range. Further, the helper returns
> the start padding from the start offset of the dynamic index entry
> to the start offset of the given range and the end padding.
>
> This will be used to lookup size and digest for chunks covering the
> payload range of a regular file in order to re-use found chunks by
> indexing them in the archives index file instead of re-encoding the
> payload.
>
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> changes since version 2:
> - moved this from the dynamic index to the pxar create as suggested
> - refactored and optimized search, going for linear search to find the
> end entry
> - reworded commit message
>
> pbs-client/src/pxar/create.rs | 63 +++++++++++++++++++++++++++++++++++
> 1 file changed, 63 insertions(+)
>
> diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
> index 2bb5a6253..e2d3954ca 100644
> --- a/pbs-client/src/pxar/create.rs
> +++ b/pbs-client/src/pxar/create.rs
> @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet};
> use std::ffi::{CStr, CString, OsStr};
> use std::fmt;
> use std::io::{self, Read};
> +use std::ops::Range;
> use std::os::unix::ffi::OsStrExt;
> use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
> use std::path::{Path, PathBuf};
> @@ -16,6 +17,7 @@ use nix::fcntl::OFlag;
> use nix::sys::stat::{FileStat, Mode};
>
> use pathpatterns::{MatchEntry, MatchFlag, MatchList, MatchType, PatternFlag};
> +use pbs_datastore::index::IndexFile;
> use proxmox_sys::error::SysError;
> use pxar::encoder::{LinkOffset, SeqWrite};
> use pxar::Metadata;
> @@ -25,6 +27,7 @@ use proxmox_lang::c_str;
> use proxmox_sys::fs::{self, acl, xattr};
>
> use pbs_datastore::catalog::BackupCatalogWriter;
> +use pbs_datastore::dynamic_index::DynamicIndexReader;
>
> use crate::pxar::metadata::errno_is_unsupported;
> use crate::pxar::tools::assert_single_path_component;
> @@ -791,6 +794,66 @@ impl Archiver {
> }
> }
>
> +/// Dynamic Entry reusable by payload references
> +#[derive(Clone, Debug)]
> +#[repr(C)]
> +pub struct ReusableDynamicEntry {
> + size_le: u64,
> + digest: [u8; 32],
> +}
> +
> +impl ReusableDynamicEntry {
> + #[inline]
> + pub fn size(&self) -> u64 {
> + u64::from_le(self.size_le)
> + }
> +
> + #[inline]
> + pub fn digest(&self) -> [u8; 32] {
> + self.digest
> + }
> +}
> +
> +/// List of dynamic entries containing the data given by an offset range
> +fn lookup_dynamic_entries(
> + index: &DynamicIndexReader,
> + range: Range<u64>,
> +) -> Result<(Vec<ReusableDynamicEntry>, u64, u64), Error> {
> + let end_idx = index.index_count() - 1;
> + let chunk_end = index.chunk_end(end_idx);
> + let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?;
> + let mut end = start;
> + while end < end_idx {
> + if range.end < index.chunk_end(end) {
> + break;
> + }
> + end += 1;
> + }
this loop here
> +
> + let offset_first = if start == 0 {
> + 0
> + } else {
> + index.chunk_end(start - 1)
> + };
offset_first is prev_end, so maybe we could just name it like that from
the start?
> +
> + let padding_start = range.start - offset_first;
> + let padding_end = index.chunk_end(end) - range.end;
> +
> + let mut indices = Vec::new();
> + let mut prev_end = offset_first;
> + for dynamic_entry in &index.index()[start..end + 1] {
> + let size = dynamic_entry.end() - prev_end;
> + let reusable_dynamic_entry = ReusableDynamicEntry {
> + size_le: size.to_le(),
> + digest: dynamic_entry.digest(),
> + };
> + prev_end += size;
> + indices.push(reusable_dynamic_entry);
> + }
and this one here could probably be combined?
> +
> + Ok((indices, padding_start, padding_end))
> +}
e.g., the whole thing could become something like (untested ;)):
let end_idx = index.index_count() - 1;
let chunk_end = index.chunk_end(end_idx);
let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?;
let mut prev_end = if start == 0 {
0
} else {
index.chunk_end(start - 1)
};
let padding_start = range.start - prev_end;
let mut padding_end = 0;
let mut indices = Vec::new();
for dynamic_entry in &index.index()[start..] {
let end = dynamic_entry.end();
if range.end < end {
padding_end = end - range.end;
break;
}
let reusable_dynamic_entry = ReusableDynamicEntry {
size_le: (end - prev_end).to_le(),
digest: dynamic_entry.digest(),
};
indices.push(reusable_dynamic_entry);
prev_end = end;
}
Ok((indices, padding_start, padding_end))
> +
> fn get_metadata(
> fd: RawFd,
> stat: &FileStat,
> --
> 2.39.2
>
>
>
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>
>
>
More information about the pbs-devel
mailing list