[pbs-devel] [PATCH v3 proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries
Christian Ebner
c.ebner at proxmox.com
Fri Apr 5 09:22:42 CEST 2024
On 4/4/24 19:13, Christian Ebner wrote:
> On 4/4/24 14:54, Fabian Grünbichler wrote:
>> On March 28, 2024 1:36 pm, Christian Ebner wrote:
>>> The helper method allows looking up the entries of a dynamic index
>>> which fully cover a given offset range. Further, the helper returns
>>> the start padding from the start offset of the first dynamic index
>>> entry to the start offset of the given range, and the end padding
>>> from the end offset of the given range to the end offset of the last
>>> dynamic index entry.
>>>
>>> This will be used to look up size and digest for chunks covering the
>>> payload range of a regular file in order to re-use found chunks by
>>> indexing them in the archive's index file instead of re-encoding the
>>> payload.
>>>
>>> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
>>> ---
>>> changes since version 2:
>>> - moved this from the dynamic index to the pxar create as suggested
>>> - refactored and optimized search, going for linear search to find the
>>> end entry
>>> - reworded commit message
>>>
>>> pbs-client/src/pxar/create.rs | 63 +++++++++++++++++++++++++++++++++++
>>> 1 file changed, 63 insertions(+)
>>>
>>> diff --git a/pbs-client/src/pxar/create.rs
>>> b/pbs-client/src/pxar/create.rs
>>> index 2bb5a6253..e2d3954ca 100644
>>> --- a/pbs-client/src/pxar/create.rs
>>> +++ b/pbs-client/src/pxar/create.rs
>>> @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet};
>>> use std::ffi::{CStr, CString, OsStr};
>>> use std::fmt;
>>> use std::io::{self, Read};
>>> +use std::ops::Range;
>>> use std::os::unix::ffi::OsStrExt;
>>> use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd,
>>> RawFd};
>>> use std::path::{Path, PathBuf};
>>> @@ -16,6 +17,7 @@ use nix::fcntl::OFlag;
>>> use nix::sys::stat::{FileStat, Mode};
>>> use pathpatterns::{MatchEntry, MatchFlag, MatchList, MatchType,
>>> PatternFlag};
>>> +use pbs_datastore::index::IndexFile;
>>> use proxmox_sys::error::SysError;
>>> use pxar::encoder::{LinkOffset, SeqWrite};
>>> use pxar::Metadata;
>>> @@ -25,6 +27,7 @@ use proxmox_lang::c_str;
>>> use proxmox_sys::fs::{self, acl, xattr};
>>> use pbs_datastore::catalog::BackupCatalogWriter;
>>> +use pbs_datastore::dynamic_index::DynamicIndexReader;
>>> use crate::pxar::metadata::errno_is_unsupported;
>>> use crate::pxar::tools::assert_single_path_component;
>>> @@ -791,6 +794,66 @@ impl Archiver {
>>> }
>>> }
>>> +/// Dynamic Entry reusable by payload references
>>> +#[derive(Clone, Debug)]
>>> +#[repr(C)]
>>> +pub struct ReusableDynamicEntry {
>>> + size_le: u64,
>>> + digest: [u8; 32],
>>> +}
>>> +
>>> +impl ReusableDynamicEntry {
>>> + #[inline]
>>> + pub fn size(&self) -> u64 {
>>> + u64::from_le(self.size_le)
>>> + }
>>> +
>>> + #[inline]
>>> + pub fn digest(&self) -> [u8; 32] {
>>> + self.digest
>>> + }
>>> +}
>>> +
>>> +/// List of dynamic entries containing the data given by an offset
>>> range
>>> +fn lookup_dynamic_entries(
>>> + index: &DynamicIndexReader,
>>> + range: Range<u64>,
>>> +) -> Result<(Vec<ReusableDynamicEntry>, u64, u64), Error> {
>>> + let end_idx = index.index_count() - 1;
>>> + let chunk_end = index.chunk_end(end_idx);
>>> + let start = index.binary_search(0, 0, end_idx, chunk_end,
>>> range.start)?;
>>> + let mut end = start;
>>> + while end < end_idx {
>>> + if range.end < index.chunk_end(end) {
>>> + break;
>>> + }
>>> + end += 1;
>>> + }
>>
>> this loop here
>>
>>> +
>>> + let offset_first = if start == 0 {
>>> + 0
>>> + } else {
>>> + index.chunk_end(start - 1)
>>> + };
>>
>> offset_first is prev_end, so maybe we could just name it like that from
>> the start?
>>
>>> +
>>> + let padding_start = range.start - offset_first;
>>> + let padding_end = index.chunk_end(end) - range.end;
>>> +
>>> + let mut indices = Vec::new();
>>> + let mut prev_end = offset_first;
>>> + for dynamic_entry in &index.index()[start..end + 1] {
>>> + let size = dynamic_entry.end() - prev_end;
>>> + let reusable_dynamic_entry = ReusableDynamicEntry {
>>> + size_le: size.to_le(),
>>> + digest: dynamic_entry.digest(),
>>> + };
>>> + prev_end += size;
>>> + indices.push(reusable_dynamic_entry);
>>> + }
>>
>> and this one here could probably be combined?
>>
>>> +
>>> + Ok((indices, padding_start, padding_end))
>>> +}
>>
>> e.g., the whole thing could become something like (untested ;)):
>>
>> let end_idx = index.index_count() - 1;
>> let chunk_end = index.chunk_end(end_idx);
>> let start = index.binary_search(0, 0, end_idx, chunk_end,
>> range.start)?;
>>
>> let mut prev_end = if start == 0 {
>> 0
>> } else {
>> index.chunk_end(start - 1)
>> };
>> let padding_start = range.start - prev_end;
>> let mut padding_end = 0;
>>
>> let mut indices = Vec::new();
>> for dynamic_entry in &index.index()[start..] {
>> let end = dynamic_entry.end();
>> if range.end < end {
>> padding_end = end - range.end;
>> break;
>> }
>>
>> let reusable_dynamic_entry = ReusableDynamicEntry {
>> size_le: (end - prev_end).to_le(),
>> digest: dynamic_entry.digest(),
>> };
>> indices.push(reusable_dynamic_entry);
>> prev_end = end;
>> }
>>
>> Ok((indices, padding_start, padding_end))
>
> Thanks for looking into this so deeply. Unfortunately, this version leads
> to missing injected chunks in my quick test. I will have a look at where
> the problem is tomorrow.
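Just had to move the pushing of the final chunk to before the end check:
the chunk containing the end of the range still has to be part of the
returned list, but the suggested loop broke out before pushing it.
Will include this in the next version of the patches, thanks a lot for
the optimization!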
More information about the pbs-devel
mailing list