[pbs-devel] [PATCH v3 proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries

Christian Ebner c.ebner at proxmox.com
Fri Apr 5 09:22:42 CEST 2024


On 4/4/24 19:13, Christian Ebner wrote:
> On 4/4/24 14:54, Fabian Grünbichler wrote:
>> On March 28, 2024 1:36 pm, Christian Ebner wrote:
>>> The helper method allows looking up the entries of a dynamic index
>>> which fully cover a given offset range. Further, the helper returns
>>> the start padding, from the start offset of the first dynamic index
>>> entry to the start offset of the given range, and the end padding.
>>>
>>> This will be used to look up the size and digest of the chunks
>>> covering the payload range of a regular file, in order to re-use the
>>> found chunks by indexing them in the archive's index file instead of
>>> re-encoding the payload.
>>>
>>> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
>>> ---
>>> changes since version 2:
>>> - moved this from the dynamic index to the pxar create as suggested
>>> - refactored and optimized search, going for linear search to find the
>>>    end entry
>>> - reworded commit message
>>>
>>>   pbs-client/src/pxar/create.rs | 63 +++++++++++++++++++++++++++++++++++
>>>   1 file changed, 63 insertions(+)
>>>
>>> diff --git a/pbs-client/src/pxar/create.rs 
>>> b/pbs-client/src/pxar/create.rs
>>> index 2bb5a6253..e2d3954ca 100644
>>> --- a/pbs-client/src/pxar/create.rs
>>> +++ b/pbs-client/src/pxar/create.rs
>>> @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet};
>>>   use std::ffi::{CStr, CString, OsStr};
>>>   use std::fmt;
>>>   use std::io::{self, Read};
>>> +use std::ops::Range;
>>>   use std::os::unix::ffi::OsStrExt;
>>>   use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, 
>>> RawFd};
>>>   use std::path::{Path, PathBuf};
>>> @@ -16,6 +17,7 @@ use nix::fcntl::OFlag;
>>>   use nix::sys::stat::{FileStat, Mode};
>>>   use pathpatterns::{MatchEntry, MatchFlag, MatchList, MatchType, 
>>> PatternFlag};
>>> +use pbs_datastore::index::IndexFile;
>>>   use proxmox_sys::error::SysError;
>>>   use pxar::encoder::{LinkOffset, SeqWrite};
>>>   use pxar::Metadata;
>>> @@ -25,6 +27,7 @@ use proxmox_lang::c_str;
>>>   use proxmox_sys::fs::{self, acl, xattr};
>>>   use pbs_datastore::catalog::BackupCatalogWriter;
>>> +use pbs_datastore::dynamic_index::DynamicIndexReader;
>>>   use crate::pxar::metadata::errno_is_unsupported;
>>>   use crate::pxar::tools::assert_single_path_component;
>>> @@ -791,6 +794,66 @@ impl Archiver {
>>>       }
>>>   }
>>> +/// Dynamic Entry reusable by payload references
>>> +#[derive(Clone, Debug)]
>>> +#[repr(C)]
>>> +pub struct ReusableDynamicEntry {
>>> +    size_le: u64,
>>> +    digest: [u8; 32],
>>> +}
>>> +
>>> +impl ReusableDynamicEntry {
>>> +    #[inline]
>>> +    pub fn size(&self) -> u64 {
>>> +        u64::from_le(self.size_le)
>>> +    }
>>> +
>>> +    #[inline]
>>> +    pub fn digest(&self) -> [u8; 32] {
>>> +        self.digest
>>> +    }
>>> +}
>>> +
>>> +/// List of dynamic entries containing the data given by an offset 
>>> range
>>> +fn lookup_dynamic_entries(
>>> +    index: &DynamicIndexReader,
>>> +    range: Range<u64>,
>>> +) -> Result<(Vec<ReusableDynamicEntry>, u64, u64), Error> {
>>> +    let end_idx = index.index_count() - 1;
>>> +    let chunk_end = index.chunk_end(end_idx);
>>> +    let start = index.binary_search(0, 0, end_idx, chunk_end, 
>>> range.start)?;
>>> +    let mut end = start;
>>> +    while end < end_idx {
>>> +        if range.end < index.chunk_end(end) {
>>> +            break;
>>> +        }
>>> +        end += 1;
>>> +    }
>>
>> this loop here
>>
>>> +
>>> +    let offset_first = if start == 0 {
>>> +        0
>>> +    } else {
>>> +        index.chunk_end(start - 1)
>>> +    };
>>
>> offset_first is prev_end, so maybe we could just name it like that from
>> the start?
>>
>>> +
>>> +    let padding_start = range.start - offset_first;
>>> +    let padding_end = index.chunk_end(end) - range.end;
>>> +
>>> +    let mut indices = Vec::new();
>>> +    let mut prev_end = offset_first;
>>> +    for dynamic_entry in &index.index()[start..end + 1] {
>>> +        let size = dynamic_entry.end() - prev_end;
>>> +        let reusable_dynamic_entry = ReusableDynamicEntry {
>>> +            size_le: size.to_le(),
>>> +            digest: dynamic_entry.digest(),
>>> +        };
>>> +        prev_end += size;
>>> +        indices.push(reusable_dynamic_entry);
>>> +    }
>>
>> and this one here could probably be combined?
>>
>>> +
>>> +    Ok((indices, padding_start, padding_end))
>>> +}
>>
>> e.g., the whole thing could become something like (untested ;)):
>>
>>      let end_idx = index.index_count() - 1;
>>      let chunk_end = index.chunk_end(end_idx);
>>      let start = index.binary_search(0, 0, end_idx, chunk_end, 
>> range.start)?;
>>
>>      let mut prev_end = if start == 0 {
>>          0
>>      } else {
>>          index.chunk_end(start - 1)
>>      };
>>      let padding_start = range.start - prev_end;
>>      let mut padding_end = 0;
>>
>>      let mut indices = Vec::new();
>>      for dynamic_entry in &index.index()[start..] {
>>          let end = dynamic_entry.end();
>>          if range.end < end {
>>              padding_end = end - range.end;
>>              break;
>>          }
>>
>>          let reusable_dynamic_entry = ReusableDynamicEntry {
>>              size_le: (end - prev_end).to_le(),
>>              digest: dynamic_entry.digest(),
>>          };
>>          indices.push(reusable_dynamic_entry);
>>          prev_end = end;
>>      }
>>
>>      Ok((indices, padding_start, padding_end))
> 
> Thanks for looking into this so deeply. Unfortunately, this version leads
> to missing injected chunks in my quick test. I will have a look at where
> the problem is tomorrow.

I just had to move the pushing of the final chunk to before the end check,
so that the chunk containing the end of the range is pushed before the loop
breaks. I will include this in the next version of the patches, thanks a lot
for the optimization!





More information about the pbs-devel mailing list