[pbs-devel] [PATCH v3 proxmox-backup 37/58] client: pxar: helper for lookup of reusable dynamic entries

Christian Ebner c.ebner at proxmox.com
Thu Apr 4 19:13:28 CEST 2024


On 4/4/24 14:54, Fabian Grünbichler wrote:
> On March 28, 2024 1:36 pm, Christian Ebner wrote:
>> The helper method allows to lookup the entries of a dynamic index
>> which fully cover a given offset range. Further, the helper returns
>> the start padding from the start offset of the dynamic index entry
>> to the start offset of the given range and the end padding.
>>
>> This will be used to lookup size and digest for chunks covering the
>> payload range of a regular file in order to re-use found chunks by
>> indexing them in the archives index file instead of re-encoding the
>> payload.
>>
>> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
>> ---
>> changes since version 2:
>> - moved this from the dynamic index to the pxar create as suggested
>> - refactored and optimized search, going for linear search to find the
>>    end entry
>> - reworded commit message
>>
>>   pbs-client/src/pxar/create.rs | 63 +++++++++++++++++++++++++++++++++++
>>   1 file changed, 63 insertions(+)
>>
>> diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
>> index 2bb5a6253..e2d3954ca 100644
>> --- a/pbs-client/src/pxar/create.rs
>> +++ b/pbs-client/src/pxar/create.rs
>> @@ -2,6 +2,7 @@ use std::collections::{HashMap, HashSet};
>>   use std::ffi::{CStr, CString, OsStr};
>>   use std::fmt;
>>   use std::io::{self, Read};
>> +use std::ops::Range;
>>   use std::os::unix::ffi::OsStrExt;
>>   use std::os::unix::io::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd};
>>   use std::path::{Path, PathBuf};
>> @@ -16,6 +17,7 @@ use nix::fcntl::OFlag;
>>   use nix::sys::stat::{FileStat, Mode};
>>   
>>   use pathpatterns::{MatchEntry, MatchFlag, MatchList, MatchType, PatternFlag};
>> +use pbs_datastore::index::IndexFile;
>>   use proxmox_sys::error::SysError;
>>   use pxar::encoder::{LinkOffset, SeqWrite};
>>   use pxar::Metadata;
>> @@ -25,6 +27,7 @@ use proxmox_lang::c_str;
>>   use proxmox_sys::fs::{self, acl, xattr};
>>   
>>   use pbs_datastore::catalog::BackupCatalogWriter;
>> +use pbs_datastore::dynamic_index::DynamicIndexReader;
>>   
>>   use crate::pxar::metadata::errno_is_unsupported;
>>   use crate::pxar::tools::assert_single_path_component;
>> @@ -791,6 +794,66 @@ impl Archiver {
>>       }
>>   }
>>   
>> +/// Dynamic Entry reusable by payload references
>> +#[derive(Clone, Debug)]
>> +#[repr(C)]
>> +pub struct ReusableDynamicEntry {
>> +    size_le: u64,
>> +    digest: [u8; 32],
>> +}
>> +
>> +impl ReusableDynamicEntry {
>> +    #[inline]
>> +    pub fn size(&self) -> u64 {
>> +        u64::from_le(self.size_le)
>> +    }
>> +
>> +    #[inline]
>> +    pub fn digest(&self) -> [u8; 32] {
>> +        self.digest
>> +    }
>> +}
>> +
>> +/// List of dynamic entries containing the data given by an offset range
>> +fn lookup_dynamic_entries(
>> +    index: &DynamicIndexReader,
>> +    range: Range<u64>,
>> +) -> Result<(Vec<ReusableDynamicEntry>, u64, u64), Error> {
>> +    let end_idx = index.index_count() - 1;
>> +    let chunk_end = index.chunk_end(end_idx);
>> +    let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?;
>> +    let mut end = start;
>> +    while end < end_idx {
>> +        if range.end < index.chunk_end(end) {
>> +            break;
>> +        }
>> +        end += 1;
>> +    }
> 
> this loop here
> 
>> +
>> +    let offset_first = if start == 0 {
>> +        0
>> +    } else {
>> +        index.chunk_end(start - 1)
>> +    };
> 
> offset_first is prev_end, so maybe we could just name it like that from
> the start?
> 
>> +
>> +    let padding_start = range.start - offset_first;
>> +    let padding_end = index.chunk_end(end) - range.end;
>> +
>> +    let mut indices = Vec::new();
>> +    let mut prev_end = offset_first;
>> +    for dynamic_entry in &index.index()[start..end + 1] {
>> +        let size = dynamic_entry.end() - prev_end;
>> +        let reusable_dynamic_entry = ReusableDynamicEntry {
>> +            size_le: size.to_le(),
>> +            digest: dynamic_entry.digest(),
>> +        };
>> +        prev_end += size;
>> +        indices.push(reusable_dynamic_entry);
>> +    }
> 
> and this one here could probably be combined?
> 
>> +
>> +    Ok((indices, padding_start, padding_end))
>> +}
> 
> e.g., the whole thing could become something like (untested ;)):
> 
>      let end_idx = index.index_count() - 1;
>      let chunk_end = index.chunk_end(end_idx);
>      let start = index.binary_search(0, 0, end_idx, chunk_end, range.start)?;
> 
>      let mut prev_end = if start == 0 {
>          0
>      } else {
>          index.chunk_end(start - 1)
>      };
>      let padding_start = range.start - prev_end;
>      let mut padding_end = 0;
> 
>      let mut indices = Vec::new();
>      for dynamic_entry in &index.index()[start..] {
>          let end = dynamic_entry.end();
>          if range.end < end {
>              padding_end = end - range.end;
>              break;
>          }
> 
>          let reusable_dynamic_entry = ReusableDynamicEntry {
>              size_le: (end - prev_end).to_le(),
>              digest: dynamic_entry.digest(),
>          };
>          indices.push(reusable_dynamic_entry);
>          prev_end = end;
>      }
> 
>      Ok((indices, padding_start, padding_end))

Thanks for looking into this so deeply, unfortunately this version leads 
to missing injected chunks in my quick test. Will have a look on where 
the problem is tomorrow.





More information about the pbs-devel mailing list