[pbs-devel] [PATCH v2 proxmox-backup 1/5] fix #3847: datastore: support writing fidx files of unknown size
Christian Ebner
c.ebner at proxmox.com
Thu Jan 8 11:44:08 CET 2026
some nits inline
On 12/19/25 5:19 PM, Robert Obkircher wrote:
> Use mremap and ftruncate to support growable FixedIndexWriters. Grow
> exponentially from a small initial index size for efficiency. Truncate
> excessive capacity after encountering a non-full block or on close.
>
> Signed-off-by: Robert Obkircher <r.obkircher at proxmox.com>
> ---
> pbs-datastore/src/datastore.rs | 2 +-
> pbs-datastore/src/fixed_index.rs | 98 ++++++++++++++++++++++++++++++--
> 2 files changed, 93 insertions(+), 7 deletions(-)
>
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index 9c57aaac..af712726 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -591,7 +591,7 @@ impl DataStore {
> pub fn create_fixed_writer<P: AsRef<Path>>(
> &self,
> filename: P,
> - size: usize,
> + size: Option<usize>,
> chunk_size: usize,
> ) -> Result<FixedIndexWriter, Error> {
> let index = FixedIndexWriter::create(
> diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
> index 6c3be2d4..42b97464 100644
> --- a/pbs-datastore/src/fixed_index.rs
> +++ b/pbs-datastore/src/fixed_index.rs
> @@ -1,6 +1,7 @@
> use std::fs::File;
> use std::io::Write;
> use std::io::{Seek, SeekFrom};
> +use std::os::unix::fs::FileExt;
> use std::os::unix::io::AsRawFd;
> use std::path::{Path, PathBuf};
> use std::ptr::NonNull;
> @@ -222,6 +223,8 @@ pub struct FixedIndexWriter {
> index: *mut u8,
> pub uuid: [u8; 16],
> pub ctime: i64,
> + growable_size: bool,
> + write_size_on_close: bool,
> }
>
> // `index` is mmap()ed which cannot be thread-local so should be sendable
> @@ -237,12 +240,15 @@ impl Drop for FixedIndexWriter {
> }
>
> impl FixedIndexWriter {
> + // TODO: this is deliberately small at the moment to test resizing
> + const INITIAL_CHUNKS_IF_UNKNOWN: usize = 4;
nit: this is actually the initial index length, so maybe
INITIAL_INDEX_LENGTH or FALLBACK_DEFAULT_INDEX_LENGTH?
> +
> #[allow(clippy::cast_ptr_alignment)]
> // Requires obtaining a shared chunk store lock beforehand
> pub fn create(
> store: Arc<ChunkStore>,
> path: &Path,
> - size: usize,
> + known_size: Option<usize>,
> chunk_size: usize,
> ) -> Result<Self, Error> {
> let full_path = store.relative_path(path);
> @@ -264,6 +270,7 @@ impl FixedIndexWriter {
> }
>
> let ctime = proxmox_time::epoch_i64();
> + let size = known_size.unwrap_or(0);
>
> let uuid = Uuid::generate();
>
> @@ -280,7 +287,9 @@ impl FixedIndexWriter {
>
> file.write_all(&buffer)?;
>
> - let index_length = size.div_ceil(chunk_size);
> + let index_length = known_size
> + .map(|s| s.div_ceil(chunk_size))
> + .unwrap_or(Self::INITIAL_CHUNKS_IF_UNKNOWN);
> let index_size = index_length * 32;
> nix::unistd::ftruncate(&file, (header_size + index_size) as i64)?;
>
> @@ -308,11 +317,69 @@ impl FixedIndexWriter {
> index: data,
> ctime,
> uuid: *uuid.as_bytes(),
> + growable_size: known_size.is_none(),
> + write_size_on_close: known_size.is_none(),
> })
> }
>
> + fn resize_index(&mut self, new_index_length: usize) -> Result<(), Error> {
> + let old_index_size = self.index_length * 32;
> +
> + let header_size = std::mem::size_of::<FixedIndexHeader>();
> + let new_index_size = new_index_length * 32;
> + let new_file_size = (header_size + new_index_size) as i64;
> +
> + let index_addr = NonNull::new(self.index as *mut std::ffi::c_void).ok_or_else(|| {
> + format_err!("Can't resize FixedIndexWriter index because the mmap pointer is null.")
> + })?;
> +
> + nix::unistd::ftruncate(&self.file, new_file_size)?;
> +
> + let new_index = unsafe {
> + nix::sys::mman::mremap(
> + index_addr,
> + old_index_size,
> + new_index_size,
> + nix::sys::mman::MRemapFlags::MREMAP_MAYMOVE,
> + None,
> + )
> + }?
> + .as_ptr()
> + .cast::<u8>();
> +
> + self.index = new_index;
> + self.index_length = new_index_length;
> +
> + Ok(())
> + }
> +
nit: include a docstring for this method, although not present for the
pub methods we should aim to add them.
> + pub fn grow_to_size(&mut self, requested: usize) -> Result<(), Error> {
> + if self.size < requested {
> + if !self.growable_size {
> + bail!("refusing to resize from {} to {}", self.size, requested);
> + }
> + let len = requested.div_ceil(self.chunk_size);
> + if len * self.chunk_size != requested {
> + self.growable_size = false; // ensures only the last chunk can be smaller
> + self.resize_index(len)?;
> + } else {
question: what is the reason for the 1.5 factor, why not e.g. doubling
the length? Is max virtual memory a concern?
> + // grow by 1.5x
> + let mut new_len = self.index_length.max(2);
> + while new_len < len {
> + new_len += new_len / 2;
> + }
> + debug_assert!(new_len * self.chunk_size >= requested);
> + self.resize_index(new_len)?;
> + }
> + self.size = requested;
> + }
> + Ok(())
> + }
> +
nit: this now is the current index length and will change when growing,
so the method name should reflect that
> pub fn index_length(&self) -> usize {
> - self.index_length
> + let len = self.size.div_ceil(self.chunk_size);
nit: I think we should avoid the possible panic here, and return an
error instead. Although it is clear that this should never happen under
normal operations.
> + assert!((self.write_size_on_close && len <= self.index_length) || len == self.index_length);
> + len
> }
>
> fn unmap(&mut self) -> Result<(), Error> {
> @@ -336,15 +403,26 @@ impl FixedIndexWriter {
> bail!("cannot close already closed index file.");
> }
>
> - let index_size = self.index_length * 32;
> + let used_index_length = self.index_length();
> + let index_size = used_index_length * 32;
> let data = unsafe { std::slice::from_raw_parts(self.index, index_size) };
> let index_csum = openssl::sha::sha256(data);
>
> self.unmap()?;
>
> + if used_index_length < self.index_length {
> + let header_size = std::mem::size_of::<FixedIndexHeader>();
> + nix::unistd::ftruncate(&self.file, (header_size + index_size) as i64)?;
> + self.index_length = used_index_length;
> + }
> +
> let csum_offset = std::mem::offset_of!(FixedIndexHeader, index_csum);
> - self.file.seek(SeekFrom::Start(csum_offset as u64))?;
> - self.file.write_all(&index_csum)?;
> + self.file.write_all_at(&index_csum, csum_offset as u64)?;
nit: the changes above are a bit independent and might be pulled out
into their own patch
> + if self.write_size_on_close {
> + let size_offset = std::mem::offset_of!(FixedIndexHeader, size);
> + self.file
> + .write_all_at(&(self.size as u64).to_le_bytes(), size_offset as u64)?;
> + }
> self.file.flush()?;
>
> if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
> @@ -407,6 +485,14 @@ impl FixedIndexWriter {
> }
>
> pub fn clone_data_from(&mut self, reader: &FixedIndexReader) -> Result<(), Error> {
> + if self.growable_size {
nit: this error might be misunderstood, as the backup is using fixed
size chunking. So maybe better to reword this to e.g.
"reusing the fixed index is only supported with known input size"
Further, this might be
> + bail!("reusing the index is only supported with a fixed size");
> + }
> +
> + if self.chunk_size != reader.chunk_size {
> + bail!("chunk size mismatch");
> + }
> +
> if self.index_length != reader.index_count() {
> bail!("clone_data_from failed - index sizes not equal");
> }
More information about the pbs-devel
mailing list