[pbs-devel] [PATCH v2 proxmox-backup 1/5] fix #3847: datastore: support writing fidx files of unknown size
Robert Obkircher
r.obkircher at proxmox.com
Fri Dec 19 17:18:28 CET 2025
Use mremap and ftruncate to support growable FixedIndexWriters. Grow
exponentially from a small initial index size for efficiency. Truncate
excessive capacity after encountering a non-full block or on close.
Signed-off-by: Robert Obkircher <r.obkircher at proxmox.com>
---
pbs-datastore/src/datastore.rs | 2 +-
pbs-datastore/src/fixed_index.rs | 98 ++++++++++++++++++++++++++++++--
2 files changed, 93 insertions(+), 7 deletions(-)
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 9c57aaac..af712726 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -591,7 +591,7 @@ impl DataStore {
pub fn create_fixed_writer<P: AsRef<Path>>(
&self,
filename: P,
- size: usize,
+ size: Option<usize>,
chunk_size: usize,
) -> Result<FixedIndexWriter, Error> {
let index = FixedIndexWriter::create(
diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
index 6c3be2d4..42b97464 100644
--- a/pbs-datastore/src/fixed_index.rs
+++ b/pbs-datastore/src/fixed_index.rs
@@ -1,6 +1,7 @@
use std::fs::File;
use std::io::Write;
use std::io::{Seek, SeekFrom};
+use std::os::unix::fs::FileExt;
use std::os::unix::io::AsRawFd;
use std::path::{Path, PathBuf};
use std::ptr::NonNull;
@@ -222,6 +223,8 @@ pub struct FixedIndexWriter {
index: *mut u8,
pub uuid: [u8; 16],
pub ctime: i64,
+ growable_size: bool,
+ write_size_on_close: bool,
}
// `index` is mmap()ed which cannot be thread-local so should be sendable
@@ -237,12 +240,15 @@ impl Drop for FixedIndexWriter {
}
impl FixedIndexWriter {
+ // TODO: this is deliberately small at the moment to test resizing
+ const INITIAL_CHUNKS_IF_UNKNOWN: usize = 4;
+
#[allow(clippy::cast_ptr_alignment)]
// Requires obtaining a shared chunk store lock beforehand
pub fn create(
store: Arc<ChunkStore>,
path: &Path,
- size: usize,
+ known_size: Option<usize>,
chunk_size: usize,
) -> Result<Self, Error> {
let full_path = store.relative_path(path);
@@ -264,6 +270,7 @@ impl FixedIndexWriter {
}
let ctime = proxmox_time::epoch_i64();
+ let size = known_size.unwrap_or(0);
let uuid = Uuid::generate();
@@ -280,7 +287,9 @@ impl FixedIndexWriter {
file.write_all(&buffer)?;
- let index_length = size.div_ceil(chunk_size);
+ let index_length = known_size
+ .map(|s| s.div_ceil(chunk_size))
+ .unwrap_or(Self::INITIAL_CHUNKS_IF_UNKNOWN);
let index_size = index_length * 32;
nix::unistd::ftruncate(&file, (header_size + index_size) as i64)?;
@@ -308,11 +317,69 @@ impl FixedIndexWriter {
index: data,
ctime,
uuid: *uuid.as_bytes(),
+ growable_size: known_size.is_none(),
+ write_size_on_close: known_size.is_none(),
})
}
+ fn resize_index(&mut self, new_index_length: usize) -> Result<(), Error> {
+ let old_index_size = self.index_length * 32;
+
+ let header_size = std::mem::size_of::<FixedIndexHeader>();
+ let new_index_size = new_index_length * 32;
+ let new_file_size = (header_size + new_index_size) as i64;
+
+ let index_addr = NonNull::new(self.index as *mut std::ffi::c_void).ok_or_else(|| {
+ format_err!("Can't resize FixedIndexWriter index because the mmap pointer is null.")
+ })?;
+
+ nix::unistd::ftruncate(&self.file, new_file_size)?;
+
+ let new_index = unsafe {
+ nix::sys::mman::mremap(
+ index_addr,
+ old_index_size,
+ new_index_size,
+ nix::sys::mman::MRemapFlags::MREMAP_MAYMOVE,
+ None,
+ )
+ }?
+ .as_ptr()
+ .cast::<u8>();
+
+ self.index = new_index;
+ self.index_length = new_index_length;
+
+ Ok(())
+ }
+
+ pub fn grow_to_size(&mut self, requested: usize) -> Result<(), Error> {
+ if self.size < requested {
+ if !self.growable_size {
+ bail!("refusing to resize from {} to {}", self.size, requested);
+ }
+ let len = requested.div_ceil(self.chunk_size);
+ if len * self.chunk_size != requested {
+ self.growable_size = false; // ensures only the last chunk can be smaller
+ self.resize_index(len)?;
+ } else {
+ // grow by 1.5x
+ let mut new_len = self.index_length.max(2);
+ while new_len < len {
+ new_len += new_len / 2;
+ }
+ debug_assert!(new_len * self.chunk_size >= requested);
+ self.resize_index(new_len)?;
+ }
+ self.size = requested;
+ }
+ Ok(())
+ }
+
pub fn index_length(&self) -> usize {
- self.index_length
+ let len = self.size.div_ceil(self.chunk_size);
+ assert!((self.write_size_on_close && len <= self.index_length) || len == self.index_length);
+ len
}
fn unmap(&mut self) -> Result<(), Error> {
@@ -336,15 +403,26 @@ impl FixedIndexWriter {
bail!("cannot close already closed index file.");
}
- let index_size = self.index_length * 32;
+ let used_index_length = self.index_length();
+ let index_size = used_index_length * 32;
let data = unsafe { std::slice::from_raw_parts(self.index, index_size) };
let index_csum = openssl::sha::sha256(data);
self.unmap()?;
+ if used_index_length < self.index_length {
+ let header_size = std::mem::size_of::<FixedIndexHeader>();
+ nix::unistd::ftruncate(&self.file, (header_size + index_size) as i64)?;
+ self.index_length = used_index_length;
+ }
+
let csum_offset = std::mem::offset_of!(FixedIndexHeader, index_csum);
- self.file.seek(SeekFrom::Start(csum_offset as u64))?;
- self.file.write_all(&index_csum)?;
+ self.file.write_all_at(&index_csum, csum_offset as u64)?;
+ if self.write_size_on_close {
+ let size_offset = std::mem::offset_of!(FixedIndexHeader, size);
+ self.file
+ .write_all_at(&(self.size as u64).to_le_bytes(), size_offset as u64)?;
+ }
self.file.flush()?;
if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
@@ -407,6 +485,14 @@ impl FixedIndexWriter {
}
pub fn clone_data_from(&mut self, reader: &FixedIndexReader) -> Result<(), Error> {
+ if self.growable_size {
+ bail!("reusing the index is only supported with a fixed size");
+ }
+
+ if self.chunk_size != reader.chunk_size {
+ bail!("chunk size mismatch");
+ }
+
if self.index_length != reader.index_count() {
bail!("clone_data_from failed - index sizes not equal");
}
--
2.47.3
More information about the pbs-devel
mailing list