[pbs-devel] [RFC proxmox-backup] index writers: remove dead code
Christian Ebner
c.ebner at proxmox.com
Wed Oct 29 09:04:38 CET 2025
Thanks for the cleanup; two questions inline.
On 10/27/25 2:55 PM, Fabian Grünbichler wrote:
> the current code base doesn't use the index writers for inserting chunks into a
> chunk store, and it probably shouldn't given the intricate requirements around
> interacting with S3.
>
> all of this code seems to be dead code, remove it to make reasoning about
> chunk insertion code paths easier.
>
> Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
> ---
> stumbled upon this while thinking about S3 synchronization issues..
> AFAICT this is leftover from the very initial phase of PBS development
>
> pbs-datastore/src/dynamic_index.rs | 142 -----------------------------
> pbs-datastore/src/fixed_index.rs | 38 --------
> 2 files changed, 180 deletions(-)
>
> diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
> index ff6c36782..624df0119 100644
> --- a/pbs-datastore/src/dynamic_index.rs
> +++ b/pbs-datastore/src/dynamic_index.rs
> @@ -16,13 +16,10 @@ use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
>
> use pbs_tools::lru_cache::LruCache;
>
> -use crate::chunk_stat::ChunkStat;
> use crate::chunk_store::ChunkStore;
> -use crate::data_blob::{DataBlob, DataChunkBuilder};
> use crate::file_formats;
> use crate::index::{ChunkReadInfo, IndexFile};
> use crate::read_chunk::ReadChunk;
> -use crate::{Chunker, ChunkerImpl};
>
> /// Header format definition for dynamic index files (`.didx`)
> #[repr(C)]
> @@ -275,7 +272,6 @@ impl IndexFile for DynamicIndexReader {
>
> /// Create dynamic index files (`.didx`)
> pub struct DynamicIndexWriter {
> - store: Arc<ChunkStore>,
> writer: BufWriter<File>,
> closed: bool,
> filename: PathBuf,
> @@ -321,7 +317,6 @@ impl DynamicIndexWriter {
> let csum = Some(openssl::sha::Sha256::new());
>
> Ok(Self {
> - store,
question: is it actually fine to drop the chunk store reference here? The chunk
store holds the process locker, which should outlive the writer ...
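
To illustrate the concern, a minimal sketch of the ownership chain I have in
mind (simplified, hypothetical types, not the actual pbs-datastore structs):

    use std::sync::Arc;

    // stand-in for the shared process locker held via the datastore
    struct ProcessLocker;

    // simplified chunk store: owns a reference to the process locker
    struct ChunkStore {
        _locker: Arc<ProcessLocker>,
    }

    // simplified index writer: the `store` field is what ties the writer's
    // lifetime to the chunk store (and therefore to the process locker)
    struct IndexWriter {
        _store: Arc<ChunkStore>,
    }

    fn main() {
        let locker = Arc::new(ProcessLocker);
        let store = Arc::new(ChunkStore { _locker: locker });
        let writer = IndexWriter { _store: Arc::clone(&store) };

        // even if the caller drops its own handle, the writer keeps the
        // chunk store (and the locker) alive until the writer is dropped
        drop(store);
        drop(writer);
    }

If the writer is guaranteed to never outlive the datastore handle that created
it, dropping the field is fine, but that guarantee isn't obvious from the
writer's type alone once the Arc is gone.
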
> writer,
> closed: false,
> filename: full_path,
> @@ -332,11 +327,6 @@ impl DynamicIndexWriter {
> })
> }
>
> - // fixme: use add_chunk instead?
> - pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
> - self.store.insert_chunk(chunk, digest)
> - }
> -
> pub fn close(&mut self) -> Result<[u8; 32], Error> {
> if self.closed {
> bail!(
> @@ -387,138 +377,6 @@ impl DynamicIndexWriter {
> }
> }
>
> -/// Writer which splits a binary stream into dynamic sized chunks
> -///
> -/// And store the resulting chunk list into the index file.
> -pub struct DynamicChunkWriter {
> - index: DynamicIndexWriter,
> - closed: bool,
> - chunker: ChunkerImpl,
> - stat: ChunkStat,
> - chunk_offset: usize,
> - last_chunk: usize,
> - chunk_buffer: Vec<u8>,
> -}
> -
> -impl DynamicChunkWriter {
> - pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self {
> - Self {
> - index,
> - closed: false,
> - chunker: ChunkerImpl::new(chunk_size),
> - stat: ChunkStat::new(0),
> - chunk_offset: 0,
> - last_chunk: 0,
> - chunk_buffer: Vec::with_capacity(chunk_size * 4),
> - }
> - }
> -
> - pub fn stat(&self) -> &ChunkStat {
> - &self.stat
> - }
> -
> - pub fn close(&mut self) -> Result<(), Error> {
> - if self.closed {
> - return Ok(());
> - }
> -
> - self.closed = true;
> -
> - self.write_chunk_buffer()?;
> -
> - self.index.close()?;
> -
> - self.stat.size = self.chunk_offset as u64;
> -
> - // add size of index file
> - self.stat.size +=
> - (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
> -
> - Ok(())
> - }
> -
> - fn write_chunk_buffer(&mut self) -> Result<(), Error> {
> - let chunk_size = self.chunk_buffer.len();
> -
> - if chunk_size == 0 {
> - return Ok(());
> - }
> -
> - let expected_chunk_size = self.chunk_offset - self.last_chunk;
> - if expected_chunk_size != self.chunk_buffer.len() {
> - bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size);
> - }
> -
> - self.stat.chunk_count += 1;
> -
> - self.last_chunk = self.chunk_offset;
> -
> - let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
> - .compress(true)
> - .build()?;
> -
> - match self.index.insert_chunk(&chunk, &digest) {
> - Ok((is_duplicate, compressed_size)) => {
> - self.stat.compressed_size += compressed_size;
> - if is_duplicate {
> - self.stat.duplicate_chunks += 1;
> - } else {
> - self.stat.disk_size += compressed_size;
> - }
> -
> - log::info!(
> - "ADD CHUNK {:016x} {} {}% {} {}",
> - self.chunk_offset,
> - chunk_size,
> - (compressed_size * 100) / (chunk_size as u64),
> - is_duplicate,
> - hex::encode(digest)
> - );
> - self.index.add_chunk(self.chunk_offset as u64, &digest)?;
> - self.chunk_buffer.truncate(0);
> - Ok(())
> - }
> - Err(err) => {
> - self.chunk_buffer.truncate(0);
> - Err(err)
> - }
> - }
> - }
> -}
> -
> -impl Write for DynamicChunkWriter {
> - fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
> - let chunker = &mut self.chunker;
> -
> - let ctx = crate::chunker::Context::default();
> - let pos = chunker.scan(data, &ctx);
> -
> - if pos > 0 {
> - self.chunk_buffer.extend_from_slice(&data[0..pos]);
> - self.chunk_offset += pos;
> -
> - if let Err(err) = self.write_chunk_buffer() {
> - return Err(std::io::Error::new(
> - std::io::ErrorKind::Other,
> - err.to_string(),
> - ));
> - }
> - Ok(pos)
> - } else {
> - self.chunk_offset += data.len();
> - self.chunk_buffer.extend_from_slice(data);
> - Ok(data.len())
> - }
> - }
> -
> - fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
> - Err(std::io::Error::new(
> - std::io::ErrorKind::Other,
> - "please use close() instead of flush()",
> - ))
> - }
> -}
> -
> struct CachedChunk {
> range: Range<u64>,
> data: Vec<u8>,
> diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
> index 0f289543f..6c3be2d49 100644
> --- a/pbs-datastore/src/fixed_index.rs
> +++ b/pbs-datastore/src/fixed_index.rs
> @@ -11,9 +11,7 @@ use anyhow::{bail, format_err, Error};
> use proxmox_io::ReadExt;
> use proxmox_uuid::Uuid;
>
> -use crate::chunk_stat::ChunkStat;
> use crate::chunk_store::ChunkStore;
> -use crate::data_blob::ChunkInfo;
> use crate::file_formats;
> use crate::index::{ChunkReadInfo, IndexFile};
>
> @@ -215,7 +213,6 @@ impl IndexFile for FixedIndexReader {
> }
>
> pub struct FixedIndexWriter {
> - store: Arc<ChunkStore>,
> file: File,
> filename: PathBuf,
> tmp_filename: PathBuf,
> @@ -302,7 +299,6 @@ impl FixedIndexWriter {
> .cast::<u8>();
>
> Ok(Self {
> - store,
question: ... same as above?
> file,
> filename: full_path,
> tmp_filename: tmp_path,
> @@ -388,40 +384,6 @@ impl FixedIndexWriter {
> Ok(pos / self.chunk_size)
> }
>
> - // Note: We want to add data out of order, so do not assume any order here.
> - pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
> - let chunk_len = chunk_info.chunk_len as usize;
> - let offset = chunk_info.offset as usize; // end of chunk
> -
> - let idx = self.check_chunk_alignment(offset, chunk_len)?;
> -
> - let (is_duplicate, compressed_size) = self
> - .store
> - .insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
> -
> - stat.chunk_count += 1;
> - stat.compressed_size += compressed_size;
> -
> - let digest = &chunk_info.digest;
> -
> - log::info!(
> - "ADD CHUNK {} {} {}% {} {}",
> - idx,
> - chunk_len,
> - (compressed_size * 100) / (chunk_len as u64),
> - is_duplicate,
> - hex::encode(digest)
> - );
> -
> - if is_duplicate {
> - stat.duplicate_chunks += 1;
> - } else {
> - stat.disk_size += compressed_size;
> - }
> -
> - self.add_digest(idx, digest)
> - }
> -
> pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
> if index >= self.index_length {
> bail!(