[pbs-devel] [RFC proxmox-backup] index writers: remove dead code

Fabian Grünbichler f.gruenbichler at proxmox.com
Thu Oct 30 11:25:27 CET 2025


On October 29, 2025 9:04 am, Christian Ebner wrote:
> Thanks for the cleanup, two questions inline.
> 
> On 10/27/25 2:55 PM, Fabian Grünbichler wrote:
>> the current code base doesn't use the index writers for inserting chunks into a
>> chunk store, and it probably shouldn't given the intricate requirements around
>> interacting with S3.
>> 
>> all of this code seems to be dead code, remove it to make reasoning about
>> chunk insertion code paths easier.
>> 
>> Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
>> ---
>> stumbled upon this while thinking about S3 synchronization issues..
>> AFAICT this is leftover from the very initial phase of PBS development
>> 
>>   pbs-datastore/src/dynamic_index.rs | 142 -----------------------------
>>   pbs-datastore/src/fixed_index.rs   |  38 --------
>>   2 files changed, 180 deletions(-)
>> 
>> diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
>> index ff6c36782..624df0119 100644
>> --- a/pbs-datastore/src/dynamic_index.rs
>> +++ b/pbs-datastore/src/dynamic_index.rs
>> @@ -16,13 +16,10 @@ use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation};
>>   
>>   use pbs_tools::lru_cache::LruCache;
>>   
>> -use crate::chunk_stat::ChunkStat;
>>   use crate::chunk_store::ChunkStore;
>> -use crate::data_blob::{DataBlob, DataChunkBuilder};
>>   use crate::file_formats;
>>   use crate::index::{ChunkReadInfo, IndexFile};
>>   use crate::read_chunk::ReadChunk;
>> -use crate::{Chunker, ChunkerImpl};
>>   
>>   /// Header format definition for dynamic index files (`.dixd`)
>>   #[repr(C)]
>> @@ -275,7 +272,6 @@ impl IndexFile for DynamicIndexReader {
>>   
>>   /// Create dynamic index files (`.dixd`)
>>   pub struct DynamicIndexWriter {
>> -    store: Arc<ChunkStore>,
>>       writer: BufWriter<File>,
>>       closed: bool,
>>       filename: PathBuf,
>> @@ -321,7 +317,6 @@ impl DynamicIndexWriter {
>>           let csum = Some(openssl::sha::Sha256::new());
>>   
>>           Ok(Self {
>> -            store,
> 
> question: not sure if it is fine to drop the chunk store here? The chunk 
> store holds the process locker, which should outlive the writer ...

technically true, but

the writer can already outlive the lock; this is only protected by how
we are calling things, no matter whether a reference to the chunk store
is stored inside the writer or not. the actual lock guard (which
releases the lock when dropped) is what counts, and that is stored in
the backup writer env, not in the index writer here. (the env also
contains a reference to the chunk store via its reference to the
datastore.)

or am I missing something?

> 
>>               writer,
>>               closed: false,
>>               filename: full_path,
>> @@ -332,11 +327,6 @@ impl DynamicIndexWriter {
>>           })
>>       }
>>   
>> -    // fixme: use add_chunk instead?
>> -    pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
>> -        self.store.insert_chunk(chunk, digest)
>> -    }
>> -
>>       pub fn close(&mut self) -> Result<[u8; 32], Error> {
>>           if self.closed {
>>               bail!(
>> @@ -387,138 +377,6 @@ impl DynamicIndexWriter {
>>       }
>>   }
>>   
>> -/// Writer which splits a binary stream into dynamic sized chunks
>> -///
>> -/// And store the resulting chunk list into the index file.
>> -pub struct DynamicChunkWriter {
>> -    index: DynamicIndexWriter,
>> -    closed: bool,
>> -    chunker: ChunkerImpl,
>> -    stat: ChunkStat,
>> -    chunk_offset: usize,
>> -    last_chunk: usize,
>> -    chunk_buffer: Vec<u8>,
>> -}
>> -
>> -impl DynamicChunkWriter {
>> -    pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self {
>> -        Self {
>> -            index,
>> -            closed: false,
>> -            chunker: ChunkerImpl::new(chunk_size),
>> -            stat: ChunkStat::new(0),
>> -            chunk_offset: 0,
>> -            last_chunk: 0,
>> -            chunk_buffer: Vec::with_capacity(chunk_size * 4),
>> -        }
>> -    }
>> -
>> -    pub fn stat(&self) -> &ChunkStat {
>> -        &self.stat
>> -    }
>> -
>> -    pub fn close(&mut self) -> Result<(), Error> {
>> -        if self.closed {
>> -            return Ok(());
>> -        }
>> -
>> -        self.closed = true;
>> -
>> -        self.write_chunk_buffer()?;
>> -
>> -        self.index.close()?;
>> -
>> -        self.stat.size = self.chunk_offset as u64;
>> -
>> -        // add size of index file
>> -        self.stat.size +=
>> -            (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64;
>> -
>> -        Ok(())
>> -    }
>> -
>> -    fn write_chunk_buffer(&mut self) -> Result<(), Error> {
>> -        let chunk_size = self.chunk_buffer.len();
>> -
>> -        if chunk_size == 0 {
>> -            return Ok(());
>> -        }
>> -
>> -        let expected_chunk_size = self.chunk_offset - self.last_chunk;
>> -        if expected_chunk_size != self.chunk_buffer.len() {
>> -            bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size);
>> -        }
>> -
>> -        self.stat.chunk_count += 1;
>> -
>> -        self.last_chunk = self.chunk_offset;
>> -
>> -        let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
>> -            .compress(true)
>> -            .build()?;
>> -
>> -        match self.index.insert_chunk(&chunk, &digest) {
>> -            Ok((is_duplicate, compressed_size)) => {
>> -                self.stat.compressed_size += compressed_size;
>> -                if is_duplicate {
>> -                    self.stat.duplicate_chunks += 1;
>> -                } else {
>> -                    self.stat.disk_size += compressed_size;
>> -                }
>> -
>> -                log::info!(
>> -                    "ADD CHUNK {:016x} {} {}% {} {}",
>> -                    self.chunk_offset,
>> -                    chunk_size,
>> -                    (compressed_size * 100) / (chunk_size as u64),
>> -                    is_duplicate,
>> -                    hex::encode(digest)
>> -                );
>> -                self.index.add_chunk(self.chunk_offset as u64, &digest)?;
>> -                self.chunk_buffer.truncate(0);
>> -                Ok(())
>> -            }
>> -            Err(err) => {
>> -                self.chunk_buffer.truncate(0);
>> -                Err(err)
>> -            }
>> -        }
>> -    }
>> -}
>> -
>> -impl Write for DynamicChunkWriter {
>> -    fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> {
>> -        let chunker = &mut self.chunker;
>> -
>> -        let ctx = crate::chunker::Context::default();
>> -        let pos = chunker.scan(data, &ctx);
>> -
>> -        if pos > 0 {
>> -            self.chunk_buffer.extend_from_slice(&data[0..pos]);
>> -            self.chunk_offset += pos;
>> -
>> -            if let Err(err) = self.write_chunk_buffer() {
>> -                return Err(std::io::Error::new(
>> -                    std::io::ErrorKind::Other,
>> -                    err.to_string(),
>> -                ));
>> -            }
>> -            Ok(pos)
>> -        } else {
>> -            self.chunk_offset += data.len();
>> -            self.chunk_buffer.extend_from_slice(data);
>> -            Ok(data.len())
>> -        }
>> -    }
>> -
>> -    fn flush(&mut self) -> std::result::Result<(), std::io::Error> {
>> -        Err(std::io::Error::new(
>> -            std::io::ErrorKind::Other,
>> -            "please use close() instead of flush()",
>> -        ))
>> -    }
>> -}
>> -
>>   struct CachedChunk {
>>       range: Range<u64>,
>>       data: Vec<u8>,
>> diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs
>> index 0f289543f..6c3be2d49 100644
>> --- a/pbs-datastore/src/fixed_index.rs
>> +++ b/pbs-datastore/src/fixed_index.rs
>> @@ -11,9 +11,7 @@ use anyhow::{bail, format_err, Error};
>>   use proxmox_io::ReadExt;
>>   use proxmox_uuid::Uuid;
>>   
>> -use crate::chunk_stat::ChunkStat;
>>   use crate::chunk_store::ChunkStore;
>> -use crate::data_blob::ChunkInfo;
>>   use crate::file_formats;
>>   use crate::index::{ChunkReadInfo, IndexFile};
>>   
>> @@ -215,7 +213,6 @@ impl IndexFile for FixedIndexReader {
>>   }
>>   
>>   pub struct FixedIndexWriter {
>> -    store: Arc<ChunkStore>,
>>       file: File,
>>       filename: PathBuf,
>>       tmp_filename: PathBuf,
>> @@ -302,7 +299,6 @@ impl FixedIndexWriter {
>>           .cast::<u8>();
>>   
>>           Ok(Self {
>> -            store,
> 
> question: ... same as above?
> 
>>               file,
>>               filename: full_path,
>>               tmp_filename: tmp_path,
>> @@ -388,40 +384,6 @@ impl FixedIndexWriter {
>>           Ok(pos / self.chunk_size)
>>       }
>>   
>> -    // Note: We want to add data out of order, so do not assume any order here.
>> -    pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
>> -        let chunk_len = chunk_info.chunk_len as usize;
>> -        let offset = chunk_info.offset as usize; // end of chunk
>> -
>> -        let idx = self.check_chunk_alignment(offset, chunk_len)?;
>> -
>> -        let (is_duplicate, compressed_size) = self
>> -            .store
>> -            .insert_chunk(&chunk_info.chunk, &chunk_info.digest)?;
>> -
>> -        stat.chunk_count += 1;
>> -        stat.compressed_size += compressed_size;
>> -
>> -        let digest = &chunk_info.digest;
>> -
>> -        log::info!(
>> -            "ADD CHUNK {} {} {}% {} {}",
>> -            idx,
>> -            chunk_len,
>> -            (compressed_size * 100) / (chunk_len as u64),
>> -            is_duplicate,
>> -            hex::encode(digest)
>> -        );
>> -
>> -        if is_duplicate {
>> -            stat.duplicate_chunks += 1;
>> -        } else {
>> -            stat.disk_size += compressed_size;
>> -        }
>> -
>> -        self.add_digest(idx, digest)
>> -    }
>> -
>>       pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
>>           if index >= self.index_length {
>>               bail!(
> 
> 




More information about the pbs-devel mailing list