[pbs-devel] [PATCH proxmox-backup 4/7] chunk store: fix: replace evicted cache chunks instead of truncate
Christian Ebner
c.ebner at proxmox.com
Mon Oct 6 17:35:36 CEST 2025
On 10/6/25 3:18 PM, Fabian Grünbichler wrote:
> On October 6, 2025 12:41 pm, Christian Ebner wrote:
>> Evicted chunks have been truncated to size zero, keeping the chunk
>> file in place as in-use marker for the garbage collection but freeing
>> the chunk file contents. This can, however, lead to restores failing if
>> they have already opened the chunk file for reading, as its contents
>> are now incomplete.
>>
>> Fix this by instead replacing the chunk file with a zero sized file,
>> leaving the contents accessible for the already opened chunk readers.
>>
>> By moving the logic from the local datastore cache to a helper method
>> on the chunk store, it is also assured that the operation is guarded
>> by the chunk store mutex lock to avoid races with chunk re-insert.
>
> AFAICT this is still racy even after this patch, because in cache.access
> after a cache miss (either no local file, or empty local file) we fetch
> the chunk from S3, insert it into the chunk store (cache), but then
> instead of returning the chunk from the already in-memory chunk data, we
> load it again from the path - without holding the lock that prevents the
> chunk from being evicted again. While unlikely to be hit in practice,
> this is still wasteful, because we could save a round-trip and just
> return the chunk we've already constructed from the S3 response and kill
> two birds with one stone?
Good catch! Indeed we can serve the response from the already in-memory
data in that case.
>
>>
>> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
>> ---
>> pbs-datastore/src/chunk_store.rs | 20 +++++++++++++++
>> .../src/local_datastore_lru_cache.rs | 25 +++----------------
>> 2 files changed, 23 insertions(+), 22 deletions(-)
>>
>> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
>> index 010785fbc..74fa79db1 100644
>> --- a/pbs-datastore/src/chunk_store.rs
>> +++ b/pbs-datastore/src/chunk_store.rs
>> @@ -668,6 +668,26 @@ impl ChunkStore {
>> (chunk_path, digest_str)
>> }
>>
>> + /// Replace a chunk file with a zero size file in the chunk store.
>> + ///
>> + /// Used to evict chunks from the local datastore cache, while keeping them as in-use markers
>> + /// for garbage collection. Returns with success also if chunk file is not pre-existing.
>> + pub fn clear_chunk(&self, digest: &[u8; 32]) -> Result<(), Error> {
>> + let (chunk_path, digest_str) = self.chunk_path(digest);
>> + let mut create_options = CreateOptions::new();
>> + if nix::unistd::Uid::effective().is_root() {
>> + let uid = pbs_config::backup_user()?.uid;
>> + let gid = pbs_config::backup_group()?.gid;
>> + create_options = create_options.owner(uid).group(gid);
>> + }
>> +
>> + let _lock = self.mutex.lock();
>> +
>> + proxmox_sys::fs::replace_file(&chunk_path, &[], create_options, false)
>> + .map_err(|err| format_err!("clear chunk failed for {digest_str} - {err}"))?;
>> + Ok(())
>> + }
>> +
>> pub fn relative_path(&self, path: &Path) -> PathBuf {
>> // unwrap: only `None` in unit tests
>> assert!(self.locker.is_some());
>> diff --git a/pbs-datastore/src/local_datastore_lru_cache.rs b/pbs-datastore/src/local_datastore_lru_cache.rs
>> index 1d2e87cb9..6f950f4b3 100644
>> --- a/pbs-datastore/src/local_datastore_lru_cache.rs
>> +++ b/pbs-datastore/src/local_datastore_lru_cache.rs
>> @@ -71,17 +71,8 @@ impl LocalDatastoreLruCache {
>> /// Fails if the chunk cannot be inserted successfully.
>> pub fn insert(&self, digest: &[u8; 32], chunk: &DataBlob) -> Result<(), Error> {
>> self.store.insert_chunk(chunk, digest)?;
>> - self.cache.insert(*digest, (), |digest| {
>> - let (path, _digest_str) = self.store.chunk_path(&digest);
>> - // Truncate to free up space but keep the inode around, since that
>> - // is used as marker for chunks in use by garbage collection.
>> - if let Err(err) = nix::unistd::truncate(&path, 0) {
>> - if err != nix::errno::Errno::ENOENT {
>> - return Err(Error::from(err));
>> - }
>> - }
>> - Ok(())
>> - })
>> + self.cache
>> + .insert(*digest, (), |digest| self.store.clear_chunk(&digest))
>> }
>>
>> /// Remove a chunk from the local datastore cache.
>> @@ -104,17 +95,7 @@ impl LocalDatastoreLruCache {
>> ) -> Result<Option<DataBlob>, Error> {
>> if self
>> .cache
>> - .access(*digest, cacher, |digest| {
>> - let (path, _digest_str) = self.store.chunk_path(&digest);
>> - // Truncate to free up space but keep the inode around, since that
>> - // is used as marker for chunks in use by garbage collection.
>> - if let Err(err) = nix::unistd::truncate(&path, 0) {
>> - if err != nix::errno::Errno::ENOENT {
>> - return Err(Error::from(err));
>> - }
>> - }
>> - Ok(())
>> - })
>> + .access(*digest, cacher, |digest| self.store.clear_chunk(&digest))
>> .await?
>> .is_some()
>> {
>> --
>> 2.47.3
>>
>>
>>
>> _______________________________________________
>> pbs-devel mailing list
>> pbs-devel at lists.proxmox.com
>> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>>
>>
>>
>
>
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>
>
More information about the pbs-devel
mailing list