[pbs-devel] [PATCH proxmox-backup 2/2] datastore: reinsert unused chunks into cache during instantiation

Christian Ebner c.ebner at proxmox.com
Fri Aug 1 16:10:24 CEST 2025


The local datastore chunk cache keeps the digests of the currently
cached chunks in-memory, while the chunk data itself is stored on the
filesystem. The in-memory state might however be lost when:
- the datastore is removed from the lookup cache when a corresponding
  maintenance mode is set.
- the services are restarted.
- the system is rebooted.

After the above actions, the cache is reinstantiated together with
the datastore on the next datastore lookup, calculating a cache
capacity based on the currently available storage space. This however
leaves the previously cached chunks out.
Therefore, reinsert them in an asynchronous task, by iterating over
them and inserting the chunk digests again. For these previously used
chunks, also increase the cache size, as this is now usable storage
for the cache as well.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
 pbs-datastore/src/chunk_store.rs              | 65 +++++++++++++++++++
 pbs-datastore/src/datastore.rs                | 18 ++++-
 .../src/local_datastore_lru_cache.rs          | 12 ++++
 3 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
index 3c59612bb..fcc0db3c6 100644
--- a/pbs-datastore/src/chunk_store.rs
+++ b/pbs-datastore/src/chunk_store.rs
@@ -8,6 +8,8 @@ use anyhow::{bail, format_err, Context, Error};
 use tracing::{info, warn};
 
 use pbs_api_types::{DatastoreFSyncLevel, GarbageCollectionStatus};
+use pbs_tools::async_lru_cache::AsyncLruCache;
+use proxmox_human_byte::HumanByte;
 use proxmox_io::ReadExt;
 use proxmox_s3_client::S3Client;
 use proxmox_sys::fs::{create_dir, create_path, file_type_from_file_stat, CreateOptions};
@@ -704,6 +706,69 @@ impl ChunkStore {
         ChunkStore::check_permissions(lockfile_path, 0o644)?;
         Ok(())
     }
+
+    /// Reinsert all cache chunks currently present in the chunk store, but not in the in-memory
+    /// LRU cache. Ignores empty chunks and chunks whose atime is not older than the call's start.
+    pub fn reinsert_unused_cache_chunks(
+        &self,
+        cache: &AsyncLruCache<[u8; 32], ()>,
+    ) -> Result<(), Error> {
+        let min_atime = proxmox_time::epoch_i64(); // chunks accessed at or after this are skipped
+
+        let mut reclaimed = 0;
+        for (entry, _progress, _bad) in self.get_chunk_iterator()? {
+            let entry = entry
+                .with_context(|| format!("chunk iterator on chunk store '{}' failed", self.name))?;
+            let filename = entry.file_name();
+
+            if let Ok(stat) = nix::sys::stat::fstatat( // stat errors skip the entry
+                Some(entry.parent_fd()),
+                filename,
+                nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW,
+            ) {
+                let file_type = file_type_from_file_stat(&stat);
+                if file_type != Some(nix::dir::Type::File) {
+                    continue;
+                }
+
+                if stat.st_atime < min_atime && stat.st_size > 0 {
+                    let filename_bytes = filename.to_bytes();
+                    if filename_bytes.len() != 64
+                        || !filename_bytes.iter().all(u8::is_ascii_hexdigit)
+                    {
+                        continue;
+                    }
+                    let mut digest = [0u8; 32];
+                    // cannot fail: the name was verified above to be exactly 64 hex digits
+                    hex::decode_to_slice(&filename_bytes[..64], &mut digest).unwrap();
+                    let (path, _digest_str) = self.chunk_path(&digest); // this chunk's own path
+
+                    cache.increase_capacity(1);
+                    if let Err(err) = cache.insert(digest, (), |_| {
+                        if let Err(err) = nix::unistd::truncate(&path, 0) {
+                            if err != nix::errno::Errno::ENOENT { // a missing file is tolerated
+                                return Err(Error::from(err));
+                            }
+                        }
+                        Ok(())
+                    }) { // NOTE(review): closure truncates the inserted chunk's own file - confirm
+                        tracing::error!(
+                            "Failed to rewarm cache with chunk {filename:?} on store '{}' - {err}",
+                            self.name,
+                        );
+                    }
+                    reclaimed += stat.st_size as u64; // counted even if the insert failed
+                }
+            }
+        }
+        tracing::info!(
+            "Reclaimed {} from chunk cache for store {}",
+            HumanByte::from(reclaimed),
+            self.name,
+        );
+
+        Ok(())
+    }
 }
 
 #[test]
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 5a22ffbcc..4a4f8b33a 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -364,10 +364,24 @@ impl DataStore {
             update_active_operations(name, operation, 1)?;
         }
 
-        Ok(Arc::new(Self {
+        let datastore = Arc::new(Self {
             inner: datastore,
             operation,
-        }))
+        });
+
+        if datastore.cache().is_some() {
+            let datastore2 = datastore.clone();
+            let name = name.to_string();
+            tokio::task::spawn_blocking(move || {
+                tracing::info!("Started cache refresh for datastore {name}");
+                let _ = datastore2
+                    .cache()
+                    .unwrap()
+                    .refresh_cache_and_resize_capacity();
+            });
+        }
+
+        Ok(datastore)
     }
 
     /// removes all datastores that are not configured anymore
diff --git a/pbs-datastore/src/local_datastore_lru_cache.rs b/pbs-datastore/src/local_datastore_lru_cache.rs
index 00cce94d6..9d585fa7a 100644
--- a/pbs-datastore/src/local_datastore_lru_cache.rs
+++ b/pbs-datastore/src/local_datastore_lru_cache.rs
@@ -184,4 +184,16 @@ impl LocalDatastoreLruCache {
     pub fn increase_capacity(&self, increment: usize) -> usize {
         self.cache.increase_capacity(increment)
     }
+
+    /// Reinsert non-zero chunks currently found on the local datastore cache filesystem
+    /// into the list of digests stored in-memory, so they are reused. Also increases the
+    /// cache capacity for each inserted chunk, as the previous capacity is calculated based
+    /// on available storage, but the chunk was already present, thereby decreasing the
+    /// available on-disk storage space.
+    ///
+    /// Returns `Ok(())` on success or the error reported by the chunk store reinsertion.
+    pub fn refresh_cache_and_resize_capacity(&self) -> Result<(), Error> {
+        let (store, cache) = (&self.store, &self.cache);
+        store.reinsert_unused_cache_chunks(cache)
+    }
 }
-- 
2.47.2





More information about the pbs-devel mailing list