[pbs-devel] [PATCH proxmox-backup v3 2/6] partial fix #6049: datastore: impl ConfigVersionCache fast path for lookups

Samuel Rufinatscha s.rufinatscha at proxmox.com
Thu Nov 20 14:03:38 CET 2025


Repeated /status requests caused lookup_datastore() to re-read and
parse datastore.cfg on every call. The issue was mentioned in report
#6049 [1]. cargo-flamegraph [2] confirmed that the hot path is
dominated by pbs_config::datastore::config() (config parsing).

This patch implements caching of the global datastore.cfg using the
generation numbers from the shared config version cache. It caches the
datastore.cfg along with the generation number and, when a subsequent
lookup sees the same generation, it reuses the cached config without
re-reading it from disk. If the generation differs
(or the cache is unavailable), it falls back to the existing slow path
with no behavioral changes.

Behavioral notes

- The generation is bumped via the existing save_config() path, so
  API-driven config changes are detected immediately.
- Manual edits to datastore.cfg are not detected; a TTL
  guard is introduced in a dedicated patch in this series.
- DataStore::drop still performs a config read on the common path,
  this is covered in a dedicated patch in this series.

Links

[1] Bugzilla: https://bugzilla.proxmox.com/show_bug.cgi?id=6049
[2] cargo-flamegraph: https://github.com/flamegraph-rs/flamegraph

Fixes: #6049
Signed-off-by: Samuel Rufinatscha <s.rufinatscha at proxmox.com>
---
 pbs-datastore/Cargo.toml       |   1 +
 pbs-datastore/src/datastore.rs | 120 +++++++++++++++++++++++----------
 2 files changed, 87 insertions(+), 34 deletions(-)

diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml
index 8ce930a9..42f49a7b 100644
--- a/pbs-datastore/Cargo.toml
+++ b/pbs-datastore/Cargo.toml
@@ -40,6 +40,7 @@ proxmox-io.workspace = true
 proxmox-lang.workspace=true
 proxmox-s3-client = { workspace = true, features = [ "impl" ] }
 proxmox-schema = { workspace = true, features = [ "api-macro" ] }
+proxmox-section-config.workspace = true
 proxmox-serde = { workspace = true, features = [ "serde_json" ] }
 proxmox-sys.workspace = true
 proxmox-systemd.workspace = true
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index 0a517923..8c687097 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -32,7 +32,8 @@ use pbs_api_types::{
     MaintenanceType, Operation, UPID,
 };
 use pbs_config::s3::S3_CFG_TYPE_ID;
-use pbs_config::BackupLockGuard;
+use pbs_config::{BackupLockGuard, ConfigVersionCache};
+use proxmox_section_config::SectionConfigData;
 
 use crate::backup_info::{
     BackupDir, BackupGroup, BackupInfo, OLD_LOCKING, PROTECTED_MARKER_FILENAME,
@@ -46,6 +47,17 @@ use crate::s3::S3_CONTENT_PREFIX;
 use crate::task_tracking::{self, update_active_operations};
 use crate::{DataBlob, LocalDatastoreLruCache};
 
+// Cache for fully parsed datastore.cfg
+struct DatastoreConfigCache {
+    // Parsed datastore.cfg file
+    config: Arc<SectionConfigData>,
+    // Generation number from ConfigVersionCache
+    last_generation: usize,
+}
+
+static DATASTORE_CONFIG_CACHE: LazyLock<Mutex<Option<DatastoreConfigCache>>> =
+    LazyLock::new(|| Mutex::new(None));
+
 static DATASTORE_MAP: LazyLock<Mutex<HashMap<String, Arc<DataStoreImpl>>>> =
     LazyLock::new(|| Mutex::new(HashMap::new()));
 
@@ -142,10 +154,12 @@ pub struct DataStoreImpl {
     last_gc_status: Mutex<GarbageCollectionStatus>,
     verify_new: bool,
     chunk_order: ChunkOrder,
-    last_digest: Option<[u8; 32]>,
     sync_level: DatastoreFSyncLevel,
     backend_config: DatastoreBackendConfig,
     lru_store_caching: Option<LocalDatastoreLruCache>,
+    /// Datastore generation number from `ConfigVersionCache` at creation time, used to
+    /// validate reuse of this cached `DataStoreImpl`.
+    config_generation: Option<usize>,
 }
 
 impl DataStoreImpl {
@@ -158,10 +172,10 @@ impl DataStoreImpl {
             last_gc_status: Mutex::new(GarbageCollectionStatus::default()),
             verify_new: false,
             chunk_order: Default::default(),
-            last_digest: None,
             sync_level: Default::default(),
             backend_config: Default::default(),
             lru_store_caching: None,
+            config_generation: None,
         })
     }
 }
@@ -256,6 +270,37 @@ impl DatastoreBackend {
     }
 }
 
+/// Return the cached datastore SectionConfig and its generation.
+fn datastore_section_config_cached() -> Result<(Arc<SectionConfigData>, Option<usize>), Error> {
+    let gen = ConfigVersionCache::new()
+        .ok()
+        .map(|c| c.datastore_generation());
+
+    let mut guard = DATASTORE_CONFIG_CACHE.lock().unwrap();
+
+    // Fast path: re-use cached datastore.cfg
+    if let (Some(gen), Some(cache)) = (gen, guard.as_ref()) {
+        if cache.last_generation == gen {
+            return Ok((cache.config.clone(), Some(gen)));
+        }
+    }
+
+    // Slow path: re-read datastore.cfg
+    let (config_raw, _digest) = pbs_config::datastore::config()?;
+    let config = Arc::new(config_raw);
+
+    if let Some(gen_val) = gen {
+        *guard = Some(DatastoreConfigCache {
+            config: config.clone(),
+            last_generation: gen_val,
+        });
+    } else {
+        *guard = None;
+    }
+
+    Ok((config, gen))
+}
+
 impl DataStore {
     // This one just panics on everything
     #[doc(hidden)]
@@ -327,56 +372,63 @@ impl DataStore {
         name: &str,
         operation: Option<Operation>,
     ) -> Result<Arc<DataStore>, Error> {
-        // Avoid TOCTOU between checking maintenance mode and updating active operation counter, as
-        // we use it to decide whether it is okay to delete the datastore.
+        // Avoid TOCTOU between checking maintenance mode and updating active operations.
         let _config_lock = pbs_config::datastore::lock_config()?;
 
-        // we could use the ConfigVersionCache's generation for staleness detection, but  we load
-        // the config anyway -> just use digest, additional benefit: manual changes get detected
-        let (config, digest) = pbs_config::datastore::config()?;
-        let config: DataStoreConfig = config.lookup("datastore", name)?;
+        // Get the current datastore.cfg generation number and cached config
+        let (section_config, gen_num) = datastore_section_config_cached()?;
+
+        let datastore_cfg: DataStoreConfig = section_config.lookup("datastore", name)?;
+        let maintenance_mode = datastore_cfg.get_maintenance_mode();
+        let mount_status = get_datastore_mount_status(&datastore_cfg);
 
-        if let Some(maintenance_mode) = config.get_maintenance_mode() {
-            if let Err(error) = maintenance_mode.check(operation) {
+        if let Some(mm) = &maintenance_mode {
+            if let Err(error) = mm.check(operation.clone()) {
                 bail!("datastore '{name}' is unavailable: {error}");
             }
         }
 
-        if get_datastore_mount_status(&config) == Some(false) {
-            let mut datastore_cache = DATASTORE_MAP.lock().unwrap();
-            datastore_cache.remove(&config.name);
-            bail!("datastore '{}' is not mounted", config.name);
+        let mut datastore_cache = DATASTORE_MAP.lock().unwrap();
+
+        if mount_status == Some(false) {
+            datastore_cache.remove(&datastore_cfg.name);
+            bail!("datastore '{}' is not mounted", datastore_cfg.name);
         }
 
-        let mut datastore_cache = DATASTORE_MAP.lock().unwrap();
-        let entry = datastore_cache.get(name);
-
-        // reuse chunk store so that we keep using the same process locker instance!
-        let chunk_store = if let Some(datastore) = &entry {
-            let last_digest = datastore.last_digest.as_ref();
-            if let Some(true) = last_digest.map(|last_digest| last_digest == &digest) {
-                if let Some(operation) = operation {
-                    update_active_operations(name, operation, 1)?;
+        // Re-use DataStoreImpl
+        if let Some(existing) = datastore_cache.get(name).cloned() {
+            if let (Some(last_generation), Some(gen_num)) = (existing.config_generation, gen_num) {
+                if last_generation == gen_num {
+                    if let Some(op) = operation {
+                        update_active_operations(name, op, 1)?;
+                    }
+
+                    return Ok(Arc::new(Self {
+                        inner: existing,
+                        operation,
+                    }));
                 }
-                return Ok(Arc::new(Self {
-                    inner: Arc::clone(datastore),
-                    operation,
-                }));
             }
-            Arc::clone(&datastore.chunk_store)
+        }
+
+        // (Re)build DataStoreImpl
+
+        // Reuse chunk store so that we keep using the same process locker instance!
+        let chunk_store = if let Some(existing) = datastore_cache.get(name) {
+            Arc::clone(&existing.chunk_store)
         } else {
             let tuning: DatastoreTuning = serde_json::from_value(
                 DatastoreTuning::API_SCHEMA
-                    .parse_property_string(config.tuning.as_deref().unwrap_or(""))?,
+                    .parse_property_string(datastore_cfg.tuning.as_deref().unwrap_or(""))?,
             )?;
             Arc::new(ChunkStore::open(
                 name,
-                config.absolute_path(),
+                datastore_cfg.absolute_path(),
                 tuning.sync_level.unwrap_or_default(),
             )?)
         };
 
-        let datastore = DataStore::with_store_and_config(chunk_store, config, Some(digest))?;
+        let datastore = DataStore::with_store_and_config(chunk_store, datastore_cfg, gen_num)?;
 
         let datastore = Arc::new(datastore);
         datastore_cache.insert(name.to_string(), datastore.clone());
@@ -478,7 +530,7 @@ impl DataStore {
     fn with_store_and_config(
         chunk_store: Arc<ChunkStore>,
         config: DataStoreConfig,
-        last_digest: Option<[u8; 32]>,
+        generation: Option<usize>,
     ) -> Result<DataStoreImpl, Error> {
         let mut gc_status_path = chunk_store.base_path();
         gc_status_path.push(".gc-status");
@@ -538,10 +590,10 @@ impl DataStore {
             last_gc_status: Mutex::new(gc_status),
             verify_new: config.verify_new.unwrap_or(false),
             chunk_order: tuning.chunk_order.unwrap_or_default(),
-            last_digest,
             sync_level: tuning.sync_level.unwrap_or_default(),
             backend_config,
             lru_store_caching,
+            config_generation: generation,
         })
     }
 
-- 
2.47.3





More information about the pbs-devel mailing list