From s.rufinatscha at proxmox.com Fri Jan 2 17:07:41 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:41 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v3 2/4] pbs-config: cache verified API token secrets In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-3-s.rufinatscha@proxmox.com> Currently, every token-based API request reads the token.shadow file and runs the expensive password hash verification for the given token secret. This shows up as a hotspot in /status profiling (see bug #7017 [1]). This patch introduces an in-memory cache of successfully verified token secrets. Subsequent requests for the same token+secret combination only perform a comparison using openssl::memcmp::eq and avoid re-running the password hash. The cache is updated when a token secret is set and cleared when a token is deleted. Note, this does NOT include manual config changes, which will be covered in a subsequent patch. This patch is part of the series which fixes bug #7017 [1]. [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- Changes from v1 to v2: * Replace OnceCell with LazyLock, and std::sync::RwLock with parking_lot::RwLock. * Add API_MUTATION_GENERATION and guard cache inserts to prevent ?zombie inserts? across concurrent set/delete. * Refactor cache operations into cache_try_secret_matches, cache_try_insert_secret, and centralize write-side behavior in apply_api_mutation. * Switch fast-path cache access to try_read/try_write (best-effort). Changes from v2 to v3: * Replaced process-local cache invalidation (AtomicU64 API_MUTATION_GENERATION) with a cross-process shared generation via ConfigVersionCache. * Validate shared generation before/after the constant-time secret compare; only insert into cache if the generation is unchanged. 
* invalidate_cache_state() on insert if shared generation changed. Cargo.toml | 1 + pbs-config/Cargo.toml | 1 + pbs-config/src/token_shadow.rs | 157 ++++++++++++++++++++++++++++++++- 3 files changed, 158 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 1aa57ae5..821b63b7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -143,6 +143,7 @@ nom = "7" num-traits = "0.2" once_cell = "1.3.1" openssl = "0.10.40" +parking_lot = "0.12" percent-encoding = "2.1" pin-project-lite = "0.2" regex = "1.5.5" diff --git a/pbs-config/Cargo.toml b/pbs-config/Cargo.toml index 74afb3c6..eb81ce00 100644 --- a/pbs-config/Cargo.toml +++ b/pbs-config/Cargo.toml @@ -13,6 +13,7 @@ libc.workspace = true nix.workspace = true once_cell.workspace = true openssl.workspace = true +parking_lot.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/pbs-config/src/token_shadow.rs b/pbs-config/src/token_shadow.rs index 640fabbf..fa84aee5 100644 --- a/pbs-config/src/token_shadow.rs +++ b/pbs-config/src/token_shadow.rs @@ -1,6 +1,8 @@ use std::collections::HashMap; +use std::sync::LazyLock; use anyhow::{bail, format_err, Error}; +use parking_lot::RwLock; use serde::{Deserialize, Serialize}; use serde_json::{from_value, Value}; @@ -13,6 +15,18 @@ use crate::{open_backup_lockfile, BackupLockGuard}; const LOCK_FILE: &str = pbs_buildcfg::configdir!("/token.shadow.lock"); const CONF_FILE: &str = pbs_buildcfg::configdir!("/token.shadow"); +/// Global in-memory cache for successfully verified API token secrets. +/// The cache stores plain text secrets for token Authids that have already been +/// verified against the hashed values in `token.shadow`. This allows for cheap +/// subsequent authentications for the same token+secret combination, avoiding +/// recomputing the password hash on every request. 
+static TOKEN_SECRET_CACHE: LazyLock> = LazyLock::new(|| { + RwLock::new(ApiTokenSecretCache { + secrets: HashMap::new(), + shared_gen: 0, + }) +}); + #[derive(Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] /// ApiToken id / secret pair @@ -54,9 +68,27 @@ pub fn verify_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { bail!("not an API token ID"); } + // Fast path + if cache_try_secret_matches(tokenid, secret) { + return Ok(()); + } + + // Slow path + // First, capture the shared generation before doing the hash verification. + let gen_before = token_shadow_shared_gen(); + let data = read_file()?; match data.get(tokenid) { - Some(hashed_secret) => proxmox_sys::crypt::verify_crypt_pw(secret, hashed_secret), + Some(hashed_secret) => { + proxmox_sys::crypt::verify_crypt_pw(secret, hashed_secret)?; + + // Try to cache only if nothing changed while verifying the secret. + if let Some(gen) = gen_before { + cache_try_insert_secret(tokenid.clone(), secret.to_owned(), gen); + } + + Ok(()) + } None => bail!("invalid API token"), } } @@ -82,6 +114,8 @@ fn set_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { data.insert(tokenid.clone(), hashed_secret); write_file(data)?; + apply_api_mutation(tokenid, Some(secret)); + Ok(()) } @@ -97,5 +131,126 @@ pub fn delete_secret(tokenid: &Authid) -> Result<(), Error> { data.remove(tokenid); write_file(data)?; + apply_api_mutation(tokenid, None); + Ok(()) } + +struct ApiTokenSecretCache { + /// Keys are token Authids, values are the corresponding plain text secrets. + /// Entries are added after a successful on-disk verification in + /// `verify_secret` or when a new token secret is generated by + /// `generate_and_set_secret`. Used to avoid repeated + /// password-hash computation on subsequent authentications. + secrets: HashMap, + /// Shared generation to detect mutations of the underlying token.shadow file. + shared_gen: usize, +} + +/// Cached secret. 
+struct CachedSecret { + secret: String, +} + +fn cache_try_insert_secret(tokenid: Authid, secret: String, shared_gen_before: usize) { + let Some(mut cache) = TOKEN_SECRET_CACHE.try_write() else { + return; + }; + + let Some(shared_gen_now) = token_shadow_shared_gen() else { + return; + }; + + // If this process missed a generation bump, its cache is stale. + if cache.shared_gen != shared_gen_now { + invalidate_cache_state(&mut cache); + cache.shared_gen = shared_gen_now; + } + + // If a mutation happened while we were verifying the secret, do not insert. + if shared_gen_now == shared_gen_before { + cache.secrets.insert(tokenid, CachedSecret { secret }); + } +} + +// Tries to match the given token secret against the cached secret. +// Checks the generation before and after the constant-time compare to avoid a +// TOCTOU window. If another process rotates/deletes a token while we're validating +// the cached secret, the generation will change, and we +// must not trust the cache for this request. +fn cache_try_secret_matches(tokenid: &Authid, secret: &str) -> bool { + let Some(cache) = TOKEN_SECRET_CACHE.try_read() else { + return false; + }; + let Some(entry) = cache.secrets.get(tokenid) else { + return false; + }; + + let cache_gen = cache.shared_gen; + + let Some(gen1) = token_shadow_shared_gen() else { + return false; + }; + if gen1 != cache_gen { + return false; + } + + let eq = openssl::memcmp::eq(entry.secret.as_bytes(), secret.as_bytes()); + + let Some(gen2) = token_shadow_shared_gen() else { + return false; + }; + + eq && gen2 == cache_gen +} + +fn apply_api_mutation(tokenid: &Authid, new_secret: Option<&str>) { + // Signal cache invalidation to other processes (best-effort). + let new_shared_gen = bump_token_shadow_shared_gen(); + + let mut cache = TOKEN_SECRET_CACHE.write(); + + // If we cannot read/bump the shared generation, we cannot safely trust the cache. 
+ let Some(gen) = new_shared_gen else { + invalidate_cache_state(&mut cache); + cache.shared_gen = 0; + return; + }; + + // Update to the post-mutation generation. + cache.shared_gen = gen; + + // Apply the new mutation. + match new_secret { + Some(secret) => { + cache.secrets.insert( + tokenid.clone(), + CachedSecret { + secret: secret.to_owned(), + }, + ); + } + None => { + cache.secrets.remove(tokenid); + } + } +} + +/// Get the current shared generation. +fn token_shadow_shared_gen() -> Option { + crate::ConfigVersionCache::new() + .ok() + .map(|cvc| cvc.token_shadow_generation()) +} + +/// Bump and return the new shared generation. +fn bump_token_shadow_shared_gen() -> Option { + crate::ConfigVersionCache::new() + .ok() + .map(|cvc| cvc.increase_token_shadow_generation() + 1) +} + +/// Invalidates the cache state and only keeps the shared generation. +fn invalidate_cache_state(cache: &mut ApiTokenSecretCache) { + cache.secrets.clear(); +} -- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:43 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:43 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v3 4/4] pbs-config: add TTL window to token secret cache In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-5-s.rufinatscha@proxmox.com> Verify_secret() currently calls refresh_cache_if_file_changed() on every request, which performs a metadata() call on token.shadow each time. Under load this adds unnecessary overhead, considering also the file usually should rarely change. This patch introduces a TTL boundary, controlled by TOKEN_SECRET_CACHE_TTL_SECS. File metadata is only re-loaded once the TTL has expired. Documents TTL effects. This patch is part of the series which fixes bug #7017 [1]. 
[1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- Changes from v1 to v2: * Add TOKEN_SECRET_CACHE_TTL_SECS and last_checked. * Implement double-checked TTL: check with try_read first; only attempt refresh with try_write if expired/unknown. * Fix TTL bookkeeping: update last_checked on the ?file unchanged? path and after API mutations. * Add documentation warning about TTL-delayed effect of manual token.shadow edits. Changes from v2 to v3: * Refactored refresh_cache_if_file_changed TTL logic. * Remove had_prior_state check (replaced by last_checked logic). * Improve TTL bound checks. * Reword documentation warning for clarity. docs/user-management.rst | 4 ++++ pbs-config/src/token_shadow.rs | 29 ++++++++++++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/docs/user-management.rst b/docs/user-management.rst index 41b43d60..8dfae528 100644 --- a/docs/user-management.rst +++ b/docs/user-management.rst @@ -156,6 +156,10 @@ metadata: Similarly, the ``user delete-token`` subcommand can be used to delete a token again. +.. WARNING:: Direct/manual edits to ``token.shadow`` may take up to 60 seconds (or + longer in edge cases) to take effect due to caching. Restart services for + immediate effect of manual edits. + Newly generated API tokens don't have any permissions. Please read the next section to learn how to set access permissions. diff --git a/pbs-config/src/token_shadow.rs b/pbs-config/src/token_shadow.rs index 02fb191b..e3529b40 100644 --- a/pbs-config/src/token_shadow.rs +++ b/pbs-config/src/token_shadow.rs @@ -33,6 +33,8 @@ static TOKEN_SECRET_CACHE: LazyLock> = LazyLock::new last_checked: None, }) }); +/// Max age in seconds of the token secret cache before checking for file changes. 
+const TOKEN_SECRET_CACHE_TTL_SECS: i64 = 60; #[derive(Serialize, Deserialize)] #[serde(rename_all = "kebab-case")] @@ -74,11 +76,28 @@ fn write_file(data: HashMap) -> Result<(), Error> { fn refresh_cache_if_file_changed() -> bool { let now = epoch_i64(); - // Best-effort refresh under write lock. + // Fast path: cache is fresh if shared-gen matches and TTL not expired. + if let (Some(cache), Some(shared_gen_read)) = + (TOKEN_SECRET_CACHE.try_read(), token_shadow_shared_gen()) + { + if cache.shared_gen == shared_gen_read + && cache + .last_checked + .is_some_and(|last| now >= last && (now - last) < TOKEN_SECRET_CACHE_TTL_SECS) + { + return true; + } + // read lock drops here + } else { + return false; + } + + // Slow path: best-effort refresh under write lock. let Some(mut cache) = TOKEN_SECRET_CACHE.try_write() else { return false; }; + // Re-read generation after acquiring the lock (may have changed meanwhile). let Some(shared_gen_now) = token_shadow_shared_gen() else { return false; }; @@ -89,6 +108,14 @@ fn refresh_cache_if_file_changed() -> bool { cache.shared_gen = shared_gen_now; } + // TTL check again after acquiring the lock + if cache + .last_checked + .is_some_and(|last| now >= last && (now - last) < TOKEN_SECRET_CACHE_TTL_SECS) + { + return true; + } + // Stat the file to detect manual edits. 
let Ok((new_mtime, new_len)) = shadow_mtime_len() else { return false; -- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:42 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:42 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v3 3/4] pbs-config: invalidate token-secret cache on token.shadow changes In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-4-s.rufinatscha@proxmox.com> Previously the in-memory token-secret cache was only updated via set_secret() and delete_secret(), so manual edits to token.shadow were not reflected. This patch adds file change detection to the cache. It tracks the mtime and length of token.shadow and clears the in-memory token secret cache whenever these values change. Note, this patch fetches file stats on every request. An TTL-based optimization will be covered in a subsequent patch of the series. This patch is part of the series which fixes bug #7017 [1]. [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- Changes from v1 to v2: * Add file metadata tracking (file_mtime, file_len) and FILE_GENERATION. * Store file_gen in CachedSecret and verify it against the current FILE_GENERATION to ensure cached entries belong to the current file state. * Add shadow_mtime_len() helper and convert refresh to best-effort (try_write, returns bool). * Pass a pre-write metadata snapshot into apply_api_mutation and clear/bump generation if the cache metadata indicates missed external edits. Changes from v2 to v3: * Cache now tracks last_checked (epoch seconds). * Simplified refresh_cache_if_file_changed, removed FILE_GENERATION logic * On first load, initializes file metadata and keeps empty cache. 
pbs-config/src/token_shadow.rs | 122 +++++++++++++++++++++++++++++++-- 1 file changed, 118 insertions(+), 4 deletions(-) diff --git a/pbs-config/src/token_shadow.rs b/pbs-config/src/token_shadow.rs index fa84aee5..02fb191b 100644 --- a/pbs-config/src/token_shadow.rs +++ b/pbs-config/src/token_shadow.rs @@ -1,5 +1,8 @@ use std::collections::HashMap; +use std::fs; +use std::io::ErrorKind; use std::sync::LazyLock; +use std::time::SystemTime; use anyhow::{bail, format_err, Error}; use parking_lot::RwLock; @@ -7,6 +10,7 @@ use serde::{Deserialize, Serialize}; use serde_json::{from_value, Value}; use proxmox_sys::fs::CreateOptions; +use proxmox_time::epoch_i64; use pbs_api_types::Authid; //use crate::auth; @@ -24,6 +28,9 @@ static TOKEN_SECRET_CACHE: LazyLock> = LazyLock::new RwLock::new(ApiTokenSecretCache { secrets: HashMap::new(), shared_gen: 0, + file_mtime: None, + file_len: None, + last_checked: None, }) }); @@ -62,6 +69,63 @@ fn write_file(data: HashMap) -> Result<(), Error> { proxmox_sys::fs::replace_file(CONF_FILE, &json, options, true) } +/// Refreshes the in-memory cache if the on-disk token.shadow file changed. +/// Returns true if the cache is valid to use, false if not. +fn refresh_cache_if_file_changed() -> bool { + let now = epoch_i64(); + + // Best-effort refresh under write lock. + let Some(mut cache) = TOKEN_SECRET_CACHE.try_write() else { + return false; + }; + + let Some(shared_gen_now) = token_shadow_shared_gen() else { + return false; + }; + + // If another process bumped the generation, we don't know what changed -> clear cache + if cache.shared_gen != shared_gen_now { + invalidate_cache_state(&mut cache); + cache.shared_gen = shared_gen_now; + } + + // Stat the file to detect manual edits. + let Ok((new_mtime, new_len)) = shadow_mtime_len() else { + return false; + }; + + // Initialize file stats if we have no prior state. 
+ if cache.last_checked.is_none() { + cache.secrets.clear(); // ensure cache is empty on first load + cache.file_mtime = new_mtime; + cache.file_len = new_len; + cache.last_checked = Some(now); + return true; + } + + // No change detected. + if cache.file_mtime == new_mtime && cache.file_len == new_len { + cache.last_checked = Some(now); + return true; + } + + // Manual edit detected -> invalidate cache and update stat. + cache.secrets.clear(); + cache.file_mtime = new_mtime; + cache.file_len = new_len; + cache.last_checked = Some(now); + + // Best-effort propagation to other processes + update local view. + if let Some(shared_gen_new) = bump_token_shadow_shared_gen() { + cache.shared_gen = shared_gen_new; + } else { + // Do not fail: local cache is already safe as we cleared it above. + // Keep local shared_gen as-is to avoid repeated failed attempts. + } + + true +} + /// Verifies that an entry for given tokenid / API token secret exists pub fn verify_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { if !tokenid.is_token() { @@ -69,7 +133,7 @@ pub fn verify_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { } // Fast path - if cache_try_secret_matches(tokenid, secret) { + if refresh_cache_if_file_changed() && cache_try_secret_matches(tokenid, secret) { return Ok(()); } @@ -109,12 +173,15 @@ fn set_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { let _guard = lock_config()?; + // Capture state before we write to detect external edits. + let pre_meta = shadow_mtime_len().unwrap_or((None, None)); + let mut data = read_file()?; let hashed_secret = proxmox_sys::crypt::encrypt_pw(secret)?; data.insert(tokenid.clone(), hashed_secret); write_file(data)?; - apply_api_mutation(tokenid, Some(secret)); + apply_api_mutation(tokenid, Some(secret), pre_meta); Ok(()) } @@ -127,11 +194,14 @@ pub fn delete_secret(tokenid: &Authid) -> Result<(), Error> { let _guard = lock_config()?; + // Capture state before we write to detect external edits. 
+ let pre_meta = shadow_mtime_len().unwrap_or((None, None)); + let mut data = read_file()?; data.remove(tokenid); write_file(data)?; - apply_api_mutation(tokenid, None); + apply_api_mutation(tokenid, None, pre_meta); Ok(()) } @@ -145,6 +215,12 @@ struct ApiTokenSecretCache { secrets: HashMap, /// Shared generation to detect mutations of the underlying token.shadow file. shared_gen: usize, + // shadow file mtime to detect changes + file_mtime: Option, + // shadow file length to detect changes + file_len: Option, + // last time the file metadata was checked + last_checked: Option, } /// Cached secret. @@ -204,7 +280,13 @@ fn cache_try_secret_matches(tokenid: &Authid, secret: &str) -> bool { eq && gen2 == cache_gen } -fn apply_api_mutation(tokenid: &Authid, new_secret: Option<&str>) { +fn apply_api_mutation( + tokenid: &Authid, + new_secret: Option<&str>, + pre_write_meta: (Option, Option), +) { + let now = epoch_i64(); + // Signal cache invalidation to other processes (best-effort). let new_shared_gen = bump_token_shadow_shared_gen(); @@ -220,6 +302,13 @@ fn apply_api_mutation(tokenid: &Authid, new_secret: Option<&str>) { // Update to the post-mutation generation. cache.shared_gen = gen; + // If our cached file metadata does not match the on-disk state before our write, + // we likely missed an external/manual edit. We can no longer trust any cached secrets. + let (pre_mtime, pre_len) = pre_write_meta; + if cache.file_mtime != pre_mtime || cache.file_len != pre_len { + cache.secrets.clear(); + } + // Apply the new mutation. match new_secret { Some(secret) => { @@ -234,6 +323,20 @@ fn apply_api_mutation(tokenid: &Authid, new_secret: Option<&str>) { cache.secrets.remove(tokenid); } } + + // Update our view of the file metadata to the post-write state (best-effort). + // (If this fails, drop local cache so callers fall back to slow path until refreshed.) 
+ match shadow_mtime_len() { + Ok((mtime, len)) => { + cache.file_mtime = mtime; + cache.file_len = len; + cache.last_checked = Some(now); + } + Err(_) => { + // If we cannot validate state, do not trust cache. + invalidate_cache_state(&mut cache); + } + } } /// Get the current shared generation. @@ -253,4 +356,15 @@ fn bump_token_shadow_shared_gen() -> Option { /// Invalidates the cache state and only keeps the shared generation. fn invalidate_cache_state(cache: &mut ApiTokenSecretCache) { cache.secrets.clear(); + cache.file_mtime = None; + cache.file_len = None; + cache.last_checked = None; +} + +fn shadow_mtime_len() -> Result<(Option, Option), Error> { + match fs::metadata(CONF_FILE) { + Ok(meta) => Ok((meta.modified().ok(), Some(meta.len()))), + Err(e) if e.kind() == ErrorKind::NotFound => Ok((None, None)), + Err(e) => Err(e.into()), + } } -- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:39 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:39 +0100 Subject: [pbs-devel] [PATCH proxmox{-backup, , -datacenter-manager} v3 00/10] token-shadow: reduce api token verification overhead Message-ID: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Hi, this series improves the performance of token-based API authentication in PBS (pbs-config) and in PDM (underlying proxmox-access-control crate), addressing the API token verification hotspot reported in our bugtracker #7017 [1]. When profiling PBS /status endpoint with cargo flamegraph [2], token-based authentication showed up as a dominant hotspot via proxmox_sys::crypt::verify_crypt_pw. Applying this series removes that path from the hot section of the flamegraph. The same performance issue was measured [2] for PDM. PDM uses the underlying shared proxmox-access-control library for token handling, which is a factored out version of the token.shadow handling code from PBS. 
While this series fixes the immediate performance issue both in PBS (pbs-config) and in the shared proxmox-access-control crate used by PDM, PBS should eventually, ideally be refactored, in a separate effort, to use proxmox-access-control for token handling instead of its local implementation. Problem For token-based API requests, both PBS?s pbs-config token.shadow handling and PDM proxmox-access-control?s token.shadow handling currently: 1. read the token.shadow file on each request 2. deserialize it into a HashMap 3. run password hash verification via proxmox_sys::crypt::verify_crypt_pw for the provided token secret Under load, this results in significant CPU usage spent in repeated password hashing for the same token+secret pairs. The attached flamegraphs for PBS [2] and PDM [3] show proxmox_sys::crypt::verify_crypt_pw dominating the hot path. Approach The goal is to reduce the cost of token-based authentication preserving the existing token handling semantics (including detecting manual edits to token.shadow) and be consistent between PBS (pbs-config) and PDM (proxmox-access-control). For both sites, this series proposes to: 1. Introduce an in-memory cache for verified token secrets and invalidate it through a shared ConfigVersionCache generation. Note, a shared generation is required to keep privileged and unprivileged daemon in sync to avoid caching inconsistencies across processes. 2. Invalidate on token.shadow file API changes (set_secret, delete_secret) 3. Invalidate on direct/manual token.shadow file changes (mtime + length) 4. Avoid per-request file stat calls using a TTL window Testing *PBS (pbs-config)* To verify the effect in PBS, I: 1. Set up test environment based on latest PBS ISO, installed Rust toolchain, cloned proxmox-backup repository to use with cargo flamegraph. Reproduced bug #7017 [1] by profiling the /status endpoint with token-based authentication using cargo flamegraph [2]. 2. 
Built PBS with pbs-config patches and re-ran the same workload and profiling setup. Confirmed that proxmox_sys::crypt::verify_crypt_pw path no longer appears in the hot section of the flamegraph. CPU usage is now dominated by TLS overhead. 3. Functionally-wise, I verified that: * valid tokens authenticate correctly when used in API requests * invalid secrets are rejected as before * generating a new token secret via dashboard (create token for user, regenerate existing secret) works and authenticates correctly *PDM (proxmox-access-control)* To verify the effect in PDM, I followed a similar testing approach. Instead of PBS? /status, I profiled the /version endpoint with cargo flamegraph [2] and verified that the expensive hashing path disappears from the hot section after introducing caching. Functionally-wise, I verified that: * valid tokens authenticate correctly when used in API requests * invalid secrets are rejected as before * generating a new token secret via dashboard (create token for user, regenerate existing secret) works and authenticates correctly Benchmarks: Two different benchmarks have been run to measure caching effects and RwLock contention: (1) Requests per second for PBS /status endpoint (E2E) Benchmarked parallel token auth requests for /status?verbose=0 on top of the datastore lookup cache series [4] to check throughput impact. With datastores=1, repeat=5000, parallel=16 this series gives ~172 req/s compared to ~65 req/s without it. This is a ~2.6x improvement (and aligns with the ~179 req/s from the previous series, which used per-process cache invalidation). (2) RwLock contention for token create/delete under heavy load of token-authenticated requests The previous version of the series compared std::sync::RwLock and parking_lot::RwLock contention for token create/delete under heavy parallel token-authenticated readers. parking_lot::RwLock has been chosen for the added fairness guarantees. Patch summary pbs-config: 0001 ? 
pbs-config: add token.shadow generation to ConfigVersionCache Extends ConfigVersionCache to provide a process-shared generation number for token.shadow changes. 0002 ? pbs-config: cache verified API token secrets Adds an in-memory cache to cache verified, plain-text API token secrets. Cache is invalidated through the process-shared ConfigVersionCache generation number. Uses openssl?s memcmp constant-time for matching secrets. 0003 ? pbs-config: invalidate token-secret cache on token.shadow changes Stats token.shadow mtime and length and clears the cache when the file changes, on each token verification request. 0004 ? pbs-config: add TTL window to token-secret cache Introduces a TTL (TOKEN_SECRET_CACHE_TTL_SECS, default 60) for metadata checks so that fs::metadata calls are not performed on each request. proxmox-access-control: 0005 ? access-control: extend AccessControlConfig for token.shadow invalidation Extends the AccessControlConfig trait with token_shadow_cache_generation() and increment_token_shadow_cache_generation() for proxmox-access-control to get the shared token.shadow generation number and bump it on token shadow changes. 0006 ? access-control: cache verified API token secrets Mirrors PBS PATCH 0002. 0007 ? access-control: invalidate token-secret cache on token.shadow changes Mirrors PBS PATCH 0003. 0008 ? access-control: add TTL window to token-secret cache Mirrors PBS PATCH 0004. proxmox-datacenter-manager: 0009 ? pdm-config: add token.shadow generation to ConfigVersionCache Extends PDM ConfigVersionCache and implements token_shadow_cache_generation() and increment_token_shadow_cache_generation() from AccessControlConfig for PDM. 0010 ? 
docs: document API token-cache TTL effects Documents the effects of the TTL window on token.shadow edits Changes from v1 to v2: * (refactor) Switched cache initialization to LazyLock * (perf) Use parking_lot::RwLock and best-effort cache access on the read/refresh path (try_read/try_write) to avoid lock contention * (doc) Document TTL-delayed effect of manual token.shadow edits * (fix) Add generation guards (API_MUTATION_GENERATION + FILE_GENERATION) to prevent caching across concurrent set/delete and external edits Changes from v2 to v3: * (refactor) Replace PBS per-process cache invalidation with a cross-process token.shadow generation based on PBS ConfigVersionCache, ensuring cache consistency between privileged and unprivileged daemons. * (refactor) Decoupling generation source from the proxmox/proxmox-access-control cache implementation: extend AccessControlConfig hooks so that products can provide the shared token.shadow generation source. * (refactor) Extend PDM's ConfigVersionCache with token_shadow_generation and introduce a pdm_config::AccessControlConfig wrapper implementing the new proxmox-access-control trait hooks. Switch server and CLI initialization to use pdm_config::AccessControlConfig instead of pdm_api_types::AccessControlConfig. * (refactor) Adapt generation checks around cached-secret comparison to use the new shared generation source. * (fix/logic) cache_try_insert_secret: Update the local cache generation if stale, allowing the new secret to be inserted immediately * (refactor) Extract cache invalidation logic into a invalidate_cache_state helper to reduce duplication and ensure consistent state resets * (refactor) Simplify refresh_cache_if_file_changed: handle the un-initialized/reset state and adjust the generation mismatch path to ensure file metadata is always re-read. * (doc) Clarify TTL-delayed effects of manual token.shadow edits. Please see the patch specific changelogs for more details. 
Thanks for considering this patch series, I look forward to your feedback. Best, Samuel Rufinatscha [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 [2] attachment 1767 [1]: Flamegraph showing the proxmox_sys::crypt::verify_crypt_pw stack [3] attachment 1794 [1]: Flamegraph PDM baseline [4] https://bugzilla.proxmox.com/show_bug.cgi?id=6049 proxmox-backup: Samuel Rufinatscha (4): pbs-config: add token.shadow generation to ConfigVersionCache pbs-config: cache verified API token secrets pbs-config: invalidate token-secret cache on token.shadow changes pbs-config: add TTL window to token secret cache Cargo.toml | 1 + docs/user-management.rst | 4 + pbs-config/Cargo.toml | 1 + pbs-config/src/config_version_cache.rs | 18 ++ pbs-config/src/token_shadow.rs | 298 ++++++++++++++++++++++++- 5 files changed, 321 insertions(+), 1 deletion(-) proxmox: Samuel Rufinatscha (4): proxmox-access-control: extend AccessControlConfig for token.shadow invalidation proxmox-access-control: cache verified API token secrets proxmox-access-control: invalidate token-secret cache on token.shadow changes proxmox-access-control: add TTL window to token secret cache Cargo.toml | 1 + proxmox-access-control/Cargo.toml | 1 + proxmox-access-control/src/init.rs | 17 ++ proxmox-access-control/src/token_shadow.rs | 299 ++++++++++++++++++++- 4 files changed, 317 insertions(+), 1 deletion(-) proxmox-datacenter-manager: Samuel Rufinatscha (2): pdm-config: implement token.shadow generation docs: document API token-cache TTL effects cli/admin/src/main.rs | 2 +- docs/access-control.rst | 4 ++ lib/pdm-config/Cargo.toml | 1 + lib/pdm-config/src/access_control_config.rs | 73 +++++++++++++++++++++ lib/pdm-config/src/config_version_cache.rs | 18 +++++ lib/pdm-config/src/lib.rs | 2 + server/src/acl.rs | 3 +- 7 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 lib/pdm-config/src/access_control_config.rs Summary over all repositories: 16 files changed, 738 insertions(+), 5 deletions(-) -- 
Generated by git-murpp 0.8.1 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:49 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:49 +0100 Subject: [pbs-devel] [PATCH proxmox-datacenter-manager v3 2/2] docs: document API token-cache TTL effects In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-11-s.rufinatscha@proxmox.com> Documents the effects of the added API token-cache in the proxmox-access-control crate. This patch is part of the series which fixes bug #7017 [1]. [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- Changes from v2 to v3: * Reword documentation warning for clarity. docs/access-control.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/access-control.rst b/docs/access-control.rst index adf26cd..18e57a2 100644 --- a/docs/access-control.rst +++ b/docs/access-control.rst @@ -47,6 +47,10 @@ place of the user ID (``user at realm``) and the user password, respectively. The API token is passed from the client to the server by setting the ``Authorization`` HTTP header with method ``PDMAPIToken`` to the value ``TOKENID:TOKENSECRET``. +.. WARNING:: Direct/manual edits to ``token.shadow`` may take up to 60 seconds (or + longer in edge cases) to take effect due to caching. Restart services for + immediate effect of manual edits. + ..
_access_control: Access Control -- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:48 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:48 +0100 Subject: [pbs-devel] [PATCH proxmox-datacenter-manager v3 1/2] pdm-config: implement token.shadow generation In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-10-s.rufinatscha@proxmox.com> PDM depends on the shared proxmox/proxmox-access-control crate for token.shadow handling, which expects the product to provide a cross-process invalidation signal so it can safely cache verified API token secrets and invalidate them when token.shadow is changed. This patch * adds a token_shadow_generation to PDM?s shared-memory ConfigVersionCache * implements proxmox_access_control::init::AccessControlConfig for pdm_config::AccessControlConfig, which - delegates roles/privs/path checks to the existing pdm_api_types::AccessControlConfig implementation - implements the shadow cache generation trait functions * switches the AccessControlConfig init paths (server + CLI) to use pdm_config::AccessControlConfig instead of pdm_api_types::AccessControlConfig This patch is part of the series which fixes bug #7017 [1]. 
[1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- cli/admin/src/main.rs | 2 +- lib/pdm-config/Cargo.toml | 1 + lib/pdm-config/src/access_control_config.rs | 73 +++++++++++++++++++++ lib/pdm-config/src/config_version_cache.rs | 18 +++++ lib/pdm-config/src/lib.rs | 2 + server/src/acl.rs | 3 +- 6 files changed, 96 insertions(+), 3 deletions(-) create mode 100644 lib/pdm-config/src/access_control_config.rs diff --git a/cli/admin/src/main.rs b/cli/admin/src/main.rs index f698fa2..916c633 100644 --- a/cli/admin/src/main.rs +++ b/cli/admin/src/main.rs @@ -19,7 +19,7 @@ fn main() { proxmox_product_config::init(api_user, priv_user); proxmox_access_control::init::init( - &pdm_api_types::AccessControlConfig, + &pdm_config::AccessControlConfig, pdm_buildcfg::configdir!("/access"), ) .expect("failed to setup access control config"); diff --git a/lib/pdm-config/Cargo.toml b/lib/pdm-config/Cargo.toml index d39c2ad..19781d2 100644 --- a/lib/pdm-config/Cargo.toml +++ b/lib/pdm-config/Cargo.toml @@ -13,6 +13,7 @@ once_cell.workspace = true openssl.workspace = true serde.workspace = true +proxmox-access-control.workspace = true proxmox-config-digest = { workspace = true, features = [ "openssl" ] } proxmox-http = { workspace = true, features = [ "http-helpers" ] } proxmox-ldap = { workspace = true, features = [ "types" ]} diff --git a/lib/pdm-config/src/access_control_config.rs b/lib/pdm-config/src/access_control_config.rs new file mode 100644 index 0000000..6f2e6b3 --- /dev/null +++ b/lib/pdm-config/src/access_control_config.rs @@ -0,0 +1,73 @@ +// e.g. 
in src/main.rs or server::context mod, wherever convenient + +use anyhow::Error; +use pdm_api_types::{Authid, Userid}; +use proxmox_section_config::SectionConfigData; +use std::collections::HashMap; + +pub struct AccessControlConfig; + +impl proxmox_access_control::init::AccessControlConfig for AccessControlConfig { + fn privileges(&self) -> &HashMap<&str, u64> { + pdm_api_types::AccessControlConfig.privileges() + } + + fn roles(&self) -> &HashMap<&str, (u64, &str)> { + pdm_api_types::AccessControlConfig.roles() + } + + fn is_superuser(&self, auth_id: &Authid) -> bool { + pdm_api_types::AccessControlConfig.is_superuser(auth_id) + } + + fn is_group_member(&self, user_id: &Userid, group: &str) -> bool { + pdm_api_types::AccessControlConfig.is_group_member(user_id, group) + } + + fn role_admin(&self) -> Option<&str> { + pdm_api_types::AccessControlConfig.role_admin() + } + + fn role_no_access(&self) -> Option<&str> { + pdm_api_types::AccessControlConfig.role_no_access() + } + + fn init_user_config(&self, config: &mut SectionConfigData) -> Result<(), Error> { + pdm_api_types::AccessControlConfig.init_user_config(config) + } + + fn acl_audit_privileges(&self) -> u64 { + pdm_api_types::AccessControlConfig.acl_audit_privileges() + } + + fn acl_modify_privileges(&self) -> u64 { + pdm_api_types::AccessControlConfig.acl_modify_privileges() + } + + fn check_acl_path(&self, path: &str) -> Result<(), Error> { + pdm_api_types::AccessControlConfig.check_acl_path(path) + } + + fn allow_partial_permission_match(&self) -> bool { + pdm_api_types::AccessControlConfig.allow_partial_permission_match() + } + + fn cache_generation(&self) -> Option { + pdm_api_types::AccessControlConfig.cache_generation() + } + + fn increment_cache_generation(&self) -> Result<(), Error> { + pdm_api_types::AccessControlConfig.increment_cache_generation() + } + + fn token_shadow_cache_generation(&self) -> Option { + crate::ConfigVersionCache::new() + .ok() + .map(|c| c.token_shadow_generation()) + } + + fn 
increment_token_shadow_cache_generation(&self) -> Result<usize, Error> {
+ pub fn increase_token_shadow_generation(&self) -> usize { + self.shmem + .data() + .token_shadow_generation + .fetch_add(1, Ordering::AcqRel) + } } diff --git a/lib/pdm-config/src/lib.rs b/lib/pdm-config/src/lib.rs index 4c49054..a15a006 100644 --- a/lib/pdm-config/src/lib.rs +++ b/lib/pdm-config/src/lib.rs @@ -9,6 +9,8 @@ pub mod remotes; pub mod setup; pub mod views; +mod access_control_config; +pub use access_control_config::AccessControlConfig; mod config_version_cache; pub use config_version_cache::ConfigVersionCache; diff --git a/server/src/acl.rs b/server/src/acl.rs index f421814..e6e007b 100644 --- a/server/src/acl.rs +++ b/server/src/acl.rs @@ -1,6 +1,5 @@ pub(crate) fn init() { - static ACCESS_CONTROL_CONFIG: pdm_api_types::AccessControlConfig = - pdm_api_types::AccessControlConfig; + static ACCESS_CONTROL_CONFIG: pdm_config::AccessControlConfig = pdm_config::AccessControlConfig; proxmox_access_control::init::init(&ACCESS_CONTROL_CONFIG, pdm_buildcfg::configdir!("/access")) .expect("failed to setup access control config"); -- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:40 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:40 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v3 1/4] pbs-config: add token.shadow generation to ConfigVersionCache In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-2-s.rufinatscha@proxmox.com> Currently, every token-based API request reads the token.shadow file and runs the expensive password hash verification for the given token secret. This shows up as a hotspot in /status profiling (see bug #7017 [1]). To solve the issue, this patch prepares the config version cache, so that token_shadow_generation config caching can be built on top of it. 
This patch specifically: (1) implements increment function in order to invalidate generations This patch is part of the series which fixes bug #7017 [1]. [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- pbs-config/src/config_version_cache.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pbs-config/src/config_version_cache.rs b/pbs-config/src/config_version_cache.rs index e8fb994f..1376b11d 100644 --- a/pbs-config/src/config_version_cache.rs +++ b/pbs-config/src/config_version_cache.rs @@ -28,6 +28,8 @@ struct ConfigVersionCacheDataInner { // datastore (datastore.cfg) generation/version // FIXME: remove with PBS 3.0 datastore_generation: AtomicUsize, + // Token shadow (token.shadow) generation/version. + token_shadow_generation: AtomicUsize, // Add further atomics here } @@ -153,4 +155,20 @@ impl ConfigVersionCache { .datastore_generation .fetch_add(1, Ordering::AcqRel) } + + /// Returns the token shadow generation number. + pub fn token_shadow_generation(&self) -> usize { + self.shmem + .data() + .token_shadow_generation + .load(Ordering::Acquire) + } + + /// Increase the token shadow generation number. + pub fn increase_token_shadow_generation(&self) -> usize { + self.shmem + .data() + .token_shadow_generation + .fetch_add(1, Ordering::AcqRel) + } } -- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:44 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:44 +0100 Subject: [pbs-devel] [PATCH proxmox v3 1/4] proxmox-access-control: extend AccessControlConfig for token.shadow invalidation In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-6-s.rufinatscha@proxmox.com> Add token_shadow_cache_generation() and increment_token_shadow_cache_generation() hooks to AccessControlConfig. 
This lets products provide a cross-process invalidation signal for token.shadow so proxmox-access-control can cache verified API token secrets and invalidate that cache on token rotation/deletion. This patch is part of the series which fixes bug #7017 [1]. [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- proxmox-access-control/src/init.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/proxmox-access-control/src/init.rs b/proxmox-access-control/src/init.rs index e64398e8..0ba1a526 100644 --- a/proxmox-access-control/src/init.rs +++ b/proxmox-access-control/src/init.rs @@ -51,6 +51,23 @@ pub trait AccessControlConfig: Send + Sync { Ok(()) } + /// Returns the current cache generation of the token shadow cache. If the generation was + /// incremented since the last time the cache was queried, the token shadow cache is reloaded + /// from disk. + /// + /// Default: Always returns `None`. + fn token_shadow_cache_generation(&self) -> Option { + None + } + + /// Increment the cache generation of the token shadow cache. This indicates that it was + /// changed on disk. + /// + /// Default: Returns an error as token shadow generation is not supported. + fn increment_token_shadow_cache_generation(&self) -> Result { + anyhow::bail!("token shadow generation not supported"); + } + /// Optionally returns a role that has no access to any resource. /// /// Default: Returns `None`. 
-- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:47 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:47 +0100 Subject: [pbs-devel] [PATCH proxmox v3 4/4] proxmox-access-control: add TTL window to token secret cache In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-9-s.rufinatscha@proxmox.com> Verify_secret() currently calls refresh_cache_if_file_changed() on every request, which performs a metadata() call on token.shadow each time. Under load this adds unnecessary overhead, considering also the file should rarely change. This patch introduces a TTL boundary, controlled by TOKEN_SECRET_CACHE_TTL_SECS. File metadata is only re-loaded once the TTL has expired. This patch is part of the series which fixes bug #7017 [1]. [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- Changes from v1 to v2: * Add TOKEN_SECRET_CACHE_TTL_SECS and last_checked. * Implement double-checked TTL: check with try_read first; only attempt refresh with try_write if expired/unknown. * Fix TTL bookkeeping: update last_checked on the ?file unchanged? path and after API mutations. * Add documentation warning about TTL-delayed effect of manual token.shadow edits. Changes from v2 to v3: * Refactored refresh_cache_if_file_changed TTL logic. * Remove had_prior_state check (replaced by last_checked logic). * Improve TTL bound checks. * Reword documentation warning for clarity. 
proxmox-access-control/src/token_shadow.rs | 30 +++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/proxmox-access-control/src/token_shadow.rs b/proxmox-access-control/src/token_shadow.rs index f30c8ed5..14eea560 100644 --- a/proxmox-access-control/src/token_shadow.rs +++ b/proxmox-access-control/src/token_shadow.rs @@ -30,6 +30,9 @@ static TOKEN_SECRET_CACHE: LazyLock> = LazyLock::new }) }); +/// Max age in seconds of the token secret cache before checking for file changes. +const TOKEN_SECRET_CACHE_TTL_SECS: i64 = 60; + // Get exclusive lock fn lock_config() -> Result { open_api_lockfile(token_shadow_lock(), None, true) @@ -57,11 +60,28 @@ fn write_file(data: HashMap) -> Result<(), Error> { fn refresh_cache_if_file_changed() -> bool { let now = epoch_i64(); - // Best-effort refresh under write lock. + // Fast path: cache is fresh if shared-gen matches and TTL not expired. + if let (Some(cache), Some(shared_gen_read)) = + (TOKEN_SECRET_CACHE.try_read(), token_shadow_shared_gen()) + { + if cache.shared_gen == shared_gen_read + && cache + .last_checked + .is_some_and(|last| now >= last && (now - last) < TOKEN_SECRET_CACHE_TTL_SECS) + { + return true; + } + // read lock drops here + } else { + return false; + } + + // Slow path: best-effort refresh under write lock. let Some(mut cache) = TOKEN_SECRET_CACHE.try_write() else { return false; }; + // Re-read generation after acquiring the lock (may have changed meanwhile). let Some(shared_gen_now) = token_shadow_shared_gen() else { return false; }; @@ -72,6 +92,14 @@ fn refresh_cache_if_file_changed() -> bool { cache.shared_gen = shared_gen_now; } + // TTL check again after acquiring the lock + if cache + .last_checked + .is_some_and(|last| now >= last && (now - last) < TOKEN_SECRET_CACHE_TTL_SECS) + { + return true; + } + // Stat the file to detect manual edits. 
let Ok((new_mtime, new_len)) = shadow_mtime_len() else { return false; -- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:45 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:45 +0100 Subject: [pbs-devel] [PATCH proxmox v3 2/4] proxmox-access-control: cache verified API token secrets In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-7-s.rufinatscha@proxmox.com> Currently, every token-based API request reads the token.shadow file and runs the expensive password hash verification for the given token secret. This issue was first observed as part of profiling the PBS /status endpoint (see bug #7017 [1]) and is required for the factored out proxmox_access_control token_shadow implementation too. This patch introduces an in-memory cache of successfully verified token secrets. Subsequent requests for the same token+secret combination only perform a comparison using openssl::memcmp::eq and avoid re-running the password hash. The cache is updated when a token secret is set and cleared when a token is deleted. Note, this does NOT include manual config changes, which will be covered in a subsequent patch. This patch is part of the series which fixes bug #7017 [1]. [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- Changes from v1 to v2: * Replace OnceCell with LazyLock, and std::sync::RwLock with parking_lot::RwLock. * Add API_MUTATION_GENERATION and guard cache inserts to prevent ?zombie inserts? across concurrent set/delete. * Refactor cache operations into cache_try_secret_matches, cache_try_insert_secret, and centralize write-side behavior in apply_api_mutation. * Switch fast-path cache access to try_read/try_write (best-effort). 
Changes from v2 to v3: * Replaced process-local cache invalidation (AtomicU64 API_MUTATION_GENERATION) with a cross-process shared generation via ConfigVersionCache. * Validate shared generation before/after the constant-time secret compare; only insert into cache if the generation is unchanged. * invalidate_cache_state() on insert if shared generation changed. Cargo.toml | 1 + proxmox-access-control/Cargo.toml | 1 + proxmox-access-control/src/token_shadow.rs | 154 ++++++++++++++++++++- 3 files changed, 155 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 27a69afa..59a2ec93 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -112,6 +112,7 @@ native-tls = "0.2" nix = "0.29" openssl = "0.10" pam-sys = "0.5" +parking_lot = "0.12" percent-encoding = "2.1" pin-utils = "0.1.0" proc-macro2 = "1.0" diff --git a/proxmox-access-control/Cargo.toml b/proxmox-access-control/Cargo.toml index ec189664..1de2842c 100644 --- a/proxmox-access-control/Cargo.toml +++ b/proxmox-access-control/Cargo.toml @@ -16,6 +16,7 @@ anyhow.workspace = true const_format.workspace = true nix = { workspace = true, optional = true } openssl = { workspace = true, optional = true } +parking_lot.workspace = true regex.workspace = true hex = { workspace = true, optional = true } serde.workspace = true diff --git a/proxmox-access-control/src/token_shadow.rs b/proxmox-access-control/src/token_shadow.rs index c586d834..895309d2 100644 --- a/proxmox-access-control/src/token_shadow.rs +++ b/proxmox-access-control/src/token_shadow.rs @@ -1,13 +1,28 @@ use std::collections::HashMap; +use std::sync::LazyLock; use anyhow::{bail, format_err, Error}; +use parking_lot::RwLock; use serde_json::{from_value, Value}; use proxmox_auth_api::types::Authid; use proxmox_product_config::{open_api_lockfile, replace_config, ApiLockGuard}; +use crate::init::access_conf; use crate::init::impl_feature::{token_shadow, token_shadow_lock}; +/// Global in-memory cache for successfully verified API token secrets. 
+/// The cache stores plain text secrets for token Authids that have already been +/// verified against the hashed values in `token.shadow`. This allows for cheap +/// subsequent authentications for the same token+secret combination, avoiding +/// recomputing the password hash on every request. +static TOKEN_SECRET_CACHE: LazyLock> = LazyLock::new(|| { + RwLock::new(ApiTokenSecretCache { + secrets: HashMap::new(), + shared_gen: 0, + }) +}); + // Get exclusive lock fn lock_config() -> Result { open_api_lockfile(token_shadow_lock(), None, true) @@ -36,9 +51,27 @@ pub fn verify_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { bail!("not an API token ID"); } + // Fast path + if cache_try_secret_matches(tokenid, secret) { + return Ok(()); + } + + // Slow path + // First, capture the shared generation before doing the hash verification. + let gen_before = token_shadow_shared_gen(); + let data = read_file()?; match data.get(tokenid) { - Some(hashed_secret) => proxmox_sys::crypt::verify_crypt_pw(secret, hashed_secret), + Some(hashed_secret) => { + proxmox_sys::crypt::verify_crypt_pw(secret, hashed_secret)?; + + // Try to cache only if nothing changed while verifying the secret. + if let Some(gen) = gen_before { + cache_try_insert_secret(tokenid.clone(), secret.to_owned(), gen); + } + + Ok(()) + } None => bail!("invalid API token"), } } @@ -56,6 +89,8 @@ pub fn set_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { data.insert(tokenid.clone(), hashed_secret); write_file(data)?; + apply_api_mutation(tokenid, Some(secret)); + Ok(()) } @@ -71,6 +106,8 @@ pub fn delete_secret(tokenid: &Authid) -> Result<(), Error> { data.remove(tokenid); write_file(data)?; + apply_api_mutation(tokenid, None); + Ok(()) } @@ -81,3 +118,118 @@ pub fn generate_and_set_secret(tokenid: &Authid) -> Result { set_secret(tokenid, &secret)?; Ok(secret) } + +struct ApiTokenSecretCache { + /// Keys are token Authids, values are the corresponding plain text secrets. 
+ /// Entries are added after a successful on-disk verification in + /// `verify_secret` or when a new token secret is generated by + /// `generate_and_set_secret`. Used to avoid repeated + /// password-hash computation on subsequent authentications. + secrets: HashMap, + /// Shared generation to detect mutations of the underlying token.shadow file. + shared_gen: usize, +} + +/// Cached secret. +struct CachedSecret { + secret: String, +} + +fn cache_try_insert_secret(tokenid: Authid, secret: String, shared_gen_before: usize) { + let Some(mut cache) = TOKEN_SECRET_CACHE.try_write() else { + return; + }; + + let Some(shared_gen_now) = token_shadow_shared_gen() else { + return; + }; + + // If this process missed a generation bump, its cache is stale. + if cache.shared_gen != shared_gen_now { + invalidate_cache_state(&mut cache); + cache.shared_gen = shared_gen_now; + } + + // If a mutation happened while we were verifying the secret, do not insert. + if shared_gen_now == shared_gen_before { + cache.secrets.insert(tokenid, CachedSecret { secret }); + } +} + +// Tries to match the given token secret against the cached secret. +// Checks the generation before and after the constant-time compare to avoid a +// TOCTOU window. If another process rotates/deletes a token while we're validating +// the cached secret, the generation will change, and we +// must not trust the cache for this request. 
+fn cache_try_secret_matches(tokenid: &Authid, secret: &str) -> bool { + let Some(cache) = TOKEN_SECRET_CACHE.try_read() else { + return false; + }; + let Some(entry) = cache.secrets.get(tokenid) else { + return false; + }; + + let cache_gen = cache.shared_gen; + + let Some(gen1) = token_shadow_shared_gen() else { + return false; + }; + if gen1 != cache_gen { + return false; + } + + let eq = openssl::memcmp::eq(entry.secret.as_bytes(), secret.as_bytes()); + + let Some(gen2) = token_shadow_shared_gen() else { + return false; + }; + + eq && gen2 == cache_gen +} + +fn apply_api_mutation(tokenid: &Authid, new_secret: Option<&str>) { + // Signal cache invalidation to other processes (best-effort). + let new_shared_gen = bump_token_shadow_shared_gen(); + + let mut cache = TOKEN_SECRET_CACHE.write(); + + // If we cannot read/bump the shared generation, we cannot safely trust the cache. + let Some(gen) = new_shared_gen else { + invalidate_cache_state(&mut cache); + cache.shared_gen = 0; + return; + }; + + // Update to the post-mutation generation. + cache.shared_gen = gen; + + // Apply the new mutation. + match new_secret { + Some(secret) => { + cache.secrets.insert( + tokenid.clone(), + CachedSecret { + secret: secret.to_owned(), + }, + ); + } + None => { + cache.secrets.remove(tokenid); + } + } +} + +/// Get the current shared generation. +fn token_shadow_shared_gen() -> Option { + access_conf().token_shadow_cache_generation() +} + +/// Bump and return the new shared generation. +fn bump_token_shadow_shared_gen() -> Option { + access_conf().increment_token_shadow_cache_generation().ok().map(|prev| prev + 1) +} + +/// Invalidates the cache state and only keeps the shared generation. 
+fn invalidate_cache_state(cache: &mut ApiTokenSecretCache) { + cache.secrets.clear(); +} \ No newline at end of file -- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:07:46 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:07:46 +0100 Subject: [pbs-devel] [PATCH proxmox v3 3/4] proxmox-access-control: invalidate token-secret cache on token.shadow changes In-Reply-To: <20260102160750.285157-1-s.rufinatscha@proxmox.com> References: <20260102160750.285157-1-s.rufinatscha@proxmox.com> Message-ID: <20260102160750.285157-8-s.rufinatscha@proxmox.com> Previously the in-memory token-secret cache was only updated via set_secret() and delete_secret(), so manual edits to token.shadow were not reflected. This patch adds file change detection to the cache. It tracks the mtime and length of token.shadow and clears the in-memory token secret cache whenever these values change. Note, this patch fetches file stats on every request. An TTL-based optimization will be covered in a subsequent patch of the series. This patch is part of the series which fixes bug #7017 [1]. [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Signed-off-by: Samuel Rufinatscha --- Changes from v1 to v2: * Add file metadata tracking (file_mtime, file_len) and FILE_GENERATION. * Store file_gen in CachedSecret and verify it against the current FILE_GENERATION to ensure cached entries belong to the current file state. * Add shadow_mtime_len() helper and convert refresh to best-effort (try_write, returns bool). * Pass a pre-write metadata snapshot into apply_api_mutation and clear/bump generation if the cache metadata indicates missed external edits. Changes from v2 to v3: * Cache now tracks last_checked (epoch seconds). * Simplified refresh_cache_if_file_changed, removed FILE_GENERATION logic * On first load, initializes file metadata and keeps empty cache. 
proxmox-access-control/src/token_shadow.rs | 129 ++++++++++++++++++++- 1 file changed, 123 insertions(+), 6 deletions(-) diff --git a/proxmox-access-control/src/token_shadow.rs b/proxmox-access-control/src/token_shadow.rs index 895309d2..f30c8ed5 100644 --- a/proxmox-access-control/src/token_shadow.rs +++ b/proxmox-access-control/src/token_shadow.rs @@ -1,5 +1,8 @@ use std::collections::HashMap; +use std::fs; +use std::io::ErrorKind; use std::sync::LazyLock; +use std::time::SystemTime; use anyhow::{bail, format_err, Error}; use parking_lot::RwLock; @@ -7,6 +10,7 @@ use serde_json::{from_value, Value}; use proxmox_auth_api::types::Authid; use proxmox_product_config::{open_api_lockfile, replace_config, ApiLockGuard}; +use proxmox_time::epoch_i64; use crate::init::access_conf; use crate::init::impl_feature::{token_shadow, token_shadow_lock}; @@ -20,6 +24,9 @@ static TOKEN_SECRET_CACHE: LazyLock> = LazyLock::new RwLock::new(ApiTokenSecretCache { secrets: HashMap::new(), shared_gen: 0, + file_mtime: None, + file_len: None, + last_checked: None, }) }); @@ -45,6 +52,63 @@ fn write_file(data: HashMap) -> Result<(), Error> { replace_config(token_shadow(), &json) } +/// Refreshes the in-memory cache if the on-disk token.shadow file changed. +/// Returns true if the cache is valid to use, false if not. +fn refresh_cache_if_file_changed() -> bool { + let now = epoch_i64(); + + // Best-effort refresh under write lock. + let Some(mut cache) = TOKEN_SECRET_CACHE.try_write() else { + return false; + }; + + let Some(shared_gen_now) = token_shadow_shared_gen() else { + return false; + }; + + // If another process bumped the generation, we don't know what changed -> clear cache + if cache.shared_gen != shared_gen_now { + invalidate_cache_state(&mut cache); + cache.shared_gen = shared_gen_now; + } + + // Stat the file to detect manual edits. + let Ok((new_mtime, new_len)) = shadow_mtime_len() else { + return false; + }; + + // Initialize file stats if we have no prior state. 
+ if cache.last_checked.is_none() { + cache.secrets.clear(); // ensure cache is empty on first load + cache.file_mtime = new_mtime; + cache.file_len = new_len; + cache.last_checked = Some(now); + return true; + } + + // No change detected. + if cache.file_mtime == new_mtime && cache.file_len == new_len { + cache.last_checked = Some(now); + return true; + } + + // Manual edit detected -> invalidate cache and update stat. + cache.secrets.clear(); + cache.file_mtime = new_mtime; + cache.file_len = new_len; + cache.last_checked = Some(now); + + // Best-effort propagation to other processes + update local view. + if let Some(shared_gen_new) = bump_token_shadow_shared_gen() { + cache.shared_gen = shared_gen_new; + } else { + // Do not fail: local cache is already safe as we cleared it above. + // Keep local shared_gen as-is to avoid repeated failed attempts. + } + + true +} + /// Verifies that an entry for given tokenid / API token secret exists pub fn verify_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { if !tokenid.is_token() { @@ -52,7 +116,7 @@ pub fn verify_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { } // Fast path - if cache_try_secret_matches(tokenid, secret) { + if refresh_cache_if_file_changed() && cache_try_secret_matches(tokenid, secret) { return Ok(()); } @@ -84,12 +148,15 @@ pub fn set_secret(tokenid: &Authid, secret: &str) -> Result<(), Error> { let _guard = lock_config()?; + // Capture state before we write to detect external edits. + let pre_meta = shadow_mtime_len().unwrap_or((None, None)); + let mut data = read_file()?; let hashed_secret = proxmox_sys::crypt::encrypt_pw(secret)?; data.insert(tokenid.clone(), hashed_secret); write_file(data)?; - apply_api_mutation(tokenid, Some(secret)); + apply_api_mutation(tokenid, Some(secret), pre_meta); Ok(()) } @@ -102,11 +169,14 @@ pub fn delete_secret(tokenid: &Authid) -> Result<(), Error> { let _guard = lock_config()?; + // Capture state before we write to detect external edits. 
+ let pre_meta = shadow_mtime_len().unwrap_or((None, None)); + let mut data = read_file()?; data.remove(tokenid); write_file(data)?; - apply_api_mutation(tokenid, None); + apply_api_mutation(tokenid, None, pre_meta); Ok(()) } @@ -128,6 +198,12 @@ struct ApiTokenSecretCache { secrets: HashMap, /// Shared generation to detect mutations of the underlying token.shadow file. shared_gen: usize, + // shadow file mtime to detect changes + file_mtime: Option, + // shadow file length to detect changes + file_len: Option, + // last time the file metadata was checked + last_checked: Option, } /// Cached secret. @@ -187,7 +263,13 @@ fn cache_try_secret_matches(tokenid: &Authid, secret: &str) -> bool { eq && gen2 == cache_gen } -fn apply_api_mutation(tokenid: &Authid, new_secret: Option<&str>) { +fn apply_api_mutation( + tokenid: &Authid, + new_secret: Option<&str>, + pre_write_meta: (Option, Option), +) { + let now = epoch_i64(); + // Signal cache invalidation to other processes (best-effort). let new_shared_gen = bump_token_shadow_shared_gen(); @@ -203,6 +285,13 @@ fn apply_api_mutation(tokenid: &Authid, new_secret: Option<&str>) { // Update to the post-mutation generation. cache.shared_gen = gen; + // If our cached file metadata does not match the on-disk state before our write, + // we likely missed an external/manual edit. We can no longer trust any cached secrets. + let (pre_mtime, pre_len) = pre_write_meta; + if cache.file_mtime != pre_mtime || cache.file_len != pre_len { + cache.secrets.clear(); + } + // Apply the new mutation. match new_secret { Some(secret) => { @@ -217,6 +306,20 @@ fn apply_api_mutation(tokenid: &Authid, new_secret: Option<&str>) { cache.secrets.remove(tokenid); } } + + // Update our view of the file metadata to the post-write state (best-effort). + // (If this fails, drop local cache so callers fall back to slow path until refreshed.) 
+ match shadow_mtime_len() { + Ok((mtime, len)) => { + cache.file_mtime = mtime; + cache.file_len = len; + cache.last_checked = Some(now); + } + Err(_) => { + // If we cannot validate state, do not trust cache. + invalidate_cache_state(&mut cache); + } + } } /// Get the current shared generation. @@ -226,10 +329,24 @@ fn token_shadow_shared_gen() -> Option { /// Bump and return the new shared generation. fn bump_token_shadow_shared_gen() -> Option { - access_conf().increment_token_shadow_cache_generation().ok().map(|prev| prev + 1) + access_conf() + .increment_token_shadow_cache_generation() + .ok() + .map(|prev| prev + 1) } /// Invalidates the cache state and only keeps the shared generation. fn invalidate_cache_state(cache: &mut ApiTokenSecretCache) { cache.secrets.clear(); -} \ No newline at end of file + cache.file_mtime = None; + cache.file_len = None; + cache.last_checked = None; +} + +fn shadow_mtime_len() -> Result<(Option, Option), Error> { + match fs::metadata(token_shadow()) { + Ok(meta) => Ok((meta.modified().ok(), Some(meta.len()))), + Err(e) if e.kind() == ErrorKind::NotFound => Ok((None, None)), + Err(e) => Err(e.into()), + } +} -- 2.47.3 From s.rufinatscha at proxmox.com Fri Jan 2 17:09:45 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Fri, 2 Jan 2026 17:09:45 +0100 Subject: [pbs-devel] superseded: [PATCH proxmox{-backup, , -datacenter-manager} v2 0/7] token-shadow: reduce api token verification overhead In-Reply-To: <20251217162520.486520-1-s.rufinatscha@proxmox.com> References: <20251217162520.486520-1-s.rufinatscha@proxmox.com> Message-ID: <323772e0-ac91-4bd3-942c-e18e70119e05@proxmox.com> https://lore.proxmox.com/pbs-devel/20260102160750.285157-1-s.rufinatscha at proxmox.com/T/#t On 12/17/25 5:25 PM, Samuel Rufinatscha wrote: > Hi, > > this series improves the performance of token-based API authentication > in PBS (pbs-config) and in PDM (underlying proxmox-access-control > crate), addressing the API token verification 
hotspot reported in our > bugtracker #6049 [1]. > > When profiling PBS /status endpoint with cargo flamegraph [2], > token-based authentication showed up as a dominant hotspot via > proxmox_sys::crypt::verify_crypt_pw. Applying this series removes that > path from the hot section of the flamegraph. The same performance issue > was measured [2] for PDM. PDM uses the underlying shared > proxmox-access-control library for token handling, which is a > factored out version of the token.shadow handling code from PBS. > > While this series fixes the immediate performance issue both in PBS > (pbs-config) and in the shared proxmox-access-control crate used by > PDM, PBS should eventually, ideally be refactored, in a separate > effort, to use proxmox-access-control for token handling instead of its > local implementation. > > Problem > > For token-based API requests, both PBS?s pbs-config token.shadow > handling and PDM proxmox-access-control?s token.shadow handling > currently: > > 1. read the token.shadow file on each request > 2. deserialize it into a HashMap > 3. run password hash verification via > proxmox_sys::crypt::verify_crypt_pw for the provided token secret > > Under load, this results in significant CPU usage spent in repeated > password hash computations for the same token+secret pairs. The > attached flamegraphs for PBS [2] and PDM [3] show > proxmox_sys::crypt::verify_crypt_pw dominating the hot path. > > Approach > > The goal is to reduce the cost of token-based authentication preserving > the existing token handling semantics (including detecting manual edits > to token.shadow) and be consistent between PBS (pbs-config) and > PDM (proxmox-access-control). For both sites, the series proposes > following approach: > > 1. Introduce an in-memory cache for verified token secrets > 2. Invalidate the cache when token.shadow changes (detect manual edits) > 3. 
Control metadata checks with a TTL window > > Testing > > *PBS (pbs-config)* > > To verify the effect in PBS, I: > 1. Set up test environment based on latest PBS ISO, installed Rust > toolchain, cloned proxmox-backup repository to use with cargo > flamegraph. Reproduced bug #6049 [1] by profiling the /status > endpoint with token-based authentication using cargo flamegraph [2]. > The flamegraph showed proxmox_sys::crypt::verify_crypt_pw is the > hotspot. > 2. Built PBS with pbs-config patches and re-ran the same workload and > profiling setup. > 3. Confirmed that the proxmox_sys::crypt::verify_crypt_pw path no > longer appears in the hot section of the flamegraph. CPU usage is > now dominated by TLS overhead. > 4. Functionally verified that: > * token-based API authentication still works for valid tokens > * invalid secrets are rejected as before > * generating a new token secret via dashboard works and > authenticates correctly > > *PDM (proxmox-access-control)* > > To verify the effect in PDM, I followed a similar testing approach. > Instead of /status, I profiled the /version endpoint with cargo > flamegraph [2] and verified that the token hashing path disappears [4] > from the hot section after applying the proxmox-access-control patches. > > Functionally I verified that: > * token-based API authentication still works for valid tokens > * invalid secrets are rejected as before > * generating a new token secret via dashboard works and > authenticates correctly > > Benchmarks: > > Two different benchmarks have been run to measure caching effects > and RwLock contention: > > (1) Requests per second for PBS /status endpoint (E2E) > (2) RwLock contention for token create/delete under > heavy parallel token-authenticated readers; compared > std::sync::RwLock and parking_lot::RwLock. > > (1) benchmarked parallel token auth requests for > /status?verbose=0 on top of the datastore lookup cache series [5] > to check throughput impact. 
With datastores=1, repeat=5000, parallel=16 > this series gives ~179 req/s compared to ~65 req/s without it. > This is a ~2.75x improvement. > > (2) benchmarked token create/delete operations under heavy load of > token-authenticated requests on top of the datastore lookup cache [5] > series. This benchmark was done against a 64-parallel > token-auth flood (200k requests) against > /admin/datastore/ds0001/status?verbose=0 while executing 50 token > create + 50 token delete operations. After the series I saw the > following e2e API latencies: > > parking_lot::RwLock > - create avg ~27ms (p95 ~28ms) vs ~46ms (p95 ~50ms) baseline > - delete avg ~17ms (p95 ~19ms) vs ~33ms (p95 ~35ms) baseline > > std::sync::RwLock > - create avg ~27ms (p95 ~28ms) > - delete avg ~17ms (p95 ~19ms) > > It appears that both RwLock implementations perform similarly > for this workload. The parking_lot version has been chosen for the > added fairness guarantees. > > Patch summary > > pbs-config: > > 0001 – pbs-config: cache verified API token secrets > Adds an in-memory cache keyed by Authid that stores plain text token > secrets after a successful verification or generation and uses > openssl's memcmp constant-time for comparison. > > 0002 – pbs-config: invalidate token-secret cache on token.shadow > changes > Tracks token.shadow mtime and length and clears the in-memory > cache when the file changes. > > 0003 – pbs-config: add TTL window to token-secret cache > Introduces a TTL (TOKEN_SECRET_CACHE_TTL_SECS, default 60) for metadata > checks so that fs::metadata is only called periodically. > > proxmox-access-control: > > 0004 – access-control: cache verified API token secrets > Mirrors PBS PATCH 0001. > > 0005 – access-control: invalidate token-secret cache on token.shadow changes > Mirrors PBS PATCH 0002. > > 0006 – access-control: add TTL window to token-secret cache > Mirrors PBS PATCH 0003. > > proxmox-datacenter-manager: > > 0007 – 
docs: document API token-cache TTL effects > Documents the effects of the TTL window on token.shadow edits > > Changes since v1 > > - (refactor) Switched cache initialization to LazyLock > - (perf) Use parking_lot::RwLock and best-effort cache access on the > read/refresh path (try_read/try_write) to avoid lock contention > - (doc) Document TTL-delayed effect of manual token.shadow edits > - (fix) Add generation guards (API_MUTATION_GENERATION + > FILE_GENERATION) to prevent caching across concurrent set/delete and > external edits > > Please see the patch specific changelogs for more details. > > Thanks for considering this patch series, I look forward to your > feedback. > > Best, > Samuel Rufinatscha > > [1] https://bugzilla.proxmox.com/show_bug.cgi?id=7017 > [2] attachment 1767 [1]: Flamegraph showing the proxmox_sys::crypt::verify_crypt_pw stack > [3] attachment 1794 [1]: Flamegraph PDM baseline > [4] attachment 1795 [1]: Flamegraph PDM patched > [5] https://bugzilla.proxmox.com/show_bug.cgi?id=6049 > > proxmox-backup: > > Samuel Rufinatscha (3): > pbs-config: cache verified API token secrets > pbs-config: invalidate token-secret cache on token.shadow changes > pbs-config: add TTL window to token secret cache > > Cargo.toml | 1 + > docs/user-management.rst | 4 + > pbs-config/Cargo.toml | 1 + > pbs-config/src/token_shadow.rs | 238 ++++++++++++++++++++++++++++++++- > 4 files changed, 243 insertions(+), 1 deletion(-) > > > proxmox: > > Samuel Rufinatscha (3): > proxmox-access-control: cache verified API token secrets > proxmox-access-control: invalidate token-secret cache on token.shadow > changes > proxmox-access-control: add TTL window to token secret cache > > Cargo.toml | 1 + > proxmox-access-control/Cargo.toml | 1 + > proxmox-access-control/src/token_shadow.rs | 238 ++++++++++++++++++++- > 3 files changed, 239 insertions(+), 1 deletion(-) > > > proxmox-datacenter-manager: > > Samuel Rufinatscha (1): > docs: document API token-cache TTL effects > > 
docs/access-control.rst | 3 +++ > 1 file changed, 3 insertions(+) > > > Summary over all repositories: > 8 files changed, 485 insertions(+), 2 deletions(-) > From n.frey at proxmox.com Mon Jan 5 11:34:07 2026 From: n.frey at proxmox.com (Nicolas Frey) Date: Mon, 5 Jan 2026 11:34:07 +0100 Subject: [pbs-devel] [PATCH proxmox-backup 1/1] fix: s3: make s3_refresh apihandler sync Message-ID: <20260105103407.63587-1-n.frey@proxmox.com> fixes regression from 524cf1e that made `datastore::s3_refresh` sync but did not change the ApiHandler matching part here This would result in a panic every time an s3-refresh was initiated Fixes: https://forum.proxmox.com/threads/178655 Signed-off-by: Nicolas Frey --- src/bin/proxmox_backup_manager/datastore.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/proxmox_backup_manager/datastore.rs b/src/bin/proxmox_backup_manager/datastore.rs index 57b4ca29..5c65c5ec 100644 --- a/src/bin/proxmox_backup_manager/datastore.rs +++ b/src/bin/proxmox_backup_manager/datastore.rs @@ -339,7 +339,7 @@ async fn s3_refresh(mut param: Value, rpcenv: &mut dyn RpcEnvironment) -> Result let info = &api2::admin::datastore::API_METHOD_S3_REFRESH; let result = match info.handler { - ApiHandler::Async(handler) => (handler)(param, info, rpcenv).await?, + ApiHandler::Sync(handler) => (handler)(param, info, rpcenv)?, _ => unreachable!(), }; -- 2.47.3 From s.rufinatscha at proxmox.com Mon Jan 5 15:16:13 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Mon, 5 Jan 2026 15:16:13 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v6 3/4] partial fix #6049: datastore: use config fast-path in Drop In-Reply-To: <20260105141615.242463-1-s.rufinatscha@proxmox.com> References: <20260105141615.242463-1-s.rufinatscha@proxmox.com> Message-ID: <20260105141615.242463-4-s.rufinatscha@proxmox.com> The Drop impl of DataStore re-read datastore.cfg to decide whether the entry should be evicted from the in-process cache (based on maintenance 
mode's clear_from_cache). During the investigation of issue #6049 [1], a flamegraph [2] showed that the config reload in Drop accounted for a measurable share of CPU time under load. This patch wires the datastore config fast path to the Drop impl to eventually avoid an expensive config reload from disk to capture the maintenance mandate. Behavioral notes - Drop no longer silently ignores config/lookup failures: failures to load/parse datastore.cfg are logged at WARN level - If the datastore no longer exists in datastore.cfg when the last handle is dropped, the cached instance is evicted from DATASTORE_MAP if available (without checking maintenance mode). Links [1] Bugzilla: https://bugzilla.proxmox.com/show_bug.cgi?id=6049 [2] cargo-flamegraph: https://github.com/flamegraph-rs/flamegraph Fixes: #6049 Signed-off-by: Samuel Rufinatscha --- Changes: >From v1 → v2 - Replace caching logic with the datastore_section_config_cached() helper. >From v2 → v3 No changes >From v3 → v4, thanks @Fabian - Pass datastore_section_config_cached(false) in Drop to avoid concurrent cache updates. >From v4 → v5 - Rebased only, no changes >From v5 → v6 - Rebased - Styling: restructured cache eviction condition - Drop impl: log cache-related failures to load/parse datastore.cfg at WARN level instead of ERROR - Note logging change in the patch message, thanks @Fabian - Remove cached entry from DATASTORE_MAP (if available) if datastore no longer exists in datastore.cfg when the last handle is dropped, thanks @Fabian - Removed slow-path generation bumping in datastore_section_config_cached, since API changes already bump the generation on config save. 
Moved to subsequent patch, relevant for TTL-based mechanism to bump on non-API edits, thanks @Fabian pbs-datastore/src/datastore.rs | 35 ++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index aa366826..8adb0e3b 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -224,14 +224,33 @@ impl Drop for DataStore { // remove datastore from cache iff // - last task finished, and - // - datastore is in a maintenance mode that mandates it - let remove_from_cache = last_task - && pbs_config::datastore::config() - .and_then(|(s, _)| s.lookup::("datastore", self.name())) - .is_ok_and(|c| { - c.get_maintenance_mode() - .is_some_and(|m| m.clear_from_cache()) - }); + // - datastore is in a maintenance mode that mandates it, or the datastore was removed from datastore.cfg + + // first check: check if last task finished + if !last_task { + return; + } + + // determine whether we should evict from DATASTORE_MAP. 
+ let remove_from_cache = match datastore_section_config_cached(false) { + Ok((section_config, _gen)) => { + match section_config.lookup::("datastore", self.name()) { + // second check: check if maintenance mode requires closing FDs + Ok(config) => config + .get_maintenance_mode() + .is_some_and(|m| m.clear_from_cache()), + Err(err) => { + // datastore removed from config; evict cached entry if available (without checking maintenance mode) + log::warn!("DataStore::drop: datastore '{}' missing from datastore.cfg; evicting cached instance: {err}", self.name()); + true + } + } + } + Err(err) => { + log::warn!("DataStore::drop: failed to load datastore.cfg for '{}'; skipping cache-eviction: {err}", self.name()); + false + } + }; if remove_from_cache { DATASTORE_MAP.lock().unwrap().remove(self.name()); -- 2.47.3 From s.rufinatscha at proxmox.com Mon Jan 5 15:16:12 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Mon, 5 Jan 2026 15:16:12 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v6 2/4] partial fix #6049: datastore: impl ConfigVersionCache fast path for lookups In-Reply-To: <20260105141615.242463-1-s.rufinatscha@proxmox.com> References: <20260105141615.242463-1-s.rufinatscha@proxmox.com> Message-ID: <20260105141615.242463-3-s.rufinatscha@proxmox.com> Repeated /status requests caused lookup_datastore() to re-read and parse datastore.cfg on every call. The issue was mentioned in report #6049 [1]. cargo-flamegraph [2] confirmed that the hot path is dominated by pbs_config::datastore::config() (config parsing). This patch implements caching of the global datastore.cfg using the generation numbers from the shared config version cache. It caches the datastore.cfg along with the generation number and, when a subsequent lookup sees the same generation, it reuses the cached config without re-reading it from disk. If the generation differs (or the cache is unavailable), the config is re-read from disk. 
If `update_cache = true`, the new config and current generation are persisted in the cache. In this case, callers must hold the datastore config lock to avoid racing with concurrent config changes. If `update_cache` is `false` and generation did not match, the freshly read config is returned but the cache is left unchanged. If `ConfigVersionCache` is not available, the config is always read from disk and `None` is returned as generation. Behavioral notes - The generation is bumped via the existing save_config() path, so API-driven config changes are detected immediately. - Manual edits to datastore.cfg are not detected; this is covered in a dedicated patch in this series. - DataStore::drop still performs a config read on the common path; also covered in a dedicated patch in this series. Links [1] Bugzilla: https://bugzilla.proxmox.com/show_bug.cgi?id=6049 [2] cargo-flamegraph: https://github.com/flamegraph-rs/flamegraph Fixes: #6049 Signed-off-by: Samuel Rufinatscha --- Changes: >From v1 ? v2 - Moved the ConfigVersionCache changes into its own patch, thanks @Fabian - Introduced the global static DATASTORE_CONFIG_CACHE to store the fully parsed datastore.cfg instead, along with its generation number. thanks @Fabian - Introduced DatastoreConfigCache struct to hold cache values - Removed and replaced the CachedDatastoreConfigTag field of DataStoreImpl with a generation number field only (Option) to validate DataStoreImpl reuse. - Added DataStore::datastore_section_config_cached() helper function to encapsulate the caching logic and simplify reuse. - Modified DataStore::lookup_datastore() to use the new helper. >From v2 ? v3 No changes >From v3 ? v4, thanks @Fabian - Restructured the version cache checks in datastore_section_config_cached(), to simplify the logic. - Added update_cache parameter to datastore_section_config_cached() to control cache updates. >From v4 ? v5 - Rebased only, no changes >From v5 ? 
v6 - Rebased - Styling: minimize/avoid diff noise, thanks @Fabian pbs-datastore/Cargo.toml | 1 + pbs-datastore/src/datastore.rs | 90 ++++++++++++++++++++++++++++------ 2 files changed, 77 insertions(+), 14 deletions(-) diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml index 8ce930a9..42f49a7b 100644 --- a/pbs-datastore/Cargo.toml +++ b/pbs-datastore/Cargo.toml @@ -40,6 +40,7 @@ proxmox-io.workspace = true proxmox-lang.workspace=true proxmox-s3-client = { workspace = true, features = [ "impl" ] } proxmox-schema = { workspace = true, features = [ "api-macro" ] } +proxmox-section-config.workspace = true proxmox-serde = { workspace = true, features = [ "serde_json" ] } proxmox-sys.workspace = true proxmox-systemd.workspace = true diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index 9c57aaac..aa366826 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -34,7 +34,8 @@ use pbs_api_types::{ MaintenanceType, Operation, UPID, }; use pbs_config::s3::S3_CFG_TYPE_ID; -use pbs_config::BackupLockGuard; +use pbs_config::{BackupLockGuard, ConfigVersionCache}; +use proxmox_section_config::SectionConfigData; use crate::backup_info::{ BackupDir, BackupGroup, BackupInfo, OLD_LOCKING, PROTECTED_MARKER_FILENAME, @@ -48,6 +49,17 @@ use crate::s3::S3_CONTENT_PREFIX; use crate::task_tracking::{self, update_active_operations}; use crate::{DataBlob, LocalDatastoreLruCache}; +// Cache for fully parsed datastore.cfg +struct DatastoreConfigCache { + // Parsed datastore.cfg file + config: Arc, + // Generation number from ConfigVersionCache + last_generation: usize, +} + +static DATASTORE_CONFIG_CACHE: LazyLock>> = + LazyLock::new(|| Mutex::new(None)); + static DATASTORE_MAP: LazyLock>>> = LazyLock::new(|| Mutex::new(HashMap::new())); @@ -149,11 +161,13 @@ pub struct DataStoreImpl { last_gc_status: Mutex, verify_new: bool, chunk_order: ChunkOrder, - last_digest: Option<[u8; 32]>, sync_level: DatastoreFSyncLevel, 
backend_config: DatastoreBackendConfig, lru_store_caching: Option, thread_settings: DatastoreThreadSettings, + /// datastore.cfg cache generation number at lookup time, used to + /// invalidate this cached `DataStoreImpl` + config_generation: Option, } impl DataStoreImpl { @@ -166,11 +180,11 @@ impl DataStoreImpl { last_gc_status: Mutex::new(GarbageCollectionStatus::default()), verify_new: false, chunk_order: Default::default(), - last_digest: None, sync_level: Default::default(), backend_config: Default::default(), lru_store_caching: None, thread_settings: Default::default(), + config_generation: None, }) } } @@ -286,6 +300,55 @@ impl DatastoreThreadSettings { } } +/// Returns the parsed datastore config (`datastore.cfg`) and its +/// generation. +/// +/// Uses `ConfigVersionCache` to detect stale entries: +/// - If the cached generation matches the current generation, the +/// cached config is returned. +/// - Otherwise the config is re-read from disk. If `update_cache` is +/// `true`, the new config and current generation are stored in the +/// cache. Callers that set `update_cache = true` must hold the +/// datastore config lock to avoid racing with concurrent config +/// changes. +/// - If `update_cache` is `false`, the freshly read config is returned +/// but the cache is left unchanged. +/// +/// If `ConfigVersionCache` is not available, the config is always read +/// from disk and `None` is returned as the generation. 
+fn datastore_section_config_cached( + update_cache: bool, +) -> Result<(Arc, Option), Error> { + let mut config_cache = DATASTORE_CONFIG_CACHE.lock().unwrap(); + + if let Ok(version_cache) = ConfigVersionCache::new() { + let current_gen = version_cache.datastore_generation(); + if let Some(cached) = config_cache.as_ref() { + // Fast path: re-use cached datastore.cfg + if cached.last_generation == current_gen { + return Ok((cached.config.clone(), Some(cached.last_generation))); + } + } + // Slow path: re-read datastore.cfg + let (config_raw, _digest) = pbs_config::datastore::config()?; + let config = Arc::new(config_raw); + + if update_cache { + *config_cache = Some(DatastoreConfigCache { + config: config.clone(), + last_generation: current_gen, + }); + } + + Ok((config, Some(current_gen))) + } else { + // Fallback path, no config version cache: read datastore.cfg and return None as generation + *config_cache = None; + let (config_raw, _digest) = pbs_config::datastore::config()?; + Ok((Arc::new(config_raw), None)) + } +} + impl DataStore { // This one just panics on everything #[doc(hidden)] @@ -367,10 +430,9 @@ impl DataStore { // we use it to decide whether it is okay to delete the datastore. 
let _config_lock = pbs_config::datastore::lock_config()?; - // we could use the ConfigVersionCache's generation for staleness detection, but we load - // the config anyway -> just use digest, additional benefit: manual changes get detected - let (config, digest) = pbs_config::datastore::config()?; - let config: DataStoreConfig = config.lookup("datastore", name)?; + // Get the current datastore.cfg generation number and cached config + let (section_config, gen_num) = datastore_section_config_cached(true)?; + let config: DataStoreConfig = section_config.lookup("datastore", name)?; if let Some(maintenance_mode) = config.get_maintenance_mode() { if let Err(error) = maintenance_mode.check(operation) { @@ -378,19 +440,19 @@ impl DataStore { } } + let mut datastore_cache = DATASTORE_MAP.lock().unwrap(); + if get_datastore_mount_status(&config) == Some(false) { - let mut datastore_cache = DATASTORE_MAP.lock().unwrap(); datastore_cache.remove(&config.name); bail!("datastore '{}' is not mounted", config.name); } - let mut datastore_cache = DATASTORE_MAP.lock().unwrap(); let entry = datastore_cache.get(name); // reuse chunk store so that we keep using the same process locker instance! let chunk_store = if let Some(datastore) = &entry { - let last_digest = datastore.last_digest.as_ref(); - if let Some(true) = last_digest.map(|last_digest| last_digest == &digest) { + // Re-use DataStoreImpl + if datastore.config_generation == gen_num && gen_num.is_some() { if let Some(operation) = operation { update_active_operations(name, operation, 1)?; } @@ -412,7 +474,7 @@ impl DataStore { )?) 
}; - let datastore = DataStore::with_store_and_config(chunk_store, config, Some(digest))?; + let datastore = DataStore::with_store_and_config(chunk_store, config, gen_num)?; let datastore = Arc::new(datastore); datastore_cache.insert(name.to_string(), datastore.clone()); @@ -514,7 +576,7 @@ impl DataStore { fn with_store_and_config( chunk_store: Arc, config: DataStoreConfig, - last_digest: Option<[u8; 32]>, + generation: Option, ) -> Result { let mut gc_status_path = chunk_store.base_path(); gc_status_path.push(".gc-status"); @@ -579,11 +641,11 @@ impl DataStore { last_gc_status: Mutex::new(gc_status), verify_new: config.verify_new.unwrap_or(false), chunk_order: tuning.chunk_order.unwrap_or_default(), - last_digest, sync_level: tuning.sync_level.unwrap_or_default(), backend_config, lru_store_caching, thread_settings, + config_generation: generation, }) } -- 2.47.3 From s.rufinatscha at proxmox.com Mon Jan 5 15:16:11 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Mon, 5 Jan 2026 15:16:11 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v6 1/4] config: enable config version cache for datastore In-Reply-To: <20260105141615.242463-1-s.rufinatscha@proxmox.com> References: <20260105141615.242463-1-s.rufinatscha@proxmox.com> Message-ID: <20260105141615.242463-2-s.rufinatscha@proxmox.com> Repeated /status requests caused lookup_datastore() to re-read and parse datastore.cfg on every call. The issue was mentioned in report #6049 [1]. cargo-flamegraph [2] confirmed that the hot path is dominated by pbs_config::datastore::config() (config parsing). To solve the issue, this patch prepares the config version cache, so that datastore config caching can be built on top of it. 
This patch specifically: (1) implements increment function in order to invalidate generations (2) removes obsolete comments Links [1] Bugzilla: https://bugzilla.proxmox.com/show_bug.cgi?id=6049 [2] cargo-flamegraph: https://github.com/flamegraph-rs/flamegraph Signed-off-by: Samuel Rufinatscha --- Changes: >From v1 ? v2 (original introduction), thanks @Fabian - Split the ConfigVersionCache changes out of the large datastore patch into their own config-only patch >From v2 ? v3 No changes >From v3 ? v4 No changes >From v4 ? v5 - Rebased only, no changes >From v5 ? v6 - Rebased - Removed "partial-fix" prefix from subject, thanks @Fabian pbs-config/src/config_version_cache.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pbs-config/src/config_version_cache.rs b/pbs-config/src/config_version_cache.rs index e8fb994f..b875f7e0 100644 --- a/pbs-config/src/config_version_cache.rs +++ b/pbs-config/src/config_version_cache.rs @@ -26,7 +26,6 @@ struct ConfigVersionCacheDataInner { // Traffic control (traffic-control.cfg) generation/version. traffic_control_generation: AtomicUsize, // datastore (datastore.cfg) generation/version - // FIXME: remove with PBS 3.0 datastore_generation: AtomicUsize, // Add further atomics here } @@ -145,8 +144,15 @@ impl ConfigVersionCache { .fetch_add(1, Ordering::AcqRel); } + /// Returns the datastore generation number. + pub fn datastore_generation(&self) -> usize { + self.shmem + .data() + .datastore_generation + .load(Ordering::Acquire) + } + /// Increase the datastore generation number. 
- FIXME: remove with PBS 3.0 or make actually useful again in datastore lookup pub fn increase_datastore_generation(&self) -> usize { self.shmem .data() -- 2.47.3 From s.rufinatscha at proxmox.com Mon Jan 5 15:16:10 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Mon, 5 Jan 2026 15:16:10 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v6 0/4] datastore: remove config reload on hot path Message-ID: <20260105141615.242463-1-s.rufinatscha@proxmox.com> Hi, this series reduces CPU time in datastore lookups by avoiding repeated datastore.cfg reads/parses in both `lookup_datastore()` and `DataStore::Drop`. It also adds a TTL so manual config edits are noticed without reintroducing hashing on every request. While investigating #6049 [1], cargo-flamegraph [2] showed hotspots during repeated `/status` calls in `lookup_datastore()` and in `Drop`, dominated by `pbs_config::datastore::config()` (config parse). The parsing cost itself should eventually be investigated in a future effort. Furthermore, cargo-flamegraph showed that when using a token-based auth method to access the API, a significant amount of time is spent in validation on every request [3]. ## Approach [PATCH 1/4] Support datastore generation in ConfigVersionCache [PATCH 2/4] Fast path for datastore lookups Cache the parsed datastore.cfg keyed by the shared datastore generation. lookup_datastore() reuses both the cached config and an existing DataStoreImpl when the generation matches, and falls back to the old slow path otherwise. The caching logic is implemented using the datastore_section_config_cached(update_cache: bool) helper. [PATCH 3/4] Fast path for Drop Make DataStore::Drop use the datastore_section_config_cached() helper to avoid re-reading/parsing datastore.cfg on every Drop. [PATCH 4/4] TTL to catch manual edits Add a TTL to the cached config and bump the datastore generation iff the digest changed but generation stays the same. 
This catches manual edits to datastore.cfg without reintroducing hashing or config parsing on every request. ## Benchmark results ### End-to-end Testing `/status?verbose=0` end-to-end with 1000 stores, 5 req/store and parallel=16 before/after the series: Metric Before After ---------------------------------------- Total time 12s 9s Throughput (all) 416.67 555.56 Cold RPS (round #1) 83.33 111.11 Warm RPS (#2..N) 333.33 444.44 Running under flamegraph [2], TLS appears to consume a significant amount of CPU time and blur the results. Still, a ~33% higher overall throughput and ~25% less end-to-end time for this workload. ### Isolated benchmarks (hyperfine) In addition to the end-to-end tests, I measured two standalone benchmarks with hyperfine, each using a config with 1000 datastores. `M` is the number of distinct datastores looked up and `N` is the number of lookups per datastore. Drop-direct variant: Drops the `DataStore` after every lookup, so the `Drop` path runs on every iteration: use anyhow::Error; use pbs_api_types::Operation; use pbs_datastore::DataStore; fn main() -> Result<(), Error> { let mut args = std::env::args(); args.next(); let datastores = if let Some(n) = args.next() { n.parse::()? } else { 1000 }; let iterations = if let Some(n) = args.next() { n.parse::()? 
} else { 1000 }; for d in 1..=datastores { let name = format!("ds{:04}", d); for i in 1..=iterations { DataStore::lookup_datastore(&name, Some(Operation::Write))?; } } Ok(()) } +----+------+-----------+-----------+---------+ | M | N | Baseline | Patched | Speedup | +----+------+-----------+-----------+---------+ | 1 | 1000 | 1.684 s | 35.3 ms | 47.7x | | 10 | 100 | 1.689 s | 35.0 ms | 48.3x | | 100| 10 | 1.709 s | 35.8 ms | 47.7x | |1000| 1 | 1.809 s | 39.0 ms | 46.4x | +----+------+-----------+-----------+---------+ Bulk-drop variant: Keeps the `DataStore` instances alive for all `N` lookups of a given datastore and then drops them in bulk, mimicking a task that performs many lookups while it is running and only triggers the expensive `Drop` logic when the last user exits. use anyhow::Error; use pbs_api_types::Operation; use pbs_datastore::DataStore; fn main() -> Result<(), Error> { let mut args = std::env::args(); args.next(); let datastores = if let Some(n) = args.next() { n.parse::()? } else { 1000 }; let iterations = if let Some(n) = args.next() { n.parse::()? } else { 1000 }; for d in 1..=datastores { let name = format!("ds{:04}", d); let mut stores = Vec::with_capacity(iterations); for i in 1..=iterations { stores.push(DataStore::lookup_datastore(&name, Some(Operation::Write))?); } } Ok(()) } +------+------+---------------+--------------+---------+ | M | N | Baseline mean | Patched mean | Speedup | +------+------+---------------+--------------+---------+ | 1 | 1000 | 890.6 ms | 35.5 ms | 25.1x | | 10 | 100 | 891.3 ms | 35.1 ms | 25.4x | | 100 | 10 | 983.9 ms | 35.6 ms | 27.6x | | 1000 | 1 | 1829.0 ms | 45.2 ms | 40.5x | +------+------+---------------+--------------+---------+ Both variants show that the combination of the cached config lookups and the cheaper `Drop` handling reduces the hot-path cost from ~1.8 s per run to a few tens of milliseconds in these benchmarks. 
## Reproduction steps VM: 4 vCPU, ~8 GiB RAM, VirtIO-SCSI; disks: - scsi0 32G (OS) - scsi1 1000G (datastores) Install PBS from ISO on the VM. Set up ZFS on /dev/sdb (adjust if different): zpool create -f -o ashift=12 pbsbench /dev/sdb zfs set mountpoint=/pbsbench pbsbench zfs create pbsbench/pbs-bench Raise file-descriptor limit: sudo systemctl edit proxmox-backup-proxy.service Add the following lines: [Service] LimitNOFILE=1048576 Reload systemd and restart the proxy: sudo systemctl daemon-reload sudo systemctl restart proxmox-backup-proxy.service Verify the limit: systemctl show proxmox-backup-proxy.service | grep LimitNOFILE Create 1000 ZFS-backed datastores (as used in #6049 [1]): seq -w 001 1000 | xargs -n1 -P1 bash -c ' id=$0 name="ds${id}" dataset="pbsbench/pbs-bench/${name}" path="/pbsbench/pbs-bench/${name}" zfs create -o mountpoint="$path" "$dataset" proxmox-backup-manager datastore create "$name" "$path" \ --comment "ZFS dataset-based datastore" ' Build PBS from this series, then run the server manually under flamegraph: systemctl stop proxmox-backup-proxy cargo flamegraph --release --bin proxmox-backup-proxy ## Patch summary [PATCH 1/4] config: enable config version cache for datastore [PATCH 2/4] partial fix #6049: datastore: impl ConfigVersionCache fast path for lookups [PATCH 3/4] partial fix #6049: datastore: use config fast-path in Drop [PATCH 4/4] partial fix #6049: datastore: add TTL fallback to catch manual config edits ## Changes Please refer to the per-patch changelogs. ## Maintainer notes No dependency bumps, no API changes and no breaking changes. 
Kind regards, Samuel Links [1] Bugzilla #6049: https://bugzilla.proxmox.com/show_bug.cgi?id=6049 [2] cargo-flamegraph: https://github.com/flamegraph-rs/flamegraph [3] Bugzilla #7017: https://bugzilla.proxmox.com/show_bug.cgi?id=7017 Samuel Rufinatscha (4): config: enable config version cache for datastore partial fix #6049: datastore: impl ConfigVersionCache fast path for lookups partial fix #6049: datastore: use config fast-path in Drop partial fix #6049: datastore: add TTL fallback to catch manual config edits pbs-config/src/config_version_cache.rs | 10 +- pbs-datastore/Cargo.toml | 1 + pbs-datastore/src/datastore.rs | 148 +++++++++++++++++++++---- 3 files changed, 135 insertions(+), 24 deletions(-) -- 2.47.3 From s.rufinatscha at proxmox.com Mon Jan 5 15:16:14 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Mon, 5 Jan 2026 15:16:14 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v6 4/4] partial fix #6049: datastore: add TTL fallback to catch manual config edits In-Reply-To: <20260105141615.242463-1-s.rufinatscha@proxmox.com> References: <20260105141615.242463-1-s.rufinatscha@proxmox.com> Message-ID: <20260105141615.242463-5-s.rufinatscha@proxmox.com> The lookup fast path reacts to API-driven config changes because save_config() bumps the generation. Manual edits of datastore.cfg do not bump the counter. To keep the system robust against such edits without reintroducing config reading and hashing on the hot path, this patch adds a TTL to the cache entry. If the cached config is older than DATASTORE_CONFIG_CACHE_TTL_SECS (set to 60s), the next lookup takes the slow path and refreshes the entry. As an optimization, a check to catch manual edits was added (if the digest changed but generation stayed the same). If a manual edit was detected, the generation will be bumped. Links [1] cargo-flamegraph: https://github.com/flamegraph-rs/flamegraph Fixes: #6049 Signed-off-by: Samuel Rufinatscha --- Changes: >From v1 ? 
v2 - Store last_update timestamp in DatastoreConfigCache type. >From v2 → v3 No changes >From v3 → v4 - Fix digest generation bump logic in update_cache, thanks @Fabian. >From v4 → v5 - Rebased only, no changes >From v5 → v6 - Rebased - Styling: simplified digest-matching, thanks @Fabian pbs-datastore/src/datastore.rs | 47 +++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index 8adb0e3b..c4be55ad 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -53,8 +53,12 @@ use crate::{DataBlob, LocalDatastoreLruCache}; struct DatastoreConfigCache { // Parsed datastore.cfg file config: Arc, + // Digest of the datastore.cfg file + digest: [u8; 32], // Generation number from ConfigVersionCache last_generation: usize, + // Last update time (epoch seconds) + last_update: i64, } static DATASTORE_CONFIG_CACHE: LazyLock>> = @@ -63,6 +67,8 @@ static DATASTORE_CONFIG_CACHE: LazyLock>> = static DATASTORE_MAP: LazyLock>>> = LazyLock::new(|| Mutex::new(HashMap::new())); +/// Max age in seconds to reuse the cached datastore config. +const DATASTORE_CONFIG_CACHE_TTL_SECS: i64 = 60; /// Filename to store backup group notes pub const GROUP_NOTES_FILE_NAME: &str = "notes"; /// Filename to store backup group owner @@ -323,15 +329,16 @@ impl DatastoreThreadSettings { /// generation. /// /// Uses `ConfigVersionCache` to detect stale entries: -/// - If the cached generation matches the current generation, the -/// cached config is returned. +/// - If the cached generation matches the current generation and TTL is +/// OK, the cached config is returned. /// - Otherwise the config is re-read from disk. If `update_cache` is -/// `true`, the new config and current generation are stored in the -/// cache. Callers that set `update_cache = true` must hold the -/// datastore config lock to avoid racing with concurrent config -/// changes. 
+/// `true` and a previous cached entry exists with the same generation +/// but a different digest, this indicates the config has changed +/// (e.g. manual edit) and the generation must be bumped. Callers +/// that set `update_cache = true` must hold the datastore config lock +/// to avoid racing with concurrent config changes. /// - If `update_cache` is `false`, the freshly read config is returned -/// but the cache is left unchanged. +/// but the cache and generation are left unchanged. /// /// If `ConfigVersionCache` is not available, the config is always read /// from disk and `None` is returned as the generation. @@ -341,25 +348,41 @@ fn datastore_section_config_cached( let mut config_cache = DATASTORE_CONFIG_CACHE.lock().unwrap(); if let Ok(version_cache) = ConfigVersionCache::new() { + let now = epoch_i64(); let current_gen = version_cache.datastore_generation(); if let Some(cached) = config_cache.as_ref() { - // Fast path: re-use cached datastore.cfg - if cached.last_generation == current_gen { + // Fast path: re-use cached datastore.cfg if generation matches and TTL not expired + if cached.last_generation == current_gen + && now - cached.last_update < DATASTORE_CONFIG_CACHE_TTL_SECS + { return Ok((cached.config.clone(), Some(cached.last_generation))); } } // Slow path: re-read datastore.cfg - let (config_raw, _digest) = pbs_config::datastore::config()?; + let (config_raw, digest) = pbs_config::datastore::config()?; let config = Arc::new(config_raw); + let mut effective_gen = current_gen; if update_cache { + // Bump the generation if the config has been changed manually. + // This ensures that Drop handlers will detect that a newer config exists + // and will not rely on a stale cached entry for maintenance mandate. 
+ if let Some(cached) = config_cache.as_ref() { + if cached.last_generation == current_gen && cached.digest != digest { + effective_gen = version_cache.increase_datastore_generation() + 1; + } + } + + // Persist *config_cache = Some(DatastoreConfigCache { config: config.clone(), - last_generation: current_gen, + digest, + last_generation: effective_gen, + last_update: now, }); } - Ok((config, Some(current_gen))) + Ok((config, Some(effective_gen))) } else { // Fallback path, no config version cache: read datastore.cfg and return None as generation *config_cache = None; -- 2.47.3 From s.rufinatscha at proxmox.com Mon Jan 5 15:21:07 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Mon, 5 Jan 2026 15:21:07 +0100 Subject: [pbs-devel] superseded: [PATCH proxmox-backup v5 0/4] datastore: remove config reload on hot path In-Reply-To: <20251124170423.303300-1-s.rufinatscha@proxmox.com> References: <20251124170423.303300-1-s.rufinatscha@proxmox.com> Message-ID: https://lore.proxmox.com/pbs-devel/20260105141615.242463-1-s.rufinatscha at proxmox.com/T/#t On 11/24/25 6:03 PM, Samuel Rufinatscha wrote: > Hi, > > this series reduces CPU time in datastore lookups by avoiding repeated > datastore.cfg reads/parses in both `lookup_datastore()` and > `DataStore::Drop`. It also adds a TTL so manual config edits are > noticed without reintroducing hashing on every request. > > While investigating #6049 [1], cargo-flamegraph [2] showed hotspots > during repeated `/status` calls in `lookup_datastore()` and in `Drop`, > dominated by `pbs_config::datastore::config()` (config parse). > > The parsing cost itself should eventually be investigated in a future > effort. Furthermore, cargo-flamegraph showed that when using a > token-based auth method to access the API, a significant amount of time > is spent in validation on every request request [3]. 
> > ## Approach > > [PATCH 1/4] Support datastore generation in ConfigVersionCache > > [PATCH 2/4] Fast path for datastore lookups > Cache the parsed datastore.cfg keyed by the shared datastore > generation. lookup_datastore() reuses both the cached config and an > existing DataStoreImpl when the generation matches, and falls back > to the old slow path otherwise. The caching logic is implemented > using the datastore_section_config_cached(update_cache: bool) helper. > > [PATCH 3/4] Fast path for Drop > Make DataStore::Drop use the datastore_section_config_cached() > helper to avoid re-reading/parsing datastore.cfg on every Drop. > Bump generation not only on API config saves, but also on slow-path > lookups (if update_cache is true), to enable Drop handlers see > eventual newer configs. > > [PATCH 4/4] TTL to catch manual edits > Add a TTL to the cached config and bump the datastore generation iff > the digest changed but generation stays the same. This catches manual > edits to datastore.cfg without reintroducing hashing or config > parsing on every request. > > ## Benchmark results > > ### End-to-end > > Testing `/status?verbose=0` end-to-end with 1000 stores, 5 req/store > and parallel=16 before/after the series: > > Metric Before After > ---------------------------------------- > Total time 12s 9s > Throughput (all) 416.67 555.56 > Cold RPS (round #1) 83.33 111.11 > Warm RPS (#2..N) 333.33 444.44 > > Running under flamegraph [2], TLS appears to consume a significant > amount of CPU time and blur the results. Still, a ~33% higher overall > throughput and ~25% less end-to-end time for this workload. > > ### Isolated benchmarks (hyperfine) > > In addition to the end-to-end tests, I measured two standalone > benchmarks with hyperfine, each using a config with 1000 datastores. > `M` is the number of distinct datastores looked up and > `N` is the number of lookups per datastore. 
> > Drop-direct variant: > > Drops the `DataStore` after every lookup, so the `Drop` path runs on > every iteration: > > use anyhow::Error; > > use pbs_api_types::Operation; > use pbs_datastore::DataStore; > > fn main() -> Result<(), Error> { > let mut args = std::env::args(); > args.next(); > > let datastores = if let Some(n) = args.next() { > n.parse::()? > } else { > 1000 > }; > > let iterations = if let Some(n) = args.next() { > n.parse::()? > } else { > 1000 > }; > > for d in 1..=datastores { > let name = format!("ds{:04}", d); > > for i in 1..=iterations { > DataStore::lookup_datastore(&name, Some(Operation::Write))?; > } > } > > Ok(()) > } > > +----+------+-----------+-----------+---------+ > | M | N | Baseline | Patched | Speedup | > +----+------+-----------+-----------+---------+ > | 1 | 1000 | 1.684 s | 35.3 ms | 47.7x | > | 10 | 100 | 1.689 s | 35.0 ms | 48.3x | > | 100| 10 | 1.709 s | 35.8 ms | 47.7x | > |1000| 1 | 1.809 s | 39.0 ms | 46.4x | > +----+------+-----------+-----------+---------+ > > Bulk-drop variant: > > Keeps the `DataStore` instances alive for > all `N` lookups of a given datastore and then drops them in bulk, > mimicking a task that performs many lookups while it is running and > only triggers the expensive `Drop` logic when the last user exits. > > use anyhow::Error; > > use pbs_api_types::Operation; > use pbs_datastore::DataStore; > > fn main() -> Result<(), Error> { > let mut args = std::env::args(); > args.next(); > > let datastores = if let Some(n) = args.next() { > n.parse::()? > } else { > 1000 > }; > > let iterations = if let Some(n) = args.next() { > n.parse::()? 
> } else { > 1000 > }; > > for d in 1..=datastores { > let name = format!("ds{:04}", d); > > let mut stores = Vec::with_capacity(iterations); > for i in 1..=iterations { > stores.push(DataStore::lookup_datastore(&name, Some(Operation::Write))?); > } > } > > Ok(()) > } > > +------+------+---------------+--------------+---------+ > | M | N | Baseline mean | Patched mean | Speedup | > +------+------+---------------+--------------+---------+ > | 1 | 1000 | 890.6 ms | 35.5 ms | 25.1x | > | 10 | 100 | 891.3 ms | 35.1 ms | 25.4x | > | 100 | 10 | 983.9 ms | 35.6 ms | 27.6x | > | 1000 | 1 | 1829.0 ms | 45.2 ms | 40.5x | > +------+------+---------------+--------------+---------+ > > > Both variants show that the combination of the cached config lookups > and the cheaper `Drop` handling reduces the hot-path cost from ~1.8 s > per run to a few tens of milliseconds in these benchmarks. > > ## Reproduction steps > > VM: 4 vCPU, ~8 GiB RAM, VirtIO-SCSI; disks: > - scsi0 32G (OS) > - scsi1 1000G (datastores) > > Install PBS from ISO on the VM. 
> > Set up ZFS on /dev/sdb (adjust if different): > > zpool create -f -o ashift=12 pbsbench /dev/sdb > zfs set mountpoint=/pbsbench pbsbench > zfs create pbsbench/pbs-bench > > Raise file-descriptor limit: > > sudo systemctl edit proxmox-backup-proxy.service > > Add the following lines: > > [Service] > LimitNOFILE=1048576 > > Reload systemd and restart the proxy: > > sudo systemctl daemon-reload > sudo systemctl restart proxmox-backup-proxy.service > > Verify the limit: > > systemctl show proxmox-backup-proxy.service | grep LimitNOFILE > > Create 1000 ZFS-backed datastores (as used in #6049 [1]): > > seq -w 001 1000 | xargs -n1 -P1 bash -c ' > id=$0 > name="ds${id}" > dataset="pbsbench/pbs-bench/${name}" > path="/pbsbench/pbs-bench/${name}" > zfs create -o mountpoint="$path" "$dataset" > proxmox-backup-manager datastore create "$name" "$path" \ > --comment "ZFS dataset-based datastore" > ' > > Build PBS from this series, then run the server under manually > under flamegraph: > > systemctl stop proxmox-backup-proxy > cargo flamegraph --release --bin proxmox-backup-proxy > > ## Patch summary > > [PATCH 1/4] partial fix #6049: config: enable config version cache for datastore > [PATCH 2/4] partial fix #6049: datastore: impl ConfigVersionCache fast path for lookups > [PATCH 3/4] partial fix #6049: datastore: use config fast-path in Drop > [PATCH 4/4] partial fix #6049: datastore: add TTL fallback to catch manual config edits > > ## Maintainer notes > > No dependency bumps, no API changes and no breaking changes. 
> > Thanks, > Samuel > > Links > > [1] Bugzilla #6049: https://bugzilla.proxmox.com/show_bug.cgi?id=6049 > [2] cargo-flamegraph: https://github.com/flamegraph-rs/flamegraph > [3] Bugzilla #7017: https://bugzilla.proxmox.com/show_bug.cgi?id=7017 > > Samuel Rufinatscha (4): > partial fix #6049: config: enable config version cache for datastore > partial fix #6049: datastore: impl ConfigVersionCache fast path for > lookups > partial fix #6049: datastore: use config fast-path in Drop > partial fix #6049: datastore: add TTL fallback to catch manual config > edits > > pbs-config/src/config_version_cache.rs | 10 +- > pbs-datastore/Cargo.toml | 1 + > pbs-datastore/src/datastore.rs | 213 ++++++++++++++++++++----- > 3 files changed, 179 insertions(+), 45 deletions(-) > From s.rufinatscha at proxmox.com Mon Jan 5 16:22:16 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Mon, 5 Jan 2026 16:22:16 +0100 Subject: [pbs-devel] [PATCH proxmox-backup 1/1] fix: s3: make s3_refresh apihandler sync In-Reply-To: <20260105103407.63587-1-n.frey@proxmox.com> References: <20260105103407.63587-1-n.frey@proxmox.com> Message-ID: <2be9a5a5-41d6-43c4-b25c-5029de7ea456@proxmox.com> Thanks, this makes sense - the ApiHandler mismatch explains the panic. 
Reviewed-by: Samuel Rufinatscha On 1/5/26 11:34 AM, Nicolas Frey wrote: > fixes regression from 524cf1e that made `datastore::s3_refresh` sync > but did not change the ApiHandler matching part here > > This would result in a panic every time an s3-refresh was initiated > > Fixes: https://forum.proxmox.com/threads/178655 > Signed-off-by: Nicolas Frey > --- > src/bin/proxmox_backup_manager/datastore.rs | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/bin/proxmox_backup_manager/datastore.rs b/src/bin/proxmox_backup_manager/datastore.rs > index 57b4ca29..5c65c5ec 100644 > --- a/src/bin/proxmox_backup_manager/datastore.rs > +++ b/src/bin/proxmox_backup_manager/datastore.rs > @@ -339,7 +339,7 @@ async fn s3_refresh(mut param: Value, rpcenv: &mut dyn RpcEnvironment) -> Result > > let info = &api2::admin::datastore::API_METHOD_S3_REFRESH; > let result = match info.handler { > - ApiHandler::Async(handler) => (handler)(param, info, rpcenv).await?, > + ApiHandler::Sync(handler) => (handler)(param, info, rpcenv)?, > _ => unreachable!(), > }; > From c.ebner at proxmox.com Wed Jan 7 12:37:37 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Wed, 7 Jan 2026 12:37:37 +0100 Subject: [pbs-devel] [PATCH proxmox-backup 1/1] fix: s3: make s3_refresh apihandler sync In-Reply-To: <20260105103407.63587-1-n.frey@proxmox.com> References: <20260105103407.63587-1-n.frey@proxmox.com> Message-ID: <7d08e58c-58b7-4dec-995b-db7142e9ea7f@proxmox.com> Patch looks good to me and restores the correct behavior of the cli command. 
On 1/5/26 11:34 AM, Nicolas Frey wrote: > fixes regression from 524cf1e that made `datastore::s3_refresh` sync > but did not change the ApiHandler matching part here > > This would result in a panic every time an s3-refresh was initiated > > Fixes: https://forum.proxmox.com/threads/178655 Might reference the blamed commit in a fixes trailer as well for easier lookup and search: Fixes: 524cf1e7 ("api: admin: make s3 refresh handler sync") > Signed-off-by: Nicolas Frey > --- > src/bin/proxmox_backup_manager/datastore.rs | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/bin/proxmox_backup_manager/datastore.rs b/src/bin/proxmox_backup_manager/datastore.rs > index 57b4ca29..5c65c5ec 100644 > --- a/src/bin/proxmox_backup_manager/datastore.rs > +++ b/src/bin/proxmox_backup_manager/datastore.rs > @@ -339,7 +339,7 @@ async fn s3_refresh(mut param: Value, rpcenv: &mut dyn RpcEnvironment) -> Result > > let info = &api2::admin::datastore::API_METHOD_S3_REFRESH; > let result = match info.handler { > - ApiHandler::Async(handler) => (handler)(param, info, rpcenv).await?, > + ApiHandler::Sync(handler) => (handler)(param, info, rpcenv)?, > _ => unreachable!(), > }; > Reviewed-by: Christian Ebner Tested-by: Christian Ebner From c.ebner at proxmox.com Wed Jan 7 13:07:57 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Wed, 7 Jan 2026 13:07:57 +0100 Subject: [pbs-devel] [PATCH proxmox] s3-client: make truncation flag optional in list object v2 response Message-ID: <20260107120757.346517-1-c.ebner@proxmox.com> Some providers do not return the `IsTruncated` flag [0] in the response body for list object v2 API calls, signaling that there are further object keys to be returned by subsequent API calls providing the next continuation token. Since this flag is optional, allow it to be missing for XML response body parsing and default for it to be false in the client response. 
Other unused members for the struct used for XML parsing have already been dropped in commit eb559d87 ("fix #7008: s3-client: drop unused optional object list v2 response fields"). Fixes: https://forum.proxmox.com/threads/178707/ Signed-off-by: Christian Ebner --- proxmox-s3-client/src/response_reader.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/proxmox-s3-client/src/response_reader.rs b/proxmox-s3-client/src/response_reader.rs index 47fcd129..e03b3bb0 100644 --- a/proxmox-s3-client/src/response_reader.rs +++ b/proxmox-s3-client/src/response_reader.rs @@ -37,7 +37,7 @@ pub struct ListObjectsV2Response { /// https://docs.aws.amazon.com/AmazonS3/latest/API/API_ListObjectsV2.html#API_ListObjectsV2_ResponseSyntax struct ListObjectsV2ResponseBody { /// Flag indication if response was truncated because of key limits. - pub is_truncated: bool, + pub is_truncated: Option, /// Token used for this request to get further keys in truncated responses. pub continuation_token: Option, /// Allows to fetch the next set of keys for truncated responses. 
@@ -50,7 +50,7 @@ impl ListObjectsV2ResponseBody { fn with_optional_date(self, date: Option) -> ListObjectsV2Response { ListObjectsV2Response { date, - is_truncated: self.is_truncated, + is_truncated: self.is_truncated.unwrap_or_default(), continuation_token: self.continuation_token, next_continuation_token: self.next_continuation_token, contents: self.contents.unwrap_or_default(), @@ -530,7 +530,7 @@ fn parse_list_objects_v2_response_test() { "#; let result: ListObjectsV2ResponseBody = serde_xml_rs::from_str(response_body).unwrap(); - assert!(!result.is_truncated); + assert_eq!(result.is_truncated, Some(false)); assert_eq!( result.contents.unwrap(), vec![ -- 2.47.3 From n.frey at proxmox.com Wed Jan 7 13:46:04 2026 From: n.frey at proxmox.com (Nicolas Frey) Date: Wed, 7 Jan 2026 13:46:04 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v2 1/1] fix: s3: make s3_refresh apihandler sync Message-ID: <20260107124604.159625-1-n.frey@proxmox.com> fixes regression from 524cf1e7 that made `datastore::s3_refresh` sync but did not change the ApiHandler matching part here This would result in a panic every time an s3-refresh was initiated Reviewed-by: Christian Ebner Tested-by: Christian Ebner Reviewed-by: Samuel Rufinatscha Fixes: 524cf1e7 ("api: admin: make s3 refresh handler sync") Fixes: https://forum.proxmox.com/threads/178655 Signed-off-by: Nicolas Frey --- added Fixes trailer to reference blamed commit src/bin/proxmox_backup_manager/datastore.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/proxmox_backup_manager/datastore.rs b/src/bin/proxmox_backup_manager/datastore.rs index 57b4ca29..5c65c5ec 100644 --- a/src/bin/proxmox_backup_manager/datastore.rs +++ b/src/bin/proxmox_backup_manager/datastore.rs @@ -339,7 +339,7 @@ async fn s3_refresh(mut param: Value, rpcenv: &mut dyn RpcEnvironment) -> Result let info = &api2::admin::datastore::API_METHOD_S3_REFRESH; let result = match info.handler { - ApiHandler::Async(handler) => (handler)(param, 
info, rpcenv).await?, + ApiHandler::Sync(handler) => (handler)(param, info, rpcenv)?, _ => unreachable!(), }; -- 2.47.3 From n.frey at proxmox.com Wed Jan 7 13:47:33 2026 From: n.frey at proxmox.com (Nicolas Frey) Date: Wed, 7 Jan 2026 13:47:33 +0100 Subject: [pbs-devel] superseded: [PATCH proxmox-backup 1/1] fix: s3: make s3_refresh apihandler sync In-Reply-To: <20260105103407.63587-1-n.frey@proxmox.com> References: <20260105103407.63587-1-n.frey@proxmox.com> Message-ID: Superseded-by: https://lore.proxmox.com/pbs-devel/20260107124604.159625-1-n.frey at proxmox.com/T/#u On 1/5/26 11:34 AM, Nicolas Frey wrote: > fixes regression from 524cf1e that made `datastore::s3_refresh` sync > but did not change the ApiHandler matching part here > > This would result in a panic every time an s3-refresh was initiated > > Fixes: https://forum.proxmox.com/threads/178655 > Signed-off-by: Nicolas Frey > --- > src/bin/proxmox_backup_manager/datastore.rs | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/bin/proxmox_backup_manager/datastore.rs b/src/bin/proxmox_backup_manager/datastore.rs > index 57b4ca29..5c65c5ec 100644 > --- a/src/bin/proxmox_backup_manager/datastore.rs > +++ b/src/bin/proxmox_backup_manager/datastore.rs > @@ -339,7 +339,7 @@ async fn s3_refresh(mut param: Value, rpcenv: &mut dyn RpcEnvironment) -> Result > > let info = &api2::admin::datastore::API_METHOD_S3_REFRESH; > let result = match info.handler { > - ApiHandler::Async(handler) => (handler)(param, info, rpcenv).await?, > + ApiHandler::Sync(handler) => (handler)(param, info, rpcenv)?, > _ => unreachable!(), > }; > From c.ebner at proxmox.com Wed Jan 7 14:10:12 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Wed, 7 Jan 2026 14:10:12 +0100 Subject: [pbs-devel] [PATCH v2 proxmox-backup 1/2] datastore: check for null pointer when allocating DynamicIndexHeader In-Reply-To: <20251230124154.115442-2-r.obkircher@proxmox.com> References: 
<20251230124154.115442-1-r.obkircher@proxmox.com> <20251230124154.115442-2-r.obkircher@proxmox.com> Message-ID: Please provide a short commit message on why this is done. As far as I see this now panics in handle_alloc_error() if either memory is exhausted or the layout does not fit the allocator constraints, while previously this failed on Box::from_raw() for the null pointer? So maybe better to propagate the allocation error to the call site of zeroed() in DynamicIndexWriter::create() and return it there as well? On 12/30/25 1:42 PM, Robert Obkircher wrote: > Signed-off-by: Robert Obkircher > --- > pbs-datastore/src/dynamic_index.rs | 9 ++++++++- > 1 file changed, 8 insertions(+), 1 deletion(-) > > diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs > index ad49cdf3..12df78b1 100644 > --- a/pbs-datastore/src/dynamic_index.rs > +++ b/pbs-datastore/src/dynamic_index.rs > @@ -41,13 +41,20 @@ proxmox_lang::static_assert_size!(DynamicIndexHeader, 4096); > impl DynamicIndexHeader { > /// Convenience method to allocate a zero-initialized header struct. > pub fn zeroed() -> Box { > + let layout = std::alloc::Layout::new::(); > unsafe { > - Box::from_raw(std::alloc::alloc_zeroed(std::alloc::Layout::new::()) as *mut Self) > + let ptr = std::alloc::alloc_zeroed(layout) as *mut Self; > + if ptr.is_null() { > + std::alloc::handle_alloc_error(layout); > + } > + Box::from_raw(ptr) > } > } > > pub fn as_bytes(&self) -> &[u8] { > unsafe { > + // There can't be any uninitialized padding, because the fields > + // take up all of the statically asserted total size. 
> std::slice::from_raw_parts( > self as *const Self as *const u8, > std::mem::size_of::(), From r.obkircher at proxmox.com Wed Jan 7 15:29:03 2026 From: r.obkircher at proxmox.com (Robert Obkircher) Date: Wed, 7 Jan 2026 15:29:03 +0100 Subject: [pbs-devel] [PATCH v2 proxmox-backup 1/2] datastore: check for null pointer when allocating DynamicIndexHeader In-Reply-To: References: <20251230124154.115442-1-r.obkircher@proxmox.com> <20251230124154.115442-2-r.obkircher@proxmox.com> Message-ID: <6a6165e9-e3b9-469d-84c6-bcab95a0426a@proxmox.com> On 1/7/26 14:09, Christian Ebner wrote: > Please provide a short commit message on why this is done. Ok, I'll do that tomorrow, after I've finished the next version of "pipe to stdin". > > As far as I see this now panics in handle_alloc_error() if either > memory is exhausted or the layout does not fit the allocator > constraints, while previously this failed on Box::from_raw() for the > null pointer? No, in the previous version this was undefined behavior, which is why I assumed it should be ok to panic. For example, constructing a box from a null can result in `Some(reference) == None` being true [1]. I've also seen the same issue in multiple other places. For example this `uninitialized` function [2] is copy-pasted twice. That one is also super dangerous, because if you accidentally read from it the compiler will most likely delete your code [3]. Creating an uninitialized &mut slice itself currently seems to be ok with a lot of asterisks though [4]. [1] https://godbolt.org/z/Ezfqbhqz3 [2] https://godbolt.org/z/ahx6vs8qz [3] https://godbolt.org/z/4MePEKf79 [4] https://github.com/rust-lang/unsafe-code-guidelines/issues/346 > > > So maybe better to propagate the allocation error to the call site of > zeroed() in DynamicIndexWriter::create() and return it there as well? 
On a Linux system with overcommit enabled, allocations will probably never fail anyway, unless the size is something huge like isize::MAX, which is more likely to happen for an incorrectly sized Vec than for a Box. > > On 12/30/25 1:42 PM, Robert Obkircher wrote: >> Signed-off-by: Robert Obkircher >> --- >> ? pbs-datastore/src/dynamic_index.rs | 9 ++++++++- >> ? 1 file changed, 8 insertions(+), 1 deletion(-) >> >> diff --git a/pbs-datastore/src/dynamic_index.rs >> b/pbs-datastore/src/dynamic_index.rs >> index ad49cdf3..12df78b1 100644 >> --- a/pbs-datastore/src/dynamic_index.rs >> +++ b/pbs-datastore/src/dynamic_index.rs >> @@ -41,13 +41,20 @@ >> proxmox_lang::static_assert_size!(DynamicIndexHeader, 4096); >> ? impl DynamicIndexHeader { >> ????? /// Convenience method to allocate a zero-initialized header >> struct. >> ????? pub fn zeroed() -> Box { >> +??????? let layout = std::alloc::Layout::new::(); >> ????????? unsafe { >> - >> Box::from_raw(std::alloc::alloc_zeroed(std::alloc::Layout::new::()) >> as *mut Self) >> +??????????? let ptr = std::alloc::alloc_zeroed(layout) as *mut Self; >> +??????????? if ptr.is_null() { >> +??????????????? std::alloc::handle_alloc_error(layout); >> +??????????? } >> +??????????? Box::from_raw(ptr) >> ????????? } >> ????? } >> ? ????? pub fn as_bytes(&self) -> &[u8] { >> ????????? unsafe { >> +??????????? // There can't be any uninitialized padding, because the >> fields >> +??????????? // take up all of the statically asserted total size. >> ????????????? std::slice::from_raw_parts( >> ????????????????? self as *const Self as *const u8, >> ????????????????? 
std::mem::size_of::(), > From c.ebner at proxmox.com Wed Jan 7 15:57:31 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Wed, 7 Jan 2026 15:57:31 +0100 Subject: [pbs-devel] [PATCH v2 proxmox-backup 1/2] datastore: check for null pointer when allocating DynamicIndexHeader In-Reply-To: <6a6165e9-e3b9-469d-84c6-bcab95a0426a@proxmox.com> References: <20251230124154.115442-1-r.obkircher@proxmox.com> <20251230124154.115442-2-r.obkircher@proxmox.com> <6a6165e9-e3b9-469d-84c6-bcab95a0426a@proxmox.com> Message-ID: On 1/7/26 3:28 PM, Robert Obkircher wrote: > > On 1/7/26 14:09, Christian Ebner wrote: >> Please provide a short commit message on why this is done. > Ok, I'll do that tomorrow, after I've finished the next version of "pipe > to stdin". > >> >> As far as I see this now panics in handle_alloc_error() if either >> memory is exhausted or the layout does not fit the allocator >> constraints, while previously this failed on Box::from_raw() for the >> null pointer? Okay, thanks for clarification! > > No, in the previous version this was undefined behavior, which is why I > assumed it should be ok to panic.?For example, constructing a box from a > null can result in `Some(reference) == None` being true [1]. > > I've also seen the same issue in multiple other places. For example this > `uninitialized` function [2] is copy-pasted twice.?That one is also > super dangerous, because if you accidentally read from it the compiler > will most likely delete your code [3].?Creating an uninitialized &mut > slice itself currently seems to be ok with a lot of asterisks though [4]. > > [1] https://godbolt.org/z/Ezfqbhqz3 > [2] https://godbolt.org/z/ahx6vs8qz > [3] https://godbolt.org/z/4MePEKf79 > [4] https://github.com/rust-lang/unsafe-code-guidelines/issues/346 > >> >> >> So maybe better to propagate the allocation error to the call site of >> zeroed() in DynamicIndexWriter::create() and return it there as well? 
> I don't think we can reasonably deal with that situation, because > returning an anyhow error most likely also requires allocation. > > On a Linux system with overcommit enabled, allocations will probably > never fail anyway, unless the size is something huge like isize::MAX, > which is more likely to happen for an incorrectly sized Vec than for a Box. True, also allocation here is 4K only. > >> >> On 12/30/25 1:42 PM, Robert Obkircher wrote: >>> Signed-off-by: Robert Obkircher >>> --- >>> ? pbs-datastore/src/dynamic_index.rs | 9 ++++++++- >>> ? 1 file changed, 8 insertions(+), 1 deletion(-) >>> >>> diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/ >>> dynamic_index.rs >>> index ad49cdf3..12df78b1 100644 >>> --- a/pbs-datastore/src/dynamic_index.rs >>> +++ b/pbs-datastore/src/dynamic_index.rs >>> @@ -41,13 +41,20 @@ proxmox_lang::static_assert_size! >>> (DynamicIndexHeader, 4096); >>> ? impl DynamicIndexHeader { >>> ????? /// Convenience method to allocate a zero-initialized header >>> struct. >>> ????? pub fn zeroed() -> Box { >>> +??????? let layout = std::alloc::Layout::new::(); >>> ????????? unsafe { >>> - >>> Box::from_raw(std::alloc::alloc_zeroed(std::alloc::Layout::new::()) as *mut Self) >>> +??????????? let ptr = std::alloc::alloc_zeroed(layout) as *mut Self; >>> +??????????? if ptr.is_null() { >>> +??????????????? std::alloc::handle_alloc_error(layout); >>> +??????????? } >>> +??????????? Box::from_raw(ptr) >>> ????????? } >>> ????? } >>> ? ????? pub fn as_bytes(&self) -> &[u8] { >>> ????????? unsafe { >>> +??????????? // There can't be any uninitialized padding, because the >>> fields >>> +??????????? // take up all of the statically asserted total size. >>> ????????????? std::slice::from_raw_parts( >>> ????????????????? self as *const Self as *const u8, >>> ????????????????? 
std::mem::size_of::(), >> From c.ebner at proxmox.com Thu Jan 8 11:44:11 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Thu, 8 Jan 2026 11:44:11 +0100 Subject: [pbs-devel] [PATCH v2 proxmox-backup 3/5] fix #3847: client: support fifo pipe inputs for images In-Reply-To: <20251219161850.244154-4-r.obkircher@proxmox.com> References: <20251219161850.244154-1-r.obkircher@proxmox.com> <20251219161850.244154-4-r.obkircher@proxmox.com> Message-ID: <91b0f702-94a2-49c0-8bed-727396c78005@proxmox.com> some comments inline On 12/19/25 5:19 PM, Robert Obkircher wrote: > Accept fifo files as inputs for images and omit the size when > uploading the fixed index file. > > Signed-off-by: Robert Obkircher > --- > pbs-client/src/backup_writer.rs | 37 ++++++++++++++++++++++--------- > proxmox-backup-client/src/main.rs | 30 ++++++++++++++----------- > src/server/push.rs | 13 ++++++----- > 3 files changed, 51 insertions(+), 29 deletions(-) > > diff --git a/pbs-client/src/backup_writer.rs b/pbs-client/src/backup_writer.rs > index dbd177d8..1963b700 100644 > --- a/pbs-client/src/backup_writer.rs > +++ b/pbs-client/src/backup_writer.rs > @@ -52,7 +52,16 @@ pub struct UploadOptions { > pub previous_manifest: Option>, > pub compress: bool, > pub encrypt: bool, > - pub fixed_size: Option, > + pub chunk_size: ChunkSize, above is ill-named as this is not the chunk size, but rather the image file size. 
I suggest to rename this to ``` index_type: IndexType ``` or another even better fitting name and define the IndexType with tuple enum variant for the size > +} > + > +#[derive(Default, Clone)] > +pub enum ChunkSize { > + #[default] > + Dynamic, > + Fixed { > + file_size: Option, > + }, /// Index type for upload options pub enum IndexType { #[default] /// Dynamic chunking Dynamic, /// Fixed size chunking with optional image file size Fixed(Option), } > } > > struct ChunkUploadResponse { > @@ -292,11 +301,14 @@ impl BackupWriter { > options: UploadOptions, > ) -> Result { > let mut param = json!({ "archive-name": archive_name }); > - let prefix = if let Some(size) = options.fixed_size { > - param["size"] = size.into(); > - "fixed" > - } else { > - "dynamic" > + let prefix = match options.chunk_size { > + ChunkSize::Fixed { file_size } => { ... above makes this to ``` IndexType::Fixed(file_size) => { ``` > + if let Some(size) = file_size { > + param["size"] = size.into(); > + } > + "fixed" > + } > + ChunkSize::Dynamic => "dynamic", and ``` IndexType::Dynamic => "dynamic", ``` as well as for other occurences. 
> }; > > if options.encrypt && self.crypt_config.is_none() { > @@ -387,11 +399,14 @@ impl BackupWriter { > let known_chunks = Arc::new(Mutex::new(HashSet::new())); > > let mut param = json!({ "archive-name": archive_name }); > - let prefix = if let Some(size) = options.fixed_size { > - param["size"] = size.into(); > - "fixed" > - } else { > - "dynamic" > + let prefix = match options.chunk_size { > + ChunkSize::Fixed { file_size } => { > + if let Some(size) = file_size { > + param["size"] = size.into(); > + } > + "fixed" > + } > + ChunkSize::Dynamic => "dynamic", > }; > > if options.encrypt && self.crypt_config.is_none() { > diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs > index 999e5020..828643da 100644 > --- a/proxmox-backup-client/src/main.rs > +++ b/proxmox-backup-client/src/main.rs > @@ -46,7 +46,7 @@ use pbs_client::tools::{ > use pbs_client::{ > delete_ticket_info, parse_backup_specification, view_task_result, BackupDetectionMode, > BackupReader, BackupRepository, BackupSpecificationType, BackupStats, BackupWriter, > - BackupWriterOptions, ChunkStream, FixedChunkStream, HttpClient, InjectionData, > + BackupWriterOptions, ChunkSize, ChunkStream, FixedChunkStream, HttpClient, InjectionData, > PxarBackupStream, RemoteChunkReader, UploadOptions, BACKUP_SOURCE_SCHEMA, > }; > use pbs_datastore::catalog::{BackupCatalogWriter, CatalogReader, CatalogWriter}; > @@ -205,7 +205,7 @@ async fn backup_directory>( > pxar_create_options: pbs_client::pxar::PxarCreateOptions, > upload_options: UploadOptions, > ) -> Result<(BackupStats, Option), Error> { > - if upload_options.fixed_size.is_some() { > + if let ChunkSize::Fixed { .. 
} = upload_options.chunk_size { > bail!("cannot backup directory with fixed chunk size!"); > } > > @@ -295,7 +295,7 @@ async fn backup_image>( > > let stream = FixedChunkStream::new(stream, chunk_size.unwrap_or(4 * 1024 * 1024)); > > - if upload_options.fixed_size.is_none() { > + if let ChunkSize::Dynamic = upload_options.chunk_size { > bail!("cannot backup image with dynamic chunk size!"); > } > > @@ -859,15 +859,17 @@ async fn create_backup( > upload_list.push((BackupSpecificationType::PXAR, filename, target, "didx", 0)); > } > BackupSpecificationType::IMAGE => { > - if !(file_type.is_file() || file_type.is_block_device()) { > - bail!("got unexpected file type (expected file or block device)"); > - } > - > - let size = image_size(&PathBuf::from(&filename))?; > - > - if size == 0 { > - bail!("got zero-sized file '{}'", filename); > - } > + let size = if file_type.is_file() || file_type.is_block_device() { > + let size = image_size(&PathBuf::from(&filename))?; > + if size == 0 { > + bail!("got zero-sized file '{}'", filename); > + } > + size > + } else if file_type.is_fifo() { > + 0 > + } else { > + bail!("got unexpected file type (expected file, block device, or fifo"); > + }; > > upload_list.push(( > BackupSpecificationType::IMAGE, > @@ -1191,9 +1193,11 @@ async fn create_backup( > (BackupSpecificationType::IMAGE, false) => { > log_file("image", &filename, target.as_ref()); > > + // 0 means fifo pipe with unknown size > + let file_size = (size != 0).then_some(size); > let upload_options = UploadOptions { > previous_manifest: previous_manifest.clone(), > - fixed_size: Some(size), > + chunk_size: ChunkSize::Fixed { file_size }, > compress: true, > encrypt: crypto.mode == CryptMode::Encrypt, > }; > diff --git a/src/server/push.rs b/src/server/push.rs > index d7884fce..a1216ba9 100644 > --- a/src/server/push.rs > +++ b/src/server/push.rs > @@ -17,7 +17,8 @@ use pbs_api_types::{ > PRIV_REMOTE_DATASTORE_MODIFY, PRIV_REMOTE_DATASTORE_PRUNE, > }; > use pbs_client::{ > - 
BackupRepository, BackupWriter, BackupWriterOptions, HttpClient, MergedChunkInfo, UploadOptions, > + BackupRepository, BackupWriter, BackupWriterOptions, ChunkSize, HttpClient, MergedChunkInfo, > + UploadOptions, > }; > use pbs_config::CachedUserInfo; > use pbs_datastore::data_blob::ChunkInfo; > @@ -917,7 +918,7 @@ pub(crate) async fn push_snapshot( > index, > chunk_reader, > &backup_writer, > - None, > + ChunkSize::Dynamic, > known_chunks.clone(), > ) > .await?; > @@ -944,7 +945,9 @@ pub(crate) async fn push_snapshot( > index, > chunk_reader, > &backup_writer, > - Some(size), > + ChunkSize::Fixed { > + file_size: Some(size), > + }, > known_chunks.clone(), > ) > .await?; > @@ -1002,7 +1005,7 @@ async fn push_index( > index: impl IndexFile + Send + 'static, > chunk_reader: Arc, > backup_writer: &BackupWriter, > - size: Option, > + chunk_size: ChunkSize, > known_chunks: Arc>>, > ) -> Result { > let (upload_channel_tx, upload_channel_rx) = mpsc::channel(20); > @@ -1048,7 +1051,7 @@ async fn push_index( > let upload_options = UploadOptions { > compress: true, > encrypt: false, > - fixed_size: size, > + chunk_size, > ..UploadOptions::default() > }; > From c.ebner at proxmox.com Thu Jan 8 11:44:15 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Thu, 8 Jan 2026 11:44:15 +0100 Subject: [pbs-devel] [PATCH v2 proxmox-backup 0/5] fix: #3847 pipe from STDIN to proxmox-backup-client In-Reply-To: <20251219161850.244154-1-r.obkircher@proxmox.com> References: <20251219161850.244154-1-r.obkircher@proxmox.com> Message-ID: <32a89cb5-763b-4489-8f3f-69a4e138faba@proxmox.com> Patches look promising already, great work! Huge improvement since the previous version of the patches. Left some comments on individual patches, but nothing big so far. 
On 12/19/25 5:18 PM, Robert Obkircher wrote: > Add support for commands like: > ssh host cmd | proxmox-backup-client backup data.img:/dev/stdin > proxmox-backup-client backup a.img:<(mysqldump) b.img:<(pgdump) > > Changes since v1: > - use mremap+ftruncate instead of write_all_at > - make the size API parameter optional instead of using 0 > - use an enum to represent fixed/dynamic chunk size in UploadOptions > - alias "-" to "/dev/stdin" > - split changes into separate commits > > This does not yet need a detailed review, but let me know if anything > looks completely off. > > I'm still planning on writing some proper tests for the backend. > That may involve moving the resizing logic to a type like > proxmox_sys::mmap::Mmap, so it can be tested in isolation. > > Christian Ebner previously suggested defining a trait for the > FixedIndexWriter, with separate implementations for known and unknown > size. I'm not sure if this is still necessary, because the changes are > already much more isolated. Should I still introduce such a trait? No, the current implementation really does not call for that at all. 
> > > Robert Obkircher (5): > fix #3847: datastore: support writing fidx files of unknown size > fix #3847: api: backup: make fixed index file size optional > fix #3847: client: support fifo pipe inputs for images > fix #3847: client: treat minus sign as stdin > DO NOT MERGE: test script for reference > > pbs-client/src/backup_writer.rs | 37 ++++++++---- > pbs-datastore/src/datastore.rs | 2 +- > pbs-datastore/src/fixed_index.rs | 98 +++++++++++++++++++++++++++++-- > proxmox-backup-client/src/main.rs | 37 ++++++++---- > src/api2/backup/environment.rs | 8 ++- > src/api2/backup/mod.rs | 4 +- > src/server/push.rs | 13 ++-- > test-pipes.sh | 68 +++++++++++++++++++++ > 8 files changed, 227 insertions(+), 40 deletions(-) > create mode 100755 test-pipes.sh > From c.ebner at proxmox.com Thu Jan 8 11:44:08 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Thu, 8 Jan 2026 11:44:08 +0100 Subject: [pbs-devel] [PATCH v2 proxmox-backup 1/5] fix #3847: datastore: support writing fidx files of unknown size In-Reply-To: <20251219161850.244154-2-r.obkircher@proxmox.com> References: <20251219161850.244154-1-r.obkircher@proxmox.com> <20251219161850.244154-2-r.obkircher@proxmox.com> Message-ID: <311156c7-9bf8-4d2f-9707-a1e88edcaaf0@proxmox.com> some nits inline On 12/19/25 5:19 PM, Robert Obkircher wrote: > Use mremap and ftruncate to support growable FixedIndexWriters. Grow > exponentially from a small initial index size for efficiency. Truncate > excessive capacity after encountering a non-full block or on close. 
> > Signed-off-by: Robert Obkircher > --- > pbs-datastore/src/datastore.rs | 2 +- > pbs-datastore/src/fixed_index.rs | 98 ++++++++++++++++++++++++++++++-- > 2 files changed, 93 insertions(+), 7 deletions(-) > > diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs > index 9c57aaac..af712726 100644 > --- a/pbs-datastore/src/datastore.rs > +++ b/pbs-datastore/src/datastore.rs > @@ -591,7 +591,7 @@ impl DataStore { > pub fn create_fixed_writer>( > &self, > filename: P, > - size: usize, > + size: Option, > chunk_size: usize, > ) -> Result { > let index = FixedIndexWriter::create( > diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs > index 6c3be2d4..42b97464 100644 > --- a/pbs-datastore/src/fixed_index.rs > +++ b/pbs-datastore/src/fixed_index.rs > @@ -1,6 +1,7 @@ > use std::fs::File; > use std::io::Write; > use std::io::{Seek, SeekFrom}; > +use std::os::unix::fs::FileExt; > use std::os::unix::io::AsRawFd; > use std::path::{Path, PathBuf}; > use std::ptr::NonNull; > @@ -222,6 +223,8 @@ pub struct FixedIndexWriter { > index: *mut u8, > pub uuid: [u8; 16], > pub ctime: i64, > + growable_size: bool, > + write_size_on_close: bool, > } > > // `index` is mmap()ed which cannot be thread-local so should be sendable > @@ -237,12 +240,15 @@ impl Drop for FixedIndexWriter { > } > > impl FixedIndexWriter { > + // TODO: this is deliberately small at the moment to test resizing > + const INITIAL_CHUNKS_IF_UNKNOWN: usize = 4; nit: this is actually the initial index length, so maybe INITIAL_INDEX_LENGTH or FALLBACK_DEFAULT_INDEX_LENGTH? 
> + > #[allow(clippy::cast_ptr_alignment)] > // Requires obtaining a shared chunk store lock beforehand > pub fn create( > store: Arc, > path: &Path, > - size: usize, > + known_size: Option, > chunk_size: usize, > ) -> Result { > let full_path = store.relative_path(path); > @@ -264,6 +270,7 @@ impl FixedIndexWriter { > } > > let ctime = proxmox_time::epoch_i64(); > + let size = known_size.unwrap_or(0); > > let uuid = Uuid::generate(); > > @@ -280,7 +287,9 @@ impl FixedIndexWriter { > > file.write_all(&buffer)?; > > - let index_length = size.div_ceil(chunk_size); > + let index_length = known_size > + .map(|s| s.div_ceil(chunk_size)) > + .unwrap_or(Self::INITIAL_CHUNKS_IF_UNKNOWN); > let index_size = index_length * 32; > nix::unistd::ftruncate(&file, (header_size + index_size) as i64)?; > > @@ -308,11 +317,69 @@ impl FixedIndexWriter { > index: data, > ctime, > uuid: *uuid.as_bytes(), > + growable_size: known_size.is_none(), > + write_size_on_close: known_size.is_none(), > }) > } > > + fn resize_index(&mut self, new_index_length: usize) -> Result<(), Error> { > + let old_index_size = self.index_length * 32; > + > + let header_size = std::mem::size_of::(); > + let new_index_size = new_index_length * 32; > + let new_file_size = (header_size + new_index_size) as i64; > + > + let index_addr = NonNull::new(self.index as *mut std::ffi::c_void).ok_or_else(|| { > + format_err!("Can't resize FixedIndexWriter index because the mmap pointer is null.") > + })?; > + > + nix::unistd::ftruncate(&self.file, new_file_size)?; > + > + let new_index = unsafe { > + nix::sys::mman::mremap( > + index_addr, > + old_index_size, > + new_index_size, > + nix::sys::mman::MRemapFlags::MREMAP_MAYMOVE, > + None, > + ) > + }? > + .as_ptr() > + .cast::(); > + > + self.index = new_index; > + self.index_length = new_index_length; > + > + Ok(()) > + } > + nit: include a docstring for this method, although not present for the pub methods we should aim to add them. 
> + pub fn grow_to_size(&mut self, requested: usize) -> Result<(), Error> { > + if self.size < requested { > + if !self.growable_size { > + bail!("refusing to resize from {} to {}", self.size, requested); > + } > + let len = requested.div_ceil(self.chunk_size); > + if len * self.chunk_size != requested { > + self.growable_size = false; // ensures only the last chunk can be smaller > + self.resize_index(len)?; > + } else { question: what is the reason for the 1.5 factor, why not e.g. doubling the length? Is max virtual memory a concern? > + // grow by 1.5x > + let mut new_len = self.index_length.max(2); > + while new_len < len { > + new_len += new_len / 2; > + } > + debug_assert!(new_len * self.chunk_size >= requested); > + self.resize_index(new_len)?; > + } > + self.size = requested; > + } > + Ok(()) > + } > + nit: this now is the current index length and will change when growing, so the method name should reflect that > pub fn index_length(&self) -> usize { > - self.index_length > + let len = self.size.div_ceil(self.chunk_size); nit: I think we should avoid the possible panic here, and return an error instead. Although it is clear that this should never happen under normal operations. 
> + assert!((self.write_size_on_close && len <= self.index_length) || len == self.index_length); > + len > } > > fn unmap(&mut self) -> Result<(), Error> { > @@ -336,15 +403,26 @@ impl FixedIndexWriter { > bail!("cannot close already closed index file."); > } > > - let index_size = self.index_length * 32; > + let used_index_length = self.index_length(); > + let index_size = used_index_length * 32; > let data = unsafe { std::slice::from_raw_parts(self.index, index_size) }; > let index_csum = openssl::sha::sha256(data); > > self.unmap()?; > > + if used_index_length < self.index_length { > + let header_size = std::mem::size_of::(); > + nix::unistd::ftruncate(&self.file, (header_size + index_size) as i64)?; > + self.index_length = used_index_length; > + } > + > let csum_offset = std::mem::offset_of!(FixedIndexHeader, index_csum); > - self.file.seek(SeekFrom::Start(csum_offset as u64))?; > - self.file.write_all(&index_csum)?; > + self.file.write_all_at(&index_csum, csum_offset as u64)?; nit: the changes above are a bit independent and might be pulled out into their own patch > + if self.write_size_on_close { > + let size_offset = std::mem::offset_of!(FixedIndexHeader, size); > + self.file > + .write_all_at(&(self.size as u64).to_le_bytes(), size_offset as u64)?; > + } > self.file.flush()?; > > if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { > @@ -407,6 +485,14 @@ impl FixedIndexWriter { > } > > pub fn clone_data_from(&mut self, reader: &FixedIndexReader) -> Result<(), Error> { > + if self.growable_size { nit: this error might be misunderstood, as the backup is using fixed size chunking. So maybe better to reword this to e.g. 
"reusing the fixed index is only supported with known input size" Further, this might be > + bail!("reusing the index is only supported with a fixed size"); > + } > + > + if self.chunk_size != reader.chunk_size { > + bail!("chunk size mismatch"); > + } > + > if self.index_length != reader.index_count() { > bail!("clone_data_from failed - index sizes not equal"); > } From s.rufinatscha at proxmox.com Thu Jan 8 12:26:24 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:26:24 +0100 Subject: [pbs-devel] [PATCH proxmox v5 4/4] acme-api: add helper to load client for an account In-Reply-To: <20260108112629.189670-1-s.rufinatscha@proxmox.com> References: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Message-ID: <20260108112629.189670-5-s.rufinatscha@proxmox.com> The PBS ACME refactoring needs a simple way to obtain an AcmeClient for a given configured account without duplicating config wiring. This patch adds a load_client_with_account helper in proxmox-acme-api that loads the account and constructs a matching client, similarly as PBS previous own AcmeClient::load() function. 
Signed-off-by: Samuel Rufinatscha --- proxmox-acme-api/src/account_api_impl.rs | 5 +++++ proxmox-acme-api/src/lib.rs | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/proxmox-acme-api/src/account_api_impl.rs b/proxmox-acme-api/src/account_api_impl.rs index ef195908..ca8c8655 100644 --- a/proxmox-acme-api/src/account_api_impl.rs +++ b/proxmox-acme-api/src/account_api_impl.rs @@ -116,3 +116,8 @@ pub async fn update_account(name: &AcmeAccountName, contact: Option) -> Ok(()) } + +pub async fn load_client_with_account(account_name: &AcmeAccountName) -> Result { + let account_data = super::account_config::load_account_config(&account_name).await?; + Ok(account_data.client()) +} diff --git a/proxmox-acme-api/src/lib.rs b/proxmox-acme-api/src/lib.rs index 623e9e23..96f88ae2 100644 --- a/proxmox-acme-api/src/lib.rs +++ b/proxmox-acme-api/src/lib.rs @@ -31,7 +31,8 @@ mod plugin_config; mod account_api_impl; #[cfg(feature = "impl")] pub use account_api_impl::{ - deactivate_account, get_account, get_tos, list_accounts, register_account, update_account, + deactivate_account, get_account, get_tos, list_accounts, load_client_with_account, + register_account, update_account, }; #[cfg(feature = "impl")] -- 2.47.3 From s.rufinatscha at proxmox.com Thu Jan 8 12:26:23 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:26:23 +0100 Subject: [pbs-devel] [PATCH proxmox v5 3/4] fix #6939: acme: support servers returning 204 for nonce requests In-Reply-To: <20260108112629.189670-1-s.rufinatscha@proxmox.com> References: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Message-ID: <20260108112629.189670-4-s.rufinatscha@proxmox.com> Some ACME servers (notably custom or legacy implementations) respond to HEAD /newNonce with a 204 No Content instead of the RFC 8555-recommended 200 OK [1]. While this behavior is technically off-spec, it is not illegal. This issue was reported on our bug tracker [2]. 
The previous implementation treated any non-200 response as an error, causing account registration to fail against such servers. Relax the status-code check to accept both 200 and 204 responses (and potentially support other 2xx codes) to improve interoperability. Note: In comparison, PVE?s Perl ACME client performs a GET request [3] instead of a HEAD request and accepts any 2xx success code when retrieving the nonce [4]. This difference in behavior does not affect functionality but is worth noting for consistency across implementations. [1] https://datatracker.ietf.org/doc/html/rfc8555/#section-7.2 [2] https://bugzilla.proxmox.com/show_bug.cgi?id=6939 [3] https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l219 [4] https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l597 Fixes: #6939 Signed-off-by: Samuel Rufinatscha --- proxmox-acme/src/account.rs | 8 ++++---- proxmox-acme/src/async_client.rs | 6 +++--- proxmox-acme/src/client.rs | 2 +- proxmox-acme/src/request.rs | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/proxmox-acme/src/account.rs b/proxmox-acme/src/account.rs index ea1a3c60..84610bf3 100644 --- a/proxmox-acme/src/account.rs +++ b/proxmox-acme/src/account.rs @@ -84,7 +84,7 @@ impl Account { method: "POST", content_type: crate::request::JSON_CONTENT_TYPE, body, - expected: crate::http_status::CREATED, + expected: &[crate::http_status::CREATED], }; Ok(NewOrder::new(request)) @@ -106,7 +106,7 @@ impl Account { method: "POST", content_type: crate::request::JSON_CONTENT_TYPE, body, - expected: crate::http_status::OK, + expected: &[crate::http_status::OK], }) } @@ -131,7 +131,7 @@ impl Account { method: "POST", content_type: crate::request::JSON_CONTENT_TYPE, body, - expected: crate::http_status::OK, + expected: &[crate::http_status::OK], }) } @@ -321,7 +321,7 @@ impl AccountCreator { method: "POST", 
content_type: crate::request::JSON_CONTENT_TYPE, body, - expected: crate::http_status::CREATED, + expected: &[crate::http_status::CREATED], }) } diff --git a/proxmox-acme/src/async_client.rs b/proxmox-acme/src/async_client.rs index 043648bb..07da842c 100644 --- a/proxmox-acme/src/async_client.rs +++ b/proxmox-acme/src/async_client.rs @@ -420,7 +420,7 @@ impl AcmeClient { }; if parts.status.is_success() { - if status != request.expected { + if !request.expected.contains(&status) { return Err(Error::InvalidApi(format!( "ACME server responded with unexpected status code: {:?}", parts.status @@ -498,7 +498,7 @@ impl AcmeClient { method: "GET", content_type: "", body: String::new(), - expected: crate::http_status::OK, + expected: &[crate::http_status::OK], }, nonce, ) @@ -550,7 +550,7 @@ impl AcmeClient { method: "HEAD", content_type: "", body: String::new(), - expected: crate::http_status::OK, + expected: &[crate::http_status::OK, crate::http_status::NO_CONTENT], }, nonce, ) diff --git a/proxmox-acme/src/client.rs b/proxmox-acme/src/client.rs index 5c812567..af250fb8 100644 --- a/proxmox-acme/src/client.rs +++ b/proxmox-acme/src/client.rs @@ -203,7 +203,7 @@ impl Inner { let got_nonce = self.update_nonce(&mut response)?; if response.is_success() { - if response.status != request.expected { + if !request.expected.contains(&response.status) { return Err(Error::InvalidApi(format!( "API server responded with unexpected status code: {:?}", response.status diff --git a/proxmox-acme/src/request.rs b/proxmox-acme/src/request.rs index 341ce53e..d782a7de 100644 --- a/proxmox-acme/src/request.rs +++ b/proxmox-acme/src/request.rs @@ -16,8 +16,8 @@ pub(crate) struct Request { /// The body to pass along with request, or an empty string. pub(crate) body: String, - /// The expected status code a compliant ACME provider will return on success. - pub(crate) expected: u16, + /// The set of HTTP status codes that indicate a successful response from an ACME provider. 
+ pub(crate) expected: &'static [u16], } /// Common HTTP status codes used in ACME responses. -- 2.47.3 From s.rufinatscha at proxmox.com Thu Jan 8 12:26:28 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:26:28 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v5 4/5] acme: change API impls to use proxmox-acme-api handlers In-Reply-To: <20260108112629.189670-1-s.rufinatscha@proxmox.com> References: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Message-ID: <20260108112629.189670-9-s.rufinatscha@proxmox.com> PBS currently uses its own ACME client and API logic, while PDM uses the factored out proxmox-acme and proxmox-acme-api crates. This duplication risks differences in behaviour and requires ACME maintenance in two places. This patch is part of a series to move PBS over to the shared ACME stack. Changes: - Replace api2/config/acme.rs API logic with proxmox-acme-api handlers. - Drop local caching and helper types that duplicate proxmox-acme-api. 
Signed-off-by: Samuel Rufinatscha --- src/api2/config/acme.rs | 378 ++----------------------- src/api2/types/acme.rs | 16 -- src/bin/proxmox_backup_manager/acme.rs | 6 +- src/config/acme/mod.rs | 44 +-- 4 files changed, 33 insertions(+), 411 deletions(-) diff --git a/src/api2/config/acme.rs b/src/api2/config/acme.rs index 898f06dd..3314430c 100644 --- a/src/api2/config/acme.rs +++ b/src/api2/config/acme.rs @@ -1,29 +1,18 @@ -use std::fs; -use std::ops::ControlFlow; -use std::path::Path; -use std::sync::{Arc, LazyLock, Mutex}; -use std::time::SystemTime; - -use anyhow::{bail, format_err, Error}; -use hex::FromHex; -use serde::{Deserialize, Serialize}; -use serde_json::{json, Value}; -use tracing::{info, warn}; +use anyhow::Error; +use tracing::info; use pbs_api_types::{Authid, PRIV_SYS_MODIFY}; -use proxmox_acme::async_client::AcmeClient; -use proxmox_acme::types::AccountData as AcmeAccountData; -use proxmox_acme_api::AcmeAccountName; +use proxmox_acme_api::{ + AccountEntry, AccountInfo, AcmeAccountName, AcmeChallengeSchema, ChallengeSchemaWrapper, + DeletablePluginProperty, DnsPluginCore, DnsPluginCoreUpdater, KnownAcmeDirectory, PluginConfig, + DEFAULT_ACME_DIRECTORY_ENTRY, PLUGIN_ID_SCHEMA, +}; +use proxmox_config_digest::ConfigDigest; use proxmox_rest_server::WorkerTask; use proxmox_router::{ http_bail, list_subdirs_api_method, Permission, Router, RpcEnvironment, SubdirMap, }; -use proxmox_schema::{api, param_bail}; - -use crate::api2::types::{AcmeChallengeSchema, KnownAcmeDirectory}; -use crate::config::acme::plugin::{ - self, DnsPlugin, DnsPluginCore, DnsPluginCoreUpdater, PLUGIN_ID_SCHEMA, -}; +use proxmox_schema::api; pub(crate) const ROUTER: Router = Router::new() .get(&list_subdirs_api_method!(SUBDIRS)) @@ -65,19 +54,6 @@ const PLUGIN_ITEM_ROUTER: Router = Router::new() .put(&API_METHOD_UPDATE_PLUGIN) .delete(&API_METHOD_DELETE_PLUGIN); -#[api( - properties: { - name: { type: AcmeAccountName }, - }, -)] -/// An ACME Account entry. 
-/// -/// Currently only contains a 'name' property. -#[derive(Serialize)] -pub struct AccountEntry { - name: AcmeAccountName, -} - #[api( access: { permission: &Permission::Privilege(&["system", "certificates"], PRIV_SYS_MODIFY, false), @@ -91,40 +67,7 @@ pub struct AccountEntry { )] /// List ACME accounts. pub fn list_accounts() -> Result, Error> { - let mut entries = Vec::new(); - crate::config::acme::foreach_acme_account(|name| { - entries.push(AccountEntry { name }); - ControlFlow::Continue(()) - })?; - Ok(entries) -} - -#[api( - properties: { - account: { type: Object, properties: {}, additional_properties: true }, - tos: { - type: String, - optional: true, - }, - }, -)] -/// ACME Account information. -/// -/// This is what we return via the API. -#[derive(Serialize)] -pub struct AccountInfo { - /// Raw account data. - account: AcmeAccountData, - - /// The ACME directory URL the account was created at. - directory: String, - - /// The account's own URL within the ACME directory. - location: String, - - /// The ToS URL, if the user agreed to one. - #[serde(skip_serializing_if = "Option::is_none")] - tos: Option, + proxmox_acme_api::list_accounts() } #[api( @@ -141,23 +84,7 @@ pub struct AccountInfo { )] /// Return existing ACME account information. 
pub async fn get_account(name: AcmeAccountName) -> Result { - let account_info = proxmox_acme_api::get_account(name).await?; - - Ok(AccountInfo { - location: account_info.location, - tos: account_info.tos, - directory: account_info.directory, - account: AcmeAccountData { - only_return_existing: false, // don't actually write this out in case it's set - ..account_info.account - }, - }) -} - -fn account_contact_from_string(s: &str) -> Vec { - s.split(&[' ', ';', ',', '\0'][..]) - .map(|s| format!("mailto:{s}")) - .collect() + proxmox_acme_api::get_account(name).await } #[api( @@ -222,15 +149,11 @@ fn register_account( ); } - if Path::new(&crate::config::acme::account_path(&name)).exists() { + if std::path::Path::new(&proxmox_acme_api::account_config_filename(&name)).exists() { http_bail!(BAD_REQUEST, "account {} already exists", name); } - let directory = directory.unwrap_or_else(|| { - crate::config::acme::DEFAULT_ACME_DIRECTORY_ENTRY - .url - .to_owned() - }); + let directory = directory.unwrap_or_else(|| DEFAULT_ACME_DIRECTORY_ENTRY.url.to_string()); WorkerTask::spawn( "acme-register", @@ -286,17 +209,7 @@ pub fn update_account( auth_id.to_string(), true, move |_worker| async move { - let data = match contact { - Some(data) => json!({ - "contact": account_contact_from_string(&data), - }), - None => json!({}), - }; - - proxmox_acme_api::load_client_with_account(&name) - .await? - .update_account(&data) - .await?; + proxmox_acme_api::update_account(&name, contact).await?; Ok(()) }, @@ -334,18 +247,8 @@ pub fn deactivate_account( auth_id.to_string(), true, move |_worker| async move { - match proxmox_acme_api::load_client_with_account(&name) - .await? 
- .update_account(&json!({"status": "deactivated"})) - .await - { - Ok(_account) => (), - Err(err) if !force => return Err(err), - Err(err) => { - warn!("error deactivating account {name}, proceeding anyway - {err}"); - } - } - crate::config::acme::mark_account_deactivated(&name)?; + proxmox_acme_api::deactivate_account(&name, force).await?; + Ok(()) }, ) @@ -372,15 +275,7 @@ pub fn deactivate_account( )] /// Get the Terms of Service URL for an ACME directory. async fn get_tos(directory: Option) -> Result, Error> { - let directory = directory.unwrap_or_else(|| { - crate::config::acme::DEFAULT_ACME_DIRECTORY_ENTRY - .url - .to_owned() - }); - Ok(AcmeClient::new(directory) - .terms_of_service_url() - .await? - .map(str::to_owned)) + proxmox_acme_api::get_tos(directory).await } #[api( @@ -395,52 +290,7 @@ async fn get_tos(directory: Option) -> Result, Error> { )] /// Get named known ACME directory endpoints. fn get_directories() -> Result<&'static [KnownAcmeDirectory], Error> { - Ok(crate::config::acme::KNOWN_ACME_DIRECTORIES) -} - -/// Wrapper for efficient Arc use when returning the ACME challenge-plugin schema for serializing -struct ChallengeSchemaWrapper { - inner: Arc>, -} - -impl Serialize for ChallengeSchemaWrapper { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - self.inner.serialize(serializer) - } -} - -struct CachedSchema { - schema: Arc>, - cached_mtime: SystemTime, -} - -fn get_cached_challenge_schemas() -> Result { - static CACHE: LazyLock>> = LazyLock::new(|| Mutex::new(None)); - - // the actual loading code - let mut last = CACHE.lock().unwrap(); - - let actual_mtime = fs::metadata(crate::config::acme::ACME_DNS_SCHEMA_FN)?.modified()?; - - let schema = match &*last { - Some(CachedSchema { - schema, - cached_mtime, - }) if *cached_mtime >= actual_mtime => schema.clone(), - _ => { - let new_schema = Arc::new(crate::config::acme::load_dns_challenge_schema()?); - *last = Some(CachedSchema { - schema: 
Arc::clone(&new_schema), - cached_mtime: actual_mtime, - }); - new_schema - } - }; - - Ok(ChallengeSchemaWrapper { inner: schema }) + Ok(proxmox_acme_api::KNOWN_ACME_DIRECTORIES) } #[api( @@ -455,69 +305,7 @@ fn get_cached_challenge_schemas() -> Result { )] /// Get named known ACME directory endpoints. fn get_challenge_schema() -> Result { - get_cached_challenge_schemas() -} - -#[api] -#[derive(Default, Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -/// The API's format is inherited from PVE/PMG: -pub struct PluginConfig { - /// Plugin ID. - plugin: String, - - /// Plugin type. - #[serde(rename = "type")] - ty: String, - - /// DNS Api name. - #[serde(skip_serializing_if = "Option::is_none", default)] - api: Option, - - /// Plugin configuration data. - #[serde(skip_serializing_if = "Option::is_none", default)] - data: Option, - - /// Extra delay in seconds to wait before requesting validation. - /// - /// Allows to cope with long TTL of DNS records. - #[serde(skip_serializing_if = "Option::is_none", default)] - validation_delay: Option, - - /// Flag to disable the config. - #[serde(skip_serializing_if = "Option::is_none", default)] - disable: Option, -} - -// See PMG/PVE's $modify_cfg_for_api sub -fn modify_cfg_for_api(id: &str, ty: &str, data: &Value) -> PluginConfig { - let mut entry = data.clone(); - - let obj = entry.as_object_mut().unwrap(); - obj.remove("id"); - obj.insert("plugin".to_string(), Value::String(id.to_owned())); - obj.insert("type".to_string(), Value::String(ty.to_owned())); - - // FIXME: This needs to go once the `Updater` is fixed. - // None of these should be able to fail unless the user changed the files by hand, in which - // case we leave the unmodified string in the Value for now. This will be handled with an error - // later. 
- if let Some(Value::String(ref mut data)) = obj.get_mut("data") { - if let Ok(new) = proxmox_base64::url::decode_no_pad(&data) { - if let Ok(utf8) = String::from_utf8(new) { - *data = utf8; - } - } - } - - // PVE/PMG do this explicitly for ACME plugins... - // obj.insert("digest".to_string(), Value::String(digest.clone())); - - serde_json::from_value(entry).unwrap_or_else(|_| PluginConfig { - plugin: "*Error*".to_string(), - ty: "*Error*".to_string(), - ..Default::default() - }) + proxmox_acme_api::get_cached_challenge_schemas() } #[api( @@ -533,12 +321,7 @@ fn modify_cfg_for_api(id: &str, ty: &str, data: &Value) -> PluginConfig { )] /// List ACME challenge plugins. pub fn list_plugins(rpcenv: &mut dyn RpcEnvironment) -> Result, Error> { - let (plugins, digest) = plugin::config()?; - rpcenv["digest"] = hex::encode(digest).into(); - Ok(plugins - .iter() - .map(|(id, (ty, data))| modify_cfg_for_api(id, ty, data)) - .collect()) + proxmox_acme_api::list_plugins(rpcenv) } #[api( @@ -555,13 +338,7 @@ pub fn list_plugins(rpcenv: &mut dyn RpcEnvironment) -> Result )] /// List ACME challenge plugins. pub fn get_plugin(id: String, rpcenv: &mut dyn RpcEnvironment) -> Result { - let (plugins, digest) = plugin::config()?; - rpcenv["digest"] = hex::encode(digest).into(); - - match plugins.get(&id) { - Some((ty, data)) => Ok(modify_cfg_for_api(&id, ty, data)), - None => http_bail!(NOT_FOUND, "no such plugin"), - } + proxmox_acme_api::get_plugin(id, rpcenv) } // Currently we only have "the" standalone plugin and DNS plugins so we can just flatten a @@ -593,30 +370,7 @@ pub fn get_plugin(id: String, rpcenv: &mut dyn RpcEnvironment) -> Result Result<(), Error> { - // Currently we only support DNS plugins and the standalone plugin is "fixed": - if r#type != "dns" { - param_bail!("type", "invalid ACME plugin type: {:?}", r#type); - } - - let data = String::from_utf8(proxmox_base64::decode(data)?) 
- .map_err(|_| format_err!("data must be valid UTF-8"))?; - - let id = core.id.clone(); - - let _lock = plugin::lock()?; - - let (mut plugins, _digest) = plugin::config()?; - if plugins.contains_key(&id) { - param_bail!("id", "ACME plugin ID {:?} already exists", id); - } - - let plugin = serde_json::to_value(DnsPlugin { core, data })?; - - plugins.insert(id, r#type, plugin); - - plugin::save_config(&plugins)?; - - Ok(()) + proxmox_acme_api::add_plugin(r#type, core, data) } #[api( @@ -632,26 +386,7 @@ pub fn add_plugin(r#type: String, core: DnsPluginCore, data: String) -> Result<( )] /// Delete an ACME plugin configuration. pub fn delete_plugin(id: String) -> Result<(), Error> { - let _lock = plugin::lock()?; - - let (mut plugins, _digest) = plugin::config()?; - if plugins.remove(&id).is_none() { - http_bail!(NOT_FOUND, "no such plugin"); - } - plugin::save_config(&plugins)?; - - Ok(()) -} - -#[api()] -#[derive(Serialize, Deserialize)] -#[serde(rename_all = "kebab-case")] -/// Deletable property name -pub enum DeletableProperty { - /// Delete the disable property - Disable, - /// Delete the validation-delay property - ValidationDelay, + proxmox_acme_api::delete_plugin(id) } #[api( @@ -673,12 +408,12 @@ pub enum DeletableProperty { type: Array, optional: true, items: { - type: DeletableProperty, + type: DeletablePluginProperty, } }, digest: { - description: "Digest to protect against concurrent updates", optional: true, + type: ConfigDigest, }, }, }, @@ -692,65 +427,8 @@ pub fn update_plugin( id: String, update: DnsPluginCoreUpdater, data: Option, - delete: Option>, - digest: Option, + delete: Option>, + digest: Option, ) -> Result<(), Error> { - let data = data - .as_deref() - .map(proxmox_base64::decode) - .transpose()? 
- .map(String::from_utf8) - .transpose() - .map_err(|_| format_err!("data must be valid UTF-8"))?; - - let _lock = plugin::lock()?; - - let (mut plugins, expected_digest) = plugin::config()?; - - if let Some(digest) = digest { - let digest = <[u8; 32]>::from_hex(digest)?; - crate::tools::detect_modified_configuration_file(&digest, &expected_digest)?; - } - - match plugins.get_mut(&id) { - Some((ty, ref mut entry)) => { - if ty != "dns" { - bail!("cannot update plugin of type {:?}", ty); - } - - let mut plugin = DnsPlugin::deserialize(&*entry)?; - - if let Some(delete) = delete { - for delete_prop in delete { - match delete_prop { - DeletableProperty::ValidationDelay => { - plugin.core.validation_delay = None; - } - DeletableProperty::Disable => { - plugin.core.disable = None; - } - } - } - } - if let Some(data) = data { - plugin.data = data; - } - if let Some(api) = update.api { - plugin.core.api = api; - } - if update.validation_delay.is_some() { - plugin.core.validation_delay = update.validation_delay; - } - if update.disable.is_some() { - plugin.core.disable = update.disable; - } - - *entry = serde_json::to_value(plugin)?; - } - None => http_bail!(NOT_FOUND, "no such plugin"), - } - - plugin::save_config(&plugins)?; - - Ok(()) + proxmox_acme_api::update_plugin(id, update, data, delete, digest) } diff --git a/src/api2/types/acme.rs b/src/api2/types/acme.rs index 64175aff..0ff496b6 100644 --- a/src/api2/types/acme.rs +++ b/src/api2/types/acme.rs @@ -43,22 +43,6 @@ pub const ACME_DOMAIN_PROPERTY_SCHEMA: Schema = .format(&ApiStringFormat::PropertyString(&AcmeDomain::API_SCHEMA)) .schema(); -#[api( - properties: { - name: { type: String }, - url: { type: String }, - }, -)] -/// An ACME directory endpoint with a name and URL. -#[derive(Serialize)] -pub struct KnownAcmeDirectory { - /// The ACME directory's name. - pub name: &'static str, - - /// The ACME directory's endpoint URL. 
- pub url: &'static str, -} - #[api( properties: { schema: { diff --git a/src/bin/proxmox_backup_manager/acme.rs b/src/bin/proxmox_backup_manager/acme.rs index 6ed61560..d11d7498 100644 --- a/src/bin/proxmox_backup_manager/acme.rs +++ b/src/bin/proxmox_backup_manager/acme.rs @@ -4,14 +4,12 @@ use anyhow::{bail, Error}; use serde_json::Value; use proxmox_acme::async_client::AcmeClient; -use proxmox_acme_api::AcmeAccountName; +use proxmox_acme_api::{AcmeAccountName, DnsPluginCore, KNOWN_ACME_DIRECTORIES}; use proxmox_router::{cli::*, ApiHandler, RpcEnvironment}; use proxmox_schema::api; use proxmox_sys::fs::file_get_contents; use proxmox_backup::api2; -use proxmox_backup::config::acme::plugin::DnsPluginCore; -use proxmox_backup::config::acme::KNOWN_ACME_DIRECTORIES; pub fn acme_mgmt_cli() -> CommandLineInterface { let cmd_def = CliCommandMap::new() @@ -122,7 +120,7 @@ async fn register_account( match input.trim().parse::() { Ok(n) if n < KNOWN_ACME_DIRECTORIES.len() => { - break (KNOWN_ACME_DIRECTORIES[n].url.to_owned(), false); + break (KNOWN_ACME_DIRECTORIES[n].url.to_string(), false); } Ok(n) if n == KNOWN_ACME_DIRECTORIES.len() => { input.clear(); diff --git a/src/config/acme/mod.rs b/src/config/acme/mod.rs index e4639c53..01ab6223 100644 --- a/src/config/acme/mod.rs +++ b/src/config/acme/mod.rs @@ -1,16 +1,15 @@ use std::collections::HashMap; use std::ops::ControlFlow; -use std::path::Path; -use anyhow::{bail, format_err, Error}; +use anyhow::Error; use serde_json::Value; use pbs_api_types::PROXMOX_SAFE_ID_REGEX; -use proxmox_acme_api::AcmeAccountName; +use proxmox_acme_api::{AcmeAccountName, KnownAcmeDirectory, KNOWN_ACME_DIRECTORIES}; use proxmox_sys::error::SysError; use proxmox_sys::fs::{file_read_string, CreateOptions}; -use crate::api2::types::{AcmeChallengeSchema, KnownAcmeDirectory}; +use crate::api2::types::AcmeChallengeSchema; pub(crate) const ACME_DIR: &str = pbs_buildcfg::configdir!("/acme"); pub(crate) const ACME_ACCOUNT_DIR: &str = 
pbs_buildcfg::configdir!("/acme/accounts"); @@ -35,23 +34,8 @@ pub(crate) fn make_acme_dir() -> Result<(), Error> { create_acme_subdir(ACME_DIR) } -pub const KNOWN_ACME_DIRECTORIES: &[KnownAcmeDirectory] = &[ - KnownAcmeDirectory { - name: "Let's Encrypt V2", - url: "https://acme-v02.api.letsencrypt.org/directory", - }, - KnownAcmeDirectory { - name: "Let's Encrypt V2 Staging", - url: "https://acme-staging-v02.api.letsencrypt.org/directory", - }, -]; - pub const DEFAULT_ACME_DIRECTORY_ENTRY: &KnownAcmeDirectory = &KNOWN_ACME_DIRECTORIES[0]; -pub fn account_path(name: &str) -> String { - format!("{ACME_ACCOUNT_DIR}/{name}") -} - pub fn foreach_acme_account(mut func: F) -> Result<(), Error> where F: FnMut(AcmeAccountName) -> ControlFlow>, @@ -82,28 +66,6 @@ where } } -pub fn mark_account_deactivated(name: &str) -> Result<(), Error> { - let from = account_path(name); - for i in 0..100 { - let to = account_path(&format!("_deactivated_{name}_{i}")); - if !Path::new(&to).exists() { - return std::fs::rename(&from, &to).map_err(|err| { - format_err!( - "failed to move account path {:?} to {:?} - {}", - from, - to, - err - ) - }); - } - } - bail!( - "No free slot to rename deactivated account {:?}, please cleanup {:?}", - from, - ACME_ACCOUNT_DIR - ); -} - pub fn load_dns_challenge_schema() -> Result, Error> { let raw = file_read_string(ACME_DNS_SCHEMA_FN)?; let schemas: serde_json::Map = serde_json::from_str(&raw)?; -- 2.47.3 From s.rufinatscha at proxmox.com Thu Jan 8 12:26:29 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:26:29 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v5 5/5] acme: certificate ordering through proxmox-acme-api In-Reply-To: <20260108112629.189670-1-s.rufinatscha@proxmox.com> References: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Message-ID: <20260108112629.189670-10-s.rufinatscha@proxmox.com> PBS currently uses its own ACME client and API logic, while PDM uses the factored out proxmox-acme and 
proxmox-acme-api crates. This duplication risks differences in behaviour and requires ACME maintenance in two places. This patch is part of a series to move PBS over to the shared ACME stack. Changes: - Replace the custom ACME order/authorization loop in node certificates with a call to proxmox_acme_api::order_certificate. - Build domain + config data as proxmox-acme-api types - Remove obsolete local ACME ordering and plugin glue code. Signed-off-by: Samuel Rufinatscha --- src/acme/mod.rs | 2 - src/acme/plugin.rs | 335 ---------------------------------- src/api2/node/certificates.rs | 229 ++++------------------- src/api2/types/acme.rs | 73 -------- src/api2/types/mod.rs | 3 - src/config/acme/mod.rs | 8 +- src/config/acme/plugin.rs | 92 +--------- src/config/node.rs | 20 +- src/lib.rs | 2 - 9 files changed, 38 insertions(+), 726 deletions(-) delete mode 100644 src/acme/mod.rs delete mode 100644 src/acme/plugin.rs delete mode 100644 src/api2/types/acme.rs diff --git a/src/acme/mod.rs b/src/acme/mod.rs deleted file mode 100644 index cc561f9a..00000000 --- a/src/acme/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -pub(crate) mod plugin; -pub(crate) use plugin::get_acme_plugin; diff --git a/src/acme/plugin.rs b/src/acme/plugin.rs deleted file mode 100644 index 6804243c..00000000 --- a/src/acme/plugin.rs +++ /dev/null @@ -1,335 +0,0 @@ -use std::future::Future; -use std::net::{IpAddr, SocketAddr}; -use std::pin::Pin; -use std::process::Stdio; -use std::sync::Arc; -use std::time::Duration; - -use anyhow::{bail, format_err, Error}; -use bytes::Bytes; -use futures::TryFutureExt; -use http_body_util::Full; -use hyper::body::Incoming; -use hyper::server::conn::http1; -use hyper::service::service_fn; -use hyper::{Request, Response}; -use hyper_util::rt::TokioIo; -use tokio::io::{AsyncBufReadExt, AsyncRead, AsyncWriteExt, BufReader}; -use tokio::net::TcpListener; -use tokio::process::Command; - -use proxmox_acme::async_client::AcmeClient; -use proxmox_acme::{Authorization, Challenge}; 
-use proxmox_rest_server::WorkerTask; - -use crate::api2::types::AcmeDomain; -use crate::config::acme::plugin::{DnsPlugin, PluginData}; - -const PROXMOX_ACME_SH_PATH: &str = "/usr/share/proxmox-acme/proxmox-acme"; - -pub(crate) fn get_acme_plugin( - plugin_data: &PluginData, - name: &str, -) -> Result>, Error> { - let (ty, data) = match plugin_data.get(name) { - Some(plugin) => plugin, - None => return Ok(None), - }; - - Ok(Some(match ty.as_str() { - "dns" => { - let plugin: DnsPlugin = serde::Deserialize::deserialize(data)?; - Box::new(plugin) - } - "standalone" => { - // this one has no config - Box::::default() - } - other => bail!("missing implementation for plugin type '{}'", other), - })) -} - -pub(crate) trait AcmePlugin { - /// Setup everything required to trigger the validation and return the corresponding validation - /// URL. - fn setup<'fut, 'a: 'fut, 'b: 'fut, 'c: 'fut, 'd: 'fut>( - &'a mut self, - client: &'b mut AcmeClient, - authorization: &'c Authorization, - domain: &'d AcmeDomain, - task: Arc, - ) -> Pin> + Send + 'fut>>; - - fn teardown<'fut, 'a: 'fut, 'b: 'fut, 'c: 'fut, 'd: 'fut>( - &'a mut self, - client: &'b mut AcmeClient, - authorization: &'c Authorization, - domain: &'d AcmeDomain, - task: Arc, - ) -> Pin> + Send + 'fut>>; -} - -fn extract_challenge<'a>( - authorization: &'a Authorization, - ty: &str, -) -> Result<&'a Challenge, Error> { - authorization - .challenges - .iter() - .find(|ch| ch.ty == ty) - .ok_or_else(|| format_err!("no supported challenge type ({}) found", ty)) -} - -async fn pipe_to_tasklog( - pipe: T, - task: Arc, -) -> Result<(), std::io::Error> { - let mut pipe = BufReader::new(pipe); - let mut line = String::new(); - loop { - line.clear(); - match pipe.read_line(&mut line).await { - Ok(0) => return Ok(()), - Ok(_) => task.log_message(line.as_str()), - Err(err) => return Err(err), - } - } -} - -impl DnsPlugin { - async fn action<'a>( - &self, - client: &mut AcmeClient, - authorization: &'a Authorization, - domain: 
&AcmeDomain, - task: Arc, - action: &str, - ) -> Result<&'a str, Error> { - let challenge = extract_challenge(authorization, "dns-01")?; - let mut stdin_data = client - .dns_01_txt_value( - challenge - .token() - .ok_or_else(|| format_err!("missing token in challenge"))?, - )? - .into_bytes(); - stdin_data.push(b'\n'); - stdin_data.extend(self.data.as_bytes()); - if stdin_data.last() != Some(&b'\n') { - stdin_data.push(b'\n'); - } - - let mut command = Command::new("/usr/bin/setpriv"); - - #[rustfmt::skip] - command.args([ - "--reuid", "nobody", - "--regid", "nogroup", - "--clear-groups", - "--reset-env", - "--", - "/bin/bash", - PROXMOX_ACME_SH_PATH, - action, - &self.core.api, - domain.alias.as_deref().unwrap_or(&domain.domain), - ]); - - // We could use 1 socketpair, but tokio wraps them all in `File` internally causing `close` - // to be called separately on all of them without exception, so we need 3 pipes :-( - - let mut child = command - .stdin(Stdio::piped()) - .stdout(Stdio::piped()) - .stderr(Stdio::piped()) - .spawn()?; - - let mut stdin = child.stdin.take().expect("Stdio::piped()"); - let stdout = child.stdout.take().expect("Stdio::piped() failed?"); - let stdout = pipe_to_tasklog(stdout, Arc::clone(&task)); - let stderr = child.stderr.take().expect("Stdio::piped() failed?"); - let stderr = pipe_to_tasklog(stderr, Arc::clone(&task)); - let stdin = async move { - stdin.write_all(&stdin_data).await?; - stdin.flush().await?; - Ok::<_, std::io::Error>(()) - }; - match futures::try_join!(stdin, stdout, stderr) { - Ok(((), (), ())) => (), - Err(err) => { - if let Err(err) = child.kill().await { - task.log_message(format!( - "failed to kill '{PROXMOX_ACME_SH_PATH} {action}' command: {err}" - )); - } - bail!("'{}' failed: {}", PROXMOX_ACME_SH_PATH, err); - } - } - - let status = child.wait().await?; - if !status.success() { - bail!( - "'{} {}' exited with error ({})", - PROXMOX_ACME_SH_PATH, - action, - status.code().unwrap_or(-1) - ); - } - - 
Ok(&challenge.url) - } -} - -impl AcmePlugin for DnsPlugin { - fn setup<'fut, 'a: 'fut, 'b: 'fut, 'c: 'fut, 'd: 'fut>( - &'a mut self, - client: &'b mut AcmeClient, - authorization: &'c Authorization, - domain: &'d AcmeDomain, - task: Arc, - ) -> Pin> + Send + 'fut>> { - Box::pin(async move { - let result = self - .action(client, authorization, domain, task.clone(), "setup") - .await; - - let validation_delay = self.core.validation_delay.unwrap_or(30) as u64; - if validation_delay > 0 { - task.log_message(format!( - "Sleeping {validation_delay} seconds to wait for TXT record propagation" - )); - tokio::time::sleep(Duration::from_secs(validation_delay)).await; - } - result - }) - } - - fn teardown<'fut, 'a: 'fut, 'b: 'fut, 'c: 'fut, 'd: 'fut>( - &'a mut self, - client: &'b mut AcmeClient, - authorization: &'c Authorization, - domain: &'d AcmeDomain, - task: Arc, - ) -> Pin> + Send + 'fut>> { - Box::pin(async move { - self.action(client, authorization, domain, task, "teardown") - .await - .map(drop) - }) - } -} - -#[derive(Default)] -struct StandaloneServer { - abort_handle: Option, -} - -// In case the "order_certificates" future gets dropped between setup & teardown, let's also cancel -// the HTTP listener on Drop: -impl Drop for StandaloneServer { - fn drop(&mut self) { - self.stop(); - } -} - -impl StandaloneServer { - fn stop(&mut self) { - if let Some(abort) = self.abort_handle.take() { - abort.abort(); - } - } -} - -async fn standalone_respond( - req: Request, - path: Arc, - key_auth: Arc, -) -> Result>, hyper::Error> { - if req.method() == hyper::Method::GET && req.uri().path() == path.as_str() { - Ok(Response::builder() - .status(hyper::http::StatusCode::OK) - .body(key_auth.as_bytes().to_vec().into()) - .unwrap()) - } else { - Ok(Response::builder() - .status(hyper::http::StatusCode::NOT_FOUND) - .body("Not found.".into()) - .unwrap()) - } -} - -impl AcmePlugin for StandaloneServer { - fn setup<'fut, 'a: 'fut, 'b: 'fut, 'c: 'fut, 'd: 'fut>( - &'a mut self, 
- client: &'b mut AcmeClient, - authorization: &'c Authorization, - _domain: &'d AcmeDomain, - _task: Arc, - ) -> Pin> + Send + 'fut>> { - Box::pin(async move { - self.stop(); - - let challenge = extract_challenge(authorization, "http-01")?; - let token = challenge - .token() - .ok_or_else(|| format_err!("missing token in challenge"))?; - let key_auth = Arc::new(client.key_authorization(token)?); - let path = Arc::new(format!("/.well-known/acme-challenge/{token}")); - - // `[::]:80` first, then `*:80` - let dual = SocketAddr::new(IpAddr::from([0u16; 8]), 80); - let ipv4 = SocketAddr::new(IpAddr::from([0u8; 4]), 80); - let incoming = TcpListener::bind(dual) - .or_else(|_| TcpListener::bind(ipv4)) - .await?; - - let server = async move { - loop { - let key_auth = Arc::clone(&key_auth); - let path = Arc::clone(&path); - match incoming.accept().await { - Ok((tcp, _)) => { - let io = TokioIo::new(tcp); - let service = service_fn(move |request| { - standalone_respond( - request, - Arc::clone(&path), - Arc::clone(&key_auth), - ) - }); - - tokio::task::spawn(async move { - if let Err(err) = - http1::Builder::new().serve_connection(io, service).await - { - println!("Error serving connection: {err:?}"); - } - }); - } - Err(err) => println!("Error accepting connection: {err:?}"), - } - } - }; - let (future, abort) = futures::future::abortable(server); - self.abort_handle = Some(abort); - tokio::spawn(future); - - Ok(challenge.url.as_str()) - }) - } - - fn teardown<'fut, 'a: 'fut, 'b: 'fut, 'c: 'fut, 'd: 'fut>( - &'a mut self, - _client: &'b mut AcmeClient, - _authorization: &'c Authorization, - _domain: &'d AcmeDomain, - _task: Arc, - ) -> Pin> + Send + 'fut>> { - Box::pin(async move { - if let Some(abort) = self.abort_handle.take() { - abort.abort(); - } - Ok(()) - }) - } -} diff --git a/src/api2/node/certificates.rs b/src/api2/node/certificates.rs index 47ff8de5..73401c41 100644 --- a/src/api2/node/certificates.rs +++ b/src/api2/node/certificates.rs @@ -1,14 +1,11 @@ -use 
std::sync::Arc; -use std::time::Duration; - use anyhow::{bail, format_err, Error}; use openssl::pkey::PKey; use openssl::x509::X509; use serde::{Deserialize, Serialize}; -use tracing::{info, warn}; +use tracing::info; use pbs_api_types::{NODE_SCHEMA, PRIV_SYS_MODIFY}; -use proxmox_acme::async_client::AcmeClient; +use proxmox_acme_api::AcmeDomain; use proxmox_rest_server::WorkerTask; use proxmox_router::list_subdirs_api_method; use proxmox_router::SubdirMap; @@ -18,8 +15,6 @@ use proxmox_schema::api; use pbs_buildcfg::configdir; use pbs_tools::cert; -use crate::api2::types::AcmeDomain; -use crate::config::node::NodeConfig; use crate::server::send_certificate_renewal_mail; pub const ROUTER: Router = Router::new() @@ -268,193 +263,6 @@ pub async fn delete_custom_certificate() -> Result<(), Error> { Ok(()) } -struct OrderedCertificate { - certificate: hyper::body::Bytes, - private_key_pem: Vec, -} - -async fn order_certificate( - worker: Arc, - node_config: &NodeConfig, -) -> Result, Error> { - use proxmox_acme::authorization::Status; - use proxmox_acme::order::Identifier; - - let domains = node_config.acme_domains().try_fold( - Vec::::new(), - |mut acc, domain| -> Result<_, Error> { - let mut domain = domain?; - domain.domain.make_ascii_lowercase(); - if let Some(alias) = &mut domain.alias { - alias.make_ascii_lowercase(); - } - acc.push(domain); - Ok(acc) - }, - )?; - - let get_domain_config = |domain: &str| { - domains - .iter() - .find(|d| d.domain == domain) - .ok_or_else(|| format_err!("no config for domain '{}'", domain)) - }; - - if domains.is_empty() { - info!("No domains configured to be ordered from an ACME server."); - return Ok(None); - } - - let (plugins, _) = crate::config::acme::plugin::config()?; - - let mut acme = node_config.acme_client().await?; - - info!("Placing ACME order"); - let order = acme - .new_order(domains.iter().map(|d| d.domain.to_ascii_lowercase())) - .await?; - info!("Order URL: {}", order.location); - - let identifiers: Vec = order - 
.data - .identifiers - .iter() - .map(|identifier| match identifier { - Identifier::Dns(domain) => domain.clone(), - }) - .collect(); - - for auth_url in &order.data.authorizations { - info!("Getting authorization details from '{auth_url}'"); - let mut auth = acme.get_authorization(auth_url).await?; - - let domain = match &mut auth.identifier { - Identifier::Dns(domain) => domain.to_ascii_lowercase(), - }; - - if auth.status == Status::Valid { - info!("{domain} is already validated!"); - continue; - } - - info!("The validation for {domain} is pending"); - let domain_config: &AcmeDomain = get_domain_config(&domain)?; - let plugin_id = domain_config.plugin.as_deref().unwrap_or("standalone"); - let mut plugin_cfg = crate::acme::get_acme_plugin(&plugins, plugin_id)? - .ok_or_else(|| format_err!("plugin '{plugin_id}' for domain '{domain}' not found!"))?; - - info!("Setting up validation plugin"); - let validation_url = plugin_cfg - .setup(&mut acme, &auth, domain_config, Arc::clone(&worker)) - .await?; - - let result = request_validation(&mut acme, auth_url, validation_url).await; - - if let Err(err) = plugin_cfg - .teardown(&mut acme, &auth, domain_config, Arc::clone(&worker)) - .await - { - warn!("Failed to teardown plugin '{plugin_id}' for domain '{domain}' - {err}"); - } - - result?; - } - - info!("All domains validated"); - info!("Creating CSR"); - - let csr = proxmox_acme::util::Csr::generate(&identifiers, &Default::default())?; - let mut finalize_error_cnt = 0u8; - let order_url = &order.location; - let mut order; - loop { - use proxmox_acme::order::Status; - - order = acme.get_order(order_url).await?; - - match order.status { - Status::Pending => { - info!("still pending, trying to finalize anyway"); - let finalize = order - .finalize - .as_deref() - .ok_or_else(|| format_err!("missing 'finalize' URL in order"))?; - if let Err(err) = acme.finalize(finalize, &csr.data).await { - if finalize_error_cnt >= 5 { - return Err(err); - } - - finalize_error_cnt += 1; - } 
- tokio::time::sleep(Duration::from_secs(5)).await; - } - Status::Ready => { - info!("order is ready, finalizing"); - let finalize = order - .finalize - .as_deref() - .ok_or_else(|| format_err!("missing 'finalize' URL in order"))?; - acme.finalize(finalize, &csr.data).await?; - tokio::time::sleep(Duration::from_secs(5)).await; - } - Status::Processing => { - info!("still processing, trying again in 30 seconds"); - tokio::time::sleep(Duration::from_secs(30)).await; - } - Status::Valid => { - info!("valid"); - break; - } - other => bail!("order status: {:?}", other), - } - } - - info!("Downloading certificate"); - let certificate = acme - .get_certificate( - order - .certificate - .as_deref() - .ok_or_else(|| format_err!("missing certificate url in finalized order"))?, - ) - .await?; - - Ok(Some(OrderedCertificate { - certificate, - private_key_pem: csr.private_key_pem, - })) -} - -async fn request_validation( - acme: &mut AcmeClient, - auth_url: &str, - validation_url: &str, -) -> Result<(), Error> { - info!("Triggering validation"); - acme.request_challenge_validation(validation_url).await?; - - info!("Sleeping for 5 seconds"); - tokio::time::sleep(Duration::from_secs(5)).await; - - loop { - use proxmox_acme::authorization::Status; - - let auth = acme.get_authorization(auth_url).await?; - match auth.status { - Status::Pending => { - info!("Status is still 'pending', trying again in 10 seconds"); - tokio::time::sleep(Duration::from_secs(10)).await; - } - Status::Valid => return Ok(()), - other => bail!( - "validating challenge '{}' failed - status: {:?}", - validation_url, - other - ), - } - } -} - #[api( input: { properties: { @@ -524,9 +332,30 @@ fn spawn_certificate_worker( let auth_id = rpcenv.get_auth_id().unwrap(); + let acme_config = if let Some(cfg) = node_config.acme_config().transpose()? { + cfg + } else { + proxmox_acme_api::parse_acme_config_string("account=default")? 
+ }; + + let domains = node_config.acme_domains().try_fold( + Vec::::new(), + |mut acc, domain| -> Result<_, Error> { + let mut domain = domain?; + domain.domain.make_ascii_lowercase(); + if let Some(alias) = &mut domain.alias { + alias.make_ascii_lowercase(); + } + acc.push(domain); + Ok(acc) + }, + )?; + WorkerTask::spawn(name, None, auth_id, true, move |worker| async move { let work = || async { - if let Some(cert) = order_certificate(worker, &node_config).await? { + if let Some(cert) = + proxmox_acme_api::order_certificate(worker, &acme_config, &domains).await? + { crate::config::set_proxy_certificate(&cert.certificate, &cert.private_key_pem)?; crate::server::reload_proxy_certificate().await?; } @@ -562,16 +391,20 @@ pub fn revoke_acme_cert(rpcenv: &mut dyn RpcEnvironment) -> Result, - - /// The plugin to use to validate this domain. - /// - /// Empty means standalone HTTP validation is used. - #[serde(skip_serializing_if = "Option::is_none")] - pub plugin: Option, -} - -pub const ACME_DOMAIN_PROPERTY_SCHEMA: Schema = - StringSchema::new("ACME domain configuration string") - .format(&ApiStringFormat::PropertyString(&AcmeDomain::API_SCHEMA)) - .schema(); - -#[api( - properties: { - schema: { - type: Object, - additional_properties: true, - properties: {}, - }, - type: { - type: String, - }, - }, -)] -#[derive(Serialize)] -/// Schema for an ACME challenge plugin. -pub struct AcmeChallengeSchema { - /// Plugin ID. - pub id: String, - - /// Human readable name, falls back to id. - pub name: String, - - /// Plugin Type. - #[serde(rename = "type")] - pub ty: &'static str, - - /// The plugin's parameter schema. - pub schema: Value, -} diff --git a/src/api2/types/mod.rs b/src/api2/types/mod.rs index afc34b30..34193685 100644 --- a/src/api2/types/mod.rs +++ b/src/api2/types/mod.rs @@ -4,9 +4,6 @@ use anyhow::bail; use proxmox_schema::*; -mod acme; -pub use acme::*; - // File names: may not contain slashes, may not start with "." 
pub const FILENAME_FORMAT: ApiStringFormat = ApiStringFormat::VerifyFn(|name| { if name.starts_with('.') { diff --git a/src/config/acme/mod.rs b/src/config/acme/mod.rs index 01ab6223..73486df9 100644 --- a/src/config/acme/mod.rs +++ b/src/config/acme/mod.rs @@ -5,12 +5,10 @@ use anyhow::Error; use serde_json::Value; use pbs_api_types::PROXMOX_SAFE_ID_REGEX; -use proxmox_acme_api::{AcmeAccountName, KnownAcmeDirectory, KNOWN_ACME_DIRECTORIES}; +use proxmox_acme_api::{AcmeAccountName, AcmeChallengeSchema}; use proxmox_sys::error::SysError; use proxmox_sys::fs::{file_read_string, CreateOptions}; -use crate::api2::types::AcmeChallengeSchema; - pub(crate) const ACME_DIR: &str = pbs_buildcfg::configdir!("/acme"); pub(crate) const ACME_ACCOUNT_DIR: &str = pbs_buildcfg::configdir!("/acme/accounts"); @@ -34,8 +32,6 @@ pub(crate) fn make_acme_dir() -> Result<(), Error> { create_acme_subdir(ACME_DIR) } -pub const DEFAULT_ACME_DIRECTORY_ENTRY: &KnownAcmeDirectory = &KNOWN_ACME_DIRECTORIES[0]; - pub fn foreach_acme_account(mut func: F) -> Result<(), Error> where F: FnMut(AcmeAccountName) -> ControlFlow>, @@ -79,7 +75,7 @@ pub fn load_dns_challenge_schema() -> Result, Error> { .and_then(Value::as_str) .unwrap_or(id) .to_owned(), - ty: "dns", + ty: "dns".into(), schema: schema.to_owned(), }) .collect()) diff --git a/src/config/acme/plugin.rs b/src/config/acme/plugin.rs index 8ce852ec..4b4a216e 100644 --- a/src/config/acme/plugin.rs +++ b/src/config/acme/plugin.rs @@ -1,104 +1,16 @@ use std::sync::LazyLock; use anyhow::Error; -use serde::{Deserialize, Serialize}; use serde_json::Value; -use pbs_api_types::PROXMOX_SAFE_ID_FORMAT; -use proxmox_schema::{api, ApiType, Schema, StringSchema, Updater}; +use proxmox_acme_api::{DnsPlugin, StandalonePlugin, PLUGIN_ID_SCHEMA}; +use proxmox_schema::{ApiType, Schema}; use proxmox_section_config::{SectionConfig, SectionConfigData, SectionConfigPlugin}; use pbs_config::{open_backup_lockfile, BackupLockGuard}; -pub const PLUGIN_ID_SCHEMA: Schema = 
StringSchema::new("ACME Challenge Plugin ID.") - .format(&PROXMOX_SAFE_ID_FORMAT) - .min_length(1) - .max_length(32) - .schema(); - pub static CONFIG: LazyLock = LazyLock::new(init); -#[api( - properties: { - id: { schema: PLUGIN_ID_SCHEMA }, - }, -)] -#[derive(Deserialize, Serialize)] -/// Standalone ACME Plugin for the http-1 challenge. -pub struct StandalonePlugin { - /// Plugin ID. - id: String, -} - -impl Default for StandalonePlugin { - fn default() -> Self { - Self { - id: "standalone".to_string(), - } - } -} - -#[api( - properties: { - id: { schema: PLUGIN_ID_SCHEMA }, - disable: { - optional: true, - default: false, - }, - "validation-delay": { - default: 30, - optional: true, - minimum: 0, - maximum: 2 * 24 * 60 * 60, - }, - }, -)] -/// DNS ACME Challenge Plugin core data. -#[derive(Deserialize, Serialize, Updater)] -#[serde(rename_all = "kebab-case")] -pub struct DnsPluginCore { - /// Plugin ID. - #[updater(skip)] - pub id: String, - - /// DNS API Plugin Id. - pub api: String, - - /// Extra delay in seconds to wait before requesting validation. - /// - /// Allows to cope with long TTL of DNS records. - #[serde(skip_serializing_if = "Option::is_none", default)] - pub validation_delay: Option, - - /// Flag to disable the config. - #[serde(skip_serializing_if = "Option::is_none", default)] - pub disable: Option, -} - -#[api( - properties: { - core: { type: DnsPluginCore }, - }, -)] -/// DNS ACME Challenge Plugin. -#[derive(Deserialize, Serialize)] -#[serde(rename_all = "kebab-case")] -pub struct DnsPlugin { - #[serde(flatten)] - pub core: DnsPluginCore, - - // We handle this property separately in the API calls. - /// DNS plugin data (base64url encoded without padding). - #[serde(with = "proxmox_serde::string_as_base64url_nopad")] - pub data: String, -} - -impl DnsPlugin { - pub fn decode_data(&self, output: &mut Vec) -> Result<(), Error> { - Ok(proxmox_base64::url::decode_to_vec(&self.data, output)?) 
- } -} - fn init() -> SectionConfig { let mut config = SectionConfig::new(&PLUGIN_ID_SCHEMA); diff --git a/src/config/node.rs b/src/config/node.rs index e4b66a20..6865b815 100644 --- a/src/config/node.rs +++ b/src/config/node.rs @@ -9,14 +9,14 @@ use pbs_api_types::{ OPENSSL_CIPHERS_TLS_1_3_SCHEMA, }; use proxmox_acme::async_client::AcmeClient; -use proxmox_acme_api::AcmeAccountName; +use proxmox_acme_api::{AcmeAccountName, AcmeConfig, AcmeDomain, ACME_DOMAIN_PROPERTY_SCHEMA}; use proxmox_http::ProxyConfig; use proxmox_schema::{api, ApiStringFormat, ApiType, Updater}; use pbs_buildcfg::configdir; use pbs_config::{open_backup_lockfile, BackupLockGuard}; -use crate::api2::types::{AcmeDomain, ACME_DOMAIN_PROPERTY_SCHEMA, HTTP_PROXY_SCHEMA}; +use crate::api2::types::HTTP_PROXY_SCHEMA; const CONF_FILE: &str = configdir!("/node.cfg"); const LOCK_FILE: &str = configdir!("/.node.lck"); @@ -43,20 +43,6 @@ pub fn save_config(config: &NodeConfig) -> Result<(), Error> { pbs_config::replace_backup_config(CONF_FILE, &raw) } -#[api( - properties: { - account: { type: AcmeAccountName }, - } -)] -#[derive(Deserialize, Serialize)] -/// The ACME configuration. -/// -/// Currently only contains the name of the account use. -pub struct AcmeConfig { - /// Account to use to acquire ACME certificates. - account: AcmeAccountName, -} - /// All available languages in Proxmox. Taken from proxmox-i18n repository. /// pt_BR, zh_CN, and zh_TW use the same case in the translation files. // TODO: auto-generate from available translations @@ -242,7 +228,7 @@ impl NodeConfig { pub async fn acme_client(&self) -> Result { let account = if let Some(cfg) = self.acme_config().transpose()? { - cfg.account + AcmeAccountName::from_string(cfg.account)? } else { AcmeAccountName::from_string("default".to_string())? 
// should really not happen }; diff --git a/src/lib.rs b/src/lib.rs index 8633378c..828f5842 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,8 +27,6 @@ pub(crate) mod auth; pub mod tape; -pub mod acme; - pub mod client_helpers; pub mod traffic_control_cache; -- 2.47.3 From s.rufinatscha at proxmox.com Thu Jan 8 12:26:22 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:26:22 +0100 Subject: [pbs-devel] [PATCH proxmox v5 2/4] acme: introduce http_status module In-Reply-To: <20260108112629.189670-1-s.rufinatscha@proxmox.com> References: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Message-ID: <20260108112629.189670-3-s.rufinatscha@proxmox.com> Introduce an internal http_status module with the common ACME HTTP response codes, and replace use of crate::request::CREATED as well as direct numeric status code usages. Signed-off-by: Samuel Rufinatscha --- proxmox-acme/src/account.rs | 8 ++++---- proxmox-acme/src/async_client.rs | 4 ++-- proxmox-acme/src/lib.rs | 2 ++ proxmox-acme/src/request.rs | 11 ++++++++++- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/proxmox-acme/src/account.rs b/proxmox-acme/src/account.rs index d8eb3e73..ea1a3c60 100644 --- a/proxmox-acme/src/account.rs +++ b/proxmox-acme/src/account.rs @@ -84,7 +84,7 @@ impl Account { method: "POST", content_type: crate::request::JSON_CONTENT_TYPE, body, - expected: crate::request::CREATED, + expected: crate::http_status::CREATED, }; Ok(NewOrder::new(request)) @@ -106,7 +106,7 @@ impl Account { method: "POST", content_type: crate::request::JSON_CONTENT_TYPE, body, - expected: 200, + expected: crate::http_status::OK, }) } @@ -131,7 +131,7 @@ impl Account { method: "POST", content_type: crate::request::JSON_CONTENT_TYPE, body, - expected: 200, + expected: crate::http_status::OK, }) } @@ -321,7 +321,7 @@ impl AccountCreator { method: "POST", content_type: crate::request::JSON_CONTENT_TYPE, body, - expected: crate::request::CREATED, + expected: 
crate::http_status::CREATED, }) } diff --git a/proxmox-acme/src/async_client.rs b/proxmox-acme/src/async_client.rs index 2ff3ba22..043648bb 100644 --- a/proxmox-acme/src/async_client.rs +++ b/proxmox-acme/src/async_client.rs @@ -498,7 +498,7 @@ impl AcmeClient { method: "GET", content_type: "", body: String::new(), - expected: 200, + expected: crate::http_status::OK, }, nonce, ) @@ -550,7 +550,7 @@ impl AcmeClient { method: "HEAD", content_type: "", body: String::new(), - expected: 200, + expected: crate::http_status::OK, }, nonce, ) diff --git a/proxmox-acme/src/lib.rs b/proxmox-acme/src/lib.rs index 6722030c..6051a025 100644 --- a/proxmox-acme/src/lib.rs +++ b/proxmox-acme/src/lib.rs @@ -70,6 +70,8 @@ pub use order::Order; #[cfg(feature = "impl")] pub use order::NewOrder; #[cfg(feature = "impl")] +pub(crate) use request::http_status; +#[cfg(feature = "impl")] pub use request::ErrorResponse; /// Header name for nonces. diff --git a/proxmox-acme/src/request.rs b/proxmox-acme/src/request.rs index dadfc5af..341ce53e 100644 --- a/proxmox-acme/src/request.rs +++ b/proxmox-acme/src/request.rs @@ -1,7 +1,6 @@ use serde::Deserialize; pub(crate) const JSON_CONTENT_TYPE: &str = "application/jose+json"; -pub(crate) const CREATED: u16 = 201; /// A request which should be performed on the ACME provider. pub(crate) struct Request { @@ -21,6 +20,16 @@ pub(crate) struct Request { pub(crate) expected: u16, } +/// Common HTTP status codes used in ACME responses. +pub(crate) mod http_status { + /// 200 OK + pub(crate) const OK: u16 = 200; + /// 201 Created + pub(crate) const CREATED: u16 = 201; + /// 204 No Content + pub(crate) const NO_CONTENT: u16 = 204; +} + /// An ACME error response contains a specially formatted type string, and can optionally /// contain textual details and a set of sub problems. 
#[derive(Clone, Debug, Deserialize)] -- 2.47.3 From s.rufinatscha at proxmox.com Thu Jan 8 12:26:26 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:26:26 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v5 2/5] acme: include proxmox-acme-api dependency In-Reply-To: <20260108112629.189670-1-s.rufinatscha@proxmox.com> References: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Message-ID: <20260108112629.189670-7-s.rufinatscha@proxmox.com> PBS currently uses its own ACME client and API logic, while PDM uses the factored out proxmox-acme and proxmox-acme-api crates. This duplication risks differences in behaviour and requires ACME maintenance in two places. This patch is part of a series to move PBS over to the shared ACME stack. Changes: - Add proxmox-acme-api with the "impl" feature as a dependency. - Initialize proxmox_acme_api in proxmox-backup- api, manager and proxy. * Inits PBS config dir /acme as proxmox ACME directory Signed-off-by: Samuel Rufinatscha --- Cargo.toml | 3 +++ src/bin/proxmox-backup-api.rs | 2 ++ src/bin/proxmox-backup-manager.rs | 2 ++ src/bin/proxmox-backup-proxy.rs | 1 + 4 files changed, 8 insertions(+) diff --git a/Cargo.toml b/Cargo.toml index 1aa57ae5..feae351d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -101,6 +101,7 @@ pbs-api-types = "1.0.8" # other proxmox crates pathpatterns = "1" proxmox-acme = "1" +proxmox-acme-api = { version = "1", features = [ "impl" ] } pxar = "1" # PBS workspace @@ -251,6 +252,7 @@ pbs-api-types.workspace = true # in their respective repo proxmox-acme.workspace = true +proxmox-acme-api.workspace = true pxar.workspace = true # proxmox-backup workspace/internal crates @@ -269,6 +271,7 @@ proxmox-rrd-api-types.workspace = true [patch.crates-io] #pbs-api-types = { path = "../proxmox/pbs-api-types" } #proxmox-acme = { path = "../proxmox/proxmox-acme" } +#proxmox-acme-api = { path = "../proxmox/proxmox-acme-api" } #proxmox-api-macro = { path = "../proxmox/proxmox-api-macro" } 
#proxmox-apt = { path = "../proxmox/proxmox-apt" } #proxmox-apt-api-types = { path = "../proxmox/proxmox-apt-api-types" } diff --git a/src/bin/proxmox-backup-api.rs b/src/bin/proxmox-backup-api.rs index 417e9e97..d0091dca 100644 --- a/src/bin/proxmox-backup-api.rs +++ b/src/bin/proxmox-backup-api.rs @@ -14,6 +14,7 @@ use proxmox_rest_server::{ApiConfig, RestServer}; use proxmox_router::RpcEnvironmentType; use proxmox_sys::fs::CreateOptions; +use pbs_buildcfg::configdir; use proxmox_backup::auth_helpers::*; use proxmox_backup::config; use proxmox_backup::server::auth::check_pbs_auth; @@ -78,6 +79,7 @@ async fn run() -> Result<(), Error> { let mut command_sock = proxmox_daemon::command_socket::CommandSocket::new(backup_user.gid); proxmox_product_config::init(backup_user.clone(), pbs_config::priv_user()?); + proxmox_acme_api::init(configdir!("/acme"), true)?; let dir_opts = CreateOptions::new() .owner(backup_user.uid) diff --git a/src/bin/proxmox-backup-manager.rs b/src/bin/proxmox-backup-manager.rs index f8365070..30bc8da9 100644 --- a/src/bin/proxmox-backup-manager.rs +++ b/src/bin/proxmox-backup-manager.rs @@ -19,6 +19,7 @@ use proxmox_router::{cli::*, RpcEnvironment}; use proxmox_schema::api; use proxmox_sys::fs::CreateOptions; +use pbs_buildcfg::configdir; use pbs_client::{display_task_log, view_task_result}; use pbs_config::sync; use pbs_tools::json::required_string_param; @@ -667,6 +668,7 @@ async fn run() -> Result<(), Error> { .init()?; proxmox_backup::server::notifications::init()?; proxmox_product_config::init(pbs_config::backup_user()?, pbs_config::priv_user()?); + proxmox_acme_api::init(configdir!("/acme"), false)?; let cmd_def = CliCommandMap::new() .insert("acl", acl_commands()) diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs index 870208fe..eea44a7d 100644 --- a/src/bin/proxmox-backup-proxy.rs +++ b/src/bin/proxmox-backup-proxy.rs @@ -188,6 +188,7 @@ async fn run() -> Result<(), Error> { 
proxmox_backup::server::notifications::init()?; metric_collection::init()?; proxmox_product_config::init(pbs_config::backup_user()?, pbs_config::priv_user()?); + proxmox_acme_api::init(configdir!("/acme"), false)?; let mut indexpath = PathBuf::from(pbs_buildcfg::JS_DIR); indexpath.push("index.hbs"); -- 2.47.3 From s.rufinatscha at proxmox.com Thu Jan 8 12:26:21 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:26:21 +0100 Subject: [pbs-devel] [PATCH proxmox v5 1/4] acme: reduce visibility of Request type In-Reply-To: <20260108112629.189670-1-s.rufinatscha@proxmox.com> References: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Message-ID: <20260108112629.189670-2-s.rufinatscha@proxmox.com> Currently, the low-level ACME Request type is publicly exposed, even though users are expected to go through AcmeClient and proxmox-acme-api handlers. This patch reduces visibility so that the Request type and related fields/methods are crate-internal only. Signed-off-by: Samuel Rufinatscha --- proxmox-acme/src/account.rs | 94 ++----------------------------- proxmox-acme/src/async_client.rs | 2 +- proxmox-acme/src/authorization.rs | 30 ---------- proxmox-acme/src/client.rs | 6 +- proxmox-acme/src/lib.rs | 4 -- proxmox-acme/src/order.rs | 2 +- proxmox-acme/src/request.rs | 12 ++-- 7 files changed, 16 insertions(+), 134 deletions(-) diff --git a/proxmox-acme/src/account.rs b/proxmox-acme/src/account.rs index f763c1e9..d8eb3e73 100644 --- a/proxmox-acme/src/account.rs +++ b/proxmox-acme/src/account.rs @@ -8,12 +8,11 @@ use openssl::pkey::{PKey, Private}; use serde::{Deserialize, Serialize}; use serde_json::Value; -use crate::authorization::{Authorization, GetAuthorization}; use crate::b64u; use crate::directory::Directory; use crate::jws::Jws; use crate::key::{Jwk, PublicKey}; -use crate::order::{NewOrder, Order, OrderData}; +use crate::order::{NewOrder, OrderData}; use crate::request::Request; use crate::types::{AccountData, AccountStatus, 
ExternalAccountBinding}; use crate::Error; @@ -92,7 +91,7 @@ impl Account { } /// Prepare a "POST-as-GET" request to fetch data. Low level helper. - pub fn get_request(&self, url: &str, nonce: &str) -> Result { + pub(crate) fn get_request(&self, url: &str, nonce: &str) -> Result { let key = PKey::private_key_from_pem(self.private_key.as_bytes())?; let body = serde_json::to_string(&Jws::new_full( &key, @@ -112,7 +111,7 @@ impl Account { } /// Prepare a JSON POST request. Low level helper. - pub fn post_request( + pub(crate) fn post_request( &self, url: &str, nonce: &str, @@ -136,31 +135,6 @@ impl Account { }) } - /// Prepare a JSON POST request. - fn post_request_raw_payload( - &self, - url: &str, - nonce: &str, - payload: String, - ) -> Result { - let key = PKey::private_key_from_pem(self.private_key.as_bytes())?; - let body = serde_json::to_string(&Jws::new_full( - &key, - Some(self.location.clone()), - url.to_owned(), - nonce.to_owned(), - payload, - )?)?; - - Ok(Request { - url: url.to_owned(), - method: "POST", - content_type: crate::request::JSON_CONTENT_TYPE, - body, - expected: 200, - }) - } - /// Get the "key authorization" for a token. pub fn key_authorization(&self, token: &str) -> Result { let key = PKey::private_key_from_pem(self.private_key.as_bytes())?; @@ -176,64 +150,6 @@ impl Account { Ok(b64u::encode(digest)) } - /// Prepare a request to update account data. - /// - /// This is a rather low level interface. You should know what you're doing. - pub fn update_account_request( - &self, - nonce: &str, - data: &T, - ) -> Result { - self.post_request(&self.location, nonce, data) - } - - /// Prepare a request to deactivate this account. - pub fn deactivate_account_request(&self, nonce: &str) -> Result { - self.post_request_raw_payload( - &self.location, - nonce, - r#"{"status":"deactivated"}"#.to_string(), - ) - } - - /// Prepare a request to query an Authorization for an Order. - /// - /// Returns `Ok(None)` if `auth_index` is out of out of range. 
You can query the number of - /// authorizations from via [`Order::authorization_len`] or by manually inspecting its - /// `.data.authorization` vector. - pub fn get_authorization( - &self, - order: &Order, - auth_index: usize, - nonce: &str, - ) -> Result, Error> { - match order.authorization(auth_index) { - None => Ok(None), - Some(url) => Ok(Some(GetAuthorization::new(self.get_request(url, nonce)?))), - } - } - - /// Prepare a request to validate a Challenge from an Authorization. - /// - /// Returns `Ok(None)` if `challenge_index` is out of out of range. The challenge count is - /// available by inspecting the [`Authorization::challenges`] vector. - /// - /// This returns a raw `Request` since validation takes some time and the `Authorization` - /// object has to be re-queried and its `status` inspected. - pub fn validate_challenge( - &self, - authorization: &Authorization, - challenge_index: usize, - nonce: &str, - ) -> Result, Error> { - match authorization.challenges.get(challenge_index) { - None => Ok(None), - Some(challenge) => self - .post_request_raw_payload(&challenge.url, nonce, "{}".to_string()) - .map(Some), - } - } - /// Prepare a request to revoke a certificate. /// /// The certificate can be either PEM or DER formatted. @@ -274,7 +190,7 @@ pub struct CertificateRevocation<'a> { impl CertificateRevocation<'_> { /// Create the revocation request using the specified nonce for the given directory. - pub fn request(&self, directory: &Directory, nonce: &str) -> Result { + pub(crate) fn request(&self, directory: &Directory, nonce: &str) -> Result { let revoke_cert = directory.data.revoke_cert.as_ref().ok_or_else(|| { Error::Custom("no 'revokeCert' URL specified by provider".to_string()) })?; @@ -364,7 +280,7 @@ impl AccountCreator { /// the resulting request. /// Changing the private key between using the request and passing the response to /// [`response`](AccountCreator::response()) will render the account unusable! 
- pub fn request(&self, directory: &Directory, nonce: &str) -> Result { + pub(crate) fn request(&self, directory: &Directory, nonce: &str) -> Result { let key = self.key.as_deref().ok_or(Error::MissingKey)?; let url = directory.new_account_url().ok_or_else(|| { Error::Custom("no 'newAccount' URL specified by provider".to_string()) diff --git a/proxmox-acme/src/async_client.rs b/proxmox-acme/src/async_client.rs index dc755fb9..2ff3ba22 100644 --- a/proxmox-acme/src/async_client.rs +++ b/proxmox-acme/src/async_client.rs @@ -10,7 +10,7 @@ use proxmox_http::{client::Client, Body}; use crate::account::AccountCreator; use crate::order::{Order, OrderData}; -use crate::Request as AcmeRequest; +use crate::request::Request as AcmeRequest; use crate::{Account, Authorization, Challenge, Directory, Error, ErrorResponse}; /// A non-blocking Acme client using tokio/hyper. diff --git a/proxmox-acme/src/authorization.rs b/proxmox-acme/src/authorization.rs index 28bc1b4b..7027381a 100644 --- a/proxmox-acme/src/authorization.rs +++ b/proxmox-acme/src/authorization.rs @@ -6,8 +6,6 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use crate::order::Identifier; -use crate::request::Request; -use crate::Error; /// Status of an [`Authorization`]. #[derive(Clone, Copy, Debug, Eq, PartialEq, Deserialize, Serialize)] @@ -132,31 +130,3 @@ impl Challenge { fn is_false(b: &bool) -> bool { !*b } - -/// Represents an in-flight query for an authorization. -/// -/// This is created via [`Account::get_authorization`](crate::Account::get_authorization()). -pub struct GetAuthorization { - //order: OrderData, - /// The request to send to the ACME provider. This is wrapped in an option in order to allow - /// moving it out instead of copying the contents. - /// - /// When generated via [`Account::get_authorization`](crate::Account::get_authorization()), - /// this is guaranteed to be `Some`. 
- /// - /// The response should be passed to the the [`response`](GetAuthorization::response()) method. - pub request: Option, -} - -impl GetAuthorization { - pub(crate) fn new(request: Request) -> Self { - Self { - request: Some(request), - } - } - - /// Deal with the response we got from the server. - pub fn response(self, response_body: &[u8]) -> Result { - Ok(serde_json::from_slice(response_body)?) - } -} diff --git a/proxmox-acme/src/client.rs b/proxmox-acme/src/client.rs index 931f7245..5c812567 100644 --- a/proxmox-acme/src/client.rs +++ b/proxmox-acme/src/client.rs @@ -7,8 +7,8 @@ use serde::{Deserialize, Serialize}; use crate::b64u; use crate::error; use crate::order::OrderData; -use crate::request::ErrorResponse; -use crate::{Account, Authorization, Challenge, Directory, Error, Order, Request}; +use crate::request::{ErrorResponse, Request}; +use crate::{Account, Authorization, Challenge, Directory, Error, Order}; macro_rules! format_err { ($($fmt:tt)*) => { Error::Client(format!($($fmt)*)) }; @@ -564,7 +564,7 @@ impl Client { } /// Low-level API to run an n API request. This automatically updates the current nonce! - pub fn run_request(&mut self, request: Request) -> Result { + pub(crate) fn run_request(&mut self, request: Request) -> Result { self.inner.run_request(request) } diff --git a/proxmox-acme/src/lib.rs b/proxmox-acme/src/lib.rs index df722629..6722030c 100644 --- a/proxmox-acme/src/lib.rs +++ b/proxmox-acme/src/lib.rs @@ -66,10 +66,6 @@ pub use error::Error; #[doc(inline)] pub use order::Order; -#[cfg(feature = "impl")] -#[doc(inline)] -pub use request::Request; - // we don't inline these: #[cfg(feature = "impl")] pub use order::NewOrder; diff --git a/proxmox-acme/src/order.rs b/proxmox-acme/src/order.rs index b6551004..432a81a4 100644 --- a/proxmox-acme/src/order.rs +++ b/proxmox-acme/src/order.rs @@ -153,7 +153,7 @@ pub struct NewOrder { //order: OrderData, /// The request to execute to place the order. 
When creating a [`NewOrder`] via /// [`Account::new_order`](crate::Account::new_order) this is guaranteed to be `Some`. - pub request: Option, + pub(crate) request: Option, } impl NewOrder { diff --git a/proxmox-acme/src/request.rs b/proxmox-acme/src/request.rs index 78a90913..dadfc5af 100644 --- a/proxmox-acme/src/request.rs +++ b/proxmox-acme/src/request.rs @@ -4,21 +4,21 @@ pub(crate) const JSON_CONTENT_TYPE: &str = "application/jose+json"; pub(crate) const CREATED: u16 = 201; /// A request which should be performed on the ACME provider. -pub struct Request { +pub(crate) struct Request { /// The complete URL to send the request to. - pub url: String, + pub(crate) url: String, /// The HTTP method name to use. - pub method: &'static str, + pub(crate) method: &'static str, /// The `Content-Type` header to pass along. - pub content_type: &'static str, + pub(crate) content_type: &'static str, /// The body to pass along with request, or an empty string. - pub body: String, + pub(crate) body: String, /// The expected status code a compliant ACME provider will return on success. - pub expected: u16, + pub(crate) expected: u16, } /// An ACME error response contains a specially formatted type string, and can optionally -- 2.47.3 From s.rufinatscha at proxmox.com Thu Jan 8 12:26:25 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:26:25 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v5 1/5] acme: clean up ACME-related imports In-Reply-To: <20260108112629.189670-1-s.rufinatscha@proxmox.com> References: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Message-ID: <20260108112629.189670-6-s.rufinatscha@proxmox.com> Clean up ACME-related imports to make it easier to switch to the factored out proxmox/ ACME implementation later. 
Signed-off-by: Samuel Rufinatscha --- src/acme/plugin.rs | 3 +-- src/api2/config/acme.rs | 10 ++++------ src/api2/node/certificates.rs | 7 +++---- src/api2/types/acme.rs | 3 +-- src/bin/proxmox-backup-manager.rs | 12 +++++------- src/bin/proxmox-backup-proxy.rs | 14 ++++++-------- src/config/acme/mod.rs | 3 +-- src/config/acme/plugin.rs | 2 +- src/config/node.rs | 6 ++---- 9 files changed, 24 insertions(+), 36 deletions(-) diff --git a/src/acme/plugin.rs b/src/acme/plugin.rs index f756e9b5..993d729b 100644 --- a/src/acme/plugin.rs +++ b/src/acme/plugin.rs @@ -19,11 +19,10 @@ use tokio::net::TcpListener; use tokio::process::Command; use proxmox_acme::{Authorization, Challenge}; +use proxmox_rest_server::WorkerTask; use crate::acme::AcmeClient; use crate::api2::types::AcmeDomain; -use proxmox_rest_server::WorkerTask; - use crate::config::acme::plugin::{DnsPlugin, PluginData}; const PROXMOX_ACME_SH_PATH: &str = "/usr/share/proxmox-acme/proxmox-acme"; diff --git a/src/api2/config/acme.rs b/src/api2/config/acme.rs index 35c3fb77..18671639 100644 --- a/src/api2/config/acme.rs +++ b/src/api2/config/acme.rs @@ -10,22 +10,20 @@ use serde::{Deserialize, Serialize}; use serde_json::{json, Value}; use tracing::{info, warn}; +use pbs_api_types::{Authid, PRIV_SYS_MODIFY}; +use proxmox_acme::types::AccountData as AcmeAccountData; +use proxmox_acme::Account; +use proxmox_rest_server::WorkerTask; use proxmox_router::{ http_bail, list_subdirs_api_method, Permission, Router, RpcEnvironment, SubdirMap, }; use proxmox_schema::{api, param_bail}; -use proxmox_acme::types::AccountData as AcmeAccountData; -use proxmox_acme::Account; - -use pbs_api_types::{Authid, PRIV_SYS_MODIFY}; - use crate::acme::AcmeClient; use crate::api2::types::{AcmeAccountName, AcmeChallengeSchema, KnownAcmeDirectory}; use crate::config::acme::plugin::{ self, DnsPlugin, DnsPluginCore, DnsPluginCoreUpdater, PLUGIN_ID_SCHEMA, }; -use proxmox_rest_server::WorkerTask; pub(crate) const ROUTER: Router = Router::new() 
.get(&list_subdirs_api_method!(SUBDIRS)) diff --git a/src/api2/node/certificates.rs b/src/api2/node/certificates.rs index 61ef910e..6b1d87d2 100644 --- a/src/api2/node/certificates.rs +++ b/src/api2/node/certificates.rs @@ -5,23 +5,22 @@ use anyhow::{bail, format_err, Error}; use openssl::pkey::PKey; use openssl::x509::X509; use serde::{Deserialize, Serialize}; -use tracing::info; +use tracing::{info, warn}; +use pbs_api_types::{NODE_SCHEMA, PRIV_SYS_MODIFY}; +use proxmox_rest_server::WorkerTask; use proxmox_router::list_subdirs_api_method; use proxmox_router::SubdirMap; use proxmox_router::{Permission, Router, RpcEnvironment}; use proxmox_schema::api; -use pbs_api_types::{NODE_SCHEMA, PRIV_SYS_MODIFY}; use pbs_buildcfg::configdir; use pbs_tools::cert; -use tracing::warn; use crate::acme::AcmeClient; use crate::api2::types::AcmeDomain; use crate::config::node::NodeConfig; use crate::server::send_certificate_renewal_mail; -use proxmox_rest_server::WorkerTask; pub const ROUTER: Router = Router::new() .get(&list_subdirs_api_method!(SUBDIRS)) diff --git a/src/api2/types/acme.rs b/src/api2/types/acme.rs index 210ebdbc..8661f9e8 100644 --- a/src/api2/types/acme.rs +++ b/src/api2/types/acme.rs @@ -1,9 +1,8 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; -use proxmox_schema::{api, ApiStringFormat, ApiType, Schema, StringSchema}; - use pbs_api_types::{DNS_ALIAS_FORMAT, DNS_NAME_FORMAT, PROXMOX_SAFE_ID_FORMAT}; +use proxmox_schema::{api, ApiStringFormat, ApiType, Schema, StringSchema}; #[api( properties: { diff --git a/src/bin/proxmox-backup-manager.rs b/src/bin/proxmox-backup-manager.rs index d9f41353..f8365070 100644 --- a/src/bin/proxmox-backup-manager.rs +++ b/src/bin/proxmox-backup-manager.rs @@ -5,10 +5,6 @@ use std::str::FromStr; use anyhow::{format_err, Error}; use serde_json::{json, Value}; -use proxmox_router::{cli::*, RpcEnvironment}; -use proxmox_schema::api; -use proxmox_sys::fs::CreateOptions; - use 
pbs_api_types::percent_encoding::percent_encode_component; use pbs_api_types::{ BackupNamespace, GroupFilter, RateLimitConfig, SyncDirection, SyncJobConfig, DATASTORE_SCHEMA, @@ -18,12 +14,14 @@ use pbs_api_types::{ VERIFICATION_OUTDATED_AFTER_SCHEMA, VERIFY_JOB_READ_THREADS_SCHEMA, VERIFY_JOB_VERIFY_THREADS_SCHEMA, }; +use proxmox_rest_server::wait_for_local_worker; +use proxmox_router::{cli::*, RpcEnvironment}; +use proxmox_schema::api; +use proxmox_sys::fs::CreateOptions; + use pbs_client::{display_task_log, view_task_result}; use pbs_config::sync; use pbs_tools::json::required_string_param; - -use proxmox_rest_server::wait_for_local_worker; - use proxmox_backup::api2; use proxmox_backup::client_helpers::connect_to_localhost; use proxmox_backup::config; diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs index 92a8cb3c..870208fe 100644 --- a/src/bin/proxmox-backup-proxy.rs +++ b/src/bin/proxmox-backup-proxy.rs @@ -9,27 +9,25 @@ use hyper::http::request::Parts; use hyper::http::Response; use hyper::StatusCode; use hyper_util::server::graceful::GracefulShutdown; +use openssl::ssl::SslAcceptor; +use serde_json::{json, Value}; use tracing::level_filters::LevelFilter; use tracing::{info, warn}; use url::form_urlencoded; -use openssl::ssl::SslAcceptor; -use serde_json::{json, Value}; - use proxmox_http::Body; use proxmox_http::RateLimiterTag; use proxmox_lang::try_block; +use proxmox_rest_server::{ + cleanup_old_tasks, cookie_from_header, rotate_task_log_archive, ApiConfig, Redirector, + RestEnvironment, RestServer, WorkerTask, +}; use proxmox_router::{RpcEnvironment, RpcEnvironmentType}; use proxmox_sys::fs::CreateOptions; use proxmox_sys::logrotate::LogRotate; use pbs_datastore::DataStore; -use proxmox_rest_server::{ - cleanup_old_tasks, cookie_from_header, rotate_task_log_archive, ApiConfig, Redirector, - RestEnvironment, RestServer, WorkerTask, -}; - use proxmox_backup::{ server::{ auth::check_pbs_auth, diff --git 
a/src/config/acme/mod.rs b/src/config/acme/mod.rs index 274a23fd..ac89ae5e 100644 --- a/src/config/acme/mod.rs +++ b/src/config/acme/mod.rs @@ -5,11 +5,10 @@ use std::path::Path; use anyhow::{bail, format_err, Error}; use serde_json::Value; +use pbs_api_types::PROXMOX_SAFE_ID_REGEX; use proxmox_sys::error::SysError; use proxmox_sys::fs::{file_read_string, CreateOptions}; -use pbs_api_types::PROXMOX_SAFE_ID_REGEX; - use crate::api2::types::{AcmeAccountName, AcmeChallengeSchema, KnownAcmeDirectory}; pub(crate) const ACME_DIR: &str = pbs_buildcfg::configdir!("/acme"); diff --git a/src/config/acme/plugin.rs b/src/config/acme/plugin.rs index 18e71199..8ce852ec 100644 --- a/src/config/acme/plugin.rs +++ b/src/config/acme/plugin.rs @@ -4,10 +4,10 @@ use anyhow::Error; use serde::{Deserialize, Serialize}; use serde_json::Value; +use pbs_api_types::PROXMOX_SAFE_ID_FORMAT; use proxmox_schema::{api, ApiType, Schema, StringSchema, Updater}; use proxmox_section_config::{SectionConfig, SectionConfigData, SectionConfigPlugin}; -use pbs_api_types::PROXMOX_SAFE_ID_FORMAT; use pbs_config::{open_backup_lockfile, BackupLockGuard}; pub const PLUGIN_ID_SCHEMA: Schema = StringSchema::new("ACME Challenge Plugin ID.") diff --git a/src/config/node.rs b/src/config/node.rs index d2d6e383..253b2e36 100644 --- a/src/config/node.rs +++ b/src/config/node.rs @@ -4,14 +4,12 @@ use anyhow::{bail, Error}; use openssl::ssl::{SslAcceptor, SslMethod}; use serde::{Deserialize, Serialize}; -use proxmox_schema::{api, ApiStringFormat, ApiType, Updater}; - -use proxmox_http::ProxyConfig; - use pbs_api_types::{ EMAIL_SCHEMA, MULTI_LINE_COMMENT_SCHEMA, OPENSSL_CIPHERS_TLS_1_2_SCHEMA, OPENSSL_CIPHERS_TLS_1_3_SCHEMA, }; +use proxmox_http::ProxyConfig; +use proxmox_schema::{api, ApiStringFormat, ApiType, Updater}; use pbs_buildcfg::configdir; use pbs_config::{open_backup_lockfile, BackupLockGuard}; -- 2.47.3 From s.rufinatscha at proxmox.com Thu Jan 8 12:26:20 2026 From: s.rufinatscha at proxmox.com (Samuel 
Rufinatscha) Date: Thu, 8 Jan 2026 12:26:20 +0100 Subject: [pbs-devel] [PATCH proxmox{, -backup} v5 0/9] fix #6939: acme: support servers returning 204 for nonce requests Message-ID: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Hi, this series fixes account registration for ACME providers that return HTTP 204 No Content to the newNonce request. Currently, both the PBS ACME client and the shared ACME client in proxmox-acme only accept HTTP 200 OK for this request. The issue was observed in PBS against a custom ACME deployment and reported as bug #6939 [1]. ## Problem During ACME account registration, PBS first fetches an anti-replay nonce by sending a HEAD request to the CA's newNonce URL. RFC 8555 §7.2 [2] states that: * the server MUST include a Replay-Nonce header with a fresh nonce, * the server SHOULD use status 200 OK for the HEAD request, * the server MUST also handle GET on the same resource and may return 204 No Content with an empty body. The reporter observed the following error message: *ACME server responded with unexpected status code: 204* and mentioned that the issue did not appear with PVE 9 [1]. Looking at PVE's Perl ACME client [3], it uses a GET request instead of HEAD and accepts any 2xx success code when retrieving the nonce. This difference in behavior does not affect functionality but is worth noting for consistency across implementations. ## Approach To support ACME providers which return 204 No Content, the Rust ACME clients in proxmox-backup and proxmox need to treat both 200 OK and 204 No Content as valid responses for the nonce request, as long as a Replay-Nonce header is present. This series changes the expected field of the internal Request type from a single u16 to a list of allowed status codes (e.g. &'static [u16]), so one request can explicitly accept multiple success codes. To avoid fixing the issue twice (once in PBS' 
own ACME client and once in the shared Rust client), this series first refactors PBS to use the shared AcmeClient from proxmox-acme / proxmox-acme-api, similar to PDM, and then applies the bug fix in that shared implementation so that all consumers benefit from the more tolerant behavior. ## Testing *Testing the refactor* To test the refactor, I (1) installed latest stable PBS on a VM (2) created .deb package from latest PBS (master), containing the refactor (3) installed created .deb package (4) installed Pebble from Let's Encrypt [5] on the same VM (5) created an ACME account and ordered the new certificate for the host domain. Steps to reproduce: (1) install latest stable PBS on a VM, create .deb package from latest PBS (master) containing the refactor, install created .deb package (2) install Pebble from Let's Encrypt [5] on the same VM: cd apt update apt install -y golang git git clone https://github.com/letsencrypt/pebble cd pebble go build ./cmd/pebble then, download and trust the Pebble cert: wget https://raw.githubusercontent.com/letsencrypt/pebble/main/test/certs/pebble.minica.pem cp pebble.minica.pem /usr/local/share/ca-certificates/pebble.minica.crt update-ca-certificates We want Pebble to perform HTTP-01 validation against port 80, because PBS?s standalone plugin will bind port 80. Set httpPort to 80. nano ./test/config/pebble-config.json Start the Pebble server in the background: ./pebble -config ./test/config/pebble-config.json & Create a Pebble ACME account: proxmox-backup-manager acme account register default admin at example.com --directory 'https://127.0.0.1:14000/dir' To verify persistence of the account I checked ls /etc/proxmox-backup/acme/accounts Verified if update-account works proxmox-backup-manager acme account update default --contact "a at example.com,b at example.com" proxmox-backup-manager acme account info default In the PBS GUI, you can create a new domain. You can use your host domain name (see /etc/hosts). 
Select the created account and order the certificate. After a page reload, you might need to accept the new certificate in the browser. In the PBS dashboard, you should see the new Pebble certificate. *Note: on reboot, the created Pebble ACME account will be gone and you will need to create a new one. Pebble does not persist account info. In that case remove the previously created account in /etc/proxmox-backup/acme/accounts. *Testing the newNonce fix* To prove the ACME newNonce fix, I put nginx in front of Pebble, to intercept the newNonce request in order to return 204 No Content instead of 200 OK, all other requests are unchanged and forwarded to Pebble. Requires trusting the nginx CAs via /usr/local/share/ca-certificates + update-ca-certificates on the VM. Then I ran the following command against nginx: proxmox-backup-manager acme account register proxytest root at backup.local --directory 'https://nginx-address/dir' The account could be created successfully. When adjusting the nginx configuration to return any other non-expected success status code, PBS rejects as expected. ## Patch summary 0001 - [PATCH proxmox v5 1/4] acme: reduce visibility of Request type Restricts the visibility of the low-level Request type. Consumers should rely on proxmox-acme-api or AcmeClient handlers. 0002 - [PATCH proxmox v5 2/4] acme: introduce http_status module 0003 - [PATCH proxmox v5 3/4] fix #6939: acme: support servers returning 204 for nonce requests Adjusts nonce handling to support ACME servers that return HTTP 204 (No Content) for new-nonce requests. 0004 - [PATCH proxmox v5 4/4] acme-api: add helper to load client for an account Introduces a helper function to load an ACME client instance for a given account. Required for the following PBS ACME refactor. 0005 - [PATCH proxmox-backup v5 1/5] acme: clean up ACME-related imports 0006 - [PATCH proxmox-backup v5 2/5] acme: include proxmox-acme-api dependency Prepares the codebase to use the factored out ACME API impl. 0007 - 
[PATCH proxmox-backup v5 3/5] acme: drop local AcmeClient Removes the local AcmeClient implementation. Represents the minimal set of changes to replace it with the factored out AcmeClient. 0008 — [PATCH proxmox-backup v5 4/5] acme: change API impls to use proxmox-acme-api handlers 0009 — [PATCH proxmox-backup v5 5/5] acme: certificate ordering through proxmox-acme-api Thanks for considering this patch series, I look forward to your feedback. Best, Samuel Rufinatscha ## Changelog Changes from v4 to v5: * rebased series * re-ordered series (proxmox-acme fix first) * proxmox-backup: cleaned up imports based on an initial clean-up patch * proxmox-acme: removed now unused post_request_raw_payload(), update_account_request(), deactivate_account_request() * proxmox-acme: removed now obsolete/unused get_authorization() and GetAuthorization impl Verified removal by compiling PBS, PDM, and proxmox-perl-rs with all features. Changes from v3 to v4: * add proxmox-acme-api as a dependency and initialize it in PBS so PBS can use the shared ACME API instead. * remove the PBS-local AcmeClient implementation and switch PBS over to the shared proxmox-acme async client. * rework PBS' ACME API endpoints to delegate to proxmox-acme-api handlers instead of duplicating logic locally. * move PBS' ACME certificate ordering logic over to proxmox-acme-api, keeping only certificate installation/reload in PBS. * add a load_client_with_account helper in proxmox-acme-api so PBS (and others) can construct an AcmeClient for a configured account without duplicating boilerplate. * hide the low-level Request type and its fields behind constructors / reduced visibility so changes to 'expected' no longer affect the public API as they did in v3. * split out the HTTP status constants into an internal http_status module as a separate preparatory cleanup before the bug fix, instead of doing this inline like in v3. 
* Rebased on top of the refactor: keep the same behavioural fix as in v3 (accept 204 for newNonce with Replay-Nonce present), but implement it on top of the http_status module that is part of the refactor. Changes from v2 to v3: * rename `http_success` module to `http_status` * replace `http_success` usage * introduced `http_success` module to contain the http success codes * replaced `Vec` with `&[u16]` for expected codes to avoid allocations. * clarified the PVE's Perl ACME client behaviour in the commit message. * integrated the `http_success` module, replacing `Vec` with `&[u16]` * clarified the PVE's Perl ACME client behaviour in the commit message. [1] Bugzilla report #6939: [https://bugzilla.proxmox.com/show_bug.cgi?id=6939](https://bugzilla.proxmox.com/show_bug.cgi?id=6939) [2] RFC 8555 (ACME): [https://datatracker.ietf.org/doc/html/rfc8555/#section-7.2](https://datatracker.ietf.org/doc/html/rfc8555/#section-7.2) [3] PVE's Perl ACME client (allow 2xx codes for nonce requests): [https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l597](https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l597) [4] Pebble ACME server: [https://github.com/letsencrypt/pebble](https://github.com/letsencrypt/pebble) [5] Pebble ACME server (perform GET request): [https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l219](https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l219) 
++--------------------- proxmox-acme/src/async_client.rs | 8 +- proxmox-acme/src/authorization.rs | 30 ------- proxmox-acme/src/client.rs | 8 +- proxmox-acme/src/lib.rs | 6 +- proxmox-acme/src/order.rs | 2 +- proxmox-acme/src/request.rs | 25 ++++-- 9 files changed, 44 insertions(+), 145 deletions(-) proxmox-backup: Samuel Rufinatscha (5): acme: clean up ACME-related imports acme: include proxmox-acme-api dependency acme: drop local AcmeClient acme: change API impls to use proxmox-acme-api handlers acme: certificate ordering through proxmox-acme-api Cargo.toml | 3 + src/acme/client.rs | 691 ------------------------- src/acme/mod.rs | 5 - src/acme/plugin.rs | 336 ------------ src/api2/config/acme.rs | 406 ++------------- src/api2/node/certificates.rs | 232 ++------- src/api2/types/acme.rs | 98 ---- src/api2/types/mod.rs | 3 - src/bin/proxmox-backup-api.rs | 2 + src/bin/proxmox-backup-manager.rs | 14 +- src/bin/proxmox-backup-proxy.rs | 15 +- src/bin/proxmox_backup_manager/acme.rs | 21 +- src/config/acme/mod.rs | 55 +- src/config/acme/plugin.rs | 92 +--- src/config/node.rs | 31 +- src/lib.rs | 2 - 16 files changed, 109 insertions(+), 1897 deletions(-) delete mode 100644 src/acme/client.rs delete mode 100644 src/acme/mod.rs delete mode 100644 src/acme/plugin.rs delete mode 100644 src/api2/types/acme.rs Summary over all repositories: 25 files changed, 153 insertions(+), 2042 deletions(-) -- Generated by git-murpp 0.8.1 From s.rufinatscha at proxmox.com Thu Jan 8 12:26:27 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:26:27 +0100 Subject: [pbs-devel] [PATCH proxmox-backup v5 3/5] acme: drop local AcmeClient In-Reply-To: <20260108112629.189670-1-s.rufinatscha@proxmox.com> References: <20260108112629.189670-1-s.rufinatscha@proxmox.com> Message-ID: <20260108112629.189670-8-s.rufinatscha@proxmox.com> PBS currently uses its own ACME client and API logic, while PDM uses the factored out proxmox-acme and proxmox-acme-api crates. 
This duplication risks differences in behaviour and requires ACME maintenance in two places. This patch is part of a series to move PBS over to the shared ACME stack. Changes: - Remove the local src/acme/client.rs and switch to proxmox_acme::async_client::AcmeClient where needed. - Use proxmox_acme_api::load_client_with_account to replace the custom AcmeClient::load() function - Replace the local do_register() logic with proxmox_acme_api::register_account, to further ensure accounts are persisted - Replace the local AcmeAccountName type, required for proxmox_acme_api::register_account Signed-off-by: Samuel Rufinatscha --- src/acme/client.rs | 691 ------------------------- src/acme/mod.rs | 3 - src/acme/plugin.rs | 2 +- src/api2/config/acme.rs | 50 +- src/api2/node/certificates.rs | 2 +- src/api2/types/acme.rs | 8 - src/bin/proxmox_backup_manager/acme.rs | 17 +- src/config/acme/mod.rs | 8 +- src/config/node.rs | 9 +- 9 files changed, 36 insertions(+), 754 deletions(-) delete mode 100644 src/acme/client.rs diff --git a/src/acme/client.rs b/src/acme/client.rs deleted file mode 100644 index 9fb6ad55..00000000 --- a/src/acme/client.rs +++ /dev/null @@ -1,691 +0,0 @@ -//! HTTP Client for the ACME protocol. 
- -use std::fs::OpenOptions; -use std::io; -use std::os::unix::fs::OpenOptionsExt; - -use anyhow::{bail, format_err}; -use bytes::Bytes; -use http_body_util::BodyExt; -use hyper::Request; -use nix::sys::stat::Mode; -use proxmox_http::Body; -use serde::{Deserialize, Serialize}; - -use proxmox_acme::account::AccountCreator; -use proxmox_acme::order::{Order, OrderData}; -use proxmox_acme::types::AccountData as AcmeAccountData; -use proxmox_acme::Request as AcmeRequest; -use proxmox_acme::{Account, Authorization, Challenge, Directory, Error, ErrorResponse}; -use proxmox_http::client::Client; -use proxmox_sys::fs::{replace_file, CreateOptions}; - -use crate::api2::types::AcmeAccountName; -use crate::config::acme::account_path; -use crate::tools::pbs_simple_http; - -/// Our on-disk format inherited from PVE's proxmox-acme code. -#[derive(Deserialize, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct AccountData { - /// The account's location URL. - location: String, - - /// The account data. - account: AcmeAccountData, - - /// The private key as PEM formatted string. - key: String, - - /// ToS URL the user agreed to. - #[serde(skip_serializing_if = "Option::is_none")] - tos: Option, - - #[serde(skip_serializing_if = "is_false", default)] - debug: bool, - - /// The directory's URL. - directory_url: String, -} - -#[inline] -fn is_false(b: &bool) -> bool { - !*b -} - -pub struct AcmeClient { - directory_url: String, - debug: bool, - account_path: Option, - tos: Option, - account: Option, - directory: Option, - nonce: Option, - http_client: Client, -} - -impl AcmeClient { - /// Create a new ACME client for a given ACME directory URL. - pub fn new(directory_url: String) -> Self { - Self { - directory_url, - debug: false, - account_path: None, - tos: None, - account: None, - directory: None, - nonce: None, - http_client: pbs_simple_http(None), - } - } - - /// Load an existing ACME account by name. 
- pub async fn load(account_name: &AcmeAccountName) -> Result { - let account_path = account_path(account_name.as_ref()); - let data = match tokio::fs::read(&account_path).await { - Ok(data) => data, - Err(err) if err.kind() == io::ErrorKind::NotFound => { - bail!("acme account '{}' does not exist", account_name) - } - Err(err) => bail!( - "failed to load acme account from '{}' - {}", - account_path, - err - ), - }; - let data: AccountData = serde_json::from_slice(&data).map_err(|err| { - format_err!( - "failed to parse acme account from '{}' - {}", - account_path, - err - ) - })?; - - let account = Account::from_parts(data.location, data.key, data.account); - - let mut me = Self::new(data.directory_url); - me.debug = data.debug; - me.account_path = Some(account_path); - me.tos = data.tos; - me.account = Some(account); - - Ok(me) - } - - pub async fn new_account<'a>( - &'a mut self, - account_name: &AcmeAccountName, - tos_agreed: bool, - contact: Vec, - rsa_bits: Option, - eab_creds: Option<(String, String)>, - ) -> Result<&'a Account, anyhow::Error> { - self.tos = if tos_agreed { - self.terms_of_service_url().await?.map(str::to_owned) - } else { - None - }; - - let mut account = Account::creator() - .set_contacts(contact) - .agree_to_tos(tos_agreed); - - if let Some((eab_kid, eab_hmac_key)) = eab_creds { - account = account.set_eab_credentials(eab_kid, eab_hmac_key)?; - } - - let account = if let Some(bits) = rsa_bits { - account.generate_rsa_key(bits)? - } else { - account.generate_ec_key()? 
- }; - - let _ = self.register_account(account).await?; - - crate::config::acme::make_acme_account_dir()?; - let account_path = account_path(account_name.as_ref()); - let file = OpenOptions::new() - .write(true) - .create_new(true) - .mode(0o600) - .open(&account_path) - .map_err(|err| format_err!("failed to open {:?} for writing: {}", account_path, err))?; - self.write_to(file).map_err(|err| { - format_err!( - "failed to write acme account to {:?}: {}", - account_path, - err - ) - })?; - self.account_path = Some(account_path); - - // unwrap: Setting `self.account` is literally this function's job, we just can't keep - // the borrow from from `self.register_account()` active due to clashes. - Ok(self.account.as_ref().unwrap()) - } - - fn save(&self) -> Result<(), anyhow::Error> { - let mut data = Vec::::new(); - self.write_to(&mut data)?; - let account_path = self.account_path.as_ref().ok_or_else(|| { - format_err!("no account path set, cannot save updated account information") - })?; - crate::config::acme::make_acme_account_dir()?; - replace_file( - account_path, - &data, - CreateOptions::new() - .perm(Mode::from_bits_truncate(0o600)) - .owner(nix::unistd::ROOT) - .group(nix::unistd::Gid::from_raw(0)), - true, - ) - } - - /// Shortcut to `account().ok_or_else(...).key_authorization()`. - pub fn key_authorization(&self, token: &str) -> Result { - Ok(Self::need_account(&self.account)?.key_authorization(token)?) - } - - /// Shortcut to `account().ok_or_else(...).dns_01_txt_value()`. - /// the key authorization value. - pub fn dns_01_txt_value(&self, token: &str) -> Result { - Ok(Self::need_account(&self.account)?.dns_01_txt_value(token)?) 
- } - - async fn register_account( - &mut self, - account: AccountCreator, - ) -> Result<&Account, anyhow::Error> { - let mut retry = retry(); - let mut response = loop { - retry.tick()?; - - let (directory, nonce) = Self::get_dir_nonce( - &mut self.http_client, - &self.directory_url, - &mut self.directory, - &mut self.nonce, - ) - .await?; - let request = account.request(directory, nonce)?; - match self.run_request(request).await { - Ok(response) => break response, - Err(err) if err.is_bad_nonce() => continue, - Err(err) => return Err(err.into()), - } - }; - - let account = account.response(response.location_required()?, &response.body)?; - - self.account = Some(account); - Ok(self.account.as_ref().unwrap()) - } - - pub async fn update_account( - &mut self, - data: &T, - ) -> Result<&Account, anyhow::Error> { - let account = Self::need_account(&self.account)?; - - let mut retry = retry(); - let response = loop { - retry.tick()?; - - let (_directory, nonce) = Self::get_dir_nonce( - &mut self.http_client, - &self.directory_url, - &mut self.directory, - &mut self.nonce, - ) - .await?; - - let request = account.post_request(&account.location, nonce, data)?; - match Self::execute(&mut self.http_client, request, &mut self.nonce).await { - Ok(response) => break response, - Err(err) if err.is_bad_nonce() => continue, - Err(err) => return Err(err.into()), - } - }; - - // unwrap: we've been keeping an immutable reference to it from the top of the method - let _ = account; - self.account.as_mut().unwrap().data = response.json()?; - self.save()?; - Ok(self.account.as_ref().unwrap()) - } - - pub async fn new_order(&mut self, domains: I) -> Result - where - I: IntoIterator, - { - let account = Self::need_account(&self.account)?; - - let order = domains - .into_iter() - .fold(OrderData::new(), |order, domain| order.domain(domain)); - - let mut retry = retry(); - loop { - retry.tick()?; - - let (directory, nonce) = Self::get_dir_nonce( - &mut self.http_client, - 
&self.directory_url, - &mut self.directory, - &mut self.nonce, - ) - .await?; - - let mut new_order = account.new_order(&order, directory, nonce)?; - let mut response = match Self::execute( - &mut self.http_client, - new_order.request.take().unwrap(), - &mut self.nonce, - ) - .await - { - Ok(response) => response, - Err(err) if err.is_bad_nonce() => continue, - Err(err) => return Err(err.into()), - }; - - return Ok( - new_order.response(response.location_required()?, response.bytes().as_ref())? - ); - } - } - - /// Low level "POST-as-GET" request. - async fn post_as_get(&mut self, url: &str) -> Result { - let account = Self::need_account(&self.account)?; - - let mut retry = retry(); - loop { - retry.tick()?; - - let (_directory, nonce) = Self::get_dir_nonce( - &mut self.http_client, - &self.directory_url, - &mut self.directory, - &mut self.nonce, - ) - .await?; - - let request = account.get_request(url, nonce)?; - match Self::execute(&mut self.http_client, request, &mut self.nonce).await { - Ok(response) => return Ok(response), - Err(err) if err.is_bad_nonce() => continue, - Err(err) => return Err(err.into()), - } - } - } - - /// Low level POST request. - async fn post( - &mut self, - url: &str, - data: &T, - ) -> Result { - let account = Self::need_account(&self.account)?; - - let mut retry = retry(); - loop { - retry.tick()?; - - let (_directory, nonce) = Self::get_dir_nonce( - &mut self.http_client, - &self.directory_url, - &mut self.directory, - &mut self.nonce, - ) - .await?; - - let request = account.post_request(url, nonce, data)?; - match Self::execute(&mut self.http_client, request, &mut self.nonce).await { - Ok(response) => return Ok(response), - Err(err) if err.is_bad_nonce() => continue, - Err(err) => return Err(err.into()), - } - } - } - - /// Request challenge validation. Afterwards, the challenge should be polled. 
- pub async fn request_challenge_validation( - &mut self, - url: &str, - ) -> Result { - Ok(self - .post(url, &serde_json::Value::Object(Default::default())) - .await? - .json()?) - } - - /// Assuming the provided URL is an 'Authorization' URL, get and deserialize it. - pub async fn get_authorization(&mut self, url: &str) -> Result { - Ok(self.post_as_get(url).await?.json()?) - } - - /// Assuming the provided URL is an 'Order' URL, get and deserialize it. - pub async fn get_order(&mut self, url: &str) -> Result { - Ok(self.post_as_get(url).await?.json()?) - } - - /// Finalize an Order via its `finalize` URL property and the DER encoded CSR. - pub async fn finalize(&mut self, url: &str, csr: &[u8]) -> Result<(), anyhow::Error> { - let csr = proxmox_base64::url::encode_no_pad(csr); - let data = serde_json::json!({ "csr": csr }); - self.post(url, &data).await?; - Ok(()) - } - - /// Download a certificate via its 'certificate' URL property. - /// - /// The certificate will be a PEM certificate chain. - pub async fn get_certificate(&mut self, url: &str) -> Result { - Ok(self.post_as_get(url).await?.body) - } - - /// Revoke an existing certificate (PEM or DER formatted). - pub async fn revoke_certificate( - &mut self, - certificate: &[u8], - reason: Option, - ) -> Result<(), anyhow::Error> { - // TODO: This can also work without an account. 
- let account = Self::need_account(&self.account)?; - - let revocation = account.revoke_certificate(certificate, reason)?; - - let mut retry = retry(); - loop { - retry.tick()?; - - let (directory, nonce) = Self::get_dir_nonce( - &mut self.http_client, - &self.directory_url, - &mut self.directory, - &mut self.nonce, - ) - .await?; - - let request = revocation.request(directory, nonce)?; - match Self::execute(&mut self.http_client, request, &mut self.nonce).await { - Ok(_response) => return Ok(()), - Err(err) if err.is_bad_nonce() => continue, - Err(err) => return Err(err.into()), - } - } - } - - fn need_account(account: &Option) -> Result<&Account, anyhow::Error> { - account - .as_ref() - .ok_or_else(|| format_err!("cannot use client without an account")) - } - - pub(crate) fn account(&self) -> Result<&Account, anyhow::Error> { - Self::need_account(&self.account) - } - - pub fn tos(&self) -> Option<&str> { - self.tos.as_deref() - } - - pub fn directory_url(&self) -> &str { - &self.directory_url - } - - fn to_account_data(&self) -> Result { - let account = self.account()?; - - Ok(AccountData { - location: account.location.clone(), - key: account.private_key.clone(), - account: AcmeAccountData { - only_return_existing: false, // don't actually write this out in case it's set - ..account.data.clone() - }, - tos: self.tos.clone(), - debug: self.debug, - directory_url: self.directory_url.clone(), - }) - } - - fn write_to(&self, out: T) -> Result<(), anyhow::Error> { - let data = self.to_account_data()?; - - Ok(serde_json::to_writer_pretty(out, &data)?) - } -} - -struct AcmeResponse { - body: Bytes, - location: Option, - got_nonce: bool, -} - -impl AcmeResponse { - /// Convenience helper to assert that a location header was part of the response. - fn location_required(&mut self) -> Result { - self.location - .take() - .ok_or_else(|| format_err!("missing Location header")) - } - - /// Convenience shortcut to perform json deserialization of the returned body. 
- fn json Deserialize<'a>>(&self) -> Result { - Ok(serde_json::from_slice(&self.body)?) - } - - /// Convenience shortcut to get the body as bytes. - fn bytes(&self) -> &[u8] { - &self.body - } -} - -impl AcmeClient { - /// Non-self-borrowing run_request version for borrow workarounds. - async fn execute( - http_client: &mut Client, - request: AcmeRequest, - nonce: &mut Option, - ) -> Result { - let req_builder = Request::builder().method(request.method).uri(&request.url); - - let http_request = if !request.content_type.is_empty() { - req_builder - .header("Content-Type", request.content_type) - .header("Content-Length", request.body.len()) - .body(request.body.into()) - } else { - req_builder.body(Body::empty()) - } - .map_err(|err| Error::Custom(format!("failed to create http request: {err}")))?; - - let response = http_client - .request(http_request) - .await - .map_err(|err| Error::Custom(err.to_string()))?; - let (parts, body) = response.into_parts(); - - let status = parts.status.as_u16(); - let body = body - .collect() - .await - .map_err(|err| Error::Custom(format!("failed to retrieve response body: {err}")))? 
- .to_bytes(); - - let got_nonce = if let Some(new_nonce) = parts.headers.get(proxmox_acme::REPLAY_NONCE) { - let new_nonce = new_nonce.to_str().map_err(|err| { - Error::Client(format!( - "received invalid replay-nonce header from ACME server: {err}" - )) - })?; - *nonce = Some(new_nonce.to_owned()); - true - } else { - false - }; - - if parts.status.is_success() { - if status != request.expected { - return Err(Error::InvalidApi(format!( - "ACME server responded with unexpected status code: {:?}", - parts.status - ))); - } - - let location = parts - .headers - .get("Location") - .map(|header| { - header.to_str().map(str::to_owned).map_err(|err| { - Error::Client(format!( - "received invalid location header from ACME server: {err}" - )) - }) - }) - .transpose()?; - - return Ok(AcmeResponse { - body, - location, - got_nonce, - }); - } - - let error: ErrorResponse = serde_json::from_slice(&body).map_err(|err| { - Error::Client(format!( - "error status with improper error ACME response: {err}" - )) - })?; - - if error.ty == proxmox_acme::error::BAD_NONCE { - if !got_nonce { - return Err(Error::InvalidApi( - "badNonce without a new Replay-Nonce header".to_string(), - )); - } - return Err(Error::BadNonce); - } - - Err(Error::Api(error)) - } - - /// Low-level API to run an n API request. This automatically updates the current nonce! - async fn run_request(&mut self, request: AcmeRequest) -> Result { - Self::execute(&mut self.http_client, request, &mut self.nonce).await - } - - pub async fn directory(&mut self) -> Result<&Directory, Error> { - Ok(Self::get_directory( - &mut self.http_client, - &self.directory_url, - &mut self.directory, - &mut self.nonce, - ) - .await? 
- .0) - } - - async fn get_directory<'a, 'b>( - http_client: &mut Client, - directory_url: &str, - directory: &'a mut Option, - nonce: &'b mut Option, - ) -> Result<(&'a Directory, Option<&'b str>), Error> { - if let Some(d) = directory { - return Ok((d, nonce.as_deref())); - } - - let response = Self::execute( - http_client, - AcmeRequest { - url: directory_url.to_string(), - method: "GET", - content_type: "", - body: String::new(), - expected: 200, - }, - nonce, - ) - .await?; - - *directory = Some(Directory::from_parts( - directory_url.to_string(), - response.json()?, - )); - - Ok((directory.as_mut().unwrap(), nonce.as_deref())) - } - - /// Like `get_directory`, but if the directory provides no nonce, also performs a `HEAD` - /// request on the new nonce URL. - async fn get_dir_nonce<'a, 'b>( - http_client: &mut Client, - directory_url: &str, - directory: &'a mut Option, - nonce: &'b mut Option, - ) -> Result<(&'a Directory, &'b str), Error> { - // this let construct is a lifetime workaround: - let _ = Self::get_directory(http_client, directory_url, directory, nonce).await?; - let dir = directory.as_ref().unwrap(); // the above fails if it couldn't fill this option - if nonce.is_none() { - // this is also a lifetime issue... 
- let _ = Self::get_nonce(http_client, nonce, dir.new_nonce_url()).await?; - }; - Ok((dir, nonce.as_deref().unwrap())) - } - - pub async fn terms_of_service_url(&mut self) -> Result, Error> { - Ok(self.directory().await?.terms_of_service_url()) - } - - async fn get_nonce<'a>( - http_client: &mut Client, - nonce: &'a mut Option, - new_nonce_url: &str, - ) -> Result<&'a str, Error> { - let response = Self::execute( - http_client, - AcmeRequest { - url: new_nonce_url.to_owned(), - method: "HEAD", - content_type: "", - body: String::new(), - expected: 200, - }, - nonce, - ) - .await?; - - if !response.got_nonce { - return Err(Error::InvalidApi( - "no new nonce received from new nonce URL".to_string(), - )); - } - - nonce - .as_deref() - .ok_or_else(|| Error::Client("failed to update nonce".to_string())) - } -} - -/// bad nonce retry count helper -struct Retry(usize); - -const fn retry() -> Retry { - Retry(0) -} - -impl Retry { - fn tick(&mut self) -> Result<(), Error> { - if self.0 >= 3 { - Err(Error::Client("kept getting a badNonce error!".to_string())) - } else { - self.0 += 1; - Ok(()) - } - } -} diff --git a/src/acme/mod.rs b/src/acme/mod.rs index bf61811c..cc561f9a 100644 --- a/src/acme/mod.rs +++ b/src/acme/mod.rs @@ -1,5 +1,2 @@ -mod client; -pub use client::AcmeClient; - pub(crate) mod plugin; pub(crate) use plugin::get_acme_plugin; diff --git a/src/acme/plugin.rs b/src/acme/plugin.rs index 993d729b..6804243c 100644 --- a/src/acme/plugin.rs +++ b/src/acme/plugin.rs @@ -18,10 +18,10 @@ use tokio::io::{AsyncBufReadExt, AsyncRead, AsyncWriteExt, BufReader}; use tokio::net::TcpListener; use tokio::process::Command; +use proxmox_acme::async_client::AcmeClient; use proxmox_acme::{Authorization, Challenge}; use proxmox_rest_server::WorkerTask; -use crate::acme::AcmeClient; use crate::api2::types::AcmeDomain; use crate::config::acme::plugin::{DnsPlugin, PluginData}; diff --git a/src/api2/config/acme.rs b/src/api2/config/acme.rs index 18671639..898f06dd 100644 --- 
a/src/api2/config/acme.rs +++ b/src/api2/config/acme.rs @@ -11,16 +11,16 @@ use serde_json::{json, Value}; use tracing::{info, warn}; use pbs_api_types::{Authid, PRIV_SYS_MODIFY}; +use proxmox_acme::async_client::AcmeClient; use proxmox_acme::types::AccountData as AcmeAccountData; -use proxmox_acme::Account; +use proxmox_acme_api::AcmeAccountName; use proxmox_rest_server::WorkerTask; use proxmox_router::{ http_bail, list_subdirs_api_method, Permission, Router, RpcEnvironment, SubdirMap, }; use proxmox_schema::{api, param_bail}; -use crate::acme::AcmeClient; -use crate::api2::types::{AcmeAccountName, AcmeChallengeSchema, KnownAcmeDirectory}; +use crate::api2::types::{AcmeChallengeSchema, KnownAcmeDirectory}; use crate::config::acme::plugin::{ self, DnsPlugin, DnsPluginCore, DnsPluginCoreUpdater, PLUGIN_ID_SCHEMA, }; @@ -141,15 +141,15 @@ pub struct AccountInfo { )] /// Return existing ACME account information. pub async fn get_account(name: AcmeAccountName) -> Result { - let client = AcmeClient::load(&name).await?; - let account = client.account()?; + let account_info = proxmox_acme_api::get_account(name).await?; + Ok(AccountInfo { - location: account.location.clone(), - tos: client.tos().map(str::to_owned), - directory: client.directory_url().to_owned(), + location: account_info.location, + tos: account_info.tos, + directory: account_info.directory, account: AcmeAccountData { only_return_existing: false, // don't actually write this out in case it's set - ..account.data.clone() + ..account_info.account }, }) } @@ -238,41 +238,24 @@ fn register_account( auth_id.to_string(), true, move |_worker| async move { - let mut client = AcmeClient::new(directory); - info!("Registering ACME account '{}'...", &name); - let account = do_register_account( - &mut client, + let location = proxmox_acme_api::register_account( &name, - tos_url.is_some(), contact, - None, + tos_url, + Some(directory), eab_kid.zip(eab_hmac_key), ) .await?; - info!("Registration successful, account URL: 
{}", account.location); + info!("Registration successful, account URL: {}", location); Ok(()) }, ) } -pub async fn do_register_account<'a>( - client: &'a mut AcmeClient, - name: &AcmeAccountName, - agree_to_tos: bool, - contact: String, - rsa_bits: Option, - eab_creds: Option<(String, String)>, -) -> Result<&'a Account, Error> { - let contact = account_contact_from_string(&contact); - client - .new_account(name, agree_to_tos, contact, rsa_bits, eab_creds) - .await -} - #[api( input: { properties: { @@ -310,7 +293,10 @@ pub fn update_account( None => json!({}), }; - AcmeClient::load(&name).await?.update_account(&data).await?; + proxmox_acme_api::load_client_with_account(&name) + .await? + .update_account(&data) + .await?; Ok(()) }, @@ -348,7 +334,7 @@ pub fn deactivate_account( auth_id.to_string(), true, move |_worker| async move { - match AcmeClient::load(&name) + match proxmox_acme_api::load_client_with_account(&name) .await? .update_account(&json!({"status": "deactivated"})) .await diff --git a/src/api2/node/certificates.rs b/src/api2/node/certificates.rs index 6b1d87d2..47ff8de5 100644 --- a/src/api2/node/certificates.rs +++ b/src/api2/node/certificates.rs @@ -8,6 +8,7 @@ use serde::{Deserialize, Serialize}; use tracing::{info, warn}; use pbs_api_types::{NODE_SCHEMA, PRIV_SYS_MODIFY}; +use proxmox_acme::async_client::AcmeClient; use proxmox_rest_server::WorkerTask; use proxmox_router::list_subdirs_api_method; use proxmox_router::SubdirMap; @@ -17,7 +18,6 @@ use proxmox_schema::api; use pbs_buildcfg::configdir; use pbs_tools::cert; -use crate::acme::AcmeClient; use crate::api2::types::AcmeDomain; use crate::config::node::NodeConfig; use crate::server::send_certificate_renewal_mail; diff --git a/src/api2/types/acme.rs b/src/api2/types/acme.rs index 8661f9e8..64175aff 100644 --- a/src/api2/types/acme.rs +++ b/src/api2/types/acme.rs @@ -59,14 +59,6 @@ pub struct KnownAcmeDirectory { pub url: &'static str, } -proxmox_schema::api_string_type! 
{ - #[api(format: &PROXMOX_SAFE_ID_FORMAT)] - /// ACME account name. - #[derive(Clone, Eq, PartialEq, Hash, Deserialize, Serialize)] - #[serde(transparent)] - pub struct AcmeAccountName(String); -} - #[api( properties: { schema: { diff --git a/src/bin/proxmox_backup_manager/acme.rs b/src/bin/proxmox_backup_manager/acme.rs index 0f0eafea..6ed61560 100644 --- a/src/bin/proxmox_backup_manager/acme.rs +++ b/src/bin/proxmox_backup_manager/acme.rs @@ -3,13 +3,13 @@ use std::io::Write; use anyhow::{bail, Error}; use serde_json::Value; +use proxmox_acme::async_client::AcmeClient; +use proxmox_acme_api::AcmeAccountName; use proxmox_router::{cli::*, ApiHandler, RpcEnvironment}; use proxmox_schema::api; use proxmox_sys::fs::file_get_contents; -use proxmox_backup::acme::AcmeClient; use proxmox_backup::api2; -use proxmox_backup::api2::types::AcmeAccountName; use proxmox_backup::config::acme::plugin::DnsPluginCore; use proxmox_backup::config::acme::KNOWN_ACME_DIRECTORIES; @@ -188,17 +188,20 @@ async fn register_account( println!("Attempting to register account with {directory_url:?}..."); - let account = api2::config::acme::do_register_account( - &mut client, + let tos_agreed = tos_agreed + .then(|| directory.terms_of_service_url().map(str::to_owned)) + .flatten(); + + let location = proxmox_acme_api::register_account( &name, - tos_agreed, contact, - None, + tos_agreed, + Some(directory_url), eab_creds, ) .await?; - println!("Registration successful, account URL: {}", account.location); + println!("Registration successful, account URL: {}", location); Ok(()) } diff --git a/src/config/acme/mod.rs b/src/config/acme/mod.rs index ac89ae5e..e4639c53 100644 --- a/src/config/acme/mod.rs +++ b/src/config/acme/mod.rs @@ -6,10 +6,11 @@ use anyhow::{bail, format_err, Error}; use serde_json::Value; use pbs_api_types::PROXMOX_SAFE_ID_REGEX; +use proxmox_acme_api::AcmeAccountName; use proxmox_sys::error::SysError; use proxmox_sys::fs::{file_read_string, CreateOptions}; -use 
crate::api2::types::{AcmeAccountName, AcmeChallengeSchema, KnownAcmeDirectory}; +use crate::api2::types::{AcmeChallengeSchema, KnownAcmeDirectory}; pub(crate) const ACME_DIR: &str = pbs_buildcfg::configdir!("/acme"); pub(crate) const ACME_ACCOUNT_DIR: &str = pbs_buildcfg::configdir!("/acme/accounts"); @@ -34,11 +35,6 @@ pub(crate) fn make_acme_dir() -> Result<(), Error> { create_acme_subdir(ACME_DIR) } -pub(crate) fn make_acme_account_dir() -> Result<(), Error> { - make_acme_dir()?; - create_acme_subdir(ACME_ACCOUNT_DIR) -} - pub const KNOWN_ACME_DIRECTORIES: &[KnownAcmeDirectory] = &[ KnownAcmeDirectory { name: "Let's Encrypt V2", diff --git a/src/config/node.rs b/src/config/node.rs index 253b2e36..e4b66a20 100644 --- a/src/config/node.rs +++ b/src/config/node.rs @@ -8,16 +8,15 @@ use pbs_api_types::{ EMAIL_SCHEMA, MULTI_LINE_COMMENT_SCHEMA, OPENSSL_CIPHERS_TLS_1_2_SCHEMA, OPENSSL_CIPHERS_TLS_1_3_SCHEMA, }; +use proxmox_acme::async_client::AcmeClient; +use proxmox_acme_api::AcmeAccountName; use proxmox_http::ProxyConfig; use proxmox_schema::{api, ApiStringFormat, ApiType, Updater}; use pbs_buildcfg::configdir; use pbs_config::{open_backup_lockfile, BackupLockGuard}; -use crate::acme::AcmeClient; -use crate::api2::types::{ - AcmeAccountName, AcmeDomain, ACME_DOMAIN_PROPERTY_SCHEMA, HTTP_PROXY_SCHEMA, -}; +use crate::api2::types::{AcmeDomain, ACME_DOMAIN_PROPERTY_SCHEMA, HTTP_PROXY_SCHEMA}; const CONF_FILE: &str = configdir!("/node.cfg"); const LOCK_FILE: &str = configdir!("/.node.lck"); @@ -247,7 +246,7 @@ impl NodeConfig { } else { AcmeAccountName::from_string("default".to_string())? 
// should really not happen }; - AcmeClient::load(&account).await + proxmox_acme_api::load_client_with_account(&account).await } pub fn acme_domains(&'_ self) -> AcmeDomainIter<'_> { -- 2.47.3 From s.rufinatscha at proxmox.com Thu Jan 8 12:48:43 2026 From: s.rufinatscha at proxmox.com (Samuel Rufinatscha) Date: Thu, 8 Jan 2026 12:48:43 +0100 Subject: [pbs-devel] superseded: [PATCH proxmox{-backup, } v4 0/8] fix #6939: acme: support servers returning 204 for nonce requests In-Reply-To: <20251203102217.59923-1-s.rufinatscha@proxmox.com> References: <20251203102217.59923-1-s.rufinatscha@proxmox.com> Message-ID: <9f94815f-0378-4181-b0cd-c64f05d6ccc7@proxmox.com> https://lore.proxmox.com/pbs-devel/20260108112629.189670-1-s.rufinatscha at proxmox.com/T/#t On 12/3/25 11:21 AM, Samuel Rufinatscha wrote: > Hi, > > this series fixes account registration for ACME providers that return > HTTP 204 No Content to the newNonce request. Currently, both the PBS > ACME client and the shared ACME client in proxmox-acme only accept > HTTP 200 OK for this request. The issue was observed in PBS against a > custom ACME deployment and reported as bug #6939 [1]. > > ## Problem > > During ACME account registration, PBS first fetches an anti-replay > nonce by sending a HEAD request to the CA?s newNonce URL. > RFC 8555 ?7.2 [2] states that: > > * the server MUST include a Replay-Nonce header with a fresh nonce, > * the server SHOULD use status 200 OK for the HEAD request, > * the server MUST also handle GET on the same resource and may return > 204 No Content with an empty body. > > The reporter observed the following error message: > > *ACME server responded with unexpected status code: 204* > > and mentioned that the issue did not appear with PVE 9 [1]. Looking at > PVE?s Perl ACME client [3], it uses a GET request instead of HEAD and > accepts any 2xx success code when retrieving the nonce. 
This difference > in behavior does not affect functionality but is worth noting for > consistency across implementations. > > ## Approach > > To support ACME providers which return 204 No Content, the Rust ACME > clients in proxmox-backup and proxmox need to treat both 200 OK and 204 > No Content as valid responses for the nonce request, as long as a > Replay-Nonce header is present. > > This series changes the expected field of the internal Request type > from a single u16 to a list of allowed status codes > (e.g. &'static [u16]), so one request can explicitly accept multiple > success codes. > > To avoid fixing the issue twice (once in PBS? own ACME client and once > in the shared Rust client), this series first refactors PBS to use the > shared AcmeClient from proxmox-acme / proxmox-acme-api, similar to PDM, > and then applies the bug fix in that shared implementation so that all > consumers benefit from the more tolerant behavior. > > ## Testing > > *Testing the refactor* > > To test the refactor, I > (1) installed latest stable PBS on a VM > (2) created .deb package from latest PBS (master), containing the > refactor > (3) installed created .deb package > (4) installed Pebble from Let's Encrypt [5] on the same VM > (5) created an ACME account and ordered the new certificate for the > host domain. 
> > Steps to reproduce: > > (1) install latest stable PBS on a VM, create .deb package from latest > PBS (master) containing the refactor, install created .deb package > (2) install Pebble from Let's Encrypt [5] on the same VM: > > cd > apt update > apt install -y golang git > git clone https://github.com/letsencrypt/pebble > cd pebble > go build ./cmd/pebble > > then, download and trust the Pebble cert: > > wget https://raw.githubusercontent.com/letsencrypt/pebble/main/test/certs/pebble.minica.pem > cp pebble.minica.pem /usr/local/share/ca-certificates/pebble.minica.crt > update-ca-certificates > > We want Pebble to perform HTTP-01 validation against port 80, because > PBS?s standalone plugin will bind port 80. Set httpPort to 80. > > nano ./test/config/pebble-config.json > > Start the Pebble server in the background: > > ./pebble -config ./test/config/pebble-config.json & > > Create a Pebble ACME account: > > proxmox-backup-manager acme account register default admin at example.com --directory 'https://127.0.0.1:14000/dir' > > To verify persistence of the account I checked > > ls /etc/proxmox-backup/acme/accounts > > Verified if update-account works > > proxmox-backup-manager acme account update default --contact "a at example.com,b at example.com" > proxmox-backup-manager acme account info default > > In the PBS GUI, you can create a new domain. You can use your host > domain name (see /etc/hosts). Select the created account and order the > certificate. > > After a page reload, you might need to accept the new certificate in the browser. > In the PBS dashboard, you should see the new Pebble certificate. > > *Note: on reboot, the created Pebble ACME account will be gone and you > will need to create a new one. Pebble does not persist account info. > In that case remove the previously created account in > /etc/proxmox-backup/acme/accounts. 
> > *Testing the newNonce fix* > > To prove the ACME newNonce fix, I put nginx in front of Pebble, to > intercept the newNonce request in order to return 204 No Content > instead of 200 OK, all other requests are unchanged and forwarded to > Pebble. Requires trusting the nginx CAs via > /usr/local/share/ca-certificates + update-ca-certificates on the VM. > > Then I ran the following command against nginx: > > proxmox-backup-manager acme account register proxytest root at backup.local --directory 'https://nginx-address/dir' > > The account could be created successfully. When adjusting the nginx > configuration to return any other non-expected success status code, > PBS rejects as expected. > > ## Patch summary > > 0001 - acme: include proxmox-acme-api dependency > Adds proxmox-acme-api as a new dependency for the ACME code. This > prepares the codebase to use the shared ACME API instead of local > implementations. > > 0002 - acme: drop local AcmeClient > Removes the local AcmeClient implementation. Minimal changes > required to support the removal. > > 0003 - acme: change API impls to use proxmox-acme-api handler > Updates existing ACME API implementations to use the handlers provided > by proxmox-acme-api. > > 0004 - acme: certificate ordering through proxmox-acme-api > Perform certificate ordering through proxmox-acme-api instead of local > logic. > > 0005 - acme api: add helper to load client for an account > Introduces a helper function to load an ACME client instance for a > given account. Required for the PBS refactor. > > 0006 - acme: reduce visibility of Request type > Restricts the visibility of the internal Request type. > > 0007 - acme: introduce http_status module > Adds a dedicated http_status module for handling common HTTP status > codes. > > 0008 - fix #6939: acme: support servers returning 204 for nonce > Adjusts nonce handling to support ACME servers that return HTTP 204 > (No Content) for new-nonce requests. 
> > Thanks for considering this patch series, I look forward to your > feedback. > > Best, > Samuel Rufinatscha > > ## Changelog > > Changes from v3 to v4: > > Removed: [PATCH proxmox-backup v3 1/1]. > > Added: > > [PATCH proxmox-backup v4 1/4] acme: include proxmox-acme-api dependency > * New: add proxmox-acme-api as a dependency and initialize it in > PBS so PBS can use the shared ACME API instead. > > [PATCH proxmox-backup v4 2/4] acme: drop local AcmeClient > * New: remove the PBS-local AcmeClient implementation and switch PBS > over to the shared proxmox-acme async client. > > [PATCH proxmox-backup v4 3/4] acme: change API impls to use proxmox-acme-api > handlers > * New: rework PBS? ACME API endpoints to delegate to > proxmox-acme-api handlers instead of duplicating logic locally. > > [PATCH proxmox-backup v4 4/4] acme: certificate ordering through > proxmox-acme-api > * New: move PBS? ACME certificate ordering logic over to > proxmox-acme-api, keeping only certificate installation/reload in > PBS. > > [PATCH proxmox v4 1/4] acme-api: add helper to load client for an account > * New: add a load_client_with_account helper in proxmox-acme-api so > PBS (and others) can construct an AcmeClient for a configured account > without duplicating boilerplate. > > [PATCH proxmox v4 2/4] acme: reduce visibility of Request type > * New: hide the low-level Request type and its fields behind > constructors / reduced visibility so changes to ?expected? no longer > affect the public API as they did in v3. > > [PATCH proxmox v4 3/4] acme: introduce http_status module > * New: split out the HTTP status constants into an internal > http_status module as a separate preparatory cleanup before the bug > fix, instead of doing this inline like in v3. 
> > Changed: > > [PATCH proxmox v3 1/1] -> [PATCH proxmox v4 4/4] > fix #6939: acme: support server returning 204 for nonce requests > * Rebased on top of the refactor: keep the same behavioural fix as in v3 > (accept 204 for newNonce with Replay-Nonce present), but implement it > on top of the http_status module that is part of the refactor. > > Changes from v2 to v3: > > [PATCH proxmox v3 1/1] fix #6939: support providers returning 204 for nonce > requests > * Rename `http_success` module to `http_status` > > [PATCH proxmox-backup v3 1/1] acme: accept HTTP 204 from newNonce endpoint > * Replace `http_success` usage > > Changes from v1 to v2: > > [PATCH proxmox v2 1/1] fix #6939: support providers returning 204 for nonce > requests > * Introduced `http_success` module to contain the http success codes > * Replaced `Vec` with `&[u16]` for expected codes to avoid > allocations. > * Clarified the PVEs Perl ACME client behaviour in the commit message. > > [PATCH proxmox-backup v2 1/1] acme: accept HTTP 204 from newNonce endpoint > * Integrated the `http_success` module, replacing `Vec` with `&[u16]` > * Clarified the PVEs Perl ACME client behaviour in the commit message. 
> > [1] Bugzilla report #6939: > [https://bugzilla.proxmox.com/show_bug.cgi?id=6939](https://bugzilla.proxmox.com/show_bug.cgi?id=6939) > [2] RFC 8555 (ACME): > [https://datatracker.ietf.org/doc/html/rfc8555/#section-7.2](https://datatracker.ietf.org/doc/html/rfc8555/#section-7.2) > [3] PVE?s Perl ACME client (allow 2xx codes for nonce requests): > [https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l597](https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l597) > [4] Pebble ACME server: > [https://github.com/letsencrypt/pebble](https://github.com/letsencrypt/pebble) > [5] Pebble ACME server (perform GET request: > [https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l219](https://git.proxmox.com/?p=proxmox-acme.git;a=blob;f=src/PVE/ACME.pm;h=f1e9bb7d316e3cea1e376c610b0479119217aecc;hb=HEAD#l219) > > proxmox-backup: > > Samuel Rufinatscha (4): > acme: include proxmox-acme-api dependency > acme: drop local AcmeClient > acme: change API impls to use proxmox-acme-api handlers > acme: certificate ordering through proxmox-acme-api > > Cargo.toml | 3 + > src/acme/client.rs | 691 ------------------------- > src/acme/mod.rs | 5 - > src/acme/plugin.rs | 336 ------------ > src/api2/config/acme.rs | 407 ++------------- > src/api2/node/certificates.rs | 240 ++------- > src/api2/types/acme.rs | 98 ---- > src/api2/types/mod.rs | 3 - > src/bin/proxmox-backup-api.rs | 2 + > src/bin/proxmox-backup-manager.rs | 2 + > src/bin/proxmox-backup-proxy.rs | 1 + > src/bin/proxmox_backup_manager/acme.rs | 21 +- > src/config/acme/mod.rs | 51 +- > src/config/acme/plugin.rs | 99 +--- > src/config/node.rs | 29 +- > src/lib.rs | 2 - > 16 files changed, 103 insertions(+), 1887 deletions(-) > delete mode 100644 src/acme/client.rs > delete mode 100644 src/acme/mod.rs > delete mode 100644 
src/acme/plugin.rs > delete mode 100644 src/api2/types/acme.rs > > > proxmox: > > Samuel Rufinatscha (4): > acme-api: add helper to load client for an account > acme: reduce visibility of Request type > acme: introduce http_status module > fix #6939: acme: support servers returning 204 for nonce requests > > proxmox-acme-api/src/account_api_impl.rs | 5 +++++ > proxmox-acme-api/src/lib.rs | 3 ++- > proxmox-acme/src/account.rs | 27 +++++++++++++----------- > proxmox-acme/src/async_client.rs | 8 +++---- > proxmox-acme/src/authorization.rs | 2 +- > proxmox-acme/src/client.rs | 8 +++---- > proxmox-acme/src/lib.rs | 6 ++---- > proxmox-acme/src/order.rs | 2 +- > proxmox-acme/src/request.rs | 25 +++++++++++++++------- > 9 files changed, 51 insertions(+), 35 deletions(-) > > > Summary over all repositories: > 25 files changed, 154 insertions(+), 1922 deletions(-) > From c.ebner at proxmox.com Thu Jan 8 16:25:17 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Thu, 8 Jan 2026 16:25:17 +0100 Subject: [pbs-devel] [PATCH proxmox-backup 1/4] datastore: fix clippy too many arguments warning In-Reply-To: <20260108152520.783200-1-c.ebner@proxmox.com> References: <20260108152520.783200-1-c.ebner@proxmox.com> Message-ID: <20260108152520.783200-2-c.ebner@proxmox.com> Introduce a transient CondSweepChunkParams type to limit the function call arguments for the ChunkStore::cond_sweep_chunk() method. No functional changes. Signed-off-by: Christian Ebner --- pbs-datastore/src/chunk_store.rs | 46 +++++++++++++++++++------------- pbs-datastore/src/datastore.rs | 14 +++++----- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs index 7fe09b914..cccfcfcdf 100644 --- a/pbs-datastore/src/chunk_store.rs +++ b/pbs-datastore/src/chunk_store.rs @@ -39,6 +39,16 @@ pub struct ChunkStore { // TODO: what about sysctl setting vm.vfs_cache_pressure (0 - 100) ? 
+/// Transient type used to limit the number of function call parameters +/// for ChunkStore::cond_sweep_chunk() +pub(super) struct CondSweepChunkParams { + pub(super) atime: i64, + pub(super) min_atime: i64, + pub(super) oldest_writer: i64, + pub(super) size: u64, + pub(super) bad: bool, +} + pub fn verify_chunk_size(size: usize) -> Result<(), Error> { static SIZES: [usize; 7] = [ 64 * 1024, @@ -443,11 +453,13 @@ impl ChunkStore { unsafe { self.cond_sweep_chunk( - stat.st_atime, - min_atime, - oldest_writer, - stat.st_size as u64, - bad, + CondSweepChunkParams { + atime: stat.st_atime, + min_atime, + oldest_writer, + size: stat.st_size as u64, + bad, + }, status, || { // non-bad S3 chunks need to be removed via cache @@ -495,39 +507,35 @@ impl ChunkStore { /// FIXME: make this internal with further refactoring pub(super) unsafe fn cond_sweep_chunk Result<(), Error>>( &self, - atime: i64, - min_atime: i64, - oldest_writer: i64, - size: u64, - bad: bool, + params: CondSweepChunkParams, gc_status: &mut GarbageCollectionStatus, remove_callback: T, ) -> Result<(), Error> { - if atime < min_atime { + if params.atime < params.min_atime { if let Err(err) = remove_callback() { - if bad { + if params.bad { gc_status.still_bad += 1; } return Err(err); } - if bad { + if params.bad { gc_status.removed_bad += 1; } else { gc_status.removed_chunks += 1; } - gc_status.removed_bytes += size; - } else if atime < oldest_writer { - if bad { + gc_status.removed_bytes += params.size; + } else if params.atime < params.oldest_writer { + if params.bad { gc_status.still_bad += 1; } else { gc_status.pending_chunks += 1; } - gc_status.pending_bytes += size; + gc_status.pending_bytes += params.size; } else { - if !bad { + if !params.bad { gc_status.disk_chunks += 1; } - gc_status.disk_bytes += size; + gc_status.disk_bytes += params.size; } Ok(()) } diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index 9c57aaac1..2f401f6fd 100644 --- a/pbs-datastore/src/datastore.rs 
+++ b/pbs-datastore/src/datastore.rs @@ -39,7 +39,7 @@ use pbs_config::BackupLockGuard; use crate::backup_info::{ BackupDir, BackupGroup, BackupInfo, OLD_LOCKING, PROTECTED_MARKER_FILENAME, }; -use crate::chunk_store::ChunkStore; +use crate::chunk_store::{ChunkStore, CondSweepChunkParams}; use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter}; use crate::fixed_index::{FixedIndexReader, FixedIndexWriter}; use crate::hierarchy::{ListGroups, ListGroupsType, ListNamespaces, ListNamespacesRecursive}; @@ -1765,11 +1765,13 @@ impl DataStore { unsafe { self.inner.chunk_store.cond_sweep_chunk( - atime, - min_atime, - oldest_writer, - content.size, - bad, + CondSweepChunkParams { + atime, + min_atime, + oldest_writer, + size: content.size, + bad, + }, &mut gc_status, || { if let Some(cache) = self.cache() { -- 2.47.3 From c.ebner at proxmox.com Thu Jan 8 16:25:18 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Thu, 8 Jan 2026 16:25:18 +0100 Subject: [pbs-devel] [PATCH proxmox-backup 2/4] tree-wide: fix clippy warnings needless borrow In-Reply-To: <20260108152520.783200-1-c.ebner@proxmox.com> References: <20260108152520.783200-1-c.ebner@proxmox.com> Message-ID: <20260108152520.783200-3-c.ebner@proxmox.com> Omit all needless borrows currently reported by clippy run. No functional changes. 
Signed-off-by: Christian Ebner --- pbs-datastore/src/chunk_store.rs | 2 +- pbs-datastore/src/datastore.rs | 4 ++-- src/api2/admin/datastore.rs | 4 ++-- src/auth.rs | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs index cccfcfcdf..315e18218 100644 --- a/pbs-datastore/src/chunk_store.rs +++ b/pbs-datastore/src/chunk_store.rs @@ -758,7 +758,7 @@ impl ChunkStore { let gid = pbs_config::backup_group()?.gid; create_options = create_options.owner(uid).group(gid); } - proxmox_sys::fs::replace_file(&path, &[], create_options, false) + proxmox_sys::fs::replace_file(path, &[], create_options, false) } /// Mark chunk as expected to be present by writing a file the chunk store. diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index 2f401f6fd..83d209414 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -2007,7 +2007,7 @@ impl DataStore { ) -> Result<(bool, u64), Error> { match backend { DatastoreBackend::Filesystem => self.inner.chunk_store.insert_chunk(chunk, digest), - DatastoreBackend::S3(s3_client) => self.insert_chunk_cached(chunk, digest, &s3_client), + DatastoreBackend::S3(s3_client) => self.insert_chunk_cached(chunk, digest, s3_client), } } @@ -2062,7 +2062,7 @@ impl DataStore { // or the chunk marker file exists on filesystem. The latter means the chunk has // been uploaded in the past, but was evicted from the LRU cache since but was not // cleaned up by garbage collection, so contained in the S3 object store. 
- if self.cache_contains(&digest) { + if self.cache_contains(digest) { tracing::info!("Skip upload of cached chunk {}", hex::encode(digest)); return Ok((true, chunk_size)); } diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs index ddd894c12..88ad5d53b 100644 --- a/src/api2/admin/datastore.rs +++ b/src/api2/admin/datastore.rs @@ -2734,8 +2734,8 @@ pub fn s3_refresh(store: String, rpcenv: &mut dyn RpcEnvironment) -> Result Result<(), Error> { - let datastore = DataStore::lookup_datastore(&store, Some(Operation::Lookup))?; - run_maintenance_locked(&store, MaintenanceType::S3Refresh, worker, || { + let datastore = DataStore::lookup_datastore(store, Some(Operation::Lookup))?; + run_maintenance_locked(store, MaintenanceType::S3Refresh, worker, || { proxmox_async::runtime::block_on(datastore.s3_refresh()) }) } diff --git a/src/auth.rs b/src/auth.rs index a930d8cd9..24bb3f753 100644 --- a/src/auth.rs +++ b/src/auth.rs @@ -475,7 +475,7 @@ impl proxmox_auth_api::api::AuthContext for PbsAuthContext { path_vec.push(part); } } - user_info.check_privs(&auth_id, &path_vec, *privilege, false)?; + user_info.check_privs(auth_id, &path_vec, *privilege, false)?; return Ok(Some(true)); } } -- 2.47.3 From c.ebner at proxmox.com Thu Jan 8 16:25:20 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Thu, 8 Jan 2026 16:25:20 +0100 Subject: [pbs-devel] [PATCH proxmox-backup 4/4] fix #7219: client: mount: align encryption key loading behavior In-Reply-To: <20260108152520.783200-1-c.ebner@proxmox.com> References: <20260108152520.783200-1-c.ebner@proxmox.com> Message-ID: <20260108152520.783200-5-c.ebner@proxmox.com> The mount subcommand currently does not load the encryption key from the default key location, requiring to explicitly set the `keyfile` parameter on command invocation. Align the behaviour to the rest of the client commands by using the pbs_client::tools::key_source::crypto_parameters helper to load the key instead. 
The same current behaviour for the benchmark command is not touched, as there loading the encryption key should always be controlled by explicitly setting it, to avoid possible pitfalls. Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=7219 Signed-off-by: Christian Ebner --- proxmox-backup-client/src/mount.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/proxmox-backup-client/src/mount.rs b/proxmox-backup-client/src/mount.rs index fa3385597..e815c8a9c 100644 --- a/proxmox-backup-client/src/mount.rs +++ b/proxmox-backup-client/src/mount.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use std::ffi::OsStr; use std::hash::BuildHasher; use std::os::unix::io::{AsRawFd, OwnedFd}; -use std::path::{Path, PathBuf}; +use std::path::Path; use std::sync::Arc; use anyhow::{bail, format_err, Error}; @@ -18,11 +18,13 @@ use proxmox_schema::*; use proxmox_sortable_macro::sortable; use pbs_api_types::{ArchiveType, BackupArchiveName, BackupNamespace}; -use pbs_client::tools::key_source::get_encryption_key_password; +use pbs_client::tools::key_source::{ + crypto_parameters, format_key_source, get_encryption_key_password, +}; use pbs_client::{BackupReader, RemoteChunkReader}; use pbs_datastore::cached_chunk_reader::CachedChunkReader; use pbs_datastore::index::IndexFile; -use pbs_key_config::load_and_decrypt_key; +use pbs_key_config::decrypt_key; use pbs_tools::crypt_config::CryptConfig; use pbs_tools::json::required_string_param; @@ -208,14 +210,16 @@ async fn mount_do(param: Value, pipe: Option) -> Result { let path = required_string_param(&param, "snapshot")?; let backup_dir = dir_or_last_from_group(&client, &repo, &backup_ns, path).await?; - let keyfile = param["keyfile"].as_str().map(PathBuf::from); - let crypt_config = match keyfile { + let crypto = crypto_parameters(&param)?; + + let crypt_config = match crypto.enc_key { None => None, - Some(path) => { - log::info!("Encryption key file: '{:?}'", path); - let (key, _, fingerprint) = 
load_and_decrypt_key(&path, &get_encryption_key_password)?; - log::info!("Encryption key fingerprint: '{}'", fingerprint); - Some(Arc::new(CryptConfig::new(key)?)) + Some(key) => { + log::info!("{}", format_key_source(&key.source, "encryption")); + let (key, _created, fingerprint) = decrypt_key(&key.key, &get_encryption_key_password)?; + log::info!("Encryption key fingerprint: '{fingerprint}'"); + let crypt_config = CryptConfig::new(key)?; + Some(Arc::new(crypt_config)) } }; -- 2.47.3 From c.ebner at proxmox.com Thu Jan 8 16:25:16 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Thu, 8 Jan 2026 16:25:16 +0100 Subject: [pbs-devel] [PATCH proxmox-backup 0/4] fix #7219: align encryption key loading behavior for mount Message-ID: <20260108152520.783200-1-c.ebner@proxmox.com> Align the proxmox-backup-client mount subcommand behavior to the behavior of other subcommands with respect to encryption key loading. Previously this did not load the encryption key from the default location, if present. 
While at it, also fix issues reported by cargo clippy in the first 3 patches of the series Christian Ebner (4): datastore: fix clippy too many arguments warning tree-wide: fix clippy warnings needless borrow api: access: silence too many arguments warning on api handler fix #7219: client: mount: align encryption key loading behavior pbs-datastore/src/chunk_store.rs | 48 +++++++++++++++++------------- pbs-datastore/src/datastore.rs | 18 ++++++----- proxmox-backup-client/src/mount.rs | 24 ++++++++------- src/api2/access/user.rs | 1 + src/api2/admin/datastore.rs | 4 +-- src/auth.rs | 2 +- 6 files changed, 56 insertions(+), 41 deletions(-) -- 2.47.3 From c.ebner at proxmox.com Thu Jan 8 16:25:19 2026 From: c.ebner at proxmox.com (Christian Ebner) Date: Thu, 8 Jan 2026 16:25:19 +0100 Subject: [pbs-devel] [PATCH proxmox-backup 3/4] api: access: silence too many arguments warning on api handler In-Reply-To: <20260108152520.783200-1-c.ebner@proxmox.com> References: <20260108152520.783200-1-c.ebner@proxmox.com> Message-ID: <20260108152520.783200-4-c.ebner@proxmox.com> To keep the easy to read function signature for the api method. 
Signed-off-by: Christian Ebner --- src/api2/access/user.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/api2/access/user.rs b/src/api2/access/user.rs index a8dd4c0d0..3b4cf1214 100644 --- a/src/api2/access/user.rs +++ b/src/api2/access/user.rs @@ -598,6 +598,7 @@ pub enum DeletableTokenProperty { }, )] /// Update user's API token metadata +#[allow(clippy::too_many_arguments)] pub fn update_token( userid: Userid, token_name: Tokenname, -- 2.47.3 From d.herzig at proxmox.com Fri Jan 9 13:33:24 2026 From: d.herzig at proxmox.com (Daniel Herzig) Date: Fri, 9 Jan 2026 13:33:24 +0100 Subject: [pbs-devel] [PATCH proxmox-backup 4/4] fix #7219: client: mount: align encryption key loading behavior In-Reply-To: <20260108152520.783200-5-c.ebner@proxmox.com> References: <20260108152520.783200-1-c.ebner@proxmox.com> <20260108152520.783200-5-c.ebner@proxmox.com> Message-ID: <2295b7b9-27d9-4886-8970-54d4753430d2@proxmox.com> Thanks --?I just gave this a quick spin and works as expected here (no need to pass the default key location anymore). Tested-by: Daniel Herzig On 1/8/26 4:25 PM, Christian Ebner wrote: > The mount subcommand currently does not load the encrypton key from > the default key location, requiring to explicitley set the `keyfile` > parameter on command invocation. > > Align the behaviour to the rest of the client commands by using the > pbs_client::tools::key_source::crypto_parameters helper to load the > key instead. > > The same current behaviour for the benchmark command is not touched, > as there loading the encryption key should always be conrolled by > explicitley setting it, to avoid possible pitfalls. 
> > Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=7219 > Signed-off-by: Christian Ebner > --- > proxmox-backup-client/src/mount.rs | 24 ++++++++++++++---------- > 1 file changed, 14 insertions(+), 10 deletions(-) > > diff --git a/proxmox-backup-client/src/mount.rs b/proxmox-backup-client/src/mount.rs > index fa3385597..e815c8a9c 100644 > --- a/proxmox-backup-client/src/mount.rs > +++ b/proxmox-backup-client/src/mount.rs > @@ -2,7 +2,7 @@ use std::collections::HashMap; > use std::ffi::OsStr; > use std::hash::BuildHasher; > use std::os::unix::io::{AsRawFd, OwnedFd}; > -use std::path::{Path, PathBuf}; > +use std::path::Path; > use std::sync::Arc; > > use anyhow::{bail, format_err, Error}; > @@ -18,11 +18,13 @@ use proxmox_schema::*; > use proxmox_sortable_macro::sortable; > > use pbs_api_types::{ArchiveType, BackupArchiveName, BackupNamespace}; > -use pbs_client::tools::key_source::get_encryption_key_password; > +use pbs_client::tools::key_source::{ > + crypto_parameters, format_key_source, get_encryption_key_password, > +}; > use pbs_client::{BackupReader, RemoteChunkReader}; > use pbs_datastore::cached_chunk_reader::CachedChunkReader; > use pbs_datastore::index::IndexFile; > -use pbs_key_config::load_and_decrypt_key; > +use pbs_key_config::decrypt_key; > use pbs_tools::crypt_config::CryptConfig; > use pbs_tools::json::required_string_param; > > @@ -208,14 +210,16 @@ async fn mount_do(param: Value, pipe: Option) -> Result { > let path = required_string_param(¶m, "snapshot")?; > let backup_dir = dir_or_last_from_group(&client, &repo, &backup_ns, path).await?; > > - let keyfile = param["keyfile"].as_str().map(PathBuf::from); > - let crypt_config = match keyfile { > + let crypto = crypto_parameters(¶m)?; > + > + let crypt_config = match crypto.enc_key { > None => None, > - Some(path) => { > - log::info!("Encryption key file: '{:?}'", path); > - let (key, _, fingerprint) = load_and_decrypt_key(&path, &get_encryption_key_password)?; > - log::info!("Encryption key 
fingerprint: '{}'", fingerprint); > - Some(Arc::new(CryptConfig::new(key)?)) > + Some(key) => { > + log::info!("{}", format_key_source(&key.source, "encryption")); > + let (key, _created, fingerprint) = decrypt_key(&key.key, &get_encryption_key_password)?; > + log::info!("Encryption key fingerprint: '{fingerprint}'"); > + let crypt_config = CryptConfig::new(key)?; > + Some(Arc::new(crypt_config)) > } > }; > From r.obkircher at proxmox.com Fri Jan 9 18:35:36 2026 From: r.obkircher at proxmox.com (Robert Obkircher) Date: Fri, 9 Jan 2026 18:35:36 +0100 Subject: [pbs-devel] [PATCH v3 proxmox-backup 1/5] fix #3847: datastore: support writing fidx files of unknown size In-Reply-To: <20260109173548.301653-1-r.obkircher@proxmox.com> References: <20260109173548.301653-1-r.obkircher@proxmox.com> Message-ID: <20260109173548.301653-2-r.obkircher@proxmox.com> Use mremap and ftruncate to support growable FixedIndexWriters. Grow exponentially from a small initial index size for efficiency. Truncate excessive capacity after encountering a non-full block or on close. 
Signed-off-by: Robert Obkircher --- pbs-datastore/src/datastore.rs | 2 +- pbs-datastore/src/fixed_index.rs | 120 +++++++++++++++++++++++++++++-- 2 files changed, 117 insertions(+), 5 deletions(-) diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs index 9c57aaac..af712726 100644 --- a/pbs-datastore/src/datastore.rs +++ b/pbs-datastore/src/datastore.rs @@ -591,7 +591,7 @@ impl DataStore { pub fn create_fixed_writer>( &self, filename: P, - size: usize, + size: Option, chunk_size: usize, ) -> Result { let index = FixedIndexWriter::create( diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs index 6c3be2d4..8036a519 100644 --- a/pbs-datastore/src/fixed_index.rs +++ b/pbs-datastore/src/fixed_index.rs @@ -219,9 +219,12 @@ pub struct FixedIndexWriter { chunk_size: usize, size: usize, index_length: usize, + index_capacity: usize, index: *mut u8, pub uuid: [u8; 16], pub ctime: i64, + growable_size: bool, + write_size_on_close: bool, } // `index` is mmap()ed which cannot be thread-local so should be sendable @@ -237,12 +240,18 @@ impl Drop for FixedIndexWriter { } impl FixedIndexWriter { + /// The initial capacity, if the total size is unknown. + /// + /// This capacity takes up the same amount of space as the header + /// and can refer to 128 Blocks * 4 MiB/Block = 512 MiB of content. 
+ const INITIAL_CAPACITY: usize = 4096 / 32; + #[allow(clippy::cast_ptr_alignment)] // Requires obtaining a shared chunk store lock beforehand pub fn create( store: Arc, path: &Path, - size: usize, + known_size: Option, chunk_size: usize, ) -> Result { let full_path = store.relative_path(path); @@ -264,6 +273,7 @@ impl FixedIndexWriter { } let ctime = proxmox_time::epoch_i64(); + let size = known_size.unwrap_or(0); let uuid = Uuid::generate(); @@ -280,8 +290,12 @@ impl FixedIndexWriter { file.write_all(&buffer)?; - let index_length = size.div_ceil(chunk_size); - let index_size = index_length * 32; + let (index_length, index_capacity) = known_size + .map(|s| s.div_ceil(chunk_size)) + .map(|len| (len, len)) + .unwrap_or((0, Self::INITIAL_CAPACITY)); + + let index_size = index_capacity * 32; nix::unistd::ftruncate(&file, (header_size + index_size) as i64)?; let data = unsafe { @@ -305,12 +319,87 @@ impl FixedIndexWriter { chunk_size, size, index_length, + index_capacity, index: data, ctime, uuid: *uuid.as_bytes(), + growable_size: known_size.is_none(), + write_size_on_close: known_size.is_none(), + }) + } + + /// If this returns an error, the sizes may be out of sync, + /// which is especially bad if the capacity was reduced. 
+ fn set_index_capacity(&mut self, new_capacity: usize) -> Result<(), Error> { + if new_capacity == self.index_capacity { + return Ok(()); + } + let old_index_size = self.index_capacity * 32; + let new_index_size = new_capacity * 32; + let new_file_size = (size_of::() + new_index_size) as i64; + + let index_addr = NonNull::new(self.index as *mut std::ffi::c_void).ok_or_else(|| { + format_err!("Can't resize FixedIndexWriter index because the index pointer is null.") + })?; + + nix::unistd::ftruncate(&self.file, new_file_size)?; + + let new_index = unsafe { + nix::sys::mman::mremap( + index_addr, + old_index_size, + new_index_size, + nix::sys::mman::MRemapFlags::MREMAP_MAYMOVE, + None, + ) + }?; + + self.index = new_index.as_ptr().cast::(); + self.index_capacity = new_capacity; + Ok(()) + } + + /// Unmapping ensures future add and close operations fail. + fn set_index_capacity_or_unmap(&mut self, new_capacity: usize) -> Result<(), Error> { + self.set_index_capacity(new_capacity).map_err(|e| { + let unmap_result = self.unmap(); + let message = format!( + "failed to resize index capacity from {} to {new_capacity} with backing file: {:?}", + self.index_capacity, self.tmp_filename + ); + assert!(self.index.is_null(), "{message} {unmap_result:?}"); + e.context(message) }) } + /// Increase the content size to be at least `requested_size` and + /// ensure there is enough capacity. + /// + /// Only writers that were created without a known size can grow. + /// The size also becomes fixed as soon as it is no longer divisible + /// by the block size, to ensure that only the last block can be + /// smaller. 
+ pub fn grow_to_size(&mut self, requested_size: usize) -> Result<(), Error> { + if self.size < requested_size { + if !self.growable_size { + bail!("refusing to resize from {} to {requested_size}", self.size); + } + let new_len = requested_size.div_ceil(self.chunk_size); + if new_len * self.chunk_size != requested_size { + // not a full chunk, so this must be the last one + self.growable_size = false; + self.set_index_capacity_or_unmap(new_len)?; + } else if new_len > self.index_capacity { + self.set_index_capacity_or_unmap(new_len.next_power_of_two())?; + }; + assert!(new_len <= self.index_capacity); + self.index_length = new_len; + self.size = requested_size; + } + Ok(()) + } + + /// The current length of the index. This may be increased with [`grow_to_size`]. pub fn index_length(&self) -> usize { self.index_length } @@ -320,7 +409,7 @@ impl FixedIndexWriter { return Ok(()); }; - let index_size = self.index_length * 32; + let index_size = self.index_capacity * 32; if let Err(err) = unsafe { nix::sys::mman::munmap(index, index_size) } { bail!("unmap file {:?} failed - {}", self.tmp_filename, err); @@ -342,9 +431,24 @@ impl FixedIndexWriter { self.unmap()?; + if self.index_length == 0 { + bail!("refusing to close empty fidx file {:?}", self.tmp_filename); + } else if self.index_length < self.index_capacity { + let file_size = size_of::() + index_size; + nix::unistd::ftruncate(&self.file, file_size as i64)?; + self.index_capacity = self.index_length; + } + let csum_offset = std::mem::offset_of!(FixedIndexHeader, index_csum); self.file.seek(SeekFrom::Start(csum_offset as u64))?; self.file.write_all(&index_csum)?; + + if self.write_size_on_close { + let size_offset = std::mem::offset_of!(FixedIndexHeader, size); + self.file.seek(SeekFrom::Start(size_offset as u64))?; + self.file.write_all(&(self.size as u64).to_le_bytes())?; + } + self.file.flush()?; if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { @@ -407,6 +511,14 @@ impl FixedIndexWriter { } 
pub fn clone_data_from(&mut self, reader: &FixedIndexReader) -> Result<(), Error> { + if self.growable_size { + bail!("reusing the index is only supported with known input size"); + } + + if self.chunk_size != reader.chunk_size { + bail!("can't reuse file with different chunk size"); + } + if self.index_length != reader.index_count() { bail!("clone_data_from failed - index sizes not equal"); } -- 2.47.3 From r.obkircher at proxmox.com Fri Jan 9 18:35:35 2026 From: r.obkircher at proxmox.com (Robert Obkircher) Date: Fri, 9 Jan 2026 18:35:35 +0100 Subject: [pbs-devel] [PATCH v3 proxmox-backup 0/5] fix: #3847 pipe from STDIN to proxmox-backup-client Message-ID: <20260109173548.301653-1-r.obkircher@proxmox.com> Add support for commands like: ssh host cmd | proxmox-backup-client backup data.img:/dev/stdin proxmox-backup-client backup a.img:<(mysqldump) b.img:<(pgdump) Changes since v2: client: - Renamed ChunkSize to IndexType as suggested. datastore: - Introduced explicit `index_capacity` field. - Previously I (ab)used `index_length` for the capacity and computed the actual index length from `size`. - This removes the assert mentioned in the review. - Renamed INITIAL_CHUNKS_IF_UNKNOWN to INITIAL_CAPACITY and increased it to 128 blocks which is enough for 512 MiB of content. - Only remap when capacity increases, not every time size increases. - Increase capacity to next power of 2 instead of by 1.5x. - This is less code and file systems probably don't do in place updates where 1.5x is theoretically better. - Ensure that add_digest and close fail after remap errors. - Keep seek+write_all instead of write_all_at to minimize changes. - Improved error messages in clone_data_from. - Refuse creation of empty files because the original also did that. - Added tests for FixedIndexWriter: - Is it okay to write to a directory in the cwd? - This was inspired by `chunk_store::test_chunk_store1` - Removed test script. 
Changes since v1: - use mremap+ftruncate instead of write_all_at - make the size API parameter optional instead of using 0 - use an enum to represent fixed/dynamic chunk size in UploadOptions - alias "-" to "/dev/stdin" - split changes into separate commits Robert Obkircher (5): fix #3847: datastore: support writing fidx files of unknown size fix #3847: datastore: test FixedIndexWriter fix #3847: api: backup: make fixed index file size optional fix #3847: client: support fifo pipe inputs for images fix #3847: client: treat minus sign as stdin pbs-client/src/backup_writer.rs | 38 ++-- pbs-datastore/src/datastore.rs | 2 +- pbs-datastore/src/fixed_index.rs | 304 +++++++++++++++++++++++++++++- proxmox-backup-client/src/main.rs | 37 ++-- src/api2/backup/environment.rs | 8 +- src/api2/backup/mod.rs | 4 +- src/server/push.rs | 11 +- 7 files changed, 366 insertions(+), 38 deletions(-) -- 2.47.3 From r.obkircher at proxmox.com Fri Jan 9 18:35:37 2026 From: r.obkircher at proxmox.com (Robert Obkircher) Date: Fri, 9 Jan 2026 18:35:37 +0100 Subject: [pbs-devel] [PATCH v3 proxmox-backup 2/5] fix #3847: datastore: test FixedIndexWriter In-Reply-To: <20260109173548.301653-1-r.obkircher@proxmox.com> References: <20260109173548.301653-1-r.obkircher@proxmox.com> Message-ID: <20260109173548.301653-3-r.obkircher@proxmox.com> Create a dummy chunk store and write fidx files with fixed and dynamically sized writers. Compare the resulting binary files directly (ignoring uuid and ctime) and also read them back using the reader. The chunk hashes are made up and don't actually exist in the chunk store. 
Signed-off-by: Robert Obkircher --- pbs-datastore/src/fixed_index.rs | 184 +++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) diff --git a/pbs-datastore/src/fixed_index.rs b/pbs-datastore/src/fixed_index.rs index 8036a519..a20edc94 100644 --- a/pbs-datastore/src/fixed_index.rs +++ b/pbs-datastore/src/fixed_index.rs @@ -530,3 +530,187 @@ impl FixedIndexWriter { Ok(()) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::chunk_store::ChunkStore; + use pbs_api_types::DatastoreFSyncLevel; + use std::fs; + use std::sync::Arc; + + const CS: usize = 4096; + + #[test] + fn test_fixed_index_writer() { + let mut testdir = fs::canonicalize(".").unwrap(); + testdir.push(".testdir_fixed_index"); + + if let Err(_e) = std::fs::remove_dir_all(&testdir) { /* ignore */ } + + // this is a lot faster than ChunkStore::create, which takes almost 3 seconds + let store = { + let mut chunks = PathBuf::from(&testdir); + chunks.push(".chunks"); + fs::create_dir_all(chunks).unwrap(); + Arc::new(ChunkStore::open("test", &testdir, DatastoreFSyncLevel::None).unwrap()) + }; + + test_empty(store.clone()); + test_smaller_than_initial_capacity(store.clone()); + test_grow_to_multiples_of_chunk_size(store.clone()); + test_grow_to_fixed_size(store); + + std::fs::remove_dir_all(&testdir).unwrap(); + } + + fn test_empty(store: Arc) { + let path = Path::new("test_empty"); + let mut w = FixedIndexWriter::create(store, path, None, CS).unwrap(); + + assert_eq!(0, w.index_length(), "returns length, not capacity"); + assert!(w.add_digest(0, &[1u8; 32]).is_err(), "out of bounds"); + assert!(w.close().is_err(), "should refuse to create empty file"); + } + + fn test_smaller_than_initial_capacity(store: Arc) { + let path = Path::new("test_smaller_than_initial_capacity"); + let mut w = FixedIndexWriter::create(store.clone(), path, None, CS).unwrap(); + + let size = CS - 1; + let expected = test_data(size); + w.grow_to_size(size).unwrap(); + expected[0].add_to(&mut w); + + w.close().unwrap(); 
+ drop(w); + + check_with_reader(&store.relative_path(path), size, &expected); + compare_to_known_size_writer(store, path, size, &expected); + } + + fn test_grow_to_multiples_of_chunk_size(store: Arc) { + let path = Path::new("test_grow_to_multiples_of_chunk_size"); + let mut w = FixedIndexWriter::create(store.clone(), path, None, CS).unwrap(); + + let initial = FixedIndexWriter::INITIAL_CAPACITY; + let steps = [1, 2, initial, initial + 1, 5 * initial, 10 * initial + 1]; + let expected = test_data(steps.last().unwrap() * CS); + + let mut begin = 0; + for chunk_count in steps { + let last = &expected[chunk_count - 1]; + w.grow_to_size(last.end).unwrap(); + assert_eq!(last.index + 1, w.index_length()); + assert!(w.add_digest(last.index + 1, &[1u8; 32]).is_err()); + + for c in expected[begin..chunk_count].iter().rev() { + c.add_to(&mut w); + } + begin = chunk_count; + } + w.close().unwrap(); + drop(w); + + let size = expected.len() * CS; + check_with_reader(&store.relative_path(path), size, &expected); + compare_to_known_size_writer(store, path, size, &expected); + } + + fn test_grow_to_fixed_size(store: Arc) { + let path = Path::new("test_grow_to_fixed_size"); + let mut w = FixedIndexWriter::create(store.clone(), path, None, CS).unwrap(); + + let size = (FixedIndexWriter::INITIAL_CAPACITY + 42) * CS - 1; // last is not full + let expected = test_data(size); + + w.grow_to_size(size).unwrap(); + assert!(w.grow_to_size(size + 1).is_err(), "size must be fixed now"); + assert_eq!(expected.len(), w.index_length()); + assert!(w.add_digest(expected.len(), &[1u8; 32]).is_err()); + + for c in expected.iter().rev() { + c.add_to(&mut w); + } + + w.close().unwrap(); + drop(w); + + check_with_reader(&store.relative_path(path), size, &expected); + compare_to_known_size_writer(store, path, size, &expected); + } + + struct TestChunk { + digest: [u8; 32], + index: usize, + size: usize, + end: usize, + } + + impl TestChunk { + fn add_to(&self, w: &mut FixedIndexWriter) { + assert_eq!( 
+ self.index, + w.check_chunk_alignment(self.end, self.size).unwrap() + ); + w.add_digest(self.index, &self.digest).unwrap(); + } + } + + fn test_data(size: usize) -> Vec { + (0..size.div_ceil(CS)) + .map(|index| { + let mut digest = [0u8; 32]; + let i = &(index as u64).to_le_bytes(); + for c in digest.chunks_mut(i.len()) { + c.copy_from_slice(i); + } + let size = if ((index + 1) * CS) <= size { + CS + } else { + size % CS + }; + TestChunk { + digest, + index, + size, + end: index * CS + size, + } + }) + .collect() + } + + fn check_with_reader(path: &Path, size: usize, chunks: &[TestChunk]) { + let reader = FixedIndexReader::open(path).unwrap(); + assert_eq!(size as u64, reader.index_bytes()); + assert_eq!(chunks.len(), reader.index_count()); + for c in chunks { + assert_eq!(&c.digest, reader.index_digest(c.index).unwrap()); + } + } + + fn compare_to_known_size_writer( + store: Arc, + name: &Path, + size: usize, + chunks: &[TestChunk], + ) { + let mut path = PathBuf::from(name); + path.set_extension("reference"); + let mut w = FixedIndexWriter::create(store.clone(), &path, Some(size), CS).unwrap(); + for c in chunks { + c.add_to(&mut w); + } + w.close().unwrap(); + drop(w); + + let mut reference = fs::read(store.relative_path(&path)).unwrap(); + let mut tested = fs::read(store.relative_path(name)).unwrap(); + + // ignore uuid and ctime + reference[8..32].fill(0); + tested[8..32].fill(0); + + assert_eq!(reference, tested); + } +} -- 2.47.3 From r.obkircher at proxmox.com Fri Jan 9 18:35:38 2026 From: r.obkircher at proxmox.com (Robert Obkircher) Date: Fri, 9 Jan 2026 18:35:38 +0100 Subject: [pbs-devel] [PATCH v3 proxmox-backup 3/5] fix #3847: api: backup: make fixed index file size optional In-Reply-To: <20260109173548.301653-1-r.obkircher@proxmox.com> References: <20260109173548.301653-1-r.obkircher@proxmox.com> Message-ID: <20260109173548.301653-4-r.obkircher@proxmox.com> Grow the FixedIndexWriter as necessary and update the duplicate size in BackupEnvironment. 
Signed-off-by: Robert Obkircher --- src/api2/backup/environment.rs | 8 ++++++-- src/api2/backup/mod.rs | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs index bd9c5211..77361724 100644 --- a/src/api2/backup/environment.rs +++ b/src/api2/backup/environment.rs @@ -349,7 +349,7 @@ impl BackupEnvironment { &self, index: FixedIndexWriter, name: String, - size: usize, + size: Option, chunk_size: u32, incremental: bool, ) -> Result { @@ -365,7 +365,7 @@ impl BackupEnvironment { index, name, chunk_count: 0, - size, + size: size.unwrap_or(0), chunk_size, small_chunk_count: 0, upload_stat: UploadStatistic::new(), @@ -443,7 +443,11 @@ impl BackupEnvironment { } let end = (offset as usize) + (size as usize); + data.index.grow_to_size(end)?; let idx = data.index.check_chunk_alignment(end, size as usize)?; + if end > data.size { + data.size = end; + } data.chunk_count += 1; diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs index 3e6b7a95..c2822c18 100644 --- a/src/api2/backup/mod.rs +++ b/src/api2/backup/mod.rs @@ -456,7 +456,7 @@ pub const API_METHOD_CREATE_FIXED_INDEX: ApiMethod = ApiMethod::new( ("archive-name", false, &BACKUP_ARCHIVE_NAME_SCHEMA), ( "size", - false, + true, &IntegerSchema::new("File size.").minimum(1).schema() ), ( @@ -480,7 +480,7 @@ fn create_fixed_index( let env: &BackupEnvironment = rpcenv.as_ref(); let name = required_string_param(¶m, "archive-name")?.to_owned(); - let size = required_integer_param(¶m, "size")? 
as usize; + let size = param["size"].as_u64().map(usize::try_from).transpose()?; let reuse_csum = param["reuse-csum"].as_str(); let archive_name = name.clone(); -- 2.47.3 From r.obkircher at proxmox.com Fri Jan 9 18:35:39 2026 From: r.obkircher at proxmox.com (Robert Obkircher) Date: Fri, 9 Jan 2026 18:35:39 +0100 Subject: [pbs-devel] [PATCH v3 proxmox-backup 4/5] fix #3847: client: support fifo pipe inputs for images In-Reply-To: <20260109173548.301653-1-r.obkircher@proxmox.com> References: <20260109173548.301653-1-r.obkircher@proxmox.com> Message-ID: <20260109173548.301653-5-r.obkircher@proxmox.com> Accept fifo files as inputs for image backups. The unknown size is represented in the UploadOptions using a new IndexType enum that now stores an optional total size for the Fixed variant. Signed-off-by: Robert Obkircher --- pbs-client/src/backup_writer.rs | 38 ++++++++++++++++++++++--------- proxmox-backup-client/src/main.rs | 30 +++++++++++++----------- src/server/push.rs | 11 +++++---- 3 files changed, 50 insertions(+), 29 deletions(-) diff --git a/pbs-client/src/backup_writer.rs b/pbs-client/src/backup_writer.rs index dbd177d8..f33f1063 100644 --- a/pbs-client/src/backup_writer.rs +++ b/pbs-client/src/backup_writer.rs @@ -52,7 +52,17 @@ pub struct UploadOptions { pub previous_manifest: Option>, pub compress: bool, pub encrypt: bool, - pub fixed_size: Option, + pub index_type: IndexType, +} + +/// Index type for upload options. +#[derive(Default, Clone)] +pub enum IndexType { + /// Dynamic chunking. + #[default] + Dynamic, + /// Fixed size chunking with optional image file size. 
+ Fixed(Option), } struct ChunkUploadResponse { @@ -292,11 +302,14 @@ impl BackupWriter { options: UploadOptions, ) -> Result { let mut param = json!({ "archive-name": archive_name }); - let prefix = if let Some(size) = options.fixed_size { - param["size"] = size.into(); - "fixed" - } else { - "dynamic" + let prefix = match options.index_type { + IndexType::Fixed(image_file_size) => { + if let Some(size) = image_file_size { + param["size"] = size.into(); + } + "fixed" + } + IndexType::Dynamic => "dynamic", }; if options.encrypt && self.crypt_config.is_none() { @@ -387,11 +400,14 @@ impl BackupWriter { let known_chunks = Arc::new(Mutex::new(HashSet::new())); let mut param = json!({ "archive-name": archive_name }); - let prefix = if let Some(size) = options.fixed_size { - param["size"] = size.into(); - "fixed" - } else { - "dynamic" + let prefix = match options.index_type { + IndexType::Fixed(image_file_size) => { + if let Some(size) = image_file_size { + param["size"] = size.into(); + } + "fixed" + } + IndexType::Dynamic => "dynamic", }; if options.encrypt && self.crypt_config.is_none() { diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs index 999e5020..7fc711fd 100644 --- a/proxmox-backup-client/src/main.rs +++ b/proxmox-backup-client/src/main.rs @@ -46,7 +46,7 @@ use pbs_client::tools::{ use pbs_client::{ delete_ticket_info, parse_backup_specification, view_task_result, BackupDetectionMode, BackupReader, BackupRepository, BackupSpecificationType, BackupStats, BackupWriter, - BackupWriterOptions, ChunkStream, FixedChunkStream, HttpClient, InjectionData, + BackupWriterOptions, ChunkStream, FixedChunkStream, HttpClient, IndexType, InjectionData, PxarBackupStream, RemoteChunkReader, UploadOptions, BACKUP_SOURCE_SCHEMA, }; use pbs_datastore::catalog::{BackupCatalogWriter, CatalogReader, CatalogWriter}; @@ -205,7 +205,7 @@ async fn backup_directory>( pxar_create_options: pbs_client::pxar::PxarCreateOptions, upload_options: UploadOptions, 
) -> Result<(BackupStats, Option), Error> { - if upload_options.fixed_size.is_some() { + if let IndexType::Fixed(_) = upload_options.index_type { bail!("cannot backup directory with fixed chunk size!"); } @@ -295,7 +295,7 @@ async fn backup_image>( let stream = FixedChunkStream::new(stream, chunk_size.unwrap_or(4 * 1024 * 1024)); - if upload_options.fixed_size.is_none() { + if let IndexType::Dynamic = upload_options.index_type { bail!("cannot backup image with dynamic chunk size!"); } @@ -859,15 +859,17 @@ async fn create_backup( upload_list.push((BackupSpecificationType::PXAR, filename, target, "didx", 0)); } BackupSpecificationType::IMAGE => { - if !(file_type.is_file() || file_type.is_block_device()) { - bail!("got unexpected file type (expected file or block device)"); - } - - let size = image_size(&PathBuf::from(&filename))?; - - if size == 0 { - bail!("got zero-sized file '{}'", filename); - } + let size = if file_type.is_file() || file_type.is_block_device() { + let size = image_size(&PathBuf::from(&filename))?; + if size == 0 { + bail!("got zero-sized file '{}'", filename); + } + size + } else if file_type.is_fifo() { + 0 + } else { + bail!("got unexpected file type (expected file, block device, or fifo"); + }; upload_list.push(( BackupSpecificationType::IMAGE, @@ -1191,9 +1193,11 @@ async fn create_backup( (BackupSpecificationType::IMAGE, false) => { log_file("image", &filename, target.as_ref()); + // 0 means fifo pipe with unknown size + let image_file_size = (size != 0).then_some(size); let upload_options = UploadOptions { previous_manifest: previous_manifest.clone(), - fixed_size: Some(size), + index_type: IndexType::Fixed(image_file_size), compress: true, encrypt: crypto.mode == CryptMode::Encrypt, }; diff --git a/src/server/push.rs b/src/server/push.rs index d7884fce..b1b41297 100644 --- a/src/server/push.rs +++ b/src/server/push.rs @@ -17,7 +17,8 @@ use pbs_api_types::{ PRIV_REMOTE_DATASTORE_MODIFY, PRIV_REMOTE_DATASTORE_PRUNE, }; use pbs_client::{ - 
BackupRepository, BackupWriter, BackupWriterOptions, HttpClient, MergedChunkInfo, UploadOptions, + BackupRepository, BackupWriter, BackupWriterOptions, HttpClient, IndexType, MergedChunkInfo, + UploadOptions, }; use pbs_config::CachedUserInfo; use pbs_datastore::data_blob::ChunkInfo; @@ -917,7 +918,7 @@ pub(crate) async fn push_snapshot( index, chunk_reader, &backup_writer, - None, + IndexType::Dynamic, known_chunks.clone(), ) .await?; @@ -944,7 +945,7 @@ pub(crate) async fn push_snapshot( index, chunk_reader, &backup_writer, - Some(size), + IndexType::Fixed(Some(size)), known_chunks.clone(), ) .await?; @@ -1002,7 +1003,7 @@ async fn push_index( index: impl IndexFile + Send + 'static, chunk_reader: Arc, backup_writer: &BackupWriter, - size: Option, + index_type: IndexType, known_chunks: Arc>>, ) -> Result { let (upload_channel_tx, upload_channel_rx) = mpsc::channel(20); @@ -1048,7 +1049,7 @@ async fn push_index( let upload_options = UploadOptions { compress: true, encrypt: false, - fixed_size: size, + index_type, ..UploadOptions::default() }; -- 2.47.3 From r.obkircher at proxmox.com Fri Jan 9 18:35:40 2026 From: r.obkircher at proxmox.com (Robert Obkircher) Date: Fri, 9 Jan 2026 18:35:40 +0100 Subject: [pbs-devel] [PATCH v3 proxmox-backup 5/5] fix #3847: client: treat minus sign as stdin In-Reply-To: <20260109173548.301653-1-r.obkircher@proxmox.com> References: <20260109173548.301653-1-r.obkircher@proxmox.com> Message-ID: <20260109173548.301653-6-r.obkircher@proxmox.com> Treat "-" as an alias for "/dev/stdin". If there is an actual file with that name it can still be read via "./-". 
Signed-off-by: Robert Obkircher --- proxmox-backup-client/src/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs index 7fc711fd..37878b01 100644 --- a/proxmox-backup-client/src/main.rs +++ b/proxmox-backup-client/src/main.rs @@ -845,6 +845,13 @@ async fn create_backup( } target_set.insert(target.clone()); + // one can still use ./- to refer to an actual file with that name + let filename = if filename == "-" { + String::from("/dev/stdin") + } else { + filename + }; + use std::os::unix::fs::FileTypeExt; let metadata = std::fs::metadata(&filename) -- 2.47.3