[pbs-devel] applied: [PATCH proxmox-backup] Revert "fix #5710: api: backup: stat known chunks on backup finish"
Fabian Grünbichler
f.gruenbichler at proxmox.com
Mon Jan 13 11:36:52 CET 2025
we should probably spec out some potential replacement approaches?
On December 12, 2024 8:52 am, Christian Ebner wrote:
> Commit da11d226 ("fix #5710: api: backup: stat known chunks on backup
> finish") introduced a seemingly cheap server side check to verify
> existence of known chunks in the chunk store by stating. This check
> however does not scale for large backup snapshots which might contain
> millions of known chunks, as reported in the community forum [0].
> Revert the changes for now instead of making this opt-in/opt-out, a
> more general approach has to be thought out to mark backup snapshots
> which fail verification.
>
> Link to the report in the forum:
> [0] https://forum.proxmox.com/threads/158812/
>
> Fixes: da11d226 ("fix #5710: api: backup: stat known chunks on backup finish")
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> src/api2/backup/environment.rs | 54 +++++-----------------------------
> src/api2/backup/mod.rs | 22 +-------------
> 2 files changed, 8 insertions(+), 68 deletions(-)
>
> diff --git a/src/api2/backup/environment.rs b/src/api2/backup/environment.rs
> index 19624fae3..99d885e2e 100644
> --- a/src/api2/backup/environment.rs
> +++ b/src/api2/backup/environment.rs
> @@ -1,4 +1,4 @@
> -use anyhow::{bail, format_err, Context, Error};
> +use anyhow::{bail, format_err, Error};
> use nix::dir::Dir;
> use std::collections::HashMap;
> use std::sync::{Arc, Mutex};
> @@ -72,14 +72,8 @@ struct FixedWriterState {
> incremental: bool,
> }
>
> -#[derive(Copy, Clone)]
> -struct KnownChunkInfo {
> - uploaded: bool,
> - length: u32,
> -}
> -
> -// key=digest, value=KnownChunkInfo
> -type KnownChunksMap = HashMap<[u8; 32], KnownChunkInfo>;
> +// key=digest, value=length
> +type KnownChunksMap = HashMap<[u8; 32], u32>;
>
> struct SharedBackupState {
> finished: bool,
> @@ -165,13 +159,7 @@ impl BackupEnvironment {
>
> state.ensure_unfinished()?;
>
> - state.known_chunks.insert(
> - digest,
> - KnownChunkInfo {
> - uploaded: false,
> - length,
> - },
> - );
> + state.known_chunks.insert(digest, length);
>
> Ok(())
> }
> @@ -225,13 +213,7 @@ impl BackupEnvironment {
> }
>
> // register chunk
> - state.known_chunks.insert(
> - digest,
> - KnownChunkInfo {
> - uploaded: true,
> - length: size,
> - },
> - );
> + state.known_chunks.insert(digest, size);
>
> Ok(())
> }
> @@ -266,13 +248,7 @@ impl BackupEnvironment {
> }
>
> // register chunk
> - state.known_chunks.insert(
> - digest,
> - KnownChunkInfo {
> - uploaded: true,
> - length: size,
> - },
> - );
> + state.known_chunks.insert(digest, size);
>
> Ok(())
> }
> @@ -280,23 +256,7 @@ impl BackupEnvironment {
> pub fn lookup_chunk(&self, digest: &[u8; 32]) -> Option<u32> {
> let state = self.state.lock().unwrap();
>
> - state
> - .known_chunks
> - .get(digest)
> - .map(|known_chunk_info| known_chunk_info.length)
> - }
> -
> - /// stat known chunks from previous backup, so excluding newly uploaded ones
> - pub fn stat_prev_known_chunks(&self) -> Result<(), Error> {
> - let state = self.state.lock().unwrap();
> - for (digest, known_chunk_info) in &state.known_chunks {
> - if !known_chunk_info.uploaded {
> - self.datastore
> - .stat_chunk(digest)
> - .with_context(|| format!("stat failed on {}", hex::encode(digest)))?;
> - }
> - }
> - Ok(())
> + state.known_chunks.get(digest).copied()
> }
>
> /// Store the writer with an unique ID
> diff --git a/src/api2/backup/mod.rs b/src/api2/backup/mod.rs
> index 31334b59c..0373d135b 100644
> --- a/src/api2/backup/mod.rs
> +++ b/src/api2/backup/mod.rs
> @@ -1,6 +1,6 @@
> //! Backup protocol (HTTP2 upgrade)
>
> -use anyhow::{bail, format_err, Context, Error};
> +use anyhow::{bail, format_err, Error};
> use futures::*;
> use hex::FromHex;
> use hyper::header::{HeaderValue, CONNECTION, UPGRADE};
> @@ -788,26 +788,6 @@ fn finish_backup(
> ) -> Result<Value, Error> {
> let env: &BackupEnvironment = rpcenv.as_ref();
>
> - if let Err(err) = env.stat_prev_known_chunks() {
> - env.debug(format!("stat registered chunks failed - {err:?}"));
> -
> - if let Some(last) = env.last_backup.as_ref() {
> - // No need to acquire snapshot lock, already locked when starting the backup
> - let verify_state = SnapshotVerifyState {
> - state: VerifyState::Failed,
> - upid: env.worker.upid().clone(), // backup writer UPID
> - };
> - let verify_state = serde_json::to_value(verify_state)?;
> - last.backup_dir
> - .update_manifest(|manifest| {
> - manifest.unprotected["verify_state"] = verify_state;
> - })
> - .with_context(|| "manifest update failed")?;
> - }
> -
> - bail!("stat known chunks failed - {err:?}");
> - }
> -
> env.finish_backup()?;
> env.log("successfully finished backup");
>
> --
> 2.39.5
>
>
>
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>
>
>
More information about the pbs-devel
mailing list