[pbs-devel] [PATCH v4 proxmox 01/31] client: backup writer: refactor backup and upload stats counters
Christian Ebner
c.ebner at proxmox.com
Thu Oct 17 15:26:46 CEST 2024
In preparation for push support in sync jobs.
Extend and move `BackupStats` into `backup_stats` submodule and add
method to create them from `UploadStats`.
Further, introduce `UploadCounters` struct to hold the Arc clones of
the chunk upload statistics counters, simplifying the house keeping.
By bundling the counters into the struct, they can be passed as
single function parameter when factoring out the common stream future
in the subsequent implementation of the chunk upload for sync jobs
in push direction.
Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 3:
- not present in previous version
pbs-client/src/backup_stats.rs | 130 ++++++++++++++++++++++++++++++++
pbs-client/src/backup_writer.rs | 104 ++++++++-----------------
pbs-client/src/lib.rs | 3 +
3 files changed, 165 insertions(+), 72 deletions(-)
create mode 100644 pbs-client/src/backup_stats.rs
diff --git a/pbs-client/src/backup_stats.rs b/pbs-client/src/backup_stats.rs
new file mode 100644
index 000000000..7aa618667
--- /dev/null
+++ b/pbs-client/src/backup_stats.rs
@@ -0,0 +1,130 @@
+//! Implements counters to generate statistics for log outputs during uploads with backup writer
+
+use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
+use std::sync::Arc;
+use std::time::Duration;
+
+/// Basic backup run statistics and archive checksum
+pub struct BackupStats {
+ pub size: u64,
+ pub csum: [u8; 32],
+ pub duration: Duration,
+ pub chunk_count: u64,
+}
+
+/// Extended backup run statistics and archive checksum
+pub(crate) struct UploadStats {
+ pub(crate) chunk_count: usize,
+ pub(crate) chunk_reused: usize,
+ pub(crate) chunk_injected: usize,
+ pub(crate) size: usize,
+ pub(crate) size_reused: usize,
+ pub(crate) size_injected: usize,
+ pub(crate) size_compressed: usize,
+ pub(crate) duration: Duration,
+ pub(crate) csum: [u8; 32],
+}
+
+impl UploadStats {
+ /// Convert the upload stats to the more concise [`BackupStats`]
+ #[inline(always)]
+ pub(crate) fn to_backup_stats(&self) -> BackupStats {
+ BackupStats {
+ chunk_count: self.chunk_count as u64,
+ size: self.size as u64,
+ duration: self.duration,
+ csum: self.csum,
+ }
+ }
+}
+
+/// Atomic counters for accounting upload stream progress information
+#[derive(Clone)]
+pub(crate) struct UploadCounters {
+ injected_chunk_count: Arc<AtomicUsize>,
+ known_chunk_count: Arc<AtomicUsize>,
+ total_chunk_count: Arc<AtomicUsize>,
+ compressed_stream_len: Arc<AtomicU64>,
+ injected_stream_len: Arc<AtomicUsize>,
+ reused_stream_len: Arc<AtomicUsize>,
+ total_stream_len: Arc<AtomicUsize>,
+}
+
+impl UploadCounters {
+ /// Create and zero init new upload counters
+ pub(crate) fn new() -> Self {
+ Self {
+ total_chunk_count: Arc::new(AtomicUsize::new(0)),
+ injected_chunk_count: Arc::new(AtomicUsize::new(0)),
+ known_chunk_count: Arc::new(AtomicUsize::new(0)),
+ compressed_stream_len: Arc::new(AtomicU64::new(0)),
+ injected_stream_len: Arc::new(AtomicUsize::new(0)),
+ reused_stream_len: Arc::new(AtomicUsize::new(0)),
+ total_stream_len: Arc::new(AtomicUsize::new(0)),
+ }
+ }
+
+ /// Increment total chunk counter by `count`, returns previous value
+ #[inline(always)]
+ pub(crate) fn inc_total_chunks(&mut self, count: usize) -> usize {
+ self.total_chunk_count.fetch_add(count, Ordering::SeqCst)
+ }
+
+ /// Increment known chunk counter by `count`, returns previous value
+ #[inline(always)]
+ pub(crate) fn inc_known_chunks(&mut self, count: usize) -> usize {
+ self.known_chunk_count.fetch_add(count, Ordering::SeqCst)
+ }
+
+ /// Increment injected chunk counter by `count`, returns previous value
+ #[inline(always)]
+ pub(crate) fn inc_injected_chunks(&mut self, count: usize) -> usize {
+ self.injected_chunk_count.fetch_add(count, Ordering::SeqCst)
+ }
+
+ /// Increment stream length counter by given size, returns previous value
+ #[inline(always)]
+ pub(crate) fn inc_total_stream_len(&mut self, size: usize) -> usize {
+ self.total_stream_len.fetch_add(size, Ordering::SeqCst)
+ }
+
+ /// Increment reused length counter by given size, returns previous value
+ #[inline(always)]
+ pub(crate) fn inc_reused_stream_len(&mut self, size: usize) -> usize {
+ self.reused_stream_len.fetch_add(size, Ordering::SeqCst)
+ }
+
+ /// Increment compressed length counter by given size, returns previous value
+ #[inline(always)]
+ pub(crate) fn inc_compressed_stream_len(&mut self, size: u64) -> u64 {
+ self.compressed_stream_len.fetch_add(size, Ordering::SeqCst)
+ }
+
+ /// Increment stream length counter by given size, returns previous value
+ #[inline(always)]
+ pub(crate) fn inc_injected_stream_len(&mut self, size: usize) -> usize {
+ self.injected_stream_len.fetch_add(size, Ordering::SeqCst)
+ }
+
+ /// Return a Arc clone to the total stream length counter
+ #[inline(always)]
+ pub(crate) fn total_stream_len_counter(&self) -> Arc<AtomicUsize> {
+ self.total_stream_len.clone()
+ }
+
+ /// Convert the counters to [`UploadStats`], including given archive checksum and runtime.
+ #[inline(always)]
+ pub(crate) fn to_upload_stats(&self, csum: [u8; 32], duration: Duration) -> UploadStats {
+ UploadStats {
+ chunk_count: self.total_chunk_count.load(Ordering::SeqCst),
+ chunk_reused: self.known_chunk_count.load(Ordering::SeqCst),
+ chunk_injected: self.injected_chunk_count.load(Ordering::SeqCst),
+ size: self.total_stream_len.load(Ordering::SeqCst),
+ size_reused: self.reused_stream_len.load(Ordering::SeqCst),
+ size_injected: self.injected_stream_len.load(Ordering::SeqCst),
+ size_compressed: self.compressed_stream_len.load(Ordering::SeqCst) as usize,
+ duration,
+ csum,
+ }
+ }
+}
diff --git a/pbs-client/src/backup_writer.rs b/pbs-client/src/backup_writer.rs
index d63c09b5a..5ccfcc9b3 100644
--- a/pbs-client/src/backup_writer.rs
+++ b/pbs-client/src/backup_writer.rs
@@ -1,7 +1,7 @@
use std::collections::HashSet;
use std::future::Future;
-use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
+use std::time::Instant;
use anyhow::{bail, format_err, Error};
use futures::future::{self, AbortHandle, Either, FutureExt, TryFutureExt};
@@ -22,6 +22,7 @@ use pbs_tools::crypt_config::CryptConfig;
use proxmox_human_byte::HumanByte;
+use super::backup_stats::{BackupStats, UploadCounters, UploadStats};
use super::inject_reused_chunks::{InjectChunks, InjectReusedChunks, InjectedChunksInfo};
use super::merge_known_chunks::{MergeKnownChunks, MergedChunkInfo};
@@ -39,11 +40,6 @@ impl Drop for BackupWriter {
}
}
-pub struct BackupStats {
- pub size: u64,
- pub csum: [u8; 32],
-}
-
/// Options for uploading blobs/streams to the server
#[derive(Default, Clone)]
pub struct UploadOptions {
@@ -53,18 +49,6 @@ pub struct UploadOptions {
pub fixed_size: Option<u64>,
}
-struct UploadStats {
- chunk_count: usize,
- chunk_reused: usize,
- chunk_injected: usize,
- size: usize,
- size_reused: usize,
- size_injected: usize,
- size_compressed: usize,
- duration: std::time::Duration,
- csum: [u8; 32],
-}
-
type UploadQueueSender = mpsc::Sender<(MergedChunkInfo, Option<h2::client::ResponseFuture>)>;
type UploadResultReceiver = oneshot::Receiver<Result<(), Error>>;
@@ -188,6 +172,7 @@ impl BackupWriter {
mut reader: R,
file_name: &str,
) -> Result<BackupStats, Error> {
+ let start_time = Instant::now();
let mut raw_data = Vec::new();
// fixme: avoid loading into memory
reader.read_to_end(&mut raw_data)?;
@@ -205,7 +190,12 @@ impl BackupWriter {
raw_data,
)
.await?;
- Ok(BackupStats { size, csum })
+ Ok(BackupStats {
+ size,
+ csum,
+ duration: start_time.elapsed(),
+ chunk_count: 0,
+ })
}
pub async fn upload_blob_from_data(
@@ -214,6 +204,7 @@ impl BackupWriter {
file_name: &str,
options: UploadOptions,
) -> Result<BackupStats, Error> {
+ let start_time = Instant::now();
let blob = match (options.encrypt, &self.crypt_config) {
(false, _) => DataBlob::encode(&data, None, options.compress)?,
(true, None) => bail!("requested encryption without a crypt config"),
@@ -237,7 +228,12 @@ impl BackupWriter {
raw_data,
)
.await?;
- Ok(BackupStats { size, csum })
+ Ok(BackupStats {
+ size,
+ csum,
+ duration: start_time.elapsed(),
+ chunk_count: 0,
+ })
}
pub async fn upload_blob_from_file<P: AsRef<std::path::Path>>(
@@ -413,10 +409,7 @@ impl BackupWriter {
"csum": hex::encode(upload_stats.csum),
});
let _value = self.h2.post(&close_path, Some(param)).await?;
- Ok(BackupStats {
- size: upload_stats.size as u64,
- csum: upload_stats.csum,
- })
+ Ok(upload_stats.to_backup_stats())
}
fn response_queue() -> (
@@ -637,21 +630,8 @@ impl BackupWriter {
compress: bool,
injections: Option<std::sync::mpsc::Receiver<InjectChunks>>,
) -> impl Future<Output = Result<UploadStats, Error>> {
- let total_chunks = Arc::new(AtomicUsize::new(0));
- let total_chunks2 = total_chunks.clone();
- let known_chunk_count = Arc::new(AtomicUsize::new(0));
- let known_chunk_count2 = known_chunk_count.clone();
- let injected_chunk_count = Arc::new(AtomicUsize::new(0));
- let injected_chunk_count2 = injected_chunk_count.clone();
-
- let stream_len = Arc::new(AtomicUsize::new(0));
- let stream_len2 = stream_len.clone();
- let compressed_stream_len = Arc::new(AtomicU64::new(0));
- let compressed_stream_len2 = compressed_stream_len.clone();
- let reused_len = Arc::new(AtomicUsize::new(0));
- let reused_len2 = reused_len.clone();
- let injected_len = Arc::new(AtomicUsize::new(0));
- let injected_len2 = injected_len.clone();
+ let mut counters = UploadCounters::new();
+ let counters_readonly = counters.clone();
let append_chunk_path = format!("{}_index", prefix);
let upload_chunk_path = format!("{}_chunk", prefix);
@@ -666,22 +646,21 @@ impl BackupWriter {
let index_csum_2 = index_csum.clone();
stream
- .inject_reused_chunks(injections, stream_len.clone())
+ .inject_reused_chunks(injections, counters.total_stream_len_counter())
.and_then(move |chunk_info| match chunk_info {
InjectedChunksInfo::Known(chunks) => {
// account for injected chunks
let count = chunks.len();
- total_chunks.fetch_add(count, Ordering::SeqCst);
- injected_chunk_count.fetch_add(count, Ordering::SeqCst);
+ counters.inc_total_chunks(count);
+ counters.inc_injected_chunks(count);
let mut known = Vec::new();
let mut guard = index_csum.lock().unwrap();
let csum = guard.as_mut().unwrap();
for chunk in chunks {
- let offset =
- stream_len.fetch_add(chunk.size() as usize, Ordering::SeqCst) as u64;
- reused_len.fetch_add(chunk.size() as usize, Ordering::SeqCst);
- injected_len.fetch_add(chunk.size() as usize, Ordering::SeqCst);
+ let offset = counters.inc_total_stream_len(chunk.size() as usize) as u64;
+ counters.inc_reused_stream_len(chunk.size() as usize);
+ counters.inc_injected_stream_len(chunk.size() as usize);
let digest = chunk.digest();
known.push((offset, digest));
let end_offset = offset + chunk.size();
@@ -694,8 +673,8 @@ impl BackupWriter {
// account for not injected chunks (new and known)
let chunk_len = data.len();
- total_chunks.fetch_add(1, Ordering::SeqCst);
- let offset = stream_len.fetch_add(chunk_len, Ordering::SeqCst) as u64;
+ counters.inc_total_chunks(1);
+ let offset = counters.inc_total_stream_len(chunk_len) as u64;
let mut chunk_builder = DataChunkBuilder::new(data.as_ref()).compress(compress);
@@ -718,14 +697,14 @@ impl BackupWriter {
let chunk_is_known = known_chunks.contains(digest);
if chunk_is_known {
- known_chunk_count.fetch_add(1, Ordering::SeqCst);
- reused_len.fetch_add(chunk_len, Ordering::SeqCst);
+ counters.inc_known_chunks(1);
+ counters.inc_reused_stream_len(chunk_len);
future::ok(MergedChunkInfo::Known(vec![(offset, *digest)]))
} else {
- let compressed_stream_len2 = compressed_stream_len.clone();
+ let mut counters = counters.clone();
known_chunks.insert(*digest);
future::ready(chunk_builder.build().map(move |(chunk, digest)| {
- compressed_stream_len2.fetch_add(chunk.raw_size(), Ordering::SeqCst);
+ counters.inc_compressed_stream_len(chunk.raw_size());
MergedChunkInfo::New(ChunkInfo {
chunk,
digest,
@@ -794,29 +773,10 @@ impl BackupWriter {
})
.then(move |result| async move { upload_result.await?.and(result) }.boxed())
.and_then(move |_| {
- let duration = start_time.elapsed();
- let chunk_count = total_chunks2.load(Ordering::SeqCst);
- let chunk_reused = known_chunk_count2.load(Ordering::SeqCst);
- let chunk_injected = injected_chunk_count2.load(Ordering::SeqCst);
- let size = stream_len2.load(Ordering::SeqCst);
- let size_reused = reused_len2.load(Ordering::SeqCst);
- let size_injected = injected_len2.load(Ordering::SeqCst);
- let size_compressed = compressed_stream_len2.load(Ordering::SeqCst) as usize;
-
let mut guard = index_csum_2.lock().unwrap();
let csum = guard.take().unwrap().finish();
- futures::future::ok(UploadStats {
- chunk_count,
- chunk_reused,
- chunk_injected,
- size,
- size_reused,
- size_injected,
- size_compressed,
- duration,
- csum,
- })
+ futures::future::ok(counters_readonly.to_upload_stats(csum, start_time.elapsed()))
})
}
diff --git a/pbs-client/src/lib.rs b/pbs-client/src/lib.rs
index 3d2da27b9..b875347bb 100644
--- a/pbs-client/src/lib.rs
+++ b/pbs-client/src/lib.rs
@@ -41,4 +41,7 @@ pub use backup_specification::*;
mod chunk_stream;
pub use chunk_stream::{ChunkStream, FixedChunkStream, InjectionData};
+mod backup_stats;
+pub use backup_stats::BackupStats;
+
pub const PROXMOX_BACKUP_TCP_KEEPALIVE_TIME: u32 = 120;
--
2.39.5
More information about the pbs-devel
mailing list