[pbs-devel] [PATCH proxmox-backup] client/backup_writer: clarify backup and upload size
Thomas Lamprecht
t.lamprecht at proxmox.com
Wed Mar 24 15:39:04 CET 2021
On 24.03.21 11:59, Dominik Csapak wrote:
> The text 'had to upload [KMG]iB' implies that this is the size we
> actually had to send to the server, while in reality it is the
> raw data size before compression.
>
> Count the size of the compressed chunks and print it separately.
> Split the average speed into its own line so they do not get too long.
>
Looks OK code-wise, so I can only complain about a few naming and format nits,
and that we now definitely need to fix that mess of a return-signature tuple
monster that Fabian also noted off-list.
> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
> ---
> src/client/backup_writer.rs | 35 ++++++++++++++++++++++-------------
> 1 file changed, 22 insertions(+), 13 deletions(-)
>
> diff --git a/src/client/backup_writer.rs b/src/client/backup_writer.rs
> index cef7edef..1be66707 100644
> --- a/src/client/backup_writer.rs
> +++ b/src/client/backup_writer.rs
> @@ -1,6 +1,6 @@
> use std::collections::HashSet;
> use std::os::unix::fs::OpenOptionsExt;
> -use std::sync::atomic::{AtomicUsize, Ordering};
> +use std::sync::atomic::{AtomicUsize, AtomicU64, Ordering};
> use std::sync::{Arc, Mutex};
>
> use anyhow::{bail, format_err, Error};
> @@ -256,7 +256,7 @@ impl BackupWriter {
>
> let wid = self.h2.post(&index_path, Some(param)).await?.as_u64().unwrap();
>
> - let (chunk_count, chunk_reused, size, size_reused, duration, csum) =
> + let (chunk_count, chunk_reused, size, size_reused, uploaded, duration, csum) =
Why not `compressed_size`? It may not necessarily be the total upload amount
(protocol and TLS overhead, ...?)
> Self::upload_chunk_info_stream(
> self.h2.clone(),
> wid,
> @@ -269,7 +269,7 @@ impl BackupWriter {
> )
> .await?;
>
> - let uploaded = size - size_reused;
> + let saved = size - size_reused;
Hmm, that name change does not really improve understanding for me; `saved` is
rather general and a bit confusing (as we effectively saved all of `size`, not
only this amount). Maybe `dirty_size`?
> let vsize_h: HumanByte = size.into();
> let archive = if self.verbose {
> archive_name.to_string()
> @@ -277,9 +277,11 @@ impl BackupWriter {
> crate::tools::format::strip_server_file_extension(archive_name)
> };
> if archive_name != CATALOG_NAME {
> - let speed: HumanByte = ((uploaded * 1_000_000) / (duration.as_micros() as usize)).into();
> + let speed: HumanByte = ((saved * 1_000_000) / (duration.as_micros() as usize)).into();
> + let saved: HumanByte = saved.into();
> let uploaded: HumanByte = uploaded.into();
> - println!("{}: had to upload {} of {} in {:.2}s, average speed {}/s).", archive, uploaded, vsize_h, duration.as_secs_f64(), speed);
> + println!("{}: had to backup {} of {} (compressed {}) in {:.2}s", archive, saved, vsize_h, uploaded, duration.as_secs_f64());
> + println!("Average backup speed: {}.", speed);
We normally put the archive name in front for archive-specific messages, and
the surrounding prints have no full stop at the end of the sentence, so the
trailing period should be dropped.
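E.g., roughly (untested, variable names per the nits above):

    println!(
        "{}: had to backup {} of {} (compressed {}) in {:.2}s",
        archive, dirty_size, vsize_h, compressed_size, duration.as_secs_f64()
    );
    println!("{}: average backup speed: {}/s", archive, speed);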
> } else {
> println!("Uploaded backup catalog ({})", vsize_h);
> }
> @@ -521,7 +523,7 @@ impl BackupWriter {
> crypt_config: Option<Arc<CryptConfig>>,
> compress: bool,
> verbose: bool,
> - ) -> impl Future<Output = Result<(usize, usize, usize, usize, std::time::Duration, [u8; 32]), Error>> {
> + ) -> impl Future<Output = Result<(usize, usize, usize, usize, usize, std::time::Duration, [u8; 32]), Error>> {
A WriterStats struct (just the first name popping into my head) would be much
nicer here.
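Something like this, just a rough first sketch (name and fields to taste,
mirroring the current tuple):

    struct WriterStats {
        chunk_count: usize,
        chunk_reused: usize,
        size: usize,
        size_reused: usize,
        size_compressed: usize,
        duration: std::time::Duration,
        csum: [u8; 32],
    }

and then the signature becomes

    ) -> impl Future<Output = Result<WriterStats, Error>> {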
>
> let total_chunks = Arc::new(AtomicUsize::new(0));
> let total_chunks2 = total_chunks.clone();
> @@ -530,6 +532,8 @@ impl BackupWriter {
>
> let stream_len = Arc::new(AtomicUsize::new(0));
> let stream_len2 = stream_len.clone();
> + let compressed_stream_len = Arc::new(AtomicU64::new(0));
> + let compressed_stream_len2 = compressed_stream_len.clone();
> let reused_len = Arc::new(AtomicUsize::new(0));
> let reused_len2 = reused_len.clone();
>
> @@ -572,6 +576,7 @@ impl BackupWriter {
> csum.update(digest);
>
> let chunk_is_known = known_chunks.contains(digest);
> + let compressed_stream_len2 = compressed_stream_len.clone();
> if chunk_is_known {
> known_chunk_count.fetch_add(1, Ordering::SeqCst);
> reused_len.fetch_add(chunk_len, Ordering::SeqCst);
> @@ -580,12 +585,15 @@ impl BackupWriter {
> known_chunks.insert(*digest);
> future::ready(chunk_builder
> .build()
> - .map(move |(chunk, digest)| MergedChunkInfo::New(ChunkInfo {
> - chunk,
> - digest,
> - chunk_len: chunk_len as u64,
> - offset,
> - }))
> + .map(move |(chunk, digest)| {
> + compressed_stream_len2.fetch_add(chunk.raw_size(), Ordering::SeqCst);
> + MergedChunkInfo::New(ChunkInfo {
> + chunk,
> + digest,
> + chunk_len: chunk_len as u64,
> + offset,
> + })
> + })
> )
> }
> })
> @@ -645,12 +653,13 @@ impl BackupWriter {
> let total_chunks = total_chunks2.load(Ordering::SeqCst);
> let known_chunk_count = known_chunk_count2.load(Ordering::SeqCst);
> let stream_len = stream_len2.load(Ordering::SeqCst);
> + let upload_len = compressed_stream_len2.load(Ordering::SeqCst) as usize;
> let reused_len = reused_len2.load(Ordering::SeqCst);
>
> let mut guard = index_csum_2.lock().unwrap();
> let csum = guard.take().unwrap().finish();
>
> - futures::future::ok((total_chunks, known_chunk_count, stream_len, reused_len, duration, csum))
> + futures::future::ok((total_chunks, known_chunk_count, stream_len, reused_len, upload_len, duration, csum))
> })
> }
>
>