[pbs-devel] [PATCH proxmox-backup] client/backup_writer: clarify backup and upload size

Thomas Lamprecht t.lamprecht at proxmox.com
Wed Mar 24 15:39:04 CET 2021


On 24.03.21 11:59, Dominik Csapak wrote:
> The text 'had to upload [KMG]iB' implies that this is the size we
> actually had to send to the server, while in reality it is the
> raw data size before compression.
> 
> Count the size of the compressed chunks and print it separately.
> Split the average speed into its own line so they do not get too long.
> 

looks OK code-wise, so I can only complain about a few naming and formatting nits,
and note that we now definitely need to fix that mess of a return-signature tuple
monster that Fabian also noted off-list.

> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
> ---
>  src/client/backup_writer.rs | 35 ++++++++++++++++++++++-------------
>  1 file changed, 22 insertions(+), 13 deletions(-)
> 
> diff --git a/src/client/backup_writer.rs b/src/client/backup_writer.rs
> index cef7edef..1be66707 100644
> --- a/src/client/backup_writer.rs
> +++ b/src/client/backup_writer.rs
> @@ -1,6 +1,6 @@
>  use std::collections::HashSet;
>  use std::os::unix::fs::OpenOptionsExt;
> -use std::sync::atomic::{AtomicUsize, Ordering};
> +use std::sync::atomic::{AtomicUsize, AtomicU64, Ordering};
>  use std::sync::{Arc, Mutex};
>  
>  use anyhow::{bail, format_err, Error};
> @@ -256,7 +256,7 @@ impl BackupWriter {
>  
>          let wid = self.h2.post(&index_path, Some(param)).await?.as_u64().unwrap();
>  
> -        let (chunk_count, chunk_reused, size, size_reused, duration, csum) =
> +        let (chunk_count, chunk_reused, size, size_reused, uploaded, duration, csum) =

Why not `compressed_size`? It may not necessarily be the total upload amount
(protocol and TLS overhead, ...?).

>              Self::upload_chunk_info_stream(
>                  self.h2.clone(),
>                  wid,
> @@ -269,7 +269,7 @@ impl BackupWriter {
>              )
>              .await?;
>  
> -        let uploaded = size - size_reused;
> +        let saved = size - size_reused;

Hmm, that name change does not really improve understanding for me; `saved` is rather
general and a bit confusing (as we effectively saved all of `size`, not only this
amount). Maybe `dirty_size`?

>          let vsize_h: HumanByte = size.into();
>          let archive = if self.verbose {
>              archive_name.to_string()
> @@ -277,9 +277,11 @@ impl BackupWriter {
>              crate::tools::format::strip_server_file_extension(archive_name)
>          };
>          if archive_name != CATALOG_NAME {
> -            let speed: HumanByte = ((uploaded * 1_000_000) / (duration.as_micros() as usize)).into();
> +            let speed: HumanByte = ((saved * 1_000_000) / (duration.as_micros() as usize)).into();
> +            let saved: HumanByte = saved.into();
>              let uploaded: HumanByte = uploaded.into();
> -            println!("{}: had to upload {} of {} in {:.2}s, average speed {}/s).", archive, uploaded, vsize_h, duration.as_secs_f64(), speed);
> +            println!("{}: had to backup {} of {} (compressed {}) in {:.2}s", archive, saved, vsize_h, uploaded, duration.as_secs_f64());
> +            println!("Average backup speed: {}.", speed);

We normally put the archive name in front for archive-specific messages, and
surrounding prints do not end their sentences with a trailing period.

>          } else {
>              println!("Uploaded backup catalog ({})", vsize_h);
>          }
> @@ -521,7 +523,7 @@ impl BackupWriter {
>          crypt_config: Option<Arc<CryptConfig>>,
>          compress: bool,
>          verbose: bool,
> -    ) -> impl Future<Output = Result<(usize, usize, usize, usize, std::time::Duration, [u8; 32]), Error>> {
> +    ) -> impl Future<Output = Result<(usize, usize, usize, usize, usize, std::time::Duration, [u8; 32]), Error>> {

A `WriterStats` struct (just the first name popping into my head) would be really
nice here.

>  
>          let total_chunks = Arc::new(AtomicUsize::new(0));
>          let total_chunks2 = total_chunks.clone();
> @@ -530,6 +532,8 @@ impl BackupWriter {
>  
>          let stream_len = Arc::new(AtomicUsize::new(0));
>          let stream_len2 = stream_len.clone();
> +        let compressed_stream_len = Arc::new(AtomicU64::new(0));
> +        let compressed_stream_len2 = compressed_stream_len.clone();
>          let reused_len = Arc::new(AtomicUsize::new(0));
>          let reused_len2 = reused_len.clone();
>  
> @@ -572,6 +576,7 @@ impl BackupWriter {
>                  csum.update(digest);
>  
>                  let chunk_is_known = known_chunks.contains(digest);
> +                let compressed_stream_len2 = compressed_stream_len.clone();
>                  if chunk_is_known {
>                      known_chunk_count.fetch_add(1, Ordering::SeqCst);
>                      reused_len.fetch_add(chunk_len, Ordering::SeqCst);
> @@ -580,12 +585,15 @@ impl BackupWriter {
>                      known_chunks.insert(*digest);
>                      future::ready(chunk_builder
>                          .build()
> -                        .map(move |(chunk, digest)| MergedChunkInfo::New(ChunkInfo {
> -                            chunk,
> -                            digest,
> -                            chunk_len: chunk_len as u64,
> -                            offset,
> -                        }))
> +                        .map(move |(chunk, digest)| {
> +                            compressed_stream_len2.fetch_add(chunk.raw_size(), Ordering::SeqCst);
> +                            MergedChunkInfo::New(ChunkInfo {
> +                                chunk,
> +                                digest,
> +                                chunk_len: chunk_len as u64,
> +                                offset,
> +                            })
> +                        })
>                      )
>                  }
>              })
> @@ -645,12 +653,13 @@ impl BackupWriter {
>                  let total_chunks = total_chunks2.load(Ordering::SeqCst);
>                  let known_chunk_count = known_chunk_count2.load(Ordering::SeqCst);
>                  let stream_len = stream_len2.load(Ordering::SeqCst);
> +                let upload_len = compressed_stream_len2.load(Ordering::SeqCst) as usize;
>                  let reused_len = reused_len2.load(Ordering::SeqCst);
>  
>                  let mut guard = index_csum_2.lock().unwrap();
>                  let csum = guard.take().unwrap().finish();
>  
> -                futures::future::ok((total_chunks, known_chunk_count, stream_len, reused_len, duration, csum))
> +                futures::future::ok((total_chunks, known_chunk_count, stream_len, reused_len, upload_len, duration, csum))
>              })
>      }
>  
> 






More information about the pbs-devel mailing list