[pbs-devel] [PATCH v4 proxmox-backup 58/58] client: chunk stream: switch payload stream chunker

Christian Ebner c.ebner at proxmox.com
Mon Apr 29 14:11:02 CEST 2024


Use the dedicated chunker with boundary suggestions for the payload
stream, by attaching the channel sender to the archiver and the
channel receiver to the payload stream chunker.

The archiver sends the file boundaries for the chunker to consume.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
 examples/test_chunk_speed2.rs                 |  2 +-
 pbs-client/src/chunk_stream.rs                | 15 ++++++--
 pbs-client/src/pxar/create.rs                 |  8 +++++
 pbs-client/src/pxar_backup_stream.rs          | 34 ++++++++++++-------
 proxmox-backup-client/src/main.rs             | 16 ++++++---
 .../src/proxmox_restore_daemon/api.rs         |  1 +
 pxar-bin/src/main.rs                          |  1 +
 tests/catar.rs                                |  1 +
 8 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/examples/test_chunk_speed2.rs b/examples/test_chunk_speed2.rs
index 22dd14ce2..f2963746a 100644
--- a/examples/test_chunk_speed2.rs
+++ b/examples/test_chunk_speed2.rs
@@ -26,7 +26,7 @@ async fn run() -> Result<(), Error> {
         .map_err(Error::from);
 
     //let chunk_stream = FixedChunkStream::new(stream, 4*1024*1024);
-    let mut chunk_stream = ChunkStream::new(stream, None, None);
+    let mut chunk_stream = ChunkStream::new(stream, None, None, None);
 
     let start_time = std::time::Instant::now();
 
diff --git a/pbs-client/src/chunk_stream.rs b/pbs-client/src/chunk_stream.rs
index a32ecfd15..ab7b70d17 100644
--- a/pbs-client/src/chunk_stream.rs
+++ b/pbs-client/src/chunk_stream.rs
@@ -7,7 +7,7 @@ use bytes::BytesMut;
 use futures::ready;
 use futures::stream::{Stream, TryStream};
 
-use pbs_datastore::{Chunker, ChunkerImpl};
+use pbs_datastore::{Chunker, ChunkerImpl, PayloadChunker};
 
 use crate::inject_reused_chunks::InjectChunks;
 
@@ -42,11 +42,20 @@ pub struct ChunkStream<S: Unpin> {
 }
 
 impl<S: Unpin> ChunkStream<S> {
-    pub fn new(input: S, chunk_size: Option<usize>, injection_data: Option<InjectionData>) -> Self {
+    pub fn new(
+        input: S,
+        chunk_size: Option<usize>,
+        injection_data: Option<InjectionData>,
+        suggested_boundaries: Option<mpsc::Receiver<u64>>,
+    ) -> Self {
         let chunk_size = chunk_size.unwrap_or(4 * 1024 * 1024);
         Self {
             input,
-            chunker: Box::new(ChunkerImpl::new(chunk_size)),
+            chunker: if let Some(suggested) = suggested_boundaries {
+                Box::new(PayloadChunker::new(chunk_size, suggested))
+            } else {
+                Box::new(ChunkerImpl::new(chunk_size))
+            },
             buffer: BytesMut::new(),
             scan_pos: 0,
             consumed: 0,
diff --git a/pbs-client/src/pxar/create.rs b/pbs-client/src/pxar/create.rs
index dc473263d..d7eb0c598 100644
--- a/pbs-client/src/pxar/create.rs
+++ b/pbs-client/src/pxar/create.rs
@@ -167,6 +167,7 @@ struct Archiver {
     file_copy_buffer: Vec<u8>,
     skip_e2big_xattr: bool,
     forced_boundaries: Option<mpsc::Sender<InjectChunks>>,
+    suggested_boundaries: Option<mpsc::Sender<u64>>,
     previous_payload_index: Option<DynamicIndexReader>,
     cached_entries: Vec<CacheEntry>,
     cached_hardlinks: HashSet<HardLinkInfo>,
@@ -205,6 +206,7 @@ pub async fn create_archive<T, F>(
     callback: F,
     options: PxarCreateOptions,
     forced_boundaries: Option<mpsc::Sender<InjectChunks>>,
+    suggested_boundaries: Option<mpsc::Sender<u64>>,
 ) -> Result<(), Error>
 where
     T: SeqWrite + Send,
@@ -286,6 +288,7 @@ where
         skip_e2big_xattr: options.skip_e2big_xattr,
         forced_boundaries,
         previous_payload_index,
+        suggested_boundaries,
         cached_entries: Vec::new(),
         cached_range: Range::default(),
         cached_last_chunk: None,
@@ -841,6 +844,11 @@ impl Archiver {
                         .add_file(c_file_name, file_size, stat.st_mtime)?;
                 }
 
+                if let Some(sender) = self.suggested_boundaries.as_mut() {
+                    let offset = encoder.payload_position()?.raw();
+                    sender.send(offset)?;
+                }
+
                 let offset: LinkOffset = if let Some(payload_offset) = payload_offset {
                     self.reuse_stats.total_reused_payload_size +=
                         file_size + size_of::<pxar::format::Header>() as u64;
diff --git a/pbs-client/src/pxar_backup_stream.rs b/pbs-client/src/pxar_backup_stream.rs
index 9d2cb41d6..59ccbd631 100644
--- a/pbs-client/src/pxar_backup_stream.rs
+++ b/pbs-client/src/pxar_backup_stream.rs
@@ -27,6 +27,7 @@ use crate::pxar::create::PxarWriters;
 /// consumer.
 pub struct PxarBackupStream {
     rx: Option<std::sync::mpsc::Receiver<Result<Vec<u8>, Error>>>,
+    pub suggested_boundaries: Option<std::sync::mpsc::Receiver<u64>>,
     handle: Option<AbortHandle>,
     error: Arc<Mutex<Option<String>>>,
 }
@@ -55,19 +56,23 @@ impl PxarBackupStream {
         ));
         let writer = pxar::encoder::sync::StandardWriter::new(writer);
 
-        let (payload_writer, payload_rx) = if separate_payload_stream {
-            let (tx, rx) = std::sync::mpsc::sync_channel(10);
-            let payload_writer = TokioWriterAdapter::new(std::io::BufWriter::with_capacity(
-                buffer_size,
-                StdChannelWriter::new(tx),
-            ));
-            (
-                Some(pxar::encoder::sync::StandardWriter::new(payload_writer)),
-                Some(rx),
-            )
-        } else {
-            (None, None)
-        };
+        let (payload_writer, payload_rx, suggested_boundaries_tx, suggested_boundaries_rx) =
+            if separate_payload_stream {
+                let (tx, rx) = std::sync::mpsc::sync_channel(10);
+                let (suggested_boundaries_tx, suggested_boundaries_rx) = std::sync::mpsc::channel();
+                let payload_writer = TokioWriterAdapter::new(std::io::BufWriter::with_capacity(
+                    buffer_size,
+                    StdChannelWriter::new(tx),
+                ));
+                (
+                    Some(pxar::encoder::sync::StandardWriter::new(payload_writer)),
+                    Some(rx),
+                    Some(suggested_boundaries_tx),
+                    Some(suggested_boundaries_rx),
+                )
+            } else {
+                (None, None, None, None)
+            };
 
         let error = Arc::new(Mutex::new(None));
         let error2 = Arc::clone(&error);
@@ -82,6 +87,7 @@ impl PxarBackupStream {
                 },
                 options,
                 boundaries,
+                suggested_boundaries_tx,
             )
             .await
             {
@@ -96,12 +102,14 @@ impl PxarBackupStream {
 
         let backup_stream = Self {
             rx: Some(rx),
+            suggested_boundaries: None,
             handle: Some(handle.clone()),
             error: Arc::clone(&error),
         };
 
         let backup_payload_stream = payload_rx.map(|rx| Self {
             rx: Some(rx),
+            suggested_boundaries: suggested_boundaries_rx,
             handle: Some(handle),
             error,
         });
diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs
index 94fba29b7..df2c65c8f 100644
--- a/proxmox-backup-client/src/main.rs
+++ b/proxmox-backup-client/src/main.rs
@@ -209,7 +209,7 @@ async fn backup_directory<P: AsRef<Path>>(
         payload_target.is_some(),
     )?;
 
-    let mut chunk_stream = ChunkStream::new(pxar_stream, chunk_size, None);
+    let mut chunk_stream = ChunkStream::new(pxar_stream, chunk_size, None, None);
     let (tx, rx) = mpsc::channel(10); // allow to buffer 10 chunks
 
     let stream = ReceiverStream::new(rx).map_err(Error::from);
@@ -223,14 +223,19 @@ async fn backup_directory<P: AsRef<Path>>(
 
     let stats = client.upload_stream(archive_name, stream, upload_options.clone(), None);
 
-    if let Some(payload_stream) = payload_stream {
+    if let Some(mut payload_stream) = payload_stream {
         let payload_target = payload_target
             .ok_or_else(|| format_err!("got payload stream, but no target archive name"))?;
 
         let (payload_injections_tx, payload_injections_rx) = std::sync::mpsc::channel();
         let injection_data = InjectionData::new(payload_boundaries_rx, payload_injections_tx);
-        let mut payload_chunk_stream =
-            ChunkStream::new(payload_stream, chunk_size, Some(injection_data));
+        let suggested_boundaries = payload_stream.suggested_boundaries.take();
+        let mut payload_chunk_stream = ChunkStream::new(
+            payload_stream,
+            chunk_size,
+            Some(injection_data),
+            suggested_boundaries,
+        );
         let (payload_tx, payload_rx) = mpsc::channel(10); // allow to buffer 10 chunks
         let stream = ReceiverStream::new(payload_rx).map_err(Error::from);
 
@@ -573,7 +578,8 @@ fn spawn_catalog_upload(
     let (catalog_tx, catalog_rx) = std::sync::mpsc::sync_channel(10); // allow to buffer 10 writes
     let catalog_stream = proxmox_async::blocking::StdChannelStream(catalog_rx);
     let catalog_chunk_size = 512 * 1024;
-    let catalog_chunk_stream = ChunkStream::new(catalog_stream, Some(catalog_chunk_size), None);
+    let catalog_chunk_stream =
+        ChunkStream::new(catalog_stream, Some(catalog_chunk_size), None, None);
 
     let catalog_writer = Arc::new(Mutex::new(CatalogWriter::new(TokioWriterAdapter::new(
         StdChannelWriter::new(catalog_tx),
diff --git a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
index e50cb8184..956c3246a 100644
--- a/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
+++ b/proxmox-restore-daemon/src/proxmox_restore_daemon/api.rs
@@ -366,6 +366,7 @@ fn extract(
                         |_| Ok(()),
                         options,
                         None,
+                        None,
                     )
                     .await
                 }
diff --git a/pxar-bin/src/main.rs b/pxar-bin/src/main.rs
index c6d3794bb..85f96ad2c 100644
--- a/pxar-bin/src/main.rs
+++ b/pxar-bin/src/main.rs
@@ -407,6 +407,7 @@ async fn create_archive(
         },
         options,
         None,
+        None,
     )
     .await?;
 
diff --git a/tests/catar.rs b/tests/catar.rs
index d5ef85ffe..3f5b22177 100644
--- a/tests/catar.rs
+++ b/tests/catar.rs
@@ -40,6 +40,7 @@ fn run_test(dir_name: &str) -> Result<(), Error> {
         |_| Ok(()),
         options,
         None,
+        None,
     ))?;
 
     Command::new("cmp")
-- 
2.39.2





More information about the pbs-devel mailing list