[pbs-devel] [PATCH proxmox-backup v2 1/3] tape: introduce a tape backup read thread tuning option
Max Carrara
m.carrara at proxmox.com
Tue May 7 17:10:20 CEST 2024
On Tue May 7, 2024 at 9:29 AM CEST, Dominik Csapak wrote:
> using a single thread for reading is not optimal in some cases, e.g.
> when the underlying storage can handle more reads in parallel than with
> a single thread.
>
I've only got a few tiny notes, nothing major:
Would prefer to reword this as:
Using a single thread for reading is not optimal in some cases, e.g.
when the underlying storage can handle reads from multiple threads in
parallel.
> We use the ParallelHandler to handle the actual reads. Make the
> sync_channel buffer size depending on the number of threads so we have
> space for two chunks per thread.
Would s/depending/depend/ for the above paragraph.
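Since the commit message is the only place that explains the new
threading model, here's the rough shape of the ParallelHandler usage
for anyone skimming the archive later (a simplified sketch of the diff
below; `do_work` and `items` are placeholders, not code from the
patch):

    // Each of `read_threads` workers runs the closure on items pushed
    // via send(); complete() joins the workers and surfaces any errors
    // they returned.
    let pool = ParallelHandler::new("example pool", read_threads, move |item: [u8; 32]| {
        do_work(item)?; // in the patch: datastore.load_chunk(&item)
        Ok(())
    });

    for item in items {
        pool.send(item)?; // AFAICT blocks while all workers are busy
    }

    pool.complete()?; // wait for outstanding work before returning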
>
> How this impacts the backup speed largely depends on the underlying
> storage and how the backup is laid out on it.
>
> I benchmarked the following setups:
>
> * Setup A: relatively spread out backup on a virtualized pbs on single HDDs
> * Setup B: mostly sequential chunks on a virtualized pbs on single HDDs
> * Setup C: backup on virtualized pbs on a fast NVME
> * Setup D: backup on bare metal pbs with ZFS in a RAID10 with 6 HDDs
> and 2 fast special devices in a mirror
>
> (values are reported in MB/s as seen in the task log, caches were
> cleared between runs, backups were bigger than the memory available)
>
> setup   1 thread   2 threads   4 threads   8 threads
> A             55          70          80          95
> B            110          89         100         108
> C            294         294         294         294
> D            118         180         300         300
>
> So there are cases where multiple read threads speed up the tape backup
> (dramatically). On the other hand there are situations where reading
> from a single thread is actually faster, probably because we can read
> from the HDD sequentially.
>
> I used a new default value of '4' here since that gave good performance
> on all setups (not necessarily the best) and we also have a default
> value of '4' for verification threads.
>
> We use a struct here for the datastore since we want to introduce other
> thread tuning options too.
>
> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
The patch is pretty straightforward; there's one question inline, but
otherwise this looks fine to me. Solid work!
Wasn't able to test this yet as setting up a VTL on Bookworm failed for
me, unfortunately. Will try to test this tomorrow if possible.
For now:
Reviewed-by: Max Carrara <m.carrara at proxmox.com>
> ---
> pbs-api-types/src/datastore.rs | 8 ++++
> pbs-datastore/src/datastore.rs | 26 ++++++++++++
> src/tape/pool_writer/new_chunks_iterator.rs | 45 +++++++++++++--------
> www/Utils.js | 5 +++
> www/datastore/OptionView.js | 8 ++++
> 5 files changed, 76 insertions(+), 16 deletions(-)
>
> diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs
> index 31767417a..1dae3867f 100644
> --- a/pbs-api-types/src/datastore.rs
> +++ b/pbs-api-types/src/datastore.rs
> @@ -209,6 +209,11 @@ pub enum DatastoreFSyncLevel {
> type: ChunkOrder,
> optional: true,
> },
> + "tape-backup-read-threads": {
> + type: usize,
> + optional: true,
> + minimum: 1,
> + },
> },
> )]
> #[derive(Serialize, Deserialize, Default)]
> @@ -220,6 +225,9 @@ pub struct DatastoreTuning {
> pub chunk_order: Option<ChunkOrder>,
> #[serde(skip_serializing_if = "Option::is_none")]
> pub sync_level: Option<DatastoreFSyncLevel>,
> + #[serde(skip_serializing_if = "Option::is_none")]
> + /// Configures how many threads to use to read from the datastore while backing up to tape.
> + pub tape_backup_read_threads: Option<usize>,
> }
>
> pub const DATASTORE_TUNING_STRING_SCHEMA: Schema = StringSchema::new("Datastore tuning options")
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index f95da7615..73a1cfa39 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -49,6 +49,19 @@ pub fn check_backup_owner(owner: &Authid, auth_id: &Authid) -> Result<(), Error>
> Ok(())
> }
>
> +/// Contains the configuration of how many threads to use in various situations
> +pub struct ThreadConfiguration {
> + pub tape_backup_read_threads: usize,
> +}
> +
> +impl Default for ThreadConfiguration {
> + fn default() -> Self {
> + Self {
> + tape_backup_read_threads: 4,
> + }
> + }
> +}
> +
> /// Datastore Management
> ///
> /// A Datastore can store severals backups, and provides the
> @@ -61,6 +74,7 @@ pub struct DataStoreImpl {
> chunk_order: ChunkOrder,
> last_digest: Option<[u8; 32]>,
> sync_level: DatastoreFSyncLevel,
> + thread_config: ThreadConfiguration,
> }
>
> impl DataStoreImpl {
> @@ -75,6 +89,7 @@ impl DataStoreImpl {
> chunk_order: Default::default(),
> last_digest: None,
> sync_level: Default::default(),
> + thread_config: Default::default(),
> })
> }
> }
> @@ -305,6 +320,11 @@ impl DataStore {
> .parse_property_string(config.tuning.as_deref().unwrap_or(""))?,
> )?;
>
> + let mut thread_config = ThreadConfiguration::default();
> + if let Some(value) = tuning.tape_backup_read_threads {
> + thread_config.tape_backup_read_threads = value;
> + }
> +
> Ok(DataStoreImpl {
> chunk_store,
> gc_mutex: Mutex::new(()),
> @@ -313,6 +333,7 @@ impl DataStore {
> chunk_order: tuning.chunk_order.unwrap_or_default(),
> last_digest,
> sync_level: tuning.sync_level.unwrap_or_default(),
> + thread_config,
> })
> }
>
> @@ -1377,6 +1398,11 @@ impl DataStore {
> Ok(())
> }
>
> + /// returns the datastore thread configuration
> + pub fn get_thread_configuration(&self) -> &ThreadConfiguration {
> + &self.inner.thread_config
> + }
> +
> /// Destroy a datastore. This requires that there are no active operations on the datastore.
> ///
> /// This is a synchronous operation and should be run in a worker-thread.
> diff --git a/src/tape/pool_writer/new_chunks_iterator.rs b/src/tape/pool_writer/new_chunks_iterator.rs
> index 1454b33d2..a2a8091f6 100644
> --- a/src/tape/pool_writer/new_chunks_iterator.rs
> +++ b/src/tape/pool_writer/new_chunks_iterator.rs
> @@ -6,8 +6,9 @@ use anyhow::{format_err, Error};
> use pbs_datastore::{DataBlob, DataStore, SnapshotReader};
>
> use crate::tape::CatalogSet;
> +use crate::tools::parallel_handler::ParallelHandler;
>
> -/// Chunk iterator which use a separate thread to read chunks
> +/// Chunk iterator which uses separate threads to read chunks
> ///
> /// The iterator skips duplicate chunks and chunks already in the
> /// catalog.
> @@ -25,7 +26,11 @@ impl NewChunksIterator {
> snapshot_reader: Arc<Mutex<SnapshotReader>>,
> catalog_set: Arc<Mutex<CatalogSet>>,
> ) -> Result<(std::thread::JoinHandle<()>, Self), Error> {
> - let (tx, rx) = std::sync::mpsc::sync_channel(3);
> + let read_threads = datastore
> + .get_thread_configuration()
> + .tape_backup_read_threads;
> +
> + let (tx, rx) = std::sync::mpsc::sync_channel(read_threads * 2);
Is there any reason you're using `* 2` here? For example, is the
throughput unaffected if you use a larger value, like `* 8`?
If the constant has an effect like that, it should IMO be documented;
if not, it can just stay as it is.
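To illustrate what I'm getting at with the capacity: a tiny standalone
sketch of the backpressure behavior (made-up values, nothing from the
patch):

    use std::sync::mpsc::sync_channel;
    use std::thread;
    use std::time::Duration;

    fn main() {
        let threads = 4;
        // The capacity bounds how far the readers can run ahead of the
        // consumer: with `threads * 2`, send() blocks once 8 chunks sit
        // in the queue unconsumed.
        let (tx, rx) = sync_channel::<usize>(threads * 2);

        let producer = thread::spawn(move || {
            for i in 0..32 {
                tx.send(i).unwrap(); // blocks when the buffer is full
            }
        });

        // a deliberately slow consumer, standing in for the tape drive
        for chunk in rx {
            thread::sleep(Duration::from_millis(1));
            let _ = chunk;
        }

        producer.join().unwrap();
    }

My guess would be that a larger multiplier only matters if the drive
occasionally drains the buffer faster than the readers can refill it;
hence the question.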
>
> let reader_thread = std::thread::spawn(move || {
> let snapshot_reader = snapshot_reader.lock().unwrap();
> @@ -35,36 +40,44 @@ impl NewChunksIterator {
> let datastore_name = snapshot_reader.datastore_name().to_string();
>
> let result: Result<(), Error> = proxmox_lang::try_block!({
> - let mut chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
> + let chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
> catalog_set
> .lock()
> .unwrap()
> .contains_chunk(&datastore_name, digest)
> })?;
>
> - loop {
> - let digest = match chunk_iter.next() {
> - None => {
> - let _ = tx.send(Ok(None)); // ignore send error
> - break;
> + let reader_pool =
> + ParallelHandler::new("tape backup chunk reader pool", read_threads, {
> + let tx = tx.clone();
> + move |digest| {
> + let blob = datastore.load_chunk(&digest)?;
> + //println!("LOAD CHUNK {}", hex::encode(&digest));
> +
> + tx.send(Ok(Some((digest, blob)))).map_err(|err| {
> + format_err!("error sending result from reader thread: {err}")
> + })?;
> +
> + Ok(())
> }
> - Some(digest) => digest?,
> - };
> + });
> +
> + for digest in chunk_iter {
> + let digest = digest?;
>
> if chunk_index.contains(&digest) {
> continue;
> }
>
> - let blob = datastore.load_chunk(&digest)?;
> - //println!("LOAD CHUNK {}", hex::encode(&digest));
> - if let Err(err) = tx.send(Ok(Some((digest, blob)))) {
> - eprintln!("could not send chunk to reader thread: {err}");
> - break;
> - }
> + reader_pool.send(digest)?;
>
> chunk_index.insert(digest);
> }
>
> + reader_pool.complete()?;
> +
> + let _ = tx.send(Ok(None)); // ignore send error
> +
> Ok(())
> });
> if let Err(err) = result {
> diff --git a/www/Utils.js b/www/Utils.js
> index 1d7351a32..4d224cd4a 100644
> --- a/www/Utils.js
> +++ b/www/Utils.js
> @@ -790,6 +790,11 @@ Ext.define('PBS.Utils', {
> sync = PBS.Utils.tuningOptions['sync-level'][sync ?? '__default__'];
> options.push(`${gettext('Sync Level')}: ${sync}`);
>
> + let tapeBackupRT = tuning['tape-backup-read-threads'];
> + delete tuning['tape-backup-read-threads'];
> + tapeBackupRT ??= Proxmox.Utils.defaultText + ` (4)`;
> + options.push(`${gettext('Tape Backup Read Threads')}: ${tapeBackupRT}`);
> +
> for (const [k, v] of Object.entries(tuning)) {
> options.push(`${k}: ${v}`);
> }
> diff --git a/www/datastore/OptionView.js b/www/datastore/OptionView.js
> index e1f38af6f..cfbb89151 100644
> --- a/www/datastore/OptionView.js
> +++ b/www/datastore/OptionView.js
> @@ -271,6 +271,14 @@ Ext.define('PBS.Datastore.Options', {
> deleteEmpty: true,
> value: '__default__',
> },
> + {
> + xtype: 'proxmoxintegerfield',
> + name: 'tape-backup-read-threads',
> + fieldLabel: gettext('Tape Backup Read Threads'),
> + min: 1,
> + emptyText: '4',
> + deleteEmpty: true,
> + },
> ],
> },
> },
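One more note for my testing tomorrow: I'd expect the new field to
round-trip through the tuning property string roughly like this
(untested sketch, mirroring the parse in pbs-datastore/src/datastore.rs
above):

    // parse the admin-facing property string into the typed struct
    let tuning: DatastoreTuning = serde_json::from_value(
        DatastoreTuning::API_SCHEMA
            .parse_property_string("tape-backup-read-threads=8")?,
    )?;
    assert_eq!(tuning.tape_backup_read_threads, Some(8));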