[pbs-devel] [PATCH proxmox-backup v2 1/3] tape: introduce a tape backup read thread tuning option
Max Carrara
m.carrara at proxmox.com
Tue May 7 17:10:20 CEST 2024
On Tue May 7, 2024 at 9:29 AM CEST, Dominik Csapak wrote:
> using a single thread for reading is not optimal in some cases, e.g.
> when the underlying storage can handle more reads in parallel than with
> a single thread.
>
I've only got a few tiny notes, nothing major:
Would prefer to reword this as:
Using a single thread for reading is not optimal in some cases, e.g.
when the underlying storage can handle reads from multiple threads in
parallel.
> We use the ParallelHandler to handle the actual reads. Make the
> sync_channel buffer size depending on the number of threads so we have
> space for two chunks per thread.
Would s/depending/depend/ for the above paragraph.
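Since the commit message is the only place that explains the new
threading model, here's the rough shape of the ParallelHandler usage
for anyone skimming the archive later (a simplified sketch of the diff
below; `do_work` and `items` are placeholders, not code from the
patch):

    // Each of `read_threads` workers runs the closure on items pushed
    // via send(); complete() joins the workers and surfaces any errors
    // they returned.
    let pool = ParallelHandler::new("example pool", read_threads, move |item: [u8; 32]| {
        do_work(item)?; // in the patch: datastore.load_chunk(&item)
        Ok(())
    });

    for item in items {
        pool.send(item)?; // AFAICT blocks while all workers are busy
    }

    pool.complete()?; // wait for outstanding work before returning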
>
> How this impacts the backup speed largely depends on the underlying
> storage and how the backup is laid out on it.
>
> I benchmarked the following setups:
>
> * Setup A: relatively spread out backup on a virtualized pbs on single HDDs
> * Setup B: mostly sequential chunks on a virtualized pbs on single HDDs
> * Setup C: backup on virtualized pbs on a fast NVME
> * Setup D: backup on bare metal pbs with ZFS in a RAID10 with 6 HDDs
> and 2 fast special devices in a mirror
>
> (values are reported in MB/s as seen in the task log, caches were
> cleared between runs, backups were bigger than the memory available)
>
> setup   1 thread   2 threads   4 threads   8 threads
> A             55          70          80          95
> B            110          89         100         108
> C            294         294         294         294
> D            118         180         300         300
>
> So there are cases where multiple read threads speed up the tape backup
> (dramatically). On the other hand there are situations where reading
> from a single thread is actually faster, probably because we can read
> from the HDD sequentially.
>
> I used a new default value of '4' here since that gave good performance
> on all setups (not necessarily the best) and we also have a default
> value of '4' for verification threads.
>
> We use a struct here for the datastore since we want to introduce other
> thread tuning options too.
>
> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
The patch is pretty straightforward; there's one question inline, but
otherwise this looks fine to me. Solid work!
Wasn't able to test this yet as setting up a VTL on Bookworm failed for
me, unfortunately. Will try to test this tomorrow if possible.
For now:
Reviewed-by: Max Carrara <m.carrara at proxmox.com>
> ---
> pbs-api-types/src/datastore.rs | 8 ++++
> pbs-datastore/src/datastore.rs | 26 ++++++++++++
> src/tape/pool_writer/new_chunks_iterator.rs | 45 +++++++++++++--------
> www/Utils.js | 5 +++
> www/datastore/OptionView.js | 8 ++++
> 5 files changed, 76 insertions(+), 16 deletions(-)
>
> diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs
> index 31767417a..1dae3867f 100644
> --- a/pbs-api-types/src/datastore.rs
> +++ b/pbs-api-types/src/datastore.rs
> @@ -209,6 +209,11 @@ pub enum DatastoreFSyncLevel {
> type: ChunkOrder,
> optional: true,
> },
> + "tape-backup-read-threads": {
> + type: usize,
> + optional: true,
> + minimum: 1,
> + },
> },
> )]
> #[derive(Serialize, Deserialize, Default)]
> @@ -220,6 +225,9 @@ pub struct DatastoreTuning {
> pub chunk_order: Option<ChunkOrder>,
> #[serde(skip_serializing_if = "Option::is_none")]
> pub sync_level: Option<DatastoreFSyncLevel>,
> + #[serde(skip_serializing_if = "Option::is_none")]
> + /// Configures how many threads to use to read from the datastore while backing up to tape.
> + pub tape_backup_read_threads: Option<usize>,
> }
>
> pub const DATASTORE_TUNING_STRING_SCHEMA: Schema = StringSchema::new("Datastore tuning options")
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index f95da7615..73a1cfa39 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -49,6 +49,19 @@ pub fn check_backup_owner(owner: &Authid, auth_id: &Authid) -> Result<(), Error>
> Ok(())
> }
>
> +/// Contains the configuration of how many threads to use in various situations
> +pub struct ThreadConfiguration {
> + pub tape_backup_read_threads: usize,
> +}
> +
> +impl Default for ThreadConfiguration {
> + fn default() -> Self {
> + Self {
> + tape_backup_read_threads: 4,
> + }
> + }
> +}
> +
> /// Datastore Management
> ///
> /// A Datastore can store severals backups, and provides the
> @@ -61,6 +74,7 @@ pub struct DataStoreImpl {
> chunk_order: ChunkOrder,
> last_digest: Option<[u8; 32]>,
> sync_level: DatastoreFSyncLevel,
> + thread_config: ThreadConfiguration,
> }
>
> impl DataStoreImpl {
> @@ -75,6 +89,7 @@ impl DataStoreImpl {
> chunk_order: Default::default(),
> last_digest: None,
> sync_level: Default::default(),
> + thread_config: Default::default(),
> })
> }
> }
> @@ -305,6 +320,11 @@ impl DataStore {
> .parse_property_string(config.tuning.as_deref().unwrap_or(""))?,
> )?;
>
> + let mut thread_config = ThreadConfiguration::default();
> + if let Some(value) = tuning.tape_backup_read_threads {
> + thread_config.tape_backup_read_threads = value;
> + }
> +
> Ok(DataStoreImpl {
> chunk_store,
> gc_mutex: Mutex::new(()),
> @@ -313,6 +333,7 @@ impl DataStore {
> chunk_order: tuning.chunk_order.unwrap_or_default(),
> last_digest,
> sync_level: tuning.sync_level.unwrap_or_default(),
> + thread_config,
> })
> }
>
> @@ -1377,6 +1398,11 @@ impl DataStore {
> Ok(())
> }
>
> + /// returns the datastore thread configuration
> + pub fn get_thread_configuration(&self) -> &ThreadConfiguration {
> + &self.inner.thread_config
> + }
> +
> /// Destroy a datastore. This requires that there are no active operations on the datastore.
> ///
> /// This is a synchronous operation and should be run in a worker-thread.
> diff --git a/src/tape/pool_writer/new_chunks_iterator.rs b/src/tape/pool_writer/new_chunks_iterator.rs
> index 1454b33d2..a2a8091f6 100644
> --- a/src/tape/pool_writer/new_chunks_iterator.rs
> +++ b/src/tape/pool_writer/new_chunks_iterator.rs
> @@ -6,8 +6,9 @@ use anyhow::{format_err, Error};
> use pbs_datastore::{DataBlob, DataStore, SnapshotReader};
>
> use crate::tape::CatalogSet;
> +use crate::tools::parallel_handler::ParallelHandler;
>
> -/// Chunk iterator which use a separate thread to read chunks
> +/// Chunk iterator which uses separate threads to read chunks
> ///
> /// The iterator skips duplicate chunks and chunks already in the
> /// catalog.
> @@ -25,7 +26,11 @@ impl NewChunksIterator {
> snapshot_reader: Arc<Mutex<SnapshotReader>>,
> catalog_set: Arc<Mutex<CatalogSet>>,
> ) -> Result<(std::thread::JoinHandle<()>, Self), Error> {
> - let (tx, rx) = std::sync::mpsc::sync_channel(3);
> + let read_threads = datastore
> + .get_thread_configuration()
> + .tape_backup_read_threads;
> +
> + let (tx, rx) = std::sync::mpsc::sync_channel(read_threads * 2);
Is there any reason you're using `* 2` here? For example, is the
throughput unaffected if you use a larger value, like `* 8`?
If the constant has an effect like that, it should IMO be documented;
if not, it can just stay as it is.
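To illustrate what I'm getting at with the capacity: a tiny standalone
sketch of the backpressure behavior (made-up values, nothing from the
patch):

    use std::sync::mpsc::sync_channel;
    use std::thread;
    use std::time::Duration;

    fn main() {
        let threads = 4;
        // The capacity bounds how far the readers can run ahead of the
        // consumer: with `threads * 2`, send() blocks once 8 chunks sit
        // in the queue unconsumed.
        let (tx, rx) = sync_channel::<usize>(threads * 2);

        let producer = thread::spawn(move || {
            for i in 0..32 {
                tx.send(i).unwrap(); // blocks when the buffer is full
            }
        });

        // a deliberately slow consumer, standing in for the tape drive
        for chunk in rx {
            thread::sleep(Duration::from_millis(1));
            let _ = chunk;
        }

        producer.join().unwrap();
    }

My guess would be that a larger multiplier only matters if the drive
occasionally drains the buffer faster than the readers can refill it;
hence the question.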
>
> let reader_thread = std::thread::spawn(move || {
> let snapshot_reader = snapshot_reader.lock().unwrap();
> @@ -35,36 +40,44 @@ impl NewChunksIterator {
> let datastore_name = snapshot_reader.datastore_name().to_string();
>
> let result: Result<(), Error> = proxmox_lang::try_block!({
> - let mut chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
> + let chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
> catalog_set
> .lock()
> .unwrap()
> .contains_chunk(&datastore_name, digest)
> })?;
>
> - loop {
> - let digest = match chunk_iter.next() {
> - None => {
> - let _ = tx.send(Ok(None)); // ignore send error
> - break;
> + let reader_pool =
> + ParallelHandler::new("tape backup chunk reader pool", read_threads, {
> + let tx = tx.clone();
> + move |digest| {
> + let blob = datastore.load_chunk(&digest)?;
> + //println!("LOAD CHUNK {}", hex::encode(&digest));
> +
> + tx.send(Ok(Some((digest, blob)))).map_err(|err| {
> + format_err!("error sending result from reader thread: {err}")
> + })?;
> +
> + Ok(())
> }
> - Some(digest) => digest?,
> - };
> + });
> +
> + for digest in chunk_iter {
> + let digest = digest?;
>
> if chunk_index.contains(&digest) {
> continue;
> }
>
> - let blob = datastore.load_chunk(&digest)?;
> - //println!("LOAD CHUNK {}", hex::encode(&digest));
> - if let Err(err) = tx.send(Ok(Some((digest, blob)))) {
> - eprintln!("could not send chunk to reader thread: {err}");
> - break;
> - }
> + reader_pool.send(digest)?;
>
> chunk_index.insert(digest);
> }
>
> + reader_pool.complete()?;
> +
> + let _ = tx.send(Ok(None)); // ignore send error
> +
> Ok(())
> });
> if let Err(err) = result {
> diff --git a/www/Utils.js b/www/Utils.js
> index 1d7351a32..4d224cd4a 100644
> --- a/www/Utils.js
> +++ b/www/Utils.js
> @@ -790,6 +790,11 @@ Ext.define('PBS.Utils', {
> sync = PBS.Utils.tuningOptions['sync-level'][sync ?? '__default__'];
> options.push(`${gettext('Sync Level')}: ${sync}`);
>
> + let tapeBackupRT = tuning['tape-backup-read-threads'];
> + delete tuning['tape-backup-read-threads'];
> + tapeBackupRT ??= Proxmox.Utils.defaultText + ` (4)`;
> + options.push(`${gettext('Tape Backup Read Threads')}: ${tapeBackupRT}`);
> +
> for (const [k, v] of Object.entries(tuning)) {
> options.push(`${k}: ${v}`);
> }
> diff --git a/www/datastore/OptionView.js b/www/datastore/OptionView.js
> index e1f38af6f..cfbb89151 100644
> --- a/www/datastore/OptionView.js
> +++ b/www/datastore/OptionView.js
> @@ -271,6 +271,14 @@ Ext.define('PBS.Datastore.Options', {
> deleteEmpty: true,
> value: '__default__',
> },
> + {
> + xtype: 'proxmoxintegerfield',
> + name: 'tape-backup-read-threads',
> + fieldLabel: gettext('Tape Backup Read Threads'),
> + min: 1,
> + emptyText: '4',
> + deleteEmpty: true,
> + },
> ],
> },
> },
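One more note for my testing tomorrow: I'd expect the new field to
round-trip through the tuning property string roughly like this
(untested sketch, mirroring the parse in pbs-datastore/src/datastore.rs
above):

    // parse the admin-facing property string into the typed struct
    let tuning: DatastoreTuning = serde_json::from_value(
        DatastoreTuning::API_SCHEMA
            .parse_property_string("tape-backup-read-threads=8")?,
    )?;
    assert_eq!(tuning.tape_backup_read_threads, Some(8));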