[pbs-devel] [PATCH proxmox-backup v2 1/3] tape: introduce a tape backup read thread tuning option
Dominik Csapak
d.csapak at proxmox.com
Tue May 7 09:29:53 CEST 2024
Using a single thread for reading is not optimal in some cases, e.g.
when the underlying storage can serve more reads in parallel than a
single thread can issue.
We use the ParallelHandler to handle the actual reads. Make the
sync_channel buffer size depend on the number of threads, so we have
space for two chunks per thread.
How this impacts the backup speed largely depends on the underlying
storage and how the backup is laid out on it.
I benchmarked the following setups:
* Setup A: relatively spread out backup on a virtualized pbs on single HDDs
* Setup B: mostly sequential chunks on a virtualized pbs on single HDDs
* Setup C: backup on virtualized pbs on a fast NVME
* Setup D: backup on bare metal pbs with ZFS in a RAID10 with 6 HDDs
and 2 fast special devices in a mirror
(values are reported in MB/s as seen in the task log; caches were
cleared between runs and the backups were larger than the available
memory)
setup    1 thread    2 threads    4 threads    8 threads
A              55           70           80           95
B             110           89          100          108
C             294          294          294          294
D             118          180          300          300
So there are cases where multiple read threads speed up the tape
backup (dramatically). On the other hand, there are situations where a
single read thread is actually faster, probably because it can read
from the HDD sequentially.
I used '4' as the new default value here since it gave good (if not
necessarily the best) performance on all setups, and it matches the
default value of '4' for verification threads.
We use a struct for the datastore's thread configuration since we
want to introduce further thread tuning options later.
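For reference, the option is then set through the datastore tuning
property string, e.g. (assuming the usual tuning syntax of
proxmox-backup-manager; the store name is a placeholder):

    proxmox-backup-manager datastore update <name> \
        --tuning 'tape-backup-read-threads=8'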
Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
pbs-api-types/src/datastore.rs | 8 ++++
pbs-datastore/src/datastore.rs | 26 ++++++++++++
src/tape/pool_writer/new_chunks_iterator.rs | 45 +++++++++++++--------
www/Utils.js | 5 +++
www/datastore/OptionView.js | 8 ++++
5 files changed, 76 insertions(+), 16 deletions(-)
diff --git a/pbs-api-types/src/datastore.rs b/pbs-api-types/src/datastore.rs
index 31767417a..1dae3867f 100644
--- a/pbs-api-types/src/datastore.rs
+++ b/pbs-api-types/src/datastore.rs
@@ -209,6 +209,11 @@ pub enum DatastoreFSyncLevel {
type: ChunkOrder,
optional: true,
},
+ "tape-backup-read-threads": {
+ type: usize,
+ optional: true,
+ minimum: 1,
+ },
},
)]
#[derive(Serialize, Deserialize, Default)]
@@ -220,6 +225,9 @@ pub struct DatastoreTuning {
pub chunk_order: Option<ChunkOrder>,
#[serde(skip_serializing_if = "Option::is_none")]
pub sync_level: Option<DatastoreFSyncLevel>,
+ #[serde(skip_serializing_if = "Option::is_none")]
+ /// Configures how many threads to use to read from the datastore while backing up to tape.
+ pub tape_backup_read_threads: Option<usize>,
}
pub const DATASTORE_TUNING_STRING_SCHEMA: Schema = StringSchema::new("Datastore tuning options")
diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
index f95da7615..73a1cfa39 100644
--- a/pbs-datastore/src/datastore.rs
+++ b/pbs-datastore/src/datastore.rs
@@ -49,6 +49,19 @@ pub fn check_backup_owner(owner: &Authid, auth_id: &Authid) -> Result<(), Error>
Ok(())
}
+/// Contains the configuration of how many threads to use in various situations
+pub struct ThreadConfiguration {
+ pub tape_backup_read_threads: usize,
+}
+
+impl Default for ThreadConfiguration {
+ fn default() -> Self {
+ Self {
+ tape_backup_read_threads: 4,
+ }
+ }
+}
+
/// Datastore Management
///
/// A Datastore can store several backups, and provides the
@@ -61,6 +74,7 @@ pub struct DataStoreImpl {
chunk_order: ChunkOrder,
last_digest: Option<[u8; 32]>,
sync_level: DatastoreFSyncLevel,
+ thread_config: ThreadConfiguration,
}
impl DataStoreImpl {
@@ -75,6 +89,7 @@ impl DataStoreImpl {
chunk_order: Default::default(),
last_digest: None,
sync_level: Default::default(),
+ thread_config: Default::default(),
})
}
}
@@ -305,6 +320,11 @@ impl DataStore {
.parse_property_string(config.tuning.as_deref().unwrap_or(""))?,
)?;
+ let mut thread_config = ThreadConfiguration::default();
+ if let Some(value) = tuning.tape_backup_read_threads {
+ thread_config.tape_backup_read_threads = value;
+ }
+
Ok(DataStoreImpl {
chunk_store,
gc_mutex: Mutex::new(()),
@@ -313,6 +333,7 @@ impl DataStore {
chunk_order: tuning.chunk_order.unwrap_or_default(),
last_digest,
sync_level: tuning.sync_level.unwrap_or_default(),
+ thread_config,
})
}
@@ -1377,6 +1398,11 @@ impl DataStore {
Ok(())
}
+ /// Returns the datastore thread configuration
+ pub fn get_thread_configuration(&self) -> &ThreadConfiguration {
+ &self.inner.thread_config
+ }
+
/// Destroy a datastore. This requires that there are no active operations on the datastore.
///
/// This is a synchronous operation and should be run in a worker-thread.
diff --git a/src/tape/pool_writer/new_chunks_iterator.rs b/src/tape/pool_writer/new_chunks_iterator.rs
index 1454b33d2..a2a8091f6 100644
--- a/src/tape/pool_writer/new_chunks_iterator.rs
+++ b/src/tape/pool_writer/new_chunks_iterator.rs
@@ -6,8 +6,9 @@ use anyhow::{format_err, Error};
use pbs_datastore::{DataBlob, DataStore, SnapshotReader};
use crate::tape::CatalogSet;
+use crate::tools::parallel_handler::ParallelHandler;
-/// Chunk iterator which use a separate thread to read chunks
+/// Chunk iterator which uses separate threads to read chunks
///
/// The iterator skips duplicate chunks and chunks already in the
/// catalog.
@@ -25,7 +26,11 @@ impl NewChunksIterator {
snapshot_reader: Arc<Mutex<SnapshotReader>>,
catalog_set: Arc<Mutex<CatalogSet>>,
) -> Result<(std::thread::JoinHandle<()>, Self), Error> {
- let (tx, rx) = std::sync::mpsc::sync_channel(3);
+ let read_threads = datastore
+ .get_thread_configuration()
+ .tape_backup_read_threads;
+
+ let (tx, rx) = std::sync::mpsc::sync_channel(read_threads * 2);
let reader_thread = std::thread::spawn(move || {
let snapshot_reader = snapshot_reader.lock().unwrap();
@@ -35,36 +40,44 @@ impl NewChunksIterator {
let datastore_name = snapshot_reader.datastore_name().to_string();
let result: Result<(), Error> = proxmox_lang::try_block!({
- let mut chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
+ let chunk_iter = snapshot_reader.chunk_iterator(move |digest| {
catalog_set
.lock()
.unwrap()
.contains_chunk(&datastore_name, digest)
})?;
- loop {
- let digest = match chunk_iter.next() {
- None => {
- let _ = tx.send(Ok(None)); // ignore send error
- break;
+ let reader_pool =
+ ParallelHandler::new("tape backup chunk reader pool", read_threads, {
+ let tx = tx.clone();
+ move |digest| {
+ let blob = datastore.load_chunk(&digest)?;
+ //println!("LOAD CHUNK {}", hex::encode(&digest));
+
+ tx.send(Ok(Some((digest, blob)))).map_err(|err| {
+ format_err!("error sending result from reader thread: {err}")
+ })?;
+
+ Ok(())
}
- Some(digest) => digest?,
- };
+ });
+
+ for digest in chunk_iter {
+ let digest = digest?;
if chunk_index.contains(&digest) {
continue;
}
- let blob = datastore.load_chunk(&digest)?;
- //println!("LOAD CHUNK {}", hex::encode(&digest));
- if let Err(err) = tx.send(Ok(Some((digest, blob)))) {
- eprintln!("could not send chunk to reader thread: {err}");
- break;
- }
+ reader_pool.send(digest)?;
chunk_index.insert(digest);
}
+ reader_pool.complete()?;
+
+ let _ = tx.send(Ok(None)); // ignore send error
+
Ok(())
});
if let Err(err) = result {
diff --git a/www/Utils.js b/www/Utils.js
index 1d7351a32..4d224cd4a 100644
--- a/www/Utils.js
+++ b/www/Utils.js
@@ -790,6 +790,11 @@ Ext.define('PBS.Utils', {
sync = PBS.Utils.tuningOptions['sync-level'][sync ?? '__default__'];
options.push(`${gettext('Sync Level')}: ${sync}`);
+ let tapeBackupRT = tuning['tape-backup-read-threads'];
+ delete tuning['tape-backup-read-threads'];
+ tapeBackupRT ??= Proxmox.Utils.defaultText + ` (4)`;
+ options.push(`${gettext('Tape Backup Read Threads')}: ${tapeBackupRT}`);
+
for (const [k, v] of Object.entries(tuning)) {
options.push(`${k}: ${v}`);
}
diff --git a/www/datastore/OptionView.js b/www/datastore/OptionView.js
index e1f38af6f..cfbb89151 100644
--- a/www/datastore/OptionView.js
+++ b/www/datastore/OptionView.js
@@ -271,6 +271,14 @@ Ext.define('PBS.Datastore.Options', {
deleteEmpty: true,
value: '__default__',
},
+ {
+ xtype: 'proxmoxintegerfield',
+ name: 'tape-backup-read-threads',
+ fieldLabel: gettext('Tape Backup Read Threads'),
+ min: 1,
+ emptyText: '4',
+ deleteEmpty: true,
+ },
],
},
},
--
2.39.2