[pbs-devel] [PATCH proxmox-backup 1/2] backup/datastore: refactor chunk inode sorting to the datastore
Dominik Csapak
d.csapak at proxmox.com
Fri Jun 18 11:29:09 CEST 2021
Move the sorting of chunks by inode number into the datastore, so that
we can reuse that information elsewhere.
No longer adding chunks to the corrupted list when the stat fails is OK:
'get_chunks_in_order' returns chunks that could not be stat'ed at the end
of the list, and 'verify_index_chunks' marks a chunk as corrupt when
loading it fails later on, so we still mark them corrupt
(assuming that the load will fail if the stat does).
Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
Alternatively, we could return two lists, or check for the u64::MAX value
after the loop, but IMHO it is OK this way.
src/backup/datastore.rs | 38 ++++++++++++++++++++++++++++++++++++++
src/backup/verify.rs | 38 +++++++-------------------------------
2 files changed, 45 insertions(+), 31 deletions(-)
diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
index a0cf50b2..116d2441 100644
--- a/src/backup/datastore.rs
+++ b/src/backup/datastore.rs
@@ -825,4 +825,42 @@ impl DataStore {
pub fn verify_new(&self) -> bool {
self.verify_new
}
+
+ /// returns a list of chunks sorted by their inode number on disk
+ /// chunks that could not be stat'ed are at the end of the list
+ pub fn get_chunks_in_order<F, A>(
+ &self,
+ index: &Box<dyn IndexFile + Send>,
+ skip_chunk: F,
+ check_abort: A,
+ ) -> Result<Vec<(usize, u64)>, Error>
+ where
+ F: Fn(&[u8; 32]) -> bool,
+ A: Fn(usize) -> Result<(), Error>,
+ {
+ let index_count = index.index_count();
+ let mut chunk_list = Vec::with_capacity(index_count);
+ use std::os::unix::fs::MetadataExt;
+ for pos in 0..index_count {
+ check_abort(pos)?;
+
+ let info = index.chunk_info(pos).unwrap();
+
+ if skip_chunk(&info.digest) {
+ continue;
+ }
+
+ let ino = match self.stat_chunk(&info.digest) {
+ Err(_) => u64::MAX, // could not stat, move to end of list
+ Ok(metadata) => metadata.ino(),
+ };
+
+ chunk_list.push((pos, ino));
+ }
+
+ // sorting by inode improves data locality, which makes it lots faster on spinners
+ chunk_list.sort_unstable_by(|(_, ino_a), (_, ino_b)| ino_a.cmp(&ino_b));
+
+ Ok(chunk_list)
+ }
}
diff --git a/src/backup/verify.rs b/src/backup/verify.rs
index a1b1e6dd..74b0cfe7 100644
--- a/src/backup/verify.rs
+++ b/src/backup/verify.rs
@@ -179,42 +179,18 @@ fn verify_index_chunks(
}
};
- let index_count = index.index_count();
- let mut chunk_list = Vec::with_capacity(index_count);
-
- use std::os::unix::fs::MetadataExt;
-
- for pos in 0..index_count {
+ let check_abort = |pos: usize| -> Result<(), Error> {
if pos & 1023 == 0 {
verify_worker.worker.check_abort()?;
crate::tools::fail_on_shutdown()?;
}
+ Ok(())
+ };
- let info = index.chunk_info(pos).unwrap();
-
- if skip_chunk(&info.digest) {
- continue; // already verified or marked corrupt
- }
-
- match verify_worker.datastore.stat_chunk(&info.digest) {
- Err(err) => {
- verify_worker.corrupt_chunks.lock().unwrap().insert(info.digest);
- task_log!(verify_worker.worker, "can't verify chunk, stat failed - {}", err);
- errors.fetch_add(1, Ordering::SeqCst);
- rename_corrupted_chunk(
- verify_worker.datastore.clone(),
- &info.digest,
- &verify_worker.worker,
- );
- }
- Ok(metadata) => {
- chunk_list.push((pos, metadata.ino()));
- }
- }
- }
-
- // sorting by inode improves data locality, which makes it lots faster on spinners
- chunk_list.sort_unstable_by(|(_, ino_a), (_, ino_b)| ino_a.cmp(&ino_b));
+ let chunk_list =
+ verify_worker
+ .datastore
+ .get_chunks_in_order(&index, skip_chunk, check_abort)?;
for (pos, _) in chunk_list {
verify_worker.worker.check_abort()?;
--
2.20.1
More information about the pbs-devel
mailing list