[pbs-devel] [RFC proxmox-backup 1/2] datastore: Allow encoding with a set compression level

Maximiliano Sandoval R m.sandoval at proxmox.com
Fri Nov 3 16:17:06 CET 2023


We replace the boolean flag with an enum.

Signed-off-by: Maximiliano Sandoval R <m.sandoval at proxmox.com>
---
 pbs-client/src/backup_writer.rs         | 15 +++----
 pbs-datastore/src/backup_info.rs        |  4 +-
 pbs-datastore/src/data_blob.rs          | 56 +++++++++++++++----------
 pbs-datastore/src/dynamic_index.rs      |  3 +-
 pbs-datastore/src/lib.rs                |  2 +-
 proxmox-backup-client/src/benchmark.rs  |  5 ++-
 proxmox-backup-client/src/main.rs       | 17 ++++----
 proxmox-backup-client/src/snapshot.rs   | 12 ++++--
 src/api2/tape/restore.rs                |  4 +-
 src/bin/proxmox_backup_debug/recover.rs |  8 +++-
 10 files changed, 76 insertions(+), 50 deletions(-)

diff --git a/pbs-client/src/backup_writer.rs b/pbs-client/src/backup_writer.rs
index 8a03d8ea..cfec0a1e 100644
--- a/pbs-client/src/backup_writer.rs
+++ b/pbs-client/src/backup_writer.rs
@@ -18,7 +18,7 @@ use pbs_datastore::dynamic_index::DynamicIndexReader;
 use pbs_datastore::fixed_index::FixedIndexReader;
 use pbs_datastore::index::IndexFile;
 use pbs_datastore::manifest::{ArchiveType, BackupManifest, MANIFEST_BLOB_NAME};
-use pbs_datastore::{CATALOG_NAME, PROXMOX_BACKUP_PROTOCOL_ID_V1};
+use pbs_datastore::{Compression, CATALOG_NAME, PROXMOX_BACKUP_PROTOCOL_ID_V1};
 use pbs_tools::crypt_config::CryptConfig;
 
 use proxmox_human_byte::HumanByte;
@@ -48,7 +48,7 @@ pub struct BackupStats {
 #[derive(Default, Clone)]
 pub struct UploadOptions {
     pub previous_manifest: Option<Arc<BackupManifest>>,
-    pub compress: bool,
+    pub compression: Compression,
     pub encrypt: bool,
     pub fixed_size: Option<u64>,
 }
@@ -213,10 +213,10 @@ impl BackupWriter {
         options: UploadOptions,
     ) -> Result<BackupStats, Error> {
         let blob = match (options.encrypt, &self.crypt_config) {
-            (false, _) => DataBlob::encode(&data, None, options.compress)?,
+            (false, _) => DataBlob::encode(&data, None, options.compression)?,
             (true, None) => bail!("requested encryption without a crypt config"),
             (true, Some(crypt_config)) => {
-                DataBlob::encode(&data, Some(crypt_config), options.compress)?
+                DataBlob::encode(&data, Some(crypt_config), options.compression)?
             }
         };
 
@@ -340,7 +340,7 @@ impl BackupWriter {
             } else {
                 None
             },
-            options.compress,
+            options.compression,
         )
         .await?;
 
@@ -636,7 +636,7 @@ impl BackupWriter {
         prefix: &str,
         known_chunks: Arc<Mutex<HashSet<[u8; 32]>>>,
         crypt_config: Option<Arc<CryptConfig>>,
-        compress: bool,
+        compression: Compression,
     ) -> impl Future<Output = Result<UploadStats, Error>> {
         let total_chunks = Arc::new(AtomicUsize::new(0));
         let total_chunks2 = total_chunks.clone();
@@ -669,7 +669,8 @@ impl BackupWriter {
                 total_chunks.fetch_add(1, Ordering::SeqCst);
                 let offset = stream_len.fetch_add(chunk_len, Ordering::SeqCst) as u64;
 
-                let mut chunk_builder = DataChunkBuilder::new(data.as_ref()).compress(compress);
+                let mut chunk_builder =
+                    DataChunkBuilder::new(data.as_ref()).compression(compression);
 
                 if let Some(ref crypt_config) = crypt_config {
                     chunk_builder = chunk_builder.crypt_config(crypt_config);
diff --git a/pbs-datastore/src/backup_info.rs b/pbs-datastore/src/backup_info.rs
index 128315ba..a8a8b631 100644
--- a/pbs-datastore/src/backup_info.rs
+++ b/pbs-datastore/src/backup_info.rs
@@ -15,7 +15,7 @@ use pbs_config::{open_backup_lockfile, BackupLockGuard};
 use crate::manifest::{
     BackupManifest, CLIENT_LOG_BLOB_NAME, MANIFEST_BLOB_NAME, MANIFEST_LOCK_NAME,
 };
-use crate::{DataBlob, DataStore};
+use crate::{Compression, DataBlob, DataStore};
 
 /// BackupGroup is a directory containing a list of BackupDir
 #[derive(Clone)]
@@ -505,7 +505,7 @@ impl BackupDir {
 
         let manifest = serde_json::to_value(manifest)?;
         let manifest = serde_json::to_string_pretty(&manifest)?;
-        let blob = DataBlob::encode(manifest.as_bytes(), None, true)?;
+        let blob = DataBlob::encode(manifest.as_bytes(), None, Compression::default())?;
         let raw_data = blob.raw_data();
 
         let mut path = self.full_path();
diff --git a/pbs-datastore/src/data_blob.rs b/pbs-datastore/src/data_blob.rs
index 4119c4a4..765f5a98 100644
--- a/pbs-datastore/src/data_blob.rs
+++ b/pbs-datastore/src/data_blob.rs
@@ -12,6 +12,18 @@ use super::file_formats::*;
 
 const MAX_BLOB_SIZE: usize = 128 * 1024 * 1024;
 
+#[derive(Copy, Clone, Debug)]
+pub enum Compression {
+    None,
+    Zstd(i32),
+}
+
+impl Default for Compression {
+    fn default() -> Self {
+        Compression::Zstd(1)
+    }
+}
+
 /// Encoded data chunk with digest and positional information
 pub struct ChunkInfo {
     pub chunk: DataBlob,
@@ -87,7 +99,7 @@ impl DataBlob {
     pub fn encode(
         data: &[u8],
         config: Option<&CryptConfig>,
-        compress: bool,
+        compression: Compression,
     ) -> Result<Self, Error> {
         if data.len() > MAX_BLOB_SIZE {
             bail!("data blob too large ({} bytes).", data.len());
@@ -95,16 +107,18 @@ impl DataBlob {
 
         let mut blob = if let Some(config) = config {
             let compr_data;
-            let (_compress, data, magic) = if compress {
-                compr_data = zstd::bulk::compress(data, 1)?;
-                // Note: We only use compression if result is shorter
-                if compr_data.len() < data.len() {
-                    (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
-                } else {
-                    (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
+
+            let (_compress, data, magic) = match compression {
+                Compression::Zstd(level) => {
+                    compr_data = zstd::bulk::compress(data, level)?;
+                    // Note: We only use compression if result is shorter
+                    if compr_data.len() < data.len() {
+                        (true, &compr_data[..], ENCR_COMPR_BLOB_MAGIC_1_0)
+                    } else {
+                        (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
+                    }
                 }
-            } else {
-                (false, data, ENCRYPTED_BLOB_MAGIC_1_0)
+                Compression::None => (false, data, ENCRYPTED_BLOB_MAGIC_1_0),
             };
 
             let header_len = std::mem::size_of::<EncryptedDataBlobHeader>();
@@ -137,7 +151,7 @@ impl DataBlob {
             DataBlob { raw_data }
         } else {
             let max_data_len = data.len() + std::mem::size_of::<DataBlobHeader>();
-            if compress {
+            if let Compression::Zstd(level) = compression {
                 let mut comp_data = Vec::with_capacity(max_data_len);
 
                 let head = DataBlobHeader {
@@ -148,7 +162,7 @@ impl DataBlob {
                     comp_data.write_le_value(head)?;
                 }
 
-                zstd::stream::copy_encode(data, &mut comp_data, 1)?;
+                zstd::stream::copy_encode(data, &mut comp_data, level)?;
 
                 if comp_data.len() < max_data_len {
                     let mut blob = DataBlob {
@@ -479,7 +493,7 @@ pub struct DataChunkBuilder<'a, 'b> {
     orig_data: &'a [u8],
     digest_computed: bool,
     digest: [u8; 32],
-    compress: bool,
+    compression: Compression,
 }
 
 impl<'a, 'b> DataChunkBuilder<'a, 'b> {
@@ -490,15 +504,13 @@ impl<'a, 'b> DataChunkBuilder<'a, 'b> {
             config: None,
             digest_computed: false,
             digest: [0u8; 32],
-            compress: true,
+            compression: Compression::default(),
         }
     }
 
-    /// Set compression flag.
-    ///
-    /// If true, chunk data is compressed using zstd (level 1).
-    pub fn compress(mut self, value: bool) -> Self {
-        self.compress = value;
+    /// Set compression level
+    pub fn compression(mut self, value: Compression) -> Self {
+        self.compression = value;
         self
     }
 
@@ -543,7 +555,7 @@ impl<'a, 'b> DataChunkBuilder<'a, 'b> {
             self.compute_digest();
         }
 
-        let chunk = DataBlob::encode(self.orig_data, self.config, self.compress)?;
+        let chunk = DataBlob::encode(self.orig_data, self.config, self.compression)?;
         Ok((chunk, self.digest))
     }
 
@@ -551,10 +563,10 @@ impl<'a, 'b> DataChunkBuilder<'a, 'b> {
     pub fn build_zero_chunk(
         crypt_config: Option<&CryptConfig>,
         chunk_size: usize,
-        compress: bool,
+        compression: Compression,
     ) -> Result<(DataBlob, [u8; 32]), Error> {
         let zero_bytes = vec![0; chunk_size];
-        let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compress(compress);
+        let mut chunk_builder = DataChunkBuilder::new(&zero_bytes).compression(compression);
         if let Some(crypt_config) = crypt_config {
             chunk_builder = chunk_builder.crypt_config(crypt_config);
         }
diff --git a/pbs-datastore/src/dynamic_index.rs b/pbs-datastore/src/dynamic_index.rs
index 71a5082e..04e644e0 100644
--- a/pbs-datastore/src/dynamic_index.rs
+++ b/pbs-datastore/src/dynamic_index.rs
@@ -24,6 +24,7 @@ use crate::file_formats;
 use crate::index::{ChunkReadInfo, IndexFile};
 use crate::read_chunk::ReadChunk;
 use crate::Chunker;
+use crate::Compression;
 
 /// Header format definition for dynamic index files (`.dixd`)
 #[repr(C)]
@@ -453,7 +454,7 @@ impl DynamicChunkWriter {
         self.last_chunk = self.chunk_offset;
 
         let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer)
-            .compress(true)
+            .compression(Compression::default())
             .build()?;
 
         match self.index.insert_chunk(&chunk, &digest) {
diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs
index b09fd114..430e2927 100644
--- a/pbs-datastore/src/lib.rs
+++ b/pbs-datastore/src/lib.rs
@@ -199,7 +199,7 @@ pub use chunk_store::ChunkStore;
 pub use chunker::Chunker;
 pub use crypt_reader::CryptReader;
 pub use crypt_writer::CryptWriter;
-pub use data_blob::DataBlob;
+pub use data_blob::{Compression, DataBlob};
 pub use data_blob_reader::DataBlobReader;
 pub use data_blob_writer::DataBlobWriter;
 pub use manifest::BackupManifest;
diff --git a/proxmox-backup-client/src/benchmark.rs b/proxmox-backup-client/src/benchmark.rs
index b3047308..836451a9 100644
--- a/proxmox-backup-client/src/benchmark.rs
+++ b/proxmox-backup-client/src/benchmark.rs
@@ -18,6 +18,7 @@ use pbs_api_types::{BackupNamespace, BackupType};
 use pbs_client::tools::key_source::get_encryption_key_password;
 use pbs_client::{BackupRepository, BackupWriter};
 use pbs_datastore::data_blob::{DataBlob, DataChunkBuilder};
+use pbs_datastore::Compression;
 use pbs_key_config::{load_and_decrypt_key, KeyDerivationConfig};
 use pbs_tools::crypt_config::CryptConfig;
 
@@ -346,7 +347,9 @@ fn test_crypt_speed(benchmark_result: &mut BenchmarkResult) -> Result<(), Error>
 
     let start_time = std::time::Instant::now();
 
-    let (chunk, digest) = DataChunkBuilder::new(&random_data).compress(true).build()?;
+    let (chunk, digest) = DataChunkBuilder::new(&random_data)
+        .compression(Compression::default())
+        .build()?;
 
     let mut bytes = 0;
     loop {
diff --git a/proxmox-backup-client/src/main.rs b/proxmox-backup-client/src/main.rs
index 1a13291a..e5d67c7d 100644
--- a/proxmox-backup-client/src/main.rs
+++ b/proxmox-backup-client/src/main.rs
@@ -4,6 +4,7 @@ use std::path::{Path, PathBuf};
 use std::pin::Pin;
 use std::sync::{Arc, Mutex};
 use std::task::Context;
+use std::str::FromStr;
 
 use anyhow::{bail, format_err, Error};
 use futures::stream::{StreamExt, TryStreamExt};
@@ -57,7 +58,7 @@ use pbs_datastore::manifest::{
     archive_type, ArchiveType, BackupManifest, ENCRYPTED_KEY_BLOB_NAME, MANIFEST_BLOB_NAME,
 };
 use pbs_datastore::read_chunk::AsyncReadChunk;
-use pbs_datastore::CATALOG_NAME;
+use pbs_datastore::{Compression, CATALOG_NAME};
 use pbs_key_config::{decrypt_key, rsa_encrypt_key_config, KeyConfig};
 use pbs_tools::crypt_config::CryptConfig;
 use pbs_tools::json;
@@ -539,7 +540,7 @@ fn spawn_catalog_upload(
 
     let upload_options = UploadOptions {
         encrypt,
-        compress: true,
+        compression: Compression::default(),
         ..UploadOptions::default()
     };
 
@@ -947,7 +948,7 @@ async fn create_backup(
             // no dry-run
             (BackupSpecificationType::CONFIG, false) => {
                 let upload_options = UploadOptions {
-                    compress: true,
+                    compression: Compression::default(),
                     encrypt: crypto.mode == CryptMode::Encrypt,
                     ..UploadOptions::default()
                 };
@@ -961,7 +962,7 @@ async fn create_backup(
             (BackupSpecificationType::LOGFILE, false) => {
                 // fixme: remove - not needed anymore ?
                 let upload_options = UploadOptions {
-                    compress: true,
+                    compression: Compression::default(),
                     encrypt: crypto.mode == CryptMode::Encrypt,
                     ..UploadOptions::default()
                 };
@@ -997,7 +998,7 @@ async fn create_backup(
 
                 let upload_options = UploadOptions {
                     previous_manifest: previous_manifest.clone(),
-                    compress: true,
+                    compression: Compression::default(),
                     encrypt: crypto.mode == CryptMode::Encrypt,
                     ..UploadOptions::default()
                 };
@@ -1021,7 +1022,7 @@ async fn create_backup(
                 let upload_options = UploadOptions {
                     previous_manifest: previous_manifest.clone(),
                     fixed_size: Some(size),
-                    compress: true,
+                    compression: Compression::default(),
                     encrypt: crypto.mode == CryptMode::Encrypt,
                 };
 
@@ -1058,7 +1059,7 @@ async fn create_backup(
         let target = ENCRYPTED_KEY_BLOB_NAME;
         log::info!("Upload RSA encoded key to '{}' as {}", repo, target);
         let options = UploadOptions {
-            compress: false,
+            compression: Compression::None,
             encrypt: false,
             ..UploadOptions::default()
         };
@@ -1076,7 +1077,7 @@ async fn create_backup(
     log::debug!("Upload index.json to '{}'", repo);
 
     let options = UploadOptions {
-        compress: true,
+        compression: Compression::default(),
         encrypt: false,
         ..UploadOptions::default()
     };
diff --git a/proxmox-backup-client/src/snapshot.rs b/proxmox-backup-client/src/snapshot.rs
index f195c23b..bd5271de 100644
--- a/proxmox-backup-client/src/snapshot.rs
+++ b/proxmox-backup-client/src/snapshot.rs
@@ -9,7 +9,7 @@ use proxmox_sys::fs::file_get_contents;
 
 use pbs_api_types::{BackupGroup, BackupNamespace, CryptMode, SnapshotListItem};
 use pbs_client::tools::key_source::get_encryption_key_password;
-use pbs_datastore::DataBlob;
+use pbs_datastore::{Compression, DataBlob};
 use pbs_key_config::decrypt_key;
 use pbs_tools::crypt_config::CryptConfig;
 use pbs_tools::json::required_string_param;
@@ -257,10 +257,14 @@ async fn upload_log(param: Value) -> Result<Value, Error> {
 
     // fixme: howto sign log?
     let blob = match crypto.mode {
-        CryptMode::None | CryptMode::SignOnly => DataBlob::encode(&data, None, true)?,
-        CryptMode::Encrypt => {
-            DataBlob::encode(&data, crypt_config.as_ref().map(Arc::as_ref), true)?
+        CryptMode::None | CryptMode::SignOnly => {
+            DataBlob::encode(&data, None, Compression::default())?
         }
+        CryptMode::Encrypt => DataBlob::encode(
+            &data,
+            crypt_config.as_ref().map(Arc::as_ref),
+            Compression::default(),
+        )?,
     };
 
     let raw_data = blob.into_inner();
diff --git a/src/api2/tape/restore.rs b/src/api2/tape/restore.rs
index 7b6c8978..37763af1 100644
--- a/src/api2/tape/restore.rs
+++ b/src/api2/tape/restore.rs
@@ -27,7 +27,7 @@ use pbs_datastore::dynamic_index::DynamicIndexReader;
 use pbs_datastore::fixed_index::FixedIndexReader;
 use pbs_datastore::index::IndexFile;
 use pbs_datastore::manifest::{archive_type, ArchiveType, BackupManifest, MANIFEST_BLOB_NAME};
-use pbs_datastore::{DataBlob, DataStore};
+use pbs_datastore::{Compression, DataBlob, DataStore};
 use pbs_tape::{
     BlockReadError, MediaContentHeader, TapeRead, PROXMOX_BACKUP_CONTENT_HEADER_MAGIC_1_0,
 };
@@ -1782,7 +1782,7 @@ fn try_restore_snapshot_archive<R: pxar::decoder::SeqRead>(
                 .map(|m| m.remove("verify_state"));
 
             let old_manifest = serde_json::to_string_pretty(&old_manifest)?;
-            let blob = DataBlob::encode(old_manifest.as_bytes(), None, true)?;
+            let blob = DataBlob::encode(old_manifest.as_bytes(), None, Compression::default())?;
 
             let options = CreateOptions::new();
             replace_file(&tmp_path, blob.raw_data(), options, false)?;
diff --git a/src/bin/proxmox_backup_debug/recover.rs b/src/bin/proxmox_backup_debug/recover.rs
index ccac476b..0fdbbf98 100644
--- a/src/bin/proxmox_backup_debug/recover.rs
+++ b/src/bin/proxmox_backup_debug/recover.rs
@@ -12,7 +12,7 @@ use pbs_datastore::dynamic_index::DynamicIndexReader;
 use pbs_datastore::file_formats::{DYNAMIC_SIZED_CHUNK_INDEX_1_0, FIXED_SIZED_CHUNK_INDEX_1_0};
 use pbs_datastore::fixed_index::FixedIndexReader;
 use pbs_datastore::index::IndexFile;
-use pbs_datastore::DataBlob;
+use pbs_datastore::{Compression, DataBlob};
 use pbs_key_config::load_and_decrypt_key;
 use pbs_tools::crypt_config::CryptConfig;
 
@@ -125,7 +125,11 @@ fn recover_index(
             eprintln!("WARN: replacing output file {:?} with '\\0'", info.range,);
 
             Ok((
-                DataBlob::encode(&vec![0; size as usize], crypt_conf_opt.as_ref(), true)?,
+                DataBlob::encode(
+                    &vec![0; size as usize],
+                    crypt_conf_opt.as_ref(),
+                    Compression::default(),
+                )?,
                 None,
             ))
         };
-- 
2.39.2






More information about the pbs-devel mailing list