[pbs-devel] [PATCH proxmox-backup v9 01/46] datastore: add helpers for path/digest to s3 object key conversion

Hannes Laimer h.laimer at proxmox.com
Mon Jul 21 14:29:18 CEST 2025


On Sat Jul 19, 2025 at 2:49 PM CEST, Christian Ebner wrote:
> Adds helper methods to generate the s3 object keys given a relative
> path and filename for datastore contents or digest in case of chunk
> files.
>
> Regular datastore contents are stored by grouping them with a content
> prefix in the object key. In order to keep the object key length
> small, given the max limit of 1024 bytes [0], `.cnt` is used as
> content prefix. Chunks on the other hand are prefixed by `.chunks`,
> same as on regular datastores.
>
> The prefix allows for selective listing of either contents or chunks
> by providing the prefix to the respective api calls.
>
> [0] https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
>
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> changes since version 8:
> - added unit tests for helper functions
>
>  Cargo.toml               |   1 +
>  pbs-datastore/Cargo.toml |   1 +
>  pbs-datastore/src/lib.rs |   1 +
>  pbs-datastore/src/s3.rs  | 114 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 117 insertions(+)
>  create mode 100644 pbs-datastore/src/s3.rs
>
> diff --git a/Cargo.toml b/Cargo.toml
> index adfa427d1..97783ddd5 100644
> --- a/Cargo.toml
> +++ b/Cargo.toml
> @@ -77,6 +77,7 @@ proxmox-rest-server = { version = "1", features = [ "templates" ] }
>  proxmox-router = { version = "3.2.2", default-features = false }
>  proxmox-rrd = "1"
>  proxmox-rrd-api-types = "1.0.2"
> +proxmox-s3-client = "1.0.0"
>  # everything but pbs-config and pbs-client use "api-macro"
>  proxmox-schema = "4"
>  proxmox-section-config = "3"
> diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml
> index 56f6e9094..c42eff165 100644
> --- a/pbs-datastore/Cargo.toml
> +++ b/pbs-datastore/Cargo.toml
> @@ -34,6 +34,7 @@ proxmox-borrow.workspace = true
>  proxmox-human-byte.workspace = true
>  proxmox-io.workspace = true
>  proxmox-lang.workspace=true
> +proxmox-s3-client = { workspace = true, features = [ "impl" ] }
>  proxmox-schema = { workspace = true, features = [ "api-macro" ] }
>  proxmox-serde = { workspace = true, features = [ "serde_json" ] }
>  proxmox-sys.workspace = true
> diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs
> index 5014b6c09..ffd0d91b2 100644
> --- a/pbs-datastore/src/lib.rs
> +++ b/pbs-datastore/src/lib.rs
> @@ -182,6 +182,7 @@ pub mod manifest;
>  pub mod paperkey;
>  pub mod prune;
>  pub mod read_chunk;
> +pub mod s3;
>  pub mod store_progress;
>  pub mod task_tracking;
>  
> diff --git a/pbs-datastore/src/s3.rs b/pbs-datastore/src/s3.rs
> new file mode 100644
> index 000000000..79e7548fb
> --- /dev/null
> +++ b/pbs-datastore/src/s3.rs
> @@ -0,0 +1,114 @@
> +use std::path::{Path, PathBuf};
> +
> +use anyhow::{bail, format_err, Error};
> +
> +use proxmox_s3_client::S3ObjectKey;
> +
> +/// Object key prefix to group regular datastore contents (not chunks)
> +pub const S3_CONTENT_PREFIX: &str = ".cnt";
> +
> +/// Generate a relative object key with content prefix from given path and filename
> +pub fn object_key_from_path(path: &Path, filename: &str) -> Result<S3ObjectKey, Error> {
> +    // Force the use of relative paths, otherwise this would loose the content prefix
> +    if path.is_absolute() {
> +        bail!("cannot generate object key from absolute path");
> +    }
> +    if filename.contains('/') {
> +        bail!("invalid filename containing slashes");
> +    }
> +    let mut object_path = PathBuf::from(S3_CONTENT_PREFIX);
> +    object_path.push(path);
> +    object_path.push(filename);
> +
> +    let object_key_str = object_path
> +        .to_str()
> +        .ok_or_else(|| format_err!("unexpected object key path"))?;
> +    Ok(S3ObjectKey::from(object_key_str))
> +}
> +
> +/// Generate a relative object key with chunk prefix from given digest
> +pub fn object_key_from_digest(digest: &[u8; 32]) -> Result<S3ObjectKey, Error> {
> +    let object_key = hex::encode(digest);
> +    let digest_prefix = &object_key[..4];
> +    let object_key_string = format!(".chunks/{digest_prefix}/{object_key}");

I just skimmed the S3 key specs, but I was wondering if having the
`digest_prefix` in the key actually adds anything. For filesystems, sure,
but for S3? They say keys are just characters to them and that they don't
infer any hierarchy from `/`s, so whatever optimisation they do with the
prefix present, they should also do without it, no?

> +    Ok(S3ObjectKey::from(object_key_string.as_str()))
> +}
> +
> +/// Generate a relative object key with chunk prefix from given digest, extended by suffix
> +pub fn object_key_from_digest_with_suffix(
> +    digest: &[u8; 32],
> +    suffix: &str,
> +) -> Result<S3ObjectKey, Error> {
> +    if suffix.contains('/') {
> +        bail!("invalid suffix containing slashes");
> +    }
> +    let object_key = hex::encode(digest);
> +    let digest_prefix = &object_key[..4];
> +    let object_key_string = format!(".chunks/{digest_prefix}/{object_key}{suffix}");
> +    Ok(S3ObjectKey::from(object_key_string.as_str()))
> +}
> +
> +#[test]
> +fn test_object_key_from_path() {
> +    let path = Path::new("vm/100/2025-07-14T14:20:02Z");
> +    let filename = "drive-scsci0.img.fidx";
> +    assert_eq!(
> +        object_key_from_path(path, filename).unwrap().to_string(),
> +        ".cnt/vm/100/2025-07-14T14:20:02Z/drive-scsci0.img.fidx",
> +    );
> +}
> +
> +#[test]
> +fn test_object_key_from_empty_path() {
> +    let path = Path::new("");
> +    let filename = ".marker";
> +    assert_eq!(
> +        object_key_from_path(path, filename).unwrap().to_string(),
> +        ".cnt/.marker",
> +    );
> +}
> +
> +#[test]
> +fn test_object_key_from_absolute_path() {
> +    assert!(object_key_from_path(Path::new("/"), ".marker").is_err());
> +}
> +
> +#[test]
> +fn test_object_key_from_path_incorrect_filename() {
> +    assert!(object_key_from_path(Path::new(""), "/.marker").is_err());
> +}
> +
> +#[test]
> +fn test_object_key_from_digest() {
> +    use hex::FromHex;
> +    let digest =
> +        <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8")
> +            .unwrap();
> +    assert_eq!(
> +        object_key_from_digest(&digest).unwrap().to_string(),
> +        ".chunks/bb9f/bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8",
> +    );
> +}
> +
> +#[test]
> +fn test_object_key_from_digest_with_suffix() {
> +    use hex::FromHex;
> +    let digest =
> +        <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8")
> +            .unwrap();
> +    assert_eq!(
> +        object_key_from_digest_with_suffix(&digest, ".0.bad")
> +            .unwrap()
> +            .to_string(),
> +        ".chunks/bb9f/bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8.0.bad",
> +    );
> +}
> +
> +#[test]
> +fn test_object_key_from_digest_with_invalid_suffix() {
> +    use hex::FromHex;
> +    let digest =
> +        <[u8; 32]>::from_hex("bb9f8df61474d25e71fa00722318cd387396ca1736605e1248821cc0de3d3af8")
> +            .unwrap();
> +    assert!(object_key_from_digest_with_suffix(&digest, "/.0.bad").is_err());
> +}





More information about the pbs-devel mailing list