[pbs-devel] [PATCH v4 proxmox-backup 3/8] fix #5982: garbage collection: check atime updates are honored

Fabian Grünbichler f.gruenbichler at proxmox.com
Thu Mar 6 12:02:26 CET 2025


On March 5, 2025 4:14 pm, Christian Ebner wrote:
> Check if the filesystem backing the chunk store actually updates the
> atime to avoid potential data loss in phase 2 of garbage collection,
> in case the atime update is not honored.
> 
> Perform the check before phase 1 of garbage collection, as well as
> on datastore creation. The latter to early detect and disallow
> datastore creation on filesystem configurations which otherwise most
> likely would lead to data losses.
> 
> Enable the atime update check by default, but allow to opt-out by
> setting a datastore tuning parameter flag for backwards compatibility.
> This is honored by both garbage collection and datastore creation.
> 
> The check uses a 4 MiB fixed sized, unencrypted and compressed chunk
> as test marker, inserted if not present. This all zero-chunk is very
> likely anyways for unencrypted backup contents with large all-zero
> regions using fixed size chunking (e.g. VMs).
> 
> To avoid cases where the timestamp will not be updated because of the
> Linux kernels timestamp granularity, sleep in-between stating and
> utimensat for 1 second.
> 
> Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=5982
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> changes since version 3:
> - Drop check for relatime like behaviour as all tested filesystem (see
>   coverletter) honor the direct atime update.
> - Reduce risk of races by moving the sleep to be before the first
>   metadata call.
> - Log also if the test chunk was newly inserted or pre-existed.
> 
>  pbs-datastore/src/chunk_store.rs | 87 ++++++++++++++++++++++++++++++--
>  pbs-datastore/src/datastore.rs   |  9 ++++
>  src/api2/config/datastore.rs     |  1 +
>  3 files changed, 92 insertions(+), 5 deletions(-)
> 
> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
> index 5e02909a1..e482330b3 100644
> --- a/pbs-datastore/src/chunk_store.rs
> +++ b/pbs-datastore/src/chunk_store.rs
> @@ -1,6 +1,8 @@
> +use std::os::unix::fs::MetadataExt;
>  use std::os::unix::io::AsRawFd;
>  use std::path::{Path, PathBuf};
>  use std::sync::{Arc, Mutex};
> +use std::time::Duration;
>  
>  use anyhow::{bail, format_err, Error};
>  use tracing::info;
> @@ -13,6 +15,7 @@ use proxmox_sys::process_locker::{
>  };
>  use proxmox_worker_task::WorkerTaskContext;
>  
> +use crate::data_blob::DataChunkBuilder;
>  use crate::file_formats::{
>      COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0,
>  };
> @@ -93,6 +96,7 @@ impl ChunkStore {
>          uid: nix::unistd::Uid,
>          gid: nix::unistd::Gid,
>          sync_level: DatastoreFSyncLevel,
> +        atime_safety_check: bool,
>      ) -> Result<Self, Error>
>      where
>          P: Into<PathBuf>,
> @@ -147,7 +151,14 @@ impl ChunkStore {
>              }
>          }
>  
> -        Self::open(name, base, sync_level)
> +        let chunk_store = Self::open(name, base, sync_level)?;
> +        if atime_safety_check {
> +            chunk_store.check_fs_atime_updates(true)?;
> +        } else {
> +            info!("atime safety check skipped.");
> +        }
> +
> +        Ok(chunk_store)
>      }
>  
>      fn lockfile_path<P: Into<PathBuf>>(base: P) -> PathBuf {
> @@ -442,6 +453,59 @@ impl ChunkStore {
>          Ok(())
>      }
>  
> +    /// Check if atime updates are honored by the filesystem backing the chunk store.
> +    ///
> +    /// Checks if the atime is always updated by utimensat taking into consideration the Linux
> +    /// kernel timestamp granularity.
> +    /// If `retry_on_file_changed` is set to true, the check is performed again on the changed file
> +/// if a file change while testing is detected by differences in birth time or inode number.
> +    /// Uses a 4 MiB fixed size, compressed but unencrypted chunk to test. The chunk is inserted in
> +    /// the chunk store if not yet present.
> +    /// Returns with error if the check could not be performed.
> +    pub fn check_fs_atime_updates(&self, retry_on_file_changed: bool) -> Result<(), Error> {
> +        let (zero_chunk, digest) = DataChunkBuilder::build_zero_chunk(None, 4096 * 1024, true)?;
> +        let (pre_existing, _) = self.insert_chunk(&zero_chunk, &digest)?;

this one logs the path if something goes wrong, which is nice in case
the problem is related to permissions/..

> +        let (path, _digest) = self.chunk_path(&digest);
> +
> +        // Take into account timestamp update granularity in the kernel
> +        std::thread::sleep(Duration::from_secs(1));
> +
> +        let metadata_before = std::fs::metadata(&path).map_err(Error::from)?;

but this one probably wouldn't, and would benefit from it?

> +
> +        // Second atime update if chunk was newly inserted above

nit: comment is inverted - it's the second atime *update* if the chunk
already existed. it's the first *update* if we inserted it ;)

> +        self.cond_touch_path(&path, true)?;

this one includes the path in errors :)

> +
> +        let metadata_now = std::fs::metadata(&path).map_err(Error::from)?;

this one might benefit from some context as well?

> +
> +        // Check for the unlikely case that the file changed in-between the
> +        // two metadata calls, try to check once again on changed file
> +        if metadata_before.ino() != metadata_now.ino() {
> +            if retry_on_file_changed {
> +                return self.check_fs_atime_updates(false);
> +            }
> +            bail!("chunk file changed twice during atime testing, cannot proceed.");

adding the path or digest here might help as well

> +        }
> +
> +        match (
> +            metadata_before.accessed()? < metadata_now.accessed()?,

error context as well ;)

> +            pre_existing,
> +        ) {
> +            (true, false) => info!("Access time safety check successful (inserted chunk for test)."),
> +            (true, true) => {
> +                info!("Access time safety check successful (used pre-existing chunk for test).")
> +            }
> +            (false, false) => bail!(
> +                "Access time safety check failed (inserted chunk for test), is atime support \
> +                enabled on datastore backing filesystem?"
> +            ),
> +            (false, true) => bail!(
> +                "Access time safety check failed (used pre-existing chunk for test), is atime \
> +                support enabled on datastore backing filesystem?"
> +            ),
> +        }

this is a bit excessive while at the same time not including the
interesting bits..

how about the following:

if metadata_before.accessed()? >= metadata_now.accessed()? {
    let chunk_info_str = if pre_existing { "pre-existing" } else { "newly inserted" };
    log::warn!("Chunk metadata was not correctly updated during atime test:");
    info!("Metadata before update: {metadata_before:?}");
    info!("Metadata after update: {metadata_now:?}");
    bail!("atime safety check using {chunk_info_str} chunk failed, aborting GC!");
}

info!("atime safety check successful, proceeding with GC.");


would look like this in the task log:

Chunk metadata was not correctly updated during atime test:
Metadata before update: Metadata { file_type: FileType { is_file: true, is_dir: false, is_symlink: false, .. }, permissions: Permissions(FilePermissions { mode: 0o100500 (-r-x------) }), len: 158, modified: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, accessed: SystemTime { tv_sec: 1741256689, tv_nsec: 18294604 }, created: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, .. }
Metadata after update: Metadata { file_type: FileType { is_file: true, is_dir: false, is_symlink: false, .. }, permissions: Permissions(FilePermissions { mode: 0o100500 (-r-x------) }), len: 158, modified: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, accessed: SystemTime { tv_sec: 1741256689, tv_nsec: 18294604 }, created: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, .. }
TASK ERROR: atime safety check using pre-existing chunk failed, aborting GC!

alternatively we could of course do our own pretty printing, or add `#`
to the format string to get:

Metadata before update:
Metadata {
    file_type: FileType {
        is_file: true,
        is_dir: false,
        is_symlink: false,
        ..
    },
    permissions: Permissions(
        FilePermissions {
            mode: 0o100500 (-r-x------),
        },
    ),
    len: 158,
    modified: SystemTime {
        tv_sec: 1673516596,
        tv_nsec: 65483144,
    },
    accessed: SystemTime {
        tv_sec: 1741256877,
        tv_nsec: 225020600,
    },
    created: SystemTime {
        tv_sec: 1673516596,
        tv_nsec: 65483144,
    },
    ..
}

I think it is important to get the logging right here because if
somebody runs into a failing check, they should report the relevant data
to us without the need to jump through hoops.

> +        Ok(())
> +    }
> +
>      pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
>          // unwrap: only `None` in unit tests
>          assert!(self.locker.is_some());
> @@ -628,8 +692,15 @@ fn test_chunk_store1() {
>      let user = nix::unistd::User::from_uid(nix::unistd::Uid::current())
>          .unwrap()
>          .unwrap();
> -    let chunk_store =
> -        ChunkStore::create("test", &path, user.uid, user.gid, DatastoreFSyncLevel::None).unwrap();
> +    let chunk_store = ChunkStore::create(
> +        "test",
> +        &path,
> +        user.uid,
> +        user.gid,
> +        DatastoreFSyncLevel::None,
> +        true,
> +    )
> +    .unwrap();
>  
>      let (chunk, digest) = crate::data_blob::DataChunkBuilder::new(&[0u8, 1u8])
>          .build()
> @@ -641,8 +712,14 @@ fn test_chunk_store1() {
>      let (exists, _) = chunk_store.insert_chunk(&chunk, &digest).unwrap();
>      assert!(exists);
>  
> -    let chunk_store =
> -        ChunkStore::create("test", &path, user.uid, user.gid, DatastoreFSyncLevel::None);
> +    let chunk_store = ChunkStore::create(
> +        "test",
> +        &path,
> +        user.uid,
> +        user.gid,
> +        DatastoreFSyncLevel::None,
> +        true,
> +    );
>      assert!(chunk_store.is_err());
>  
>      if let Err(_e) = std::fs::remove_dir_all(".testdir") { /* ignore */ }
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index 75c0c16ab..b62ddc172 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -1170,6 +1170,15 @@ impl DataStore {
>                  upid: Some(upid.to_string()),
>                  ..Default::default()
>              };
> +            let tuning: DatastoreTuning = serde_json::from_value(
> +                DatastoreTuning::API_SCHEMA
> +                    .parse_property_string(gc_store_config.tuning.as_deref().unwrap_or(""))?,
> +            )?;
> +            if tuning.gc_atime_safety_check.unwrap_or(true) {
> +                self.inner.chunk_store.check_fs_atime_updates(true)?;
> +            } else {
> +                info!("Filesystem atime safety check disabled by datastore tuning options.");
> +            }
>  
>              info!("Start GC phase1 (mark used chunks)");
>  
> diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs
> index fe3260f6d..35847fc45 100644
> --- a/src/api2/config/datastore.rs
> +++ b/src/api2/config/datastore.rs
> @@ -119,6 +119,7 @@ pub(crate) fn do_create_datastore(
>                  backup_user.uid,
>                  backup_user.gid,
>                  tuning.sync_level.unwrap_or_default(),
> +                tuning.gc_atime_safety_check.unwrap_or(true),
>              )
>              .map(|_| ())
>          } else {
> -- 
> 2.39.5
> 
> 
> 
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
> 
> 
> 




More information about the pbs-devel mailing list