[pbs-devel] [PATCH v4 proxmox-backup 3/8] fix #5982: garbage collection: check atime updates are honored

Christian Ebner c.ebner at proxmox.com
Thu Mar 6 12:30:49 CET 2025


On 3/6/25 12:02, Fabian Grünbichler wrote:
> On March 5, 2025 4:14 pm, Christian Ebner wrote:
>> Check if the filesystem backing the chunk store actually updates the
>> atime to avoid potential data loss in phase 2 of garbage collection,
>> in case the atime update is not honored.
>>
>> Perform the check before phase 1 of garbage collection, as well as
>> on datastore creation. The latter detects problematic setups early
>> and disallows datastore creation on filesystem configurations which
>> would otherwise most likely lead to data loss.
>>
>> Enable the atime update check by default, but allow opting out by
>> setting a datastore tuning parameter flag for backwards compatibility.
>> This is honored by both garbage collection and datastore creation.
>>
>> The check uses a 4 MiB fixed-size, unencrypted and compressed chunk
>> as test marker, inserted if not already present. This all-zero chunk
>> is likely to exist anyway for unencrypted backup contents with large
>> all-zero regions using fixed-size chunking (e.g. VMs).
>>
>> To avoid cases where the timestamp will not be updated because of the
>> Linux kernel's timestamp granularity, sleep for 1 second in-between
>> stating the file and calling utimensat.
>>
>> Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=5982
>> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
>> ---
>> changes since version 3:
>> - Drop check for relatime-like behaviour as all tested filesystems (see
>>    cover letter) honor the direct atime update.
>> - Reduce risk of races by moving the sleep to before the first
>>    metadata call.
>> - Also log whether the test chunk was newly inserted or pre-existed.
>>
>>   pbs-datastore/src/chunk_store.rs | 87 ++++++++++++++++++++++++++++++--
>>   pbs-datastore/src/datastore.rs   |  9 ++++
>>   src/api2/config/datastore.rs     |  1 +
>>   3 files changed, 92 insertions(+), 5 deletions(-)
>>
>> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
>> index 5e02909a1..e482330b3 100644
>> --- a/pbs-datastore/src/chunk_store.rs
>> +++ b/pbs-datastore/src/chunk_store.rs
>> @@ -1,6 +1,8 @@
>> +use std::os::unix::fs::MetadataExt;
>>   use std::os::unix::io::AsRawFd;
>>   use std::path::{Path, PathBuf};
>>   use std::sync::{Arc, Mutex};
>> +use std::time::Duration;
>>   
>>   use anyhow::{bail, format_err, Error};
>>   use tracing::info;
>> @@ -13,6 +15,7 @@ use proxmox_sys::process_locker::{
>>   };
>>   use proxmox_worker_task::WorkerTaskContext;
>>   
>> +use crate::data_blob::DataChunkBuilder;
>>   use crate::file_formats::{
>>       COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0,
>>   };
>> @@ -93,6 +96,7 @@ impl ChunkStore {
>>           uid: nix::unistd::Uid,
>>           gid: nix::unistd::Gid,
>>           sync_level: DatastoreFSyncLevel,
>> +        atime_safety_check: bool,
>>       ) -> Result<Self, Error>
>>       where
>>           P: Into<PathBuf>,
>> @@ -147,7 +151,14 @@ impl ChunkStore {
>>               }
>>           }
>>   
>> -        Self::open(name, base, sync_level)
>> +        let chunk_store = Self::open(name, base, sync_level)?;
>> +        if atime_safety_check {
>> +            chunk_store.check_fs_atime_updates(true)?;
>> +        } else {
>> +            info!("atime safety check skipped.");
>> +        }
>> +
>> +        Ok(chunk_store)
>>       }
>>   
>>       fn lockfile_path<P: Into<PathBuf>>(base: P) -> PathBuf {
>> @@ -442,6 +453,59 @@ impl ChunkStore {
>>           Ok(())
>>       }
>>   
>> +    /// Check if atime updates are honored by the filesystem backing the chunk store.
>> +    ///
>> +    /// Checks if the atime is always updated by utimensat, taking into consideration the Linux
>> +    /// kernel's timestamp granularity.
>> +    /// If `retry_on_file_changed` is set to true, the check is performed again on the changed file
>> +    /// if a file change during testing is detected by differences in birth time or inode number.
>> +    /// Uses a 4 MiB fixed size, compressed but unencrypted chunk to test. The chunk is inserted in
>> +    /// the chunk store if not yet present.
>> +    /// Returns with error if the check could not be performed.
>> +    pub fn check_fs_atime_updates(&self, retry_on_file_changed: bool) -> Result<(), Error> {
>> +        let (zero_chunk, digest) = DataChunkBuilder::build_zero_chunk(None, 4096 * 1024, true)?;
>> +        let (pre_existing, _) = self.insert_chunk(&zero_chunk, &digest)?;
> 
> this one logs the path if something goes wrong, which is nice in case
> the problem is related to permissions/..
> 
>> +        let (path, _digest) = self.chunk_path(&digest);
>> +
>> +        // Take into account timestamp update granularity in the kernel
>> +        std::thread::sleep(Duration::from_secs(1));
>> +
>> +        let metadata_before = std::fs::metadata(&path).map_err(Error::from)?;
> 
> but this one probably wouldn't, and would benefit from it?
> 
>> +
>> +        // Second atime update if chunk was newly inserted above
> 
> nit: comment is inverted - it's the second atime *update* if the chunk
> already existed. it's the first *update* if we inserted it ;)
> 
>> +        self.cond_touch_path(&path, true)?;
> 
> this one includes the path in errors :)
> 
>> +
>> +        let metadata_now = std::fs::metadata(&path).map_err(Error::from)?;
> 
> this one might benefit from some context as well?
> 
>> +
>> +        // Check for the unlikely case that the file changed in-between the
>> +        // two metadata calls, try to check once again on changed file
>> +        if metadata_before.ino() != metadata_now.ino() {
>> +            if retry_on_file_changed {
>> +                return self.check_fs_atime_updates(false);
>> +            }
>> +            bail!("chunk file changed twice during atime testing, cannot proceed.");
> 
> adding the path or digest here might help as well
> 
>> +        }
>> +
>> +        match (
>> +            metadata_before.accessed()? < metadata_now.accessed()?,
> 
> error context as well ;)
> 
>> +            pre_existing,
>> +        ) {
>> +            (true, false) => info!("Access time safety check successful (inserted chunk for test)."),
>> +            (true, true) => {
>> +                info!("Access time safety check successful (used pre-existing chunk for test).")
>> +            }
>> +            (false, false) => bail!(
>> +                "Access time safety check failed (inserted chunk for test), is atime support \
>> +                enabled on datastore backing filesystem?"
>> +            ),
>> +            (false, true) => bail!(
>> +                "Access time safety check failed (used pre-existing chunk for test), is atime \
>> +                support enabled on datastore backing filesystem?"
>> +            ),
>> +        }
> 
> this is a bit excessive while at the same time not including the
> interesting bits..
> 
> how about the following:
> 
> if metadata_before.accessed()? >= metadata_now.accessed()? {
>      let chunk_info_str = if pre_existing { "pre-existing" } else { "newly inserted" };
>      log::warn!("Chunk metadata was not correctly updated during atime test:");
>      info!("Metadata before update: {metadata_before:?}");
>      info!("Metadata after update: {metadata_now:?}");
>      bail!("atime safety check using {chunk_info_str} chunk failed, aborting GC!");
> }
> 
> info!("atime safety check successful, proceeding with GC.");
> 
> 
> would look like this in the task log:
> 
> Chunk metadata was not correctly updated during atime test:
> Metadata before update: Metadata { file_type: FileType { is_file: true, is_dir: false, is_symlink: false, .. }, permissions: Permissions(FilePermissions { mode: 0o100500 (-r-x------) }), len: 158, modified: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, accessed: SystemTime { tv_sec: 1741256689, tv_nsec: 18294604 }, created: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, .. }
> Metadata after update: Metadata { file_type: FileType { is_file: true, is_dir: false, is_symlink: false, .. }, permissions: Permissions(FilePermissions { mode: 0o100500 (-r-x------) }), len: 158, modified: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, accessed: SystemTime { tv_sec: 1741256689, tv_nsec: 18294604 }, created: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, .. }
> TASK ERROR: atime safety check using pre-existing chunk failed, aborting GC!
> 
> alternatively we could of course do our own pretty printing, or add `#`
> to the format string to get:
> 
> Metadata before update:
> Metadata {
>      file_type: FileType {
>          is_file: true,
>          is_dir: false,
>          is_symlink: false,
>          ..
>      },
>      permissions: Permissions(
>          FilePermissions {
>              mode: 0o100500 (-r-x------),
>          },
>      ),
>      len: 158,
>      modified: SystemTime {
>          tv_sec: 1673516596,
>          tv_nsec: 65483144,
>      },
>      accessed: SystemTime {
>          tv_sec: 1741256877,
>          tv_nsec: 225020600,
>      },
>      created: SystemTime {
>          tv_sec: 1673516596,
>          tv_nsec: 65483144,
>      },
>      ..
> }
> 
> I think it is important to get the logging right here because if
> somebody runs into a failing check, they should report the relevant data
> to us without the need to jump through hoops.

Agreed on this and the above comments regarding error context, I would 
however opt for a custom output, e.g. something along the lines of what 
is used to output pxar entries via `format_single_line_entry`...
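
Rough, untested sketch of what I have in mind (basically your snippet, 
but only logging the access times; exact wording is just a placeholder):

    if metadata_before.accessed()? >= metadata_now.accessed()? {
        let chunk_info_str = if pre_existing { "pre-existing" } else { "newly inserted" };
        // log only the interesting bits, the rest of the metadata is
        // already implied by the successful chunk insert/touch above
        info!("atime before update: {:?}", metadata_before.accessed()?);
        info!("atime after update:  {:?}", metadata_now.accessed()?);
        bail!("atime safety check using {chunk_info_str} chunk failed, aborting GC!");
    }

    info!("atime safety check successful, proceeding with GC.");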

Do we really need information like file type, size and permissions here? 
That should already be covered by the chunk insert above?
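
For the error context on the metadata calls, including the chunk path 
should be enough I think, e.g. (untested):

    let metadata_before = std::fs::metadata(&path)
        .map_err(|err| format_err!("failed to get metadata for {path:?} - {err}"))?;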

> 
>> +        Ok(())
>> +    }
>> +
>>       pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
>>           // unwrap: only `None` in unit tests
>>           assert!(self.locker.is_some());
>> @@ -628,8 +692,15 @@ fn test_chunk_store1() {
>>       let user = nix::unistd::User::from_uid(nix::unistd::Uid::current())
>>           .unwrap()
>>           .unwrap();
>> -    let chunk_store =
>> -        ChunkStore::create("test", &path, user.uid, user.gid, DatastoreFSyncLevel::None).unwrap();
>> +    let chunk_store = ChunkStore::create(
>> +        "test",
>> +        &path,
>> +        user.uid,
>> +        user.gid,
>> +        DatastoreFSyncLevel::None,
>> +        true,
>> +    )
>> +    .unwrap();
>>   
>>       let (chunk, digest) = crate::data_blob::DataChunkBuilder::new(&[0u8, 1u8])
>>           .build()
>> @@ -641,8 +712,14 @@ fn test_chunk_store1() {
>>       let (exists, _) = chunk_store.insert_chunk(&chunk, &digest).unwrap();
>>       assert!(exists);
>>   
>> -    let chunk_store =
>> -        ChunkStore::create("test", &path, user.uid, user.gid, DatastoreFSyncLevel::None);
>> +    let chunk_store = ChunkStore::create(
>> +        "test",
>> +        &path,
>> +        user.uid,
>> +        user.gid,
>> +        DatastoreFSyncLevel::None,
>> +        true,
>> +    );
>>       assert!(chunk_store.is_err());
>>   
>>       if let Err(_e) = std::fs::remove_dir_all(".testdir") { /* ignore */ }
>> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
>> index 75c0c16ab..b62ddc172 100644
>> --- a/pbs-datastore/src/datastore.rs
>> +++ b/pbs-datastore/src/datastore.rs
>> @@ -1170,6 +1170,15 @@ impl DataStore {
>>                   upid: Some(upid.to_string()),
>>                   ..Default::default()
>>               };
>> +            let tuning: DatastoreTuning = serde_json::from_value(
>> +                DatastoreTuning::API_SCHEMA
>> +                    .parse_property_string(gc_store_config.tuning.as_deref().unwrap_or(""))?,
>> +            )?;
>> +            if tuning.gc_atime_safety_check.unwrap_or(true) {
>> +                self.inner.chunk_store.check_fs_atime_updates(true)?;
>> +            } else {
>> +                info!("Filesystem atime safety check disabled by datastore tuning options.");
>> +            }
>>   
>>               info!("Start GC phase1 (mark used chunks)");
>>   
>> diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs
>> index fe3260f6d..35847fc45 100644
>> --- a/src/api2/config/datastore.rs
>> +++ b/src/api2/config/datastore.rs
>> @@ -119,6 +119,7 @@ pub(crate) fn do_create_datastore(
>>                   backup_user.uid,
>>                   backup_user.gid,
>>                   tuning.sync_level.unwrap_or_default(),
>> +                tuning.gc_atime_safety_check.unwrap_or(true),
>>               )
>>               .map(|_| ())
>>           } else {
>> -- 
>> 2.39.5