[pbs-devel] [PATCH v6 proxmox-backup 3/8] fix #5982: garbage collection: check atime updates are honored

Christian Ebner c.ebner at proxmox.com
Thu Mar 20 09:44:23 CET 2025


On 3/20/25 09:36, Wolfgang Bumiller wrote:
> On Wed, Mar 19, 2025 at 06:24:27PM +0100, Christian Ebner wrote:
>> Check if the filesystem backing the chunk store actually updates the
>> atime to avoid potential data loss in phase 2 of garbage collection,
>> in case the atime update is not honored.
>>
>> Perform the check before phase 1 of garbage collection, as well as
>> on datastore creation. The latter to early detect and disallow
>> datastore creation on filesystem configurations which otherwise most
>> likely would lead to data losses. To perform the check also when
>> reusing an existing datastore, open the chunk store also on reuse.
>>
>> Enable the atime update check by default, but allow to opt-out by
>> setting a datastore tuning parameter flag for backwards compatibility.
>> This is honored by both garbage collection and datastore creation.
>>
>> The check uses a 4 MiB fixed-size, unencrypted and compressed chunk
>> as test marker, inserted if not present. This all-zero chunk is very
>> likely anyway for unencrypted backup contents with large all-zero
>> regions using fixed size chunking (e.g. VMs).
>>
>> To avoid cases where the timestamp will not be updated because of the
>> Linux kernel's timestamp granularity, sleep in-between chunk insert
>> (including an atime update if pre-existing) and the subsequent
>> stating + utimensat for 1 second.
>>
>> Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=5982
>> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
>> ---
>> changes since version 5:
>> - Also perform check when reusing datastores, not just new ones.
>> - Expand on comment for sleep, fix incorrect wording in commit message.
>> - Add additional error context to before/after stat calls.
>> - Soften wording of error messages if check is skipped because disabled.
>>
>>   pbs-datastore/src/chunk_store.rs | 72 ++++++++++++++++++++++++++++++--
>>   pbs-datastore/src/datastore.rs   | 13 ++++++
>>   src/api2/config/datastore.rs     | 35 ++++++++++++----
>>   3 files changed, 109 insertions(+), 11 deletions(-)
>>
>> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
>> index 5e02909a1..dcb499426 100644
>> --- a/pbs-datastore/src/chunk_store.rs
>> +++ b/pbs-datastore/src/chunk_store.rs
>> @@ -1,9 +1,11 @@
>> +use std::os::unix::fs::MetadataExt;
>>   use std::os::unix::io::AsRawFd;
>>   use std::path::{Path, PathBuf};
>>   use std::sync::{Arc, Mutex};
>> +use std::time::{Duration, UNIX_EPOCH};
>>   
>> -use anyhow::{bail, format_err, Error};
>> -use tracing::info;
>> +use anyhow::{bail, format_err, Context, Error};
>> +use tracing::{info, warn};
>>   
>>   use pbs_api_types::{DatastoreFSyncLevel, GarbageCollectionStatus};
>>   use proxmox_io::ReadExt;
>> @@ -13,6 +15,7 @@ use proxmox_sys::process_locker::{
>>   };
>>   use proxmox_worker_task::WorkerTaskContext;
>>   
>> +use crate::data_blob::DataChunkBuilder;
>>   use crate::file_formats::{
>>       COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0,
>>   };
>> @@ -177,7 +180,7 @@ impl ChunkStore {
>>       /// Note that this must be used with care, as it's dangerous to create two instances on the
>>       /// same base path, as closing the underlying ProcessLocker drops all locks from this process
>>       /// on the lockfile (even if separate FDs)
>> -    pub(crate) fn open<P: Into<PathBuf>>(
>> +    pub fn open<P: Into<PathBuf>>(
>>           name: &str,
>>           base: P,
>>           sync_level: DatastoreFSyncLevel,
>> @@ -442,6 +445,69 @@ impl ChunkStore {
>>           Ok(())
>>       }
>>   
>> +    /// Check if atime updates are honored by the filesystem backing the chunk store.
>> +    ///
>> +    /// Checks if the atime is always updated by utimensat taking into consideration the Linux
>> +    /// kernel timestamp granularity.
>> +    /// If `retry_on_file_changed` is set to true, the check is performed again on the changed file
>> +    /// if a file change while testing is detected by differences in birth time or inode number.
>> +    /// Uses a 4 MiB fixed size, compressed but unencrypted chunk to test. The chunk is inserted in
>> +    /// the chunk store if not yet present.
>> +    /// Returns with error if the check could not be performed.
>> +    pub fn check_fs_atime_updates(&self, retry_on_file_changed: bool) -> Result<(), Error> {
>> +        let (zero_chunk, digest) = DataChunkBuilder::build_zero_chunk(None, 4096 * 1024, true)?;
>> +        let (pre_existing, _) = self.insert_chunk(&zero_chunk, &digest)?;
>> +        let (path, _digest) = self.chunk_path(&digest);
>> +
>> +        // Take into account timestamp update granularity in the kernel
>> +        // Blocking the thread is fine here since this runs in a worker.
>> +        std::thread::sleep(Duration::from_secs(1));
>> +
>> +        let metadata_before = std::fs::metadata(&path).context(format!(
>> +            "failed to get metadata for {path:?} before atime update"
>> +        ))?;
>> +
>> +        // Second atime update if chunk pre-existed, insert_chunk already updates pre-existing ones
>> +        self.cond_touch_path(&path, true)?;
>> +
>> +        let metadata_now = std::fs::metadata(&path).context(format!(
>> +            "failed to get metadata for {path:?} after atime update"
>> +        ))?;
>> +
>> +        // Check for the unlikely case that the file changed in-between the
>> +        // two metadata calls, try to check once again on changed file
>> +        if metadata_before.ino() != metadata_now.ino() {
>> +            if retry_on_file_changed {
>> +                return self.check_fs_atime_updates(false);
>> +            }
>> +            bail!("chunk {path:?} changed twice during access time safety check, cannot proceed.");
>> +        }
>> +
>> +        if metadata_before.accessed()? >= metadata_now.accessed()? {
>> +            let chunk_info_str = if pre_existing {
>> +                "pre-existing"
>> +            } else {
>> +                "newly inserted"
>> +            };
>> +            warn!("Chunk metadata was not correctly updated during access time safety check:");
>> +            info!(
>> +                "Timestamps before update: accessed {:?}, modified {:?}, created {:?}",
>> +                metadata_before.accessed().unwrap_or(UNIX_EPOCH),
>> +                metadata_before.modified().unwrap_or(UNIX_EPOCH),
>> +                metadata_before.created().unwrap_or(UNIX_EPOCH),
> 
> I wonder if rendering `UNIX_EPOCH` makes sense for failed values, we
> could just use `.ok()` and have it debug-render the `Option`?

Okay, will adapt that

> 
>> +            );
>> +            info!(
>> +                "Timestamps after update: accessed {:?}, modified {:?}, created {:?}",
>> +                metadata_now.accessed().unwrap_or(UNIX_EPOCH),
>> +                metadata_now.modified().unwrap_or(UNIX_EPOCH),
>> +                metadata_now.created().unwrap_or(UNIX_EPOCH),
>> +            );
>> +            bail!("access time safety check using {chunk_info_str} chunk failed, aborting GC!");
>> +        }
>> +
>> +        Ok(())
>> +    }
>> +
>>       pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
>>           // unwrap: only `None` in unit tests
>>           assert!(self.locker.is_some());
>> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
>> index a6a91ca79..09b5808e0 100644
>> --- a/pbs-datastore/src/datastore.rs
>> +++ b/pbs-datastore/src/datastore.rs
>> @@ -1170,6 +1170,19 @@ impl DataStore {
>>                   upid: Some(upid.to_string()),
>>                   ..Default::default()
>>               };
>> +            let tuning: DatastoreTuning = serde_json::from_value(
>> +                DatastoreTuning::API_SCHEMA
>> +                    .parse_property_string(gc_store_config.tuning.as_deref().unwrap_or(""))?,
>> +            )?;
>> +            if tuning.gc_atime_safety_check.unwrap_or(true) {
>> +                self.inner
>> +                    .chunk_store
>> +                    .check_fs_atime_updates(true)
>> +                    .map_err(|err| format_err!("atime safety check failed - {err:#}"))?;
> 
> ^ I see we have `{:#}` only exactly once in our code base from one of
> your patches. Do we really want to use this form?
> 
>  From what I can tell, this chains contexts with colons.
> 
> I think we mostly use `.context`/`.with_context` nowadays instead?
> That way the choice of formatting is left to the caller and they can
> choose between `{}`, `{:#}` and `{:?}` and `{:#?}`...

But exactly that was the intention here? The check_fs_atime_updates() 
returns some errors by adding context instead of reformatting the error 
itself.

I didn't adapt this for the whole datastore logic though, therefore 
limited it to this call site instead. I could of course bubble this up 
further, but who should be the caller defining the error format then?

> 
>> +                info!("Access time update check successful, proceeding with GC.");
>> +            } else {
>> +                info!("Access time update check disabled by datastore tuning options.");
>> +            }
>>   
>>               info!("Start GC phase1 (mark used chunks)");
>>   
>> diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs
>> index fe3260f6d..7e97a7de3 100644
>> --- a/src/api2/config/datastore.rs
>> +++ b/src/api2/config/datastore.rs
>> @@ -4,7 +4,7 @@ use ::serde::{Deserialize, Serialize};
>>   use anyhow::{bail, format_err, Error};
>>   use hex::FromHex;
>>   use serde_json::Value;
>> -use tracing::warn;
>> +use tracing::{info, warn};
>>   
>>   use proxmox_router::{http_bail, Permission, Router, RpcEnvironment, RpcEnvironmentType};
>>   use proxmox_schema::{api, param_bail, ApiType};
>> @@ -98,7 +98,15 @@ pub(crate) fn do_create_datastore(
>>       )?;
>>   
>>       let res = if reuse_datastore {
>> -        ChunkStore::verify_chunkstore(&path)
>> +        ChunkStore::verify_chunkstore(&path).and_then(|_| {
>> +            // Must be the only instance accessing and locking the chunk store,
>> +            // dropping will close all other locks from this process on the lockfile as well.
>> +            ChunkStore::open(
>> +                &datastore.name,
>> +                &path,
>> +                tuning.sync_level.unwrap_or_default(),
>> +            )
>> +        })
>>       } else {
>>           let mut is_empty = true;
>>           if let Ok(dir) = std::fs::read_dir(&path) {
>> @@ -120,19 +128,30 @@ pub(crate) fn do_create_datastore(
>>                   backup_user.gid,
>>                   tuning.sync_level.unwrap_or_default(),
>>               )
>> -            .map(|_| ())
>>           } else {
>>               Err(format_err!("datastore path not empty"))
>>           }
>>       };
>>   
>> -    if res.is_err() {
>> -        if need_unmount {
>> -            if let Err(e) = unmount_by_mountpoint(&path) {
>> -                warn!("could not unmount device: {e}");
>> +    match res {
>> +        Ok(chunk_store) => {
>> +            if tuning.gc_atime_safety_check.unwrap_or(true) {
>> +                chunk_store
>> +                    .check_fs_atime_updates(true)
>> +                    .map_err(|err| format_err!("access time safety check failed - {err:#}"))?;
> 
> ^ same here
> 
>> +                info!("Access time update check successful.");
>> +            } else {
>> +                info!("Access time update check skipped.");
>> +            }
>> +        }
>> +        Err(err) => {
>> +            if need_unmount {
>> +                if let Err(e) = unmount_by_mountpoint(&path) {
>> +                    warn!("could not unmount device: {e}");
>> +                }
>>               }
>> +            return Err(err);
>>           }
>> -        return res;
>>       }
>>   
>>       config.set_data(&datastore.name, "datastore", &datastore)?;
>> -- 
>> 2.39.5





More information about the pbs-devel mailing list