[pbs-devel] [PATCH v4 proxmox-backup 3/8] fix #5982: garbage collection: check atime updates are honored
Christian Ebner
c.ebner at proxmox.com
Thu Mar 6 12:30:49 CET 2025
On 3/6/25 12:02, Fabian Grünbichler wrote:
> On March 5, 2025 4:14 pm, Christian Ebner wrote:
>> Check if the filesystem backing the chunk store actually updates the
>> atime to avoid potential data loss in phase 2 of garbage collection,
>> in case the atime update is not honored.
>>
>> Perform the check before phase 1 of garbage collection, as well as
>> on datastore creation. The latter to early detect and disallow
>> datastore creation on filesystem configurations which otherwise most
>> likely would lead to data losses.
>>
>> Enable the atime update check by default, but allow to opt-out by
>> setting a datastore tuning parameter flag for backwards compatibility.
>> This is honored by both, garbage collection and datastore creation.
>>
>> The check uses a 4 MiB fixed sized, unencypted and compressed chunk
>> as test marker, inserted if not present. This all zero-chunk is very
>> likely anyways for unencrypted backup contents with large all-zero
>> regions using fixed size chunking (e.g. VMs).
>>
>> To avoid cases were the timestamp will not be updated because of the
>> Linux kernels timestamp granularity, sleep in-between stating and
>> utimensat for 1 second.
>>
>> Fixes: https://bugzilla.proxmox.com/show_bug.cgi?id=5982
>> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
>> ---
>> changes since version 3:
>> - Drop check for relatime like behaviour as all tested filesystem (see
>> coverletter) honor the direct atime update.
>> - Reduce risk of races by moving the sleep to be before the first
>> metadata call.
>> - Log also if the test chunk was newly inserted or pre-existed.
>>
>> pbs-datastore/src/chunk_store.rs | 87 ++++++++++++++++++++++++++++++--
>> pbs-datastore/src/datastore.rs | 9 ++++
>> src/api2/config/datastore.rs | 1 +
>> 3 files changed, 92 insertions(+), 5 deletions(-)
>>
>> diff --git a/pbs-datastore/src/chunk_store.rs b/pbs-datastore/src/chunk_store.rs
>> index 5e02909a1..e482330b3 100644
>> --- a/pbs-datastore/src/chunk_store.rs
>> +++ b/pbs-datastore/src/chunk_store.rs
>> @@ -1,6 +1,8 @@
>> +use std::os::unix::fs::MetadataExt;
>> use std::os::unix::io::AsRawFd;
>> use std::path::{Path, PathBuf};
>> use std::sync::{Arc, Mutex};
>> +use std::time::Duration;
>>
>> use anyhow::{bail, format_err, Error};
>> use tracing::info;
>> @@ -13,6 +15,7 @@ use proxmox_sys::process_locker::{
>> };
>> use proxmox_worker_task::WorkerTaskContext;
>>
>> +use crate::data_blob::DataChunkBuilder;
>> use crate::file_formats::{
>> COMPRESSED_BLOB_MAGIC_1_0, ENCRYPTED_BLOB_MAGIC_1_0, UNCOMPRESSED_BLOB_MAGIC_1_0,
>> };
>> @@ -93,6 +96,7 @@ impl ChunkStore {
>> uid: nix::unistd::Uid,
>> gid: nix::unistd::Gid,
>> sync_level: DatastoreFSyncLevel,
>> + atime_safety_check: bool,
>> ) -> Result<Self, Error>
>> where
>> P: Into<PathBuf>,
>> @@ -147,7 +151,14 @@ impl ChunkStore {
>> }
>> }
>>
>> - Self::open(name, base, sync_level)
>> + let chunk_store = Self::open(name, base, sync_level)?;
>> + if atime_safety_check {
>> + chunk_store.check_fs_atime_updates(true)?;
>> + } else {
>> + info!("atime safety check skipped.");
>> + }
>> +
>> + Ok(chunk_store)
>> }
>>
>> fn lockfile_path<P: Into<PathBuf>>(base: P) -> PathBuf {
>> @@ -442,6 +453,59 @@ impl ChunkStore {
>> Ok(())
>> }
>>
>> + /// Check if atime updates are honored by the filesystem backing the chunk store.
>> + ///
>> + /// Checks if the atime is always updated by utimensat taking into consideration the Linux
>> + /// kernel timestamp granularity.
>> + /// If `retry_on_file_changed` is set to true, the check is performed again on the changed file
>> + /// if a file change while testing is detected by differences in bith time or inode number.
>> + /// Uses a 4 MiB fixed size, compressed but unencrypted chunk to test. The chunk is inserted in
>> + /// the chunk store if not yet present.
>> + /// Returns with error if the check could not be performed.
>> + pub fn check_fs_atime_updates(&self, retry_on_file_changed: bool) -> Result<(), Error> {
>> + let (zero_chunk, digest) = DataChunkBuilder::build_zero_chunk(None, 4096 * 1024, true)?;
>> + let (pre_existing, _) = self.insert_chunk(&zero_chunk, &digest)?;
>
> this one logs the path if something goes wrong, which is nice in case
> the problem is related to permissions/..
>
>> + let (path, _digest) = self.chunk_path(&digest);
>> +
>> + // Take into account timestamp update granularity in the kernel
>> + std::thread::sleep(Duration::from_secs(1));
>> +
>> + let metadata_before = std::fs::metadata(&path).map_err(Error::from)?;
>
> but this one probably wouldn't, and would benefit from it?
>
>> +
>> + // Second atime update if chunk was newly inserted above
>
> nit: comment is inverted - it's the second atime *update* if the chunk
> already existed. it's the first *update* if we inserted it ;)
>
>> + self.cond_touch_path(&path, true)?;
>
> this one includes the path in errors :)
>
>> +
>> + let metadata_now = std::fs::metadata(&path).map_err(Error::from)?;
>
> this one might benefit from some context as well?
>
>> +
>> + // Check for the unlikely case that the file changed in-between the
>> + // two metadata calls, try to check once again on changed file
>> + if metadata_before.ino() != metadata_now.ino() {
>> + if retry_on_file_changed {
>> + return self.check_fs_atime_updates(false);
>> + }
>> + bail!("chunk file changed twice during atime testing, cannot proceed.");
>
> adding the path or digest here might help as well
>
>> + }
>> +
>> + match (
>> + metadata_before.accessed()? < metadata_now.accessed()?,
>
> error context as well ;)
>
>> + pre_existing,
>> + ) {
>> + (true, false) => info!("Access time safety check successful (inserted chunk for test)."),
>> + (true, true) => {
>> + info!("Access time safety check successful (used pre-existing chunk for test).")
>> + }
>> + (false, false) => bail!(
>> + "Access time safety check failed (inserted chunk for test), is atime support \
>> + enabled on datastore backing filesystem?"
>> + ),
>> + (false, true) => bail!(
>> + "Access time safety check failed (used pre-existing chunk for test), is atime \
>> + support enabled on datastore backing filesystem?"
>> + ),
>> + }
>
> this is a bit excessive while at the same time not including the
> interesting bits..
>
> how about the following:
>
> if metadata_before.accessed()? >= metadata_now.accessed()? {
> let chunk_info_str = if pre_existing { "pre-existing" } else { "newly inserted" };
> log::warn!("Chunk metadata was not correctly updated during atime test:");
> info!("Metadata before update: {metadata_before:?}");
> info!("Metadata after update: {metadata_now:?}");
> bail!("atime safety check using {chunk_info_str} chunk failed, aborting GC!");
> }
>
> info!("atime safety check successful, proceeding with GC.");
>
>
> would look like this in the task log:
>
> Chunk metadata was not correctly updated during atime test:
> Metadata before update: Metadata { file_type: FileType { is_file: true, is_dir: false, is_symlink: false, .. }, permissions: Permissions(FilePermissions { mode: 0o100500 (-r-x------) }), len: 158, modified: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, accessed: SystemTime { tv_sec: 1741256689, tv_nsec: 18294604 }, created: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, .. }
> Metadata after update: Metadata { file_type: FileType { is_file: true, is_dir: false, is_symlink: false, .. }, permissions: Permissions(FilePermissions { mode: 0o100500 (-r-x------) }), len: 158, modified: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, accessed: SystemTime { tv_sec: 1741256689, tv_nsec: 18294604 }, created: SystemTime { tv_sec: 1673516596, tv_nsec: 65483144 }, .. }
> TASK ERROR: atime safety check using pre-existing chunk failed, aborting GC!
>
> alternatively we could of course do our own pretty printing, or add `#`
> to the format string to get:
>
> Metadata before update:
> Metadata {
> file_type: FileType {
> is_file: true,
> is_dir: false,
> is_symlink: false,
> ..
> },
> permissions: Permissions(
> FilePermissions {
> mode: 0o100500 (-r-x------),
> },
> ),
> len: 158,
> modified: SystemTime {
> tv_sec: 1673516596,
> tv_nsec: 65483144,
> },
> accessed: SystemTime {
> tv_sec: 1741256877,
> tv_nsec: 225020600,
> },
> created: SystemTime {
> tv_sec: 1673516596,
> tv_nsec: 65483144,
> },
> ..
> }
>
> I think it is important to get the logging right here because if
> somebody runs into a failing check, they should report the relevant data
> to us without the need to jump through hoops.
Agreed on this and above comments regarding error context, would however
opt for a custom output, e.g. something along the line of what is used
to output pxar entries via `format_single_line_entry`...
Do we really need information like file type, size and permissions here?
That should already be covered by the chunk insert above?
>
>> + Ok(())
>> + }
>> +
>> pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> {
>> // unwrap: only `None` in unit tests
>> assert!(self.locker.is_some());
>> @@ -628,8 +692,15 @@ fn test_chunk_store1() {
>> let user = nix::unistd::User::from_uid(nix::unistd::Uid::current())
>> .unwrap()
>> .unwrap();
>> - let chunk_store =
>> - ChunkStore::create("test", &path, user.uid, user.gid, DatastoreFSyncLevel::None).unwrap();
>> + let chunk_store = ChunkStore::create(
>> + "test",
>> + &path,
>> + user.uid,
>> + user.gid,
>> + DatastoreFSyncLevel::None,
>> + true,
>> + )
>> + .unwrap();
>>
>> let (chunk, digest) = crate::data_blob::DataChunkBuilder::new(&[0u8, 1u8])
>> .build()
>> @@ -641,8 +712,14 @@ fn test_chunk_store1() {
>> let (exists, _) = chunk_store.insert_chunk(&chunk, &digest).unwrap();
>> assert!(exists);
>>
>> - let chunk_store =
>> - ChunkStore::create("test", &path, user.uid, user.gid, DatastoreFSyncLevel::None);
>> + let chunk_store = ChunkStore::create(
>> + "test",
>> + &path,
>> + user.uid,
>> + user.gid,
>> + DatastoreFSyncLevel::None,
>> + true,
>> + );
>> assert!(chunk_store.is_err());
>>
>> if let Err(_e) = std::fs::remove_dir_all(".testdir") { /* ignore */ }
>> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
>> index 75c0c16ab..b62ddc172 100644
>> --- a/pbs-datastore/src/datastore.rs
>> +++ b/pbs-datastore/src/datastore.rs
>> @@ -1170,6 +1170,15 @@ impl DataStore {
>> upid: Some(upid.to_string()),
>> ..Default::default()
>> };
>> + let tuning: DatastoreTuning = serde_json::from_value(
>> + DatastoreTuning::API_SCHEMA
>> + .parse_property_string(gc_store_config.tuning.as_deref().unwrap_or(""))?,
>> + )?;
>> + if tuning.gc_atime_safety_check.unwrap_or(true) {
>> + self.inner.chunk_store.check_fs_atime_updates(true)?;
>> + } else {
>> + info!("Filesystem atime safety check disabled by datastore tuning options.");
>> + }
>>
>> info!("Start GC phase1 (mark used chunks)");
>>
>> diff --git a/src/api2/config/datastore.rs b/src/api2/config/datastore.rs
>> index fe3260f6d..35847fc45 100644
>> --- a/src/api2/config/datastore.rs
>> +++ b/src/api2/config/datastore.rs
>> @@ -119,6 +119,7 @@ pub(crate) fn do_create_datastore(
>> backup_user.uid,
>> backup_user.gid,
>> tuning.sync_level.unwrap_or_default(),
>> + tuning.gc_atime_safety_check.unwrap_or(true),
>> )
>> .map(|_| ())
>> } else {
>> --
>> 2.39.5
>>
>>
>>
>> _______________________________________________
>> pbs-devel mailing list
>> pbs-devel at lists.proxmox.com
>> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>>
>>
>>
>
>
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>
>
More information about the pbs-devel
mailing list