[pbs-devel] [PATCH proxmox-backup] gc: treat .bad files like regular chunks

Stefan Reiter s.reiter at proxmox.com
Thu Nov 12 15:50:08 CET 2020


Simplify the phase 2 code by treating .bad files just like regular
chunks, with the exception of stat logging.

To facilitate, we need to touch .bad files in phase 1. We only do this
under the condition that 1) the original chunk is missing (as before),
and 2) the original chunk is still referenced somewhere (since the code
lives in the error handler for a failed chunk touch, it only gets called
for chunks we expect to be there, i.e. ones that are referenced).

Untouched they will then be cleaned up after 24 hours (or after the last
longer-running task finishes).

Reason 2) is also a fix for .bad files not being cleaned up at all if
the original is no longer referenced anywhere (e.g. a user deleting all
snapshots after seeing some corrupt chunks appear).

cond_touch_path is introduced to touch arbitrary paths in the chunk
store with the same logic as touching chunks.

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
 src/backup/chunk_store.rs | 68 +++++++++++++--------------------------
 src/backup/datastore.rs   | 11 +++++++
 2 files changed, 33 insertions(+), 46 deletions(-)

diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs
index b7556f8a..59719bad 100644
--- a/src/backup/chunk_store.rs
+++ b/src/backup/chunk_store.rs
@@ -154,9 +154,11 @@ impl ChunkStore {
     }
 
     pub fn cond_touch_chunk(&self, digest: &[u8; 32], fail_if_not_exist: bool) -> Result<bool, Error> {
-
         let (chunk_path, _digest_str) = self.chunk_path(digest);
+        self.cond_touch_path(&chunk_path, fail_if_not_exist)
+    }
 
+    pub fn cond_touch_path(&self, path: &Path, fail_if_not_exist: bool) -> Result<bool, Error> {
         const UTIME_NOW: i64 = (1 << 30) - 1;
         const UTIME_OMIT: i64 = (1 << 30) - 2;
 
@@ -167,7 +169,7 @@ impl ChunkStore {
 
         use nix::NixPath;
 
-        let res = chunk_path.with_nix_path(|cstr| unsafe {
+        let res = path.with_nix_path(|cstr| unsafe {
             let tmp = libc::utimensat(-1, cstr.as_ptr(), &times[0], libc::AT_SYMLINK_NOFOLLOW);
             nix::errno::Errno::result(tmp)
         })?;
@@ -177,7 +179,7 @@ impl ChunkStore {
                 return Ok(false);
             }
 
-            bail!("update atime failed for chunk {:?} - {}", chunk_path, err);
+            bail!("update atime failed for chunk/file {:?} - {}", path, err);
         }
 
         Ok(true)
@@ -328,49 +330,13 @@ impl ChunkStore {
             let lock = self.mutex.lock();
 
             if let Ok(stat) = fstatat(dirfd, filename, nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
-                if bad {
-                    // filename validity checked in iterator
-                    let orig_filename = std::ffi::CString::new(&filename.to_bytes()[..64])?;
-                    match fstatat(
-                        dirfd,
-                        orig_filename.as_c_str(),
-                        nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW)
-                    {
-                        Ok(_) => {
-                            match unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) {
-                                Err(err) =>
-                                    crate::task_warn!(
-                                        worker,
-                                        "unlinking corrupt chunk {:?} failed on store '{}' - {}",
-                                        filename,
-                                        self.name,
-                                        err,
-                                    ),
-                                Ok(_) => {
-                                    status.removed_bad += 1;
-                                    status.removed_bytes += stat.st_size as u64;
-                                }
-                            }
-                        },
-                        Err(nix::Error::Sys(nix::errno::Errno::ENOENT)) => {
-                            // chunk hasn't been rewritten yet, keep .bad file
-                            status.still_bad += 1;
-                        },
-                        Err(err) => {
-                            // some other error, warn user and keep .bad file around too
-                            status.still_bad += 1;
-                            crate::task_warn!(
-                                worker,
-                                "error during stat on '{:?}' - {}",
-                                orig_filename,
-                                err,
-                            );
-                        }
-                    }
-                } else if stat.st_atime < min_atime {
+                if stat.st_atime < min_atime {
                     //let age = now - stat.st_atime;
                     //println!("UNLINK {}  {:?}", age/(3600*24), filename);
                     if let Err(err) = unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) {
+                        if bad {
+                            status.still_bad += 1;
+                        }
                         bail!(
                             "unlinking chunk {:?} failed on store '{}' - {}",
                             filename,
@@ -378,13 +344,23 @@ impl ChunkStore {
                             err,
                         );
                     }
-                    status.removed_chunks += 1;
+                    if bad {
+                        status.removed_bad += 1;
+                    } else {
+                        status.removed_chunks += 1;
+                    }
                     status.removed_bytes += stat.st_size as u64;
                 } else if stat.st_atime < oldest_writer {
-                    status.pending_chunks += 1;
+                    if bad {
+                        status.still_bad += 1;
+                    } else {
+                        status.pending_chunks += 1;
+                    }
                     status.pending_bytes += stat.st_size as u64;
                 } else {
-                    status.disk_chunks += 1;
+                    if !bad {
+                        status.disk_chunks += 1;
+                    }
                     status.disk_bytes += stat.st_size as u64;
                 }
             }
diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
index 6d759e78..19efc23f 100644
--- a/src/backup/datastore.rs
+++ b/src/backup/datastore.rs
@@ -446,6 +446,17 @@ impl DataStore {
                     file_name,
                     err,
                 );
+
+                // touch any corresponding .bad files to keep them around, meaning if a chunk is
+                // rewritten correctly they will be removed automatically, as well as if no index
+                // file requires the chunk anymore (won't get to this loop then)
+                for i in 0..=9 {
+                    let bad_ext = format!("{}.bad", i);
+                    let mut bad_path = PathBuf::new();
+                    bad_path.push(self.chunk_path(digest).0);
+                    bad_path.set_extension(bad_ext);
+                    self.chunk_store.cond_touch_path(&bad_path, false)?;
+                }
             }
         }
         Ok(())
-- 
2.20.1





More information about the pbs-devel mailing list