[pbs-devel] applied: [PATCH proxmox-backup] gc: treat .bad files like regular chunks
Wolfgang Bumiller
w.bumiller at proxmox.com
Wed Nov 18 14:28:56 CET 2020
applied
On Thu, Nov 12, 2020 at 03:50:08PM +0100, Stefan Reiter wrote:
> Simplify the phase 2 code by treating .bad files just like regular
> chunks, with the exception of stat logging.
>
> To facilitate, we need to touch .bad files in phase 1. We only do this
> under the condition that 1) the original chunk is missing (as before),
> and 2) the original chunk is still referenced somewhere (since the code
> lives in the error handler for a failed chunk touch, it only gets called
> for chunks we expect to be there, i.e. ones that are referenced).
>
> Untouched they will then be cleaned up after 24 hours (or after the last
> longer-running task finishes).
>
> Reason 2) is also a fix for .bad files not being cleaned up at all if
> the original is no longer referenced anywhere (e.g. a user deleting all
> snapshots after seeing some corrupt chunks appear).
>
> cond_touch_path is introduced to touch arbitrary paths in the chunk
> store with the same logic as touching chunks.
>
> Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
> ---
> src/backup/chunk_store.rs | 68 +++++++++++++--------------------------
> src/backup/datastore.rs | 11 +++++++
> 2 files changed, 33 insertions(+), 46 deletions(-)
>
> diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs
> index b7556f8a..59719bad 100644
> --- a/src/backup/chunk_store.rs
> +++ b/src/backup/chunk_store.rs
> @@ -154,9 +154,11 @@ impl ChunkStore {
> }
>
> pub fn cond_touch_chunk(&self, digest: &[u8; 32], fail_if_not_exist: bool) -> Result<bool, Error> {
> -
> let (chunk_path, _digest_str) = self.chunk_path(digest);
> + self.cond_touch_path(&chunk_path, fail_if_not_exist)
> + }
>
> + pub fn cond_touch_path(&self, path: &Path, fail_if_not_exist: bool) -> Result<bool, Error> {
> const UTIME_NOW: i64 = (1 << 30) - 1;
> const UTIME_OMIT: i64 = (1 << 30) - 2;
>
> @@ -167,7 +169,7 @@ impl ChunkStore {
>
> use nix::NixPath;
>
> - let res = chunk_path.with_nix_path(|cstr| unsafe {
> + let res = path.with_nix_path(|cstr| unsafe {
> let tmp = libc::utimensat(-1, cstr.as_ptr(), ×[0], libc::AT_SYMLINK_NOFOLLOW);
> nix::errno::Errno::result(tmp)
> })?;
> @@ -177,7 +179,7 @@ impl ChunkStore {
> return Ok(false);
> }
>
> - bail!("update atime failed for chunk {:?} - {}", chunk_path, err);
> + bail!("update atime failed for chunk/file {:?} - {}", path, err);
> }
>
> Ok(true)
> @@ -328,49 +330,13 @@ impl ChunkStore {
> let lock = self.mutex.lock();
>
> if let Ok(stat) = fstatat(dirfd, filename, nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
> - if bad {
> - // filename validity checked in iterator
> - let orig_filename = std::ffi::CString::new(&filename.to_bytes()[..64])?;
> - match fstatat(
> - dirfd,
> - orig_filename.as_c_str(),
> - nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW)
> - {
> - Ok(_) => {
> - match unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) {
> - Err(err) =>
> - crate::task_warn!(
> - worker,
> - "unlinking corrupt chunk {:?} failed on store '{}' - {}",
> - filename,
> - self.name,
> - err,
> - ),
> - Ok(_) => {
> - status.removed_bad += 1;
> - status.removed_bytes += stat.st_size as u64;
> - }
> - }
> - },
> - Err(nix::Error::Sys(nix::errno::Errno::ENOENT)) => {
> - // chunk hasn't been rewritten yet, keep .bad file
> - status.still_bad += 1;
> - },
> - Err(err) => {
> - // some other error, warn user and keep .bad file around too
> - status.still_bad += 1;
> - crate::task_warn!(
> - worker,
> - "error during stat on '{:?}' - {}",
> - orig_filename,
> - err,
> - );
> - }
> - }
> - } else if stat.st_atime < min_atime {
> + if stat.st_atime < min_atime {
> //let age = now - stat.st_atime;
> //println!("UNLINK {} {:?}", age/(3600*24), filename);
> if let Err(err) = unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) {
> + if bad {
> + status.still_bad += 1;
> + }
> bail!(
> "unlinking chunk {:?} failed on store '{}' - {}",
> filename,
> @@ -378,13 +344,23 @@ impl ChunkStore {
> err,
> );
> }
> - status.removed_chunks += 1;
> + if bad {
> + status.removed_bad += 1;
> + } else {
> + status.removed_chunks += 1;
> + }
> status.removed_bytes += stat.st_size as u64;
> } else if stat.st_atime < oldest_writer {
> - status.pending_chunks += 1;
> + if bad {
> + status.still_bad += 1;
> + } else {
> + status.pending_chunks += 1;
> + }
> status.pending_bytes += stat.st_size as u64;
> } else {
> - status.disk_chunks += 1;
> + if !bad {
> + status.disk_chunks += 1;
> + }
> status.disk_bytes += stat.st_size as u64;
> }
> }
> diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
> index 6d759e78..19efc23f 100644
> --- a/src/backup/datastore.rs
> +++ b/src/backup/datastore.rs
> @@ -446,6 +446,17 @@ impl DataStore {
> file_name,
> err,
> );
> +
> + // touch any corresponding .bad files to keep them around, meaning if a chunk is
> + // rewritten correctly they will be removed automatically, as well as if no index
> + // file requires the chunk anymore (won't get to this loop then)
> + for i in 0..=9 {
> + let bad_ext = format!("{}.bad", i);
> + let mut bad_path = PathBuf::new();
> + bad_path.push(self.chunk_path(digest).0);
> + bad_path.set_extension(bad_ext);
> + self.chunk_store.cond_touch_path(&bad_path, false)?;
> + }
> }
> }
> Ok(())
> --
> 2.20.1
More information about the pbs-devel
mailing list