[pbs-devel] applied: [PATCH proxmox-backup] gc: treat .bad files like regular chunks

Wolfgang Bumiller w.bumiller at proxmox.com
Wed Nov 18 14:28:56 CET 2020


applied

On Thu, Nov 12, 2020 at 03:50:08PM +0100, Stefan Reiter wrote:
> Simplify the phase 2 code by treating .bad files just like regular
> chunks, with the exception of stat logging.
> 
> To facilitate, we need to touch .bad files in phase 1. We only do this
> under the condition that 1) the original chunk is missing (as before),
> and 2) the original chunk is still referenced somewhere (since the code
> lives in the error handler for a failed chunk touch, it only gets called
> for chunks we expect to be there, i.e. ones that are referenced).
> 
> Untouched they will then be cleaned up after 24 hours (or after the last
> longer-running task finishes).
> 
> Reason 2) is also a fix for .bad files not being cleaned up at all if
> the original is no longer referenced anywhere (e.g. a user deleting all
> snapshots after seeing some corrupt chunks appear).
> 
> cond_touch_path is introduced to touch arbitrary paths in the chunk
> store with the same logic as touching chunks.
> 
> Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
> ---
>  src/backup/chunk_store.rs | 68 +++++++++++++--------------------------
>  src/backup/datastore.rs   | 11 +++++++
>  2 files changed, 33 insertions(+), 46 deletions(-)
> 
> diff --git a/src/backup/chunk_store.rs b/src/backup/chunk_store.rs
> index b7556f8a..59719bad 100644
> --- a/src/backup/chunk_store.rs
> +++ b/src/backup/chunk_store.rs
> @@ -154,9 +154,11 @@ impl ChunkStore {
>      }
>  
>      pub fn cond_touch_chunk(&self, digest: &[u8; 32], fail_if_not_exist: bool) -> Result<bool, Error> {
> -
>          let (chunk_path, _digest_str) = self.chunk_path(digest);
> +        self.cond_touch_path(&chunk_path, fail_if_not_exist)
> +    }
>  
> +    pub fn cond_touch_path(&self, path: &Path, fail_if_not_exist: bool) -> Result<bool, Error> {
>          const UTIME_NOW: i64 = (1 << 30) - 1;
>          const UTIME_OMIT: i64 = (1 << 30) - 2;
>  
> @@ -167,7 +169,7 @@ impl ChunkStore {
>  
>          use nix::NixPath;
>  
> -        let res = chunk_path.with_nix_path(|cstr| unsafe {
> +        let res = path.with_nix_path(|cstr| unsafe {
>              let tmp = libc::utimensat(-1, cstr.as_ptr(), &times[0], libc::AT_SYMLINK_NOFOLLOW);
>              nix::errno::Errno::result(tmp)
>          })?;
> @@ -177,7 +179,7 @@ impl ChunkStore {
>                  return Ok(false);
>              }
>  
> -            bail!("update atime failed for chunk {:?} - {}", chunk_path, err);
> +            bail!("update atime failed for chunk/file {:?} - {}", path, err);
>          }
>  
>          Ok(true)
> @@ -328,49 +330,13 @@ impl ChunkStore {
>              let lock = self.mutex.lock();
>  
>              if let Ok(stat) = fstatat(dirfd, filename, nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW) {
> -                if bad {
> -                    // filename validity checked in iterator
> -                    let orig_filename = std::ffi::CString::new(&filename.to_bytes()[..64])?;
> -                    match fstatat(
> -                        dirfd,
> -                        orig_filename.as_c_str(),
> -                        nix::fcntl::AtFlags::AT_SYMLINK_NOFOLLOW)
> -                    {
> -                        Ok(_) => {
> -                            match unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) {
> -                                Err(err) =>
> -                                    crate::task_warn!(
> -                                        worker,
> -                                        "unlinking corrupt chunk {:?} failed on store '{}' - {}",
> -                                        filename,
> -                                        self.name,
> -                                        err,
> -                                    ),
> -                                Ok(_) => {
> -                                    status.removed_bad += 1;
> -                                    status.removed_bytes += stat.st_size as u64;
> -                                }
> -                            }
> -                        },
> -                        Err(nix::Error::Sys(nix::errno::Errno::ENOENT)) => {
> -                            // chunk hasn't been rewritten yet, keep .bad file
> -                            status.still_bad += 1;
> -                        },
> -                        Err(err) => {
> -                            // some other error, warn user and keep .bad file around too
> -                            status.still_bad += 1;
> -                            crate::task_warn!(
> -                                worker,
> -                                "error during stat on '{:?}' - {}",
> -                                orig_filename,
> -                                err,
> -                            );
> -                        }
> -                    }
> -                } else if stat.st_atime < min_atime {
> +                if stat.st_atime < min_atime {
>                      //let age = now - stat.st_atime;
>                      //println!("UNLINK {}  {:?}", age/(3600*24), filename);
>                      if let Err(err) = unlinkat(Some(dirfd), filename, UnlinkatFlags::NoRemoveDir) {
> +                        if bad {
> +                            status.still_bad += 1;
> +                        }
>                          bail!(
>                              "unlinking chunk {:?} failed on store '{}' - {}",
>                              filename,
> @@ -378,13 +344,23 @@ impl ChunkStore {
>                              err,
>                          );
>                      }
> -                    status.removed_chunks += 1;
> +                    if bad {
> +                        status.removed_bad += 1;
> +                    } else {
> +                        status.removed_chunks += 1;
> +                    }
>                      status.removed_bytes += stat.st_size as u64;
>                  } else if stat.st_atime < oldest_writer {
> -                    status.pending_chunks += 1;
> +                    if bad {
> +                        status.still_bad += 1;
> +                    } else {
> +                        status.pending_chunks += 1;
> +                    }
>                      status.pending_bytes += stat.st_size as u64;
>                  } else {
> -                    status.disk_chunks += 1;
> +                    if !bad {
> +                        status.disk_chunks += 1;
> +                    }
>                      status.disk_bytes += stat.st_size as u64;
>                  }
>              }
> diff --git a/src/backup/datastore.rs b/src/backup/datastore.rs
> index 6d759e78..19efc23f 100644
> --- a/src/backup/datastore.rs
> +++ b/src/backup/datastore.rs
> @@ -446,6 +446,17 @@ impl DataStore {
>                      file_name,
>                      err,
>                  );
> +
> +                // touch any corresponding .bad files to keep them around, meaning if a chunk is
> +                // rewritten correctly they will be removed automatically, as well as if no index
> +                // file requires the chunk anymore (won't get to this loop then)
> +                for i in 0..=9 {
> +                    let bad_ext = format!("{}.bad", i);
> +                    let mut bad_path = PathBuf::new();
> +                    bad_path.push(self.chunk_path(digest).0);
> +                    bad_path.set_extension(bad_ext);
> +                    self.chunk_store.cond_touch_path(&bad_path, false)?;
> +                }
>              }
>          }
>          Ok(())
> -- 
> 2.20.1




More information about the pbs-devel mailing list