[pbs-devel] [RFC v2 proxmox-backup 10/21] datastore: mark namespace as trash instead of deleting it
Fabian Grünbichler
f.gruenbichler at proxmox.com
Fri May 9 14:27:09 CEST 2025
On May 8, 2025 3:05 pm, Christian Ebner wrote:
> As for backup snapshots and groups, mark the namespace as trash
> instead of removing it and the contents right away, if the trash
> should not be bypassed.
>
> Actual removal of the hirarchical folder structure has to be taken
> care of by the garbage collection.
>
> In order to avoid races during removal, first mark the namespaces as
> trash pending, mark the snapshots and groups as trash and only after
> rename the pending marker file to the trash marker file. By this,
> concurrent backups can remove the trash pending marker to avoid the
> namespace being trashed.
>
> On re-creation of a trashed namespace remove the marker file on itself
> and any parent component from deepest to shallowest. As trashing a full
> namespace can also set the trash pending state for recursive namespace
> cleanup, remove encounters of that marker file as well to avoid the
> namespace or its parent being trashed.
this is fairly involved since we don't have locks on namespaces..
should we have them for creation/removal/trashing/untrashing?
I assume those are fairly rare occurrences, I haven't yet analyzed the
interactions here to see whether the two-marker approach is actually
race-free..
OTOH, do we really need to (be able to) trash namespaces?
>
> Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
> ---
> pbs-datastore/src/datastore.rs | 135 +++++++++++++++++++++++++++++----
> src/api2/admin/namespace.rs | 2 +-
> src/server/pull.rs | 2 +-
> 3 files changed, 123 insertions(+), 16 deletions(-)
>
> diff --git a/pbs-datastore/src/datastore.rs b/pbs-datastore/src/datastore.rs
> index 023a6a12e..b1d81e199 100644
> --- a/pbs-datastore/src/datastore.rs
> +++ b/pbs-datastore/src/datastore.rs
> @@ -28,7 +28,9 @@ use pbs_api_types::{
> };
> use pbs_config::BackupLockGuard;
>
> -use crate::backup_info::{BackupDir, BackupGroup, BackupInfo, ListBackupFilter, OLD_LOCKING};
> +use crate::backup_info::{
> + BackupDir, BackupGroup, BackupInfo, ListBackupFilter, OLD_LOCKING, TRASH_MARKER_FILENAME,
> +};
> use crate::chunk_store::ChunkStore;
> use crate::dynamic_index::{DynamicIndexReader, DynamicIndexWriter};
> use crate::fixed_index::{FixedIndexReader, FixedIndexWriter};
> @@ -42,6 +44,8 @@ use crate::DataBlob;
> static DATASTORE_MAP: LazyLock<Mutex<HashMap<String, Arc<DataStoreImpl>>>> =
> LazyLock::new(|| Mutex::new(HashMap::new()));
>
> +const TRASH_PENDING_MARKER_FILENAME: &str = ".pending";
> +
> /// checks if auth_id is owner, or, if owner is a token, if
> /// auth_id is the user of the token
> pub fn check_backup_owner(owner: &Authid, auth_id: &Authid) -> Result<(), Error> {
> @@ -554,6 +558,7 @@ impl DataStore {
> ns_full_path.push(ns.path());
>
> std::fs::create_dir_all(ns_full_path)?;
> + self.untrash_namespace(&ns)?;
>
> Ok(ns)
> }
> @@ -565,6 +570,34 @@ impl DataStore {
> path.exists()
> }
>
> + /// Remove the namespace and all it's parent components from the trash by removing the trash or
> + /// trash-pending marker file for each namespace level from deepest to shallowest. Missing files
> + /// are ignored.
> + pub fn untrash_namespace(&self, namespace: &BackupNamespace) -> Result<(), Error> {
> + let mut namespace = namespace.clone();
> + while namespace.depth() > 0 {
> + let mut trash_file_path = self.base_path();
> + trash_file_path.push(namespace.path());
> + let mut pending_file_path = trash_file_path.clone();
> + pending_file_path.push(TRASH_PENDING_MARKER_FILENAME);
> + if let Err(err) = std::fs::remove_file(&pending_file_path) {
> + // ignore not found, either not trashed or un-trashed by concurrent operation
> + if err.kind() != std::io::ErrorKind::NotFound {
> + bail!("failed to remove trash-pending file {trash_file_path:?}: {err}");
> + }
> + }
> + trash_file_path.push(TRASH_MARKER_FILENAME);
> + if let Err(err) = std::fs::remove_file(&trash_file_path) {
> + // ignore not found, either not trashed or un-trashed by concurrent operation
> + if err.kind() != std::io::ErrorKind::NotFound {
> + bail!("failed to remove trash file {trash_file_path:?}: {err}");
> + }
> + }
> + namespace.pop();
> + }
> + Ok(())
> + }
> +
> /// Remove all backup groups of a single namespace level but not the namespace itself.
> ///
> /// Does *not* descends into child-namespaces and doesn't remoes the namespace itself either.
> @@ -574,6 +607,7 @@ impl DataStore {
> pub fn remove_namespace_groups(
> self: &Arc<Self>,
> ns: &BackupNamespace,
> + skip_trash: bool,
> ) -> Result<(bool, BackupGroupDeleteStats), Error> {
> // FIXME: locking? The single groups/snapshots are already protected, so may not be
> // necessary (depends on what we all allow to do with namespaces)
> @@ -583,20 +617,22 @@ impl DataStore {
> let mut stats = BackupGroupDeleteStats::default();
>
> for group in self.iter_backup_groups(ns.to_owned())? {
> - let delete_stats = group?.destroy(true)?;
> + let delete_stats = group?.destroy(skip_trash)?;
> stats.add(&delete_stats);
> removed_all_groups = removed_all_groups && delete_stats.all_removed();
> }
>
> - let base_file = std::fs::File::open(self.base_path())?;
> - let base_fd = base_file.as_raw_fd();
> - for ty in BackupType::iter() {
> - let mut ty_dir = ns.path();
> - ty_dir.push(ty.to_string());
> - // best effort only, but we probably should log the error
> - if let Err(err) = unlinkat(Some(base_fd), &ty_dir, UnlinkatFlags::RemoveDir) {
> - if err != nix::errno::Errno::ENOENT {
> - log::error!("failed to remove backup type {ty} in {ns} - {err}");
> + if skip_trash {
> + let base_file = std::fs::File::open(self.base_path())?;
> + let base_fd = base_file.as_raw_fd();
> + for ty in BackupType::iter() {
> + let mut ty_dir = ns.path();
> + ty_dir.push(ty.to_string());
> + // best effort only, but we probably should log the error
> + if let Err(err) = unlinkat(Some(base_fd), &ty_dir, UnlinkatFlags::RemoveDir) {
> + if err != nix::errno::Errno::ENOENT {
> + log::error!("failed to remove backup type {ty} in {ns} - {err}");
> + }
> }
> }
> }
> @@ -613,6 +649,7 @@ impl DataStore {
> self: &Arc<Self>,
> ns: &BackupNamespace,
> delete_groups: bool,
> + skip_trash: bool,
> ) -> Result<(bool, BackupGroupDeleteStats), Error> {
> let store = self.name();
> let mut removed_all_requested = true;
> @@ -620,16 +657,68 @@ impl DataStore {
> if delete_groups {
> log::info!("removing whole namespace recursively below {store}:/{ns}",);
> for ns in self.recursive_iter_backup_ns(ns.to_owned(), NamespaceListFilter::Active)? {
> - let (removed_ns_groups, delete_stats) = self.remove_namespace_groups(&ns?)?;
> + let namespace = ns?;
> +
> + if !skip_trash {
> + let mut path = self.base_path();
> + path.push(namespace.path());
> + path.push(TRASH_PENDING_MARKER_FILENAME);
> + if let Err(err) = std::fs::File::create(&path) {
pending marker created here iff delete_groups && !skip_trash
> + if err.kind() != std::io::ErrorKind::AlreadyExists {
> + return Err(err).context("failed to set trash pending marker file");
> + }
> + }
> + }
> +
> + let (removed_ns_groups, delete_stats) =
> + self.remove_namespace_groups(&namespace, skip_trash)?;
> stats.add(&delete_stats);
> removed_all_requested = removed_all_requested && removed_ns_groups;
> +
> + if !skip_trash && !removed_ns_groups {
> + self.untrash_namespace(&namespace)?;
> + }
> }
> } else {
> log::info!("pruning empty namespace recursively below {store}:/{ns}");
> }
>
> - removed_all_requested =
> - removed_all_requested && self.destroy_namespace_recursive(ns, delete_groups)?;
> + if skip_trash {
> + removed_all_requested =
> + removed_all_requested && self.destroy_namespace_recursive(ns, delete_groups)?;
> + return Ok((removed_all_requested, stats));
early return here iff skip_trash
> + }
> +
> + let mut children = self
> + .recursive_iter_backup_ns(ns.to_owned(), NamespaceListFilter::Active)?
> + .collect::<Result<Vec<BackupNamespace>, Error>>()?;
> +
> + children.sort_by_key(|b| std::cmp::Reverse(b.depth()));
> +
> + let mut all_trashed = true;
> + for ns in children.iter() {
> + let mut ns_dir = ns.path();
> + ns_dir.push("ns");
> +
> + if !ns.is_root() {
> + let mut path = self.base_path();
> + path.push(ns.path());
> +
> + let pending_path = path.join(TRASH_PENDING_MARKER_FILENAME);
> + path.push(TRASH_MARKER_FILENAME);
> + if let Err(err) = std::fs::rename(pending_path, path) {
pending marker assumed to exist here iff !skip_trash
> + if err.kind() == std::io::ErrorKind::NotFound {
> + all_trashed = false;
> + } else {
> + return Err(err).context("Renaming pending marker to trash marker failed");
> + }
> + }
> + }
> + }
> +
> + if !all_trashed {
> + bail!("failed to prune namespace, not empty");
wrong error returned as a result..
> + }
>
> Ok((removed_all_requested, stats))
> }
> @@ -657,6 +746,24 @@ impl DataStore {
> let _ = unlinkat(Some(base_fd), &ns_dir, UnlinkatFlags::RemoveDir);
>
> if !ns.is_root() {
> + let rel_trash_path = ns.path().join(TRASH_MARKER_FILENAME);
> + if let Err(err) =
> + unlinkat(Some(base_fd), &rel_trash_path, UnlinkatFlags::NoRemoveDir)
> + {
> + if err != nix::errno::Errno::ENOENT {
> + bail!("removing the trash file '{rel_trash_path:?}' failed - {err}")
> + }
> + }
> + let rel_pending_path = ns.path().join(TRASH_PENDING_MARKER_FILENAME);
> + if let Err(err) =
> + unlinkat(Some(base_fd), &rel_pending_path, UnlinkatFlags::NoRemoveDir)
> + {
> + if err != nix::errno::Errno::ENOENT {
> + bail!(
> + "removing the trash pending file '{rel_pending_path:?}' failed - {err}"
> + )
> + }
> + }
> match unlinkat(Some(base_fd), &ns.path(), UnlinkatFlags::RemoveDir) {
> Ok(()) => log::debug!("removed namespace {ns}"),
> Err(nix::errno::Errno::ENOENT) => {
> diff --git a/src/api2/admin/namespace.rs b/src/api2/admin/namespace.rs
> index e5463524a..a12d97883 100644
> --- a/src/api2/admin/namespace.rs
> +++ b/src/api2/admin/namespace.rs
> @@ -166,7 +166,7 @@ pub fn delete_namespace(
>
> let datastore = DataStore::lookup_datastore(&store, Some(Operation::Write))?;
>
> - let (removed_all, stats) = datastore.remove_namespace_recursive(&ns, delete_groups)?;
> + let (removed_all, stats) = datastore.remove_namespace_recursive(&ns, delete_groups, true)?;
> if !removed_all {
> let err_msg = if delete_groups {
> if datastore.old_locking() {
> diff --git a/src/server/pull.rs b/src/server/pull.rs
> index d3c6fcf6a..a60ccbf10 100644
> --- a/src/server/pull.rs
> +++ b/src/server/pull.rs
> @@ -729,7 +729,7 @@ fn check_and_remove_ns(params: &PullParameters, local_ns: &BackupNamespace) -> R
> let (removed_all, _delete_stats) = params
> .target
> .store
> - .remove_namespace_recursive(local_ns, true)?;
> + .remove_namespace_recursive(local_ns, true, true)?;
>
> Ok(removed_all)
> }
> --
> 2.39.5
>
>
>
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
>
>
>
More information about the pbs-devel
mailing list