[pbs-devel] [PATCH proxmox-backup v12 07/26] api: admin: add (un)mount endpoint for removable datastores

Fabian Grünbichler f.gruenbichler at proxmox.com
Wed Oct 30 11:07:55 CET 2024


Quoting Hannes Laimer (2024-10-29 15:58:25)
> On Mon Oct 14, 2024 at 3:42 PM CEST, Fabian Grünbichler wrote:
> > On September 4, 2024 4:11 pm, Hannes Laimer wrote:
> > > Signed-off-by: Hannes Laimer <h.laimer at proxmox.com>
> > > ---
> > >  pbs-api-types/src/maintenance.rs |   4 +
> > >  src/api2/admin/datastore.rs      | 243 +++++++++++++++++++++++++++++--
> > >  2 files changed, 237 insertions(+), 10 deletions(-)
> >
> > a pretty substantial patch - no commit message at all??
> >
> > > 
> > > diff --git a/pbs-api-types/src/maintenance.rs b/pbs-api-types/src/maintenance.rs
> > > index 9f51292e..60181258 100644
> > > --- a/pbs-api-types/src/maintenance.rs
> > > +++ b/pbs-api-types/src/maintenance.rs
> > > @@ -78,6 +78,10 @@ pub struct MaintenanceMode {
> > >  }
> > >  
> > >  impl MaintenanceMode {
> > > +    pub fn new(ty: MaintenanceType, message: Option<String>) -> Self {
> > > +        Self { ty, message }
> > > +    }
> > > +
> >
> > nit: if we want this, it should be in its own patch and convert existing
> > constructions as well.. or this could be dropped and the call to `new`
> > below could just init the struct..
> >
> > >      /// Used for deciding whether the datastore is cleared from the internal cache after the last
> > >      /// task finishes, so all open files are closed.
> > >      pub fn is_offline(&self) -> bool {
> > > diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
> > > index 976617d9..3c95888d 100644
> > > --- a/src/api2/admin/datastore.rs
> > > +++ b/src/api2/admin/datastore.rs
> > > @@ -3,7 +3,7 @@
> > >  use std::collections::HashSet;
> > >  use std::ffi::OsStr;
> > >  use std::os::unix::ffi::OsStrExt;
> > > -use std::path::PathBuf;
> > > +use std::path::{Path, PathBuf};
> > >  use std::sync::Arc;
> > >  
> > >  use anyhow::{bail, format_err, Error};
> > > @@ -13,7 +13,7 @@ use hyper::{header, Body, Response, StatusCode};
> > >  use serde::Deserialize;
> > >  use serde_json::{json, Value};
> > >  use tokio_stream::wrappers::ReceiverStream;
> > > -use tracing::{info, warn};
> > > +use tracing::{debug, info, warn};
> > >  
> > >  use proxmox_async::blocking::WrappedReaderStream;
> > >  use proxmox_async::{io::AsyncChannelWriter, stream::AsyncReaderStream};
> > > @@ -29,6 +29,7 @@ use proxmox_sys::fs::{
> > >      file_read_firstline, file_read_optional_string, replace_file, CreateOptions,
> > >  };
> > >  use proxmox_time::CalendarEvent;
> > > +use proxmox_worker_task::WorkerTaskContext;
> > >  
> > >  use pxar::accessor::aio::Accessor;
> > >  use pxar::EntryKind;
> > > @@ -36,12 +37,12 @@ use pxar::EntryKind;
> > >  use pbs_api_types::{
> > >      print_ns_and_snapshot, print_store_and_ns, Authid, BackupContent, BackupNamespace, BackupType,
> > >      Counts, CryptMode, DataStoreConfig, DataStoreListItem, DataStoreStatus,
> > > -    GarbageCollectionJobStatus, GroupListItem, JobScheduleStatus, KeepOptions, Operation,
> > > -    PruneJobOptions, SnapshotListItem, SnapshotVerifyState, BACKUP_ARCHIVE_NAME_SCHEMA,
> > > -    BACKUP_ID_SCHEMA, BACKUP_NAMESPACE_SCHEMA, BACKUP_TIME_SCHEMA, BACKUP_TYPE_SCHEMA,
> > > -    DATASTORE_SCHEMA, IGNORE_VERIFIED_BACKUPS_SCHEMA, MAX_NAMESPACE_DEPTH, NS_MAX_DEPTH_SCHEMA,
> > > -    PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_MODIFY, PRIV_DATASTORE_PRUNE,
> > > -    PRIV_DATASTORE_READ, PRIV_DATASTORE_VERIFY, UPID, UPID_SCHEMA,
> > > +    GarbageCollectionJobStatus, GroupListItem, JobScheduleStatus, KeepOptions, MaintenanceMode,
> > > +    MaintenanceType, Operation, PruneJobOptions, SnapshotListItem, SnapshotVerifyState,
> > > +    BACKUP_ARCHIVE_NAME_SCHEMA, BACKUP_ID_SCHEMA, BACKUP_NAMESPACE_SCHEMA, BACKUP_TIME_SCHEMA,
> > > +    BACKUP_TYPE_SCHEMA, DATASTORE_SCHEMA, IGNORE_VERIFIED_BACKUPS_SCHEMA, MAX_NAMESPACE_DEPTH,
> > > +    NS_MAX_DEPTH_SCHEMA, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_MODIFY,
> > > +    PRIV_DATASTORE_PRUNE, PRIV_DATASTORE_READ, PRIV_DATASTORE_VERIFY, UPID, UPID_SCHEMA,
> > >      VERIFICATION_OUTDATED_AFTER_SCHEMA,
> > >  };
> > >  use pbs_client::pxar::{create_tar, create_zip};
> > > @@ -57,8 +58,8 @@ use pbs_datastore::index::IndexFile;
> > >  use pbs_datastore::manifest::{BackupManifest, CLIENT_LOG_BLOB_NAME, MANIFEST_BLOB_NAME};
> > >  use pbs_datastore::prune::compute_prune_info;
> > >  use pbs_datastore::{
> > > -    check_backup_owner, task_tracking, BackupDir, BackupGroup, DataStore, LocalChunkReader,
> > > -    StoreProgress, CATALOG_NAME,
> > > +    check_backup_owner, is_datastore_available, task_tracking, BackupDir, BackupGroup, DataStore,
> > > +    LocalChunkReader, StoreProgress, CATALOG_NAME,
> > >  };
> > >  use pbs_tools::json::required_string_param;
> > >  use proxmox_rest_server::{formatter, WorkerTask};
> > > @@ -2384,6 +2385,226 @@ pub async fn set_backup_owner(
> > >      .await?
> > >  }
> > >  
> > > +/// Here we
> > > +///
> > > +/// 1. mount the removable device to `<PBS_RUN_DIR>/mount/<RANDOM_UUID>`
> > > +/// 2. bind mount `<PBS_RUN_DIR>/mount/<RANDOM_UUID>/<datastore.path>` to `/mnt/datastore/<datastore.name>`
> > > +/// 3. unmount `<PBS_RUN_DIR>/mount/<RANDOM_UUID>`
> >
> > couldn't we instead mount directly, and adapt get_absolute_path to
> > return /mnt/datastore/{datastore.name}/{datastore.path} ?
> >
> 
> but then we would either mount all, or none, of the datastores on the
> device.

which I think is probably the expected behaviour, but I can see the argument
against it as well..
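
for reference, roughly what I meant above with adapting get_absolute_path,
just a sketch, the field/method names are made up and would need to match
whatever this series ends up with:

    impl DataStoreConfig {
        /// absolute path of the datastore contents on the running system
        pub fn absolute_path(&self) -> String {
            match &self.backing_device {
                // removable: the backing device is mounted directly at
                // /mnt/datastore/<name>, the datastore contents live in the
                // configured (relative) path below that mount point
                Some(_uuid) => format!(
                    "/mnt/datastore/{}/{}",
                    self.name,
                    self.path.trim_start_matches('/')
                ),
                // regular datastore: `path` already is the absolute path
                None => self.path.clone(),
            }
        }
    }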

> 
> > the "concurrent mounts" can be solved by locking (using the UUID or
> > datastore name as lock scope) or by going via a Mount maintenance mode..
> >
> 
> yes, I'm just not sure that locking really adds something. We
> can mount in parallel, we just have to make sure we don't use the same tmp
> dir name for it.
> 
> > seems to me like there's a lot less that could go wrong/fail in that
> > case? but maybe I am missing some background that should have been in
> > the commit message ;)
> >
> 
> most of what can go wrong comes from needing a bunch of tmp things and
> the (bind) mounting that happens; that would still happen even
> non-concurrently.

well, with one mount per device there would be no need for tmp things and bind mounts at all?
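
i.e. the mount task could then boil down to something like this (untested
sketch, reusing the helpers from this patch; error handling and the initial
creation of the datastore directory left out, and for a strict
one-mount-per-device model the mount point would be keyed on the device UUID
instead of the datastore name):

    pub fn do_mount_device(datastore: DataStoreConfig) -> Result<(), Error> {
        let uuid = datastore
            .backing_device
            .as_ref()
            .ok_or_else(|| format_err!("datastore '{}' is not removable", datastore.name))?;

        if pbs_datastore::is_datastore_available(&datastore) {
            bail!("datastore '{}' is already mounted", datastore.name);
        }

        // mount the device directly at its final mount point, no tmp dir and
        // no bind mount needed
        let mount_point = format!("/mnt/datastore/{}", datastore.name);
        let opts = proxmox_sys::fs::CreateOptions::new();
        proxmox_sys::fs::create_path(&mount_point, Some(opts.clone()), Some(opts))?;

        info!("mounting '{uuid}' to '{mount_point}'");
        crate::tools::disks::mount_by_uuid(uuid, Path::new(&mount_point))?;

        Ok(())
    }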

> 
> > if I think of the semantics of "removable datastores", then if we want
> > to support multiple such datastores on a single device (at least as an
> > option in the future), then unmounting only makes sense when thinking of
> > the backing device (after all, I want to unmount to unplug it, and I
> > don't want to do that 10 times if the backing devices contains 10
> > datastores, and keep track of them all, and ensure nothing mounts them
> > again in the meantime, ..).. so only mounting it once in the first place
> > (and pointing the datastore at the relevant subdir of the mountpoint)
> > seems like the better option?
> >
> 
> We could skip auto-mounting when a device contains multiple datastores, so
> whatever someone mounted they also have to unmount. I feel like clicking
> "unmount" on one datastore and having 4 other ones also end up unmounted is
> not really something I'd expect; also, we'd have to track active
> operations on all of them when unmounting, which we can do, but idk...
> This could make sense if we had the notion of a device, and unmounting
> the device unmounts all its datastores, but I don't think that is what we want.

well the whole purpose of removable datastores is to allow rotating those
devices or putting them in cold storage.. so if we want to support multiple
datastores on the same device, then a single unmount for all of them would make
a lot of sense IMHO.. just unmounting one of N datastores on the same device
has no purpose?
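
e.g. "unmount" could then look up all datastores backed by the same device and
handle them in one go, roughly like this (very rough sketch on top of the
current model, hypothetical helper, waiting for active operations omitted):

    fn unmount_device(device_uuid: &str) -> Result<(), Error> {
        let _lock = pbs_config::datastore::lock_config()?;
        let (mut section_config, _digest) = pbs_config::datastore::config()?;

        // all datastores living on this backing device
        let affected = section_config
            .convert_to_typed_array::<DataStoreConfig>("datastore")?
            .into_iter()
            .filter(|ds| ds.backing_device.as_deref() == Some(device_uuid));

        for mut datastore in affected {
            if let Some(mount_point) = datastore.get_mount_point() {
                crate::tools::disks::unmount_by_mountpoint(&mount_point)?;
            }
            // clear the maintenance mode of every datastore on the device
            datastore.maintenance_mode = None;
            section_config.set_data(&datastore.name, "datastore", &datastore)?;
        }
        pbs_config::datastore::save_config(&section_config)?;

        Ok(())
    }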

> 
> > > +///
> > > +/// leaving us with the datastore being mounted directly with its name under /mnt/datastore/...
> > > +///
> > > +/// The reason for the randomized device mounting paths is to avoid two tasks trying to mount to
> > > +/// the same path, this is *very* unlikely since the device is only mounted really shortly, but
> > > +/// technically possible.
> > > +pub fn do_mount_device(datastore: DataStoreConfig) -> Result<(), Error> {
> > > +    if let (Some(uuid), Some(mount_point)) = (
> > > +        datastore.backing_device.as_ref(),
> > > +        datastore.get_mount_point(),
> > > +    ) {
> > > +        if pbs_datastore::is_datastore_available(&datastore) {
> > > +            bail!("datastore '{}' is already mounted", datastore.name);
> > > +        }
> > > +        let tmp_mount_path = format!(
> > > +            "{}/{:x}",
> > > +            pbs_buildcfg::rundir!("/mount"),
> > > +            proxmox_uuid::Uuid::generate()
> > > +        );
> > > +
> > > +        let default_options = proxmox_sys::fs::CreateOptions::new();
> > > +        proxmox_sys::fs::create_path(
> > > +            &tmp_mount_path,
> > > +            Some(default_options.clone()),
> > > +            Some(default_options.clone()),
> > > +        )?;
> > > +
> > > +        debug!("mounting '{uuid}' to '{}'", tmp_mount_path);
> > > +        crate::tools::disks::mount_by_uuid(uuid, Path::new(&tmp_mount_path))?;
> > > +
> > > +        let full_store_path = format!(
> > > +            "{tmp_mount_path}/{}",
> > > +            datastore.path.trim_start_matches('/')
> > > +        );
> > > +
> > > +        proxmox_sys::fs::create_path(
> > > +            &mount_point,
> > > +            Some(default_options.clone()),
> > > +            Some(default_options.clone()),
> > > +        )?;
> > > +
> > > +        // can't be created before it is mounted, so we have to do it here
> > > +        proxmox_sys::fs::create_path(
> > > +            &full_store_path,
> > > +            Some(default_options.clone()),
> > > +            Some(default_options.clone()),
> > > +        )?;
> >
> > shouldn't this create_path call be limited to the initial creation of
> > the datastore? in all other cases it should already be there..
> >
> 
> we'd have to check if `.chunks/` exists for that, and
> create_path already kind of does that. But manually checking
> is probably a little faster(?)
> 
> > > +
> > > +        info!(
> > > +            "mounting '{}'({}) to '{}'",
> > > +            datastore.name, datastore.path, mount_point
> > > +        );
> > > +        if let Err(err) =
> > > +            crate::tools::disks::bind_mount(Path::new(&full_store_path), Path::new(&mount_point))
> > > +        {
> > > +            debug!("unmounting '{}'", tmp_mount_path);
> > > +            let _ = crate::tools::disks::unmount_by_mountpoint(&tmp_mount_path);
> > > +            let _ = std::fs::remove_dir(std::path::Path::new(&tmp_mount_path));
> > > +            return Err(format_err!(
> > > +                "Datastore '{}' cound not be mounted: {}.",
> > > +                datastore.name,
> > > +                err
> > > +            ));
> > > +        }
> > > +
> > > +        debug!("unmounting '{}'", tmp_mount_path);
> > > +        crate::tools::disks::unmount_by_mountpoint(&tmp_mount_path)?;
> > > +        std::fs::remove_dir(std::path::Path::new(&tmp_mount_path))?;
> > > +
> > > +        Ok(())
> > > +    } else {
> > > +        Err(format_err!(
> > > +            "Datastore '{}' cannot be mounted because it is not removable.",
> > > +            datastore.name
> > > +        ))
> > > +    }
> > > +}
> > > +
> > > +#[api(
> > > +    protected: true,
> > > +    input: {
> > > +        properties: {
> > > +            store: {
> > > +                schema: DATASTORE_SCHEMA,
> > > +            },
> > > +        }
> > > +    },
> > > +    returns: {
> > > +        schema: UPID_SCHEMA,
> > > +    },
> > > +    access: {
> > > +        permission: &Permission::Privilege(&["datastore", "{store}"], PRIV_DATASTORE_AUDIT, false),
> > > +    },
> > > +)]
> > > +/// Mount removable datastore.
> > > +pub fn mount(store: String, rpcenv: &mut dyn RpcEnvironment) -> Result<Value, Error> {
> > > +    let (section_config, _digest) = pbs_config::datastore::config()?;
> > > +    let datastore: DataStoreConfig = section_config.lookup("datastore", &store)?;
> > > +
> > > +    if datastore.backing_device.is_none() {
> > > +        bail!("datastore '{store}' is not removable");
> > > +    }
> > > +
> > > +    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> > > +    let to_stdout = rpcenv.env_type() == RpcEnvironmentType::CLI;
> > > +
> > > +    let upid = WorkerTask::new_thread(
> > > +        "mount-device",
> > > +        Some(store),
> > > +        auth_id.to_string(),
> > > +        to_stdout,
> > > +        move |_worker| do_mount_device(datastore),
> > > +    )?;
> > > +
> > > +    Ok(json!(upid))
> > > +}
> > > +
> > > +fn do_unmount_device(
> > > +    datastore: DataStoreConfig,
> > > +    worker: Option<&dyn WorkerTaskContext>,
> > > +) -> Result<(), Error> {
> > > +    let mut active_operations = task_tracking::get_active_operations(&datastore.name)?;
> > > +    let mut old_status = String::new();
> > > +    while active_operations.read + active_operations.write > 0 {
> > > +        if let Some(worker) = worker {
> > > +            if worker.abort_requested() {
> > > +                bail!("aborted, due to user request");
> > > +            }
> > > +            let status = format!(
> > > +                "cannot unmount yet, still {} read and {} write operations active",
> > > +                active_operations.read, active_operations.write
> > > +            );
> > > +            if status != old_status {
> > > +                info!("{status}");
> > > +                old_status = status;
> > > +            }
> > > +        }
> > > +        std::thread::sleep(std::time::Duration::from_millis(250));
> >
> > I think once per second is probably enough? on a busy/big server there
> > might be a lot of operations that we are waiting for (and thus a lot of
> > decrements of the counters)..
> >
> 
> makes sense
> 
> > > +        active_operations = task_tracking::get_active_operations(&datastore.name)?;
> > > +    }
> > > +    if let Some(mount_point) = datastore.get_mount_point() {
> >
> > shouldn't this if be right at the start?
> >
> 
> this fn is only called in the unmount endpoint, which does this check.
> But checking again wouldn't hurt, I guess 
> 
> > > +        crate::tools::disks::unmount_by_mountpoint(&mount_point)?;
> > > +
> > > +        let _lock = pbs_config::datastore::lock_config()?;
> > > +        let (mut section_config, _digest) = pbs_config::datastore::config()?;
> > > +        let mut store_config: DataStoreConfig =
> > > +            section_config.lookup("datastore", &datastore.name)?;
> >
> > should we re-check the current maintenance_mode here to avoid races?
> >
> 
> I think the only things we could race against are other unmount tasks,
> and both would set it to `None`

or an admin setting it to something else manually because they gave up on
unmounting and we give them no way to cancel that ;)
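
i.e. something along these lines before clearing it (sketch, using the Unmount
maintenance type from this series):

    // only clear the maintenance mode if it still is the Unmount we set
    // ourselves when starting the task, otherwise leave whatever the admin
    // configured in the meantime untouched
    match store_config.get_maintenance_mode().map(|m| m.ty) {
        Some(MaintenanceType::Unmount) => {
            store_config.maintenance_mode = None;
            section_config.set_data(&datastore.name, "datastore", &store_config)?;
            pbs_config::datastore::save_config(&section_config)?;
        }
        _ => info!("maintenance mode was changed in the meantime, leaving it as-is"),
    }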

> 
> > > +        store_config.maintenance_mode = None;
> >
> > should use set_maintenance_mode
> >
> 
> we set the maintenance mode to `Unmount` before starting the unmounting; we
> can't leave that state except by setting it like this. If something should
> go wrong we want this locked in, so to say. So manually examining the
> situation and editing the config file is needed.

wouldn't it make sense to allow cancelling an unmount by setting the
maintenance mode to something else again?
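
e.g. the wait loop in do_unmount_device could bail out if the mode is changed
away from Unmount while we are waiting, something like (sketch):

    while active_operations.read + active_operations.write > 0 {
        // allow an admin to cancel the unmount by switching the maintenance
        // mode to something else (or clearing it) while we are still waiting
        let (config, _digest) = pbs_config::datastore::config()?;
        let current: DataStoreConfig = config.lookup("datastore", &datastore.name)?;
        match current.get_maintenance_mode().map(|m| m.ty) {
            Some(MaintenanceType::Unmount) => {} // still ours, keep waiting
            _ => bail!("unmount cancelled, maintenance mode was changed"),
        }

        std::thread::sleep(std::time::Duration::from_secs(1));
        active_operations = task_tracking::get_active_operations(&datastore.name)?;
    }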



