[pbs-devel] [PATCH proxmox-backup v13 07/26] api: admin: add (un)mount endpoint for removable datastores

Fabian Grünbichler f.gruenbichler at proxmox.com
Thu Nov 21 14:35:00 CET 2024


On November 13, 2024 4:00 pm, Hannes Laimer wrote:
> Removable datastores can be mounted unless
>  - they are already mounted
>  - their device is not present
> For unmounting the maintenance mode is set to `unmount`,
> which prohibits the starting of any new tasks involving any
> IO, this mode is unset either
>  - on completion of the unmount
>  - on abort of the unmount tasks
> If the unmounting itself should fail, the maintenance mode stays in
> place and requires manual intervention by unsetting it in the config
> file directly. This is intentional, as unmounting should not fail,
> and if it should the situation should be looked at.
> 
> Signed-off-by: Hannes Laimer <h.laimer at proxmox.com>
> ---
> changes since v12:
>  * allow multiple stores on one device
>  * add best effort attempt to unmount after failed creation
> 
>  src/api2/admin/datastore.rs | 267 ++++++++++++++++++++++++++++++++++--
>  1 file changed, 257 insertions(+), 10 deletions(-)
> 
> diff --git a/src/api2/admin/datastore.rs b/src/api2/admin/datastore.rs
> index b73ad0ff..a12262e7 100644
> --- a/src/api2/admin/datastore.rs
> +++ b/src/api2/admin/datastore.rs
> @@ -3,7 +3,7 @@
>  use std::collections::HashSet;
>  use std::ffi::OsStr;
>  use std::os::unix::ffi::OsStrExt;
> -use std::path::PathBuf;
> +use std::path::{Path, PathBuf};
>  use std::sync::Arc;
>  
>  use anyhow::{bail, format_err, Error};
> @@ -13,7 +13,7 @@ use hyper::{header, Body, Response, StatusCode};
>  use serde::Deserialize;
>  use serde_json::{json, Value};
>  use tokio_stream::wrappers::ReceiverStream;
> -use tracing::{info, warn};
> +use tracing::{debug, info, warn};
>  
>  use proxmox_async::blocking::WrappedReaderStream;
>  use proxmox_async::{io::AsyncChannelWriter, stream::AsyncReaderStream};
> @@ -29,6 +29,7 @@ use proxmox_sys::fs::{
>      file_read_firstline, file_read_optional_string, replace_file, CreateOptions,
>  };
>  use proxmox_time::CalendarEvent;
> +use proxmox_worker_task::WorkerTaskContext;
>  
>  use pxar::accessor::aio::Accessor;
>  use pxar::EntryKind;
> @@ -36,12 +37,12 @@ use pxar::EntryKind;
>  use pbs_api_types::{
>      print_ns_and_snapshot, print_store_and_ns, Authid, BackupContent, BackupNamespace, BackupType,
>      Counts, CryptMode, DataStoreConfig, DataStoreListItem, DataStoreStatus,
> -    GarbageCollectionJobStatus, GroupListItem, JobScheduleStatus, KeepOptions, Operation,
> -    PruneJobOptions, SnapshotListItem, SnapshotVerifyState, BACKUP_ARCHIVE_NAME_SCHEMA,
> -    BACKUP_ID_SCHEMA, BACKUP_NAMESPACE_SCHEMA, BACKUP_TIME_SCHEMA, BACKUP_TYPE_SCHEMA,
> -    DATASTORE_SCHEMA, IGNORE_VERIFIED_BACKUPS_SCHEMA, MAX_NAMESPACE_DEPTH, NS_MAX_DEPTH_SCHEMA,
> -    PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_MODIFY, PRIV_DATASTORE_PRUNE,
> -    PRIV_DATASTORE_READ, PRIV_DATASTORE_VERIFY, UPID, UPID_SCHEMA,
> +    GarbageCollectionJobStatus, GroupListItem, JobScheduleStatus, KeepOptions, MaintenanceMode,
> +    MaintenanceType, Operation, PruneJobOptions, SnapshotListItem, SnapshotVerifyState,
> +    BACKUP_ARCHIVE_NAME_SCHEMA, BACKUP_ID_SCHEMA, BACKUP_NAMESPACE_SCHEMA, BACKUP_TIME_SCHEMA,
> +    BACKUP_TYPE_SCHEMA, DATASTORE_SCHEMA, IGNORE_VERIFIED_BACKUPS_SCHEMA, MAX_NAMESPACE_DEPTH,
> +    NS_MAX_DEPTH_SCHEMA, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP, PRIV_DATASTORE_MODIFY,
> +    PRIV_DATASTORE_PRUNE, PRIV_DATASTORE_READ, PRIV_DATASTORE_VERIFY, UPID, UPID_SCHEMA,
>      VERIFICATION_OUTDATED_AFTER_SCHEMA,
>  };
>  use pbs_client::pxar::{create_tar, create_zip};
> @@ -57,8 +58,8 @@ use pbs_datastore::index::IndexFile;
>  use pbs_datastore::manifest::{BackupManifest, CLIENT_LOG_BLOB_NAME, MANIFEST_BLOB_NAME};
>  use pbs_datastore::prune::compute_prune_info;
>  use pbs_datastore::{
> -    check_backup_owner, task_tracking, BackupDir, BackupGroup, DataStore, LocalChunkReader,
> -    StoreProgress, CATALOG_NAME,
> +    check_backup_owner, is_datastore_mounted_at, task_tracking, BackupDir, BackupGroup, DataStore,
> +    LocalChunkReader, StoreProgress, CATALOG_NAME,
>  };
>  use pbs_tools::json::required_string_param;
>  use proxmox_rest_server::{formatter, WorkerTask};
> @@ -2384,6 +2385,250 @@ pub async fn set_backup_owner(
>      .await?
>  }
>  
> +/// Here we
> +///
> +/// 1. mount the removable device to `<PBS_RUN_DIR>/mount/<RANDOM_UUID>`
> +/// 2. bind mount `<PBS_RUN_DIR>/mount/<RANDOM_UUID>/<datastore.path>` to `/mnt/datastore/<datastore.name>`
> +/// 3. unmount `<PBS_RUN_DIR>/mount/<RANDOM_UUID>`
> +///
> +/// leaving us with the datastore being mounted directly with its name under /mnt/datastore/...
> +///
> +/// The reason for the randomized device mounting paths is to avoid two tasks trying to mount to
> +/// the same path, this is *very* unlikely since the device is only mounted really shortly, but
> +/// technically possible.
> +pub fn do_mount_device(datastore: DataStoreConfig) -> Result<(), Error> {
> +    if let (Some(uuid), Some(mount_point)) = (
> +        datastore.backing_device.as_ref(),
> +        datastore.get_mount_point(),
> +    ) {

another variation, see previous patch comments..

> +        if pbs_datastore::is_datastore_mounted_at(mount_point.clone(), uuid.to_string()) {
> +            bail!("device is already mounted at '{}'", mount_point);
> +        }
> +        let tmp_mount_path = format!(
> +            "{}/{:x}",
> +            pbs_buildcfg::rundir!("/mount"),
> +            proxmox_uuid::Uuid::generate()
> +        );
> +
> +        let default_options = proxmox_sys::fs::CreateOptions::new();
> +        proxmox_sys::fs::create_path(
> +            &tmp_mount_path,
> +            Some(default_options.clone()),
> +            Some(default_options.clone()),
> +        )?;
> +
> +        debug!("mounting '{uuid}' to '{}'", tmp_mount_path);

IMHO this could be info!, we are in a task context here with very little
output, and if something went wrong, the extra info can only help..
maybe add in a "temporarily" at the beginning..

> +        crate::tools::disks::mount_by_uuid(uuid, Path::new(&tmp_mount_path))?;

because else, if this fails, the user has no idea what's going on unless
they happen to run in debug mode..

> +
> +        let full_store_path = format!(
> +            "{tmp_mount_path}/{}",
> +            datastore.path.trim_start_matches('/')
> +        );
> +        let backup_user = pbs_config::backup_user()?;
> +        let options = CreateOptions::new()
> +            .owner(backup_user.uid)
> +            .group(backup_user.gid);
> +
> +        proxmox_sys::fs::create_path(
> +            &mount_point,
> +            Some(default_options.clone()),
> +            Some(options.clone()),
> +        )?;

should we add some context to the error here?

> +
> +        // can't be created before it is mounted, so we have to do it here
> +        proxmox_sys::fs::create_path(
> +            &full_store_path,
> +            Some(default_options.clone()),
> +            Some(options.clone()),
> +        )?;

and here?

> +
> +        info!(
> +            "mounting '{}'({}) to '{}'",
> +            datastore.name, datastore.path, mount_point
> +        );

if the message above becomes info, then this should probably say
something like "bind mounting '{full_store_path}' to '{mount_point}'"

> +        if let Err(err) =
> +            crate::tools::disks::bind_mount(Path::new(&full_store_path), Path::new(&mount_point))
> +        {
> +            debug!("unmounting '{}'", tmp_mount_path);
> +            let _ = crate::tools::disks::unmount_by_mountpoint(Path::new(&tmp_mount_path));

should we log errors here?

> +            let _ = std::fs::remove_dir(std::path::Path::new(&tmp_mount_path));

and here? if those fail, we might need additional cleanup?

> +            return Err(format_err!(
> +                "Datastore '{}' cound not be mounted: {}.",
> +                datastore.name,
> +                err
> +            ));
> +        }
> +
> +        debug!("unmounting '{}'", tmp_mount_path);

if the first message becomes info, this should too (and maybe add in
that the path being unmounted was temporary).

> +        crate::tools::disks::unmount_by_mountpoint(Path::new(&tmp_mount_path))?;
> +        std::fs::remove_dir(std::path::Path::new(&tmp_mount_path))?;

error context here might be nice as well
> +
> +        Ok(())
> +    } else {
> +        Err(format_err!(
> +            "Datastore '{}' cannot be mounted because it is not removable.",
> +            datastore.name
> +        ))
> +    }
> +}
> +
> +#[api(
> +    protected: true,
> +    input: {
> +        properties: {
> +            store: {
> +                schema: DATASTORE_SCHEMA,
> +            },
> +        }
> +    },
> +    returns: {
> +        schema: UPID_SCHEMA,
> +    },
> +    access: {
> +        permission: &Permission::Privilege(&["datastore", "{store}"], PRIV_DATASTORE_AUDIT, false),
> +    },
> +)]
> +/// Mount removable datastore.
> +pub fn mount(store: String, rpcenv: &mut dyn RpcEnvironment) -> Result<Value, Error> {
> +    let (section_config, _digest) = pbs_config::datastore::config()?;
> +    let datastore: DataStoreConfig = section_config.lookup("datastore", &store)?;
> +
> +    if datastore.backing_device.is_none() {
> +        bail!("datastore '{store}' is not removable");
> +    }
> +
> +    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> +    let to_stdout = rpcenv.env_type() == RpcEnvironmentType::CLI;
> +
> +    let upid = WorkerTask::new_thread(
> +        "mount-device",
> +        Some(store),
> +        auth_id.to_string(),
> +        to_stdout,
> +        move |_worker| do_mount_device(datastore),
> +    )?;
> +
> +    Ok(json!(upid))
> +}
> +
> +fn unset_unmount_maintenance(store: &str) -> Result<(), Error> {
> +    let _lock = pbs_config::datastore::lock_config()?;
> +    let (mut section_config, _digest) = pbs_config::datastore::config()?;
> +    let mut store_config: DataStoreConfig = section_config.lookup("datastore", store)?;
> +    if store_config
> +        .get_maintenance_mode()
> +        .map_or(true, |m| m.ty != MaintenanceType::Unmount)
> +    {
> +        bail!("Maintenance mode should have been 'Unmount'")
> +    }
> +    store_config.maintenance_mode = None;
> +    section_config.set_data(store, "datastore", &store_config)?;
> +    pbs_config::datastore::save_config(&section_config)?;
> +    Ok(())
> +}
> +
> +fn do_unmount_device(
> +    datastore: DataStoreConfig,
> +    worker: Option<&dyn WorkerTaskContext>,
> +) -> Result<(), Error> {
> +    let mut active_operations = task_tracking::get_active_operations(&datastore.name)?;
> +    let mut old_status = String::new();
> +    while active_operations.read + active_operations.write > 0 {
> +        if let Some(worker) = worker {
> +            if worker.abort_requested() {
> +                unset_unmount_maintenance(&datastore.name)?;

this error should be caught and converted to a warning

> +                bail!("aborted, due to user request");

else this much more important information might not be printed

> +            }

this check should also be done below the loop, else this is racy..

> +            let status = format!(
> +                "cannot unmount yet, still {} read and {} write operations active",

this reads a bit strange language-wise, maybe it can be rephrased?

unmounting not possible yet, there are still ..

> +                active_operations.read, active_operations.write
> +            );
> +            if status != old_status {
> +                info!("{status}");
> +                old_status = status;
> +            }
> +        }
> +        std::thread::sleep(std::time::Duration::from_secs(1));
> +        active_operations = task_tracking::get_active_operations(&datastore.name)?;
> +    }
> +    if let Some(mount_point) = datastore.get_mount_point() {
> +        crate::tools::disks::unmount_by_mountpoint(Path::new(&mount_point))?;
> +        unset_unmount_maintenance(&datastore.name)?;

so if I clear the maintenance mode, it will get unmounted anyway, and
only then tell me that the maintenance mode is unexpected? this should
re-lock and read the config before unmounting..

that likely means you actually want the helper above to give you the
lock and check the state, and then have a second helper to unset it and
write the config out (if we had proper locked configs as types this
would be easier :()..

> +    }
> +    Ok(())
> +}
> +
> +#[api(
> +    protected: true,
> +    input: {
> +        properties: {
> +            store: { schema: DATASTORE_SCHEMA },
> +        },
> +    },
> +    returns: {
> +        schema: UPID_SCHEMA,
> +    },
> +    access: {
> +        permission: &Permission::Privilege(&["datastore", "{store}"], PRIV_DATASTORE_MODIFY, true),
> +    }
> +)]
> +/// Unmount a removable device that is associated with the datastore
> +pub async fn unmount(store: String, rpcenv: &mut dyn RpcEnvironment) -> Result<Value, Error> {
> +    let _lock = pbs_config::datastore::lock_config()?;
> +    let (mut section_config, _digest) = pbs_config::datastore::config()?;
> +    let mut datastore: DataStoreConfig = section_config.lookup("datastore", &store)?;
> +
> +    if datastore.backing_device.is_none() {
> +        bail!("datastore '{store}' is not removable");
> +    }
> +
> +    let mount_status = datastore
> +        .get_mount_point()
> +        .zip(datastore.backing_device.as_ref())
> +        .map(|(mount_point, device_uuid)| {
> +            is_datastore_mounted_at(mount_point, device_uuid.to_string())
> +        });

another variant ;)

> +
> +    if mount_status == Some(false) {
> +        bail!("datastore '{store}' is not mounted");
> +    }
> +
> +    datastore.set_maintenance_mode(Some(MaintenanceMode {
> +        ty: MaintenanceType::Unmount,
> +        message: None,
> +    }))?;
> +    section_config.set_data(&store, "datastore", &datastore)?;
> +    pbs_config::datastore::save_config(&section_config)?;
> +
> +    drop(_lock);
> +
> +    let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
> +    let to_stdout = rpcenv.env_type() == RpcEnvironmentType::CLI;
> +
> +    if let Ok(proxy_pid) = proxmox_rest_server::read_pid(pbs_buildcfg::PROXMOX_BACKUP_PROXY_PID_FN)
> +    {
> +        let sock = proxmox_daemon::command_socket::path_from_pid(proxy_pid);
> +        let _ = proxmox_daemon::command_socket::send_raw(
> +            sock,
> +            &format!(
> +                "{{\"command\":\"update-datastore-cache\",\"args\":\"{}\"}}\n",
> +                &store
> +            ),
> +        )
> +        .await;
> +    }
> +
> +    let upid = WorkerTask::new_thread(
> +        "unmount-device",
> +        Some(store),
> +        auth_id.to_string(),
> +        to_stdout,
> +        move |worker| do_unmount_device(datastore, Some(&worker)),
> +    )?;
> +
> +    Ok(json!(upid))
> +}
> +
>  #[sortable]
>  const DATASTORE_INFO_SUBDIRS: SubdirMap = &[
>      (
> @@ -2422,6 +2667,7 @@ const DATASTORE_INFO_SUBDIRS: SubdirMap = &[
>              .get(&API_METHOD_LIST_GROUPS)
>              .delete(&API_METHOD_DELETE_GROUP),
>      ),
> +    ("mount", &Router::new().post(&API_METHOD_MOUNT)),
>      (
>          "namespace",
>          // FIXME: move into datastore:: sub-module?!
> @@ -2456,6 +2702,7 @@ const DATASTORE_INFO_SUBDIRS: SubdirMap = &[
>              .delete(&API_METHOD_DELETE_SNAPSHOT),
>      ),
>      ("status", &Router::new().get(&API_METHOD_STATUS)),
> +    ("unmount", &Router::new().post(&API_METHOD_UNMOUNT)),
>      (
>          "upload-backup-log",
>          &Router::new().upload(&API_METHOD_UPLOAD_BACKUP_LOG),
> -- 
> 2.39.5
> 
> 
> 
> _______________________________________________
> pbs-devel mailing list
> pbs-devel at lists.proxmox.com
> https://lists.proxmox.com/cgi-bin/mailman/listinfo/pbs-devel
> 
> 
> 




More information about the pbs-devel mailing list