[pbs-devel] [PATCH proxmox-backup v2 2/2] Automatically select a drive (if part of a changer) when loading tapes

Dominik Csapak d.csapak at proxmox.com
Mon Feb 17 12:49:17 CET 2025


One thing i seemingly did not notice last time around:

this 'auto' choosing is just best-effort, no?
there is no mechanism from parallel request to block each other?

e.g. 2 load_media start simultaneously

both run until they chose drive1, -> one of the operations will use it
the other will either fail or timeout

since the update only happens on the actual 'load_media'

would it be possible to hold a lock over 'choose_drives'
that could also update the usage here ?

then the choosing might take a bit longer, but we could avoid the situation above

Also a few smaller comments inline:

On 2/6/25 13:28, Laurențiu Leahu-Vlăducu wrote:
> This patch adds the possibility to load tapes into drives
> automatically by specifying a changer. This is useful for larger tape
> libraries. Choosing a drive is done by using the drive that has not
> been used the longest.
> 
> At the moment, this patch implements the functionality for automatic
> loading both over the API and using the proxmox-backup CLI tool, as
> well as in the web UI when selecting a changer. A second patch will
> add the same functionality when configuring backup jobs.
> 
> Partially fixes #3351
> 
> Signed-off-by: Laurențiu Leahu-Vlăducu <l.leahu-vladucu at proxmox.com>
> ---
>   src/api2/tape/changer.rs        | 131 +++++++++++++++++++++++++++++++-
>   src/bin/proxmox-backup-proxy.rs |   2 +-
>   src/bin/proxmox-tape.rs         |  48 ++++++++++--
>   src/tape/changer/mod.rs         |  23 +++++-
>   src/tape/drive/virtual_tape.rs  |   8 +-
>   src/tape/drive_info.rs          |  56 ++++++++++++++
>   src/tape/mod.rs                 |   1 +
>   www/tape/ChangerStatus.js       |  15 +++-
>   8 files changed, 266 insertions(+), 18 deletions(-)
>   create mode 100644 src/tape/drive_info.rs
> 
> diff --git a/src/api2/tape/changer.rs b/src/api2/tape/changer.rs
> index 7ecf7bff..aa06804d 100644
> --- a/src/api2/tape/changer.rs
> +++ b/src/api2/tape/changer.rs
> @@ -1,14 +1,15 @@
>   use std::collections::HashMap;
>   
> -use anyhow::Error;
> +use anyhow::{bail, Error};
>   use serde_json::Value;
>   
>   use proxmox_router::{list_subdirs_api_method, Permission, Router, RpcEnvironment, SubdirMap};
>   use proxmox_schema::api;
>   
>   use pbs_api_types::{
> -    Authid, ChangerListEntry, LtoTapeDrive, MtxEntryKind, MtxStatusEntry, ScsiTapeChanger,
> -    CHANGER_NAME_SCHEMA, PRIV_TAPE_AUDIT, PRIV_TAPE_READ,
> +    Authid, ChangerListEntry, DeviceActivity, DriveListEntry, LtoTapeDrive, MtxEntryKind,
> +    MtxStatusEntry, ScsiTapeChanger, CHANGER_NAME_SCHEMA, MEDIA_LABEL_SCHEMA, PRIV_TAPE_AUDIT,
> +    PRIV_TAPE_READ, UPID_SCHEMA,
>   };
>   use pbs_config::CachedUserInfo;
>   use pbs_tape::{
> @@ -199,7 +200,131 @@ pub fn list_changers(
>       Ok(list)
>   }
>   
> +#[api(
> +    input: {
> +        properties: {
> +            name: {
> +                schema: CHANGER_NAME_SCHEMA,
> +            },
> +            "label-text": {
> +                schema: MEDIA_LABEL_SCHEMA,
> +            },
> +        },
> +    },
> +    returns: {
> +        schema: UPID_SCHEMA,
> +    },
> +    access: {
> +        permission: &Permission::Privilege(&["tape", "device", "{name}"], PRIV_TAPE_READ, false),
> +    },
> +)]
> +/// Load media with specified label
> +///
> +/// Issue a media load request to the associated changer device.
> +pub fn load_media(
> +    name: String,
> +    label_text: String,
> +    rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<Value, Error> {
> +    let drive = choose_drive(&name, rpcenv);
> +    super::drive::load_media(drive?, label_text, rpcenv)
> +}
> +
> +#[api(
> +    input: {
> +        properties: {
> +            name: {
> +                schema: CHANGER_NAME_SCHEMA,
> +            },
> +            "source-slot": {
> +                description: "Source slot number.",
> +                minimum: 1,
> +            },
> +        },
> +    },
> +    access: {
> +        permission: &Permission::Privilege(&["tape", "device", "{name}"], PRIV_TAPE_READ, false),
> +    },
> +)]
> +/// Load media from the specified slot
> +///
> +/// Issue a media load request to the associated changer device.
> +pub async fn load_slot(
> +    name: String,
> +    source_slot: u64,
> +    rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<(), Error> {
> +    let drive = choose_drive(&name, rpcenv);
> +    super::drive::load_slot(drive?, source_slot).await
> +}
> +
> +/// Returns the idle drives associated with the specified changer.
> +fn get_idle_drives(
> +    changer: &str,
> +    rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<Vec<DriveListEntry>, Error> {
> +    let drives = super::drive::list_drives(Some(changer.to_string()), true, Value::Null, rpcenv)
> +        .map_err(|err| anyhow::format_err!("cannot query drives: {err}"))?;
> +
> +    let filter = drives
> +        .into_iter()
> +        .filter(|entry| matches!(entry.activity, None | Some(DeviceActivity::NoActivity)));
> +
> +    Ok(filter.collect())
> +}
> +
> +/// Returns the drives sorted by the last usage.
> +/// The first drive in the returned vector is the one that has not been used the longest, or never.
> +fn get_drives_sorted_by_last_usage(

would it maybe make sense to filter out the non-idle drives here already?
That way the code in 'choose_drive' would become even more simple
(though no hard feelings for this on my side)

> +    changer: &str,
> +    rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<Vec<DriveListEntry>, Error> {
> +    let drives_last_usage = crate::tape::drive_info::get_drives_last_usage()?;
> +    let mut drives = super::drive::list_drives(Some(changer.into()), true, Value::Null, rpcenv)?;
> +    drives.sort_by(|first, second| {
> +        let first_usage = drives_last_usage.drives.get(&first.config.name);
> +        let second_usage = drives_last_usage.drives.get(&second.config.name);
> +
> +        match (first_usage, second_usage) {
> +            (Some(first_usage), Some(second_usage)) => {
> +                first_usage.last_usage.cmp(&second_usage.last_usage)
> +            }
> +            (Some(_), None) => std::cmp::Ordering::Greater,
> +            (None, Some(_)) => std::cmp::Ordering::Less,
> +            (None, None) => std::cmp::Ordering::Equal,
> +        }
> +    });
> +
> +    Ok(drives)
> +}
> +
> +fn choose_drive(changer: &str, rpcenv: &mut dyn RpcEnvironment) -> Result<String, Error> {
> +    let idle_drives = get_idle_drives(changer, rpcenv)?;
> +    let drives_in_order = get_drives_sorted_by_last_usage(changer, rpcenv);
> +
> +    // If the drives info could not be retrieved, simply try to use the first one (if possible).
> +    let Ok(drives_in_order) = drives_in_order else {
> +        match idle_drives.first() {
> +            Some(idle_drive) => return Ok(idle_drive.config.name.clone()),
> +            None => bail!("there are no idle drives to choose for automatic drive assignment"),
> +        }
> +    };

AFAIK we don't use the let X = foo else {} syntax yet. and personally I'm more in favor of

let drives = match drives {
     Ok(drives) => drives,
     Err(err) => {
         ..
     }
};

here for two reasons:

* it's a bit more obvious that there are two cases (I had to look twice to see the 'else',
   but that might just me not being used to this syntax)
* we can do something with err here, namely i'd like to log that with e.g. log::warn
   (retrieving the drives + usage should not run into an error, so having that in the
   log can only be good for debugging)


> +
> +    let chosen_drive = drives_in_order.iter().find(|drive| {
> +        idle_drives
> +            .iter()
> +            .any(|idle_drive| drive.config.name == idle_drive.config.name)
> +    });
> +
> +    match chosen_drive {
> +        Some(chosen_drive) => Ok(chosen_drive.config.name.clone()),
> +        None => bail!("there are no idle drives to choose for automatic drive assignment"),
> +    }
> +}
> +
>   const SUBDIRS: SubdirMap = &[
> +    ("load-media", &Router::new().post(&API_METHOD_LOAD_MEDIA)),
> +    ("load-slot", &Router::new().post(&API_METHOD_LOAD_SLOT)),
>       ("status", &Router::new().get(&API_METHOD_GET_STATUS)),
>       ("transfer", &Router::new().post(&API_METHOD_TRANSFER)),
>   ];
> diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs
> index edd0a4cc..a18384cb 100644
> --- a/src/bin/proxmox-backup-proxy.rs
> +++ b/src/bin/proxmox-backup-proxy.rs
> @@ -432,7 +432,7 @@ async fn run_task_scheduler() {
>                   } else {
>                       tracing::error!("task scheduler panic - cannot show error message due to unknown error type")
>                   }
> -            },
> +            }
>               Ok(Err(err)) => tracing::error!("task scheduler failed - {err:?}"),
>               Ok(Ok(_)) => {}
>           }

this hunk is
* not related
* not applyable anymore (so you have to rebase anyway)

> diff --git a/src/bin/proxmox-tape.rs b/src/bin/proxmox-tape.rs
> index 8e8584b3..752ae255 100644
> --- a/src/bin/proxmox-tape.rs
> +++ b/src/bin/proxmox-tape.rs
> @@ -1,6 +1,7 @@
>   use std::collections::HashMap;
>   
>   use anyhow::{bail, format_err, Error};
> +use pbs_config::drive::complete_changer_name;
>   use serde_json::{json, Value};
>   
>   use proxmox_human_byte::HumanByte;
> @@ -20,9 +21,10 @@ use pbs_config::drive::complete_drive_name;
>   use pbs_config::media_pool::complete_pool_name;
>   
>   use pbs_api_types::{
> -    Authid, BackupNamespace, GroupListItem, Userid, DATASTORE_MAP_LIST_SCHEMA, DATASTORE_SCHEMA,
> -    DRIVE_NAME_SCHEMA, GROUP_FILTER_LIST_SCHEMA, MEDIA_LABEL_SCHEMA, MEDIA_POOL_NAME_SCHEMA,
> -    NS_MAX_DEPTH_SCHEMA, TAPE_RESTORE_NAMESPACE_SCHEMA, TAPE_RESTORE_SNAPSHOT_SCHEMA,
> +    Authid, BackupNamespace, GroupListItem, Userid, CHANGER_NAME_SCHEMA_AUTOMATIC_DRIVE_ASSIGNMENT,
> +    DATASTORE_MAP_LIST_SCHEMA, DATASTORE_SCHEMA, DRIVE_NAME_SCHEMA, GROUP_FILTER_LIST_SCHEMA,
> +    MEDIA_LABEL_SCHEMA, MEDIA_POOL_NAME_SCHEMA, NS_MAX_DEPTH_SCHEMA, TAPE_RESTORE_NAMESPACE_SCHEMA,
> +    TAPE_RESTORE_SNAPSHOT_SCHEMA,
>   };
>   use pbs_tape::{BlockReadError, MediaContentHeader, PROXMOX_BACKUP_CONTENT_HEADER_MAGIC_1_0};
>   
> @@ -214,6 +216,10 @@ async fn eject_media(mut param: Value) -> Result<(), Error> {
>                   schema: DRIVE_NAME_SCHEMA,
>                   optional: true,
>               },
> +            changer: {
> +                schema: CHANGER_NAME_SCHEMA_AUTOMATIC_DRIVE_ASSIGNMENT,
> +                optional: true,
> +            },
>               "label-text": {
>                   schema: MEDIA_LABEL_SCHEMA,
>               },
> @@ -230,11 +236,21 @@ async fn load_media(mut param: Value) -> Result<(), Error> {
>   
>       let (config, _digest) = pbs_config::drive::config()?;
>   
> -    let drive = extract_drive_name(&mut param, &config)?;
> +    let drive = extract_drive_name(&mut param, &config);
> +    let changer = param["changer"].as_str();
> +
> +    let path = match (changer, drive) {
> +        (Some(changer), Err(_)) => format!("api2/json/tape/changer/{changer}/load-media"),
> +        (None, Ok(drive)) => format!("api2/json/tape/drive/{drive}/load-media"),
> +        _ => bail!("either a changer or a drive has to be specified when loading media"),
> +    };
> +
> +    if let Some(param) = param.as_object_mut() {
> +        param.remove("changer");
> +    }

maybe we could move that remove up to where we extract the parameter, but no hard feelings
from my side

>   
>       let client = connect_to_localhost()?;
>   
> -    let path = format!("api2/json/tape/drive/{}/load-media", drive);
>       let result = client.post(&path, Some(param)).await?;
>   
>       view_task_result(&client, result, &output_format).await?;
> @@ -276,6 +292,10 @@ async fn export_media(mut param: Value) -> Result<(), Error> {
>                   schema: DRIVE_NAME_SCHEMA,
>                   optional: true,
>               },
> +            changer: {
> +                schema: CHANGER_NAME_SCHEMA_AUTOMATIC_DRIVE_ASSIGNMENT,
> +                optional: true,
> +            },
>               "source-slot": {
>                   description: "Source slot number.",
>                   type: u64,
> @@ -288,11 +308,21 @@ async fn export_media(mut param: Value) -> Result<(), Error> {
>   async fn load_media_from_slot(mut param: Value) -> Result<(), Error> {
>       let (config, _digest) = pbs_config::drive::config()?;
>   
> -    let drive = extract_drive_name(&mut param, &config)?;
> +    let drive = extract_drive_name(&mut param, &config);
> +    let changer = param["changer"].as_str();
> +
> +    let path = match (changer, drive) {
> +        (Some(changer), Err(_)) => format!("api2/json/tape/changer/{changer}/load-slot"),
> +        (None, Ok(drive)) => format!("api2/json/tape/drive/{drive}/load-slot"),
> +        _ => bail!("either a changer or a drive has to be specified when loading media"),
> +    };
> +
> +    if let Some(param) = param.as_object_mut() {
> +        param.remove("changer");
> +    }
>   

here too

>       let client = connect_to_localhost()?;
>   
> -    let path = format!("api2/json/tape/drive/{}/load-slot", drive);
>       client.post(&path, Some(param)).await?;
>   
>       Ok(())
> @@ -1091,13 +1121,15 @@ fn main() {
>               CliCommand::new(&API_METHOD_LOAD_MEDIA)
>                   .arg_param(&["label-text"])
>                   .completion_cb("drive", complete_drive_name)
> +                .completion_cb("changer", complete_changer_name)
>                   .completion_cb("label-text", complete_media_label_text),
>           )
>           .insert(
>               "load-media-from-slot",
>               CliCommand::new(&API_METHOD_LOAD_MEDIA_FROM_SLOT)
>                   .arg_param(&["source-slot"])
> -                .completion_cb("drive", complete_drive_name),
> +                .completion_cb("drive", complete_drive_name)
> +                .completion_cb("changer", complete_changer_name),
>           )
>           .insert(
>               "unload",
> diff --git a/src/tape/changer/mod.rs b/src/tape/changer/mod.rs
> index 18ea0f46..22c78180 100644
> --- a/src/tape/changer/mod.rs
> +++ b/src/tape/changer/mod.rs
> @@ -273,6 +273,21 @@ pub trait MediaChange {
>       }
>   }
>   
> +/// Updates the drive's last usage time to now.
> +pub(super) fn update_drive_usage(drive: &str) -> Result<(), Error> {
> +    let _lock = crate::tape::drive_info::lock()?;
> +
> +    let mut drives_last_usage = crate::tape::drive_info::get_drives_last_usage()?;
> +
> +    let now = proxmox_time::epoch_i64();
> +    drives_last_usage
> +        .drives
> +        .entry(drive.into())
> +        .or_default()
> +        .last_usage = now;
> +    crate::tape::drive_info::save_config(&drives_last_usage)
> +}
> +
>   const USE_MTX: bool = false;
>   
>   impl ScsiMediaChange for ScsiTapeChanger {
> @@ -423,7 +438,13 @@ impl MediaChange for MtxMediaChanger {
>       }
>   
>       fn load_media_from_slot(&mut self, slot: u64) -> Result<MtxStatus, Error> {
> -        self.config.load_slot(slot, self.drive_number())
> +        let status = self.config.load_slot(slot, self.drive_number())?;
> +
> +        if let Err(err) = update_drive_usage(self.drive_name()) {
> +            log::warn!("could not update drive usage: {err}");
> +        }
> +
> +        Ok(status)
>       }
>   
>       fn unload_media(&mut self, target_slot: Option<u64>) -> Result<MtxStatus, Error> {
> diff --git a/src/tape/drive/virtual_tape.rs b/src/tape/drive/virtual_tape.rs
> index 866e4d32..213f17fe 100644
> --- a/src/tape/drive/virtual_tape.rs
> +++ b/src/tape/drive/virtual_tape.rs
> @@ -567,7 +567,13 @@ impl MediaChange for VirtualTapeHandle {
>           };
>           self.store_status(&status)?;
>   
> -        self.status()
> +        let status = self.status()?;
> +
> +        if let Err(err) = crate::tape::changer::update_drive_usage(self.drive_name()) {
> +            log::warn!("could not update drive usage: {err}");
> +        }
> +
> +        Ok(status)
>       }
>   
>       fn unload_media(&mut self, _target_slot: Option<u64>) -> Result<MtxStatus, Error> {
> diff --git a/src/tape/drive_info.rs b/src/tape/drive_info.rs
> new file mode 100644
> index 00000000..5bfaab07
> --- /dev/null
> +++ b/src/tape/drive_info.rs
> @@ -0,0 +1,56 @@
> +//! Serialize/deserialize tpae drive info (e.g. useful for statistics)
> +//!
> +//! This can be used to store a state over a longer period of time (e.g. last tape drive usage).
> +
> +use anyhow::Error;
> +use proxmox_product_config::ApiLockGuard;
> +use serde::{Deserialize, Serialize};
> +use std::collections::HashMap;
> +
> +/// Drive info file name
> +/// This has a generic name to be able to extend the information in the future.
> +pub const DRIVE_INFO_FILENAME: &str = concat!(
> +    pbs_buildcfg::PROXMOX_BACKUP_STATE_DIR_M!(),
> +    "/tape_drive_info.json"
> +);
> +/// Lock file name (used to prevent concurrent access)
> +pub const DRIVE_INFO_LOCKFILE: &str = concat!(
> +    pbs_buildcfg::PROXMOX_BACKUP_STATE_DIR_M!(),
> +    "/.tape_drive_info.json.lock"
> +);
> +
> +#[derive(Serialize, Deserialize, Default)]
> +pub struct SingleDriveLastUsage {
> +    #[serde(with = "proxmox_serde::epoch_as_rfc3339")]
> +    pub last_usage: i64,
> +}
> +
> +#[derive(Serialize, Deserialize, Default)]
> +pub struct DrivesLastUsage {
> +    pub drives: HashMap<String, SingleDriveLastUsage>,
> +}
> +
> +/// Get exclusive lock
> +pub fn lock() -> Result<ApiLockGuard, Error> {
> +    proxmox_product_config::open_api_lockfile(DRIVE_INFO_LOCKFILE, Option::None, true)
> +}
> +
> +/// Read and parse the drive info file
> +/// Currently used tu get the drive's last usage
> +pub fn get_drives_last_usage() -> Result<DrivesLastUsage, Error> {
> +    let content = proxmox_sys::fs::file_read_optional_string(DRIVE_INFO_FILENAME)?;
> +
> +    match content {
> +        Some(content) => {
> +            let result = serde_json::from_str::<DrivesLastUsage>(&content)?;
> +            Ok(result)
> +        }
> +        None => Ok(DrivesLastUsage::default()),
> +    }
> +}
> +
> +/// Save the configuration file
> +pub fn save_config(data: &DrivesLastUsage) -> Result<(), Error> {
> +    let json = serde_json::to_string(data)?;
> +    proxmox_product_config::replace_config(DRIVE_INFO_FILENAME, json.as_bytes())
> +}
> diff --git a/src/tape/mod.rs b/src/tape/mod.rs
> index f276f948..8b87152d 100644
> --- a/src/tape/mod.rs
> +++ b/src/tape/mod.rs
> @@ -20,6 +20,7 @@ pub use inventory::*;
>   
>   pub mod changer;
>   pub mod drive;
> +pub mod drive_info;
>   pub mod encryption_keys;
>   
>   mod media_pool;
> diff --git a/www/tape/ChangerStatus.js b/www/tape/ChangerStatus.js
> index e18af90e..1b81db34 100644
> --- a/www/tape/ChangerStatus.js
> +++ b/www/tape/ChangerStatus.js
> @@ -222,12 +222,16 @@ Ext.define('PBS.TapeManagement.ChangerStatus', {
>   		    autoShow: true,
>   		    submitText: gettext('OK'),
>   		    title: gettext('Load Media into Drive'),
> -		    url: `/api2/extjs/tape/drive`,
> +		    url: `/api2/extjs/tape`,
>   		    method: 'POST',
>   		    submitUrl: function(url, values) {
> -			let drive = values.drive;
> -			delete values.drive;
> -			return `${url}/${encodeURIComponent(drive)}/${apiCall}`;
> +			    let drive = values.drive;
> +			    delete values.drive;
> +
> +          let type = drive ? "drive" : "changer";
> +          let item = drive ? drive : changer;
> +
> +          return `${url}/${type}/${encodeURIComponent(item)}/${apiCall}`;

here the indentation is wrong

>   		    },
>   		    items: [
>   			label !== "" ? {
> @@ -248,6 +252,9 @@ Ext.define('PBS.TapeManagement.ChangerStatus', {
>   			    fieldLabel: gettext('Drive'),
>   			    changer: changer,
>   			    name: 'drive',
> +			    emptyText: gettext('Choose Automatically'),
> +			    allowBlank: true,
> +			    autoSelect: false,
>   			},
>   		    ],
>   		    listeners: {





More information about the pbs-devel mailing list