[pbs-devel] [PATCH proxmox-backup v2 2/2] Automatically select a drive (if part of a changer) when loading tapes
Dominik Csapak
d.csapak at proxmox.com
Mon Feb 17 12:49:17 CET 2025
One thing i seemingly did not notice last time around:
this 'auto' choosing is just best-effort, no?
there is no mechanism from parallel request to block each other?
e.g. 2 load_media start simultaneously
both run until they chose drive1, -> one of the operations will use it
the other will either fail or timeout
since the update only happens on the actual 'load_media'
would it be possible to hold a lock over 'choose_drives'
that could also update the usage here ?
then the choosing might take a bit longer, but we could avoid the situation above
Also a few smaller comments inline:
On 2/6/25 13:28, Laurențiu Leahu-Vlăducu wrote:
> This patch adds the possibility to load tapes into drives
> automatically by specifying a changer. This is useful for larger tape
> libraries. Choosing a drive is done by using the drive that has not
> been used the longest.
>
> At the moment, this patch implements the functionality for automatic
> loading both over the API and using the proxmox-backup CLI tool, as
> well as in the web UI when selecting a changer. A second patch will
> add the same functionality when configuring backup jobs.
>
> Partially fixes #3351
>
> Signed-off-by: Laurențiu Leahu-Vlăducu <l.leahu-vladucu at proxmox.com>
> ---
> src/api2/tape/changer.rs | 131 +++++++++++++++++++++++++++++++-
> src/bin/proxmox-backup-proxy.rs | 2 +-
> src/bin/proxmox-tape.rs | 48 ++++++++++--
> src/tape/changer/mod.rs | 23 +++++-
> src/tape/drive/virtual_tape.rs | 8 +-
> src/tape/drive_info.rs | 56 ++++++++++++++
> src/tape/mod.rs | 1 +
> www/tape/ChangerStatus.js | 15 +++-
> 8 files changed, 266 insertions(+), 18 deletions(-)
> create mode 100644 src/tape/drive_info.rs
>
> diff --git a/src/api2/tape/changer.rs b/src/api2/tape/changer.rs
> index 7ecf7bff..aa06804d 100644
> --- a/src/api2/tape/changer.rs
> +++ b/src/api2/tape/changer.rs
> @@ -1,14 +1,15 @@
> use std::collections::HashMap;
>
> -use anyhow::Error;
> +use anyhow::{bail, Error};
> use serde_json::Value;
>
> use proxmox_router::{list_subdirs_api_method, Permission, Router, RpcEnvironment, SubdirMap};
> use proxmox_schema::api;
>
> use pbs_api_types::{
> - Authid, ChangerListEntry, LtoTapeDrive, MtxEntryKind, MtxStatusEntry, ScsiTapeChanger,
> - CHANGER_NAME_SCHEMA, PRIV_TAPE_AUDIT, PRIV_TAPE_READ,
> + Authid, ChangerListEntry, DeviceActivity, DriveListEntry, LtoTapeDrive, MtxEntryKind,
> + MtxStatusEntry, ScsiTapeChanger, CHANGER_NAME_SCHEMA, MEDIA_LABEL_SCHEMA, PRIV_TAPE_AUDIT,
> + PRIV_TAPE_READ, UPID_SCHEMA,
> };
> use pbs_config::CachedUserInfo;
> use pbs_tape::{
> @@ -199,7 +200,131 @@ pub fn list_changers(
> Ok(list)
> }
>
> +#[api(
> + input: {
> + properties: {
> + name: {
> + schema: CHANGER_NAME_SCHEMA,
> + },
> + "label-text": {
> + schema: MEDIA_LABEL_SCHEMA,
> + },
> + },
> + },
> + returns: {
> + schema: UPID_SCHEMA,
> + },
> + access: {
> + permission: &Permission::Privilege(&["tape", "device", "{name}"], PRIV_TAPE_READ, false),
> + },
> +)]
> +/// Load media with specified label
> +///
> +/// Issue a media load request to the associated changer device.
> +pub fn load_media(
> + name: String,
> + label_text: String,
> + rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<Value, Error> {
> + let drive = choose_drive(&name, rpcenv);
> + super::drive::load_media(drive?, label_text, rpcenv)
> +}
> +
> +#[api(
> + input: {
> + properties: {
> + name: {
> + schema: CHANGER_NAME_SCHEMA,
> + },
> + "source-slot": {
> + description: "Source slot number.",
> + minimum: 1,
> + },
> + },
> + },
> + access: {
> + permission: &Permission::Privilege(&["tape", "device", "{name}"], PRIV_TAPE_READ, false),
> + },
> +)]
> +/// Load media from the specified slot
> +///
> +/// Issue a media load request to the associated changer device.
> +pub async fn load_slot(
> + name: String,
> + source_slot: u64,
> + rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<(), Error> {
> + let drive = choose_drive(&name, rpcenv);
> + super::drive::load_slot(drive?, source_slot).await
> +}
> +
> +/// Returns the idle drives associated with the specified changer.
> +fn get_idle_drives(
> + changer: &str,
> + rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<Vec<DriveListEntry>, Error> {
> + let drives = super::drive::list_drives(Some(changer.to_string()), true, Value::Null, rpcenv)
> + .map_err(|err| anyhow::format_err!("cannot query drives: {err}"))?;
> +
> + let filter = drives
> + .into_iter()
> + .filter(|entry| matches!(entry.activity, None | Some(DeviceActivity::NoActivity)));
> +
> + Ok(filter.collect())
> +}
> +
> +/// Returns the drives sorted by the last usage.
> +/// The first drive in the returned vector is the one that has not been used the longest, or never.
> +fn get_drives_sorted_by_last_usage(
would it maybe make sense to filter out the non-idle drives here already?
That way the code in 'choose_drive' would become even more simple
(though no hard feelings for this on my side)
> + changer: &str,
> + rpcenv: &mut dyn RpcEnvironment,
> +) -> Result<Vec<DriveListEntry>, Error> {
> + let drives_last_usage = crate::tape::drive_info::get_drives_last_usage()?;
> + let mut drives = super::drive::list_drives(Some(changer.into()), true, Value::Null, rpcenv)?;
> + drives.sort_by(|first, second| {
> + let first_usage = drives_last_usage.drives.get(&first.config.name);
> + let second_usage = drives_last_usage.drives.get(&second.config.name);
> +
> + match (first_usage, second_usage) {
> + (Some(first_usage), Some(second_usage)) => {
> + first_usage.last_usage.cmp(&second_usage.last_usage)
> + }
> + (Some(_), None) => std::cmp::Ordering::Greater,
> + (None, Some(_)) => std::cmp::Ordering::Less,
> + (None, None) => std::cmp::Ordering::Equal,
> + }
> + });
> +
> + Ok(drives)
> +}
> +
> +fn choose_drive(changer: &str, rpcenv: &mut dyn RpcEnvironment) -> Result<String, Error> {
> + let idle_drives = get_idle_drives(changer, rpcenv)?;
> + let drives_in_order = get_drives_sorted_by_last_usage(changer, rpcenv);
> +
> + // If the drives info could not be retrieved, simply try to use the first one (if possible).
> + let Ok(drives_in_order) = drives_in_order else {
> + match idle_drives.first() {
> + Some(idle_drive) => return Ok(idle_drive.config.name.clone()),
> + None => bail!("there are no idle drives to choose for automatic drive assignment"),
> + }
> + };
AFAIK we don't use the let X = foo else {} syntax yet. and personally I'm more in favor of
let drives = match drives {
Ok(drives) => drives,
Err(err) => {
..
}
};
here for two reasons:
* it's a bit more obvious that there are two cases (I had to look twice to see the 'else',
but that might just me not being used to this syntax)
* we can do something with err here, namely i'd like to log that with e.g. log::warn
(retrieving the drives + usage should not run into an error, so having that in the
log can only be good for debugging)
> +
> + let chosen_drive = drives_in_order.iter().find(|drive| {
> + idle_drives
> + .iter()
> + .any(|idle_drive| drive.config.name == idle_drive.config.name)
> + });
> +
> + match chosen_drive {
> + Some(chosen_drive) => Ok(chosen_drive.config.name.clone()),
> + None => bail!("there are no idle drives to choose for automatic drive assignment"),
> + }
> +}
> +
> const SUBDIRS: SubdirMap = &[
> + ("load-media", &Router::new().post(&API_METHOD_LOAD_MEDIA)),
> + ("load-slot", &Router::new().post(&API_METHOD_LOAD_SLOT)),
> ("status", &Router::new().get(&API_METHOD_GET_STATUS)),
> ("transfer", &Router::new().post(&API_METHOD_TRANSFER)),
> ];
> diff --git a/src/bin/proxmox-backup-proxy.rs b/src/bin/proxmox-backup-proxy.rs
> index edd0a4cc..a18384cb 100644
> --- a/src/bin/proxmox-backup-proxy.rs
> +++ b/src/bin/proxmox-backup-proxy.rs
> @@ -432,7 +432,7 @@ async fn run_task_scheduler() {
> } else {
> tracing::error!("task scheduler panic - cannot show error message due to unknown error type")
> }
> - },
> + }
> Ok(Err(err)) => tracing::error!("task scheduler failed - {err:?}"),
> Ok(Ok(_)) => {}
> }
this hunk is
* not related
* not applyable anymore (so you have to rebase anyway)
> diff --git a/src/bin/proxmox-tape.rs b/src/bin/proxmox-tape.rs
> index 8e8584b3..752ae255 100644
> --- a/src/bin/proxmox-tape.rs
> +++ b/src/bin/proxmox-tape.rs
> @@ -1,6 +1,7 @@
> use std::collections::HashMap;
>
> use anyhow::{bail, format_err, Error};
> +use pbs_config::drive::complete_changer_name;
> use serde_json::{json, Value};
>
> use proxmox_human_byte::HumanByte;
> @@ -20,9 +21,10 @@ use pbs_config::drive::complete_drive_name;
> use pbs_config::media_pool::complete_pool_name;
>
> use pbs_api_types::{
> - Authid, BackupNamespace, GroupListItem, Userid, DATASTORE_MAP_LIST_SCHEMA, DATASTORE_SCHEMA,
> - DRIVE_NAME_SCHEMA, GROUP_FILTER_LIST_SCHEMA, MEDIA_LABEL_SCHEMA, MEDIA_POOL_NAME_SCHEMA,
> - NS_MAX_DEPTH_SCHEMA, TAPE_RESTORE_NAMESPACE_SCHEMA, TAPE_RESTORE_SNAPSHOT_SCHEMA,
> + Authid, BackupNamespace, GroupListItem, Userid, CHANGER_NAME_SCHEMA_AUTOMATIC_DRIVE_ASSIGNMENT,
> + DATASTORE_MAP_LIST_SCHEMA, DATASTORE_SCHEMA, DRIVE_NAME_SCHEMA, GROUP_FILTER_LIST_SCHEMA,
> + MEDIA_LABEL_SCHEMA, MEDIA_POOL_NAME_SCHEMA, NS_MAX_DEPTH_SCHEMA, TAPE_RESTORE_NAMESPACE_SCHEMA,
> + TAPE_RESTORE_SNAPSHOT_SCHEMA,
> };
> use pbs_tape::{BlockReadError, MediaContentHeader, PROXMOX_BACKUP_CONTENT_HEADER_MAGIC_1_0};
>
> @@ -214,6 +216,10 @@ async fn eject_media(mut param: Value) -> Result<(), Error> {
> schema: DRIVE_NAME_SCHEMA,
> optional: true,
> },
> + changer: {
> + schema: CHANGER_NAME_SCHEMA_AUTOMATIC_DRIVE_ASSIGNMENT,
> + optional: true,
> + },
> "label-text": {
> schema: MEDIA_LABEL_SCHEMA,
> },
> @@ -230,11 +236,21 @@ async fn load_media(mut param: Value) -> Result<(), Error> {
>
> let (config, _digest) = pbs_config::drive::config()?;
>
> - let drive = extract_drive_name(&mut param, &config)?;
> + let drive = extract_drive_name(&mut param, &config);
> + let changer = param["changer"].as_str();
> +
> + let path = match (changer, drive) {
> + (Some(changer), Err(_)) => format!("api2/json/tape/changer/{changer}/load-media"),
> + (None, Ok(drive)) => format!("api2/json/tape/drive/{drive}/load-media"),
> + _ => bail!("either a changer or a drive has to be specified when loading media"),
> + };
> +
> + if let Some(param) = param.as_object_mut() {
> + param.remove("changer");
> + }
maybe we could move that remove up to where we extract the parameter, but no hard feelings
from my side
>
> let client = connect_to_localhost()?;
>
> - let path = format!("api2/json/tape/drive/{}/load-media", drive);
> let result = client.post(&path, Some(param)).await?;
>
> view_task_result(&client, result, &output_format).await?;
> @@ -276,6 +292,10 @@ async fn export_media(mut param: Value) -> Result<(), Error> {
> schema: DRIVE_NAME_SCHEMA,
> optional: true,
> },
> + changer: {
> + schema: CHANGER_NAME_SCHEMA_AUTOMATIC_DRIVE_ASSIGNMENT,
> + optional: true,
> + },
> "source-slot": {
> description: "Source slot number.",
> type: u64,
> @@ -288,11 +308,21 @@ async fn export_media(mut param: Value) -> Result<(), Error> {
> async fn load_media_from_slot(mut param: Value) -> Result<(), Error> {
> let (config, _digest) = pbs_config::drive::config()?;
>
> - let drive = extract_drive_name(&mut param, &config)?;
> + let drive = extract_drive_name(&mut param, &config);
> + let changer = param["changer"].as_str();
> +
> + let path = match (changer, drive) {
> + (Some(changer), Err(_)) => format!("api2/json/tape/changer/{changer}/load-slot"),
> + (None, Ok(drive)) => format!("api2/json/tape/drive/{drive}/load-slot"),
> + _ => bail!("either a changer or a drive has to be specified when loading media"),
> + };
> +
> + if let Some(param) = param.as_object_mut() {
> + param.remove("changer");
> + }
>
here too
> let client = connect_to_localhost()?;
>
> - let path = format!("api2/json/tape/drive/{}/load-slot", drive);
> client.post(&path, Some(param)).await?;
>
> Ok(())
> @@ -1091,13 +1121,15 @@ fn main() {
> CliCommand::new(&API_METHOD_LOAD_MEDIA)
> .arg_param(&["label-text"])
> .completion_cb("drive", complete_drive_name)
> + .completion_cb("changer", complete_changer_name)
> .completion_cb("label-text", complete_media_label_text),
> )
> .insert(
> "load-media-from-slot",
> CliCommand::new(&API_METHOD_LOAD_MEDIA_FROM_SLOT)
> .arg_param(&["source-slot"])
> - .completion_cb("drive", complete_drive_name),
> + .completion_cb("drive", complete_drive_name)
> + .completion_cb("changer", complete_changer_name),
> )
> .insert(
> "unload",
> diff --git a/src/tape/changer/mod.rs b/src/tape/changer/mod.rs
> index 18ea0f46..22c78180 100644
> --- a/src/tape/changer/mod.rs
> +++ b/src/tape/changer/mod.rs
> @@ -273,6 +273,21 @@ pub trait MediaChange {
> }
> }
>
> +/// Updates the drive's last usage time to now.
> +pub(super) fn update_drive_usage(drive: &str) -> Result<(), Error> {
> + let _lock = crate::tape::drive_info::lock()?;
> +
> + let mut drives_last_usage = crate::tape::drive_info::get_drives_last_usage()?;
> +
> + let now = proxmox_time::epoch_i64();
> + drives_last_usage
> + .drives
> + .entry(drive.into())
> + .or_default()
> + .last_usage = now;
> + crate::tape::drive_info::save_config(&drives_last_usage)
> +}
> +
> const USE_MTX: bool = false;
>
> impl ScsiMediaChange for ScsiTapeChanger {
> @@ -423,7 +438,13 @@ impl MediaChange for MtxMediaChanger {
> }
>
> fn load_media_from_slot(&mut self, slot: u64) -> Result<MtxStatus, Error> {
> - self.config.load_slot(slot, self.drive_number())
> + let status = self.config.load_slot(slot, self.drive_number())?;
> +
> + if let Err(err) = update_drive_usage(self.drive_name()) {
> + log::warn!("could not update drive usage: {err}");
> + }
> +
> + Ok(status)
> }
>
> fn unload_media(&mut self, target_slot: Option<u64>) -> Result<MtxStatus, Error> {
> diff --git a/src/tape/drive/virtual_tape.rs b/src/tape/drive/virtual_tape.rs
> index 866e4d32..213f17fe 100644
> --- a/src/tape/drive/virtual_tape.rs
> +++ b/src/tape/drive/virtual_tape.rs
> @@ -567,7 +567,13 @@ impl MediaChange for VirtualTapeHandle {
> };
> self.store_status(&status)?;
>
> - self.status()
> + let status = self.status()?;
> +
> + if let Err(err) = crate::tape::changer::update_drive_usage(self.drive_name()) {
> + log::warn!("could not update drive usage: {err}");
> + }
> +
> + Ok(status)
> }
>
> fn unload_media(&mut self, _target_slot: Option<u64>) -> Result<MtxStatus, Error> {
> diff --git a/src/tape/drive_info.rs b/src/tape/drive_info.rs
> new file mode 100644
> index 00000000..5bfaab07
> --- /dev/null
> +++ b/src/tape/drive_info.rs
> @@ -0,0 +1,56 @@
> +//! Serialize/deserialize tpae drive info (e.g. useful for statistics)
> +//!
> +//! This can be used to store a state over a longer period of time (e.g. last tape drive usage).
> +
> +use anyhow::Error;
> +use proxmox_product_config::ApiLockGuard;
> +use serde::{Deserialize, Serialize};
> +use std::collections::HashMap;
> +
> +/// Drive info file name
> +/// This has a generic name to be able to extend the information in the future.
> +pub const DRIVE_INFO_FILENAME: &str = concat!(
> + pbs_buildcfg::PROXMOX_BACKUP_STATE_DIR_M!(),
> + "/tape_drive_info.json"
> +);
> +/// Lock file name (used to prevent concurrent access)
> +pub const DRIVE_INFO_LOCKFILE: &str = concat!(
> + pbs_buildcfg::PROXMOX_BACKUP_STATE_DIR_M!(),
> + "/.tape_drive_info.json.lock"
> +);
> +
> +#[derive(Serialize, Deserialize, Default)]
> +pub struct SingleDriveLastUsage {
> + #[serde(with = "proxmox_serde::epoch_as_rfc3339")]
> + pub last_usage: i64,
> +}
> +
> +#[derive(Serialize, Deserialize, Default)]
> +pub struct DrivesLastUsage {
> + pub drives: HashMap<String, SingleDriveLastUsage>,
> +}
> +
> +/// Get exclusive lock
> +pub fn lock() -> Result<ApiLockGuard, Error> {
> + proxmox_product_config::open_api_lockfile(DRIVE_INFO_LOCKFILE, Option::None, true)
> +}
> +
> +/// Read and parse the drive info file
> +/// Currently used tu get the drive's last usage
> +pub fn get_drives_last_usage() -> Result<DrivesLastUsage, Error> {
> + let content = proxmox_sys::fs::file_read_optional_string(DRIVE_INFO_FILENAME)?;
> +
> + match content {
> + Some(content) => {
> + let result = serde_json::from_str::<DrivesLastUsage>(&content)?;
> + Ok(result)
> + }
> + None => Ok(DrivesLastUsage::default()),
> + }
> +}
> +
> +/// Save the configuration file
> +pub fn save_config(data: &DrivesLastUsage) -> Result<(), Error> {
> + let json = serde_json::to_string(data)?;
> + proxmox_product_config::replace_config(DRIVE_INFO_FILENAME, json.as_bytes())
> +}
> diff --git a/src/tape/mod.rs b/src/tape/mod.rs
> index f276f948..8b87152d 100644
> --- a/src/tape/mod.rs
> +++ b/src/tape/mod.rs
> @@ -20,6 +20,7 @@ pub use inventory::*;
>
> pub mod changer;
> pub mod drive;
> +pub mod drive_info;
> pub mod encryption_keys;
>
> mod media_pool;
> diff --git a/www/tape/ChangerStatus.js b/www/tape/ChangerStatus.js
> index e18af90e..1b81db34 100644
> --- a/www/tape/ChangerStatus.js
> +++ b/www/tape/ChangerStatus.js
> @@ -222,12 +222,16 @@ Ext.define('PBS.TapeManagement.ChangerStatus', {
> autoShow: true,
> submitText: gettext('OK'),
> title: gettext('Load Media into Drive'),
> - url: `/api2/extjs/tape/drive`,
> + url: `/api2/extjs/tape`,
> method: 'POST',
> submitUrl: function(url, values) {
> - let drive = values.drive;
> - delete values.drive;
> - return `${url}/${encodeURIComponent(drive)}/${apiCall}`;
> + let drive = values.drive;
> + delete values.drive;
> +
> + let type = drive ? "drive" : "changer";
> + let item = drive ? drive : changer;
> +
> + return `${url}/${type}/${encodeURIComponent(item)}/${apiCall}`;
here the indentation is wrong
> },
> items: [
> label !== "" ? {
> @@ -248,6 +252,9 @@ Ext.define('PBS.TapeManagement.ChangerStatus', {
> fieldLabel: gettext('Drive'),
> changer: changer,
> name: 'drive',
> + emptyText: gettext('Choose Automatically'),
> + allowBlank: true,
> + autoSelect: false,
> },
> ],
> listeners: {
More information about the pbs-devel
mailing list