[pbs-devel] [PATCH proxmox-backup v7 3/6] pbs-datastore: add active operations tracking

Wolfgang Bumiller w.bumiller at proxmox.com
Tue Feb 8 11:13:00 CET 2022


On Fri, Feb 04, 2022 at 11:17:26AM +0000, Hannes Laimer wrote:
> Saves the currently active read/write operation counts in a file. The
> file is updated whenever a reference returned by lookup_datastore is
> dropped and whenever a reference is returned by lookup_datastore. The
> files are locked before every access, there is one file per datastore.
> 
> Signed-off-by: Hannes Laimer <h.laimer at proxmox.com>
> ---
>  pbs-api-types/src/maintenance.rs   |   1 +
>  pbs-datastore/Cargo.toml           |   1 +
>  pbs-datastore/src/datastore.rs     | 114 +++++++++++++++++++----------
>  pbs-datastore/src/lib.rs           |   4 +
>  pbs-datastore/src/task_tracking.rs |  81 ++++++++++++++++++++
>  src/bin/proxmox-backup-api.rs      |   1 +
>  src/server/mod.rs                  |  16 +++-
>  7 files changed, 180 insertions(+), 38 deletions(-)
>  create mode 100644 pbs-datastore/src/task_tracking.rs
> 
> diff --git a/pbs-api-types/src/maintenance.rs b/pbs-api-types/src/maintenance.rs
> index 4d3ccb3b..1b5753d2 100644
> --- a/pbs-api-types/src/maintenance.rs
> +++ b/pbs-api-types/src/maintenance.rs
> @@ -25,6 +25,7 @@ pub enum MaintenanceType {
>      Offline(String),
>  }
>  
> +#[derive(Copy ,Clone)]

^ wrong space

>  /// Operation requirments, used when checking for maintenance mode.
>  pub enum Operation {
>      Read,
> diff --git a/pbs-datastore/Cargo.toml b/pbs-datastore/Cargo.toml
> index d8cefa00..0c703d44 100644
> @@ -54,13 +56,42 @@ pub fn check_backup_owner(
>  ///
>  /// A Datastore can store severals backups, and provides the
>  /// management interface for backup.
> -pub struct DataStore {
> +pub struct DataStoreImpl {
>      chunk_store: Arc<ChunkStore>,
>      gc_mutex: Mutex<()>,
>      last_gc_status: Mutex<GarbageCollectionStatus>,
>      verify_new: bool,
>  }
>  
> +pub struct DataStore {
> +    inner: Arc<DataStoreImpl>,
> +    operation: Option<Operation>,
> +}
> +
> +impl Clone for DataStore {
> +    fn clone(&self) -> Self {
> +        if let Some(operation) = self.operation.clone() {
> +            if let Err(e) = update_active_operations(self.name(), operation, 1) {
> +                eprintln!("could not update active operations - {}", e);

I think if this fails we should return a value with `operation: None`
*or* have the count as a separate field, otherwise `Drop` will cause a
miscount here, no?

Also, please prefer `log::error!` here over `eprintln!`.

> +            }
> +        }
> +        DataStore {
> +            inner: self.inner.clone(),
> +            operation: self.operation.clone(),
> +        }
> +    }
> +}
> +
> +impl Drop for DataStore {
> +    fn drop(&mut self) {
> +        if let Some(operation) = self.operation.clone() {
> +            if let Err(e) = update_active_operations(self.name(), operation, -1) {
> +                eprintln!("could not update active operations - {}", e);

here too, `eprintln!` -> `log::error!`

> +            }
> +        }
> +    }
> +}
> +
>  impl DataStore {
>      pub fn lookup_datastore(
>          name: &str,
> diff --git a/pbs-datastore/src/lib.rs b/pbs-datastore/src/lib.rs
> index d50a64a5..131040c6 100644
> diff --git a/pbs-datastore/src/task_tracking.rs b/pbs-datastore/src/task_tracking.rs
> new file mode 100644
> index 00000000..a02d9a17
> --- /dev/null
> +++ b/pbs-datastore/src/task_tracking.rs
> @@ -0,0 +1,81 @@
> +use anyhow::Error;
> +use libc::pid_t;
> +use nix::unistd::Pid;
> +use std::path::PathBuf;
> +
> +use pbs_api_types::Operation;
> +use proxmox_sys::fs::{file_read_optional_string, open_file_locked, replace_file, CreateOptions};
> +use proxmox_sys::linux::procfs;
> +use serde::{Deserialize, Serialize};
> +
> +#[derive(Deserialize, Serialize, Clone)]
> +struct TaskOperations {
> +    pid: u32,
> +    starttime: u64,
> +    reading_operations: i64,
> +    writing_operations: i64,
> +}
> +
> +pub fn update_active_operations(name: &str, operation: Operation, count: i64) -> Result<(), Error> {

I'm thinking there might be a way to use shared memory here, but dealing
with dead processes becomes a bit more tricky.

If you're motivated, you could take a look at
`pbs-config/src/config_version_cache.rs` to see what I mean
(DataStoreImpl could contain the mmapped region, but dealing with dead
processes is still a bit of work, would need some atomics for the PID
and registration state of a process...)

> +    let path = PathBuf::from(format!("{}/{}", crate::ACTIVE_OPERATIONS_DIR, name));
> +    let lock_path = PathBuf::from(format!("{}/{}.lock", crate::ACTIVE_OPERATIONS_DIR, name));
> (...)





More information about the pbs-devel mailing list