[pbs-devel] [PATCH proxmox-backup 13/13] api: add /status/metrics API
Wolfgang Bumiller
w.bumiller at proxmox.com
Mon Oct 14 12:02:34 CEST 2024
On Fri, Oct 11, 2024 at 12:51:37PM GMT, Lukas Wagner wrote:
> This one is modelled exactly as the one in PVE (there it
> is available under /cluster/metrics/export).
>
> The returned data format is quite simple, being an array of
> metric records, including a value, a metric name, an id to identify
> the object (e.g. datastore/foo, host), a timestamp and a type
> ('gauge', 'derive', ...). The latter property makes the format
> self-describing and aids the metric collector in choosing a
> representation for storing the metric data.
>
> [
> ...
> {
> "metric": "cpu_avg1",
> "value": 0.12,
> "timestamp": 170053205,
> "id": "host",
> "type": "gauge"
> },
> ...
> ]
>
> In terms of permissions, the new endpoint requires Sys.Audit
> on /system/status for metrics of the 'host' object,
> and Datastore.Audit on /datastore/{store} for 'datastore/{store}'
> metric objects.
>
> Via the 'history' and 'start-time' parameters one can query
> the last 30mins of metric history. If these parameters
> are not provided, only the most recent metric generation
> is returned.
>
> Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
> ---
> src/api2/mod.rs | 1 +
> src/api2/status.rs | 13 ++++--
> src/server/metric_collection/mod.rs | 4 +-
> src/server/metric_collection/pull_metrics.rs | 45 ++++++++++++++++++++
> 4 files changed, 57 insertions(+), 6 deletions(-)
>
> diff --git a/src/api2/mod.rs b/src/api2/mod.rs
> index a83e4c20..f485ae53 100644
> --- a/src/api2/mod.rs
> +++ b/src/api2/mod.rs
> @@ -9,6 +9,7 @@ pub mod admin;
> pub mod backup;
> pub mod config;
> pub mod helpers;
> +pub mod metrics;
This file is missing (and also wouldn't be where I'd expect it, see
below).
> pub mod node;
> pub mod ping;
> pub mod pull;
> diff --git a/src/api2/status.rs b/src/api2/status.rs
> index e46fc1ae..f217a31d 100644
> --- a/src/api2/status.rs
> +++ b/src/api2/status.rs
> @@ -7,6 +7,7 @@ use proxmox_router::list_subdirs_api_method;
> use proxmox_router::{ApiMethod, Permission, Router, RpcEnvironment, SubdirMap};
> use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
> use proxmox_schema::api;
> +use proxmox_sortable_macro::sortable;
>
> use pbs_api_types::{
> Authid, DataStoreStatusListItem, Operation, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
> @@ -137,10 +138,14 @@ pub async fn datastore_status(
> Ok(list)
> }
>
> -const SUBDIRS: SubdirMap = &[(
> - "datastore-usage",
> - &Router::new().get(&API_METHOD_DATASTORE_STATUS),
> -)];
> +#[sortable]
> +const SUBDIRS: SubdirMap = &sorted!([
> + (
> + "datastore-usage",
> + &Router::new().get(&API_METHOD_DATASTORE_STATUS),
> + ),
> + ("metrics", &super::metrics::ROUTER),
The API modules should ideally be structured like the paths - this would
be the first time we go up a level to get to a router. Better move
`status.rs` to `status/mod.rs` and add the missing file as
`status/metrics.rs`.
> +]);
>
> pub const ROUTER: Router = Router::new()
> .get(&list_subdirs_api_method!(SUBDIRS))
> diff --git a/src/server/metric_collection/mod.rs b/src/server/metric_collection/mod.rs
> index e6e04c5b..3cbd7425 100644
> --- a/src/server/metric_collection/mod.rs
> +++ b/src/server/metric_collection/mod.rs
> @@ -17,8 +17,8 @@ use proxmox_sys::{
> use crate::tools::disks::{zfs_dataset_stats, BlockDevStat, DiskManage};
>
> mod metric_server;
> -mod pull_metrics;
> -pub mod rrd;
> +pub(crate) mod pull_metrics;
> +pub(crate) mod rrd;
>
> const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(10);
>
> diff --git a/src/server/metric_collection/pull_metrics.rs b/src/server/metric_collection/pull_metrics.rs
> index f4b506cf..1b5f3777 100644
> --- a/src/server/metric_collection/pull_metrics.rs
> +++ b/src/server/metric_collection/pull_metrics.rs
> @@ -39,6 +39,51 @@ pub(super) fn init() -> Result<(), Error> {
> Ok(())
> }
>
> +/// Return most recent metrics
> +///
> +/// If the metric collection loop has not produced any metrics yet, an empty
> +/// `Vec` is returned. Returns an error if the cache could not be accessed.
> +pub fn get_most_recent_metrics() -> Result<Vec<MetricDataPoint>, Error> {
> + let cached_datapoints: Option<MetricDataPoints> = get_cache()?.get()?;
> + let mut points = cached_datapoints.map(|r| r.datapoints).unwrap_or_default();
> +
> + points.sort_unstable_by_key(|p| p.timestamp);
> +
> + Ok(points)
> +}
> +
> +/// Return all cached metrics with a `timestamp > start_time`
> +///
> +/// If the metric collection loop has not produced any metrics yet, an empty
> +/// `Vec` is returned. Returns an error if the cache could not be accessed.
> +pub fn get_all_metrics(start_time: i64) -> Result<Vec<MetricDataPoint>, Error> {
> + let now = proxmox_time::epoch_i64();
> +
> + let delta = now - start_time;
> +
> + if delta < 0 {
> + // start-time in the future, no metrics for you
> + return Ok(Vec::new());
> + }
> +
> + let generations = delta / (METRIC_COLLECTION_INTERVAL.as_secs() as i64);
> + let generations = generations.clamp(0, STORED_METRIC_GENERATIONS as i64);
> +
> + let cached_datapoints: Vec<MetricDataPoints> = get_cache()?.get_last(generations as u32)?;
> +
> + let mut points = Vec::new();
> +
> + for gen in cached_datapoints {
> + if gen.timestamp > start_time {
> + points.extend(gen.datapoints);
> + }
> + }
> +
> + points.sort_unstable_by_key(|p| p.timestamp);
> +
> + Ok(points)
> +}
> +
> /// Convert `DiskStat` `HostStat` into a universal metric data point and cache
> /// them for a later retrieval.
> pub(super) fn update_metrics(
> --
> 2.39.5
More information about the pbs-devel
mailing list