[pbs-devel] [PATCH proxmox-backup 13/13] api: add /status/metrics API

Lukas Wagner l.wagner at proxmox.com
Fri Oct 11 12:51:37 CEST 2024


This endpoint is modelled exactly after the one in PVE (where it
is available under /cluster/metrics/export).

The returned data format is quite simple, being an array of
metric records, including a value, a metric name, an id to identify
the object (e.g. datastore/foo, host), a timestamp and a type
('gauge', 'derive', ...). The latter property makes the format
self-describing and aids the metric collector in choosing a
representation for storing the metric data.

[
    ...
    {
	"metric": "cpu_avg1",
	"value": 0.12,
	"timestamp": 170053205,
	"id": "host",
	"type": "gauge"
    },
    ...
]

In terms of permissions, the new endpoint requires Sys.Audit
on /system/status for metrics of the 'host' object,
and Datastore.Audit on /datastore/{store} for 'datastore/{store}'
metric objects.

Via the 'history' and 'start-time' parameters, one can query
up to the last 30 minutes of metric history. If these parameters
are not provided, only the most recent metric generation
is returned.

Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
 src/api2/mod.rs                              |  1 +
 src/api2/status.rs                           | 13 ++++--
 src/server/metric_collection/mod.rs          |  4 +-
 src/server/metric_collection/pull_metrics.rs | 45 ++++++++++++++++++++
 4 files changed, 57 insertions(+), 6 deletions(-)

diff --git a/src/api2/mod.rs b/src/api2/mod.rs
index a83e4c20..f485ae53 100644
--- a/src/api2/mod.rs
+++ b/src/api2/mod.rs
@@ -9,6 +9,7 @@ pub mod admin;
 pub mod backup;
 pub mod config;
 pub mod helpers;
+pub mod metrics;
 pub mod node;
 pub mod ping;
 pub mod pull;
diff --git a/src/api2/status.rs b/src/api2/status.rs
index e46fc1ae..f217a31d 100644
--- a/src/api2/status.rs
+++ b/src/api2/status.rs
@@ -7,6 +7,7 @@ use proxmox_router::list_subdirs_api_method;
 use proxmox_router::{ApiMethod, Permission, Router, RpcEnvironment, SubdirMap};
 use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
 use proxmox_schema::api;
+use proxmox_sortable_macro::sortable;
 
 use pbs_api_types::{
     Authid, DataStoreStatusListItem, Operation, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
@@ -137,10 +138,14 @@ pub async fn datastore_status(
     Ok(list)
 }
 
-const SUBDIRS: SubdirMap = &[(
-    "datastore-usage",
-    &Router::new().get(&API_METHOD_DATASTORE_STATUS),
-)];
+#[sortable]
+const SUBDIRS: SubdirMap = &sorted!([
+    (
+        "datastore-usage",
+        &Router::new().get(&API_METHOD_DATASTORE_STATUS),
+    ),
+    ("metrics", &super::metrics::ROUTER),
+]);
 
 pub const ROUTER: Router = Router::new()
     .get(&list_subdirs_api_method!(SUBDIRS))
diff --git a/src/server/metric_collection/mod.rs b/src/server/metric_collection/mod.rs
index e6e04c5b..3cbd7425 100644
--- a/src/server/metric_collection/mod.rs
+++ b/src/server/metric_collection/mod.rs
@@ -17,8 +17,8 @@ use proxmox_sys::{
 use crate::tools::disks::{zfs_dataset_stats, BlockDevStat, DiskManage};
 
 mod metric_server;
-mod pull_metrics;
-pub mod rrd;
+pub(crate) mod pull_metrics;
+pub(crate) mod rrd;
 
 const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(10);
 
diff --git a/src/server/metric_collection/pull_metrics.rs b/src/server/metric_collection/pull_metrics.rs
index f4b506cf..1b5f3777 100644
--- a/src/server/metric_collection/pull_metrics.rs
+++ b/src/server/metric_collection/pull_metrics.rs
@@ -39,6 +39,51 @@ pub(super) fn init() -> Result<(), Error> {
     Ok(())
 }
 
+/// Return the most recently cached metrics, sorted by timestamp
+///
+/// If the metric collection loop has not produced any metrics yet, an empty
+/// `Vec` is returned. Returns an error if the cache could not be accessed.
+pub fn get_most_recent_metrics() -> Result<Vec<MetricDataPoint>, Error> {
+    let cached_datapoints: Option<MetricDataPoints> = get_cache()?.get()?;
+    let mut points = cached_datapoints.map(|r| r.datapoints).unwrap_or_default();
+
+    points.sort_unstable_by_key(|p| p.timestamp);
+
+    Ok(points)
+}
+
+/// Return all cached metrics with a `timestamp > start_time`
+///
+/// If the metric collection loop has not produced any metrics yet, an empty
+/// `Vec` is returned. Returns an error if the cache could not be accessed.
+pub fn get_all_metrics(start_time: i64) -> Result<Vec<MetricDataPoint>, Error> {
+    let now = proxmox_time::epoch_i64();
+    // saturate so that an extreme (very negative) `start_time` cannot overflow
+    let delta = now.saturating_sub(start_time);
+
+    if delta < 0 {
+        // start-time in the future, no metrics for you
+        return Ok(Vec::new());
+    }
+    // `+ 1`: rounding down alone requests 0 generations for delta < interval
+    let generations = delta / (METRIC_COLLECTION_INTERVAL.as_secs() as i64) + 1;
+    let generations = generations.clamp(0, STORED_METRIC_GENERATIONS as i64);
+
+    let cached_datapoints: Vec<MetricDataPoints> = get_cache()?.get_last(generations as u32)?;
+
+    let mut points = Vec::new();
+
+    for gen in cached_datapoints {
+        if gen.timestamp > start_time {
+            points.extend(gen.datapoints);
+        }
+    }
+
+    points.sort_unstable_by_key(|p| p.timestamp);
+
+    Ok(points)
+}
+
 /// Convert `DiskStat` `HostStat` into a universal metric data point and cache
 /// them for a later retrieval.
 pub(super) fn update_metrics(
-- 
2.39.5





More information about the pbs-devel mailing list