[pbs-devel] [PATCH proxmox-backup v2 13/13] api: add /status/metrics API

Lukas Wagner l.wagner at proxmox.com
Tue Oct 15 10:46:36 CEST 2024


This one is modelled exactly after the one in PVE (there it
is available under /cluster/metrics/export).

The returned data format is quite simple, being an array of
metric records, including a value, a metric name, an id to identify
the object (e.g. datastore/foo, host), a timestamp and a type
('gauge', 'derive', ...). The latter property makes the format
self-describing and aids the metric collector in choosing a
representation for storing the metric data.

[
    ...
    {
	"metric": "cpu_avg1",
	"value": 0.12,
	"timestamp": 170053205,
	"id": "host",
	"type": "gauge"
    },
    ...
]

In terms of permissions, the new endpoint requires Sys.Audit
on /system/status for metrics of the 'host' object,
and Datastore.Audit on /datastore/{store} for 'datastore/{store}'
metric objects.

Via the 'history' and 'start-time' parameters one can query
the last 30 minutes of metric history. If these parameters
are not provided, only the most recent metric generation
is returned.

Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---

Notes:
    Changes since v2:
      - move api2/status.rs to api2/status/mod.rs
      - move api2/metrics.rs to api2/status/metrics.rs

 src/api2/status/metrics.rs                   | 73 ++++++++++++++++++++
 src/api2/{status.rs => status/mod.rs}        | 15 ++--
 src/server/metric_collection/mod.rs          |  4 +-
 src/server/metric_collection/pull_metrics.rs | 45 ++++++++++++
 4 files changed, 131 insertions(+), 6 deletions(-)
 create mode 100644 src/api2/status/metrics.rs
 rename src/api2/{status.rs => status/mod.rs} (95%)

diff --git a/src/api2/status/metrics.rs b/src/api2/status/metrics.rs
new file mode 100644
index 00000000..a5583aac
--- /dev/null
+++ b/src/api2/status/metrics.rs
@@ -0,0 +1,73 @@
+use anyhow::Error;
+use pbs_api_types::{Authid, MetricDataPoint, Metrics, PRIV_DATASTORE_AUDIT, PRIV_SYS_AUDIT};
+use pbs_config::CachedUserInfo;
+use proxmox_router::{Permission, Router, RpcEnvironment};
+use proxmox_schema::api;
+
+use crate::server::metric_collection::pull_metrics;
+
+pub const ROUTER: Router = Router::new().get(&API_METHOD_GET_METRICS);
+
+#[api(
+    input: {
+        properties: {
+            "start-time": {
+                optional: true,
+                default: 0,
+                description: "Only return values with a timestamp > start-time. Only has an effect if 'history' is also set",
+            },
+            "history": {
+                optional: true,
+                default: false,
+                description: "Include historic values (last 30 minutes)",
+            }
+        },
+    },
+    access: {
+        description: "Users need Sys.Audit on /system/status for host metrics and Datastore.Audit on /datastore/{store} for datastore metrics",
+        permission: &Permission::Anybody,
+    },
+)]
+/// Return backup server metrics.
+pub fn get_metrics(
+    start_time: i64,
+    history: bool,
+    rpcenv: &mut dyn RpcEnvironment,
+) -> Result<Metrics, Error> {
+    let metrics = if history {
+        pull_metrics::get_all_metrics(start_time)?
+    } else {
+        pull_metrics::get_most_recent_metrics()?
+    };
+
+    // a missing auth id is reported as an error instead of panicking the request handler
+    let auth_id: Authid = rpcenv
+        .get_auth_id()
+        .ok_or_else(|| anyhow::format_err!("no authid available"))?
+        .parse()?;
+    let user_info = CachedUserInfo::new()?;
+
+    // keep only data points of objects the requesting user is allowed to audit
+    let filter_by_privs = |point: &MetricDataPoint| {
+        let elements: Vec<&str> = point.id.as_str().split('/').collect();
+
+        match elements.as_slice() {
+            ["host"] => {
+                let privs = user_info.lookup_privs(&auth_id, &["system", "status"]);
+                (privs & PRIV_SYS_AUDIT) != 0
+            }
+            ["datastore", datastore_id] => {
+                let privs = user_info.lookup_privs(&auth_id, &["datastore", datastore_id]);
+                (privs & PRIV_DATASTORE_AUDIT) != 0
+            }
+            _ => {
+                log::error!("invalid metric object id: {}", point.id);
+                false
+            }
+        }
+    };
+
+    Ok(Metrics {
+        data: metrics.into_iter().filter(filter_by_privs).collect(),
+    })
+}
diff --git a/src/api2/status.rs b/src/api2/status/mod.rs
similarity index 95%
rename from src/api2/status.rs
rename to src/api2/status/mod.rs
index e46fc1ae..113aa985 100644
--- a/src/api2/status.rs
+++ b/src/api2/status/mod.rs
@@ -7,6 +7,7 @@ use proxmox_router::list_subdirs_api_method;
 use proxmox_router::{ApiMethod, Permission, Router, RpcEnvironment, SubdirMap};
 use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
 use proxmox_schema::api;
+use proxmox_sortable_macro::sortable;
 
 use pbs_api_types::{
     Authid, DataStoreStatusListItem, Operation, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
@@ -20,6 +21,8 @@ use crate::tools::statistics::linear_regression;
 
 use crate::backup::can_access_any_namespace;
 
+pub mod metrics;
+
 #[api(
     returns: {
         description: "Lists the Status of the Datastores.",
@@ -137,10 +140,14 @@ pub async fn datastore_status(
     Ok(list)
 }
 
-const SUBDIRS: SubdirMap = &[(
-    "datastore-usage",
-    &Router::new().get(&API_METHOD_DATASTORE_STATUS),
-)];
+#[sortable]
+const SUBDIRS: SubdirMap = &sorted!([
+    (
+        "datastore-usage",
+        &Router::new().get(&API_METHOD_DATASTORE_STATUS),
+    ),
+    ("metrics", &metrics::ROUTER),
+]);
 
 pub const ROUTER: Router = Router::new()
     .get(&list_subdirs_api_method!(SUBDIRS))
diff --git a/src/server/metric_collection/mod.rs b/src/server/metric_collection/mod.rs
index e6e04c5b..3cbd7425 100644
--- a/src/server/metric_collection/mod.rs
+++ b/src/server/metric_collection/mod.rs
@@ -17,8 +17,8 @@ use proxmox_sys::{
 use crate::tools::disks::{zfs_dataset_stats, BlockDevStat, DiskManage};
 
 mod metric_server;
-mod pull_metrics;
-pub mod rrd;
+pub(crate) mod pull_metrics;
+pub(crate) mod rrd;
 
 const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(10);
 
diff --git a/src/server/metric_collection/pull_metrics.rs b/src/server/metric_collection/pull_metrics.rs
index f4b506cf..1b5f3777 100644
--- a/src/server/metric_collection/pull_metrics.rs
+++ b/src/server/metric_collection/pull_metrics.rs
@@ -39,6 +39,51 @@ pub(super) fn init() -> Result<(), Error> {
     Ok(())
 }
 
+/// Return the most recent metrics, sorted by timestamp
+///
+/// An empty `Vec` is returned as long as the metric collection loop has not
+/// produced any metrics yet. Fails if the cache could not be accessed.
+pub fn get_most_recent_metrics() -> Result<Vec<MetricDataPoint>, Error> {
+    let latest: Option<MetricDataPoints> = get_cache()?.get()?;
+    let mut datapoints = latest.map_or_else(Vec::new, |generation| generation.datapoints);
+
+    datapoints.sort_unstable_by_key(|point| point.timestamp);
+
+    Ok(datapoints)
+}
+
+/// Return all cached metrics with a `timestamp > start_time`
+///
+/// If the metric collection loop has not produced any metrics yet, an empty
+/// `Vec` is returned. Returns an error if the cache could not be accessed.
+pub fn get_all_metrics(start_time: i64) -> Result<Vec<MetricDataPoint>, Error> {
+    let now = proxmox_time::epoch_i64();
+
+    // `start_time` is caller supplied, so saturate instead of overflowing on extreme values
+    let delta = now.saturating_sub(start_time);
+
+    if delta < 0 {
+        // start-time in the future, no metrics for you
+        return Ok(Vec::new());
+    }
+
+    // round up so a partially elapsed interval is covered too (assumes `get_last(n)` yields
+    // the `n` newest generations - TODO confirm); too old entries are filtered out below
+    let generations = delta / (METRIC_COLLECTION_INTERVAL.as_secs() as i64) + 1;
+    let generations = generations.clamp(0, STORED_METRIC_GENERATIONS as i64);
+
+    let cached_datapoints: Vec<MetricDataPoints> = get_cache()?.get_last(generations as u32)?;
+
+    let mut points: Vec<MetricDataPoint> = cached_datapoints
+        .into_iter()
+        .filter(|generation| generation.timestamp > start_time)
+        .flat_map(|generation| generation.datapoints)
+        .collect();
+
+    points.sort_unstable_by_key(|p| p.timestamp);
+
+    Ok(points)
+}
+
 /// Convert `DiskStat` `HostStat` into a universal metric data point and cache
 /// them for a later retrieval.
 pub(super) fn update_metrics(
-- 
2.39.5





More information about the pbs-devel mailing list