[pbs-devel] [PATCH proxmox-backup v2 13/13] api: add /status/metrics API
Lukas Wagner
l.wagner at proxmox.com
Tue Oct 15 10:46:36 CEST 2024
This endpoint is modelled exactly after the one in PVE (where it
is available under /cluster/metrics/export).
The returned data format is quite simple, being an array of
metric records, including a value, a metric name, an id to identify
the object (e.g. datastore/foo, host), a timestamp and a type
('gauge', 'derive', ...). The latter property makes the format
self-describing and aids the metric collector in choosing a
representation for storing the metric data.
[
...
{
"metric": "cpu_avg1",
"value": 0.12,
"timestamp": 170053205,
"id": "host",
"type": "gauge"
},
...
]
In terms of permissions, the new endpoint requires Sys.Audit
on /system/status for metrics of the 'host' object,
and Datastore.Audit on /datastore/{store} for 'datastore/{store}'
metric objects.
Via the 'history' and 'start-time' parameters one can query
the last 30 minutes of metric history. If these parameters
are not provided, only the most recent metric generation
is returned.
Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
Notes:
Changes since v2:
- move api2/status.rs to api2/status/mod.rs
- move api2/metrics.rs to api2/status/metrics.rs
src/api2/status/metrics.rs | 73 ++++++++++++++++++++
src/api2/{status.rs => status/mod.rs} | 15 ++--
src/server/metric_collection/mod.rs | 4 +-
src/server/metric_collection/pull_metrics.rs | 45 ++++++++++++
4 files changed, 131 insertions(+), 6 deletions(-)
create mode 100644 src/api2/status/metrics.rs
rename src/api2/{status.rs => status/mod.rs} (95%)
diff --git a/src/api2/status/metrics.rs b/src/api2/status/metrics.rs
new file mode 100644
index 00000000..a5583aac
--- /dev/null
+++ b/src/api2/status/metrics.rs
@@ -0,0 +1,73 @@
+use anyhow::Error;
+use pbs_api_types::{Authid, MetricDataPoint, Metrics, PRIV_DATASTORE_AUDIT, PRIV_SYS_AUDIT};
+use pbs_config::CachedUserInfo;
+use proxmox_router::{Permission, Router, RpcEnvironment};
+use proxmox_schema::api;
+
+use crate::server::metric_collection::pull_metrics;
+
+pub const ROUTER: Router = Router::new().get(&API_METHOD_GET_METRICS);
+
+#[api(
+ input: {
+ properties: {
+ "start-time": {
+ optional: true,
+ default: 0,
+ description: "Only return values with a timestamp > start-time. Only has an effect if 'history' is also set",
+ },
+ "history": {
+ optional: true,
+ default: false,
+ description: "Include historic values (last 30 minutes)",
+ }
+ },
+ },
+ access: {
+ description: "Users need Sys.Audit on /system/status for host metrics and Datastore.Audit on /datastore/{store} for datastore metrics",
+ permission: &Permission::Anybody,
+ },
+)]
+/// Return backup server metrics.
+pub fn get_metrics(
+ start_time: i64,
+ history: bool,
+ rpcenv: &mut dyn RpcEnvironment,
+) -> Result<Metrics, Error> {
+ let metrics = if history {
+ pull_metrics::get_all_metrics(start_time)?
+ } else {
+ pull_metrics::get_most_recent_metrics()?
+ };
+
+ let auth_id: Authid = rpcenv.get_auth_id().unwrap().parse()?;
+ let user_info = CachedUserInfo::new()?;
+
+ let filter_by_privs = |point: &MetricDataPoint| {
+ let elements: Vec<&str> = point.id.as_str().split('/').collect();
+
+ match elements.as_slice() {
+ ["host"] => {
+ let user_privs =
+ CachedUserInfo::lookup_privs(&user_info, &auth_id, &["system", "status"]);
+ (user_privs & PRIV_SYS_AUDIT) != 0
+ }
+ ["datastore", datastore_id] => {
+ let user_privs = CachedUserInfo::lookup_privs(
+ &user_info,
+ &auth_id,
+ &["datastore", datastore_id],
+ );
+ (user_privs & PRIV_DATASTORE_AUDIT) != 0
+ }
+ _ => {
+ log::error!("invalid metric object id: {}", point.id);
+ false
+ }
+ }
+ };
+
+ Ok(Metrics {
+ data: metrics.into_iter().filter(filter_by_privs).collect(),
+ })
+}
diff --git a/src/api2/status.rs b/src/api2/status/mod.rs
similarity index 95%
rename from src/api2/status.rs
rename to src/api2/status/mod.rs
index e46fc1ae..113aa985 100644
--- a/src/api2/status.rs
+++ b/src/api2/status/mod.rs
@@ -7,6 +7,7 @@ use proxmox_router::list_subdirs_api_method;
use proxmox_router::{ApiMethod, Permission, Router, RpcEnvironment, SubdirMap};
use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
use proxmox_schema::api;
+use proxmox_sortable_macro::sortable;
use pbs_api_types::{
Authid, DataStoreStatusListItem, Operation, PRIV_DATASTORE_AUDIT, PRIV_DATASTORE_BACKUP,
@@ -20,6 +21,8 @@ use crate::tools::statistics::linear_regression;
use crate::backup::can_access_any_namespace;
+pub mod metrics;
+
#[api(
returns: {
description: "Lists the Status of the Datastores.",
@@ -137,10 +140,14 @@ pub async fn datastore_status(
Ok(list)
}
-const SUBDIRS: SubdirMap = &[(
- "datastore-usage",
- &Router::new().get(&API_METHOD_DATASTORE_STATUS),
-)];
+#[sortable]
+const SUBDIRS: SubdirMap = &sorted!([
+ (
+ "datastore-usage",
+ &Router::new().get(&API_METHOD_DATASTORE_STATUS),
+ ),
+ ("metrics", &metrics::ROUTER),
+]);
pub const ROUTER: Router = Router::new()
.get(&list_subdirs_api_method!(SUBDIRS))
diff --git a/src/server/metric_collection/mod.rs b/src/server/metric_collection/mod.rs
index e6e04c5b..3cbd7425 100644
--- a/src/server/metric_collection/mod.rs
+++ b/src/server/metric_collection/mod.rs
@@ -17,8 +17,8 @@ use proxmox_sys::{
use crate::tools::disks::{zfs_dataset_stats, BlockDevStat, DiskManage};
mod metric_server;
-mod pull_metrics;
-pub mod rrd;
+pub(crate) mod pull_metrics;
+pub(crate) mod rrd;
const METRIC_COLLECTION_INTERVAL: Duration = Duration::from_secs(10);
diff --git a/src/server/metric_collection/pull_metrics.rs b/src/server/metric_collection/pull_metrics.rs
index f4b506cf..1b5f3777 100644
--- a/src/server/metric_collection/pull_metrics.rs
+++ b/src/server/metric_collection/pull_metrics.rs
@@ -39,6 +39,51 @@ pub(super) fn init() -> Result<(), Error> {
Ok(())
}
+/// Return most recent metrics
+///
+/// If the metric collection loop has not produced any metrics yet, an empty
+/// `Vec` is returned. Returns an error if the cache could not be accessed.
+pub fn get_most_recent_metrics() -> Result<Vec<MetricDataPoint>, Error> {
+ let cached_datapoints: Option<MetricDataPoints> = get_cache()?.get()?;
+ let mut points = cached_datapoints.map(|r| r.datapoints).unwrap_or_default();
+
+ points.sort_unstable_by_key(|p| p.timestamp);
+
+ Ok(points)
+}
+
+/// Return all cached metrics with a `timestamp > start_time`
+///
+/// If the metric collection loop has not produced any metrics yet, an empty
+/// `Vec` is returned. Returns an error if the cache could not be accessed.
+pub fn get_all_metrics(start_time: i64) -> Result<Vec<MetricDataPoint>, Error> {
+ let now = proxmox_time::epoch_i64();
+
+ let delta = now - start_time;
+
+ if delta < 0 {
+ // start-time in the future, no metrics for you
+ return Ok(Vec::new());
+ }
+
+ let generations = delta / (METRIC_COLLECTION_INTERVAL.as_secs() as i64);
+ let generations = generations.clamp(0, STORED_METRIC_GENERATIONS as i64);
+
+ let cached_datapoints: Vec<MetricDataPoints> = get_cache()?.get_last(generations as u32)?;
+
+ let mut points = Vec::new();
+
+ for gen in cached_datapoints {
+ if gen.timestamp > start_time {
+ points.extend(gen.datapoints);
+ }
+ }
+
+ points.sort_unstable_by_key(|p| p.timestamp);
+
+ Ok(points)
+}
+
/// Convert `DiskStat` `HostStat` into a universal metric data point and cache
/// them for a later retrieval.
pub(super) fn update_metrics(
--
2.39.5
More information about the pbs-devel
mailing list