[pdm-devel] [PATCH proxmox-datacenter-manager] api: pbs: request latest metrics when using hourly RRD timeframe

Lukas Wagner l.wagner at proxmox.com
Mon Sep 29 11:47:05 CEST 2025


As is already done for PVE remotes, when metric data is requested for the
hourly timeframe, we trigger an immediate, out-of-schedule metric
collection for this single remote and wait for its completion, up to a
short timeout of five seconds. If collection does not finish in this time,
we simply return what is currently in the database, which might have a
short gap (up to 10 minutes, since this is the regular metric collection
interval).

Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
 server/src/api/pbs/rrddata.rs | 10 ++++------
 server/src/api/pve/rrddata.rs | 35 ++++-------------------------------
 server/src/api/rrd_common.rs  | 33 +++++++++++++++++++++++++++++++--
 3 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/server/src/api/pbs/rrddata.rs b/server/src/api/pbs/rrddata.rs
index c6649a2d..aa980d45 100644
--- a/server/src/api/pbs/rrddata.rs
+++ b/server/src/api/pbs/rrddata.rs
@@ -102,15 +102,14 @@ impl DataPoint for PbsDatastoreDataPoint {
     },
 )]
 /// Read PBS node stats
-fn get_pbs_node_rrd_data(
+async fn get_pbs_node_rrd_data(
     remote: String,
     timeframe: RrdTimeframe,
     cf: RrdMode,
     _param: Value,
 ) -> Result<Vec<PbsNodeDataPoint>, Error> {
     let base = format!("pbs/{remote}/host");
-
-    rrd_common::create_datapoints_from_rrd(&base, timeframe, cf)
+    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
 }
 
 #[api(
@@ -128,7 +127,7 @@ fn get_pbs_node_rrd_data(
     },
 )]
 /// Read PBS datastore stats
-fn get_pbs_datastore_rrd_data(
+async fn get_pbs_datastore_rrd_data(
     remote: String,
     datastore: String,
     timeframe: RrdTimeframe,
@@ -136,8 +135,7 @@ fn get_pbs_datastore_rrd_data(
     _param: Value,
 ) -> Result<Vec<PbsDatastoreDataPoint>, Error> {
     let base = format!("pbs/{remote}/datastore/{datastore}");
-
-    rrd_common::create_datapoints_from_rrd(&base, timeframe, cf)
+    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
 }
 
 pub const PBS_NODE_RRD_ROUTER: Router = Router::new().get(&API_METHOD_GET_PBS_NODE_RRD_DATA);
diff --git a/server/src/api/pve/rrddata.rs b/server/src/api/pve/rrddata.rs
index e08d4b43..9fed9671 100644
--- a/server/src/api/pve/rrddata.rs
+++ b/server/src/api/pve/rrddata.rs
@@ -1,5 +1,3 @@
-use std::time::Duration;
-
 use anyhow::Error;
 use serde_json::Value;
 
@@ -12,7 +10,6 @@ use pdm_api_types::rrddata::{LxcDataPoint, NodeDataPoint, PveStorageDataPoint, Q
 use pdm_api_types::{NODE_SCHEMA, PRIV_RESOURCE_AUDIT, PVE_STORAGE_ID_SCHEMA, VMID_SCHEMA};
 
 use crate::api::rrd_common::{self, DataPoint};
-use crate::metric_collection;
 
 impl DataPoint for NodeDataPoint {
     fn new(time: u64) -> Self {
@@ -193,7 +190,7 @@ async fn get_qemu_rrd_data(
     _param: Value,
 ) -> Result<Vec<QemuDataPoint>, Error> {
     let base = format!("pve/{remote}/qemu/{vmid}");
-    get_rrd_datapoints(remote, base, timeframe, cf).await
+    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
 }
 
 #[api(
@@ -222,7 +219,7 @@ async fn get_lxc_rrd_data(
     _param: Value,
 ) -> Result<Vec<LxcDataPoint>, Error> {
     let base = format!("pve/{remote}/lxc/{vmid}");
-    get_rrd_datapoints(remote, base, timeframe, cf).await
+    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
 }
 
 #[api(
@@ -251,7 +248,7 @@ async fn get_node_rrd_data(
     _param: Value,
 ) -> Result<Vec<NodeDataPoint>, Error> {
     let base = format!("pve/{remote}/node/{node}");
-    get_rrd_datapoints(remote, base, timeframe, cf).await
+    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
 }
 
 #[api(
@@ -282,31 +279,7 @@ async fn get_storage_rrd_data(
     _param: Value,
 ) -> Result<Vec<NodeDataPoint>, Error> {
     let base = format!("pve/{remote}/storage/{node}/{storage}");
-    get_rrd_datapoints(remote, base, timeframe, cf).await
-}
-
-async fn get_rrd_datapoints<T: DataPoint + Send + 'static>(
-    remote: String,
-    basepath: String,
-    timeframe: RrdTimeframe,
-    mode: RrdMode,
-) -> Result<Vec<T>, Error> {
-    const WAIT_FOR_NEWEST_METRIC_TIMEOUT: Duration = Duration::from_secs(5);
-
-    if timeframe == RrdTimeframe::Hour {
-        // Let's wait for a limited time for the most recent metrics. If the connection to the remote
-        // is super slow or if the metric collection tasks currently busy with collecting other
-        // metrics, we just return the data we already have, not the newest one.
-        let _ = tokio::time::timeout(WAIT_FOR_NEWEST_METRIC_TIMEOUT, async {
-            metric_collection::trigger_metric_collection(Some(remote), true).await
-        })
-        .await;
-    }
-
-    tokio::task::spawn_blocking(move || {
-        rrd_common::create_datapoints_from_rrd(&basepath, timeframe, mode)
-    })
-    .await?
+    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
 }
 
 pub const QEMU_RRD_ROUTER: Router = Router::new().get(&API_METHOD_GET_QEMU_RRD_DATA);
diff --git a/server/src/api/rrd_common.rs b/server/src/api/rrd_common.rs
index 28868bc1..b5d1a786 100644
--- a/server/src/api/rrd_common.rs
+++ b/server/src/api/rrd_common.rs
@@ -1,9 +1,10 @@
-use std::collections::BTreeMap;
+use std::{collections::BTreeMap, time::Duration};
 
 use anyhow::{bail, Error};
+
 use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
 
-use crate::metric_collection::rrd_cache;
+use crate::metric_collection::{self, rrd_cache};
 
 /// Trait common to all RRD-stored metric objects (nodes, datastores, qemu, lxc, etc.)
 pub trait DataPoint {
@@ -53,3 +54,31 @@ pub fn create_datapoints_from_rrd<T: DataPoint>(
 
     Ok(timemap.into_values().collect())
 }
+
+/// Get RRD datapoints for a given remote/RRD path.
+///
+/// If `timeframe` is set to [`RrdTimeframe::Hour`], then this function will trigger
+/// metric collection for this remote and wait for its completion, up to a timeout of five
+/// seconds. If the timeout is exceeded, we simply go ahead and return what is in the database at
+/// the moment, which might have a gap for the last couple of minutes.
+pub async fn get_rrd_datapoints<T: DataPoint + Send + 'static>(
+    remote: String,
+    basepath: String,
+    timeframe: RrdTimeframe,
+    mode: RrdMode,
+) -> Result<Vec<T>, Error> {
+    const WAIT_FOR_NEWEST_METRIC_TIMEOUT: Duration = Duration::from_secs(5);
+
+    if timeframe == RrdTimeframe::Hour {
+        // Let's wait for a limited time for the most recent metrics. If the connection to the remote
+        // is super slow or if the metric collection tasks are currently busy collecting other
+        // metrics, we just return the data we already have, not the newest one.
+        let _ = tokio::time::timeout(WAIT_FOR_NEWEST_METRIC_TIMEOUT, async {
+            metric_collection::trigger_metric_collection(Some(remote), true).await
+        })
+        .await;
+    }
+
+    tokio::task::spawn_blocking(move || create_datapoints_from_rrd(&basepath, timeframe, mode))
+        .await?
+}
-- 
2.47.3





More information about the pdm-devel mailing list