[pdm-devel] [PATCH proxmox-datacenter-manager v7 24/24] api: pve: rrd: trigger and wait for metric collection when requesting RRD data
Lukas Wagner
l.wagner at proxmox.com
Tue Aug 26 15:51:19 CEST 2025
Since we now default to a much longer collection interval (10 min), the hourly
RRD data might have a noticeable gap at the end. To circumvent this, we
now trigger metric collection for a single remote when requesting
hourly RRD data, waiting for the completion of metric collection up to a
short timeout of 5 seconds. If the timeout expires, which can happen if
the metric collection for this remote is particularly slow (bad
connection), or if the metric collection task is currently busy with a
full-run that's taking a long time, we simply return the data that we
already have.
Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
Notes:
New in v7.
server/src/api/pve/rrddata.rs | 43 +++++++++++++++++++++++++++--------
1 file changed, 34 insertions(+), 9 deletions(-)
diff --git a/server/src/api/pve/rrddata.rs b/server/src/api/pve/rrddata.rs
index b16c2313..b6a04037 100644
--- a/server/src/api/pve/rrddata.rs
+++ b/server/src/api/pve/rrddata.rs
@@ -1,3 +1,5 @@
+use std::time::Duration;
+
use anyhow::Error;
use serde_json::Value;
@@ -10,6 +12,7 @@ use pdm_api_types::rrddata::{LxcDataPoint, NodeDataPoint, QemuDataPoint};
use pdm_api_types::{NODE_SCHEMA, PRIV_RESOURCE_AUDIT, VMID_SCHEMA};
use crate::api::rrd_common::{self, DataPoint};
+use crate::metric_collection;
impl DataPoint for NodeDataPoint {
fn new(time: u64) -> Self {
@@ -161,7 +164,7 @@ impl DataPoint for LxcDataPoint {
},
)]
/// Read qemu stats
-fn get_qemu_rrd_data(
+async fn get_qemu_rrd_data(
remote: String,
vmid: u32,
timeframe: RrdTimeframe,
@@ -169,8 +172,7 @@ fn get_qemu_rrd_data(
_param: Value,
) -> Result<Vec<QemuDataPoint>, Error> {
let base = format!("pve/{remote}/qemu/{vmid}");
-
- rrd_common::create_datapoints_from_rrd(&base, timeframe, cf)
+ get_rrd_datapoints(remote, base, timeframe, cf).await
}
#[api(
@@ -191,7 +193,7 @@ fn get_qemu_rrd_data(
},
)]
/// Read lxc stats
-fn get_lxc_rrd_data(
+async fn get_lxc_rrd_data(
remote: String,
vmid: u32,
timeframe: RrdTimeframe,
@@ -199,8 +201,7 @@ fn get_lxc_rrd_data(
_param: Value,
) -> Result<Vec<LxcDataPoint>, Error> {
let base = format!("pve/{remote}/lxc/{vmid}");
-
- rrd_common::create_datapoints_from_rrd(&base, timeframe, cf)
+ get_rrd_datapoints(remote, base, timeframe, cf).await
}
#[api(
@@ -221,7 +222,7 @@ fn get_lxc_rrd_data(
},
)]
/// Read node stats
-fn get_node_rrd_data(
+async fn get_node_rrd_data(
remote: String,
node: String,
timeframe: RrdTimeframe,
@@ -229,9 +230,33 @@ fn get_node_rrd_data(
_param: Value,
) -> Result<Vec<NodeDataPoint>, Error> {
let base = format!("pve/{remote}/node/{node}");
-
- rrd_common::create_datapoints_from_rrd(&base, timeframe, cf)
+ get_rrd_datapoints(remote, base, timeframe, cf).await
}
+
+async fn get_rrd_datapoints<T: DataPoint + Send + 'static>(
+ remote: String,
+ basepath: String,
+ timeframe: RrdTimeframe,
+ mode: RrdMode,
+) -> Result<Vec<T>, Error> {
+ const WAIT_FOR_NEWEST_METRIC_TIMEOUT: Duration = Duration::from_secs(5);
+
+ if timeframe == RrdTimeframe::Hour {
+ // Let's wait for a limited time for the most recent metrics. If the connection to the remote
+ // is super slow or if the metric collection task is currently busy with collecting other
+ // metrics, we just return the data we already have, not the newest one.
+ let _ = tokio::time::timeout(WAIT_FOR_NEWEST_METRIC_TIMEOUT, async {
+ metric_collection::trigger_metric_collection(Some(remote), true).await
+ })
+ .await;
+ }
+
+ tokio::task::spawn_blocking(move || {
+ rrd_common::create_datapoints_from_rrd(&basepath, timeframe, mode)
+ })
+ .await?
+}
+
pub const QEMU_RRD_ROUTER: Router = Router::new().get(&API_METHOD_GET_QEMU_RRD_DATA);
pub const LXC_RRD_ROUTER: Router = Router::new().get(&API_METHOD_GET_LXC_RRD_DATA);
pub const NODE_RRD_ROUTER: Router = Router::new().get(&API_METHOD_GET_NODE_RRD_DATA);
--
2.47.2
More information about the pdm-devel
mailing list