[pdm-devel] [PATCH proxmox-datacenter-manager] api: pbs: request latest metrics when using hourly RRD timeframe

Dominik Csapak d.csapak at proxmox.com
Tue Sep 30 16:51:37 CEST 2025


The code makes sense and works as advertised.

Consider this:

Reviewed-by: Dominik Csapak <d.csapak at proxmox.com>
Tested-by: Dominik Csapak <d.csapak at proxmox.com>

On 9/29/25 11:47 AM, Lukas Wagner wrote:
> Same as for PVE, when requesting metric data in the hourly timeframe, we
> trigger an out-of-order metric collection for this single remote,
> waiting for its completion up to a short timeout of five seconds. If
> collection does not finish in this time, we simply return what is
> currently in the database, which might have a short gap (up to 10
> minutes, since this is the regular metric collection interval).
> 
> Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
> ---
>   server/src/api/pbs/rrddata.rs | 10 ++++------
>   server/src/api/pve/rrddata.rs | 35 ++++-------------------------------
>   server/src/api/rrd_common.rs  | 33 +++++++++++++++++++++++++++++++--
>   3 files changed, 39 insertions(+), 39 deletions(-)
> 
> diff --git a/server/src/api/pbs/rrddata.rs b/server/src/api/pbs/rrddata.rs
> index c6649a2d..aa980d45 100644
> --- a/server/src/api/pbs/rrddata.rs
> +++ b/server/src/api/pbs/rrddata.rs
> @@ -102,15 +102,14 @@ impl DataPoint for PbsDatastoreDataPoint {
>       },
>   )]
>   /// Read PBS node stats
> -fn get_pbs_node_rrd_data(
> +async fn get_pbs_node_rrd_data(
>       remote: String,
>       timeframe: RrdTimeframe,
>       cf: RrdMode,
>       _param: Value,
>   ) -> Result<Vec<PbsNodeDataPoint>, Error> {
>       let base = format!("pbs/{remote}/host");
> -
> -    rrd_common::create_datapoints_from_rrd(&base, timeframe, cf)
> +    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
>   }
>   
>   #[api(
> @@ -128,7 +127,7 @@ fn get_pbs_node_rrd_data(
>       },
>   )]
>   /// Read PBS datastore stats
> -fn get_pbs_datastore_rrd_data(
> +async fn get_pbs_datastore_rrd_data(
>       remote: String,
>       datastore: String,
>       timeframe: RrdTimeframe,
> @@ -136,8 +135,7 @@ fn get_pbs_datastore_rrd_data(
>       _param: Value,
>   ) -> Result<Vec<PbsDatastoreDataPoint>, Error> {
>       let base = format!("pbs/{remote}/datastore/{datastore}");
> -
> -    rrd_common::create_datapoints_from_rrd(&base, timeframe, cf)
> +    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
>   }
>   
>   pub const PBS_NODE_RRD_ROUTER: Router = Router::new().get(&API_METHOD_GET_PBS_NODE_RRD_DATA);
> diff --git a/server/src/api/pve/rrddata.rs b/server/src/api/pve/rrddata.rs
> index e08d4b43..9fed9671 100644
> --- a/server/src/api/pve/rrddata.rs
> +++ b/server/src/api/pve/rrddata.rs
> @@ -1,5 +1,3 @@
> -use std::time::Duration;
> -
>   use anyhow::Error;
>   use serde_json::Value;
>   
> @@ -12,7 +10,6 @@ use pdm_api_types::rrddata::{LxcDataPoint, NodeDataPoint, PveStorageDataPoint, Q
>   use pdm_api_types::{NODE_SCHEMA, PRIV_RESOURCE_AUDIT, PVE_STORAGE_ID_SCHEMA, VMID_SCHEMA};
>   
>   use crate::api::rrd_common::{self, DataPoint};
> -use crate::metric_collection;
>   
>   impl DataPoint for NodeDataPoint {
>       fn new(time: u64) -> Self {
> @@ -193,7 +190,7 @@ async fn get_qemu_rrd_data(
>       _param: Value,
>   ) -> Result<Vec<QemuDataPoint>, Error> {
>       let base = format!("pve/{remote}/qemu/{vmid}");
> -    get_rrd_datapoints(remote, base, timeframe, cf).await
> +    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
>   }
>   
>   #[api(
> @@ -222,7 +219,7 @@ async fn get_lxc_rrd_data(
>       _param: Value,
>   ) -> Result<Vec<LxcDataPoint>, Error> {
>       let base = format!("pve/{remote}/lxc/{vmid}");
> -    get_rrd_datapoints(remote, base, timeframe, cf).await
> +    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
>   }
>   
>   #[api(
> @@ -251,7 +248,7 @@ async fn get_node_rrd_data(
>       _param: Value,
>   ) -> Result<Vec<NodeDataPoint>, Error> {
>       let base = format!("pve/{remote}/node/{node}");
> -    get_rrd_datapoints(remote, base, timeframe, cf).await
> +    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
>   }
>   
>   #[api(
> @@ -282,31 +279,7 @@ async fn get_storage_rrd_data(
>       _param: Value,
>   ) -> Result<Vec<NodeDataPoint>, Error> {
>       let base = format!("pve/{remote}/storage/{node}/{storage}");
> -    get_rrd_datapoints(remote, base, timeframe, cf).await
> -}
> -
> -async fn get_rrd_datapoints<T: DataPoint + Send + 'static>(
> -    remote: String,
> -    basepath: String,
> -    timeframe: RrdTimeframe,
> -    mode: RrdMode,
> -) -> Result<Vec<T>, Error> {
> -    const WAIT_FOR_NEWEST_METRIC_TIMEOUT: Duration = Duration::from_secs(5);
> -
> -    if timeframe == RrdTimeframe::Hour {
> -        // Let's wait for a limited time for the most recent metrics. If the connection to the remote
> -        // is super slow or if the metric collection tasks currently busy with collecting other
> -        // metrics, we just return the data we already have, not the newest one.
> -        let _ = tokio::time::timeout(WAIT_FOR_NEWEST_METRIC_TIMEOUT, async {
> -            metric_collection::trigger_metric_collection(Some(remote), true).await
> -        })
> -        .await;
> -    }
> -
> -    tokio::task::spawn_blocking(move || {
> -        rrd_common::create_datapoints_from_rrd(&basepath, timeframe, mode)
> -    })
> -    .await?
> +    rrd_common::get_rrd_datapoints(remote, base, timeframe, cf).await
>   }
>   
>   pub const QEMU_RRD_ROUTER: Router = Router::new().get(&API_METHOD_GET_QEMU_RRD_DATA);
> diff --git a/server/src/api/rrd_common.rs b/server/src/api/rrd_common.rs
> index 28868bc1..b5d1a786 100644
> --- a/server/src/api/rrd_common.rs
> +++ b/server/src/api/rrd_common.rs
> @@ -1,9 +1,10 @@
> -use std::collections::BTreeMap;
> +use std::{collections::BTreeMap, time::Duration};
>   
>   use anyhow::{bail, Error};
> +
>   use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
>   
> -use crate::metric_collection::rrd_cache;
> +use crate::metric_collection::{self, rrd_cache};
>   
>   /// Trait common to all RRD-stored metric objects (nodes, datastores, qemu, lxc, etc.)
>   pub trait DataPoint {
> @@ -53,3 +54,31 @@ pub fn create_datapoints_from_rrd<T: DataPoint>(
>   
>       Ok(timemap.into_values().collect())
>   }
> +
> +/// Get RRD datapoints for a given remote/RRD path.
> +///
> +/// If `timeframe` is set to [`RrdTimeframe::Hour`], then this function will trigger
> +/// metric collection for this remote and wait for its completion, up to a timeout of five
> +/// seconds. If the timeout is exceeded, we simply go ahead and return what is in the database at
> +/// the moment, which might have a gap for the last couple of minutes.
> +pub async fn get_rrd_datapoints<T: DataPoint + Send + 'static>(
> +    remote: String,
> +    basepath: String,
> +    timeframe: RrdTimeframe,
> +    mode: RrdMode,
> +) -> Result<Vec<T>, Error> {
> +    const WAIT_FOR_NEWEST_METRIC_TIMEOUT: Duration = Duration::from_secs(5);
> +
> +    if timeframe == RrdTimeframe::Hour {
> +        // Let's wait for a limited time for the most recent metrics. If the connection to the remote
> +        // is super slow or if the metric collection task is currently busy with collecting other
> +        // metrics, we just return the data we already have, not the newest one.
> +        let _ = tokio::time::timeout(WAIT_FOR_NEWEST_METRIC_TIMEOUT, async {
> +            metric_collection::trigger_metric_collection(Some(remote), true).await
> +        })
> +        .await;
> +    }
> +
> +    tokio::task::spawn_blocking(move || create_datapoints_from_rrd(&basepath, timeframe, mode))
> +        .await?
> +}





More information about the pdm-devel mailing list