[pdm-devel] [PATCH proxmox-datacenter-manager 16/25] metric collection: record remote response time in metric database

Lukas Wagner l.wagner at proxmox.com
Tue Feb 11 13:05:32 CET 2025


This gives us the ability to retrieve max/avg response times for a given
time window.

Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
 .../src/metric_collection/collection_task.rs  | 38 +++++++++++--------
 server/src/metric_collection/rrd_task.rs      | 31 +++++++++++++++
 2 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/server/src/metric_collection/collection_task.rs b/server/src/metric_collection/collection_task.rs
index f349056..27a9987 100644
--- a/server/src/metric_collection/collection_task.rs
+++ b/server/src/metric_collection/collection_task.rs
@@ -363,6 +363,7 @@ impl MetricCollectionTask {
         let (result_tx, result_rx) = oneshot::channel();
 
         let now = proxmox_time::epoch_i64();
+        let start = Instant::now();
 
         let res: Result<RrdStoreResult, Error> = async {
             match remote.ty {
@@ -376,11 +377,15 @@ impl MetricCollectionTask {
                         )
                         .await?;
 
+                    let duration = start.elapsed();
+
                     sender
                         .send(RrdStoreRequest::Pve {
                             remote: remote.id.clone(),
                             metrics,
                             channel: result_tx,
+                            response_time: duration.as_secs_f64() * 1000.,
+                            request_at: now,
                         })
                         .await?;
                 }
@@ -390,15 +395,19 @@ impl MetricCollectionTask {
                         .metrics(Some(true), Some(status.most_recent_datapoint))
                         .await?;
 
+                    let duration = start.elapsed();
+
                     sender
                         .send(RrdStoreRequest::Pbs {
                             remote: remote.id.clone(),
                             metrics,
                             channel: result_tx,
+                            response_time: duration.as_secs_f64() * 1000.,
+                            request_at: now,
                         })
                         .await?;
                 }
-            }
+            };
 
             result_rx.await.map_err(Error::from)
         }
@@ -582,20 +591,19 @@ pub(super) mod tests {
         while let Some(request) = rx.recv().await {
             number_of_requests += 1;
 
-            match request {
-                RrdStoreRequest::Pve {
-                    remote: _,
-                    metrics,
-                    channel,
-                } => {
-                    let most_recent_timestamp =
-                        metrics.data.iter().fold(0, |acc, e| acc.max(e.timestamp));
-
-                    let _ = channel.send(RrdStoreResult {
-                        most_recent_timestamp,
-                    });
-                }
-                _ => {}
+            if let RrdStoreRequest::Pve {
+                remote: _,
+                metrics,
+                channel,
+                ..
+            } = request
+            {
+                let most_recent_timestamp =
+                    metrics.data.iter().fold(0, |acc, e| acc.max(e.timestamp));
+
+                let _ = channel.send(RrdStoreResult {
+                    most_recent_timestamp,
+                });
             }
         }
 
diff --git a/server/src/metric_collection/rrd_task.rs b/server/src/metric_collection/rrd_task.rs
index 89fe4d3..7f20d2d 100644
--- a/server/src/metric_collection/rrd_task.rs
+++ b/server/src/metric_collection/rrd_task.rs
@@ -20,6 +20,10 @@ pub(super) enum RrdStoreRequest {
         metrics: ClusterMetrics,
         /// Oneshot channel to return the [`RrdStoreResult`].
         channel: oneshot::Sender<RrdStoreResult>,
+        /// Response time in ms for the API request.
+        response_time: f64,
+        /// Timestamp at which the request was done.
+        request_at: i64,
     },
     /// Store PBS metrics.
     Pbs {
@@ -29,6 +33,10 @@ pub(super) enum RrdStoreRequest {
         metrics: Metrics,
         /// Oneshot channel to return the [`RrdStoreResult`].
         channel: oneshot::Sender<RrdStoreResult>,
+        /// Response time in ms for the API request.
+        response_time: f64,
+        /// Timestamp at which the request was done.
+        request_at: i64,
     },
 }
 
@@ -54,11 +62,14 @@ pub(super) async fn store_in_rrd_task(
                     remote,
                     metrics,
                     channel,
+                    response_time,
+                    request_at,
                 } => {
                     for data_point in metrics.data {
                         most_recent_timestamp = most_recent_timestamp.max(data_point.timestamp);
                         store_metric_pve(&cache_clone, &remote, &data_point);
                     }
+                    store_response_time(&cache_clone, &remote, response_time, request_at);
 
                     channel
                 }
@@ -66,11 +77,14 @@ pub(super) async fn store_in_rrd_task(
                     remote,
                     metrics,
                     channel,
+                    response_time,
+                    request_at,
                 } => {
                     for data_point in metrics.data {
                         most_recent_timestamp = most_recent_timestamp.max(data_point.timestamp);
                         store_metric_pbs(&cache_clone, &remote, &data_point);
                     }
+                    store_response_time(&cache_clone, &remote, response_time, request_at);
 
                     channel
                 }
@@ -130,6 +144,12 @@ fn store_metric_pbs(cache: &RrdCache, remote_name: &str, data_point: &MetricData
     );
 }
 
+fn store_response_time(cache: &RrdCache, remote_name: &str, response_time: f64, timestamp: i64) {
+    let name = format!("local/metric-collection/remotes/{remote_name}/response-time");
+
+    cache.update_value(&name, response_time, timestamp, DataSourceType::Gauge);
+}
+
 #[cfg(test)]
 mod tests {
     use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
@@ -192,6 +212,8 @@ mod tests {
             remote: "some-remote".into(),
             metrics,
             channel: tx_back,
+            response_time: 10.0,
+            request_at: now,
         };
 
         // Act
@@ -218,6 +240,15 @@ mod tests {
             assert!(data.data.iter().any(Option::is_some));
         }
 
+        if let Some(data) = cache.extract_data(
+            "local/metric-collection/remotes/some-remote",
+            "response-time",
+            RrdTimeframe::Hour,
+            RrdMode::Max,
+        )? {
+            assert!(data.data.iter().any(Option::is_some));
+        }
+
         Ok(())
     }
 }
-- 
2.39.5





More information about the pdm-devel mailing list