[pdm-devel] [PATCH proxmox-datacenter-manager 17/25] metric collection: save time needed for collection run to RRD
Lukas Wagner
l.wagner at proxmox.com
Tue Feb 11 13:05:33 CET 2025
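For large setups, it is useful to know how much time was needed to
collect metrics for *all* remotes together, e.g. to make sure that a
full collection run does not take longer than the collection interval.
The total run time (in milliseconds) is stored in the RRD under
`local/metric-collection/total-time`.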
Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
.../src/metric_collection/collection_task.rs | 15 ++++++
server/src/metric_collection/rrd_task.rs | 50 +++++++++++++++----
2 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/server/src/metric_collection/collection_task.rs b/server/src/metric_collection/collection_task.rs
index 27a9987..501df9b 100644
--- a/server/src/metric_collection/collection_task.rs
+++ b/server/src/metric_collection/collection_task.rs
@@ -22,6 +22,7 @@ use pdm_api_types::{
};
use pdm_config::metric_collection::COLLECTION_SETTINGS_TYPE;
+use crate::metric_collection::rrd_task::CollectionStats;
use crate::{connection, task_utils};
use super::{
@@ -97,6 +98,20 @@ impl MetricCollectionTask {
if let Some(remotes) = Self::load_remote_config() {
let to_fetch = remotes.order.as_slice();
self.fetch_remotes(&remotes, to_fetch).await;
+
+ let now = Instant::now();
+ self.fetch_remotes(&remotes, &remotes.order).await;
+ let elapsed = now.elapsed();
+
+ if let Err(err) = self.metric_data_tx.send(
+ RrdStoreRequest::CollectionStats {
+ timestamp: proxmox_time::epoch_i64(),
+ stats: CollectionStats {
+ total_time: elapsed.as_secs_f64() * 1000.
+ }
+ }).await {
+ log::error!("could not send collection stats to rrd task: {err}");
+ }
}
}
diff --git a/server/src/metric_collection/rrd_task.rs b/server/src/metric_collection/rrd_task.rs
index 7f20d2d..6a4bacf 100644
--- a/server/src/metric_collection/rrd_task.rs
+++ b/server/src/metric_collection/rrd_task.rs
@@ -38,6 +38,13 @@ pub(super) enum RrdStoreRequest {
/// Timestamp at which the request was done.
request_at: i64,
},
+ /// Store collection stats.
+ CollectionStats {
+ /// Timestamp at which the collection took place.
+ timestamp: i64,
+ /// Statistics.
+ stats: CollectionStats,
+ },
}
/// Result for a [`RrdStoreRequest`].
@@ -46,6 +53,12 @@ pub(super) struct RrdStoreResult {
pub(super) most_recent_timestamp: i64,
}
+/// Statistics for a (full) metric collection run.
+pub(super) struct CollectionStats {
+ /// Total time in ms
+ pub(super) total_time: f64,
+}
+
/// Task which stores received metrics in the RRD. Metric data is fed into
/// this task via a MPSC channel.
pub(super) async fn store_in_rrd_task(
@@ -57,7 +70,8 @@ pub(super) async fn store_in_rrd_task(
//// Involves some blocking file IO
tokio::task::spawn_blocking(move || {
let mut most_recent_timestamp = 0;
- let channel = match msg {
+
+ match msg {
RrdStoreRequest::Pve {
remote,
metrics,
@@ -71,7 +85,13 @@ pub(super) async fn store_in_rrd_task(
}
store_response_time(&cache_clone, &remote, response_time, request_at);
- channel
+ let result = RrdStoreResult {
+ most_recent_timestamp,
+ };
+
+ if let Err(_err) = channel.send(result) {
+ log::error!("could not send RrdStoreStoreResult to metric collection task");
+ };
}
RrdStoreRequest::Pbs {
remote,
@@ -86,14 +106,17 @@ pub(super) async fn store_in_rrd_task(
}
store_response_time(&cache_clone, &remote, response_time, request_at);
- channel
- }
- };
+ let result = RrdStoreResult {
+ most_recent_timestamp,
+ };
- if let Err(_err) = channel.send(RrdStoreResult {
- most_recent_timestamp,
- }) {
- log::error!("could not send RrdStoreStoreResult to metric collection task");
+ if let Err(_err) = channel.send(result) {
+ log::error!("could not send RrdStoreStoreResult to metric collection task");
+ };
+ }
+ RrdStoreRequest::CollectionStats { timestamp, stats } => {
+ store_stats(&cache_clone, &stats, timestamp)
+ }
};
})
.await?;
@@ -150,6 +173,15 @@ fn store_response_time(cache: &RrdCache, remote_name: &str, response_time: f64,
cache.update_value(&name, response_time, timestamp, DataSourceType::Gauge);
}
+fn store_stats(cache: &RrdCache, stats: &CollectionStats, timestamp: i64) {
+ cache.update_value(
+ "local/metric-collection/total-time",
+ stats.total_time,
+ timestamp,
+ DataSourceType::Gauge,
+ );
+}
+
#[cfg(test)]
mod tests {
use proxmox_rrd_api_types::{RrdMode, RrdTimeframe};
--
2.39.5
More information about the pdm-devel mailing list