[pdm-devel] [RFC proxmox-datacenter-manager 1/2] server: add convenience helper to fetch results from many remote nodes in parallel

Lukas Wagner l.wagner at proxmox.com
Thu Aug 28 16:42:46 CEST 2025


Implementing this over and over again is a lot of work; many parts of
PDM need to fetch some kind of resource from all remotes and all nodes
of these remotes.

The general approach is the same as for remote task fetching (JoinSet
and semaphores to control concurrency), it will be refactored in a later
commit to use this new helper.

Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
 server/src/lib.rs              |   1 +
 server/src/parallel_fetcher.rs | 236 +++++++++++++++++++++++++++++++++
 2 files changed, 237 insertions(+)
 create mode 100644 server/src/parallel_fetcher.rs

diff --git a/server/src/lib.rs b/server/src/lib.rs
index 485bc792..3f8b7708 100644
--- a/server/src/lib.rs
+++ b/server/src/lib.rs
@@ -6,6 +6,7 @@ pub mod auth;
 pub mod context;
 pub mod env;
 pub mod metric_collection;
+pub mod parallel_fetcher;
 pub mod remote_cache;
 pub mod remote_tasks;
 pub mod resource_cache;
diff --git a/server/src/parallel_fetcher.rs b/server/src/parallel_fetcher.rs
new file mode 100644
index 00000000..3fad68a0
--- /dev/null
+++ b/server/src/parallel_fetcher.rs
@@ -0,0 +1,236 @@
+use std::{
+    collections::HashMap,
+    fmt::Debug,
+    future::Future,
+    sync::Arc,
+    time::{Duration, Instant},
+};
+
+use anyhow::Error;
+use pdm_api_types::remotes::{Remote, RemoteType};
+use pve_api_types::ClusterNodeIndexResponse;
+use tokio::{
+    sync::{OwnedSemaphorePermit, Semaphore},
+    task::JoinSet,
+};
+
+use crate::connection;
+
+pub struct ParallelFetcher<C> {
+    pub max_connections: usize,
+    pub max_connections_per_remote: usize,
+    pub context: C,
+}
+
+pub struct FetchResults<T> {
+    pub remote_results: HashMap<String, Result<RemoteResult<T>, Error>>,
+}
+
+impl<T> Default for FetchResults<T> {
+    fn default() -> Self {
+        Self {
+            remote_results: Default::default(),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct RemoteResult<T> {
+    pub node_results: HashMap<String, Result<NodeResults<T>, Error>>,
+}
+
+impl<T> Default for RemoteResult<T> {
+    fn default() -> Self {
+        Self {
+            node_results: Default::default(),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct NodeResults<T> {
+    pub data: T,
+    pub api_response_time: Duration,
+}
+
+impl<C: Clone + Send + 'static> ParallelFetcher<C> {
+    pub fn new(context: C) -> Self {
+        Self {
+            max_connections: 20,
+            max_connections_per_remote: 5,
+            context,
+        }
+    }
+
+    pub async fn do_for_all_remote_nodes<A, F, T, Ft>(self, remotes: A, func: F) -> FetchResults<T>
+    where
+        A: Iterator<Item = Remote>,
+        F: Fn(C, Remote, String) -> Ft + Clone + Send + 'static,
+        Ft: Future<Output = Result<T, Error>> + Send + 'static,
+        T: Send + Debug + 'static,
+    {
+        let total_connections_semaphore = Arc::new(Semaphore::new(self.max_connections));
+
+        let mut remote_join_set = JoinSet::new();
+
+        for remote in remotes {
+            let semaphore = Arc::clone(&total_connections_semaphore);
+
+            let f = func.clone();
+
+            remote_join_set.spawn(Self::fetch_remote(
+                remote,
+                self.context.clone(),
+                semaphore,
+                f,
+                self.max_connections_per_remote,
+            ));
+        }
+
+        let mut results = FetchResults::default();
+
+        while let Some(a) = remote_join_set.join_next().await {
+            match a {
+                Ok((remote_name, remote_result)) => {
+                    results.remote_results.insert(remote_name, remote_result);
+                }
+                Err(err) => {
+                    log::error!("join error when waiting for future: {err}")
+                }
+            }
+        }
+
+        results
+    }
+
+    async fn fetch_remote<F, Ft, T>(
+        remote: Remote,
+        context: C,
+        semaphore: Arc<Semaphore>,
+        func: F,
+        max_connections_per_remote: usize,
+    ) -> (String, Result<RemoteResult<T>, Error>)
+    where
+        F: Fn(C, Remote, String) -> Ft + Clone + Send + 'static,
+        Ft: Future<Output = Result<T, Error>> + Send + 'static,
+        T: Send + Debug + 'static,
+    {
+        let mut per_remote_results = RemoteResult::default();
+
+        let mut permit = Some(Arc::clone(&semaphore).acquire_owned().await.unwrap());
+        let per_remote_semaphore = Arc::new(Semaphore::new(max_connections_per_remote));
+
+        match remote.ty {
+            RemoteType::Pve => {
+                let remote_clone = remote.clone();
+
+                let nodes = match async move {
+                    let client = connection::make_pve_client(&remote_clone)?;
+                    let nodes = client.list_nodes().await?;
+
+                    Ok::<Vec<ClusterNodeIndexResponse>, Error>(nodes)
+                }
+                .await
+                {
+                    Ok(nodes) => nodes,
+                    Err(err) => return (remote.id.clone(), Err(err)),
+                };
+
+                let mut nodes_join_set = JoinSet::new();
+
+                for node in nodes {
+                    let permit = if let Some(permit) = permit.take() {
+                        permit
+                    } else {
+                        Arc::clone(&semaphore).acquire_owned().await.unwrap()
+                    };
+
+                    let per_remote_connections_permit = Arc::clone(&per_remote_semaphore)
+                        .acquire_owned()
+                        .await
+                        .unwrap();
+
+                    let func_clone = func.clone();
+                    let remote_clone = remote.clone();
+                    let node_name = node.node.clone();
+                    let context_clone = context.clone();
+
+                    nodes_join_set.spawn(Self::fetch_pve_node(
+                        func_clone,
+                        context_clone,
+                        remote_clone,
+                        node_name,
+                        permit,
+                        per_remote_connections_permit,
+                    ));
+                }
+
+                while let Some(join_result) = nodes_join_set.join_next().await {
+                    match join_result {
+                        Ok((node_name, per_node_result)) => {
+                            per_remote_results
+                                .node_results
+                                .insert(node_name, per_node_result);
+                        }
+                        Err(e) => {
+                            log::error!("join error when waiting for future: {e}")
+                        }
+                    }
+                }
+            }
+            RemoteType::Pbs => {
+                let node = "localhost".to_string();
+
+                let now = Instant::now();
+                let result = func(context, remote.clone(), node.clone()).await;
+                let api_response_time = now.elapsed();
+
+                match result {
+                    Ok(data) => {
+                        per_remote_results.node_results.insert(
+                            node,
+                            Ok(NodeResults {
+                                data,
+                                api_response_time,
+                            }),
+                        );
+                    }
+                    Err(err) => {
+                        per_remote_results.node_results.insert(node, Err(err));
+                    }
+                }
+            }
+        }
+
+        (remote.id, Ok(per_remote_results))
+    }
+
+    async fn fetch_pve_node<F, Ft, T>(
+        func: F,
+        context: C,
+        remote: Remote,
+        node: String,
+        _permit: OwnedSemaphorePermit,
+        _per_remote_connections_permit: OwnedSemaphorePermit,
+    ) -> (String, Result<NodeResults<T>, Error>)
+    where
+        F: Fn(C, Remote, String) -> Ft + Clone + Send + 'static,
+        Ft: Future<Output = Result<T, Error>> + Send + 'static,
+        T: Send + Debug + 'static,
+    {
+        let now = Instant::now();
+        let result = func(context, remote.clone(), node.clone()).await;
+        let api_response_time = now.elapsed();
+
+        match result {
+            Ok(data) => (
+                node,
+                Ok(NodeResults {
+                    data,
+                    api_response_time,
+                }),
+            ),
+            Err(err) => (node, Err(err)),
+        }
+    }
+}
-- 
2.47.2





More information about the pdm-devel mailing list