[pdm-devel] [PATCH datacenter-manager 7/7] server: add some tracing instrumentation

Wolfgang Bumiller w.bumiller at proxmox.com
Tue Feb 4 10:55:54 CET 2025


Add tracing spans and additional log messages to help debug the client usage.
To see the messages, set PROXMOX_DEBUG=trace and filter the journal by span
name, for instance:

    # journalctl -f SPAN_NAME=remote_node_caching

Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
---
 .../tasks/remote_node_mapping.rs                   | 14 ++++++++------
 server/src/connection.rs                           | 10 ++++++++++
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/server/src/bin/proxmox-datacenter-api/tasks/remote_node_mapping.rs b/server/src/bin/proxmox-datacenter-api/tasks/remote_node_mapping.rs
index 5912365..f678d4c 100644
--- a/server/src/bin/proxmox-datacenter-api/tasks/remote_node_mapping.rs
+++ b/server/src/bin/proxmox-datacenter-api/tasks/remote_node_mapping.rs
@@ -68,6 +68,7 @@ impl CachingTask {
     }
 
     /// A single iteration of the caching task.
+    #[tracing::instrument(skip_all, name = "remote_node_caching")]
     async fn run_once(&mut self) {
         let (config, digest) = match pdm_config::remotes::config() {
             Ok(cd) => cd,
@@ -82,21 +83,19 @@ impl CachingTask {
             .as_ref()
             .is_none_or(|d| digest != *d)
         {
-            tracing::debug!("new config - updating remote node name cache");
+            log::trace!("new config - updating remote node name cache");
             self.last_config_digest = Some(digest);
 
             // the config got updated - abort the current name-fetching task, we'll
             // spawn a new one
             if let Some(name_task) = self.current_name_task.take() {
-                tracing::debug!("aborting query task");
+                log::trace!("aborting query task");
                 name_task.abort();
             }
 
             if let Err(err) = self.config_update(&config) {
                 log::error!("error updating remote node cache: {err:?}");
             }
-            //} else {
-            //    tracing::debug!("no change to the config");
         }
 
         if self
@@ -104,7 +103,7 @@ impl CachingTask {
             .as_ref()
             .is_none_or(|task| task.is_finished())
         {
-            log::debug!("name task finished, starting reachability query task");
+            log::trace!("name task finished, starting reachability query task");
             self.current_name_task =
                 Some(spawn_aborted_on_shutdown(Self::query_node_names(config)));
         }
@@ -168,8 +167,10 @@ impl CachingTask {
         }
     }
 
+    #[tracing::instrument(skip_all)]
     async fn query_node_names(config: SectionConfigData<Remote>) {
         for (_name, remote) in &config {
+            log::trace!("update remote {:?}", remote.id);
             if let Err(err) = Self::query_node_names_for_remote(remote).await {
                 log::error!("error updating node name cache - {err:?}");
             }
@@ -184,7 +185,7 @@ impl CachingTask {
 
         // now add new nodes
         for node in &remote.nodes {
-            tracing::debug!("querying remote {:?} node {:?}", remote.id, node.hostname);
+            log::debug!("querying remote {:?} node {:?}", remote.id, node.hostname);
 
             // if the host is new, we need to query its name
             let query_result = match query_node_name(remote, &node.hostname).await {
@@ -215,6 +216,7 @@ impl CachingTask {
 
 /// Calls `/cluster/status` directly on a specific node to find its name.
 async fn query_node_name(remote: &Remote, hostname: &str) -> Result<String, Error> {
+    log::trace!("querying node name {hostname:?} for remote {:?}", remote.id);
     let client = server::connection::make_pve_client_with_endpoint(remote, Some(hostname))?;
     let node_status_list = client.cluster_status().await?;
     for node in node_status_list {
diff --git a/server/src/connection.rs b/server/src/connection.rs
index 7ba38f1..397b62f 100644
--- a/server/src/connection.rs
+++ b/server/src/connection.rs
@@ -603,6 +603,7 @@ struct TryClient {
 
 impl TryClient {
     fn reachable(entry: &MultiClientEntry) -> Self {
+        log::trace!("trying reachable client for host {:?}", entry.hostname);
         Self {
             client: Arc::clone(&entry.client),
             hostname: entry.hostname.clone(),
@@ -611,6 +612,10 @@ impl TryClient {
     }
 
     fn unreachable(entry: &MultiClientEntry) -> Self {
+        log::trace!(
+            "trying previously unreachable client for host {:?}",
+            entry.hostname
+        );
         Self {
             client: Arc::clone(&entry.client),
             hostname: entry.hostname.clone(),
@@ -637,6 +642,8 @@ impl MultiClient {
         let mut try_unreachable = None::<std::vec::IntoIter<_>>;
 
         std::iter::from_fn(move || {
+            let _enter = tracing::span!(tracing::Level::TRACE, "multi_client_iterator").entered();
+
             let mut state = state.lock().unwrap();
 
             if let Some(ref mut try_unreachable) = try_unreachable {
@@ -650,6 +657,7 @@ impl MultiClient {
                     // first attempt, just use the current client and remember the starting index
                     let (client, index) = state.get();
                     start_current = Some((index, index));
+                    log::trace!("trying reachable client {index}");
                     Some(TryClient::reachable(client))
                 }
                 Some((start, current)) => {
@@ -674,9 +682,11 @@ impl MultiClient {
                     // remember all the clients we skipped:
                     let mut at = current + 1;
                     while at != new_current {
+                        log::trace!("(remembering unreachable client {at})");
                         unreachable_clients.push(at);
                         at = at.wrapping_add(1);
                     }
+                    log::trace!("trying reachable client {new_current}");
                     Some(TryClient::reachable(client))
                 }
             }
-- 
2.39.5





More information about the pdm-devel mailing list