[pdm-devel] [PATCH datacenter-manager 4/7] server: cache pve node reachability and names

Wolfgang Bumiller w.bumiller at proxmox.com
Tue Feb 4 10:55:51 CET 2025


Add a `remote_cache` module to access the cache which maps
(remote, host) to a struct holding the node name and a "reachable"
boolean. This can be used to connect to specific nodes or skip nodes
known to be currently unreachable (although the latter part is not
currently implemented).
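
For illustration, reading the cache could look like this (a minimal
sketch; the remote and node names are made up):

    use server::remote_cache::RemoteMappingCache;

    fn print_node_host(remote: &str, node: &str) {
        // get() returns an Arc snapshot of the current cache contents
        let cache = RemoteMappingCache::get();
        match cache.info_by_node_name(remote, node) {
            Some(info) if info.reachable => {
                println!("node {node} is reachable via host {}", info.hostname);
            }
            Some(_) => println!("node {node} is currently marked unreachable"),
            None => println!("node {node} is not cached (yet)"),
        }
    }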

The ClientFactory trait gains helpers to connect to specific nodes.
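
For example, a hypothetical caller that wants to talk to a specific
cluster node by name could do:

    use anyhow::Error;
    use pdm_api_types::remotes::Remote;

    async fn print_cluster_status(remote: &Remote, node: &str) -> Result<(), Error> {
        // connects to the node's cached hostname if it is known and
        // reachable, otherwise falls back to plain make_pve_client()
        let client = server::connection::make_pve_client_with_node(remote, node)?;
        for entry in client.cluster_status().await? {
            println!("{}: local={:?}", entry.name, entry.local);
        }
        Ok(())
    }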

The unprivileged API gets a `tasks` submodule with a task which
fills the node cache and watches the remote config for updates (via
the config version cache).
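
The task and other processes synchronize via the new generation
counter in the config version cache: the writer bumps it after
replacing the cache file, readers compare it against a remembered
value to decide whether their in-memory copy is stale. Roughly (a
sketch built on the helpers added in this patch):

    use pdm_config::ConfigVersionCache;

    // writer side: bump the shared counter after replacing the cache file
    fn bump_generation() {
        if let Some(cache) = ConfigVersionCache::new_log_error() {
            cache.increase_remote_mapping_cache();
        }
    }

    // reader side: a remembered generation tells us whether to reload
    fn is_stale(last_generation: usize) -> bool {
        ConfigVersionCache::new_log_error()
            .map(|cache| cache.remote_mapping_cache() != last_generation)
            .unwrap_or(true)
    }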

Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
---
 lib/pdm-config/src/config_version_cache.rs    |  33 +-
 .../main.rs}                                  |   3 +
 .../bin/proxmox-datacenter-api/tasks/mod.rs   |   1 +
 .../tasks/remote_node_mapping.rs              | 226 ++++++++++++++
 server/src/connection.rs                      |  32 ++
 server/src/lib.rs                             |   1 +
 server/src/remote_cache/mod.rs                | 285 ++++++++++++++++++
 7 files changed, 580 insertions(+), 1 deletion(-)
 rename server/src/bin/{proxmox-datacenter-api.rs => proxmox-datacenter-api/main.rs} (99%)
 create mode 100644 server/src/bin/proxmox-datacenter-api/tasks/mod.rs
 create mode 100644 server/src/bin/proxmox-datacenter-api/tasks/remote_node_mapping.rs
 create mode 100644 server/src/remote_cache/mod.rs

diff --git a/lib/pdm-config/src/config_version_cache.rs b/lib/pdm-config/src/config_version_cache.rs
index b4a47a8..5c4b61a 100644
--- a/lib/pdm-config/src/config_version_cache.rs
+++ b/lib/pdm-config/src/config_version_cache.rs
@@ -25,6 +25,8 @@ struct ConfigVersionCacheDataInner {
     user_cache_generation: AtomicUsize,
     // Traffic control (traffic-control.cfg) generation/version.
     traffic_control_generation: AtomicUsize,
+    // Tracks updates to the remote/hostname/nodename mapping cache.
+    remote_mapping_cache: AtomicUsize,
     // Add further atomics here
 }
 
@@ -81,11 +83,23 @@ pub struct ConfigVersionCache {
 static INSTANCE: OnceCell<Arc<ConfigVersionCache>> = OnceCell::new();
 
 impl ConfigVersionCache {
-    /// Open the memory based communication channel singleton.
+    /// Open the memory-backed version cache.
     pub fn new() -> Result<Arc<Self>, Error> {
         INSTANCE.get_or_try_init(Self::open).cloned()
     }
 
+    /// Convenience method to call [`ConfigVersionCache::new`] while turning the error into a log
+    /// message.
+    pub fn new_log_error() -> Option<Arc<Self>> {
+        match Self::new() {
+            Ok(this) => Some(this),
+            Err(err) => {
+                log::error!("failed to open config version cache - {err:?}");
+                None
+            }
+        }
+    }
+
     // Actual work of `new`:
     fn open() -> Result<Arc<Self>, Error> {
         let user = crate::api_user()?;
@@ -141,4 +155,21 @@ impl ConfigVersionCache {
             .traffic_control_generation
             .fetch_add(1, Ordering::AcqRel);
     }
+
+    /// Return the current remote mapping cache generation.
+    pub fn remote_mapping_cache(&self) -> usize {
+        self.shmem
+            .data()
+            .remote_mapping_cache
+            .load(Ordering::Relaxed)
+    }
+
+    /// Increase the remote mapping cache generation.
+    pub fn increase_remote_mapping_cache(&self) -> usize {
+        self.shmem
+            .data()
+            .remote_mapping_cache
+            .fetch_add(1, Ordering::Relaxed)
+            + 1
+    }
 }
diff --git a/server/src/bin/proxmox-datacenter-api.rs b/server/src/bin/proxmox-datacenter-api/main.rs
similarity index 99%
rename from server/src/bin/proxmox-datacenter-api.rs
rename to server/src/bin/proxmox-datacenter-api/main.rs
index a79094d..25852c8 100644
--- a/server/src/bin/proxmox-datacenter-api.rs
+++ b/server/src/bin/proxmox-datacenter-api/main.rs
@@ -31,6 +31,8 @@ use server::metric_collection;
 use server::resource_cache;
 use server::task_utils;
 
+mod tasks;
+
 pub const PROXMOX_BACKUP_TCP_KEEPALIVE_TIME: u32 = 5 * 60;
 
 const PDM_LISTEN_ADDR: SocketAddr = SocketAddr::new(
@@ -287,6 +289,7 @@ async fn run(debug: bool) -> Result<(), Error> {
 
     start_task_scheduler();
     metric_collection::start_task();
+    tasks::remote_node_mapping::start_task();
     resource_cache::start_task();
 
     server.await?;
diff --git a/server/src/bin/proxmox-datacenter-api/tasks/mod.rs b/server/src/bin/proxmox-datacenter-api/tasks/mod.rs
new file mode 100644
index 0000000..e6ead88
--- /dev/null
+++ b/server/src/bin/proxmox-datacenter-api/tasks/mod.rs
@@ -0,0 +1 @@
+pub mod remote_node_mapping;
diff --git a/server/src/bin/proxmox-datacenter-api/tasks/remote_node_mapping.rs b/server/src/bin/proxmox-datacenter-api/tasks/remote_node_mapping.rs
new file mode 100644
index 0000000..5912365
--- /dev/null
+++ b/server/src/bin/proxmox-datacenter-api/tasks/remote_node_mapping.rs
@@ -0,0 +1,226 @@
+//! PVE remotes have multiple nodes which have names we cannot necessarily infer from the
+//! "hostname" field, since that might be a different address, potentially with a port.
+//!
+//! We also do not want users to have to maintain the PDM host/node-name combinations (in case they
+//! rename or reinstall nodes). Renaming would break the PDM config, reinstalling would break e.g. a
+//! "machine-id" based mapping.
+//!
+//! We also cannot rely on the TLS fingerprints, because a whole cluster could potentially use a
+//! single wildcard certificate.
+//!
+//! Instead, we maintain a cached mapping of `address ↔ name` in `/var`, which gets polled
+//! regularly.
+//! For PVE we can query an address' `/cluster/status` and look for an entry marked as `local:1`.
+//! Later this might be changed to looking for the node name in the result of
+//! `/nodes/localhost/status` - once this is implemented and rolled out long enough in PVE.
+
+use std::future::Future;
+use std::pin::pin;
+
+use anyhow::{bail, Error};
+use tokio::task::JoinHandle;
+
+use proxmox_config_digest::ConfigDigest;
+use proxmox_section_config::typed::SectionConfigData;
+
+use pdm_api_types::remotes::{Remote, RemoteType};
+
+use server::remote_cache::{self, RemoteMappingCache};
+use server::task_utils;
+
+const CONFIG_POLL_INTERVAL: u64 = 60;
+
+fn spawn_aborted_on_shutdown<F>(future: F) -> JoinHandle<()>
+where
+    F: Future + Send + 'static,
+{
+    tokio::spawn(async move {
+        // TODO: The wrapping in a select(shutdown_future, fut) should probably be a helper in
+        // `proxmox_daemon`.
+        let future = pin!(future);
+        let abort_future = pin!(proxmox_daemon::shutdown_future());
+        futures::future::select(future, abort_future).await;
+    })
+}
+
+pub fn start_task() {
+    spawn_aborted_on_shutdown(CachingTask::default().run());
+}
+
+async fn poll_interval() {
+    let delay_target = task_utils::next_aligned_instant(CONFIG_POLL_INTERVAL);
+    tokio::time::sleep_until(tokio::time::Instant::from_std(delay_target)).await;
+}
+
+#[derive(Default)]
+struct CachingTask {
+    current_name_task: Option<JoinHandle<()>>,
+    last_config_digest: Option<ConfigDigest>,
+}
+
+impl CachingTask {
+    /// The caching task's main entry point.
+    async fn run(mut self) {
+        loop {
+            self.run_once().await;
+            poll_interval().await;
+        }
+    }
+
+    /// A single iteration of the caching task.
+    async fn run_once(&mut self) {
+        let (config, digest) = match pdm_config::remotes::config() {
+            Ok(cd) => cd,
+            Err(err) => {
+                log::error!("failed to load remote config, not updating cache - {err:?}");
+                return;
+            }
+        };
+
+        if self
+            .last_config_digest
+            .as_ref()
+            .is_none_or(|d| digest != *d)
+        {
+            tracing::debug!("new config - updating remote node name cache");
+            self.last_config_digest = Some(digest);
+
+            // the config got updated - abort the current name-fetching task, we'll
+            // spawn a new one
+            if let Some(name_task) = self.current_name_task.take() {
+                tracing::debug!("aborting query task");
+                name_task.abort();
+            }
+
+            if let Err(err) = self.config_update(&config) {
+                log::error!("error updating remote node cache: {err:?}");
+            }
+            //} else {
+            //    tracing::debug!("no change to the config");
+        }
+
+        if self
+            .current_name_task
+            .as_ref()
+            .is_none_or(|task| task.is_finished())
+        {
+            log::debug!("no name query task running, starting a new one");
+            self.current_name_task =
+                Some(spawn_aborted_on_shutdown(Self::query_node_names(config)));
+        }
+    }
+
+    /// If the `remotes.cfg` was updated we need to go over all the remotes and see if there is any
+    /// information we need to update.
+    fn config_update(&mut self, config: &SectionConfigData<Remote>) -> Result<(), Error> {
+        let mut cache = RemoteMappingCache::write()?;
+
+        // prune remotes which were removed:
+        cache
+            .remotes
+            .retain(|name, _entry| config.contains_key(name));
+
+        // now update the existing remotes:
+        for (name, remote) in config {
+            self.prune_remote_nodes(&mut cache, name, remote);
+        }
+
+        cache.save()?;
+
+        Ok(())
+    }
+
+    fn prune_remote_nodes(&mut self, cache: &mut RemoteMappingCache, name: &str, remote: &Remote) {
+        let entry = cache
+            .remotes
+            .entry(name.to_string())
+            .or_insert_with(|| remote_cache::RemoteMapping::new(remote.ty));
+
+        // if the entry changed type, clear it
+        if entry.ty != remote.ty {
+            *entry = remote_cache::RemoteMapping::new(remote.ty);
+        }
+
+        // Only PVE entries currently have a node cache, so skip non-PVE remotes:
+        if remote.ty != RemoteType::Pve {
+            return;
+        }
+
+        // prune nodes which were removed:
+        entry.hosts.retain(|hostname, info| {
+            let retain = remote.nodes.iter().any(|node| node.hostname == *hostname);
+            if !retain {
+                if let Some(node_name) = info.node_name() {
+                    entry.node_to_host.remove(node_name);
+                }
+            }
+            retain
+        });
+
+        // make sure all currently configured hostnames exist in the cache, at least as empty entries:
+        for node in &remote.nodes {
+            if !entry.hosts.contains_key(&node.hostname) {
+                entry.hosts.insert(
+                    node.hostname.clone(),
+                    remote_cache::HostInfo::new(node.hostname.clone()),
+                );
+            }
+        }
+    }
+
+    async fn query_node_names(config: SectionConfigData<Remote>) {
+        for (_name, remote) in &config {
+            if let Err(err) = Self::query_node_names_for_remote(remote).await {
+                log::error!("error updating node name cache - {err:?}");
+            }
+        }
+    }
+
+    async fn query_node_names_for_remote(remote: &Remote) -> Result<(), Error> {
+        // Only PVE entries currently have a node cache, so skip non-PVE remotes:
+        if remote.ty != RemoteType::Pve {
+            return Ok(());
+        }
+
+        // query each configured host for its node name and update its reachability
+        for node in &remote.nodes {
+            tracing::debug!("querying remote {:?} node {:?}", remote.id, node.hostname);
+
+            // query the node name; on failure the host is marked unreachable below
+            let query_result = match query_node_name(remote, &node.hostname).await {
+                Ok(node_name) => Some(node_name),
+                Err(err) => {
+                    log::error!(
+                        "failed to query info for remote '{}' node '{}' - {err:?}",
+                        remote.id,
+                        node.hostname
+                    );
+                    None
+                }
+            };
+
+            let mut cache = RemoteMappingCache::write()?;
+            if let Some(info) = cache.info_by_hostname_mut(&remote.id, &node.hostname) {
+                info.reachable = query_result.is_some();
+            }
+            if let Some(node_name) = query_result {
+                cache.set_node_name(&remote.id, &node.hostname, Some(node_name));
+            }
+            cache.save()?;
+        }
+
+        Ok(())
+    }
+}
+
+/// Calls `/cluster/status` directly on a specific node to find its name.
+async fn query_node_name(remote: &Remote, hostname: &str) -> Result<String, Error> {
+    let client = server::connection::make_pve_client_with_endpoint(remote, Some(hostname))?;
+    let node_status_list = client.cluster_status().await?;
+    for node in node_status_list {
+        if node.local == Some(true) {
+            return Ok(node.name);
+        }
+    }
+    bail!("no local node found in cluster status of {hostname}");
+}
diff --git a/server/src/connection.rs b/server/src/connection.rs
index aeea089..24e2e44 100644
--- a/server/src/connection.rs
+++ b/server/src/connection.rs
@@ -214,6 +214,19 @@ pub trait ClientFactory {
         target_endpoint: Option<&str>,
     ) -> Result<Arc<PveClient>, Error>;
 
+    /// Create a new API client for PVE remotes, but for a specific *node*.
+    ///
+    /// The default implementation ignores the `node` parameter and forwards to
+    /// `make_pve_client()`.
+    fn make_pve_client_with_node(
+        &self,
+        remote: &Remote,
+        node: &str,
+    ) -> Result<Arc<PveClient>, Error> {
+        let _ = node;
+        self.make_pve_client(remote)
+    }
+
     /// Create a new API client for PVE remotes.
     ///
     /// In case the remote has a user configured (instead of an API token), it will connect and get
@@ -343,6 +356,20 @@ impl ClientFactory for DefaultClientFactory {
         Ok(Arc::new(PveClientImpl(client)))
     }
 
+    fn make_pve_client_with_node(
+        &self,
+        remote: &Remote,
+        node: &str,
+    ) -> Result<Arc<PveClient>, Error> {
+        let cache = crate::remote_cache::RemoteMappingCache::get();
+        match cache.info_by_node_name(&remote.id, node) {
+            Some(info) if info.reachable => {
+                self.make_pve_client_with_endpoint(remote, Some(&info.hostname))
+            }
+            _ => self.make_pve_client(remote),
+        }
+    }
+
     async fn make_pve_client_and_login(&self, remote: &Remote) -> Result<Arc<PveClient>, Error> {
         let client = connect_or_login(remote, None).await?;
         Ok(Arc::new(PveClientImpl(client)))
@@ -377,6 +404,11 @@ pub fn make_pve_client_with_endpoint(
     instance().make_pve_client_with_endpoint(remote, target_endpoint)
 }
 
+/// Create a new API client for PVE remotes and try to make it connect to a specific *node*.
+pub fn make_pve_client_with_node(remote: &Remote, node: &str) -> Result<Arc<PveClient>, Error> {
+    instance().make_pve_client_with_node(remote, node)
+}
+
 /// Create a new API client for PBS remotes
 pub fn make_pbs_client(remote: &Remote) -> Result<Box<PbsClient>, Error> {
     instance().make_pbs_client(remote)
diff --git a/server/src/lib.rs b/server/src/lib.rs
index 12dc912..dcb24f0 100644
--- a/server/src/lib.rs
+++ b/server/src/lib.rs
@@ -6,6 +6,7 @@ pub mod auth;
 pub mod context;
 pub mod env;
 pub mod metric_collection;
+pub mod remote_cache;
 pub mod resource_cache;
 pub mod task_cache;
 pub mod task_utils;
diff --git a/server/src/remote_cache/mod.rs b/server/src/remote_cache/mod.rs
new file mode 100644
index 0000000..69e79f1
--- /dev/null
+++ b/server/src/remote_cache/mod.rs
@@ -0,0 +1,285 @@
+//! This currently only matters for PVE remotes.
+//!
+//! PVE remotes have multiple nodes which have names we cannot necessarily infer from the
+//! "hostname" field, since that might be a different address, potentially with a port.
+//!
+//! We also do not want users to have to maintain the PDM host/node-name combinations (in case they
+//! rename or reinstall nodes). Renaming would break the PDM config, reinstalling would break e.g. a
+//! "machine-id" based mapping.
+//!
+//! We also cannot rely on the TLS fingerprints, because a whole cluster could potentially use a
+//! single wildcard certificate.
+//!
+//! Instead, we maintain a cached mapping of `address ↔ name` in `/var`, which gets polled
+//! regularly.
+//! For PVE we can query an address' `/cluster/status` and look for an entry marked as `local:1`.
+//! Later this might be changed to looking for the node name in the result of
+//! `/nodes/localhost/status` - once this is implemented and rolled out long enough in PVE.
+
+use std::collections::HashMap;
+use std::sync::{Arc, Mutex};
+
+use anyhow::{Context as _, Error};
+use serde::{Deserialize, Serialize};
+
+use proxmox_product_config::replace_config;
+use proxmox_product_config::{open_api_lockfile, ApiLockGuard};
+
+use pdm_api_types::remotes::RemoteType;
+use pdm_config::ConfigVersionCache;
+
+const CACHE_FILENAME: &str = concat!(
+    pdm_buildcfg::PDM_CACHE_DIR_M!(),
+    "/remote-mapping-cache.json"
+);
+
+const LOCK_FILE: &str = concat!(
+    pdm_buildcfg::PDM_CACHE_DIR_M!(),
+    "/.remote-mapping-cache.json.lock"
+);
+
+static CURRENT_CACHE: Mutex<Option<CacheState>> = Mutex::new(None);
+
+#[derive(Clone)]
+struct CacheState {
+    cache: Arc<RemoteMappingCache>,
+    generation: usize,
+}
+
+impl CacheState {
+    fn get() -> Self {
+        let mut cache = CURRENT_CACHE.lock().unwrap();
+
+        let version_cache = ConfigVersionCache::new_log_error();
+
+        if let Some(cache) = cache.clone() {
+            if let Some(version_cache) = version_cache.as_deref() {
+                if cache.generation == version_cache.remote_mapping_cache() {
+                    return cache;
+                }
+                // outdated, fall back to reloading
+            }
+            // outdated, or we failed to query the version cache, fall through to the load
+        }
+
+        // we have no valid cache yet:
+        let generation = version_cache.map(|c| c.remote_mapping_cache()).unwrap_or(0);
+
+        let data = Arc::new(RemoteMappingCache::load());
+        let this = CacheState {
+            cache: Arc::clone(&data),
+            generation,
+        };
+        *cache = Some(this.clone());
+        this
+    }
+
+    fn update(cache: RemoteMappingCache) {
+        let mut current_cache = CURRENT_CACHE.lock().unwrap();
+        let generation = match pdm_config::ConfigVersionCache::new_log_error() {
+            Some(version_cache) => version_cache.increase_remote_mapping_cache(),
+            None => 0,
+        };
+        *current_cache = Some(CacheState {
+            generation,
+            cache: Arc::new(cache),
+        });
+    }
+}
+
+pub struct WriteRemoteMappingCache {
+    pub data: RemoteMappingCache,
+    _lock: ApiLockGuard,
+}
+
+impl std::ops::Deref for WriteRemoteMappingCache {
+    type Target = RemoteMappingCache;
+
+    fn deref(&self) -> &Self::Target {
+        &self.data
+    }
+}
+
+impl std::ops::DerefMut for WriteRemoteMappingCache {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.data
+    }
+}
+
+impl WriteRemoteMappingCache {
+    fn new(lock: ApiLockGuard, data: RemoteMappingCache) -> Self {
+        Self { _lock: lock, data }
+    }
+
+    pub fn save(self) -> Result<(), Error> {
+        self.data.save()?;
+        CacheState::update(self.data);
+        Ok(())
+    }
+}
+
+/// File format for `/var/cache/proxmox-datacenter-manager/remote-mapping-cache.json`
+#[derive(Clone, Default, Deserialize, Serialize)]
+pub struct RemoteMappingCache {
+    /// Maps a remote name to the cached data for its nodes.
+    pub remotes: HashMap<String, RemoteMapping>,
+}
+
+impl RemoteMappingCache {
+    /// Get read only access to the current cache.
+    pub fn get() -> Arc<Self> {
+        Arc::clone(&CacheState::get().cache)
+    }
+
+    /// *Lock* the cache lock file and get mutable access to the current contents.
+    pub fn write() -> Result<WriteRemoteMappingCache, Error> {
+        let write_lock = open_api_lockfile(LOCK_FILE, None, true)?;
+
+        Ok(WriteRemoteMappingCache::new(
+            write_lock,
+            Self::clone(&Self::get()),
+        ))
+    }
+
+    /// Load the current remote mapping cache. This always succeeds and may return an empty cache.
+    fn load() -> Self {
+        fn do_load() -> Result<Option<RemoteMappingCache>, Error> {
+            Ok(proxmox_sys::fs::file_read_optional_string(CACHE_FILENAME)?
+                .map(|content| serde_json::from_str(&content))
+                .transpose()?)
+        }
+
+        match do_load() {
+            Ok(Some(data)) => return data,
+            Ok(None) => (),
+            Err(err) => {
+                log::error!("corrupted remote-mapping-cache.json file, discarding - {err:?}");
+            }
+        }
+
+        Self::default()
+    }
+
+    /// Save the current remote mapping cache. This should only be done by the remote mapping task.
+    fn save(&self) -> Result<(), Error> {
+        let raw = serde_json::to_vec(self).context("failed to serialize remote mapping cache")?;
+        replace_config(CACHE_FILENAME, &raw)
+    }
+
+    /// Attempt to retrieve the host name from a node name.
+    pub fn node_name_to_hostname(&self, remote: &str, node_name: &str) -> Option<&str> {
+        Some(self.remotes.get(remote)?.node_to_host.get(node_name)?)
+    }
+
+    /// Attempt to get the host info for a node name.
+    pub fn info_by_node_name(&self, remote_name: &str, node_name: &str) -> Option<&HostInfo> {
+        let remote = self.remotes.get(remote_name)?;
+        let host = remote.node_to_host.get(node_name)?;
+        remote.hosts.get(host)
+    }
+
+    pub fn info_by_node_name_mut(
+        &mut self,
+        remote_name: &str,
+        node_name: &str,
+    ) -> Option<&mut HostInfo> {
+        let remote = self.remotes.get_mut(remote_name)?;
+        let host = remote.node_to_host.get(node_name)?;
+        remote.hosts.get_mut(host)
+    }
+
+    /// Attempt to get the host info for a host name.
+    pub fn info_by_hostname(&self, remote: &str, hostname: &str) -> Option<&HostInfo> {
+        self.remotes.get(remote)?.hosts.get(hostname)
+    }
+
+    pub fn info_by_hostname_mut(&mut self, remote: &str, hostname: &str) -> Option<&mut HostInfo> {
+        self.remotes.get_mut(remote)?.hosts.get_mut(hostname)
+    }
+
+    /// Mark a host as reachable or unreachable, looked up by host name.
+    pub fn mark_host_reachable(&mut self, remote_name: &str, hostname: &str, reachable: bool) {
+        if let Some(info) = self.info_by_hostname_mut(remote_name, hostname) {
+            info.reachable = reachable;
+        }
+    }
+
+    /// Mark a node as reachable or unreachable, looked up by node name.
+    pub fn mark_node_reachable(&mut self, remote_name: &str, node_name: &str, reachable: bool) {
+        if let Some(info) = self.info_by_node_name_mut(remote_name, node_name) {
+            info.reachable = reachable;
+        }
+    }
+
+    /// Update the node name for a host, if the remote and host exist (otherwise this does
+    /// nothing).
+    pub fn set_node_name(&mut self, remote_name: &str, hostname: &str, node_name: Option<String>) {
+        if let Some(remote) = self.remotes.get_mut(remote_name) {
+            remote.set_node_name(hostname, node_name);
+        }
+    }
+}
+
+/// An entry for a remote in a [`RemoteMappingCache`].
+#[derive(Clone, Deserialize, Serialize)]
+pub struct RemoteMapping {
+    /// The remote type.
+    pub ty: RemoteType,
+
+    /// Maps a `hostname` to information we keep about it.
+    pub hosts: HashMap<String, HostInfo>,
+
+    /// Maps a node name to a hostname, for where we have that info.
+    pub node_to_host: HashMap<String, String>,
+}
+
+impl RemoteMapping {
+    pub fn new(ty: RemoteType) -> Self {
+        Self {
+            ty,
+            hosts: HashMap::new(),
+            node_to_host: HashMap::new(),
+        }
+    }
+
+    /// Update the node name for a host, if the host exists (otherwise this does nothing).
+    pub fn set_node_name(&mut self, hostname: &str, node_name: Option<String>) {
+        if let Some(info) = self.hosts.get_mut(hostname) {
+            if let Some(old) = info.node_name.take() {
+                self.node_to_host.remove(&old);
+            }
+            info.node_name = node_name;
+            if let Some(new) = &info.node_name {
+                self.node_to_host.insert(new.clone(), hostname.to_string());
+            }
+        }
+    }
+}
+
+/// All the data we keep cached for nodes found in [`RemoteMapping`].
+#[derive(Clone, Deserialize, Serialize)]
+pub struct HostInfo {
+    /// This is the host name associated with this node.
+    pub hostname: String,
+
+    /// This is the cluster-side node name, if we know it.
+    node_name: Option<String>,
+
+    /// This means we were able to reach the node.
+    /// When a client fails to connect it may update this to mark it as unreachable.
+    pub reachable: bool,
+}
+
+impl HostInfo {
+    pub fn new(hostname: String) -> Self {
+        Self {
+            hostname,
+            node_name: None,
+            reachable: true,
+        }
+    }
+
+    pub fn node_name(&self) -> Option<&str> {
+        self.node_name.as_deref()
+    }
+}
-- 
2.39.5