[pve-devel] [PATCH ha-manager v3 11/15] manager: apply node affinity rules when selecting service nodes

Daniel Kral d.kral at proxmox.com
Fri Jul 4 20:16:51 CEST 2025


Replace the HA group mechanism with the functionally equivalent node
affinity rules' get_node_affinity(...), which enforces the node affinity
rules defined in the rules config.

This allows the $groups parameter to be replaced with the $rules
parameter in select_service_node(...) as all behavior of the HA groups
is now encoded in $service_conf and $rules.

Signed-off-by: Daniel Kral <d.kral at proxmox.com>
---
 src/PVE/HA/Manager.pm            | 83 ++++++--------------------------
 src/PVE/HA/Rules/NodeAffinity.pm | 83 ++++++++++++++++++++++++++++++++
 src/test/test_failover1.pl       | 16 ++++--
 3 files changed, 110 insertions(+), 72 deletions(-)

diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 148447d..4357253 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -10,7 +10,7 @@ use PVE::HA::Groups;
 use PVE::HA::Tools ':exit_codes';
 use PVE::HA::NodeStatus;
 use PVE::HA::Rules;
-use PVE::HA::Rules::NodeAffinity;
+use PVE::HA::Rules::NodeAffinity qw(get_node_affinity);
 use PVE::HA::Usage::Basic;
 use PVE::HA::Usage::Static;
 
@@ -114,57 +114,13 @@ sub flush_master_status {
     $haenv->write_manager_status($ms);
 }
 
-sub get_service_group {
-    my ($groups, $online_node_usage, $service_conf) = @_;
-
-    my $group = {};
-    # add all online nodes to default group to allow try_next when no group set
-    $group->{nodes}->{$_} = 1 for $online_node_usage->list_nodes();
-
-    # overwrite default if service is bound to a specific group
-    if (my $group_id = $service_conf->{group}) {
-        $group = $groups->{ids}->{$group_id} if $groups->{ids}->{$group_id};
-    }
-
-    return $group;
-}
-
-# groups available nodes with their priority as group index
-sub get_node_priority_groups {
-    my ($group, $online_node_usage) = @_;
-
-    my $pri_groups = {};
-    my $group_members = {};
-    foreach my $entry (keys %{ $group->{nodes} }) {
-        my ($node, $pri) = ($entry, 0);
-        if ($entry =~ m/^(\S+):(\d+)$/) {
-            ($node, $pri) = ($1, $2);
-        }
-        next if !$online_node_usage->contains_node($node); # offline
-        $pri_groups->{$pri}->{$node} = 1;
-        $group_members->{$node} = $pri;
-    }
-
-    # add non-group members to unrestricted groups (priority -1)
-    if (!$group->{restricted}) {
-        my $pri = -1;
-        for my $node ($online_node_usage->list_nodes()) {
-            next if defined($group_members->{$node});
-            $pri_groups->{$pri}->{$node} = 1;
-            $group_members->{$node} = -1;
-        }
-    }
-
-    return ($pri_groups, $group_members);
-}
-
 =head3 select_service_node(...)
 
-=head3 select_service_node($groups, $online_node_usage, $sid, $service_conf, $sd, $node_preference)
+=head3 select_service_node($rules, $online_node_usage, $sid, $service_conf, $sd, $node_preference)
 
 Used to select the best fitting node for the service C<$sid>, with the
-configuration C<$service_conf> and state C<$sd>, according to the groups defined
-in C<$groups>, available node utilization in C<$online_node_usage>, and the
+configuration C<$service_conf> and state C<$sd>, according to the rules defined
+in C<$rules>, available node utilization in C<$online_node_usage>, and the
 given C<$node_preference>.
 
 The C<$node_preference> can be set to:
@@ -182,7 +138,7 @@ The C<$node_preference> can be set to:
 =cut
 
 sub select_service_node {
-    my ($groups, $online_node_usage, $sid, $service_conf, $sd, $node_preference) = @_;
+    my ($rules, $online_node_usage, $sid, $service_conf, $sd, $node_preference) = @_;
 
     die "'$node_preference' is not a valid node_preference for select_service_node\n"
         if $node_preference !~ m/(none|best-score|try-next)/;
@@ -190,42 +146,35 @@ sub select_service_node {
     my ($current_node, $tried_nodes, $maintenance_fallback) =
         $sd->@{qw(node failed_nodes maintenance_node)};
 
-    my $group = get_service_group($groups, $online_node_usage, $service_conf);
+    my ($allowed_nodes, $pri_nodes) = get_node_affinity($rules, $sid, $online_node_usage);
 
-    my ($pri_groups, $group_members) = get_node_priority_groups($group, $online_node_usage);
-
-    my @pri_list = sort { $b <=> $a } keys %$pri_groups;
-    return undef if !scalar(@pri_list);
+    return undef if !%$pri_nodes;
 
     # stay on current node if possible (avoids random migrations)
     if (
         $node_preference eq 'none'
-        && $group->{nofailback}
-        && defined($group_members->{$current_node})
+        && !$service_conf->{failback}
+        && $allowed_nodes->{$current_node}
     ) {
         return $current_node;
     }
 
-    # select node from top priority node list
-
-    my $top_pri = $pri_list[0];
-
     # try to avoid nodes where the service failed already if we want to relocate
     if ($node_preference eq 'try-next') {
         foreach my $node (@$tried_nodes) {
-            delete $pri_groups->{$top_pri}->{$node};
+            delete $pri_nodes->{$node};
         }
     }
 
     return $maintenance_fallback
-        if defined($maintenance_fallback) && $pri_groups->{$top_pri}->{$maintenance_fallback};
+        if defined($maintenance_fallback) && $pri_nodes->{$maintenance_fallback};
 
-    return $current_node if $node_preference eq 'none' && $pri_groups->{$top_pri}->{$current_node};
+    return $current_node if $node_preference eq 'none' && $pri_nodes->{$current_node};
 
     my $scores = $online_node_usage->score_nodes_to_start_service($sid, $current_node);
     my @nodes = sort {
         $scores->{$a} <=> $scores->{$b} || $a cmp $b
-    } keys %{ $pri_groups->{$top_pri} };
+    } keys %$pri_nodes;
 
     my $found;
     for (my $i = scalar(@nodes) - 1; $i >= 0; $i--) {
@@ -843,7 +792,7 @@ sub next_state_request_start {
 
     if ($self->{crs}->{rebalance_on_request_start}) {
         my $selected_node = select_service_node(
-            $self->{groups},
+            $self->{rules},
             $self->{online_node_usage},
             $sid,
             $cd,
@@ -1010,7 +959,7 @@ sub next_state_started {
             }
 
             my $node = select_service_node(
-                $self->{groups},
+                $self->{rules},
                 $self->{online_node_usage},
                 $sid,
                 $cd,
@@ -1128,7 +1077,7 @@ sub next_state_recovery {
     $self->recompute_online_node_usage(); # we want the most current node state
 
     my $recovery_node = select_service_node(
-        $self->{groups},
+        $self->{rules},
         $self->{online_node_usage},
         $sid,
         $cd,
diff --git a/src/PVE/HA/Rules/NodeAffinity.pm b/src/PVE/HA/Rules/NodeAffinity.pm
index 2b3d739..0331399 100644
--- a/src/PVE/HA/Rules/NodeAffinity.pm
+++ b/src/PVE/HA/Rules/NodeAffinity.pm
@@ -12,8 +12,13 @@ use PVE::Tools;
 use PVE::HA::Rules;
 use PVE::HA::Tools;
 
+use base qw(Exporter);
 use base qw(PVE::HA::Rules);
 
+our @EXPORT_OK = qw(
+    get_node_affinity
+);
+
 =head1 NAME
 
 PVE::HA::Rules::NodeAffinity
@@ -210,4 +215,82 @@ __PACKAGE__->register_check(
     },
 );
 
+=head1 NODE AFFINITY RULE HELPERS
+
+=cut
+
+my $get_resource_node_affinity_rule = sub {
+    my ($rules, $sid) = @_;
+
+    # with the current restriction a resource can only be in one node affinity rule
+    my $node_affinity_rule;
+    PVE::HA::Rules::foreach_rule(
+        $rules,
+        sub {
+            my ($rule) = @_;
+
+            $node_affinity_rule = dclone($rule) if !$node_affinity_rule;
+        },
+        {
+            sid => $sid,
+            type => 'node-affinity',
+            exclude_disabled_rules => 1,
+        },
+    );
+
+    return $node_affinity_rule;
+};
+
+=head3 get_node_affinity($rules, $sid, $online_node_usage)
+
+Returns a list of two hashes representing the node affinity of C<$sid>
+according to the node affinity rules in C<$rules> and the available nodes in
+C<$online_node_usage>.
+
+The first hash is a hash set of available nodes, i.e. nodes where the
+resource C<$sid> is allowed to be assigned to, and the second hash is a hash set
+of preferred nodes, i.e. nodes where the resource C<$sid> should be assigned to.
+
+If there are no available nodes at all, returns C<undef>.
+
+=cut
+
+sub get_node_affinity : prototype($$$) {
+    my ($rules, $sid, $online_node_usage) = @_;
+
+    my $node_affinity_rule = $get_resource_node_affinity_rule->($rules, $sid);
+
+    # default to a node affinity rule with all available nodes
+    if (!$node_affinity_rule) {
+        for my $node ($online_node_usage->list_nodes()) {
+            $node_affinity_rule->{nodes}->{$node} = { priority => 0 };
+        }
+    }
+
+    # add remaining nodes with low priority for non-strict node affinity rules
+    if (!$node_affinity_rule->{strict}) {
+        for my $node ($online_node_usage->list_nodes()) {
+            next if defined($node_affinity_rule->{nodes}->{$node});
+
+            $node_affinity_rule->{nodes}->{$node} = { priority => -1 };
+        }
+    }
+
+    my $allowed_nodes = {};
+    my $prioritized_nodes = {};
+
+    while (my ($node, $props) = each %{ $node_affinity_rule->{nodes} }) {
+        next if !$online_node_usage->contains_node($node); # node is offline
+
+        $allowed_nodes->{$node} = 1;
+        $prioritized_nodes->{ $props->{priority} }->{$node} = 1;
+    }
+
+    my $preferred_nodes = {};
+    my $highest_priority = (sort { $b <=> $a } keys %$prioritized_nodes)[0];
+    $preferred_nodes = $prioritized_nodes->{$highest_priority} if defined($highest_priority);
+
+    return ($allowed_nodes, $preferred_nodes);
+}
+
 1;
diff --git a/src/test/test_failover1.pl b/src/test/test_failover1.pl
index f6faa38..78a001e 100755
--- a/src/test/test_failover1.pl
+++ b/src/test/test_failover1.pl
@@ -4,12 +4,19 @@ use strict;
 use warnings;
 
 use lib '..';
-use PVE::HA::Groups;
 use PVE::HA::Manager;
 use PVE::HA::Usage::Basic;
 
-my $groups = PVE::HA::Groups->parse_config("groups.tmp", <<EOD);
-group: prefer_node1
+use PVE::HA::Rules;
+use PVE::HA::Rules::NodeAffinity;
+
+PVE::HA::Rules::NodeAffinity->register();
+
+PVE::HA::Rules->init(property_isolation => 1);
+
+my $rules = PVE::HA::Rules->parse_config("rules.tmp", <<EOD);
+node-affinity: prefer_node1
+	resources vm:111
 	nodes node1
 EOD
 
@@ -21,7 +28,6 @@ $online_node_usage->add_node("node3");
 
 my $service_conf = {
     node => 'node1',
-    group => 'prefer_node1',
     failback => 1,
 };
 
@@ -37,7 +43,7 @@ sub test {
     my $select_node_preference = $try_next ? 'try-next' : 'none';
 
     my $node = PVE::HA::Manager::select_service_node(
-        $groups, $online_node_usage, "vm:111", $service_conf, $sd, $select_node_preference,
+        $rules, $online_node_usage, "vm:111", $service_conf, $sd, $select_node_preference,
     );
 
     my (undef, undef, $line) = caller();
-- 
2.39.5





More information about the pve-devel mailing list