[pve-devel] [PATCH ha-manager v3 06/13] manager: apply resource affinity rules when selecting service nodes
Daniel Kral
d.kral at proxmox.com
Fri Jul 4 20:20:49 CEST 2025
Add a mechanism to the node selection subroutine, which enforces the
resource affinity rules defined in the rules config.
The algorithm makes in-place changes to the set of nodes, such that the
final set contains only the nodes on which the resource affinity rules
allow the HA resource to run, depending on the affinity type of the
resource affinity rules.
The meaning of the HA resource's failback property also changes slightly,
because it now also controls how the HA Manager chooses nodes for an HA
resource with resource affinity rules, not only with node affinity rules.
Signed-off-by: Daniel Kral <d.kral at proxmox.com>
---
src/PVE/API2/HA/Resources.pm | 3 +-
src/PVE/API2/HA/Status.pm | 4 +-
src/PVE/HA/Manager.pm | 11 +-
src/PVE/HA/Resources.pm | 3 +-
src/PVE/HA/Rules/ResourceAffinity.pm | 150 +++++++++++++++++++++++++++
5 files changed, 167 insertions(+), 4 deletions(-)
diff --git a/src/PVE/API2/HA/Resources.pm b/src/PVE/API2/HA/Resources.pm
index e06d202..6ead5f0 100644
--- a/src/PVE/API2/HA/Resources.pm
+++ b/src/PVE/API2/HA/Resources.pm
@@ -131,7 +131,8 @@ __PACKAGE__->register_method({
description => "HA resource is automatically migrated to the"
. " node with the highest priority according to their node"
. " affinity rule, if a node with a higher priority than"
- . " the current node comes online.",
+ . " the current node comes online, or migrated to the node,"
+ . " which doesn\'t violate any resource affinity rule.",
type => 'boolean',
optional => 1,
default => 1,
diff --git a/src/PVE/API2/HA/Status.pm b/src/PVE/API2/HA/Status.pm
index 4038766..d831650 100644
--- a/src/PVE/API2/HA/Status.pm
+++ b/src/PVE/API2/HA/Status.pm
@@ -113,7 +113,9 @@ __PACKAGE__->register_method({
description => "HA resource is automatically migrated to"
. " the node with the highest priority according to their"
. " node affinity rule, if a node with a higher priority"
- . " than the current node comes online.",
+ . " than the current node comes online, or migrate to"
+ . " the node, which doesn\'t violate any resource"
+ . " affinity rule.",
type => "boolean",
optional => 1,
default => 1,
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index b536c0f..06d83cd 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -11,7 +11,8 @@ use PVE::HA::Tools ':exit_codes';
use PVE::HA::NodeStatus;
use PVE::HA::Rules;
use PVE::HA::Rules::NodeAffinity qw(get_node_affinity);
-use PVE::HA::Rules::ResourceAffinity;
+use PVE::HA::Rules::ResourceAffinity
+ qw(get_resource_affinity apply_positive_resource_affinity apply_negative_resource_affinity);
use PVE::HA::Usage::Basic;
use PVE::HA::Usage::Static;
@@ -151,11 +152,16 @@ sub select_service_node {
return undef if !%$pri_nodes;
+ my ($together, $separate) = get_resource_affinity($rules, $sid, $online_node_usage);
+
# stay on current node if possible (avoids random migrations)
if (
$node_preference eq 'none'
&& !$service_conf->{failback}
&& $allowed_nodes->{$current_node}
+ && PVE::HA::Rules::ResourceAffinity::is_allowed_on_node(
+ $together, $separate, $current_node,
+ )
) {
return $current_node;
}
@@ -167,6 +173,9 @@ sub select_service_node {
}
}
+ apply_positive_resource_affinity($together, $pri_nodes);
+ apply_negative_resource_affinity($separate, $pri_nodes);
+
return $maintenance_fallback
if defined($maintenance_fallback) && $pri_nodes->{$maintenance_fallback};
diff --git a/src/PVE/HA/Resources.pm b/src/PVE/HA/Resources.pm
index b6d4a73..fbb0685 100644
--- a/src/PVE/HA/Resources.pm
+++ b/src/PVE/HA/Resources.pm
@@ -66,7 +66,8 @@ EODESC
description => "Automatically migrate HA resource to the node with"
. " the highest priority according to their node affinity "
. " rules, if a node with a higher priority than the current"
- . " node comes online.",
+ . " node comes online, or migrate to the node, which doesn\'t"
+ . " violate any resource affinity rule.",
type => 'boolean',
optional => 1,
default => 1,
diff --git a/src/PVE/HA/Rules/ResourceAffinity.pm b/src/PVE/HA/Rules/ResourceAffinity.pm
index b024c93..965b9a1 100644
--- a/src/PVE/HA/Rules/ResourceAffinity.pm
+++ b/src/PVE/HA/Rules/ResourceAffinity.pm
@@ -6,8 +6,15 @@ use warnings;
use PVE::HA::HashTools qw(set_intersect sets_are_disjoint);
use PVE::HA::Rules;
+use base qw(Exporter);
use base qw(PVE::HA::Rules);
+our @EXPORT_OK = qw(
+ get_resource_affinity
+ apply_positive_resource_affinity
+ apply_negative_resource_affinity
+);
+
=head1 NAME
PVE::HA::Rules::ResourceAffinity - Resource Affinity Plugin for HA Rules
@@ -436,4 +443,147 @@ sub plugin_canonicalize {
);
}
+=head1 RESOURCE AFFINITY RULE HELPERS
+
+=cut
+
+=head3 get_resource_affinity($rules, $sid, $online_node_usage)
+
+Returns a list of two hash references, C<($together, $separate)>. The first
+describes the positive resource affinity and the second the negative resource
+affinity of resource C<$sid>, according to the resource affinity rules in
+C<$rules> and the resource locations in C<$online_node_usage>.
+
+Each key in C<$together> is an online node, where other resources, which
+C<$sid> is in positive affinity with, are already running. The value is
+incremented once for every such resource in every visited rule. That is, each
+key is a node, where the resource must be.
+
+Each key in C<$separate> is an online node, where other resources, which
+C<$sid> is in negative affinity with, are already running. The value is always
+1. That is, each key is a node, where the resource must not be.
+
+For example, if three resources, which C<$sid> is in positive affinity with,
+are running on C<node2>, and two resources, which C<$sid> is in negative
+affinity with, are running on C<node1> and C<node3>, the returned list is:
+
+    (
+        { node2 => 3 },                # $together
+        { node1 => 1, node3 => 1 },    # $separate
+    )
+
+Dies with an C<unimplemented resource affinity type> error, if a visited rule
+with a located resource has an affinity other than C<positive> or C<negative>.
+
+=cut
+
+sub get_resource_affinity : prototype($$$) {
+    my ($rules, $sid, $online_node_usage) = @_;
+
+    my (%together, %separate);
+
+    # callback handed to foreach_rule(); receives one rule per invocation
+    my $collect_rule = sub {
+        my ($rule) = @_;
+
+        for my $other_sid (keys $rule->{resources}->%*) {
+            next if $other_sid eq $sid; # only the other resources are relevant
+
+            my $used_nodes = $online_node_usage->get_service_nodes($other_sid);
+
+            # skip resources, which have no node assigned
+            next if !$used_nodes || !@$used_nodes;
+
+            if ($rule->{affinity} eq 'positive') {
+                $together{$_}++ for @$used_nodes;
+            } elsif ($rule->{affinity} eq 'negative') {
+                $separate{$_} = 1 for @$used_nodes;
+            } else {
+                die "unimplemented resource affinity type $rule->{affinity}\n";
+            }
+        }
+    };
+
+    # options passed to foreach_rule() to select the rules to visit
+    my $rule_filter = {
+        sid => $sid,
+        type => 'resource-affinity',
+        exclude_disabled_rules => 1,
+    };
+
+    PVE::HA::Rules::foreach_rule($rules, $collect_rule, $rule_filter);
+
+    return (\%together, \%separate);
+}
+
+=head3 is_allowed_on_node($together, $separate, $node)
+
+Checks whether C<$node> is acceptable with respect to the positive resource
+affinity hash C<$together> and the negative resource affinity hash
+C<$separate>.
+
+Returns a true value, if C<$node> has a true entry in C<$together>, or if
+C<$node> has no true entry in C<$separate>. Returns a false value only, if
+C<$node> has no true entry in C<$together> and a true entry in C<$separate>.
+
+=cut
+
+sub is_allowed_on_node : prototype($$$) {
+    my ($together, $separate, $node) = @_;
+
+    # a node listed in $together is accepted regardless of $separate
+    my $together_entry = $together->{$node};
+    return $together_entry if $together_entry;
+
+    # otherwise the node is acceptable as long as $separate does not list it
+    return !$separate->{$node};
+}
+
+=head3 apply_positive_resource_affinity($together, $allowed_nodes)
+
+Applies the positive resource affinity C<$together> on the allowed node hash set
+C<$allowed_nodes> by modifying it directly.
+
+Positive resource affinity means keeping resources together on a single node and
+therefore minimizing the separation of resources.
+
+If C<$together> is empty, C<$allowed_nodes> is left untouched. Otherwise, the
+node with the highest count in C<$together> is selected and every other node is
+removed from C<$allowed_nodes>. If several nodes share the highest count, the
+node name, which sorts first with C<cmp>, is selected. If the selected node is
+not in C<$allowed_nodes>, the set ends up empty.
+
+The allowed node hash set C<$allowed_nodes> is expected to contain all nodes,
+which are available to the resource this helper is called for, i.e. each node
+is currently online, available according to other location constraints, and the
+resource has not failed running there yet.
+
+=cut
+
+sub apply_positive_resource_affinity : prototype($$) {
+    my ($together, $allowed_nodes) = @_;
+
+    my @candidate_nodes = keys %$together
+        or return; # nothing to do if there is no positive resource affinity
+
+    # Select the most populated node from a positive resource affinity. The
+    # tie-break on the node name is part of the comparator, so the result is
+    # deterministic without depending on sort being stable.
+    my ($majority_node) =
+        sort { $together->{$b} <=> $together->{$a} || $a cmp $b } @candidate_nodes;
+
+    # keys() returns a list up front, so deleting while looping is safe
+    for my $node (keys %$allowed_nodes) {
+        delete $allowed_nodes->{$node} if $node ne $majority_node;
+    }
+}
+
+=head3 apply_negative_resource_affinity($separate, $allowed_nodes)
+
+Applies the negative resource affinity C<$separate> on the allowed node hash set
+C<$allowed_nodes> by modifying it directly.
+
+Negative resource affinity means keeping resources separate on multiple nodes
+and therefore maximizing the separation of resources.
+
+Every node, which is a key in C<$separate>, is removed from C<$allowed_nodes>.
+C<$separate> itself is not modified.
+
+The allowed node hash set C<$allowed_nodes> is expected to contain all nodes,
+which are available to the resource this helper is called for, i.e. each node
+is currently online, available according to other location constraints, and the
+resource has not failed running there yet.
+
+=cut
+
+sub apply_negative_resource_affinity : prototype($$) {
+    my ($separate, $allowed_nodes) = @_;
+
+    # iterate over a shallow copy of the negative resource affinity
+    my %forbidden_nodes = $separate->%*;
+
+    delete $allowed_nodes->{$_} for keys %forbidden_nodes;
+}
+
1;
--
2.39.5
More information about the pve-devel
mailing list