[pve-devel] [PATCH ha-manager 10/18] compile ha rules to a more compact representation
Daniel Kral
d.kral at proxmox.com
Thu Aug 21 16:35:43 CEST 2025
The initial implementation of fetching a ha resource's rules in the
scheduler and other users is rather inefficient as it needs to go
through the raw rules hash map and gathers quite a lot of information
which doesn't change unless the HA rules need to be re-translated
anyway.
Therefore add a compile stage when re-translating the HA rules, which
compiles each rule plugin's rules into a more compact data structure.
This reduces the unnecessary recomputation of invariant data done in the
hot path of the scheduler's select_service_node(...).
Signed-off-by: Daniel Kral <d.kral at proxmox.com>
---
I didn't add this to the commit message as I'm already referencing the
future in quite a lot of these, so I'll put it here:
The $node_affinity contains a `nodes` property for each ha resource
$sid, because in a future patch series, we'll need more information
about the $node_affinity to properly implement the migration blockers
for node affinity rules as well. I didn't want to add a patch in that
future series just to make a big unnecessary diff there, but if that's
better I'll happily send a v2 for this.
src/PVE/HA/Config.pm | 9 +--
src/PVE/HA/Manager.pm | 29 +++++----
src/PVE/HA/Rules.pm | 59 +++++++++++++++++
src/PVE/HA/Rules/NodeAffinity.pm | 82 ++++++++++++------------
src/PVE/HA/Rules/ResourceAffinity.pm | 95 ++++++++++++++--------------
src/test/test_failover1.pl | 10 +--
6 files changed, 175 insertions(+), 109 deletions(-)
diff --git a/src/PVE/HA/Config.pm b/src/PVE/HA/Config.pm
index bd87a0ab..af2a1cde 100644
--- a/src/PVE/HA/Config.pm
+++ b/src/PVE/HA/Config.pm
@@ -224,7 +224,7 @@ sub read_and_check_rules_config {
return $rules;
}
-sub read_and_check_effective_rules_config {
+sub read_and_compile_rules_config {
my $rules = read_and_check_rules_config();
@@ -239,7 +239,7 @@ sub read_and_check_effective_rules_config {
PVE::HA::Rules->transform($rules, $nodes);
- return $rules;
+ return PVE::HA::Rules->compile($rules, $nodes);
}
sub write_rules_config {
@@ -391,8 +391,9 @@ sub get_resource_motion_info {
my $ss = $manager_status->{service_status};
my $ns = $manager_status->{node_status};
- my $rules = read_and_check_effective_rules_config();
- my ($together, $separate) = get_affinitive_resources($rules, $sid);
+ my $affinity = read_and_compile_rules_config();
+ my ($together, $separate) =
+ get_affinitive_resources($affinity->{'resource-affinity'}, $sid);
for my $csid (sort keys %$together) {
next if !defined($ss->{$csid});
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 3013d369..748667e8 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -121,11 +121,11 @@ sub flush_master_status {
=head3 select_service_node(...)
-=head3 select_service_node($rules, $online_node_usage, $sid, $service_conf, $sd, $node_preference)
+=head3 select_service_node($affinity, $online_node_usage, $sid, $service_conf, $sd, $node_preference)
Used to select the best fitting node for the service C<$sid>, with the
-configuration C<$service_conf> and state C<$sd>, according to the rules defined
-in C<$rules>, available node utilization in C<$online_node_usage>, and the
+configuration C<$service_conf> and state C<$sd>, according to the affinity
+in C<$affinity>, available node utilization in C<$online_node_usage>, and the
given C<$node_preference>.
The C<$node_preference> can be set to:
@@ -143,20 +143,21 @@ The C<$node_preference> can be set to:
=cut
sub select_service_node {
- my ($rules, $online_node_usage, $sid, $service_conf, $sd, $node_preference) = @_;
+ my ($affinity, $online_node_usage, $sid, $service_conf, $sd, $node_preference) = @_;
die "'$node_preference' is not a valid node_preference for select_service_node\n"
if $node_preference !~ m/(none|best-score|try-next)/;
my ($current_node, $tried_nodes, $maintenance_fallback) =
$sd->@{qw(node failed_nodes maintenance_node)};
+ my ($node_affinity, $resource_affinity) = $affinity->@{qw(node-affinity resource-affinity)};
my $online_nodes = { map { $_ => 1 } $online_node_usage->list_nodes() };
- my ($allowed_nodes, $pri_nodes) = get_node_affinity($rules, $sid, $online_nodes);
+ my ($allowed_nodes, $pri_nodes) = get_node_affinity($node_affinity, $sid, $online_nodes);
return undef if !%$pri_nodes;
- my ($together, $separate) = get_resource_affinity($rules, $sid, $online_node_usage);
+ my ($together, $separate) = get_resource_affinity($resource_affinity, $sid, $online_node_usage);
# stay on current node if possible (avoids random migrations)
if (
@@ -418,7 +419,8 @@ sub execute_migration {
my ($haenv, $ss) = $self->@{qw(haenv ss)};
- my ($together, $separate) = get_affinitive_resources($self->{rules}, $sid);
+ my $resource_affinity = $self->{affinity}->{'resource-affinity'};
+ my ($together, $separate) = get_affinitive_resources($resource_affinity, $sid);
for my $csid (sort keys %$separate) {
next if !defined($ss->{$csid});
@@ -746,7 +748,7 @@ sub manage {
PVE::HA::Groups::migrate_groups_to_resources($self->{groups}, $sc);
if (
- !$self->{rules}
+ !$self->{affinity}
|| $has_changed_nodelist
|| $new_rules->{digest} ne $self->{last_rules_digest}
|| $self->{groups}->{digest} ne $self->{last_groups_digest}
@@ -756,11 +758,12 @@ sub manage {
my $nodes = $self->{ns}->list_nodes();
my $messages = PVE::HA::Rules->transform($new_rules, $nodes);
+ my $new_affinity = PVE::HA::Rules->compile($new_rules, $nodes);
$haenv->log('info', $_) for @$messages;
- $self->{rules} = $new_rules;
+ $self->{affinity} = $new_affinity;
- $self->{last_rules_digest} = $self->{rules}->{digest};
+ $self->{last_rules_digest} = $new_rules->{digest};
$self->{last_groups_digest} = $self->{groups}->{digest};
$self->{last_services_digest} = $services_digest;
}
@@ -1020,7 +1023,7 @@ sub next_state_request_start {
if ($self->{crs}->{rebalance_on_request_start}) {
my $selected_node = select_service_node(
- $self->{rules},
+ $self->{affinity},
$self->{online_node_usage},
$sid,
$cd,
@@ -1187,7 +1190,7 @@ sub next_state_started {
}
my $node = select_service_node(
- $self->{rules},
+ $self->{affinity},
$self->{online_node_usage},
$sid,
$cd,
@@ -1306,7 +1309,7 @@ sub next_state_recovery {
$self->recompute_online_node_usage(); # we want the most current node state
my $recovery_node = select_service_node(
- $self->{rules},
+ $self->{affinity},
$self->{online_node_usage},
$sid,
$cd,
diff --git a/src/PVE/HA/Rules.pm b/src/PVE/HA/Rules.pm
index a075feac..d7593532 100644
--- a/src/PVE/HA/Rules.pm
+++ b/src/PVE/HA/Rules.pm
@@ -47,6 +47,12 @@ Each I<rule plugin> is required to implement the methods C<L<type()>>,
C<L<properties()>>, and C<L<options>> from the C<L<PVE::SectionConfig>> to
extend the properties of this I<base plugin> with plugin-specific properties.
+Each I<rule plugin> is required to implement the method
+C<L<< plugin_compile()|/$class->plugin_compile(...) >>> to distill a compiled
+representation of the more verbose C<$rules> from the config file, which is
+returned by C<L<< compile()|/$class->compile(...) >>> to be more appropriate
+and efficient for the scheduler and other users.
+
=head2 REGISTERING CHECKS
In order to C<L<< register checks|/$class->register_check(...) >>> for a rule
@@ -453,6 +459,59 @@ sub transform : prototype($$$) {
return $messages;
}
+=head3 $class->plugin_compile(...)
+
+=head3 $class->plugin_compile($rules, $nodes)
+
+B<MANDATORY:> Must be implemented in a I<rule plugin>.
+
+Called in C<$class->compile($rules, $nodes)> in order to get a more compact
+representation of the rule plugin's rules in C<$rules>, which holds only the
+relevant information for the scheduler and other users.
+
+C<$nodes> is a list of the configured cluster nodes.
+
+=cut
+
+sub plugin_compile {
+ my ($class, $rules, $nodes) = @_;
+
+ die "implement in subclass";
+}
+
+=head3 $class->compile(...)
+
+=head3 $class->compile($rules, $nodes)
+
+Compiles and returns a hash, where each key-value pair represents a rule
+plugin's more compact representation compiled from the more verbose rules
+defined in C<$rules>.
+
+C<$nodes> is a list of the configured cluster nodes.
+
+The transformation to the compact representation for each rule plugin is
+implemented in C<L<< plugin_compile()|/$class->plugin_compile(...) >>>.
+
+=cut
+
+sub compile {
+ my ($class, $rules, $nodes) = @_;
+
+ my $affinity = {};
+
+ for my $type (@$types) {
+ my $plugin = $class->lookup($type);
+ my $plugin_affinity = $plugin->plugin_compile($rules, $nodes);
+
+ die "plugin_compile(...) of type '$type' must return hash reference\n"
+ if ref($plugin_affinity) ne 'HASH';
+
+ $affinity->{$type} = $plugin_affinity;
+ }
+
+ return $affinity;
+}
+
=head1 FUNCTIONS
=cut
diff --git a/src/PVE/HA/Rules/NodeAffinity.pm b/src/PVE/HA/Rules/NodeAffinity.pm
index b7abf9a4..1af442ec 100644
--- a/src/PVE/HA/Rules/NodeAffinity.pm
+++ b/src/PVE/HA/Rules/NodeAffinity.pm
@@ -155,6 +155,40 @@ sub get_plugin_check_arguments {
return $result;
}
+sub plugin_compile {
+ my ($class, $rules, $nodes) = @_;
+
+ my $node_affinity = {};
+
+ PVE::HA::Rules::foreach_rule(
+ $rules,
+ sub {
+ my ($rule) = @_;
+
+ my $effective_nodes = dclone($rule->{nodes});
+
+ # add remaining nodes with low priority for non-strict node affinity
+ if (!$rule->{strict}) {
+ for my $node (@$nodes) {
+ next if defined($effective_nodes->{$node});
+
+ $effective_nodes->{$node} = { priority => -1 };
+ }
+ }
+
+ for my $sid (keys $rule->{resources}->%*) {
+ $node_affinity->{$sid} = {
+ nodes => $effective_nodes,
+ };
+ }
+ },
+ type => 'node-affinity',
+ exclude_disabled_rules => 1,
+ );
+
+ return $node_affinity;
+}
+
=head1 NODE AFFINITY RULE CHECKERS
=cut
@@ -217,30 +251,10 @@ __PACKAGE__->register_check(
=cut
-my $get_resource_node_affinity_rule = sub {
- my ($rules, $sid) = @_;
-
- # with the current restriction a resource can only be in one node affinity rule
- my $node_affinity_rule;
- PVE::HA::Rules::foreach_rule(
- $rules,
- sub {
- my ($rule) = @_;
-
- $node_affinity_rule = dclone($rule) if !$node_affinity_rule;
- },
- sid => $sid,
- type => 'node-affinity',
- exclude_disabled_rules => 1,
- );
-
- return $node_affinity_rule;
-};
-
-=head3 get_node_affinity($rules, $sid, $online_node_usage)
+=head3 get_node_affinity($node_affinity, $sid, $online_node_usage)
Returns a list of two hashes representing the node affinity of C<$sid>
-according to the node affinity rules in C<$rules> and the available nodes in
+according to the node affinity C<$node_affinity> and the available nodes in
the C<$online_nodes> hash.
The first hash is a hash set of available nodes, i.e. nodes where the
@@ -251,31 +265,15 @@ If there are no available nodes at all, returns C<undef>.
=cut
-sub get_node_affinity : prototype($$$) {
- my ($rules, $sid, $online_nodes) = @_;
+sub get_node_affinity {
+ my ($node_affinity, $sid, $online_nodes) = @_;
- my $node_affinity_rule = $get_resource_node_affinity_rule->($rules, $sid);
-
- # default to a node affinity rule with all available nodes
- if (!$node_affinity_rule) {
- for my $node (keys %$online_nodes) {
- $node_affinity_rule->{nodes}->{$node} = { priority => 0 };
- }
- }
-
- # add remaining nodes with low priority for non-strict node affinity rules
- if (!$node_affinity_rule->{strict}) {
- for my $node (keys %$online_nodes) {
- next if defined($node_affinity_rule->{nodes}->{$node});
-
- $node_affinity_rule->{nodes}->{$node} = { priority => -1 };
- }
- }
+ return ($online_nodes, $online_nodes) if !defined($node_affinity->{$sid}->{nodes});
my $allowed_nodes = {};
my $prioritized_nodes = {};
- while (my ($node, $props) = each %{ $node_affinity_rule->{nodes} }) {
+ while (my ($node, $props) = each $node_affinity->{$sid}->{nodes}->%*) {
next if !defined($online_nodes->{$node}); # node is offline
$allowed_nodes->{$node} = 1;
diff --git a/src/PVE/HA/Rules/ResourceAffinity.pm b/src/PVE/HA/Rules/ResourceAffinity.pm
index f2d57ce6..c3cde6e8 100644
--- a/src/PVE/HA/Rules/ResourceAffinity.pm
+++ b/src/PVE/HA/Rules/ResourceAffinity.pm
@@ -100,6 +100,35 @@ sub get_plugin_check_arguments {
return $result;
}
+sub plugin_compile {
+ my ($class, $rules, $nodes) = @_;
+
+ my $positive = {};
+ my $negative = {};
+
+ PVE::HA::Rules::foreach_rule(
+ $rules,
+ sub {
+ my ($rule, $ruleid) = @_;
+
+ my $affinity_set = $rule->{affinity} eq 'positive' ? $positive : $negative;
+
+ for my $sid (keys $rule->{resources}->%*) {
+ for my $csid (keys $rule->{resources}->%*) {
+ $affinity_set->{$sid}->{$csid} = 1 if $csid ne $sid;
+ }
+ }
+ },
+ type => 'resource-affinity',
+ exclude_disabled_rules => 1,
+ );
+
+ return {
+ positive => $positive,
+ negative => $negative,
+ };
+}
+
=head1 RESOURCE AFFINITY RULE CHECKERS
=cut
@@ -403,12 +432,12 @@ __PACKAGE__->register_transform(sub {
=cut
-=head3 get_affinitive_resources($rules, $sid)
+=head3 get_affinitive_resources($resource_affinity, $sid)
Returns a list of two hash sets, where the first hash set contains the
resources, which C<$sid> is positively affinitive to, and the second hash
contains the resources, which C<$sid> is negatively affinitive to, acording to
-the resource affinity rules in C<$rules>.
+the resource's resource affinity in C<$resource_affinity>.
Note that a resource C<$sid> becomes part of any negative affinity relation
of its positively affinitive resources.
@@ -429,36 +458,20 @@ affinitive to C<'ct:200'> and C<'ct:201'>, the returned value will be:
=cut
sub get_affinitive_resources : prototype($$) {
- my ($rules, $sid) = @_;
+ my ($resource_affinity, $sid) = @_;
- my $together = {};
- my $separate = {};
-
- PVE::HA::Rules::foreach_rule(
- $rules,
- sub {
- my ($rule, $ruleid) = @_;
-
- my $affinity_set = $rule->{affinity} eq 'positive' ? $together : $separate;
-
- for my $csid (sort keys %{ $rule->{resources} }) {
- $affinity_set->{$csid} = 1 if $csid ne $sid;
- }
- },
- sid => $sid,
- type => 'resource-affinity',
- exclude_disabled_rules => 1,
- );
+ my $together = $resource_affinity->{positive}->{$sid} // {};
+ my $separate = $resource_affinity->{negative}->{$sid} // {};
return ($together, $separate);
}
-=head3 get_resource_affinity($rules, $sid, $online_node_usage)
+=head3 get_resource_affinity($resource_affinity, $sid, $online_node_usage)
Returns a list of two hashes, where the first describes the positive resource
affinity and the second hash describes the negative resource affinity for
-resource C<$sid> according to the resource affinity rules in C<$rules> and the
-resource locations in C<$online_node_usage>.
+resource C<$sid> according to the resource's resource affinity in
+C<$resource_affinity> and the resource locations in C<$online_node_usage>.
For the positive resource affinity of a resource C<$sid>, each element in the
hash represents an online node, where other resources, which C<$sid> is in
@@ -487,36 +500,26 @@ resource C<$sid> is in a negative affinity with, the returned value will be:
=cut
sub get_resource_affinity : prototype($$$) {
- my ($rules, $sid, $online_node_usage) = @_;
+ my ($resource_affinity, $sid, $online_node_usage) = @_;
my $together = {};
my $separate = {};
- PVE::HA::Rules::foreach_rule(
- $rules,
- sub {
- my ($rule) = @_;
+ my ($positive, $negative) = get_affinitive_resources($resource_affinity, $sid);
- for my $csid (keys %{ $rule->{resources} }) {
- next if $csid eq $sid;
+ for my $csid (keys $positive->%*) {
+ my $nodes = $online_node_usage->get_service_nodes($csid);
+ next if !$nodes || !@$nodes; # skip unassigned ha resources
- my $nodes = $online_node_usage->get_service_nodes($csid);
+ $together->{$_}++ for @$nodes;
+ }
- next if !$nodes || !@$nodes; # skip unassigned nodes
+ for my $csid (keys $negative->%*) {
+ my $nodes = $online_node_usage->get_service_nodes($csid);
+ next if !$nodes || !@$nodes; # skip unassigned ha resources
- if ($rule->{affinity} eq 'positive') {
- $together->{$_}++ for @$nodes;
- } elsif ($rule->{affinity} eq 'negative') {
- $separate->{$_} = 1 for @$nodes;
- } else {
- die "unimplemented resource affinity type $rule->{affinity}\n";
- }
- }
- },
- sid => $sid,
- type => 'resource-affinity',
- exclude_disabled_rules => 1,
- );
+ $separate->{$_} = 1 for @$nodes;
+ }
return ($together, $separate);
}
diff --git a/src/test/test_failover1.pl b/src/test/test_failover1.pl
index 78a001eb..62476a93 100755
--- a/src/test/test_failover1.pl
+++ b/src/test/test_failover1.pl
@@ -20,11 +20,13 @@ node-affinity: prefer_node1
nodes node1
EOD
+my $nodes = ['node1', 'node2', 'node3'];
+
+my $affinity = PVE::HA::Rules->compile($rules, $nodes);
+
# Relies on the fact that the basic plugin doesn't use the haenv.
my $online_node_usage = PVE::HA::Usage::Basic->new();
-$online_node_usage->add_node("node1");
-$online_node_usage->add_node("node2");
-$online_node_usage->add_node("node3");
+$online_node_usage->add_node($_) for @$nodes;
my $service_conf = {
node => 'node1',
@@ -43,7 +45,7 @@ sub test {
my $select_node_preference = $try_next ? 'try-next' : 'none';
my $node = PVE::HA::Manager::select_service_node(
- $rules, $online_node_usage, "vm:111", $service_conf, $sd, $select_node_preference,
+ $affinity, $online_node_usage, "vm:111", $service_conf, $sd, $select_node_preference,
);
my (undef, undef, $line) = caller();
--
2.47.2
More information about the pve-devel
mailing list