[pve-devel] [PATCH pve-ha-manager 7/8] add affinity
Alexandre Derumier
aderumier at odiso.com
Wed Apr 27 17:33:50 CEST 2022
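
Implement the affinity score that was previously left as a fixme.

PVE::HA::Balancer::Services::compute_affinity() walks the resource
groups a service belongs to and counts one violation for each other
group member that runs on the candidate node when the group affinity
is 'separate', or away from it when the affinity is 'group'.

The score is used twice: in Balancer/Services.pm a non-zero score
flags the VM as badly placed and a candidate for rebalancing, and in
Balancer/Nodes.pm a target node is skipped when its score would be
greater than or equal to the service's current one. Manager.pm builds
the sid -> groups reverse map (get_resources_groups) from the
resources groups config and threads $sid down through
find_node_target().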
---
src/PVE/HA/Balancer/Nodes.pm | 36 +++++++++++++++--------------
src/PVE/HA/Balancer/Services.pm | 40 +++++++++++++++++++++++----------
src/PVE/HA/Manager.pm | 24 ++++++++++++++++----
3 files changed, 67 insertions(+), 33 deletions(-)
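
Note for reviewers: a minimal standalone sketch of the scoring loop
added below, runnable on its own. The sample data ($groups_resources,
$resources_groups, $ss) is made up for illustration and only mirrors
the shape of the structures that get_resources_groups() builds.

    #!/usr/bin/perl
    use strict;
    use warnings;

    # Made-up sample data: one 'separate' group whose two services
    # currently share node1, i.e. one placement violation.
    my $groups_resources = {
        group1 => {
            affinity  => 'separate',
            resources => { 'vm:100' => 1, 'vm:101' => 1 },
        },
    };
    my $resources_groups = {
        'vm:100' => { group1 => 1 },
        'vm:101' => { group1 => 1 },
    };
    my $ss = {
        'vm:100' => { node => 'node1' },
        'vm:101' => { node => 'node1' },
    };

    # Same counting as compute_affinity(): one point per group member
    # that violates the group's placement rule on the candidate node.
    sub compute_affinity_score {
        my ($sid, $node) = @_;

        my $score = 0;
        my $member_groups = $resources_groups->{$sid} // {};
        foreach my $groupid (keys %$member_groups) {
            my $affinity = $groups_resources->{$groupid}->{affinity};
            next if !$affinity;
            my $resources = $groups_resources->{$groupid}->{resources};
            foreach my $othersid (keys %$resources) {
                next if $othersid eq $sid;
                my $other_node = $ss->{$othersid}->{node};
                $score++ if ($affinity eq 'separate' && $other_node eq $node)
                    || ($affinity eq 'group' && $other_node ne $node);
            }
        }
        return $score;
    }

    print compute_affinity_score('vm:100', 'node1'), "\n"; # 1: staying on node1 violates 'separate'
    print compute_affinity_score('vm:100', 'node2'), "\n"; # 0: moving to node2 resolves the conflict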
diff --git a/src/PVE/HA/Balancer/Nodes.pm b/src/PVE/HA/Balancer/Nodes.pm
index a06ed62..bce4c38 100644
--- a/src/PVE/HA/Balancer/Nodes.pm
+++ b/src/PVE/HA/Balancer/Nodes.pm
@@ -4,7 +4,7 @@ use strict;
use warnings;
use PVE::HA::Balancer::Topsis;
use PVE::HA::Balancer::AHP;
-
+use PVE::HA::Balancer::Services;
my $compute_node_target_cpu_pct = sub{
my ($node_stats, $vm_stats) = @_;
@@ -21,19 +21,16 @@ my $compute_node_target_mem_pct = sub {
};
my $add_prio = sub {
- my ($self, $sd, $nodename, $group_members_prio) = @_;
-
- my $vm_stats = $sd->{stats};
- my $node_stats = $self->{online_node_stats}->{$nodename}->{stats};
+ my ($self, $nodename, $group_members_prio, $target_stats) = @_;
my $node = {};
$node->{prio} = $group_members_prio->{$nodename};
- $node->{affinity} = 0; #fixme, need to implement vm group
+ $node->{affinity} = $target_stats->{affinity} || 0;
$node->{online_node_usage} = $self->{online_node_usage}->{$nodename};
$node->{name} = $nodename;
$node->{cpu_pressure} = 0; #fixme, need to stream rrd graph first
- $node->{target_cpu_pct} = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
- $node->{target_mem_pct} = &$compute_node_target_mem_pct($node_stats, $vm_stats);
+ $node->{target_cpu_pct} = $target_stats->{cpu_pct} || 0;
+ $node->{target_mem_pct} = $target_stats->{mem_pct} || 0;
return $node;
};
@@ -80,7 +77,7 @@ my $check_cpumodel_compatibility = sub {
};
my $check_target_load = sub {
- my ($self, $sd, $node) = @_;
+ my ($self, $sid, $sd, $node, $target_stats) = @_;
return 1 if !$self->{balancer}->{enabled};
@@ -91,8 +88,8 @@ my $check_target_load = sub {
# if ksm sharing is already huge (20% of total memory), reduce mem threshold to 75%
$max_threshold->{mem} = 75 if $node_stats->{ksm} > $node_stats->{maxmem} * 0.2;
- my $target_mem_percent = &$compute_node_target_mem_pct($node_stats, $vm_stats);
- return if $target_mem_percent > $max_threshold->{mem};
+ $target_stats->{mem_pct} = &$compute_node_target_mem_pct($node_stats, $vm_stats);
+ return if $target_stats->{mem_pct} > $max_threshold->{mem};
#don't use node if already too much global pressure (some cores are already more than 100%, so we can't trust cpu average)
return if $node_stats->{cpu_pressure} > $max_threshold->{cpu_pressure};
@@ -100,8 +97,12 @@ my $check_target_load = sub {
#don't use node if a vm is already overloaded on this node
return if $node_stats->{max_vm_pressure} > $max_threshold->{vm_pressure};
- my $target_cpu_percent = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
- return if $target_cpu_percent > $max_threshold->{cpu};
+ $target_stats->{cpu_pct} = &$compute_node_target_cpu_pct($node_stats, $vm_stats);
+ return if $target_stats->{cpu_pct} > $max_threshold->{cpu};
+
+ #don't use node if its affinity score is greater than or equal to the service's current one
+ $target_stats->{affinity} = PVE::HA::Balancer::Services::compute_affinity($self, $sid, $node);
+ return if defined($vm_stats->{affinity}) && $target_stats->{affinity} != 0 && $target_stats->{affinity} >= $vm_stats->{affinity};
return 1;
};
@@ -131,7 +132,7 @@ my $check_hard_constraints = sub {
};
sub find_target {
- my($self, $cd, $sd, $group_members_prio) = @_;
+ my ($self, $sid, $cd, $sd, $group_members_prio) = @_;
my $online_nodes = $self->{online_node_stats};
@@ -142,11 +143,12 @@ sub find_target {
#### FILTERING NODES WITH HARD CONSTRAINTS (vm can't be started)
next if !&$check_hard_constraints($self, $sd, $node, $group_members_prio);
- ### FILTERING too much loaded nodes
- next if !&$check_target_load($self,$sd, $node);
+ ### FILTERING overloaded nodes and computing target stats
+ my $target_stats = {};
+ next if !&$check_target_load($self, $sid, $sd, $node, $target_stats);
#### compute differents prio
- $target_nodes->{$node} = &$add_prio($self, $sd, $node, $group_members_prio);
+ $target_nodes->{$node} = &$add_prio($self, $node, $group_members_prio, $target_stats);
}
# if ressource aware is enabled, order by score
diff --git a/src/PVE/HA/Balancer/Services.pm b/src/PVE/HA/Balancer/Services.pm
index 6cce6a7..d095b67 100644
--- a/src/PVE/HA/Balancer/Services.pm
+++ b/src/PVE/HA/Balancer/Services.pm
@@ -5,13 +5,30 @@ use warnings;
use PVE::HA::Balancer::Topsis;
use PVE::HA::Balancer::AHP;
-my $check_anti_affinity = sub {
- my ($vmid, $node, $vm_stats) = @_;
-
- #implement me
+sub compute_affinity {
+ my ($self, $sid, $node) = @_;
+
+ my $groups_resources = $self->{groups_resources};
+ my $resources_groups = $self->{resources_groups};
+ my $ss = $self->{ss};
+
+ my $affinity_score = 0;
+
+ my $resource_groups = $resources_groups->{$sid};
+ foreach my $groupid (keys %$resource_groups) {
+ my $affinity = $groups_resources->{$groupid}->{affinity};
+ next if !$affinity;
+ my $resources = $groups_resources->{$groupid}->{resources};
+ foreach my $othersid (keys %$resources) {
+ next if $othersid eq $sid;
+ my $other_service_node = $ss->{$othersid}->{node};
+ $affinity_score++ if ($affinity eq 'separate' && $other_service_node eq $node) ||
+ ($affinity eq 'group' && $other_service_node ne $node);
+ }
+ }
- return undef;
-};
+ return $affinity_score;
+}
my $check_cpu_pressure = sub {
my ($vm_stats) = @_;
@@ -43,7 +60,7 @@ my $check_pseudo_mem_pressure = sub {
};
my $get_bad_vms = sub {
- my($ss, $sc, $online_nodes) = @_;
+ my ($self, $ss, $sc, $online_nodes) = @_;
my $bad_vms = {};
@@ -67,7 +84,6 @@ my $get_bad_vms = sub {
my $node_stats = $online_nodes->{$node}->{stats};
my $vm_stats = $sd->{stats};
-
# skip vm is recently started or migrated
next if !defined($vm_stats->{uptime}) || $vm_stats->{uptime} < 300;
@@ -77,12 +93,12 @@ my $get_bad_vms = sub {
#PVE::QemuServer::check_local_resources($vmconf, 1);
- $vm_stats->{affinity} = 0;
$vm_stats->{mem_pseudo_pressure} = 0;
+ $vm_stats->{affinity} = compute_affinity($self, $sid, $node);
my $add_vm = undef;
- $add_vm = 1 if &$check_anti_affinity($sid, $node, $vm_stats);
+ $add_vm = 1 if $vm_stats->{affinity};
$add_vm = 1 if &$check_cpu_pressure($vm_stats);
$add_vm = 1 if &$check_pseudo_mem_pressure($node_stats, $vm_stats);
next if !$add_vm;
@@ -115,7 +131,7 @@ sub get_vm_targetnode {
my $online_nodes = $self->{online_node_stats};
- my $bad_vms = &$get_bad_vms($ss, $sc, $online_nodes);
+ my $bad_vms = &$get_bad_vms($self, $ss, $sc, $online_nodes);
return if !$bad_vms;
my $vm_scores = &$get_score($self, $bad_vms);
@@ -128,7 +144,7 @@ sub get_vm_targetnode {
my $cd = $sc->{$sid};
my $sd = $ss->{$sid};
- my $node = $self->find_node_target($cd , $sd);
+ my $node = $self->find_node_target($sid, $cd, $sd);
next if !$node;
# register last sid we tried to migrate, to not try to balance it in loop
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 4e318bd..03b0520 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -371,7 +371,7 @@ sub manage {
my $sc = $haenv->read_service_config();
$self->{groups} = $haenv->read_group_config(); # update
- $self->{vmgroups} = $haenv->read_vmgroup_config();
+ $self->get_resources_groups();
# compute new service status
@@ -833,7 +833,7 @@ sub next_state_recovery {
$self->recompute_online_node_usage(); # we want the most current node state
- my $recovery_node = $self->find_node_target($cd , $sd);
+ my $recovery_node = $self->find_node_target($sid, $cd, $sd);
if ($recovery_node) {
my $msg = "recover service '$sid' from fenced node '$fenced_node' to node '$recovery_node'";
@@ -871,13 +871,13 @@ sub next_state_recovery {
}
sub find_node_target {
- my($self, $cd, $sd) = @_;
+ my ($self, $sid, $cd, $sd) = @_;
my $online_nodes = $self->{online_node_stats};
my $groups = $self->{groups};
my $hagroup = get_service_group($groups, $online_nodes, $cd);
my ($pri_groups, $group_members_prio) = get_node_priority_groups($hagroup, $online_nodes);
- return PVE::HA::Balancer::Nodes::find_target($self, $cd, $sd, $group_members_prio);
+ return PVE::HA::Balancer::Nodes::find_target($self, $sid, $cd, $sd, $group_members_prio);
}
sub loadbalance {
@@ -917,4 +917,20 @@ sub balancer_status {
$self->{balancer}->{enabled} = $dc_ha_cfg->{balancer};
}
+sub get_resources_groups {
+ my ($self) = @_;
+
+ my $resources_groups_config = $self->{haenv}->read_resources_groups_config();
+ my $groups_resources = $resources_groups_config->{ids};
+ my $resources_groups = {};
+ foreach my $groupid (keys %$groups_resources) {
+ my $resources = $groups_resources->{$groupid}->{resources};
+ foreach my $sid (keys %$resources) {
+ $resources_groups->{$sid}->{$groupid} = 1;
+ }
+ }
+ $self->{resources_groups} = $resources_groups;
+ $self->{groups_resources} = $groups_resources;
+}
+
1;
--
2.30.2