[pve-devel] [PATCH ha-manager 06/11] usage: add Usage::Static plugin

Fiona Ebner f.ebner at proxmox.com
Thu Nov 10 15:37:54 CET 2022


for calculating node usage of services based upon static CPU and
memory configuration as well as scoring the nodes with that
information to decide where to start a new or recovered service.

For getting the service stats, it's necessary to also consider the
migration target (if present), because the configuration file might
have already moved.

It's necessary to update the cluster filesystem upon stealing the
service to be able to always read the moved config right away when
adding the usage.

Signed-off-by: Fiona Ebner <f.ebner at proxmox.com>
---

For add_service_usage_to_node(), not sure if the callers should rather
handle the error. But I'd make them just do the same, i.e. log warning
and continue.

 debian/pve-ha-manager.install |   1 +
 src/PVE/HA/Env/PVE2.pm        |   4 ++
 src/PVE/HA/Usage.pm           |   1 +
 src/PVE/HA/Usage/Makefile     |   2 +-
 src/PVE/HA/Usage/Static.pm    | 114 ++++++++++++++++++++++++++++++++++
 5 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 src/PVE/HA/Usage/Static.pm

diff --git a/debian/pve-ha-manager.install b/debian/pve-ha-manager.install
index 87fb24c..a7598a9 100644
--- a/debian/pve-ha-manager.install
+++ b/debian/pve-ha-manager.install
@@ -35,5 +35,6 @@
 /usr/share/perl5/PVE/HA/Tools.pm
 /usr/share/perl5/PVE/HA/Usage.pm
 /usr/share/perl5/PVE/HA/Usage/Basic.pm
+/usr/share/perl5/PVE/HA/Usage/Static.pm
 /usr/share/perl5/PVE/Service/pve_ha_crm.pm
 /usr/share/perl5/PVE/Service/pve_ha_lrm.pm
diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index 7cecf35..7fac43c 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -176,6 +176,10 @@ sub steal_service {
     } else {
 	die "implement me";
     }
+
+    # Necessary for (at least) static usage plugin to always be able to read service config from new
+    # node right away.
+    $self->cluster_state_update();
 }
 
 sub read_group_config {
diff --git a/src/PVE/HA/Usage.pm b/src/PVE/HA/Usage.pm
index 4c723d1..66d9572 100644
--- a/src/PVE/HA/Usage.pm
+++ b/src/PVE/HA/Usage.pm
@@ -33,6 +33,7 @@ sub contains_node {
     die "implement in subclass";
 }
 
+# Logs a warning to $haenv upon failure, but does not die.
 sub add_service_usage_to_node {
     my ($self, $nodename, $sid, $service_node, $migration_target) = @_;
 
diff --git a/src/PVE/HA/Usage/Makefile b/src/PVE/HA/Usage/Makefile
index ccf1282..5a51359 100644
--- a/src/PVE/HA/Usage/Makefile
+++ b/src/PVE/HA/Usage/Makefile
@@ -1,4 +1,4 @@
-SOURCES=Basic.pm
+SOURCES=Basic.pm Static.pm
 
 .PHONY: install
 install:
diff --git a/src/PVE/HA/Usage/Static.pm b/src/PVE/HA/Usage/Static.pm
new file mode 100644
index 0000000..78883aa
--- /dev/null
+++ b/src/PVE/HA/Usage/Static.pm
@@ -0,0 +1,114 @@
+package PVE::HA::Usage::Static;
+
+use strict;
+use warnings;
+
+use PVE::HA::Resources;
+use PVE::RS::ResourceScheduling::Static;
+
+use base qw(PVE::HA::Usage);
+
+sub new {
+    my ($class, $haenv) = @_;
+    # Fetch the static node stats up front; a failure is re-raised with added context.
+    my $node_stats = eval { $haenv->get_static_node_stats() };
+    die "did not get static node usage information - $@" if $@;
+    # Likewise, an error from the scheduler constructor aborts new() with added context.
+    my $scheduler = eval { PVE::RS::ResourceScheduling::Static->new(); };
+    die "unable to initialize static scheduling - $@" if $@;
+
+    return bless {
+	'node-stats' => $node_stats,
+	'service-stats' => {}, # per-SID cache, filled lazily by get_service_usage()
+	haenv => $haenv,
+	scheduler => $scheduler,
+    }, $class;
+}
+
+sub add_node {
+    my ($self, $nodename) = @_;
+    # Register $nodename with the scheduler using the stats fetched in new(); dies if unusable.
+    my $stats = $self->{'node-stats'}->{$nodename}
+	or die "did not get static node usage information for '$nodename'\n";
+    die "static node usage information for '$nodename' missing cpu count\n" if !$stats->{cpus};
+    die "static node usage information for '$nodename' missing memory\n" if !$stats->{memory};
+    # The checks above also reject a value of 0; int() truncates both values to integers.
+    eval { $self->{scheduler}->add_node($nodename, int($stats->{cpus}), int($stats->{memory})); };
+    die "initializing static node usage for '$nodename' failed - $@" if $@;
+}
+
+sub remove_node {
+    my ($self, $nodename) = @_;
+    # Thin wrapper: removal of $nodename is delegated entirely to the scheduler object.
+    $self->{scheduler}->remove_node($nodename);
+}
+
+sub list_nodes {
+    my ($self) = @_;
+    # Flatten the array reference returned by the scheduler into a plain list.
+    return $self->{scheduler}->list_nodes()->@*;
+}
+
+sub contains_node {
+    my ($self, $nodename) = @_;
+    # Passes the answer of the scheduler for $nodename through unchanged.
+    return $self->{scheduler}->contains_node($nodename);
+}
+
+my sub get_service_usage {
+    my ($self, $sid, $service_node, $migration_target) = @_;
+    # Serve from the per-SID cache if present; only successful lookups are stored there.
+    return $self->{'service-stats'}->{$sid} if $self->{'service-stats'}->{$sid};
+
+    my (undef, $type, $id) = $self->{haenv}->parse_sid($sid);
+    my $plugin = PVE::HA::Resources->lookup($type);
+
+    my $stats = eval { $plugin->get_static_stats($id, $service_node); };
+    if (my $err = $@) {
+	# Config may have moved during a migration: retry on the target, report the first error.
+	$stats = eval { $plugin->get_static_stats($id, $migration_target); } if $migration_target;
+	die "did not get static service usage information for '$sid' - $err\n" if !$stats;
+    }
+    # Normalize to a numeric CPU value and an integer memory value before caching.
+    my $service_stats = {
+	maxcpu => $stats->{maxcpu} + 0.0, # containers allow non-integer cpulimit
+	maxmem => int($stats->{maxmem}),
+    };
+
+    $self->{'service-stats'}->{$sid} = $service_stats;
+
+    return $service_stats;
+}
+
+sub add_service_usage_to_node {
+    my ($self, $nodename, $sid, $service_node, $migration_target) = @_;
+    # Best-effort: a failed stats lookup or scheduler call is logged as a warning, not re-thrown.
+    eval {
+	my $service_usage = get_service_usage($self, $sid, $service_node, $migration_target);
+	$self->{scheduler}->add_service_usage_to_node($nodename, $service_usage);
+    };
+    $self->{haenv}->log('warning', "unable to add service '$sid' usage to node '$nodename' - $@")
+	if $@;
+}
+
+sub score_nodes_to_start_service {
+    my ($self, $sid, $service_node) = @_;
+    # Returns a hash reference mapping node name to score; no migration target is passed here.
+    my $score_list = eval {
+	my $service_usage = get_service_usage($self, $sid, $service_node);
+	$self->{scheduler}->score_nodes_to_start_service($service_usage);
+    };
+    if (my $err = $@) {
+	$self->{haenv}->log(
+	    'err',
+	    "unable to score nodes according to static usage for service '$sid' - $err",
+	);
+	# TODO maybe use service count as fallback? For now every listed node gets the same score.
+	return { map { $_ => 1 } $self->list_nodes() };
+    }
+
+    # Entries are [node, score] pairs; negate the score so lower is better, as callers expect.
+    return { map { $_->[0] => -$_->[1] } $score_list->@* };
+}
+
+1;
-- 
2.30.2






More information about the pve-devel mailing list