[pve-devel] [PATCH pve-ha-manager v2 1/1] notifications: overhaul fence notification
Lukas Wagner
l.wagner at proxmox.com
Fri Mar 28 11:19:15 CET 2025
- try to make template variable names more clear (in preparation
for #6143)
- add common template variables (fqdn, hostname, cluster-name)
- Instead of dumping the status-data variable as a JSON blob we
add template variables for the most useful information and
render it in a structured manner
Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
src/PVE/HA/NodeStatus.pm | 43 +++++++++++++++----
src/PVE/HA/Sim/Env.pm | 8 ++--
src/templates/default/fencing-body.html.hbs | 43 ++++++++++++++++---
src/templates/default/fencing-body.txt.hbs | 40 +++++++++++++----
src/templates/default/fencing-subject.txt.hbs | 6 ++-
5 files changed, 111 insertions(+), 29 deletions(-)
diff --git a/src/PVE/HA/NodeStatus.pm b/src/PVE/HA/NodeStatus.pm
index 9e6d898..3a42746 100644
--- a/src/PVE/HA/NodeStatus.pm
+++ b/src/PVE/HA/NodeStatus.pm
@@ -3,6 +3,8 @@ package PVE::HA::NodeStatus;
use strict;
use warnings;
+use PVE::Notify;
+
use JSON;
my $fence_delay = 60;
@@ -195,15 +197,38 @@ my $send_fence_state_email = sub {
my $haenv = $self->{haenv};
my $status = $haenv->read_manager_status();
- my $template_data = {
- "status-data" => {
- manager_status => $status,
- node_status => $self->{status}
- },
- "node" => $node,
- "subject-prefix" => $subject_prefix,
- "subject" => $subject,
- };
+ my $template_data = PVE::Notify::common_template_data();
+ # Those two are needed for the expected output for test cases,
+ # see src/PVE/HA/Sim/Env.pm
+ $template_data->{"fence-status"} = $subject;
+ $template_data->{"fence-prefix"} = $subject_prefix;
+
+ $template_data->{"is-success"} = 1 ? $subject_prefix eq "SUCCEED" : 0;
+
+ $template_data->{"failed-node"} = $node;
+ $template_data->{"master-node"} = $status->{master_node};
+ # There is a handlebars helper 'timestamp', we should not
+ # name a variable the same way.
+ $template_data->{"fence-timestamp"} = $status->{timestamp};
+
+ $template_data->{"nodes"} = [];
+ for my $key (sort keys $status->{node_status}->%*) {
+ push $template_data->{"nodes"}->@*, {
+ node => $key,
+ status => $status->{node_status}->{$key}
+ };
+ }
+
+ $template_data->{"resources"} = [];
+ for my $key (sort keys $status->{service_status}->%*) {
+ my $resource_status = $status->{service_status}->{$key};
+ push $template_data->{"resources"}->@*, {
+ resource => $key,
+ state => $resource_status->{state},
+ node => $resource_status->{node},
+ running => $resource_status->{running},
+ };
+ }
my $metadata_fields = {
type => 'fencing',
diff --git a/src/PVE/HA/Sim/Env.pm b/src/PVE/HA/Sim/Env.pm
index b2ab231..f250d43 100644
--- a/src/PVE/HA/Sim/Env.pm
+++ b/src/PVE/HA/Sim/Env.pm
@@ -299,12 +299,12 @@ sub log {
sub send_notification {
my ($self, $template_name, $properties) = @_;
- # The template for the subject is "{{subject-prefix}}: {{subject}}"
+ # The subject expected by the test cases is "{{fence-prefix}}: {{fence-status}}"
# We have to perform poor-man's template rendering to pass the test cases.
- my $subject = "{{subject-prefix}}: {{subject}}";
- $subject = $subject =~ s/\{\{subject-prefix}}/$properties->{"subject-prefix"}/r;
- $subject = $subject =~ s/\{\{subject}}/$properties->{"subject"}/r;
+ my $subject = "{{fence-prefix}}: {{fence-status}}";
+ $subject = $subject =~ s/\{\{fence-prefix}}/$properties->{"fence-prefix"}/r;
+ $subject = $subject =~ s/\{\{fence-status}}/$properties->{"fence-status"}/r;
# only log subject, do not spam the logs
$self->log('email', $subject);
diff --git a/src/templates/default/fencing-body.html.hbs b/src/templates/default/fencing-body.html.hbs
index 1420348..901a0e1 100644
--- a/src/templates/default/fencing-body.html.hbs
+++ b/src/templates/default/fencing-body.html.hbs
@@ -1,14 +1,43 @@
<html>
<body>
- The node '{{node}}' failed and needs manual intervention.<br/><br/>
+ The node '{{failed-node}}' in cluster '{{cluster-name}}' failed and
+ needs manual intervention.<br/><br/>
- The PVE HA manager tries to fence it and recover the configured HA resources to
- a healthy node if possible.<br/><br/>
+ {{#if is-success~}}
+ The PVE HA manager successfully fenced '{{failed-node}}'.<br/><br/>
+ {{else}}
+ The PVE HA manager will now fence '{{failed-node}}'.<br/><br/>
+ {{/if}}
- Current fence status: {{subject-prefix}}<br/>
- {{subject}}<br/>
+ <b>Status:</b> {{fence-status}}<br/>
+ <b>Timestamp:</b> {{timestamp fence-timestamp}}<br/>
- <h2 style="font-size: 1em">Overall Cluster status:</h2>
- {{object status-data}}
+ <h2 style="font-size: 1em">Cluster Node Status:</h2>
+ <ul>
+ {{#each nodes}}
+ <li>
+ {{this.node}}: {{this.status}} {{#if (eq this.node ../master-node)}}[master]{{/if}}
+ </li>
+ {{/each}}
+ </ul>
+
+ <h2 style="font-size: 1em">HA Resources:</h2>
+ The following HA resources were running on the failed node and will be
+ recovered to a healthy node if possible:
+ <ul>
+ {{#each resources}}
+ {{#if (eq this.node ../failed-node)}}
+ <li>{{this.resource}} [{{this.node}}]: {{this.state}}</li>
+ {{/if}}
+ {{/each}}
+ </ul>
+ The other HA resources in this cluster are:
+ <ul>
+ {{#each resources}}
+ {{#if (ne this.node ../failed-node)}}
+ <li>{{this.resource}} [{{this.node}}]: {{this.state}}</li>
+ {{/if}}
+ {{/each}}
+ </ul>
</body>
</html>
diff --git a/src/templates/default/fencing-body.txt.hbs b/src/templates/default/fencing-body.txt.hbs
index e46a1fd..6f54122 100644
--- a/src/templates/default/fencing-body.txt.hbs
+++ b/src/templates/default/fencing-body.txt.hbs
@@ -1,11 +1,35 @@
-The node '{{node}}' failed and needs manual intervention.
+The node '{{failed-node}}' in cluster '{{cluster-name}}' failed
+and needs manual intervention.
-The PVE HA manager tries to fence it and recover the configured HA resources to
-a healthy node if possible.
+{{#if is-success~}}
+The PVE HA manager successfully fenced '{{failed-node}}'.
+{{else~}}
+The PVE HA manager will now fence '{{failed-node}}'.
+{{/if}}
+Status: {{fence-status}}
+Timestamp: {{timestamp fence-timestamp}}
-Current fence status: {{subject-prefix}}
-{{subject}}
+Cluster Node Status:
+--------------------
+{{#each nodes~}}
+ - {{this.node}}: {{this.status}} {{#if (eq this.node ../master-node)}}[master]{{/if}}
+{{/each}}
+
+HA Resources:
+-------------
+The following HA resources were running on the failed node and will be
+recovered to a healthy node if possible:
+
+{{#each resources~}}
+{{#if (eq this.node ../failed-node)~}}
+ - {{this.resource}} [{{this.node}}]: {{this.state}}
+{{/if~}}
+{{/each}}
+The other HA resources in this cluster are:
+
+{{#each resources~}}
+{{#if (ne this.node ../failed-node)~}}
+ - {{this.resource}} [{{this.node}}]: {{this.state}}
+{{/if~}}
+{{/each~}}
-Overall Cluster status:
------------------------
-{{object status-data}}
diff --git a/src/templates/default/fencing-subject.txt.hbs b/src/templates/default/fencing-subject.txt.hbs
index 43651f9..1c140e3 100644
--- a/src/templates/default/fencing-subject.txt.hbs
+++ b/src/templates/default/fencing-subject.txt.hbs
@@ -1 +1,5 @@
-{{subject-prefix}}: {{subject}}
+{{#if is-success~}}
+Successfully fenced node '{{failed-node}}'
+{{else}}
+Trying to fence node '{{failed-node}}'
+{{/if}}
--
2.39.5
More information about the pve-devel
mailing list