[pve-devel] [PATCH pve-ha-manager 1/1] notifications: overhaul fence notification

Lukas Wagner l.wagner at proxmox.com
Thu Mar 27 15:23:20 CET 2025


- try to make template variable names more clear (in preparation
  for #6143)
- add common template variables (fqdn, hostname, cluster-name)
- Instead of dumping the status-data variable as a JSON blob we
  add template variables for the most useful information and
  render it in a structured manner

Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
 src/PVE/HA/NodeStatus.pm                      | 43 +++++++++++++++----
 src/PVE/HA/Sim/Env.pm                         |  8 ++--
 src/templates/default/fencing-body.html.hbs   | 43 ++++++++++++++++---
 src/templates/default/fencing-body.txt.hbs    | 40 +++++++++++++----
 src/templates/default/fencing-subject.txt.hbs |  6 ++-
 5 files changed, 111 insertions(+), 29 deletions(-)

diff --git a/src/PVE/HA/NodeStatus.pm b/src/PVE/HA/NodeStatus.pm
index 9e6d898..3a42746 100644
--- a/src/PVE/HA/NodeStatus.pm
+++ b/src/PVE/HA/NodeStatus.pm
@@ -3,6 +3,8 @@ package PVE::HA::NodeStatus;
 use strict;
 use warnings;
 
+use PVE::Notify;
+
 use JSON;
 
 my $fence_delay = 60;
@@ -195,15 +197,38 @@ my $send_fence_state_email = sub {
     my $haenv = $self->{haenv};
     my $status = $haenv->read_manager_status();
 
-    my $template_data = {
-	"status-data"    => {
-	    manager_status => $status,
-	    node_status    => $self->{status}
-	},
-	"node"           => $node,
-	"subject-prefix" => $subject_prefix,
-	"subject"        => $subject,
-    };
+    my $template_data = PVE::Notify::common_template_data();
+    # Those two are needed for the expected output for test cases,
+    # see src/PVE/HA/Sim/Env.pm
+    $template_data->{"fence-status"} = $subject;
+    $template_data->{"fence-prefix"} = $subject_prefix;
+
+    $template_data->{"is-success"} = 1 ? $subject_prefix eq "SUCCEED" : 0;
+
+    $template_data->{"failed-node"} = $node;
+    $template_data->{"master-node"} = $status->{master_node};
+    # There is a handlebars helper 'timestamp', we should not
+    # name a variable the same way.
+    $template_data->{"fence-timestamp"} = $status->{timestamp};
+
+    $template_data->{"nodes"} = [];
+    for my $key (sort keys $status->{node_status}->%*) {
+	push $template_data->{"nodes"}->@*, {
+	    node => $key,
+	    status => $status->{node_status}->{$key}
+	};
+    }
+
+    $template_data->{"resources"} = [];
+    for my $key (sort keys $status->{service_status}->%*) {
+	my $resource_status = $status->{service_status}->{$key};
+	push $template_data->{"resources"}->@*, {
+	    resource => $key,
+	    state => $resource_status->{state},
+	    node => $resource_status->{node},
+	    running => $resource_status->{running},
+	};
+    }
 
     my $metadata_fields = {
 	type => 'fencing',
diff --git a/src/PVE/HA/Sim/Env.pm b/src/PVE/HA/Sim/Env.pm
index b2ab231..f250d43 100644
--- a/src/PVE/HA/Sim/Env.pm
+++ b/src/PVE/HA/Sim/Env.pm
@@ -299,12 +299,12 @@ sub log {
 sub send_notification {
     my ($self, $template_name, $properties) = @_;
 
-    # The template for the subject is "{{subject-prefix}}: {{subject}}"
+    # The subject logged here is built from "{{fence-prefix}}: {{fence-status}}".
     # We have to perform poor-man's template rendering to pass the test cases.
 
-    my $subject = "{{subject-prefix}}: {{subject}}";
-    $subject = $subject =~ s/\{\{subject-prefix}}/$properties->{"subject-prefix"}/r;
-    $subject = $subject =~ s/\{\{subject}}/$properties->{"subject"}/r;
+    my $subject = "{{fence-prefix}}: {{fence-status}}";
+    $subject = $subject =~ s/\{\{fence-prefix}}/$properties->{"fence-prefix"}/r;
+    $subject = $subject =~ s/\{\{fence-status}}/$properties->{"fence-status"}/r;
 
     # only log subject, do not spam the logs
     $self->log('email', $subject);
diff --git a/src/templates/default/fencing-body.html.hbs b/src/templates/default/fencing-body.html.hbs
index 1420348..901a0e1 100644
--- a/src/templates/default/fencing-body.html.hbs
+++ b/src/templates/default/fencing-body.html.hbs
@@ -1,14 +1,43 @@
 <html>
     <body>
-    The node '{{node}}' failed and needs manual intervention.<br/><br/>
+        The node '{{failed-node}}' in cluster '{{cluster-name}}' failed and
+        needs manual intervention.<br/><br/>
 
-    The PVE HA manager tries to fence it and recover the configured HA resources to
-    a healthy node if possible.<br/><br/>
+        {{#if is-success~}}
+        The PVE HA manager successfully fenced '{{failed-node}}'.<br/><br/>
+        {{else}}
+        The PVE HA manager will now fence '{{failed-node}}'.<br/><br/>
+        {{/if}}
 
-    Current fence status: {{subject-prefix}}<br/>
-    {{subject}}<br/>
+        <b>Status:</b> {{fence-status}}<br/>
+        <b>Timestamp:</b> {{timestamp fence-timestamp}}<br/>
 
-    <h2 style="font-size: 1em">Overall Cluster status:</h2>
-    {{object status-data}}
+        <h2 style="font-size: 1em">Cluster Node Status:</h2>
+        <ul>
+        {{#each nodes}}
+            <li>
+                {{this.node}}: {{this.status}} {{#if (eq this.node ../master-node)}}[master]{{/if}}
+            </li>
+        {{/each}}
+        </ul>
+
+        <h2 style="font-size: 1em">HA Resources:</h2>
+        The following HA resources were running on the failed node and will be
+        recovered to a healthy node if possible:
+        <ul>
+        {{#each resources}}
+            {{#if (eq this.node ../failed-node)}}
+            <li>{{this.resource}} [{{this.node}}]: {{this.state}}</li>
+            {{/if}}
+        {{/each}}
+        </ul>
+        The other HA resources in this cluster are:
+        <ul>
+        {{#each resources}}
+            {{#if (ne this.node ../failed-node)}}
+            <li>{{this.resource}} [{{this.node}}]: {{this.state}}</li>
+            {{/if}}
+        {{/each}}
+        </ul>
     </body>
 </html>
diff --git a/src/templates/default/fencing-body.txt.hbs b/src/templates/default/fencing-body.txt.hbs
index e46a1fd..6f54122 100644
--- a/src/templates/default/fencing-body.txt.hbs
+++ b/src/templates/default/fencing-body.txt.hbs
@@ -1,11 +1,35 @@
-The node '{{node}}' failed and needs manual intervention.
+The node '{{failed-node}}' in cluster '{{cluster-name}}' failed
+and needs manual intervention.
 
-The PVE HA manager tries to fence it and recover the configured HA resources to
-a healthy node if possible.
+{{#if is-success~}}
+The PVE HA manager successfully fenced '{{failed-node}}'.
+{{else~}}
+The PVE HA manager will now fence '{{failed-node}}'.
+{{/if}}
+Status:    {{fence-status}}
+Timestamp: {{timestamp fence-timestamp}}
 
-Current fence status: {{subject-prefix}}
-{{subject}}
+Cluster Node Status:
+--------------------
+{{#each nodes~}}
+  - {{this.node}}: {{this.status}} {{#if (eq this.node ../master-node)}}[master]{{/if}}
+{{/each}}
+
+HA Resources:
+-------------
+The following HA resources were running on the failed node and will be
+recovered to a healthy node if possible:
+
+{{#each resources~}}
+{{#if (eq this.node ../failed-node)~}}
+  - {{this.resource}} [{{this.node}}]: {{this.state}}
+{{/if~}}
+{{/each}}
+The other HA resources in this cluster are:
+
+{{#each resources~}}
+{{#if (ne this.node ../failed-node)~}}
+  - {{this.resource}} [{{this.node}}]: {{this.state}}
+{{/if~}}
+{{/each~}}
 
-Overall Cluster status:
------------------------
-{{object status-data}}
diff --git a/src/templates/default/fencing-subject.txt.hbs b/src/templates/default/fencing-subject.txt.hbs
index 43651f9..1c140e3 100644
--- a/src/templates/default/fencing-subject.txt.hbs
+++ b/src/templates/default/fencing-subject.txt.hbs
@@ -1 +1,5 @@
-{{subject-prefix}}: {{subject}}
+{{#if is-success~}}
+Successfully fenced node '{{failed-node}}'
+{{else}}
+Trying to fence node '{{failed-node}}'
+{{/if}}
-- 
2.39.5





More information about the pve-devel mailing list