[pve-devel] [PATCH v3 pve-manager 45/66] api: replication: send notifications via new notification module

Lukas Wagner l.wagner at proxmox.com
Mon Jul 17 17:00:30 CEST 2023


If the new 'target-replication' option in datacenter.cfg is set
to a notification target, we send notifications that way.
If it is not set, we continue send a notification to the
default target (mail to root at pam).

There is also a new 'replication' option. It controls whether
to send a notification at all.

Signed-off-by: Lukas Wagner <l.wagner at proxmox.com>
---
 PVE/API2/Replication.pm | 63 ++++++++++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/PVE/API2/Replication.pm b/PVE/API2/Replication.pm
index 89c5a802..d61518ba 100644
--- a/PVE/API2/Replication.pm
+++ b/PVE/API2/Replication.pm
@@ -15,6 +15,7 @@ use PVE::QemuConfig;
 use PVE::QemuServer;
 use PVE::LXC::Config;
 use PVE::LXC;
+use PVE::Notify;
 
 use PVE::RESTHandler;
 
@@ -91,6 +92,24 @@ my sub _should_mail_at_failcount {
     return $i * 48 == $fail_count;
 };
 
+my $replication_error_subject_template = "Replication Job: '{{job-id}}' failed";
+my $replication_error_body_template = <<EOT;
+{{#verbatim}}
+Replication job '{{job-id}}' with target '{{job-target}}' and schedule '{{job-schedule}}' failed!
+
+Last successful sync: {{timestamp last-sync}}
+Next sync try: {{timestamp next-sync}}
+Failure count: {{failure-count}}
+
+{{#if (eq failure-count 3)}}
+Note: The system  will now reduce the frequency of error reports, as the job
+appears to be stuck.
+{{/if}}
+Error:
+{{verbatim-monospaced error}}
+{{/verbatim}}
+EOT
+
 my sub _handle_job_err {
     my ($job, $err, $mail) = @_;
 
@@ -103,33 +122,37 @@ my sub _handle_job_err {
 
     return if !_should_mail_at_failcount($fail_count);
 
-    my $schedule = $job->{schedule} // '*/15';
-
-    my $msg = "Replication job $job->{id} with target '$job->{target}' and schedule";
-    $msg .= " '$schedule' failed!\n";
-
-    $msg .= "  Last successful sync: ";
-    if (my $last_sync = $jobstate->{last_sync}) {
-	$msg .= render_timestamp($last_sync) ."\n";
-    } else {
-	$msg .= "None/Unknown\n";
-    }
     # not yet updated, so $job->next_sync here is actually the current one.
     # NOTE: Copied from PVE::ReplicationState::job_status()
     my $next_sync = $job->{next_sync} + 60 * ($fail_count <= 3 ? 5 * $fail_count : 30);
-    $msg .= "  Next sync try: " . render_timestamp($next_sync) ."\n";
-    $msg .= "  Failure count: $fail_count\n";
-
 
-    if ($fail_count == 3) {
-	$msg .= "\nNote: The system will now reduce the frequency of error reports,";
-	$msg .= " as the job appears to be stuck.\n";
-    }
+    # The replication job is run every 15 mins if no schedule is set.
+    my $schedule = $job->{schedule} // '*/15';
 
-    $msg .= "\nError:\n$err";
+    my $properties = {
+	"failure-count" => $fail_count,
+	"last-sync"     => $jobstate->{last_sync},
+	"next-sync"     => $next_sync,
+	"job-id"        => $job->{id},
+	"job-target"    => $job->{target},
+	"job-schedule"  => $schedule,
+	"error"         => $err,
+    };
 
     eval {
-	PVE::Tools::sendmail('root', "Replication Job: $job->{id} failed", $msg)
+	my $dcconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
+	my $target = $dcconf->{notify}->{'target-replication'} // PVE::Notify::default_target();
+	my $notify = $dcconf->{notify}->{'replication'} // 'always';
+
+	if ($notify eq 'always') {
+	    PVE::Notify::error(
+		$target,
+		$replication_error_subject_template,
+		$replication_error_body_template,
+		$properties
+	    );
+	}
+
     };
     warn ": $@" if $@;
 }
-- 
2.39.2






More information about the pve-devel mailing list