[pve-devel] [PATCH pve-manager 05/18] PVE::Replication - use new calendar events instead of interval

Dietmar Maurer dietmar at proxmox.com
Tue May 23 09:08:44 CEST 2017


And implement a retry algorithm after failure:

  $next_sync = $state->{last_try} + 5*60*$fail_count;

and limit to 3 failures.

Signed-off-by: Dietmar Maurer <dietmar at proxmox.com>
---
 PVE/API2/Replication.pm |  2 +-
 PVE/CLI/pvesr.pm        | 28 ++++++++++++++++++++--------
 PVE/Replication.pm      | 35 +++++++++++++++++++++--------------
 3 files changed, 42 insertions(+), 23 deletions(-)

diff --git a/PVE/API2/Replication.pm b/PVE/API2/Replication.pm
index c400a93c..977d3ec1 100644
--- a/PVE/API2/Replication.pm
+++ b/PVE/API2/Replication.pm
@@ -82,7 +82,7 @@ __PACKAGE__->register_method ({
 	    my $vmid = $d->{guest};
 	    next if !$rpcenv->check($authuser, "/vms/$vmid", [ 'VM.Audit' ]);
 	    $d->{id} = $id;
-	    foreach my $k (qw(last_sync fail_count error duration)) {
+	    foreach my $k (qw(last_sync last_try fail_count error duration)) {
 		$d->{$k} = $state->{$k} if defined($state->{$k});
 	    }
 	    if ($state->{pid} && $state->{ptime}) {
diff --git a/PVE/CLI/pvesr.pm b/PVE/CLI/pvesr.pm
index 38116f7b..115bc2c1 100644
--- a/PVE/CLI/pvesr.pm
+++ b/PVE/CLI/pvesr.pm
@@ -97,14 +97,14 @@ my $print_job_list = sub {
 
     my $format = "%-20s %10s %-20s %10s %5s %8s\n";
 
-    printf($format, "JobID", "GuestID", "Target", "Interval", "Rate", "Enabled");
+    printf($format, "JobID", "GuestID", "Target", "Schedule", "Rate", "Enabled");
 
     foreach my $job (sort { $a->{guest} <=> $b->{guest} } @$list) {
 	my $plugin = PVE::ReplicationConfig->lookup($job->{type});
 	my $tid = $plugin->get_unique_target_id($job);
 
 	printf($format, $job->{id}, $job->{guest}, $tid,
-	       defined($job->{interval}) ? $job->{interval} : '-',
+	       defined($job->{schedule}) ? $job->{schedule} : '*/15',
 	       defined($job->{rate}) ? $job->{rate} : '-',
 	       $job->{disable} ? 'no' : 'yes'
 	    );
@@ -114,21 +114,33 @@ my $print_job_list = sub {
 my $print_job_status = sub {
     my ($list) = @_;
 
-    my $format = "%-20s %10s %-20s %20s %10s %10s %s\n";
+    my $format = "%-20s %10s %-20s %20s %20s %10s %10s %s\n";
 
-    printf($format, "JobID", "GuestID", "Target", "LastSync", "Duration", "FailCount", "State");
+    printf($format, "JobID", "GuestID", "Target", "LastSync", "NextSync", "Duration", "FailCount", "State");
 
     foreach my $job (sort { $a->{guest} <=> $b->{guest} } @$list) {
 	my $plugin = PVE::ReplicationConfig->lookup($job->{type});
 	my $tid = $plugin->get_unique_target_id($job);
 
-	my $timestr = $job->{last_sync} ?
-	    strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{last_sync})) : '-';
+	my $timestr = '-'; # placeholder shown while last_sync is unset/0
+	if ($job->{last_sync}) {
+	    $timestr = strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{last_sync}));
+	}
+
+	my $nextstr = '-';
+	if (my $next = $job->{next_sync}) {
+	    my $now = time();
+	    if ($next > $now) {
+		$nextstr = strftime("%Y-%m-%d_%H:%M:%S", localtime($job->{next_sync}));
+	    } else {
+		$nextstr = 'now'
+	    }
+	}
 
 	my $state = $job->{pid} ? "SYNCING" : $job->{error} // 'OK';
 
 	printf($format, $job->{id}, $job->{guest}, $tid,
-	       $timestr, $job->{duration} // '-',
+	       $timestr, $nextstr, $job->{duration} // '-',
 	       $job->{fail_count}, $state);
     }
 };
@@ -136,7 +148,7 @@ my $print_job_status = sub {
 our $cmddef = {
     status => [ 'PVE::API2::Replication', 'status', [], { node => $nodename }, $print_job_status ],
 
-    jobs => [ 'PVE::API2::ReplicationConfig', 'index' , [], {}, $print_job_list ],
+    list => [ 'PVE::API2::ReplicationConfig', 'index' , [], {}, $print_job_list ],
     read => [ 'PVE::API2::ReplicationConfig', 'read' , ['id'], {},
 	     sub { my $res = shift; print to_json($res, { pretty => 1, canonical => 1}); }],
     update => [ 'PVE::API2::ReplicationConfig', 'update' , ['id'], {} ],
diff --git a/PVE/Replication.pm b/PVE/Replication.pm
index ff4bbeb4..d878b44a 100644
--- a/PVE/Replication.pm
+++ b/PVE/Replication.pm
@@ -9,6 +9,7 @@ use Time::HiRes qw(gettimeofday tv_interval);
 use PVE::INotify;
 use PVE::ProcFSTools;
 use PVE::Tools;
+use PVE::CalendarEvent;
 use PVE::Cluster;
 use PVE::QemuConfig;
 use PVE::QemuServer;
@@ -46,7 +47,8 @@ my $get_job_state = sub {
     $state = {} if !$state;
 
     $state->{last_iteration} //= 0;
-    $state->{last_sync} //= 0;
+    $state->{last_try} //= 0; # last sync start time
+    $state->{last_sync} //= 0; # last successful sync start time
     $state->{fail_count} //= 0;
 
     return $state;
@@ -93,10 +95,23 @@ sub job_status {
 
 	next if $jobcfg->{disable};
 
-	$jobcfg->{state} = $get_job_state->($stateobj, $jobcfg);
+	my $state = $get_job_state->($stateobj, $jobcfg);
+	$jobcfg->{state} = $state;
 	$jobcfg->{id} = $jobid;
 	$jobcfg->{vmtype} = $vms->{ids}->{$vmid}->{type};
 
+	my $next_sync = 0;
+	if (my $fail_count = $state->{fail_count}) {
+	    if ($fail_count < 3) {
+		$next_sync = $state->{last_try} + 5*60*$fail_count;
+	    }
+	} else {
+	    my $schedule =  $jobcfg->{schedule} || '*/15';
+	    my $calspec = PVE::CalendarEvent::parse_calendar_event($schedule);
+	    $next_sync = PVE::CalendarEvent::compute_next_event($calspec, $state->{last_try}) // 0;
+	}
+	$jobcfg->{next_sync} = $next_sync;
+
 	$jobs->{$jobid} = $jobcfg;
     }
 
@@ -110,15 +125,6 @@ my $get_next_job = sub {
 
     my $jobs = job_status($stateobj);
 
-    # compute next_sync here to make it easy to sort jobs
-    my $next_sync_hash = {};
-    foreach my $jobid (keys %$jobs) {
-	my $jobcfg = $jobs->{$jobid};
-	my $interval = $jobcfg->{interval} || 15;
-	my $last_sync = $jobcfg->{state}->{last_sync};
-	$next_sync_hash->{$jobid} = $last_sync + $interval * 60;
-    }
-
     my $sort_func = sub {
 	my $joba = $jobs->{$a};
 	my $jobb = $jobs->{$b};
@@ -126,7 +132,7 @@ my $get_next_job = sub {
 	my $sb =  $jobb->{state};
 	my $res = $sa->{last_iteration} cmp $sb->{last_iteration};
 	return $res if $res != 0;
-	$res = $next_sync_hash->{$a} <=> $next_sync_hash->{$b};
+	$res = $joba->{next_sync} <=> $jobb->{next_sync};
 	return $res if $res != 0;
 	return  $joba->{guest} <=> $jobb->{guest};
     };
@@ -134,7 +140,7 @@ my $get_next_job = sub {
     foreach my $jobid (sort $sort_func keys %$jobs) {
 	my $jobcfg = $jobs->{$jobid};
 	next if $jobcfg->{state}->{last_iteration} >= $now;
-	if ($now >= $next_sync_hash->{$jobid}) {
+	if ($jobcfg->{next_sync} && ($now >= $jobcfg->{next_sync})) {
 	    $next_jobid = $jobid;
 	    last;
 	}
@@ -174,7 +180,7 @@ my $run_replication = sub {
 
     $state->{pid} = $$;
     $state->{ptime} = PVE::ProcFSTools::read_proc_starttime($state->{pid});
-
+    $state->{last_try} = $start_time;
     $update_job_state->($stateobj, $jobcfg,  $state);
 
     eval { replicate($jobcfg, $start_time); };
@@ -188,6 +194,7 @@ my $run_replication = sub {
 	$state->{fail_count}++;
 	$state->{error} = "$err";
 	$update_job_state->($stateobj, $jobcfg,  $state);
+	warn $err;
    } else {
 	$state->{last_sync} = $start_time;
 	$state->{fail_count} = 0;
-- 
2.11.0




More information about the pve-devel mailing list