[pve-devel] [PATCH pve-manager] replication: add replication log files
Dietmar Maurer
dietmar at proxmox.com
Wed Jun 7 14:29:08 CEST 2017
Signed-off-by: Dietmar Maurer <dietmar at proxmox.com>
---
PVE/Replication.pm | 98 +++++++++++++++++++++++++++++-------------
bin/test/ReplicationTestEnv.pm | 35 +++++++++++----
2 files changed, 94 insertions(+), 39 deletions(-)
diff --git a/PVE/Replication.pm b/PVE/Replication.pm
index eb2583dc..dfd45b83 100644
--- a/PVE/Replication.pm
+++ b/PVE/Replication.pm
@@ -22,6 +22,20 @@ use PVE::ReplicationConfig;
use PVE::ReplicationState;
our $pvesr_lock_path = "/var/lock/pvesr.lck";
+our $replicate_logdir = "/var/log/pve/replicate";
+
+# regression tests should overwrite this
+sub job_logfile_name {
+ my ($jobid) = @_;
+
+ return "${replicate_logdir}/$jobid";
+}
+
+# regression tests should overwrite this
+sub get_log_time {
+
+ return time();
+}
sub job_status {
@@ -113,7 +127,7 @@ my $get_next_job = sub {
};
sub remote_prepare_local_job {
- my ($ssh_info, $jobid, $vmid, $volumes, $storeid_list, $last_sync, $parent_snapname, $force) = @_;
+ my ($ssh_info, $jobid, $vmid, $volumes, $storeid_list, $last_sync, $parent_snapname, $force, $logfunc) = @_;
my $ssh_cmd = PVE::Cluster::ssh_info_to_command($ssh_info);
my $cmd = [@$ssh_cmd, '--', 'pvesr', 'prepare-local-job', $jobid, $vmid];
@@ -131,7 +145,13 @@ sub remote_prepare_local_job {
$remote_snapshots = JSON::decode_json($line);
};
- PVE::Tools::run_command($cmd, outfunc => $parser);
+ my $logger = sub {
+ my $line = shift;
+ chomp $line;
+ $logfunc->("(remote_prepare_local_job) $line");
+ };
+
+ PVE::Tools::run_command($cmd, outfunc => $parser, errfunc => $logger);
die "prepare remote node failed - no result\n"
if !defined($remote_snapshots);
@@ -140,13 +160,19 @@ sub remote_prepare_local_job {
}
sub remote_finalize_local_job {
- my ($ssh_info, $jobid, $vmid, $volumes, $last_sync) = @_;
+ my ($ssh_info, $jobid, $vmid, $volumes, $last_sync, $logfunc) = @_;
my $ssh_cmd = PVE::Cluster::ssh_info_to_command($ssh_info);
my $cmd = [@$ssh_cmd, '--', 'pvesr', 'finalize-local-job', $jobid,
$vmid, @$volumes, '--last_sync', $last_sync];
- PVE::Tools::run_command($cmd);
+ my $logger = sub {
+ my $line = shift;
+ chomp $line;
+ $logfunc->("(remote_finalize_local_job) $line");
+ };
+
+ PVE::Tools::run_command($cmd, outfunc => $logger, errfunc => $logger);
}
# finds local replication snapshots from $last_sync
@@ -167,7 +193,7 @@ sub prepare {
if ($snap eq $snapname || (defined($parent_snapname) && ($snap eq $parent_snapname))) {
$last_snapshots->{$volid}->{$snap} = 1;
} elsif ($snap =~ m/^\Q$prefix\E/) {
- $logfunc->("$jobid: delete stale replication snapshot '$snap' on $volid");
+ $logfunc->("delete stale replication snapshot '$snap' on $volid");
PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap);
}
}
@@ -201,8 +227,6 @@ sub delete_job {
sub replicate {
my ($jobcfg, $state, $start_time, $logfunc) = @_;
- $logfunc = sub {} if !$logfunc; # log nothing by default
-
my $local_node = PVE::INotify::nodename();
die "not implemented - internal error" if $jobcfg->{type} ne 'local';
@@ -247,24 +271,24 @@ sub replicate {
my $sorted_volids = [ sort keys %$volumes ];
- $logfunc->("$jobid: guest => $vmid, type => $vmtype, running => $running");
- $logfunc->("$jobid: volumes => " . join(',', @$sorted_volids));
+ $logfunc->("guest => $vmid, type => $vmtype, running => $running");
+ $logfunc->("volumes => " . join(',', @$sorted_volids));
if (my $remove_job = $jobcfg->{remove_job}) {
- $logfunc->("$jobid: start job removal - mode '${remove_job}'");
+ $logfunc->("start job removal - mode '${remove_job}'");
if ($remove_job eq 'full' && $jobcfg->{target} ne $local_node) {
# remove all remote volumes
my $ssh_info = PVE::Cluster::get_ssh_info($jobcfg->{target});
- remote_prepare_local_job($ssh_info, $jobid, $vmid, [], $state->{storeid_list}, 0, undef, 1);
+ remote_prepare_local_job($ssh_info, $jobid, $vmid, [], $state->{storeid_list}, 0, undef, 1, $logfunc);
}
# remove all local replication snapshots (lastsync => 0)
prepare($storecfg, $sorted_volids, $jobid, 0, undef, $logfunc);
delete_job($jobid); # update config
- $logfunc->("$jobid: job removed");
+ $logfunc->("job removed");
return;
}
@@ -286,7 +310,7 @@ sub replicate {
# prepare remote side
my $remote_snapshots = remote_prepare_local_job(
- $ssh_info, $jobid, $vmid, $sorted_volids, $state->{storeid_list}, $last_sync, $parent_snapname, 0);
+ $ssh_info, $jobid, $vmid, $sorted_volids, $state->{storeid_list}, $last_sync, $parent_snapname, 0, $logfunc);
my $storeid_hash = {};
foreach my $volid (@$sorted_volids) {
@@ -297,7 +321,7 @@ sub replicate {
# freeze filesystem for data consistency
if ($qga) {
- $logfunc->("$jobid: freeze guest filesystem");
+ $logfunc->("freeze guest filesystem");
PVE::QemuServer::vm_mon_cmd($vmid, "guest-fsfreeze-freeze");
}
@@ -305,7 +329,7 @@ sub replicate {
my $replicate_snapshots = {};
eval {
foreach my $volid (@$sorted_volids) {
- $logfunc->("$jobid: create snapshot '${sync_snapname}' on $volid");
+ $logfunc->("create snapshot '${sync_snapname}' on $volid");
PVE::Storage::volume_snapshot($storecfg, $volid, $sync_snapname);
$replicate_snapshots->{$volid} = 1;
}
@@ -314,7 +338,7 @@ sub replicate {
# unfreeze immediately
if ($qga) {
- $logfunc->("$jobid: unfreeze guest filesystem");
+ $logfunc->("unfreeze guest filesystem");
eval { PVE::QemuServer::vm_mon_cmd($vmid, "guest-fsfreeze-thaw"); };
warn $@ if $@; # ignore errors here, because we cannot fix it anyways
}
@@ -322,7 +346,7 @@ sub replicate {
my $cleanup_local_snapshots = sub {
my ($volid_hash, $snapname) = @_;
foreach my $volid (sort keys %$volid_hash) {
- $logfunc->("$jobid: delete previous replication snapshot '$snapname' on $volid");
+ $logfunc->("delete previous replication snapshot '$snapname' on $volid");
eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname, $running); };
warn $@ if $@;
}
@@ -344,17 +368,17 @@ sub replicate {
if (defined($last_snapshots->{$volid}) && defined($remote_snapshots->{$volid})) {
if ($last_snapshots->{$volid}->{$last_sync_snapname} &&
$remote_snapshots->{$volid}->{$last_sync_snapname}) {
- $logfunc->("$jobid: incremental sync '$volid' ($last_sync_snapname => $sync_snapname)");
+ $logfunc->("incremental sync '$volid' ($last_sync_snapname => $sync_snapname)");
$base_snapname = $last_sync_snapname;
} elsif (defined($parent_snapname) &&
($last_snapshots->{$volid}->{$parent_snapname} &&
$remote_snapshots->{$volid}->{$parent_snapname})) {
- $logfunc->("$jobid: incremental sync '$volid' ($parent_snapname => $sync_snapname)");
+ $logfunc->("incremental sync '$volid' ($parent_snapname => $sync_snapname)");
$base_snapname = $parent_snapname;
}
}
- $logfunc->("$jobid: full sync '$volid' ($sync_snapname)") if !defined($base_snapname);
+ $logfunc->("full sync '$volid' ($sync_snapname)") if !defined($base_snapname);
replicate_volume($ssh_info, $storecfg, $volid, $base_snapname, $sync_snapname, $rate, $insecure);
}
};
@@ -370,7 +394,7 @@ sub replicate {
# remove old snapshots because they are no longer needed
$cleanup_local_snapshots->($last_snapshots, $last_sync_snapname);
- remote_finalize_local_job($ssh_info, $jobid, $vmid, $sorted_volids, $start_time);
+ remote_finalize_local_job($ssh_info, $jobid, $vmid, $sorted_volids, $start_time, $logfunc);
die $err if $err;
}
@@ -378,6 +402,8 @@ sub replicate {
my $run_replication_nolock = sub {
my ($jobcfg, $iteration, $start_time, $logfunc) = @_;
+ my $jobid = $jobcfg->{id};
+
# we normaly write errors into the state file,
# but we also catch unexpected errors and log them to syslog
# (for examply when there are problems writing the state file)
@@ -395,10 +421,23 @@ my $run_replication_nolock = sub {
PVE::ReplicationState::write_job_state($jobcfg, $state);
- $logfunc->("$jobcfg->{id}: start replication job") if $logfunc;
+ mkdir $replicate_logdir;
+ my $logfile = job_logfile_name($jobid);
+ open(my $logfd, '>', $logfile) ||
+ die "unable to open replication log '$logfile' - $!\n";
+
+ my $logfunc_wrapper = sub {
+ my ($msg) = @_;
+
+ my $ctime = get_log_time();
+ print $logfd "$ctime $jobid: $msg\n";
+ $logfunc->("$ctime $jobid: $msg") if $logfunc;
+ };
+
+ $logfunc_wrapper->("start replication job");
eval {
- replicate($jobcfg, $state, $start_time, $logfunc);
+ replicate($jobcfg, $state, $start_time, $logfunc_wrapper);
};
my $err = $@;
@@ -411,22 +450,19 @@ my $run_replication_nolock = sub {
$state->{fail_count}++;
$state->{error} = "$err";
PVE::ReplicationState::write_job_state($jobcfg, $state);
- my $msg = "$jobcfg->{id}: end replication job with error: $err";
- if ($logfunc) {
- $logfunc->($msg);
- } else {
- warn "$msg\n";
- }
+ $logfunc_wrapper->("end replication job with error: $err");
} else {
- $logfunc->("$jobcfg->{id}: end replication job") if $logfunc;
+ $logfunc_wrapper->("end replication job");
$state->{last_sync} = $start_time;
$state->{fail_count} = 0;
delete $state->{error};
PVE::ReplicationState::write_job_state($jobcfg, $state);
}
+
+ close($logfd);
};
if (my $err = $@) {
- warn "$jobcfg->{id}: got unexpected error - $err";
+ warn "$jobid: got unexpected replication job error - $err";
}
};
diff --git a/bin/test/ReplicationTestEnv.pm b/bin/test/ReplicationTestEnv.pm
index 1359fb47..ce72fa9c 100755
--- a/bin/test/ReplicationTestEnv.pm
+++ b/bin/test/ReplicationTestEnv.pm
@@ -179,7 +179,24 @@ my $mocked_volume_snapshot_delete = sub {
delete $d->{$snap} || die "no such snapshot '$snap' on '$volid'\n";
};
+my $pve_replication_module = Test::MockModule->new('PVE::Replication');
+
+my $mocked_job_logfile_name = sub {
+ my ($jobid) = @_;
+
+ return ".mocked_replication_log_$jobid";
+};
+
+my $mocked_log_time = 0;
+
+my $mocked_get_log_time = sub {
+ return $mocked_log_time;
+};
+
sub setup {
+ $pve_replication_module->mock(job_logfile_name => $mocked_job_logfile_name);
+ $pve_replication_module->mock(get_log_time => $mocked_get_log_time);
+
$pve_storage_module->mock(config => sub { return $mocked_storage_config; });
$pve_storage_module->mock(volume_snapshot_list => $mocked_volume_snapshot_list);
$pve_storage_module->mock(volume_snapshot => $mocked_volume_snapshot);
@@ -249,18 +266,20 @@ my $status;
sub track_jobs {
my ($ctime) = @_;
+ $mocked_log_time = $ctime;
+
my $logmsg = sub {
my ($msg) = @_;
- print "$ctime $msg\n";
- print $logfh "$ctime $msg\n";
+ print "$msg\n";
+ print $logfh "$msg\n";
};
if (!$status) {
$status = PVE::Replication::job_status();
foreach my $jobid (sort keys %$status) {
my $jobcfg = $status->{$jobid};
- $logmsg->("$jobid: new job next_sync => $jobcfg->{next_sync}");
+ $logmsg->("$ctime $jobid: new job next_sync => $jobcfg->{next_sync}");
}
}
@@ -271,7 +290,7 @@ sub track_jobs {
# detect removed jobs
foreach my $jobid (sort keys %$status) {
if (!$new->{$jobid}) {
- $logmsg->("$jobid: vanished job");
+ $logmsg->("$ctime $jobid: vanished job");
}
}
@@ -279,7 +298,7 @@ sub track_jobs {
my $jobcfg = $new->{$jobid};
my $oldcfg = $status->{$jobid};
if (!$oldcfg) {
- $logmsg->("$jobid: new job next_sync => $jobcfg->{next_sync}");
+ $logmsg->("$ctime $jobid: new job next_sync => $jobcfg->{next_sync}");
next; # no old state to compare
} else {
foreach my $k (qw(target guest vmtype next_sync)) {
@@ -288,7 +307,7 @@ sub track_jobs {
$changes .= ', ' if $changes;
$changes .= "$k => $jobcfg->{$k}";
}
- $logmsg->("$jobid: changed config $changes") if $changes;
+ $logmsg->("$ctime $jobid: changed config $changes") if $changes;
}
}
@@ -305,7 +324,7 @@ sub track_jobs {
$changes .= "$k => $value";
}
}
- $logmsg->("$jobid: changed state $changes") if $changes;
+ $logmsg->("$ctime $jobid: changed state $changes") if $changes;
my $old_storeid_list = $oldstate->{storeid_list};
my $storeid_list = $state->{storeid_list};
@@ -321,7 +340,7 @@ sub track_jobs {
$storeid_list_changes = 1;
}
- $logmsg->("$jobid: changed storeid list " . join(',', @$storeid_list))
+ $logmsg->("$ctime $jobid: changed storeid list " . join(',', @$storeid_list))
if $storeid_list_changes;
}
$status = $new;
--
2.11.0
More information about the pve-devel
mailing list