[pve-devel] [RFC ha-manager 3/3] always fence nodes on dead LRM
Thomas Lamprecht
t.lamprecht at proxmox.com
Tue Apr 26 10:55:29 CEST 2016
fixes a recovery failure when a node starts up with a dead/broken LRM
but a working corosync.
While the node is quorate it does not do anything, yet the CRM won't
fence it either, as our "last_online" timestamp only checks whether
the node is quorate, not whether the HA manager is actually working.
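For context, the manager previously refreshed "last_online" from
quorum membership alone; a minimal sketch of the old update() loop
in NodeStatus.pm, condensed from the lines this patch removes:

    # old behaviour: any quorate node counts as alive, even if
    # its pve-ha-lrm daemon never came up
    foreach my $node (keys %$node_info) {
        next if !$node_info->{$node}->{online};
        $self->{last_online}->{$node} = $haenv->get_time();
    }

So a quorate node with a dead LRM never exceeded $fence_delay and
was never fenced.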
This can be reproduced by taking an active node with services and
simply disabling the LRM:
$ systemctl disable pve-ha-lrm
and then rebooting
(this simulates a broken update/reboot).
The node then comes up again and gains quorum, but the LRM does not
start and thus no service gets started/migrated/... Fencing is
appropriate for such a situation.
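With this patch the CRM additionally compares the timestamp from each
node's LRM status against $fence_delay; a condensed sketch of the new
check (see the NodeStatus.pm hunk below for the full version):

    # new behaviour: a quorate node whose LRM timestamp went stale
    # is demoted to 'unknown', which opens the fence/recovery path
    if ($state eq 'online') {
        if (defined($timestamps->{$node}) &&
            ($ctime - $timestamps->{$node}) >= $fence_delay) {
            &$set_node_state($self, $node, 'unknown');
        }
    }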
Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
Note: the changes in the regression tests are a result of a dead LRM
coming online again. With this patch the LRM needs to update its
status timestamp first, before the CRM marks it as online; this
ensures that the node is fully online and working, not that "just"
corosync runs (see the sketch below).
src/PVE/HA/Manager.pm | 14 +++++++++-----
src/PVE/HA/NodeStatus.pm | 24 ++++++++++++++++++------
src/test/test-shutdown2/log.expect | 20 ++++++++++----------
src/test/test-shutdown3/log.expect | 24 ++++++++++++------------
4 files changed, 49 insertions(+), 33 deletions(-)
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index 54e99b5..e621584 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -258,9 +258,11 @@ sub read_lrm_status {
my $results = {};
my $modes = {};
+ my $timestamps = {};
foreach my $node (@$nodes) {
my $lrm_status = $haenv->read_lrm_status($node);
$modes->{$node} = $lrm_status->{mode} || 'active';
+ $timestamps->{$node} = $lrm_status->{timestamp} || $haenv->get_time();
foreach my $uid (keys %{$lrm_status->{results}}) {
next if $results->{$uid}; # should not happen
$results->{$uid} = $lrm_status->{results}->{$uid};
@@ -268,7 +270,7 @@ sub read_lrm_status {
}
- return ($results, $modes);
+ return ($results, $modes, $timestamps);
}
# read new crm commands and save them into crm master status
@@ -311,15 +313,17 @@ sub manage {
my ($haenv, $ms, $ns, $ss) = ($self->{haenv}, $self->{ms}, $self->{ns}, $self->{ss});
- $ns->update($haenv->get_node_info());
+ my ($lrm_results, $lrm_modes, $timestamps) = $self->read_lrm_status();
- if (!$ns->node_is_online($haenv->nodename())) {
+ my ($node_info, $quorate) = $haenv->get_node_info();
+
+ $ns->update($node_info, $timestamps);
+
+ if (!($ns->node_is_online($haenv->nodename()) || $quorate)) {
$haenv->log('info', "master seems offline");
return;
}
- my ($lrm_results, $lrm_modes) = $self->read_lrm_status();
-
my $sc = $haenv->read_service_config();
$self->{groups} = $haenv->read_group_config(); # update
diff --git a/src/PVE/HA/NodeStatus.pm b/src/PVE/HA/NodeStatus.pm
index d9ef912..3a54d3c 100644
--- a/src/PVE/HA/NodeStatus.pm
+++ b/src/PVE/HA/NodeStatus.pm
@@ -117,7 +117,7 @@ my $set_node_state = sub {
};
sub update {
- my ($self, $node_info) = @_;
+ my ($self, $node_info, $timestamps) = @_;
my $haenv = $self->{haenv};
@@ -125,20 +125,32 @@ sub update {
my $d = $node_info->{$node};
next if !$d->{online};
- # record last time the node was online (required to implement fence delay)
- $self->{last_online}->{$node} = $haenv->get_time();
-
my $state = $self->get_node_state($node);
+ my $ctime = $haenv->get_time();
+
if ($state eq 'online') {
- # &$set_node_state($self, $node, 'online');
+ # if a node is quorate but its LRM is dead mark it as 'unknown'
+ # to allow a possible needed service recovery
+ if (defined($timestamps->{$node}) && ($ctime - $timestamps->{$node}) >= $fence_delay) {
+ &$set_node_state($self, $node, 'unknown');
+ next;
+ }
} elsif ($state eq 'unknown' || $state eq 'gone') {
- &$set_node_state($self, $node, 'online');
+ # mark new nodes or quorate with active LRM nodes as online
+ if (!defined($timestamps->{$node}) || ($ctime - $timestamps->{$node}) < $fence_delay) {
+ &$set_node_state($self, $node, 'online');
+ } else {
+ next;
+ }
} elsif ($state eq 'fence') {
# do nothing, wait until fenced
} else {
die "detected unknown node state '$state";
}
+
+ # record last time the node was online (required to implement fence delay)
+ $self->{last_online}->{$node} = $ctime;
}
foreach my $node (keys %{$self->{status}}) {
diff --git a/src/test/test-shutdown2/log.expect b/src/test/test-shutdown2/log.expect
index c3fbb07..5a9ad0f 100644
--- a/src/test/test-shutdown2/log.expect
+++ b/src/test/test-shutdown2/log.expect
@@ -43,15 +43,15 @@ info 201 node1/lrm: service status vm:103 started
info 500 cmdlist: execute power node3 on
info 500 node3/crm: status change startup => wait_for_quorum
info 500 node3/lrm: status change startup => wait_for_agent_lock
-info 500 node1/crm: node 'node3': state changed from 'unknown' => 'online'
-info 500 node1/crm: migrate service 'vm:103' to node 'node3' (running)
-info 500 node1/crm: service 'vm:103': state changed from 'started' to 'migrate' (node = node1, target = node3)
-info 501 node1/lrm: service vm:103 - start migrate to node 'node3'
-info 501 node1/lrm: service vm:103 - end migrate to node 'node3'
info 504 node3/crm: status change wait_for_quorum => slave
-info 520 node1/crm: service 'vm:103': state changed from 'migrate' to 'started' (node = node3)
-info 525 node3/lrm: got lock 'ha_agent_node3_lock'
-info 525 node3/lrm: status change wait_for_agent_lock => active
-info 525 node3/lrm: starting service vm:103
-info 525 node3/lrm: service status vm:103 started
+info 520 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 520 node1/crm: migrate service 'vm:103' to node 'node3' (running)
+info 520 node1/crm: service 'vm:103': state changed from 'started' to 'migrate' (node = node1, target = node3)
+info 521 node1/lrm: service vm:103 - start migrate to node 'node3'
+info 521 node1/lrm: service vm:103 - end migrate to node 'node3'
+info 540 node1/crm: service 'vm:103': state changed from 'migrate' to 'started' (node = node3)
+info 545 node3/lrm: got lock 'ha_agent_node3_lock'
+info 545 node3/lrm: status change wait_for_agent_lock => active
+info 545 node3/lrm: starting service vm:103
+info 545 node3/lrm: service status vm:103 started
info 1100 hardware: exit simulation - done
diff --git a/src/test/test-shutdown3/log.expect b/src/test/test-shutdown3/log.expect
index 16d8c4e..86ba22d 100644
--- a/src/test/test-shutdown3/log.expect
+++ b/src/test/test-shutdown3/log.expect
@@ -43,17 +43,17 @@ info 201 node1/lrm: service status ct:103 started
info 500 cmdlist: execute power node3 on
info 500 node3/crm: status change startup => wait_for_quorum
info 500 node3/lrm: status change startup => wait_for_agent_lock
-info 500 node1/crm: node 'node3': state changed from 'unknown' => 'online'
-info 500 node1/crm: relocate service 'ct:103' to node 'node3'
-info 500 node1/crm: service 'ct:103': state changed from 'started' to 'relocate' (node = node1, target = node3)
-info 501 node1/lrm: service ct:103 - start relocate to node 'node3'
-info 501 node1/lrm: stopping service ct:103 (relocate)
-info 501 node1/lrm: service status ct:103 stopped
-info 501 node1/lrm: service ct:103 - end relocate to node 'node3'
info 504 node3/crm: status change wait_for_quorum => slave
-info 520 node1/crm: service 'ct:103': state changed from 'relocate' to 'started' (node = node3)
-info 525 node3/lrm: got lock 'ha_agent_node3_lock'
-info 525 node3/lrm: status change wait_for_agent_lock => active
-info 525 node3/lrm: starting service ct:103
-info 525 node3/lrm: service status ct:103 started
+info 520 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 520 node1/crm: relocate service 'ct:103' to node 'node3'
+info 520 node1/crm: service 'ct:103': state changed from 'started' to 'relocate' (node = node1, target = node3)
+info 521 node1/lrm: service ct:103 - start relocate to node 'node3'
+info 521 node1/lrm: stopping service ct:103 (relocate)
+info 521 node1/lrm: service status ct:103 stopped
+info 521 node1/lrm: service ct:103 - end relocate to node 'node3'
+info 540 node1/crm: service 'ct:103': state changed from 'relocate' to 'started' (node = node3)
+info 545 node3/lrm: got lock 'ha_agent_node3_lock'
+info 545 node3/lrm: status change wait_for_agent_lock => active
+info 545 node3/lrm: starting service ct:103
+info 545 node3/lrm: service status ct:103 started
info 1100 hardware: exit simulation - done
--
2.1.4