[pve-devel] [PATCH ha-manager] release LRM agent lock on graceful shutdown
Thomas Lamprecht
t.lamprecht at proxmox.com
Fri Jan 15 08:27:35 CET 2016
Release the agent lock when we shut down the LRM and have stopped all
services.
Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
src/PVE/HA/Env.pm | 9 +++++++++
src/PVE/HA/Env/PVE2.pm | 11 +++++++++++
src/PVE/HA/LRM.pm | 3 +++
src/PVE/HA/Sim/Env.pm | 13 +++++++++++++
src/test/test-reboot1/log.expect | 1 +
src/test/test-shutdown1/log.expect | 18 +++++++++---------
src/test/test-shutdown2/log.expect | 18 +++++++++---------
src/test/test-shutdown3/log.expect | 18 +++++++++---------
src/test/test-shutdown4/log.expect | 18 +++++++++---------
9 files changed, 73 insertions(+), 36 deletions(-)
diff --git a/src/PVE/HA/Env.pm b/src/PVE/HA/Env.pm
index 74ce545..82ba497 100644
--- a/src/PVE/HA/Env.pm
+++ b/src/PVE/HA/Env.pm
@@ -136,6 +136,15 @@ sub get_ha_agent_lock {
return $self->{plug}->get_ha_agent_lock($node);
}
+# release the respective node agent lock.
+# this should only get called if the node's LRM gracefully shuts down with
+# all services already cleanly stopped!
+sub release_ha_agent_lock {
+ my ($self) = @_;
+
+ return $self->{plug}->release_ha_agent_lock();
+}
+
# return true when cluster is quorate
sub quorate {
my ($self) = @_;
diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index 1920a09..8a36a0b 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -277,6 +277,17 @@ sub get_ha_agent_lock {
return $self->get_pve_lock("ha_agent_${node}_lock");
}
+# release the respective node agent lock.
+# this should only get called if the node's LRM gracefully shuts down with
+# all services already cleanly stopped!
+sub release_ha_agent_lock {
+ my ($self) = @_;
+
+ my $node = $self->nodename();
+
+ return rmdir("$lockdir/ha_agent_${node}_lock");
+}
+
sub quorate {
my ($self) = @_;
diff --git a/src/PVE/HA/LRM.pm b/src/PVE/HA/LRM.pm
index 8092818..60ee448 100644
--- a/src/PVE/HA/LRM.pm
+++ b/src/PVE/HA/LRM.pm
@@ -295,6 +295,9 @@ sub do_one_iteration {
}
$shutdown = 1;
+
+ # shutdown with all services stopped, thus release the lock
+ $haenv->release_ha_agent_lock();
}
}
} else {
diff --git a/src/PVE/HA/Sim/Env.pm b/src/PVE/HA/Sim/Env.pm
index 56cc7f8..20d0de5 100644
--- a/src/PVE/HA/Sim/Env.pm
+++ b/src/PVE/HA/Sim/Env.pm
@@ -239,6 +239,19 @@ sub get_ha_agent_lock {
return $self->sim_get_lock($lck);
}
+
+# release the respective node agent lock.
+# this should only get called if the node's LRM gracefully shuts down with
+# all services already cleanly stopped!
+sub release_ha_agent_lock {
+ my ($self) = @_;
+
+ my $node = $self->nodename();
+
+ my $lock = $self->get_ha_agent_lock_name($node);
+ return $self->sim_get_lock($lock, 1);
+}
+
# return true when cluster is quorate
sub quorate {
my ($self) = @_;
diff --git a/src/test/test-reboot1/log.expect b/src/test/test-reboot1/log.expect
index 802b57f..12c3fe5 100644
--- a/src/test/test-reboot1/log.expect
+++ b/src/test/test-reboot1/log.expect
@@ -31,6 +31,7 @@ info 126 reboot: execute power node3 on
info 125 node3/crm: status change startup => wait_for_quorum
info 126 node3/lrm: status change startup => wait_for_agent_lock
info 144 node3/crm: status change wait_for_quorum => slave
+info 145 node3/lrm: got lock 'ha_agent_node3_lock'
info 145 node3/lrm: status change wait_for_agent_lock => active
info 145 node3/lrm: starting service vm:103
info 145 node3/lrm: service status vm:103 started
diff --git a/src/test/test-shutdown1/log.expect b/src/test/test-shutdown1/log.expect
index 52a684f..5c063ab 100644
--- a/src/test/test-shutdown1/log.expect
+++ b/src/test/test-shutdown1/log.expect
@@ -30,13 +30,13 @@ info 125 node3/crm: killed by poweroff
info 140 node1/crm: node 'node3': state changed from 'online' => 'unknown'
info 180 node1/crm: service 'vm:103': state changed from 'started' to 'fence'
info 180 node1/crm: node 'node3': state changed from 'unknown' => 'fence'
-info 260 node1/crm: got lock 'ha_agent_node3_lock'
-info 260 node1/crm: fencing: acknowleged - got agent lock for node 'node3'
-info 260 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
-info 260 node1/crm: service 'vm:103': state changed from 'fence' to 'stopped'
-info 280 node1/crm: service 'vm:103': state changed from 'stopped' to 'started' (node = node1)
-info 281 node1/lrm: got lock 'ha_agent_node1_lock'
-info 281 node1/lrm: status change wait_for_agent_lock => active
-info 281 node1/lrm: starting service vm:103
-info 281 node1/lrm: service status vm:103 started
+info 180 node1/crm: got lock 'ha_agent_node3_lock'
+info 180 node1/crm: fencing: acknowleged - got agent lock for node 'node3'
+info 180 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+info 180 node1/crm: service 'vm:103': state changed from 'fence' to 'stopped'
+info 180 node1/crm: service 'vm:103': state changed from 'stopped' to 'started' (node = node1)
+info 181 node1/lrm: got lock 'ha_agent_node1_lock'
+info 181 node1/lrm: status change wait_for_agent_lock => active
+info 181 node1/lrm: starting service vm:103
+info 181 node1/lrm: service status vm:103 started
info 720 hardware: exit simulation - done
diff --git a/src/test/test-shutdown2/log.expect b/src/test/test-shutdown2/log.expect
index c9834a1..b367b64 100644
--- a/src/test/test-shutdown2/log.expect
+++ b/src/test/test-shutdown2/log.expect
@@ -30,15 +30,15 @@ info 125 node3/crm: killed by poweroff
info 140 node1/crm: node 'node3': state changed from 'online' => 'unknown'
info 180 node1/crm: service 'vm:103': state changed from 'started' to 'fence'
info 180 node1/crm: node 'node3': state changed from 'unknown' => 'fence'
-info 260 node1/crm: got lock 'ha_agent_node3_lock'
-info 260 node1/crm: fencing: acknowleged - got agent lock for node 'node3'
-info 260 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
-info 260 node1/crm: service 'vm:103': state changed from 'fence' to 'stopped'
-info 280 node1/crm: service 'vm:103': state changed from 'stopped' to 'started' (node = node1)
-info 281 node1/lrm: got lock 'ha_agent_node1_lock'
-info 281 node1/lrm: status change wait_for_agent_lock => active
-info 281 node1/lrm: starting service vm:103
-info 281 node1/lrm: service status vm:103 started
+info 180 node1/crm: got lock 'ha_agent_node3_lock'
+info 180 node1/crm: fencing: acknowleged - got agent lock for node 'node3'
+info 180 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+info 180 node1/crm: service 'vm:103': state changed from 'fence' to 'stopped'
+info 180 node1/crm: service 'vm:103': state changed from 'stopped' to 'started' (node = node1)
+info 181 node1/lrm: got lock 'ha_agent_node1_lock'
+info 181 node1/lrm: status change wait_for_agent_lock => active
+info 181 node1/lrm: starting service vm:103
+info 181 node1/lrm: service status vm:103 started
info 500 cmdlist: execute power node3 on
info 500 node3/crm: status change startup => wait_for_quorum
info 500 node3/lrm: status change startup => wait_for_agent_lock
diff --git a/src/test/test-shutdown3/log.expect b/src/test/test-shutdown3/log.expect
index 7cbd1bc..559cb4f 100644
--- a/src/test/test-shutdown3/log.expect
+++ b/src/test/test-shutdown3/log.expect
@@ -30,15 +30,15 @@ info 125 node3/crm: killed by poweroff
info 140 node1/crm: node 'node3': state changed from 'online' => 'unknown'
info 180 node1/crm: service 'ct:103': state changed from 'started' to 'fence'
info 180 node1/crm: node 'node3': state changed from 'unknown' => 'fence'
-info 260 node1/crm: got lock 'ha_agent_node3_lock'
-info 260 node1/crm: fencing: acknowleged - got agent lock for node 'node3'
-info 260 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
-info 260 node1/crm: service 'ct:103': state changed from 'fence' to 'stopped'
-info 280 node1/crm: service 'ct:103': state changed from 'stopped' to 'started' (node = node1)
-info 281 node1/lrm: got lock 'ha_agent_node1_lock'
-info 281 node1/lrm: status change wait_for_agent_lock => active
-info 281 node1/lrm: starting service ct:103
-info 281 node1/lrm: service status ct:103 started
+info 180 node1/crm: got lock 'ha_agent_node3_lock'
+info 180 node1/crm: fencing: acknowleged - got agent lock for node 'node3'
+info 180 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+info 180 node1/crm: service 'ct:103': state changed from 'fence' to 'stopped'
+info 180 node1/crm: service 'ct:103': state changed from 'stopped' to 'started' (node = node1)
+info 181 node1/lrm: got lock 'ha_agent_node1_lock'
+info 181 node1/lrm: status change wait_for_agent_lock => active
+info 181 node1/lrm: starting service ct:103
+info 181 node1/lrm: service status ct:103 started
info 500 cmdlist: execute power node3 on
info 500 node3/crm: status change startup => wait_for_quorum
info 500 node3/lrm: status change startup => wait_for_agent_lock
diff --git a/src/test/test-shutdown4/log.expect b/src/test/test-shutdown4/log.expect
index 15711ab..6838632 100644
--- a/src/test/test-shutdown4/log.expect
+++ b/src/test/test-shutdown4/log.expect
@@ -33,13 +33,13 @@ info 122 node2/crm: status change slave => master
info 122 node2/crm: node 'node1': state changed from 'online' => 'unknown'
info 200 node2/crm: service 'vm:100': state changed from 'started' to 'fence'
info 200 node2/crm: node 'node1': state changed from 'unknown' => 'fence'
-info 260 node2/crm: got lock 'ha_agent_node1_lock'
-info 260 node2/crm: fencing: acknowleged - got agent lock for node 'node1'
-info 260 node2/crm: node 'node1': state changed from 'fence' => 'unknown'
-info 260 node2/crm: service 'vm:100': state changed from 'fence' to 'stopped'
-info 280 node2/crm: service 'vm:100': state changed from 'stopped' to 'started' (node = node2)
-info 281 node2/lrm: got lock 'ha_agent_node2_lock'
-info 281 node2/lrm: status change wait_for_agent_lock => active
-info 281 node2/lrm: starting service vm:100
-info 281 node2/lrm: service status vm:100 started
+info 200 node2/crm: got lock 'ha_agent_node1_lock'
+info 200 node2/crm: fencing: acknowleged - got agent lock for node 'node1'
+info 200 node2/crm: node 'node1': state changed from 'fence' => 'unknown'
+info 200 node2/crm: service 'vm:100': state changed from 'fence' to 'stopped'
+info 200 node2/crm: service 'vm:100': state changed from 'stopped' to 'started' (node = node2)
+info 201 node2/lrm: got lock 'ha_agent_node2_lock'
+info 201 node2/lrm: status change wait_for_agent_lock => active
+info 201 node2/lrm: starting service vm:100
+info 201 node2/lrm: service status vm:100 started
info 720 hardware: exit simulation - done
--
2.1.4
More information about the pve-devel
mailing list