[pve-devel] [PATCH ha-manager 4/4] TestHardware: call CRM shutdown request and add lock release tests
Thomas Lamprecht
t.lamprecht at proxmox.com
Wed Jan 13 15:15:33 CET 2016
This is needed for regression testing a master lock release.
Adds two tests to check a lock release:
* with no active LRM (test-reboot2)
* with active LRM and fencing (test-shutdown4)
Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
src/PVE/HA/Sim/TestHardware.pm | 19 +++++++++++----
src/test/test-reboot2/README | 2 ++
src/test/test-reboot2/cmdlist | 4 +++
src/test/test-reboot2/hardware_status | 5 ++++
src/test/test-reboot2/log.expect | 35 +++++++++++++++++++++++++
src/test/test-reboot2/manager_status | 1 +
src/test/test-reboot2/service_config | 3 +++
src/test/test-shutdown4/README | 4 +++
src/test/test-shutdown4/cmdlist | 4 +++
src/test/test-shutdown4/hardware_status | 5 ++++
src/test/test-shutdown4/log.expect | 45 +++++++++++++++++++++++++++++++++
src/test/test-shutdown4/manager_status | 1 +
src/test/test-shutdown4/service_config | 3 +++
13 files changed, 126 insertions(+), 5 deletions(-)
create mode 100644 src/test/test-reboot2/README
create mode 100644 src/test/test-reboot2/cmdlist
create mode 100644 src/test/test-reboot2/hardware_status
create mode 100644 src/test/test-reboot2/log.expect
create mode 100644 src/test/test-reboot2/manager_status
create mode 100644 src/test/test-reboot2/service_config
create mode 100644 src/test/test-shutdown4/README
create mode 100644 src/test/test-shutdown4/cmdlist
create mode 100644 src/test/test-shutdown4/hardware_status
create mode 100644 src/test/test-shutdown4/log.expect
create mode 100644 src/test/test-shutdown4/manager_status
create mode 100644 src/test/test-shutdown4/service_config
diff --git a/src/PVE/HA/Sim/TestHardware.pm b/src/PVE/HA/Sim/TestHardware.pm
index d7f4efb..8dae5bb 100644
--- a/src/PVE/HA/Sim/TestHardware.pm
+++ b/src/PVE/HA/Sim/TestHardware.pm
@@ -155,6 +155,7 @@ sub sim_hardware_cmd {
$self->write_hardware_status_nolock($cstatus);
$d->{lrm}->shutdown_request() if $d->{lrm};
+ $d->{crm}->shutdown_request() if $d->{crm};
} elsif ($cmd eq 'restart-lrm') {
if ($d->{lrm}) {
$d->{lrm_restart} = 1;
@@ -216,17 +217,27 @@ sub run {
foreach my $node (@nodes) {
my $d = $self->{nodes}->{$node};
-
+
+ my $cstatus = $self->read_hardware_status_nolock();
+ my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
+ my $shutdown = $nstatus->{shutdown} || '';
+
if (my $crm = $d->{crm}) {
$d->{crm_env}->loop_start_hook($self->get_time());
- die "implement me (CRM exit)" if !$crm->do_one_iteration();
+ my $exit_crm = !$crm->do_one_iteration();
$d->{crm_env}->loop_end_hook();
my $nodetime = $d->{crm_env}->get_time();
$self->{cur_time} = $nodetime if $nodetime > $self->{cur_time};
+
+ if ($exit_crm) {
+ if (!($shutdown eq 'shutdown' || $shutdown eq 'reboot')) {
+ die "unexpected LRM exit - not implemented"
+ }
+ }
}
if (my $lrm = $d->{lrm}) {
@@ -243,9 +255,7 @@ sub run {
if ($exit_lrm) {
$d->{lrm_env}->log('info', "exit (loop end)");
$d->{lrm} = undef;
- my $cstatus = $self->read_hardware_status_nolock();
- my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
- my $shutdown = $nstatus->{shutdown} || '';
+
if ($d->{lrm_restart}) {
die "lrm restart during shutdown - not implemented" if $shutdown;
$d->{lrm_restart} = undef;
diff --git a/src/test/test-reboot2/README b/src/test/test-reboot2/README
new file mode 100644
index 0000000..1ea3d05
--- /dev/null
+++ b/src/test/test-reboot2/README
@@ -0,0 +1,2 @@
+This tests if the manager lock gets released on a reboot, so that another node
+can become the new master faster.
diff --git a/src/test/test-reboot2/cmdlist b/src/test/test-reboot2/cmdlist
new file mode 100644
index 0000000..517a166
--- /dev/null
+++ b/src/test/test-reboot2/cmdlist
@@ -0,0 +1,4 @@
+[
+ [ "power node1 on", "power node2 on", "power node3 on"],
+ [ "reboot node1" ]
+]
diff --git a/src/test/test-reboot2/hardware_status b/src/test/test-reboot2/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-reboot2/hardware_status
@@ -0,0 +1,5 @@
+{
+ "node1": { "power": "off", "network": "off" },
+ "node2": { "power": "off", "network": "off" },
+ "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-reboot2/log.expect b/src/test/test-reboot2/log.expect
new file mode 100644
index 0000000..8bfb1be
--- /dev/null
+++ b/src/test/test-reboot2/log.expect
@@ -0,0 +1,35 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'vm:103' on node 'node2'
+info 22 node2/crm: status change wait_for_quorum => slave
+info 23 node2/lrm: got lock 'ha_agent_node2_lock'
+info 23 node2/lrm: status change wait_for_agent_lock => active
+info 23 node2/lrm: starting service vm:103
+info 23 node2/lrm: service status vm:103 started
+info 24 node3/crm: status change wait_for_quorum => slave
+info 120 cmdlist: execute reboot node1
+info 120 node1/lrm: shutdown LRM, stop all services
+info 120 node1/crm: voluntary release CRM lock
+info 122 node1/lrm: exit (loop end)
+info 122 reboot: execute power node1 off
+info 121 node1/crm: killed by poweroff
+info 122 reboot: execute power node1 on
+info 121 node1/crm: status change startup => wait_for_quorum
+info 122 node1/lrm: status change startup => wait_for_agent_lock
+info 122 node2/crm: got lock 'ha_manager_lock'
+info 122 node2/crm: status change slave => master
+info 140 node1/crm: status change wait_for_quorum => slave
+info 720 hardware: exit simulation - done
diff --git a/src/test/test-reboot2/manager_status b/src/test/test-reboot2/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-reboot2/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-reboot2/service_config b/src/test/test-reboot2/service_config
new file mode 100644
index 0000000..8ea6460
--- /dev/null
+++ b/src/test/test-reboot2/service_config
@@ -0,0 +1,3 @@
+{
+ "vm:103": { "node": "node2", "state": "enabled" }
+}
diff --git a/src/test/test-shutdown4/README b/src/test/test-shutdown4/README
new file mode 100644
index 0000000..e7357f5
--- /dev/null
+++ b/src/test/test-shutdown4/README
@@ -0,0 +1,4 @@
+This tests if the manager lock gets released AND the services from the node with
+the manager lock get cleanly shut down without changing the state of the service in
+the cluster. That means that the node gets fenced by the new master and the
+service gets restarted.
diff --git a/src/test/test-shutdown4/cmdlist b/src/test/test-shutdown4/cmdlist
new file mode 100644
index 0000000..e84297f
--- /dev/null
+++ b/src/test/test-shutdown4/cmdlist
@@ -0,0 +1,4 @@
+[
+ [ "power node1 on", "power node2 on", "power node3 on"],
+ [ "shutdown node1" ]
+]
diff --git a/src/test/test-shutdown4/hardware_status b/src/test/test-shutdown4/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-shutdown4/hardware_status
@@ -0,0 +1,5 @@
+{
+ "node1": { "power": "off", "network": "off" },
+ "node2": { "power": "off", "network": "off" },
+ "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-shutdown4/log.expect b/src/test/test-shutdown4/log.expect
new file mode 100644
index 0000000..15711ab
--- /dev/null
+++ b/src/test/test-shutdown4/log.expect
@@ -0,0 +1,45 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'vm:100' on node 'node1'
+info 21 node1/lrm: got lock 'ha_agent_node1_lock'
+info 21 node1/lrm: status change wait_for_agent_lock => active
+info 21 node1/lrm: starting service vm:100
+info 21 node1/lrm: service status vm:100 started
+info 22 node2/crm: status change wait_for_quorum => slave
+info 24 node3/crm: status change wait_for_quorum => slave
+info 120 cmdlist: execute shutdown node1
+info 120 node1/lrm: shutdown LRM, stop all services
+info 120 node1/crm: voluntary release CRM lock
+info 121 node1/lrm: stopping service vm:100
+info 121 node1/lrm: service status vm:100 stopped
+info 122 node1/lrm: exit (loop end)
+info 122 shutdown: execute power node1 off
+info 121 node1/crm: killed by poweroff
+info 122 node2/crm: got lock 'ha_manager_lock'
+info 122 node2/crm: status change slave => master
+info 122 node2/crm: node 'node1': state changed from 'online' => 'unknown'
+info 200 node2/crm: service 'vm:100': state changed from 'started' to 'fence'
+info 200 node2/crm: node 'node1': state changed from 'unknown' => 'fence'
+info 260 node2/crm: got lock 'ha_agent_node1_lock'
+info 260 node2/crm: fencing: acknowleged - got agent lock for node 'node1'
+info 260 node2/crm: node 'node1': state changed from 'fence' => 'unknown'
+info 260 node2/crm: service 'vm:100': state changed from 'fence' to 'stopped'
+info 280 node2/crm: service 'vm:100': state changed from 'stopped' to 'started' (node = node2)
+info 281 node2/lrm: got lock 'ha_agent_node2_lock'
+info 281 node2/lrm: status change wait_for_agent_lock => active
+info 281 node2/lrm: starting service vm:100
+info 281 node2/lrm: service status vm:100 started
+info 720 hardware: exit simulation - done
diff --git a/src/test/test-shutdown4/manager_status b/src/test/test-shutdown4/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-shutdown4/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-shutdown4/service_config b/src/test/test-shutdown4/service_config
new file mode 100644
index 0000000..01d6242
--- /dev/null
+++ b/src/test/test-shutdown4/service_config
@@ -0,0 +1,3 @@
+{
+ "vm:100": { "node": "node1", "state": "enabled" }
+}
--
2.1.4
More information about the pve-devel
mailing list