[pve-devel] [PATCH ha-manager 4/4] TestHardware: call CRM shutdown request and add lock release tests

Thomas Lamprecht t.lamprecht at proxmox.com
Wed Jan 13 15:15:33 CET 2016


This is needed for regression testing a master lock release.

Adds two tests to check a lock release:
 * with no active lrm (test-reboot2)
 * with active lrm and fencing (test-shutdown4)

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
 src/PVE/HA/Sim/TestHardware.pm          | 19 +++++++++++----
 src/test/test-reboot2/README            |  2 ++
 src/test/test-reboot2/cmdlist           |  4 +++
 src/test/test-reboot2/hardware_status   |  5 ++++
 src/test/test-reboot2/log.expect        | 35 +++++++++++++++++++++++++
 src/test/test-reboot2/manager_status    |  1 +
 src/test/test-reboot2/service_config    |  3 +++
 src/test/test-shutdown4/README          |  4 +++
 src/test/test-shutdown4/cmdlist         |  4 +++
 src/test/test-shutdown4/hardware_status |  5 ++++
 src/test/test-shutdown4/log.expect      | 45 +++++++++++++++++++++++++++++++++
 src/test/test-shutdown4/manager_status  |  1 +
 src/test/test-shutdown4/service_config  |  3 +++
 13 files changed, 126 insertions(+), 5 deletions(-)
 create mode 100644 src/test/test-reboot2/README
 create mode 100644 src/test/test-reboot2/cmdlist
 create mode 100644 src/test/test-reboot2/hardware_status
 create mode 100644 src/test/test-reboot2/log.expect
 create mode 100644 src/test/test-reboot2/manager_status
 create mode 100644 src/test/test-reboot2/service_config
 create mode 100644 src/test/test-shutdown4/README
 create mode 100644 src/test/test-shutdown4/cmdlist
 create mode 100644 src/test/test-shutdown4/hardware_status
 create mode 100644 src/test/test-shutdown4/log.expect
 create mode 100644 src/test/test-shutdown4/manager_status
 create mode 100644 src/test/test-shutdown4/service_config

diff --git a/src/PVE/HA/Sim/TestHardware.pm b/src/PVE/HA/Sim/TestHardware.pm
index d7f4efb..8dae5bb 100644
--- a/src/PVE/HA/Sim/TestHardware.pm
+++ b/src/PVE/HA/Sim/TestHardware.pm
@@ -155,6 +155,7 @@ sub sim_hardware_cmd {
 	    $self->write_hardware_status_nolock($cstatus);
 
 	    $d->{lrm}->shutdown_request() if $d->{lrm};
+	    $d->{crm}->shutdown_request() if $d->{crm};
 	} elsif ($cmd eq 'restart-lrm') {
 	    if ($d->{lrm}) {
 		$d->{lrm_restart} = 1;
@@ -216,17 +217,27 @@ sub run {
 	foreach my $node (@nodes) {
 
 	    my $d = $self->{nodes}->{$node};
-	    
+
+	    my $cstatus = $self->read_hardware_status_nolock();
+	    my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
+	    my $shutdown = $nstatus->{shutdown} || '';
+
 	    if (my $crm = $d->{crm}) {
 
 		$d->{crm_env}->loop_start_hook($self->get_time());
 
-		die "implement me (CRM exit)" if !$crm->do_one_iteration();
+		my $exit_crm = !$crm->do_one_iteration();
 
 		$d->{crm_env}->loop_end_hook();
 
 		my $nodetime = $d->{crm_env}->get_time();
 		$self->{cur_time} = $nodetime if $nodetime > $self->{cur_time};
+
+		if ($exit_crm) {
+		    if (!($shutdown eq 'shutdown' || $shutdown eq 'reboot')) {
+			die "unexpected LRM exit - not implemented"
+		    }
+		}
 	    }
 
 	    if (my $lrm = $d->{lrm}) {
@@ -243,9 +255,7 @@ sub run {
 		if ($exit_lrm) {
 		    $d->{lrm_env}->log('info', "exit (loop end)");
 		    $d->{lrm} = undef;
-		    my $cstatus = $self->read_hardware_status_nolock();
-		    my $nstatus = $cstatus->{$node} || die "no node status for node '$node'";
-		    my $shutdown = $nstatus->{shutdown} || '';
+
 		    if ($d->{lrm_restart}) {
 			die "lrm restart during shutdown - not implemented" if $shutdown;
 			$d->{lrm_restart} = undef;
diff --git a/src/test/test-reboot2/README b/src/test/test-reboot2/README
new file mode 100644
index 0000000..1ea3d05
--- /dev/null
+++ b/src/test/test-reboot2/README
@@ -0,0 +1,2 @@
+This tests whether the manager lock gets released on a reboot, so that another
+node can become the new master faster.
diff --git a/src/test/test-reboot2/cmdlist b/src/test/test-reboot2/cmdlist
new file mode 100644
index 0000000..517a166
--- /dev/null
+++ b/src/test/test-reboot2/cmdlist
@@ -0,0 +1,4 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "reboot node1" ]
+]
diff --git a/src/test/test-reboot2/hardware_status b/src/test/test-reboot2/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-reboot2/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-reboot2/log.expect b/src/test/test-reboot2/log.expect
new file mode 100644
index 0000000..8bfb1be
--- /dev/null
+++ b/src/test/test-reboot2/log.expect
@@ -0,0 +1,35 @@
+info      0     hardware: starting simulation
+info     20      cmdlist: execute power node1 on
+info     20    node1/crm: status change startup => wait_for_quorum
+info     20    node1/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node2 on
+info     20    node2/crm: status change startup => wait_for_quorum
+info     20    node2/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node3 on
+info     20    node3/crm: status change startup => wait_for_quorum
+info     20    node3/lrm: status change startup => wait_for_agent_lock
+info     20    node1/crm: got lock 'ha_manager_lock'
+info     20    node1/crm: status change wait_for_quorum => master
+info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info     20    node1/crm: adding new service 'vm:103' on node 'node2'
+info     22    node2/crm: status change wait_for_quorum => slave
+info     23    node2/lrm: got lock 'ha_agent_node2_lock'
+info     23    node2/lrm: status change wait_for_agent_lock => active
+info     23    node2/lrm: starting service vm:103
+info     23    node2/lrm: service status vm:103 started
+info     24    node3/crm: status change wait_for_quorum => slave
+info    120      cmdlist: execute reboot node1
+info    120    node1/lrm: shutdown LRM, stop all services
+info    120    node1/crm: voluntary release CRM lock
+info    122    node1/lrm: exit (loop end)
+info    122       reboot: execute power node1 off
+info    121    node1/crm: killed by poweroff
+info    122       reboot: execute power node1 on
+info    121    node1/crm: status change startup => wait_for_quorum
+info    122    node1/lrm: status change startup => wait_for_agent_lock
+info    122    node2/crm: got lock 'ha_manager_lock'
+info    122    node2/crm: status change slave => master
+info    140    node1/crm: status change wait_for_quorum => slave
+info    720     hardware: exit simulation - done
diff --git a/src/test/test-reboot2/manager_status b/src/test/test-reboot2/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-reboot2/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-reboot2/service_config b/src/test/test-reboot2/service_config
new file mode 100644
index 0000000..8ea6460
--- /dev/null
+++ b/src/test/test-reboot2/service_config
@@ -0,0 +1,3 @@
+{
+    "vm:103": { "node": "node2", "state": "enabled" }
+}
diff --git a/src/test/test-shutdown4/README b/src/test/test-shutdown4/README
new file mode 100644
index 0000000..e7357f5
--- /dev/null
+++ b/src/test/test-shutdown4/README
@@ -0,0 +1,4 @@
+This tests if the manager lock gets released AND the services from the node with
+the manager lock get cleanly shut down without changing the state of the service
+in the cluster. That means that the node gets fenced by the new master and the
+service gets restarted.
diff --git a/src/test/test-shutdown4/cmdlist b/src/test/test-shutdown4/cmdlist
new file mode 100644
index 0000000..e84297f
--- /dev/null
+++ b/src/test/test-shutdown4/cmdlist
@@ -0,0 +1,4 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "shutdown node1" ]
+]
diff --git a/src/test/test-shutdown4/hardware_status b/src/test/test-shutdown4/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-shutdown4/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-shutdown4/log.expect b/src/test/test-shutdown4/log.expect
new file mode 100644
index 0000000..15711ab
--- /dev/null
+++ b/src/test/test-shutdown4/log.expect
@@ -0,0 +1,45 @@
+info      0     hardware: starting simulation
+info     20      cmdlist: execute power node1 on
+info     20    node1/crm: status change startup => wait_for_quorum
+info     20    node1/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node2 on
+info     20    node2/crm: status change startup => wait_for_quorum
+info     20    node2/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node3 on
+info     20    node3/crm: status change startup => wait_for_quorum
+info     20    node3/lrm: status change startup => wait_for_agent_lock
+info     20    node1/crm: got lock 'ha_manager_lock'
+info     20    node1/crm: status change wait_for_quorum => master
+info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info     20    node1/crm: adding new service 'vm:100' on node 'node1'
+info     21    node1/lrm: got lock 'ha_agent_node1_lock'
+info     21    node1/lrm: status change wait_for_agent_lock => active
+info     21    node1/lrm: starting service vm:100
+info     21    node1/lrm: service status vm:100 started
+info     22    node2/crm: status change wait_for_quorum => slave
+info     24    node3/crm: status change wait_for_quorum => slave
+info    120      cmdlist: execute shutdown node1
+info    120    node1/lrm: shutdown LRM, stop all services
+info    120    node1/crm: voluntary release CRM lock
+info    121    node1/lrm: stopping service vm:100
+info    121    node1/lrm: service status vm:100 stopped
+info    122    node1/lrm: exit (loop end)
+info    122     shutdown: execute power node1 off
+info    121    node1/crm: killed by poweroff
+info    122    node2/crm: got lock 'ha_manager_lock'
+info    122    node2/crm: status change slave => master
+info    122    node2/crm: node 'node1': state changed from 'online' => 'unknown'
+info    200    node2/crm: service 'vm:100': state changed from 'started' to 'fence' 
+info    200    node2/crm: node 'node1': state changed from 'unknown' => 'fence'
+info    260    node2/crm: got lock 'ha_agent_node1_lock'
+info    260    node2/crm: fencing: acknowleged - got agent lock for node 'node1'
+info    260    node2/crm: node 'node1': state changed from 'fence' => 'unknown'
+info    260    node2/crm: service 'vm:100': state changed from 'fence' to 'stopped' 
+info    280    node2/crm: service 'vm:100': state changed from 'stopped' to 'started'  (node = node2)
+info    281    node2/lrm: got lock 'ha_agent_node2_lock'
+info    281    node2/lrm: status change wait_for_agent_lock => active
+info    281    node2/lrm: starting service vm:100
+info    281    node2/lrm: service status vm:100 started
+info    720     hardware: exit simulation - done
diff --git a/src/test/test-shutdown4/manager_status b/src/test/test-shutdown4/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-shutdown4/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-shutdown4/service_config b/src/test/test-shutdown4/service_config
new file mode 100644
index 0000000..01d6242
--- /dev/null
+++ b/src/test/test-shutdown4/service_config
@@ -0,0 +1,3 @@
+{
+    "vm:100": { "node": "node1", "state": "enabled" }
+}
-- 
2.1.4





More information about the pve-devel mailing list