[pve-devel] [PATCH ha-manager 1/7] test/sim: allow to simulate cfs failures

Thomas Lamprecht t.lamprecht at proxmox.com
Wed Nov 22 11:53:06 CET 2017


Add simulated hardware commands for the cluster file system.

This allows telling the regression test or simulator system that a
certain node's calls to methods accessing the CFS should fail, i.e.,
die.
With this we can cover a situation which mainly happens during a
cluster file system update.

For now, allow defining whether the CFS is readable/writable (state 'rw')
and whether updates of the CFS (state 'update') should work or fail.
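
For example, a test cmdlist can toggle a node's CFS with the new
'cfs <node> <rw|update> <work|fail>' command, as the regression test
added below does:

    [ "cfs node1 rw fail", "service vm:101 stopped" ],
    [ "cfs node1 rw work" ]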

Add 'can read/write' assertions to all relevant methods.
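
The assertion itself is a small closure in Sim/Env.pm which consults the
simulated hardware's CFS state for the local node and dies before the
wrapped method touches the status directory, roughly:

    my $assert_cfs_can_rw = sub {
        my ($self, $emsg) = @_;

        $emsg //= 'cfs connection refused - not mounted?';

        die "$emsg\n"
            if !$self->{hardware}->get_cfs_state($self->{nodename}, 'rw');
    };

Each guarded method then calls $assert_cfs_can_rw->($self) as its first
step, before it reads from or writes to the status directory.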

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
 src/PVE/HA/Sim/Env.pm                          | 29 ++++++++++++++++++++++++++
 src/PVE/HA/Sim/Hardware.pm                     | 23 ++++++++++++++++++++
 src/test/test-cfs-unavailable1/README          |  1 +
 src/test/test-cfs-unavailable1/cmdlist         |  5 +++++
 src/test/test-cfs-unavailable1/hardware_status |  5 +++++
 src/test/test-cfs-unavailable1/manager_status  |  1 +
 src/test/test-cfs-unavailable1/service_config  |  5 +++++
 7 files changed, 69 insertions(+)
 create mode 100644 src/test/test-cfs-unavailable1/README
 create mode 100644 src/test/test-cfs-unavailable1/cmdlist
 create mode 100644 src/test/test-cfs-unavailable1/hardware_status
 create mode 100644 src/test/test-cfs-unavailable1/manager_status
 create mode 100644 src/test/test-cfs-unavailable1/service_config

diff --git a/src/PVE/HA/Sim/Env.pm b/src/PVE/HA/Sim/Env.pm
index e00b2b1..92c9394 100644
--- a/src/PVE/HA/Sim/Env.pm
+++ b/src/PVE/HA/Sim/Env.pm
@@ -53,6 +53,15 @@ sub hardware {
     return $self->{hardware};
 }
 
+my $assert_cfs_can_rw = sub {
+    my ($self, $emsg) = @_;
+
+    $emsg //= 'cfs connection refused - not mounted?';
+
+    die "$emsg\n"
+	if !$self->{hardware}->get_cfs_state($self->{nodename}, 'rw');
+};
+
 sub sim_get_lock {
     my ($self, $lock_name, $unlock) = @_;
 
@@ -125,6 +134,8 @@ sub sim_get_lock {
 sub read_manager_status {
     my ($self) = @_;
 
+    $assert_cfs_can_rw->($self);
+
     my $filename = "$self->{statusdir}/manager_status";
 
     return PVE::HA::Tools::read_json_from_file($filename, {});
@@ -133,6 +144,8 @@ sub read_manager_status {
 sub write_manager_status {
     my ($self, $status_obj) = @_;
 
+    $assert_cfs_can_rw->($self);
+
     my $filename = "$self->{statusdir}/manager_status";
 
     PVE::HA::Tools::write_json_to_file($filename, $status_obj);
@@ -143,6 +156,8 @@ sub read_lrm_status {
 
     $node = $self->{nodename} if !defined($node);
 
+    $assert_cfs_can_rw->($self);
+
     return $self->{hardware}->read_lrm_status($node);
 }
 
@@ -151,6 +166,8 @@ sub write_lrm_status {
 
     my $node = $self->{nodename};
 
+    $assert_cfs_can_rw->($self);
+
     return $self->{hardware}->write_lrm_status($node, $status_obj);
 }
 
@@ -181,12 +198,16 @@ sub is_node_shutdown {
 sub read_service_config {
     my ($self) = @_;
 
+    $assert_cfs_can_rw->($self);
+
     return $self->{hardware}->read_service_config();
 }
 
 sub read_fence_config {
     my ($self) = @_;
 
+    $assert_cfs_can_rw->($self);
+
     return $self->{hardware}->read_fence_config();
 }
 
@@ -209,6 +230,8 @@ sub exec_fence_agent {
 sub read_group_config {
     my ($self) = @_;
 
+    $assert_cfs_can_rw->($self);
+
     return $self->{hardware}->read_group_config();
 }
 
@@ -216,18 +239,24 @@ sub read_group_config {
 sub steal_service {
     my ($self, $sid, $current_node, $new_node) = @_;
 
+    $assert_cfs_can_rw->($self);
+
     return $self->{hardware}->change_service_location($sid, $current_node, $new_node);
 }
 
 sub queue_crm_commands {
     my ($self, $cmd) = @_;
 
+    $assert_cfs_can_rw->($self);
+
     return $self->{hardware}->queue_crm_commands($cmd);
 }
 
 sub read_crm_commands {
     my ($self) = @_;
 
+    $assert_cfs_can_rw->($self);
+
     return $self->{hardware}->read_crm_commands();
 }
 
diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index 6ba2210..2019d8f 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -494,9 +494,22 @@ sub get_node_info {
     return ($node_info, $quorate);
 }
 
+# helper for Sim/ only
+sub get_cfs_state {
+    my ($self, $node, $state) = @_;
+
+    # TODO: ensure nolock is OK when adding this to RTSim
+    my $cstatus = $self->read_hardware_status_nolock();
+    my $res = $cstatus->{$node}->{cfs}->{$state};
+
+    # we assume default true if not defined
+    return !defined($res) || $res;
+}
+
 # simulate hardware commands
 # power <node> <on|off>
 # network <node> <on|off>
+# cfs <node> <rw|update> <work|fail>
 # reboot <node>
 # shutdown <node>
 # restart-lrm <node>
@@ -539,6 +552,7 @@ sub sim_hardware_cmd {
 		    $d->{crm} = $self->crm_control('start', $d, $lock_fh) if !defined($d->{crm});
 		    $d->{lrm} = $self->lrm_control('start', $d, $lock_fh) if !defined($d->{lrm});
 		    $d->{lrm_restart} = undef;
+		    $cstatus->{$node}->{cfs} = {};
 
 		} else {
 
@@ -572,6 +586,15 @@ sub sim_hardware_cmd {
 
 	    $self->write_hardware_status_nolock($cstatus);
 
+	} elsif ($cmd eq 'cfs') {
+	    die "sim_hardware_cmd: unknown cfs action '$action' for node '$node'"
+		if $action !~ m/^(rw|update)$/;
+	    die "sim_hardware_cmd: unknown cfs command '$target' for '$action' on node '$node'"
+		if $target !~ m/^(work|fail)$/;
+
+	    $cstatus->{$node}->{cfs}->{$action} = $target eq 'work';
+	    $self->write_hardware_status_nolock($cstatus);
+
 	} elsif ($cmd eq 'reboot' || $cmd eq 'shutdown') {
 	    $cstatus->{$node}->{shutdown} = $cmd;
 
diff --git a/src/test/test-cfs-unavailable1/README b/src/test/test-cfs-unavailable1/README
new file mode 100644
index 0000000..ffd526c
--- /dev/null
+++ b/src/test/test-cfs-unavailable1/README
@@ -0,0 +1 @@
+Test a cfs update behavior, e.g., cfs is (temporarily) not read and writeable.
diff --git a/src/test/test-cfs-unavailable1/cmdlist b/src/test/test-cfs-unavailable1/cmdlist
new file mode 100644
index 0000000..34596f1
--- /dev/null
+++ b/src/test/test-cfs-unavailable1/cmdlist
@@ -0,0 +1,5 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "cfs node1 rw fail", "service vm:101 stopped" ],
+    [ "cfs node1 rw work" ]
+]
diff --git a/src/test/test-cfs-unavailable1/hardware_status b/src/test/test-cfs-unavailable1/hardware_status
new file mode 100644
index 0000000..119b81c
--- /dev/null
+++ b/src/test/test-cfs-unavailable1/hardware_status
@@ -0,0 +1,5 @@
+{ 
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
\ No newline at end of file
diff --git a/src/test/test-cfs-unavailable1/manager_status b/src/test/test-cfs-unavailable1/manager_status
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/src/test/test-cfs-unavailable1/manager_status
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/src/test/test-cfs-unavailable1/service_config b/src/test/test-cfs-unavailable1/service_config
new file mode 100644
index 0000000..0e05ab4
--- /dev/null
+++ b/src/test/test-cfs-unavailable1/service_config
@@ -0,0 +1,5 @@
+{
+    "vm:101": { "node": "node1", "state": "enabled" },
+    "vm:102": { "node": "node2" },
+    "vm:103": { "node": "node3", "state": "enabled" }
+}
\ No newline at end of file
-- 
2.11.0