[pve-devel] [PATCH ha-manager v3 4/6] add possibility to simulate locks from services

Thomas Lamprecht t.lamprecht at proxmox.com
Wed Sep 14 11:29:43 CEST 2016


In the real PVE2 environment, locks can be left over if a node fails
during a locked action, for example a backup.
A leftover lock may prevent the service from starting on another node,
so allow our test environment to simulate such events.

Also add two regression tests without the expected log files.

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
 src/PVE/HA/Sim/Hardware.pm                    | 48 +++++++++++++++++++++++++++
 src/PVE/HA/Sim/Resources.pm                   | 23 ++++++++++---
 src/PVE/HA/Sim/TestHardware.pm                |  9 +++++
 src/test/test-locked-service1/README          |  3 ++
 src/test/test-locked-service1/cmdlist         |  5 +++
 src/test/test-locked-service1/hardware_status |  5 +++
 src/test/test-locked-service1/manager_status  |  1 +
 src/test/test-locked-service1/service_config  |  3 ++
 src/test/test-locked-service2/README          |  5 +++
 src/test/test-locked-service2/cmdlist         |  5 +++
 src/test/test-locked-service2/hardware_status |  5 +++
 src/test/test-locked-service2/manager_status  |  1 +
 src/test/test-locked-service2/service_config  |  3 ++
 13 files changed, 112 insertions(+), 4 deletions(-)
 create mode 100644 src/test/test-locked-service1/README
 create mode 100644 src/test/test-locked-service1/cmdlist
 create mode 100644 src/test/test-locked-service1/hardware_status
 create mode 100644 src/test/test-locked-service1/manager_status
 create mode 100644 src/test/test-locked-service1/service_config
 create mode 100644 src/test/test-locked-service2/README
 create mode 100644 src/test/test-locked-service2/cmdlist
 create mode 100644 src/test/test-locked-service2/hardware_status
 create mode 100644 src/test/test-locked-service2/manager_status
 create mode 100644 src/test/test-locked-service2/service_config

diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index be1037d..2c6b8b6 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -194,6 +194,54 @@ sub change_service_location {
     $self->write_service_config($conf);
 }
 
+sub service_has_lock {
+    my ($self, $sid) = @_;
+
+    my $conf = $self->read_service_config();
+
+    die "no such service '$sid'\n" if !$conf->{$sid};
+
+    return $conf->{$sid}->{lock};
+}
+
+sub lock_service {
+    my ($self, $sid, $lock) = @_;
+
+    my $conf = $self->read_service_config();
+
+    die "no such service '$sid'\n" if !$conf->{$sid};
+
+    $conf->{$sid}->{lock} = $lock || 'backup';
+
+    $self->write_service_config($conf);
+
+    return $conf;
+}
+
+sub unlock_service {
+    my ($self, $sid, $lock) = @_;
+
+    my $conf = $self->read_service_config();
+
+    die "no such service '$sid'\n" if !$conf->{$sid};
+
+    if (!defined($conf->{$sid}->{lock})) {
+	warn "service '$sid' not locked\n";
+	return undef;
+    }
+
+    if (defined($lock) && $conf->{$sid}->{lock} ne $lock) {
+	warn "found lock '$conf->{$sid}->{lock}' trying to remove '$lock' lock\n";
+	return undef;
+    }
+
+    my $removed_lock = delete $conf->{$sid}->{lock};
+
+    $self->write_service_config($conf);
+
+    return $removed_lock;
+}
+
 sub queue_crm_commands_nolock {
     my ($self, $cmd) = @_;
 
diff --git a/src/PVE/HA/Sim/Resources.pm b/src/PVE/HA/Sim/Resources.pm
index 25b034e..fe82332 100644
--- a/src/PVE/HA/Sim/Resources.pm
+++ b/src/PVE/HA/Sim/Resources.pm
@@ -37,14 +37,19 @@ sub config_file {
 sub start {
     my ($class, $haenv, $id) = @_;
 
-    my $service_type = $class->type();
+    my $sid = $class->type() . ":$id";
     my $nodename = $haenv->nodename();
     my $hardware = $haenv->hardware();
     my $ss = $hardware->read_service_status($nodename);
 
+    if (my $lock = $hardware->service_has_lock($sid)) {
+	$haenv->log('err', "service '$sid' locked ($lock), unable to start!");
+	return;
+    }
+
     $haenv->sleep(2);
 
-    $ss->{"$service_type:$id"} = 1;
+    $ss->{$sid} = 1;
 
     $hardware->write_service_status($nodename, $ss);
 
@@ -53,14 +58,19 @@ sub start {
 sub shutdown {
     my ($class, $haenv, $id) = @_;
 
-    my $service_type = $class->type();
+    my $sid = $class->type() . ":$id";
     my $nodename = $haenv->nodename();
     my $hardware = $haenv->hardware();
     my $ss = $hardware->read_service_status($nodename);
 
+    if (my $lock = $hardware->service_has_lock($sid)) {
+	$haenv->log('err', "service '$sid' locked ($lock), unable to shutdown!");
+	return;
+    }
+
     $haenv->sleep(2);
 
-    $ss->{"$service_type:$id"} = 0;
+    $ss->{$sid} = 0;
 
     $hardware->write_service_status($nodename, $ss);
 }
@@ -89,6 +99,11 @@ sub migrate {
     my $cmd = $online ? "migrate" : "relocate";
     $haenv->log("info", "service $sid - start $cmd to node '$target'");
 
+    if (my $lock = $hardware->service_has_lock($sid)) {
+	$haenv->log('err', "service '$sid' locked ($lock), unable to $cmd!");
+	return;
+    }
+
     # explicitly shutdown if $online isn't true (relocate)
     if (!$online && $class->check_running($haenv, $id)) {
 	$haenv->log("info", "stopping service $sid (relocate)");
diff --git a/src/PVE/HA/Sim/TestHardware.pm b/src/PVE/HA/Sim/TestHardware.pm
index cfd48e7..0c7d6cd 100644
--- a/src/PVE/HA/Sim/TestHardware.pm
+++ b/src/PVE/HA/Sim/TestHardware.pm
@@ -89,6 +89,7 @@ sub log {
 # restart-lrm <node>
 # service <sid> <enabled|disabled>
 # service <sid> <migrate|relocate> <target>
+# service <sid> <lock|unlock> [lockname]
 
 sub sim_hardware_cmd {
     my ($self, $cmdstr, $logid) = @_;
@@ -193,6 +194,14 @@ sub sim_hardware_cmd {
 
 		$self->delete_service($sid);
 
+	    } elsif ($action eq 'lock') {
+
+		$self->lock_service($sid, $target);
+
+	    } elsif ($action eq 'unlock') {
+
+		$self->unlock_service($sid, $target);
+
 	    } else {
 		die "sim_hardware_cmd: unknown service action '$action' " .
 		    "- not implemented\n"
diff --git a/src/test/test-locked-service1/README b/src/test/test-locked-service1/README
new file mode 100644
index 0000000..1c4f7ed
--- /dev/null
+++ b/src/test/test-locked-service1/README
@@ -0,0 +1,3 @@
+Test failover after single node network failure and an interrupted backup,
+simulated by a leftover lock. The lock should be removed on service recovery as
+the cause it protected vanished after the fencing of node3.
diff --git a/src/test/test-locked-service1/cmdlist b/src/test/test-locked-service1/cmdlist
new file mode 100644
index 0000000..9bdf6bb
--- /dev/null
+++ b/src/test/test-locked-service1/cmdlist
@@ -0,0 +1,5 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "service vm:103 lock" ],
+    [ "network node3 off" ]
+]
diff --git a/src/test/test-locked-service1/hardware_status b/src/test/test-locked-service1/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-locked-service1/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-locked-service1/manager_status b/src/test/test-locked-service1/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-locked-service1/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-locked-service1/service_config b/src/test/test-locked-service1/service_config
new file mode 100644
index 0000000..c6860e7
--- /dev/null
+++ b/src/test/test-locked-service1/service_config
@@ -0,0 +1,3 @@
+{
+    "vm:103": { "node": "node3", "state": "enabled" }
+}
diff --git a/src/test/test-locked-service2/README b/src/test/test-locked-service2/README
new file mode 100644
index 0000000..e185416
--- /dev/null
+++ b/src/test/test-locked-service2/README
@@ -0,0 +1,5 @@
+Test failover after single node network failure and an interrupted snapshot,
+simulated by a leftover lock. The lock should not be deleted automatically,
+as snapshots are mostly a manually triggered activity and in general need a
+manual cleanup, thus the service gets placed into error state.
+
diff --git a/src/test/test-locked-service2/cmdlist b/src/test/test-locked-service2/cmdlist
new file mode 100644
index 0000000..80c42e7
--- /dev/null
+++ b/src/test/test-locked-service2/cmdlist
@@ -0,0 +1,5 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "service vm:103 lock snapshot" ],
+    [ "network node3 off" ]
+]
diff --git a/src/test/test-locked-service2/hardware_status b/src/test/test-locked-service2/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-locked-service2/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-locked-service2/manager_status b/src/test/test-locked-service2/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-locked-service2/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-locked-service2/service_config b/src/test/test-locked-service2/service_config
new file mode 100644
index 0000000..c6860e7
--- /dev/null
+++ b/src/test/test-locked-service2/service_config
@@ -0,0 +1,3 @@
+{
+    "vm:103": { "node": "node3", "state": "enabled" }
+}
-- 
2.1.4




More information about the pve-devel mailing list