[pve-devel] [PATCH ha-manager v2 1/2] add possibility to simulate locks from services

Thomas Lamprecht t.lamprecht at proxmox.com
Mon Sep 12 16:15:24 CEST 2016


In the real PVE2 environment locks can be left over if a node fails
during a locked action, for example a backup.
A leftover lock may prevent the service from starting on another node,
so allow our test environment to simulate such events.

Also add two regression tests without the expected log files.

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---

changes since v1:
* allow locking arbitrary locks, not just one hard coded
* add another test case so we can regression test both behaviours,
  the one where a lock should get removed on recovery and the one where
  it mustn't be removed.

 src/PVE/HA/Sim/Hardware.pm                    | 44 +++++++++++++++++++++++++++
 src/PVE/HA/Sim/Resources.pm                   |  5 +++
 src/PVE/HA/Sim/TestHardware.pm                |  9 ++++++
 src/test/test-locked-service1/README          |  3 ++
 src/test/test-locked-service1/cmdlist         |  5 +++
 src/test/test-locked-service1/hardware_status |  5 +++
 src/test/test-locked-service1/manager_status  |  1 +
 src/test/test-locked-service1/service_config  |  3 ++
 src/test/test-locked-service2/README          |  4 +++
 src/test/test-locked-service2/cmdlist         |  5 +++
 src/test/test-locked-service2/hardware_status |  5 +++
 src/test/test-locked-service2/manager_status  |  1 +
 src/test/test-locked-service2/service_config  |  3 ++
 13 files changed, 93 insertions(+)
 create mode 100644 src/test/test-locked-service1/README
 create mode 100644 src/test/test-locked-service1/cmdlist
 create mode 100644 src/test/test-locked-service1/hardware_status
 create mode 100644 src/test/test-locked-service1/manager_status
 create mode 100644 src/test/test-locked-service1/service_config
 create mode 100644 src/test/test-locked-service2/README
 create mode 100644 src/test/test-locked-service2/cmdlist
 create mode 100644 src/test/test-locked-service2/hardware_status
 create mode 100644 src/test/test-locked-service2/manager_status
 create mode 100644 src/test/test-locked-service2/service_config

diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index be1037d..3b078b8 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -194,6 +194,50 @@ sub change_service_location {
     $self->write_service_config($conf);
 }
 
+sub service_has_lock {
+    my ($self, $sid) = @_;
+    # Look up the lock of service $sid in the simulated service config.
+    my $conf = $self->read_service_config();
+    # Unknown services are a hard error rather than "not locked".
+    die "no such service '$sid'\n" if !$conf->{$sid};
+    # Returns the stored lock name, or undef when no lock is set.
+    return $conf->{$sid}->{lock};
+}
+
+sub lock_service {
+    my ($self, $sid, $lock) = @_;
+    # Set a lock on service $sid and write the changed service config back.
+    my $conf = $self->read_service_config();
+
+    die "no such service '$sid'\n" if !$conf->{$sid};
+    # No (or a false) lock name given: fall back to a 'backup' lock.
+    $conf->{$sid}->{lock} = $lock || 'backup'; # overwrites any existing lock
+
+    $self->write_service_config($conf);
+    # Returns the complete, updated service config hash (not just $sid).
+    return $conf;
+}
+
+sub unlock_service {
+    my ($self, $sid, $lock) = @_;
+    # Remove the lock from service $sid; $lock optionally names the expected lock.
+    my $conf = $self->read_service_config();
+
+    die "no such service '$sid'\n" if !$conf->{$sid};
+    # Unlocking a service that has no lock is treated as an error.
+    die "service '$sid' not locked\n"
+      if !defined($conf->{$sid}->{lock});
+    # If a lock name was passed it must match the lock currently set.
+    die "found lock '$conf->{$sid}->{lock}' trying to remove '$lock' lock\n"
+      if defined($lock) && $conf->{$sid}->{lock} ne $lock;
+
+    my $removed_lock = delete $conf->{$sid}->{lock};
+
+    $self->write_service_config($conf);
+    # Returns the name of the lock that was removed.
+    return $removed_lock;
+}
+
 sub queue_crm_commands_nolock {
     my ($self, $cmd) = @_;
 
diff --git a/src/PVE/HA/Sim/Resources.pm b/src/PVE/HA/Sim/Resources.pm
index 25b034e..ec3d775 100644
--- a/src/PVE/HA/Sim/Resources.pm
+++ b/src/PVE/HA/Sim/Resources.pm
@@ -42,6 +42,11 @@ sub start {
     my $hardware = $haenv->hardware();
     my $ss = $hardware->read_service_status($nodename);
 
+    if ($hardware->service_has_lock("$service_type:$id")) {
+	$haenv->log('err', "service '$service_type:$id' locked, unable to start!");
+	return;
+    }
+
     $haenv->sleep(2);
 
     $ss->{"$service_type:$id"} = 1;
diff --git a/src/PVE/HA/Sim/TestHardware.pm b/src/PVE/HA/Sim/TestHardware.pm
index cfd48e7..0c7d6cd 100644
--- a/src/PVE/HA/Sim/TestHardware.pm
+++ b/src/PVE/HA/Sim/TestHardware.pm
@@ -89,6 +89,7 @@ sub log {
 # restart-lrm <node>
 # service <sid> <enabled|disabled>
 # service <sid> <migrate|relocate> <target>
+# service <sid> <lock|unlock> [lockname]
 
 sub sim_hardware_cmd {
     my ($self, $cmdstr, $logid) = @_;
@@ -193,6 +194,14 @@ sub sim_hardware_cmd {
 
 		$self->delete_service($sid);
 
+	    } elsif ($action eq 'lock') {
+
+		$self->lock_service($sid, $target);
+
+	    } elsif ($action eq 'unlock') {
+
+		$self->unlock_service($sid, $target);
+
 	    } else {
 		die "sim_hardware_cmd: unknown service action '$action' " .
 		    "- not implemented\n"
diff --git a/src/test/test-locked-service1/README b/src/test/test-locked-service1/README
new file mode 100644
index 0000000..1c4f7ed
--- /dev/null
+++ b/src/test/test-locked-service1/README
@@ -0,0 +1,3 @@
+Test failover after single node network failure and an interrupted backup,
+simulated by a left over lock. The lock should be removed on service recovery as
+the cause it protected vanished after the fencing of node3.
diff --git a/src/test/test-locked-service1/cmdlist b/src/test/test-locked-service1/cmdlist
new file mode 100644
index 0000000..9bdf6bb
--- /dev/null
+++ b/src/test/test-locked-service1/cmdlist
@@ -0,0 +1,5 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "service vm:103 lock" ],
+    [ "network node3 off" ]
+]
diff --git a/src/test/test-locked-service1/hardware_status b/src/test/test-locked-service1/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-locked-service1/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-locked-service1/manager_status b/src/test/test-locked-service1/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-locked-service1/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-locked-service1/service_config b/src/test/test-locked-service1/service_config
new file mode 100644
index 0000000..c6860e7
--- /dev/null
+++ b/src/test/test-locked-service1/service_config
@@ -0,0 +1,3 @@
+{
+    "vm:103": { "node": "node3", "state": "enabled" }
+}
diff --git a/src/test/test-locked-service2/README b/src/test/test-locked-service2/README
new file mode 100644
index 0000000..6680902
--- /dev/null
+++ b/src/test/test-locked-service2/README
@@ -0,0 +1,4 @@
+Test failover after single node network failure and an interrupted snapshot,
+simulated by a left over lock.
+The lock should not be deleted automatically, as snapshots are mostly a manually
+triggered activity and in general need a manual cleanup.
diff --git a/src/test/test-locked-service2/cmdlist b/src/test/test-locked-service2/cmdlist
new file mode 100644
index 0000000..80c42e7
--- /dev/null
+++ b/src/test/test-locked-service2/cmdlist
@@ -0,0 +1,5 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "service vm:103 lock snapshot" ],
+    [ "network node3 off" ]
+]
diff --git a/src/test/test-locked-service2/hardware_status b/src/test/test-locked-service2/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-locked-service2/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-locked-service2/manager_status b/src/test/test-locked-service2/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-locked-service2/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-locked-service2/service_config b/src/test/test-locked-service2/service_config
new file mode 100644
index 0000000..c6860e7
--- /dev/null
+++ b/src/test/test-locked-service2/service_config
@@ -0,0 +1,3 @@
+{
+    "vm:103": { "node": "node3", "state": "enabled" }
+}
-- 
2.1.4





More information about the pve-devel mailing list