[pve-devel] [PATCH ha-manager v2 1/2] add 'no_start' flag for resources

Thomas Lamprecht t.lamprecht at proxmox.com
Thu Nov 3 13:51:04 CET 2016


The new 'no_start' flag is for resources which should not or cannot be
started.
This is mainly useful for templates but could also be useful for
normal VMs/CTs which should get recovered on a node failure but not
started afterwards.

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
 src/PVE/HA/Config.pm                            |  1 +
 src/PVE/HA/LRM.pm                               |  4 +++
 src/PVE/HA/Resources.pm                         |  8 +++++
 src/PVE/HA/Resources/PVECT.pm                   |  1 +
 src/PVE/HA/Resources/PVEVM.pm                   |  1 +
 src/PVE/HA/Sim/Hardware.pm                      |  1 +
 src/PVE/HA/Sim/Resources.pm                     |  1 +
 src/test/test-service-no_start1/README          |  6 ++++
 src/test/test-service-no_start1/cmdlist         |  5 +++
 src/test/test-service-no_start1/hardware_status |  5 +++
 src/test/test-service-no_start1/log.expect      | 28 +++++++++++++++++
 src/test/test-service-no_start1/manager_status  |  1 +
 src/test/test-service-no_start1/service_config  |  3 ++
 src/test/test-service-no_start2/README          |  5 +++
 src/test/test-service-no_start2/cmdlist         |  5 +++
 src/test/test-service-no_start2/hardware_status |  5 +++
 src/test/test-service-no_start2/log.expect      | 42 +++++++++++++++++++++++++
 src/test/test-service-no_start2/manager_status  |  1 +
 src/test/test-service-no_start2/service_config  |  3 ++
 19 files changed, 126 insertions(+)
 create mode 100644 src/test/test-service-no_start1/README
 create mode 100644 src/test/test-service-no_start1/cmdlist
 create mode 100644 src/test/test-service-no_start1/hardware_status
 create mode 100644 src/test/test-service-no_start1/log.expect
 create mode 100644 src/test/test-service-no_start1/manager_status
 create mode 100644 src/test/test-service-no_start1/service_config
 create mode 100644 src/test/test-service-no_start2/README
 create mode 100644 src/test/test-service-no_start2/cmdlist
 create mode 100644 src/test/test-service-no_start2/hardware_status
 create mode 100644 src/test/test-service-no_start2/log.expect
 create mode 100644 src/test/test-service-no_start2/manager_status
 create mode 100644 src/test/test-service-no_start2/service_config

diff --git a/src/PVE/HA/Config.pm b/src/PVE/HA/Config.pm
index 1802a7d..8322ddd 100644
--- a/src/PVE/HA/Config.pm
+++ b/src/PVE/HA/Config.pm
@@ -98,6 +98,7 @@ sub read_and_check_resources_config {
 	$d->{state} = 'enabled' if !defined($d->{state});
 	$d->{max_restart} = 1 if !defined($d->{max_restart});
 	$d->{max_relocate} = 1 if !defined($d->{max_relocate});
+	$d->{no_start} = 0 if !defined($d->{no_start});
 	if (PVE::HA::Resources->lookup($d->{type})) {
 	    if (my $vmd = $vmlist->{ids}->{$name}) {
 		if (!$vmd) {
diff --git a/src/PVE/HA/LRM.pm b/src/PVE/HA/LRM.pm
index 26c5c89..db73137 100644
--- a/src/PVE/HA/LRM.pm
+++ b/src/PVE/HA/LRM.pm
@@ -673,6 +673,8 @@ sub exec_resource_agent {
 
 	return SUCCESS if $running;
 
+	return SUCCESS if $service_config->{no_start};
+
 	$haenv->log("info", "starting service $sid");
 
 	$plugin->start($haenv, $id);
@@ -691,6 +693,8 @@ sub exec_resource_agent {
 
 	return SUCCESS if !$running;
 
+	return SUCCESS if $service_config->{no_start};
+
 	$haenv->log("info", "stopping service $sid");
 
 	$plugin->shutdown($haenv, $id);
diff --git a/src/PVE/HA/Resources.pm b/src/PVE/HA/Resources.pm
index 96d2f8f..064b5fc 100644
--- a/src/PVE/HA/Resources.pm
+++ b/src/PVE/HA/Resources.pm
@@ -41,6 +41,14 @@ my $defaultData = {
 	    default => 1,
 	    minimum => 0,
 	},
+	no_start => {
+	    type => 'boolean',
+	    optional => '1',
+	    default => '0',
+	    description => 'Flag for marking a resource as not able to start. ' .
+		'This can be useful for Templates or also other resources which '.
+		'should be recovered on failure but not touched otherwise',
+	},
 	comment => {
 	    description => "Description.",
 	    type => 'string',
diff --git a/src/PVE/HA/Resources/PVECT.pm b/src/PVE/HA/Resources/PVECT.pm
index d1312ab..77b3753 100644
--- a/src/PVE/HA/Resources/PVECT.pm
+++ b/src/PVE/HA/Resources/PVECT.pm
@@ -29,6 +29,7 @@ sub options {
 	comment => { optional => 1 },
 	max_restart => { optional => 1 },
 	max_relocate => { optional => 1 },
+	no_start => { optional => 1 },
     };
 }
 
diff --git a/src/PVE/HA/Resources/PVEVM.pm b/src/PVE/HA/Resources/PVEVM.pm
index 55d4368..7a24b88 100644
--- a/src/PVE/HA/Resources/PVEVM.pm
+++ b/src/PVE/HA/Resources/PVEVM.pm
@@ -28,6 +28,7 @@ sub options {
 	comment => { optional => 1 },
 	max_restart => { optional => 1 },
 	max_relocate => { optional => 1 },
+	no_start => { optional => 1 },
     };
 }
 
diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index 383b10e..50bbee3 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -103,6 +103,7 @@ sub read_service_config {
 	$d->{state} = 'disabled' if !$d->{state};
 	$d->{max_restart} = 1 if !defined($d->{max_restart});
 	$d->{max_relocate} = 1 if !defined($d->{max_relocate});
+	$d->{no_start} = 0 if !defined($d->{no_start});
     }
 
     return $conf;
diff --git a/src/PVE/HA/Sim/Resources.pm b/src/PVE/HA/Sim/Resources.pm
index bccc0e6..f09cf7e 100644
--- a/src/PVE/HA/Sim/Resources.pm
+++ b/src/PVE/HA/Sim/Resources.pm
@@ -22,6 +22,7 @@ sub options {
 	comment => { optional => 1 },
 	max_restart => { optional => 1 },
 	max_relocate => { optional => 1 },
+	no_start => { optional => 1 },
     };
 }
 
diff --git a/src/test/test-service-no_start1/README b/src/test/test-service-no_start1/README
new file mode 100644
index 0000000..a5351f6
--- /dev/null
+++ b/src/test/test-service-no_start1/README
@@ -0,0 +1,6 @@
+Test if a simulated template with the no_start flag does not cause errors; for
+this, the actual start and stop resource agent methods must not be called.
+
+The template gets simulated by using the VirtFail resource; the configuration
+fa:1501 tells the resource to fail to start and to fail to stop, which is the
+same behavior as a template.
diff --git a/src/test/test-service-no_start1/cmdlist b/src/test/test-service-no_start1/cmdlist
new file mode 100644
index 0000000..7b568ef
--- /dev/null
+++ b/src/test/test-service-no_start1/cmdlist
@@ -0,0 +1,5 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "service fa:1501 enabled" ],
+    [ "service fa:1501 disabled" ]
+]
diff --git a/src/test/test-service-no_start1/hardware_status b/src/test/test-service-no_start1/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-service-no_start1/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-service-no_start1/log.expect b/src/test/test-service-no_start1/log.expect
new file mode 100644
index 0000000..f0bff6e
--- /dev/null
+++ b/src/test/test-service-no_start1/log.expect
@@ -0,0 +1,28 @@
+info      0     hardware: starting simulation
+info     20      cmdlist: execute power node1 on
+info     20    node1/crm: status change startup => wait_for_quorum
+info     20    node1/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node2 on
+info     20    node2/crm: status change startup => wait_for_quorum
+info     20    node2/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node3 on
+info     20    node3/crm: status change startup => wait_for_quorum
+info     20    node3/lrm: status change startup => wait_for_agent_lock
+info     20    node1/crm: got lock 'ha_manager_lock'
+info     20    node1/crm: status change wait_for_quorum => master
+info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info     20    node1/crm: adding new service 'fa:1501' on node 'node3'
+info     20    node1/crm: service 'fa:1501': state changed from 'started' to 'request_stop'
+info     22    node2/crm: status change wait_for_quorum => slave
+info     24    node3/crm: status change wait_for_quorum => slave
+info     25    node3/lrm: got lock 'ha_agent_node3_lock'
+info     25    node3/lrm: status change wait_for_agent_lock => active
+info     40    node1/crm: service 'fa:1501': state changed from 'request_stop' to 'stopped'
+info    120      cmdlist: execute service fa:1501 enabled
+info    120    node1/crm: service 'fa:1501': state changed from 'stopped' to 'started'  (node = node3)
+info    220      cmdlist: execute service fa:1501 disabled
+info    220    node1/crm: service 'fa:1501': state changed from 'started' to 'request_stop'
+info    240    node1/crm: service 'fa:1501': state changed from 'request_stop' to 'stopped'
+info    820     hardware: exit simulation - done
diff --git a/src/test/test-service-no_start1/manager_status b/src/test/test-service-no_start1/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-service-no_start1/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-service-no_start1/service_config b/src/test/test-service-no_start1/service_config
new file mode 100644
index 0000000..90b696c
--- /dev/null
+++ b/src/test/test-service-no_start1/service_config
@@ -0,0 +1,3 @@
+{
+    "fa:1501": { "node": "node3", "state": "disabled", "no_start": "1" }
+}
diff --git a/src/test/test-service-no_start2/README b/src/test/test-service-no_start2/README
new file mode 100644
index 0000000..c4fc1ad
--- /dev/null
+++ b/src/test/test-service-no_start2/README
@@ -0,0 +1,5 @@
+Test if a simulated template with the no_start flag recovers if its node fails.
+
+The template gets simulated by using the VirtFail resource; the configuration
+fa:1501 tells the resource to fail to start and to fail to stop, which is the
+same behavior as a template.
diff --git a/src/test/test-service-no_start2/cmdlist b/src/test/test-service-no_start2/cmdlist
new file mode 100644
index 0000000..dcff2aa
--- /dev/null
+++ b/src/test/test-service-no_start2/cmdlist
@@ -0,0 +1,5 @@
+[
+    [ "power node1 on", "power node2 on", "power node3 on"],
+    [ "service fa:1501 enabled" ],
+    [ "network node3 off" ]
+]
diff --git a/src/test/test-service-no_start2/hardware_status b/src/test/test-service-no_start2/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-service-no_start2/hardware_status
@@ -0,0 +1,5 @@
+{
+  "node1": { "power": "off", "network": "off" },
+  "node2": { "power": "off", "network": "off" },
+  "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-service-no_start2/log.expect b/src/test/test-service-no_start2/log.expect
new file mode 100644
index 0000000..cd2a686
--- /dev/null
+++ b/src/test/test-service-no_start2/log.expect
@@ -0,0 +1,42 @@
+info      0     hardware: starting simulation
+info     20      cmdlist: execute power node1 on
+info     20    node1/crm: status change startup => wait_for_quorum
+info     20    node1/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node2 on
+info     20    node2/crm: status change startup => wait_for_quorum
+info     20    node2/lrm: status change startup => wait_for_agent_lock
+info     20      cmdlist: execute power node3 on
+info     20    node3/crm: status change startup => wait_for_quorum
+info     20    node3/lrm: status change startup => wait_for_agent_lock
+info     20    node1/crm: got lock 'ha_manager_lock'
+info     20    node1/crm: status change wait_for_quorum => master
+info     20    node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info     20    node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info     20    node1/crm: adding new service 'fa:1501' on node 'node3'
+info     20    node1/crm: service 'fa:1501': state changed from 'started' to 'request_stop'
+info     22    node2/crm: status change wait_for_quorum => slave
+info     24    node3/crm: status change wait_for_quorum => slave
+info     25    node3/lrm: got lock 'ha_agent_node3_lock'
+info     25    node3/lrm: status change wait_for_agent_lock => active
+info     40    node1/crm: service 'fa:1501': state changed from 'request_stop' to 'stopped'
+info    120      cmdlist: execute service fa:1501 enabled
+info    120    node1/crm: service 'fa:1501': state changed from 'stopped' to 'started'  (node = node3)
+info    220      cmdlist: execute network node3 off
+info    220    node1/crm: node 'node3': state changed from 'online' => 'unknown'
+info    224    node3/crm: status change slave => wait_for_quorum
+info    225    node3/lrm: status change active => lost_agent_lock
+info    260    node1/crm: service 'fa:1501': state changed from 'started' to 'fence'
+info    260    node1/crm: node 'node3': state changed from 'unknown' => 'fence'
+info    266     watchdog: execute power node3 off
+info    265    node3/crm: killed by poweroff
+info    266    node3/lrm: killed by poweroff
+info    266     hardware: server 'node3' stopped by poweroff (watchdog)
+info    340    node1/crm: got lock 'ha_agent_node3_lock'
+info    340    node1/crm: fencing: acknowledged - got agent lock for node 'node3'
+info    340    node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+info    340    node1/crm: recover service 'fa:1501' from fenced node 'node3' to node 'node1'
+info    340    node1/crm: service 'fa:1501': state changed from 'fence' to 'started'  (node = node1)
+info    341    node1/lrm: got lock 'ha_agent_node1_lock'
+info    341    node1/lrm: status change wait_for_agent_lock => active
+info    820     hardware: exit simulation - done
diff --git a/src/test/test-service-no_start2/manager_status b/src/test/test-service-no_start2/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-service-no_start2/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-service-no_start2/service_config b/src/test/test-service-no_start2/service_config
new file mode 100644
index 0000000..90b696c
--- /dev/null
+++ b/src/test/test-service-no_start2/service_config
@@ -0,0 +1,3 @@
+{
+    "fa:1501": { "node": "node3", "state": "disabled", "no_start": "1" }
+}
-- 
2.1.4





More information about the pve-devel mailing list