[pve-devel] [PATCH ha-manager v2 1/2] add 'no_start' flag for resources
Thomas Lamprecht
t.lamprecht at proxmox.com
Thu Nov 3 13:51:04 CET 2016
The new 'no_start' flag is for resources which should not or cannot be
started.
This is mainly useful for templates but could also be useful for
normal VMs/CTs which should get recovered on a node failure but not
started afterwards.
Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
src/PVE/HA/Config.pm | 1 +
src/PVE/HA/LRM.pm | 4 +++
src/PVE/HA/Resources.pm | 8 +++++
src/PVE/HA/Resources/PVECT.pm | 1 +
src/PVE/HA/Resources/PVEVM.pm | 1 +
src/PVE/HA/Sim/Hardware.pm | 1 +
src/PVE/HA/Sim/Resources.pm | 1 +
src/test/test-service-no_start1/README | 6 ++++
src/test/test-service-no_start1/cmdlist | 5 +++
src/test/test-service-no_start1/hardware_status | 5 +++
src/test/test-service-no_start1/log.expect | 28 +++++++++++++++++
src/test/test-service-no_start1/manager_status | 1 +
src/test/test-service-no_start1/service_config | 3 ++
src/test/test-service-no_start2/README | 5 +++
src/test/test-service-no_start2/cmdlist | 5 +++
src/test/test-service-no_start2/hardware_status | 5 +++
src/test/test-service-no_start2/log.expect | 42 +++++++++++++++++++++++++
src/test/test-service-no_start2/manager_status | 1 +
src/test/test-service-no_start2/service_config | 3 ++
19 files changed, 126 insertions(+)
create mode 100644 src/test/test-service-no_start1/README
create mode 100644 src/test/test-service-no_start1/cmdlist
create mode 100644 src/test/test-service-no_start1/hardware_status
create mode 100644 src/test/test-service-no_start1/log.expect
create mode 100644 src/test/test-service-no_start1/manager_status
create mode 100644 src/test/test-service-no_start1/service_config
create mode 100644 src/test/test-service-no_start2/README
create mode 100644 src/test/test-service-no_start2/cmdlist
create mode 100644 src/test/test-service-no_start2/hardware_status
create mode 100644 src/test/test-service-no_start2/log.expect
create mode 100644 src/test/test-service-no_start2/manager_status
create mode 100644 src/test/test-service-no_start2/service_config
diff --git a/src/PVE/HA/Config.pm b/src/PVE/HA/Config.pm
index 1802a7d..8322ddd 100644
--- a/src/PVE/HA/Config.pm
+++ b/src/PVE/HA/Config.pm
@@ -98,6 +98,7 @@ sub read_and_check_resources_config {
$d->{state} = 'enabled' if !defined($d->{state});
$d->{max_restart} = 1 if !defined($d->{max_restart});
$d->{max_relocate} = 1 if !defined($d->{max_relocate});
+ $d->{no_start} = 0 if !defined($d->{no_start});
if (PVE::HA::Resources->lookup($d->{type})) {
if (my $vmd = $vmlist->{ids}->{$name}) {
if (!$vmd) {
diff --git a/src/PVE/HA/LRM.pm b/src/PVE/HA/LRM.pm
index 26c5c89..db73137 100644
--- a/src/PVE/HA/LRM.pm
+++ b/src/PVE/HA/LRM.pm
@@ -673,6 +673,8 @@ sub exec_resource_agent {
return SUCCESS if $running;
+ return SUCCESS if $service_config->{no_start};
+
$haenv->log("info", "starting service $sid");
$plugin->start($haenv, $id);
@@ -691,6 +693,8 @@ sub exec_resource_agent {
return SUCCESS if !$running;
+ return SUCCESS if $service_config->{no_start};
+
$haenv->log("info", "stopping service $sid");
$plugin->shutdown($haenv, $id);
diff --git a/src/PVE/HA/Resources.pm b/src/PVE/HA/Resources.pm
index 96d2f8f..064b5fc 100644
--- a/src/PVE/HA/Resources.pm
+++ b/src/PVE/HA/Resources.pm
@@ -41,6 +41,14 @@ my $defaultData = {
default => 1,
minimum => 0,
},
+ no_start => {
+ type => 'boolean',
+ optional => '1',
+ default => '0',
+ description => 'Flag for marking a resource as not able to start. ' .
+ 'This can be useful for Templates or also other resources which '.
+ 'should be recovered on failure but not touched otherwise',
+ },
comment => {
description => "Description.",
type => 'string',
diff --git a/src/PVE/HA/Resources/PVECT.pm b/src/PVE/HA/Resources/PVECT.pm
index d1312ab..77b3753 100644
--- a/src/PVE/HA/Resources/PVECT.pm
+++ b/src/PVE/HA/Resources/PVECT.pm
@@ -29,6 +29,7 @@ sub options {
comment => { optional => 1 },
max_restart => { optional => 1 },
max_relocate => { optional => 1 },
+ no_start => { optional => 1 },
};
}
diff --git a/src/PVE/HA/Resources/PVEVM.pm b/src/PVE/HA/Resources/PVEVM.pm
index 55d4368..7a24b88 100644
--- a/src/PVE/HA/Resources/PVEVM.pm
+++ b/src/PVE/HA/Resources/PVEVM.pm
@@ -28,6 +28,7 @@ sub options {
comment => { optional => 1 },
max_restart => { optional => 1 },
max_relocate => { optional => 1 },
+ no_start => { optional => 1 },
};
}
diff --git a/src/PVE/HA/Sim/Hardware.pm b/src/PVE/HA/Sim/Hardware.pm
index 383b10e..50bbee3 100644
--- a/src/PVE/HA/Sim/Hardware.pm
+++ b/src/PVE/HA/Sim/Hardware.pm
@@ -103,6 +103,7 @@ sub read_service_config {
$d->{state} = 'disabled' if !$d->{state};
$d->{max_restart} = 1 if !defined($d->{max_restart});
$d->{max_relocate} = 1 if !defined($d->{max_relocate});
+ $d->{no_start} = 0 if !defined($d->{no_start});
}
return $conf;
diff --git a/src/PVE/HA/Sim/Resources.pm b/src/PVE/HA/Sim/Resources.pm
index bccc0e6..f09cf7e 100644
--- a/src/PVE/HA/Sim/Resources.pm
+++ b/src/PVE/HA/Sim/Resources.pm
@@ -22,6 +22,7 @@ sub options {
comment => { optional => 1 },
max_restart => { optional => 1 },
max_relocate => { optional => 1 },
+ no_start => { optional => 1 },
};
}
diff --git a/src/test/test-service-no_start1/README b/src/test/test-service-no_start1/README
new file mode 100644
index 0000000..a5351f6
--- /dev/null
+++ b/src/test/test-service-no_start1/README
@@ -0,0 +1,6 @@
+Test if a simulated template with the no_start flag does not cause errors; for
+this, the actual start and stop resource agent methods must not be called.
+
+The template gets simulated by using the VirtFail resource; the configuration
+fa:1501 tells the resource to fail to start and to fail to stop, the same
+behavior as a template.
diff --git a/src/test/test-service-no_start1/cmdlist b/src/test/test-service-no_start1/cmdlist
new file mode 100644
index 0000000..7b568ef
--- /dev/null
+++ b/src/test/test-service-no_start1/cmdlist
@@ -0,0 +1,5 @@
+[
+ [ "power node1 on", "power node2 on", "power node3 on"],
+ [ "service fa:1501 enabled" ],
+ [ "service fa:1501 disabled" ]
+]
diff --git a/src/test/test-service-no_start1/hardware_status b/src/test/test-service-no_start1/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-service-no_start1/hardware_status
@@ -0,0 +1,5 @@
+{
+ "node1": { "power": "off", "network": "off" },
+ "node2": { "power": "off", "network": "off" },
+ "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-service-no_start1/log.expect b/src/test/test-service-no_start1/log.expect
new file mode 100644
index 0000000..f0bff6e
--- /dev/null
+++ b/src/test/test-service-no_start1/log.expect
@@ -0,0 +1,28 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'fa:1501' on node 'node3'
+info 20 node1/crm: service 'fa:1501': state changed from 'started' to 'request_stop'
+info 22 node2/crm: status change wait_for_quorum => slave
+info 24 node3/crm: status change wait_for_quorum => slave
+info 25 node3/lrm: got lock 'ha_agent_node3_lock'
+info 25 node3/lrm: status change wait_for_agent_lock => active
+info 40 node1/crm: service 'fa:1501': state changed from 'request_stop' to 'stopped'
+info 120 cmdlist: execute service fa:1501 enabled
+info 120 node1/crm: service 'fa:1501': state changed from 'stopped' to 'started' (node = node3)
+info 220 cmdlist: execute service fa:1501 disabled
+info 220 node1/crm: service 'fa:1501': state changed from 'started' to 'request_stop'
+info 240 node1/crm: service 'fa:1501': state changed from 'request_stop' to 'stopped'
+info 820 hardware: exit simulation - done
diff --git a/src/test/test-service-no_start1/manager_status b/src/test/test-service-no_start1/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-service-no_start1/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-service-no_start1/service_config b/src/test/test-service-no_start1/service_config
new file mode 100644
index 0000000..90b696c
--- /dev/null
+++ b/src/test/test-service-no_start1/service_config
@@ -0,0 +1,3 @@
+{
+ "fa:1501": { "node": "node3", "state": "disabled", "no_start": "1" }
+}
diff --git a/src/test/test-service-no_start2/README b/src/test/test-service-no_start2/README
new file mode 100644
index 0000000..c4fc1ad
--- /dev/null
+++ b/src/test/test-service-no_start2/README
@@ -0,0 +1,5 @@
+Test if a simulated template with the no_start flag recovers if its node fails.
+
+The template gets simulated by using the VirtFail resource; the configuration
+fa:1501 tells the resource to fail to start and to fail to stop, the same
+behavior as a template.
diff --git a/src/test/test-service-no_start2/cmdlist b/src/test/test-service-no_start2/cmdlist
new file mode 100644
index 0000000..dcff2aa
--- /dev/null
+++ b/src/test/test-service-no_start2/cmdlist
@@ -0,0 +1,5 @@
+[
+ [ "power node1 on", "power node2 on", "power node3 on"],
+ [ "service fa:1501 enabled" ],
+ [ "network node3 off" ]
+]
diff --git a/src/test/test-service-no_start2/hardware_status b/src/test/test-service-no_start2/hardware_status
new file mode 100644
index 0000000..451beb1
--- /dev/null
+++ b/src/test/test-service-no_start2/hardware_status
@@ -0,0 +1,5 @@
+{
+ "node1": { "power": "off", "network": "off" },
+ "node2": { "power": "off", "network": "off" },
+ "node3": { "power": "off", "network": "off" }
+}
diff --git a/src/test/test-service-no_start2/log.expect b/src/test/test-service-no_start2/log.expect
new file mode 100644
index 0000000..cd2a686
--- /dev/null
+++ b/src/test/test-service-no_start2/log.expect
@@ -0,0 +1,42 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'fa:1501' on node 'node3'
+info 20 node1/crm: service 'fa:1501': state changed from 'started' to 'request_stop'
+info 22 node2/crm: status change wait_for_quorum => slave
+info 24 node3/crm: status change wait_for_quorum => slave
+info 25 node3/lrm: got lock 'ha_agent_node3_lock'
+info 25 node3/lrm: status change wait_for_agent_lock => active
+info 40 node1/crm: service 'fa:1501': state changed from 'request_stop' to 'stopped'
+info 120 cmdlist: execute service fa:1501 enabled
+info 120 node1/crm: service 'fa:1501': state changed from 'stopped' to 'started' (node = node3)
+info 220 cmdlist: execute network node3 off
+info 220 node1/crm: node 'node3': state changed from 'online' => 'unknown'
+info 224 node3/crm: status change slave => wait_for_quorum
+info 225 node3/lrm: status change active => lost_agent_lock
+info 260 node1/crm: service 'fa:1501': state changed from 'started' to 'fence'
+info 260 node1/crm: node 'node3': state changed from 'unknown' => 'fence'
+info 266 watchdog: execute power node3 off
+info 265 node3/crm: killed by poweroff
+info 266 node3/lrm: killed by poweroff
+info 266 hardware: server 'node3' stopped by poweroff (watchdog)
+info 340 node1/crm: got lock 'ha_agent_node3_lock'
+info 340 node1/crm: fencing: acknowledged - got agent lock for node 'node3'
+info 340 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+info 340 node1/crm: recover service 'fa:1501' from fenced node 'node3' to node 'node1'
+info 340 node1/crm: service 'fa:1501': state changed from 'fence' to 'started' (node = node1)
+info 341 node1/lrm: got lock 'ha_agent_node1_lock'
+info 341 node1/lrm: status change wait_for_agent_lock => active
+info 820 hardware: exit simulation - done
diff --git a/src/test/test-service-no_start2/manager_status b/src/test/test-service-no_start2/manager_status
new file mode 100644
index 0000000..0967ef4
--- /dev/null
+++ b/src/test/test-service-no_start2/manager_status
@@ -0,0 +1 @@
+{}
diff --git a/src/test/test-service-no_start2/service_config b/src/test/test-service-no_start2/service_config
new file mode 100644
index 0000000..90b696c
--- /dev/null
+++ b/src/test/test-service-no_start2/service_config
@@ -0,0 +1,3 @@
+{
+ "fa:1501": { "node": "node3", "state": "disabled", "no_start": "1" }
+}
--
2.1.4
More information about the pve-devel
mailing list