[pve-devel] [RFC ha-manager v2 2/3] implement 'stopped' state
Thomas Lamprecht
t.lamprecht at proxmox.com
Tue Nov 15 11:13:56 CET 2016
In the 'stopped' state we do not start a service, but we still recover
it when its node gets fenced.
This can be practical for templates or cold standby guests.
This also removes some unnecessary state transitions to 'started' in
cases where we would go directly to 'request_stop' anyway.
Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
src/PVE/HA/Manager.pm | 24 +++++++++++++++++-----
src/PVE/HA/Sim/TestHardware.pm | 4 ++--
src/test/test-basic1/log.expect | 1 -
.../test-relocate-policy-default-group/log.expect | 1 -
src/test/test-relocate-policy1/log.expect | 1 -
src/test/test-relocate-to-inactive-node/log.expect | 1 -
src/test/test-resource-failure1/log.expect | 1 -
src/test/test-resource-failure2/log.expect | 1 -
src/test/test-resource-failure5/log.expect | 1 -
src/test/test-resource-failure6/log.expect | 1 -
10 files changed, 21 insertions(+), 15 deletions(-)
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index e58fc0b..9d333fb 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -290,7 +290,8 @@ my $recover_fenced_service = sub {
# $sd *is normally read-only*, fencing is the exception
$cd->{node} = $sd->{node} = $recovery_node;
- &$change_service_state($self, $sid, 'started', node => $recovery_node);
+ my $new_state = ($cd->{state} eq 'enabled') ? 'started' : 'request_stop';
+ &$change_service_state($self, $sid, $new_state, node => $recovery_node);
} else {
# no possible node found, cannot recover
$haenv->log('err', "recovering service '$sid' from fenced node " .
@@ -379,9 +380,11 @@ sub manage {
# add new service
foreach my $sid (sort keys %$sc) {
next if $ss->{$sid}; # already there
- $haenv->log('info', "adding new service '$sid' on node '$sc->{$sid}->{node}'");
+ my $cd = $sc->{$sid};
+ $haenv->log('info', "adding new service '$sid' on node '$cd->{node}'");
# assume we are running to avoid relocate running service at add
- $ss->{$sid} = { state => 'started', node => $sc->{$sid}->{node},
+ my $state = ($cd->{state} eq 'enabled') ? 'started' : 'request_stop';
+ $ss->{$sid} = { state => $state, node => $cd->{node},
uid => compute_new_uuid('started') };
}
@@ -432,7 +435,8 @@ sub manage {
my $lrm_mode = $sd->{node} ? $lrm_modes->{$sd->{node}} : undef;
# unfreeze
- &$change_service_state($self, $sid, 'started')
+ my $state = ($cd->{state} eq 'enabled') ? 'started' : 'request_stop';
+ &$change_service_state($self, $sid, $state)
if $lrm_mode && $lrm_mode eq 'active';
} elsif ($last_state eq 'error') {
@@ -579,6 +583,16 @@ sub next_state_stopped {
return;
}
+ if ($ns->node_is_offline_delayed($sd->{node})) {
+ &$change_service_state($self, $sid, 'fence');
+ return;
+ }
+
+ if ($cd->{state} eq 'stopped') {
+ # almost the same as 'disabled' state but the service will also get recovered
+ return;
+ }
+
if ($cd->{state} eq 'enabled') {
# simply mark it started, if it's on the wrong node
# next_state_started will fix that for us
@@ -613,7 +627,7 @@ sub next_state_started {
return;
}
- if ($cd->{state} eq 'disabled') {
+ if ($cd->{state} eq 'disabled' || $cd->{state} eq 'stopped') {
&$change_service_state($self, $sid, 'request_stop');
return;
}
diff --git a/src/PVE/HA/Sim/TestHardware.pm b/src/PVE/HA/Sim/TestHardware.pm
index 0c7d6cd..c6ad238 100644
--- a/src/PVE/HA/Sim/TestHardware.pm
+++ b/src/PVE/HA/Sim/TestHardware.pm
@@ -87,7 +87,7 @@ sub log {
# reboot <node>
# shutdown <node>
# restart-lrm <node>
-# service <sid> <enabled|disabled>
+# service <sid> <enabled|disabled|stopped>
# service <sid> <migrate|relocate> <target>
# service <sid> lock/unlock [lockname]
@@ -175,7 +175,7 @@ sub sim_hardware_cmd {
}
} elsif ($cmd eq 'service') {
- if ($action eq 'enabled' || $action eq 'disabled') {
+ if ($action eq 'enabled' || $action eq 'disabled' || $action eq 'stopped') {
$self->set_service_state($sid, $action);
diff --git a/src/test/test-basic1/log.expect b/src/test/test-basic1/log.expect
index c24b41e..0d92240 100644
--- a/src/test/test-basic1/log.expect
+++ b/src/test/test-basic1/log.expect
@@ -16,7 +16,6 @@ info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
info 20 node1/crm: adding new service 'vm:101' on node 'node1'
info 20 node1/crm: adding new service 'vm:102' on node 'node2'
info 20 node1/crm: adding new service 'vm:103' on node 'node3'
-info 20 node1/crm: service 'vm:102': state changed from 'started' to 'request_stop'
info 21 node1/lrm: got lock 'ha_agent_node1_lock'
info 21 node1/lrm: status change wait_for_agent_lock => active
info 21 node1/lrm: starting service vm:101
diff --git a/src/test/test-relocate-policy-default-group/log.expect b/src/test/test-relocate-policy-default-group/log.expect
index a7dd644..694bef6 100644
--- a/src/test/test-relocate-policy-default-group/log.expect
+++ b/src/test/test-relocate-policy-default-group/log.expect
@@ -14,7 +14,6 @@ info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
info 20 node1/crm: adding new service 'fa:130' on node 'node2'
-info 20 node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
info 22 node2/crm: status change wait_for_quorum => slave
info 23 node2/lrm: got lock 'ha_agent_node2_lock'
info 23 node2/lrm: status change wait_for_agent_lock => active
diff --git a/src/test/test-relocate-policy1/log.expect b/src/test/test-relocate-policy1/log.expect
index 0383604..834284b 100644
--- a/src/test/test-relocate-policy1/log.expect
+++ b/src/test/test-relocate-policy1/log.expect
@@ -11,7 +11,6 @@ info 20 node3/lrm: status change startup => wait_for_agent_lock
info 20 node1/crm: got lock 'ha_manager_lock'
info 20 node1/crm: status change wait_for_quorum => master
info 20 node1/crm: adding new service 'fa:130' on node 'node3'
-info 20 node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
info 21 node1/lrm: got lock 'ha_agent_node1_lock'
info 21 node1/lrm: status change wait_for_agent_lock => active
info 21 node1/lrm: starting service vm:100
diff --git a/src/test/test-relocate-to-inactive-node/log.expect b/src/test/test-relocate-to-inactive-node/log.expect
index c5cfffb..62bc555 100644
--- a/src/test/test-relocate-to-inactive-node/log.expect
+++ b/src/test/test-relocate-to-inactive-node/log.expect
@@ -14,7 +14,6 @@ info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
info 20 node1/crm: adding new service 'vm:103' on node 'node3'
-info 20 node1/crm: service 'vm:103': state changed from 'started' to 'request_stop'
info 22 node2/crm: status change wait_for_quorum => slave
info 24 node3/crm: status change wait_for_quorum => slave
info 25 node3/lrm: got lock 'ha_agent_node3_lock'
diff --git a/src/test/test-resource-failure1/log.expect b/src/test/test-resource-failure1/log.expect
index c3170fc..8439778 100644
--- a/src/test/test-resource-failure1/log.expect
+++ b/src/test/test-resource-failure1/log.expect
@@ -14,7 +14,6 @@ info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
info 20 node1/crm: adding new service 'fa:110' on node 'node2'
-info 20 node1/crm: service 'fa:110': state changed from 'started' to 'request_stop'
info 22 node2/crm: status change wait_for_quorum => slave
info 23 node2/lrm: got lock 'ha_agent_node2_lock'
info 23 node2/lrm: status change wait_for_agent_lock => active
diff --git a/src/test/test-resource-failure2/log.expect b/src/test/test-resource-failure2/log.expect
index 278e7aa..66ddc04 100644
--- a/src/test/test-resource-failure2/log.expect
+++ b/src/test/test-resource-failure2/log.expect
@@ -14,7 +14,6 @@ info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
info 20 node1/crm: adding new service 'fa:130' on node 'node2'
-info 20 node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
info 22 node2/crm: status change wait_for_quorum => slave
info 23 node2/lrm: got lock 'ha_agent_node2_lock'
info 23 node2/lrm: status change wait_for_agent_lock => active
diff --git a/src/test/test-resource-failure5/log.expect b/src/test/test-resource-failure5/log.expect
index 807a237..4396691 100644
--- a/src/test/test-resource-failure5/log.expect
+++ b/src/test/test-resource-failure5/log.expect
@@ -14,7 +14,6 @@ info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
info 20 node1/crm: adding new service 'fa:130' on node 'node2'
-info 20 node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
info 22 node2/crm: status change wait_for_quorum => slave
info 23 node2/lrm: got lock 'ha_agent_node2_lock'
info 23 node2/lrm: status change wait_for_agent_lock => active
diff --git a/src/test/test-resource-failure6/log.expect b/src/test/test-resource-failure6/log.expect
index 05a8bbd..5738b82 100644
--- a/src/test/test-resource-failure6/log.expect
+++ b/src/test/test-resource-failure6/log.expect
@@ -14,7 +14,6 @@ info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
info 20 node1/crm: adding new service 'fa:130' on node 'node2'
-info 20 node1/crm: service 'fa:130': state changed from 'started' to 'request_stop'
info 22 node2/crm: status change wait_for_quorum => slave
info 23 node2/lrm: got lock 'ha_agent_node2_lock'
info 23 node2/lrm: status change wait_for_agent_lock => active
--
2.1.4
More information about the pve-devel
mailing list