[pve-devel] [PATCH ha-manager 2/4] resource agents: fix relocate
Thomas Lamprecht
t.lamprecht at proxmox.com
Wed Jan 13 15:15:31 CET 2016
Previously, relocating a running VM wasn't possible because we always
tried to migrate online; this patch fixes that.
Also, only VMs may migrate online for now; containers will always get relocated.
This also avoids confusion from the 'migrate (running)' log message
in the case of CTs, as they always get relocated.
Fixes test test-shutdown3
Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
src/PVE/HA/Env/PVE2.pm | 4 ++-
src/PVE/HA/Manager.pm | 9 ++++--
src/PVE/HA/Resources.pm | 12 +++++++-
src/test/test-shutdown3/log.expect | 58 ++++++++++++++++++++++++++++++++++++++
4 files changed, 79 insertions(+), 4 deletions(-)
create mode 100644 src/test/test-shutdown3/log.expect
diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index 2dbe4ad..0741f5a 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -461,9 +461,11 @@ sub exec_resource_agent {
return SUCCESS;
}
+ my $online = ($cmd eq 'migrate') ? 1 : 0;
+
my $oldconfig = $plugin->config_file($vmid, $nodename);
- $plugin->migrate($self, $vmid, $target, 1);
+ $plugin->migrate($self, $vmid, $target, $online);
# something went wrong if old config file is still there
if (-f $oldconfig) {
diff --git a/src/PVE/HA/Manager.pm b/src/PVE/HA/Manager.pm
index eed4676..b0788e7 100644
--- a/src/PVE/HA/Manager.pm
+++ b/src/PVE/HA/Manager.pm
@@ -586,8 +586,13 @@ sub next_state_started {
$cd, $sd->{node}, $try_next);
if ($node && ($sd->{node} ne $node)) {
- $haenv->log('info', "migrate service '$sid' to node '$node' (running)");
- &$change_service_state($self, $sid, 'migrate', node => $sd->{node}, target => $node);
+ if ($cd->{type} eq 'vm') {
+ $haenv->log('info', "migrate service '$sid' to node '$node' (running)");
+ &$change_service_state($self, $sid, 'migrate', node => $sd->{node}, target => $node);
+ } else {
+ $haenv->log('info', "relocate service '$sid' to node '$node'");
+ &$change_service_state($self, $sid, 'relocate', node => $sd->{node}, target => $node);
+ }
} else {
# do nothing
}
diff --git a/src/PVE/HA/Resources.pm b/src/PVE/HA/Resources.pm
index bce396a..1129fcb 100644
--- a/src/PVE/HA/Resources.pm
+++ b/src/PVE/HA/Resources.pm
@@ -219,6 +219,11 @@ sub migrate {
online => $online,
};
+ # explicitly shutdown if $online isn't true (relocate)
+ if (!$online && $class->check_running($id)) {
+ $class->shutdown($haenv, $id);
+ }
+
my $upid = PVE::API2::Qemu->migrate_vm($params);
$haenv->upid_wait($upid);
}
@@ -321,9 +326,14 @@ sub migrate {
node => $nodename,
vmid => $id,
target => $target,
- online => $online,
+ online => 0, # we cannot migrate CT (yet) online, only relocate
};
+ # always relocate container for now
+ if ($class->check_running($id)) {
+ $class->shutdown($haenv, $id);
+ }
+
my $upid = PVE::API2::LXC->migrate_vm($params);
$haenv->upid_wait($upid);
}
diff --git a/src/test/test-shutdown3/log.expect b/src/test/test-shutdown3/log.expect
new file mode 100644
index 0000000..7cbd1bc
--- /dev/null
+++ b/src/test/test-shutdown3/log.expect
@@ -0,0 +1,58 @@
+info 0 hardware: starting simulation
+info 20 cmdlist: execute power node1 on
+info 20 node1/crm: status change startup => wait_for_quorum
+info 20 node1/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node2 on
+info 20 node2/crm: status change startup => wait_for_quorum
+info 20 node2/lrm: status change startup => wait_for_agent_lock
+info 20 cmdlist: execute power node3 on
+info 20 node3/crm: status change startup => wait_for_quorum
+info 20 node3/lrm: status change startup => wait_for_agent_lock
+info 20 node1/crm: got lock 'ha_manager_lock'
+info 20 node1/crm: status change wait_for_quorum => master
+info 20 node1/crm: node 'node1': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node2': state changed from 'unknown' => 'online'
+info 20 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 20 node1/crm: adding new service 'ct:103' on node 'node3'
+info 22 node2/crm: status change wait_for_quorum => slave
+info 24 node3/crm: status change wait_for_quorum => slave
+info 25 node3/lrm: got lock 'ha_agent_node3_lock'
+info 25 node3/lrm: status change wait_for_agent_lock => active
+info 25 node3/lrm: starting service ct:103
+info 25 node3/lrm: service status ct:103 started
+info 120 cmdlist: execute shutdown node3
+info 120 node3/lrm: shutdown LRM, stop all services
+info 125 node3/lrm: stopping service ct:103
+info 125 node3/lrm: service status ct:103 stopped
+info 126 node3/lrm: exit (loop end)
+info 126 shutdown: execute power node3 off
+info 125 node3/crm: killed by poweroff
+info 140 node1/crm: node 'node3': state changed from 'online' => 'unknown'
+info 180 node1/crm: service 'ct:103': state changed from 'started' to 'fence'
+info 180 node1/crm: node 'node3': state changed from 'unknown' => 'fence'
+info 260 node1/crm: got lock 'ha_agent_node3_lock'
+info 260 node1/crm: fencing: acknowleged - got agent lock for node 'node3'
+info 260 node1/crm: node 'node3': state changed from 'fence' => 'unknown'
+info 260 node1/crm: service 'ct:103': state changed from 'fence' to 'stopped'
+info 280 node1/crm: service 'ct:103': state changed from 'stopped' to 'started' (node = node1)
+info 281 node1/lrm: got lock 'ha_agent_node1_lock'
+info 281 node1/lrm: status change wait_for_agent_lock => active
+info 281 node1/lrm: starting service ct:103
+info 281 node1/lrm: service status ct:103 started
+info 500 cmdlist: execute power node3 on
+info 500 node3/crm: status change startup => wait_for_quorum
+info 500 node3/lrm: status change startup => wait_for_agent_lock
+info 500 node1/crm: node 'node3': state changed from 'unknown' => 'online'
+info 500 node1/crm: relocate service 'ct:103' to node 'node3'
+info 500 node1/crm: service 'ct:103': state changed from 'started' to 'relocate' (node = node1, target = node3)
+info 501 node1/lrm: service ct:103 - start relocate to node 'node3'
+info 501 node1/lrm: stopping service ct:103 (relocate)
+info 501 node1/lrm: service status ct:103 stopped
+info 501 node1/lrm: service ct:103 - end relocate to node 'node3'
+info 504 node3/crm: status change wait_for_quorum => slave
+info 520 node1/crm: service 'ct:103': state changed from 'relocate' to 'started' (node = node3)
+info 525 node3/lrm: got lock 'ha_agent_node3_lock'
+info 525 node3/lrm: status change wait_for_agent_lock => active
+info 525 node3/lrm: starting service ct:103
+info 525 node3/lrm: service status ct:103 started
+info 1100 hardware: exit simulation - done
--
2.1.4
More information about the pve-devel
mailing list