[pve-devel] [RFC pve-ha-manager 3/5] add fence_agent method to HA environment

Wed Nov 11 12:39:03 CET 2015

Add a fence_agent method to the HA environment which controls
the fencing process.
It accepts the following commands:
 * fence - as the name suggests fence the given node
 * process - pick up fence jobs and process the fence status
 * bailout - called when losing the manager lock, kills all fence
 	      jobs and resets the status.

The Sim environment used by regression testing and simulator
implements the same behaviour as ever - waiting for the agent lock.
Bailing out and processing fencing give back hard coded true at the
moment.

The PVE2 environment starts a fence job for the node if it does not
have one yet and if hardware fencing is configured (currently hard
coded to true).
On (bigger) errors or when no fence device is left/configured it
switches to the old method - waiting for the agent lock.

Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
---
 src/PVE/HA/Env.pm      | 10 +++++++++
 src/PVE/HA/Env/PVE2.pm | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/PVE/HA/Sim/Env.pm  | 23 ++++++++++++++++++++
 3 files changed, 90 insertions(+)

diff --git a/src/PVE/HA/Env.pm b/src/PVE/HA/Env.pm
index 5c7a544..b94018b 100644
--- a/src/PVE/HA/Env.pm
+++ b/src/PVE/HA/Env.pm
@@ -185,6 +185,16 @@ sub exec_resource_agent {
     return $self->{plug}->exec_resource_agent($sid, $service_config, $cmd, @params)
 }
 
+# Hand a fencing command over to the environment plugin, cmd can be:
+# -) fence: fence a node, and give the job status back
+# -) process: pick up finished jobs
+# -) bailout: called when the manager lock was lost, kills all remaining fence jobs
+sub fence_agent {
+    my ($self, $cmd, $node) = @_;
+    my $plug = $self->{plug};
+    return $plug->fence_agent($cmd, $node);
+}
+
 # hack to support regression tests
 sub can_fork {
     my ($self) = @_;
diff --git a/src/PVE/HA/Env/PVE2.pm b/src/PVE/HA/Env/PVE2.pm
index 49654a2..4217560 100644
--- a/src/PVE/HA/Env/PVE2.pm
+++ b/src/PVE/HA/Env/PVE2.pm
@@ -492,4 +492,61 @@ sub exec_resource_agent {
     return EUNKNOWN_COMMAND;
 }
 
+my $hardware_fencing = 1; # enable/disable hardware fencing
+
+# Control the fencing process, cmd can be:
+# -) fence: fence the node, returns true once the node counts as fenced
+# -) process: pick up fence jobs and process their status, returns 1
+# -) bailout: kill all fence jobs and reset their status, returns 1
+# Dies on any other cmd.
+sub fence_agent {
+    my ($self, $cmd, $node) = @_;
+
+    # setup execution environment
+    $ENV{'PATH'} = '/sbin:/bin:/usr/sbin:/usr/bin';
+
+    if ($cmd eq 'fence') {
+
+	# old method, to be replaced with a fence config variable
+	return $self->get_ha_agent_lock($node) if !$hardware_fencing;
+
+	# a negative status is handled as error below
+	my $success = PVE::HA::Fence::is_node_fenced($node) || 0;
+
+	if ($success >= 0) {
+	    if ($success) {
+		PVE::HA::Fence::reset($node);
+		return $success;
+	    }
+
+	    # the node already has a fence job, nothing to start
+	    return $success if PVE::HA::Fence::has_fencing_job($node);
+
+	    if (PVE::HA::Fence::start_fencing($node)) {
+		$self->log('notice', "Started fencing off node '$node'");
+		return $success;
+	    }
+	}
+
+	# error: disable hardware fencing and wait for the agent lock instead
+	$self->log("error", "Could not start fencing on node '$node', no device?");
+	$hardware_fencing = 0;
+	return $self->get_ha_agent_lock($node) ? 1 : 0;
+
+    } elsif ($cmd eq 'process') {
+	# pick up fence jobs and process their status
+	# NOTE(review): function name spelled as in the original call, verify it
+	PVE::HA::Fence::proccess_fencing();
+	return 1; # do not fall through to the die below
+
+    } elsif ($cmd eq 'bailout') {
+	# reset pending fence jobs and node states
+	$self->log('notice', "bailing out from running fence jobs");
+	PVE::HA::Fence::bail_out();
+	return 1; # do not fall through to the die below
+    }
+
+    die "implement me (cmd '$cmd')";
+}
+
 1;
diff --git a/src/PVE/HA/Sim/Env.pm b/src/PVE/HA/Sim/Env.pm
index e09444e..7370684 100644
--- a/src/PVE/HA/Sim/Env.pm
+++ b/src/PVE/HA/Sim/Env.pm
@@ -364,4 +364,27 @@ sub exec_resource_agent {
     die "implement me (cmd '$cmd')";
 }
 
+# Simulated fence agent, cmd can be:
+# -) fence: returns the result of get_ha_agent_lock() for the node
+# -) process: hard coded true for now
+# -) bailout: hard coded true for now
+# Dies on any other cmd.
+# The node is only looked at by 'fence'.
+sub fence_agent {
+    my ($self, $cmd, $node) = @_;
+
+    # fixme add test possibility for fencing
+
+    # fencing is done the same way as ever, through the agent lock
+    return $self->get_ha_agent_lock($node) if $cmd eq 'fence';
+
+    # neither of these two commands does any work here yet
+    my $is_noop = $cmd eq 'process'
+	|| $cmd eq 'bailout';
+    return 1 if $is_noop;
+
+    # everything else is an unknown command
+    die "implement me (cmd '$cmd')";
+}
+
 1;
-- 
2.1.4