[pve-devel] [PATCH cluster 1/5] corosync: add verify_conf

Stefan Reiter s.reiter at proxmox.com
Wed Nov 20 17:43:02 CET 2019


It does some basic sanity checking, warns the user about encryption
settings and unresolved hostnames, and finally makes sure that all nodes
have the same links configured (as well as comparing the configured
links to specified interfaces, if there are any).

A corosync.conf that has been created and modified strictly through our
API should *always* be valid.

verify_conf is called in 'addnode', warnings and errors are returned via
the API to be displayed in the task log of the node asking to join. If a
verification error occurs, it is handled specially via a "raise" outside
of any lock code that strips extra information from an Exception
instance. This ensures that multi-line formatted errors can be returned.
Warnings are always returned as array, to be printed on the caller.

Includes testing.

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
 data/PVE/API2/ClusterConfig.pm    |  27 +++++++-
 data/PVE/Cluster/Setup.pm         |  20 +++++-
 data/PVE/Corosync.pm              | 103 +++++++++++++++++++++++++++++-
 data/test/corosync_parser_test.pl |  28 ++++++++
 4 files changed, 175 insertions(+), 3 deletions(-)

diff --git a/data/PVE/API2/ClusterConfig.pm b/data/PVE/API2/ClusterConfig.pm
index c426a30..3d778b8 100644
--- a/data/PVE/API2/ClusterConfig.pm
+++ b/data/PVE/API2/ClusterConfig.pm
@@ -3,6 +3,7 @@ package PVE::API2::ClusterConfig;
 use strict;
 use warnings;
 
+use PVE::Exception;
 use PVE::Tools;
 use PVE::SafeSyslog;
 use PVE::RESTHandler;
@@ -219,7 +220,13 @@ __PACKAGE__->register_method ({
 	    },
 	    corosync_conf => {
 		type => 'string',
-	    }
+	    },
+	    warnings => {
+		type => 'array',
+		items => {
+		    type => 'string',
+		},
+	    },
 	},
     },
     code => sub {
@@ -227,11 +234,17 @@ __PACKAGE__->register_method ({
 
 	PVE::Cluster::check_cfs_quorum();
 
+	my $vc_errors;
+	my $vc_warnings;
+
 	my $code = sub {
 	    my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
 	    my $nodelist = PVE::Corosync::nodelist($conf);
 	    my $totem_cfg = PVE::Corosync::totem_config($conf);
 
+	    ($vc_errors, $vc_warnings) = PVE::Corosync::verify_conf($conf);
+	    die if scalar(@$vc_errors);
+
 	    my $name = $param->{node};
 
 	    # ensure we do not reuse an address, that can crash the whole cluster!
@@ -317,11 +330,23 @@ __PACKAGE__->register_method ({
 	};
 
 	$config_change_lock->($code);
+
+	# If vc_errors is set, we died because of verify_conf.
+	# Raise this error, since it contains more information than just a
+	# single-line string.
+	if (defined($vc_errors) && scalar(@$vc_errors)) {
+	    my @combined = ();
+	    push @combined, @$vc_errors, @$vc_warnings;
+	    my %err_hash = map { $_ => $combined[$_] } 0..$#combined;
+	    PVE::Exception::raise("invalid corosync.conf\n", errors => \%err_hash);
+	}
+
 	die $@ if $@;
 
 	my $res = {
 	    corosync_authkey => PVE::Tools::file_get_contents($authfile),
 	    corosync_conf => PVE::Tools::file_get_contents($clusterconf),
+	    warnings => $vc_warnings,
 	};
 
 	return $res;
diff --git a/data/PVE/Cluster/Setup.pm b/data/PVE/Cluster/Setup.pm
index 81e3ef8..5569d6c 100644
--- a/data/PVE/Cluster/Setup.pm
+++ b/data/PVE/Cluster/Setup.pm
@@ -667,7 +667,25 @@ sub join {
     $args->{link1} = $param->{link1} if defined($param->{link1});
 
     print "Request addition of this node\n";
-    my $res = $conn->post("/cluster/config/nodes/$nodename", $args);
+    my $res = eval { $conn->post("/cluster/config/nodes/$nodename", $args); };
+    if ($@) {
+	if ($@->{msg} && $@->{errors}) {
+	    # we received additional info about the error, show the user
+	    $@->{msg} =~ s/^(\n|\s)*|(\n|\s)*$//g;
+	    warn "An error occured on the cluster node: $@->{msg}\n";
+	    foreach my $err (keys %{$@->{errors}}) {
+		warn "* $@->{errors}->{$err}\n"
+	    }
+	    die "Cluster join aborted!\n";
+	}
+	die $@;
+    }
+
+    if (defined($res->{warnings})) {
+	foreach my $warn (@{$res->{warnings}}) {
+	    warn "cluster: $warn\n";
+	}
+    }
 
     print "Join request OK, finishing setup locally\n";
 
diff --git a/data/PVE/Corosync.pm b/data/PVE/Corosync.pm
index f52547b..5973aaf 100644
--- a/data/PVE/Corosync.pm
+++ b/data/PVE/Corosync.pm
@@ -15,6 +15,7 @@ use PVE::Tools;
 use PVE::Tools qw($IPV4RE $IPV6RE);
 
 my $basedir = "/etc/pve";
+our $link_addr_re = qw/^(ring|link)(\d+)_addr$/;
 
 my $conf_array_sections = {
     node => 1,
@@ -294,6 +295,106 @@ sub create_conf {
     return { main => $conf };
 }
 
+# returns (\@errors, \@warnings) to the caller, does *not* 'die' or 'warn'
+# verification was successful if \@errors is empty
+sub verify_conf {
+    my ($conf) = @_;
+
+    my @errors = ();
+    my @warnings = ();
+
+    my $nodelist = nodelist($conf);
+    if (!$nodelist) {
+	push @errors, "no nodes found";
+	return (\@errors, \@warnings);
+    }
+
+    my $totem = $conf->{main}->{totem};
+    if (!$totem) {
+	push @errors, "no totem found";
+	return (\@errors, \@warnings);
+    }
+
+    push @warnings, "warning: authentication/encryption is not explicitly enabled"
+	. " (secauth / crypto_cipher / crypto_hash)"
+	if (!defined($totem->{secauth}) || $totem->{secauth} ne 'on') &&
+	   (!defined($totem->{crypto_cipher}) || $totem->{crypto_cipher} eq 'none');
+
+    my $interfaces = $totem->{interface};
+
+    my $verify_link_ip = sub {
+	my ($key, $link, $node) = @_;
+	my ($resolved_ip, undef) = resolve_hostname_like_corosync($link, $conf);
+	if (defined($resolved_ip)) {
+	    push @warnings, "warning: $key '$link' for node '$node' resolves to"
+		. " '$resolved_ip' - consider replacing it with the currently"
+		. " resolved IP address for stability"
+		if $resolved_ip ne $link;
+	} else {
+	    push @warnings, "warning: unable to resolve $key '$link' for node '$node'"
+	      . " to an IP address according to Corosync's resolve strategy -"
+	      . " cluster could fail on restart!";
+	}
+    };
+
+    my $find_option = sub {
+	my ($node, $num) = @_;
+
+	foreach my $opt (keys %$node) {
+	    next if $opt !~ $link_addr_re;
+	    return ("${1}${2}_addr", $node->{$opt}) if int($2) == $num;
+	}
+
+	return undef;
+    };
+
+    # sort for output order stability
+    my @node_names = sort keys %$nodelist;
+
+    if (%$interfaces) {
+	# if interfaces are defined, *all* links must have a matching interface
+	# definition, and vice versa
+	for my $link (0..1) {
+	    my $have_interface = defined($interfaces->{$link});
+	    foreach my $node (@node_names) {
+		my ($optname, $addr) = $find_option->($nodelist->{$node}, $link);
+		if (defined($optname)) {
+		    $verify_link_ip->($optname, $addr, $node);
+		    push @errors, "node '$node' has '$optname', but there is no"
+				. " interface number $link configured"
+			if !$have_interface;
+		} else {
+		    push @errors, "node '$node' is missing address for interface"
+				. " number $link"
+			if $have_interface;
+		}
+	    }
+	}
+    } else {
+	# without interfaces, only check that links are consistent among nodes
+	for my $link (0..1) {
+	    my $nodes_with_link = {};
+	    foreach my $node (@node_names) {
+		my ($optname, $addr) = $find_option->($nodelist->{$node}, $link);
+		if (defined($optname)) {
+		    $verify_link_ip->($optname, $addr, $node);
+		    $nodes_with_link->{$node} = 1;
+		}
+	    }
+
+	    if (%$nodes_with_link) {
+		foreach my $node (@node_names) {
+		    push @errors, "node '$node' is missing link $link, which"
+				. " is configured on other nodes"
+			if !defined($nodes_with_link->{$node});
+		}
+	    }
+	}
+    }
+
+    return (\@errors, \@warnings);
+}
+
 sub for_all_corosync_addresses {
     my ($corosync_conf, $ip_version, $func) = @_;
 
@@ -304,7 +405,7 @@ sub for_all_corosync_addresses {
     foreach my $node_name (sort keys %$nodelist) {
 	my $node_config = $nodelist->{$node_name};
 	foreach my $node_key (sort keys %$node_config) {
-	    if ($node_key =~ /^(ring|link)\d+_addr$/) {
+	    if ($node_key =~ $link_addr_re) {
 		my $node_address = $node_config->{$node_key};
 
 		my($ip, $version) = resolve_hostname_like_corosync($node_address, $corosync_conf);
diff --git a/data/test/corosync_parser_test.pl b/data/test/corosync_parser_test.pl
index 18ee4f7..42c4853 100755
--- a/data/test/corosync_parser_test.pl
+++ b/data/test/corosync_parser_test.pl
@@ -5,10 +5,35 @@ use lib '..';
 use strict;
 use warnings;
 
+use Test::MockModule;
 use Test::More;
 
 use PVE::Corosync;
 
+my $known_hosts = {
+    "prox1" => "127.0.1.1",
+    "prox1-ring1" => "127.0.2.1",
+    "prox2" => "127.0.1.2",
+    "prox2-ring1" => "127.0.2.2",
+    "prox3" => "127.0.1.3",
+    "prox3-ring1" => "127.0.2.3",
+    "prox4" => "127.0.1.4",
+    "prox4-ring1" => "127.0.2.4",
+};
+
+sub mocked_resolve {
+    my ($hostname) = @_;
+
+    foreach my $host (keys %$known_hosts) {
+	return $known_hosts->{$host} if $hostname eq $host;
+    }
+
+    die "got unknown hostname '$hostname' during mocked resolve_hostname_like_corosync";
+}
+
+my $mocked_corosync_module = new Test::MockModule('PVE::Corosync');
+$mocked_corosync_module->mock('resolve_hostname_like_corosync', \&mocked_resolve);
+
 sub parser_self_check {
     my $cfg_fn = shift;
 
@@ -20,6 +45,9 @@ sub parser_self_check {
 	$raw1 = PVE::Tools::file_get_contents($cfg_fn);
 	$config1 = PVE::Corosync::parse_conf($cfg_fn, $raw1);
 
+	# test verify_config
+	PVE::Corosync::verify_conf($config1);
+
 	# write config
 	$raw2 = PVE::Corosync::write_conf(undef, $config1);
 	# do not actually write cfg, but you can outcomment to do so, e.g. if
-- 
2.20.1





More information about the pve-devel mailing list