[pve-devel] [PATCH v2 cluster 1/5] corosync: add verify_conf

Stefan Reiter s.reiter at proxmox.com
Wed Dec 4 15:04:35 CET 2019


It does some basic sanity checking, warns the user about encryption
settings and unresolved hostnames, and finally makes sure that all nodes
have the same links configured (as well as comparing the configured
links to specified interfaces, if there are any).

A corosync.conf that has been created and modified strictly through our
API should *always* be valid.

verify_conf is called in 'addnode', warnings and errors are returned via
the API to be displayed in the task log of the node asking to join. If a
verification error occurs, it is handled specially via a "raise" outside
of any lock code that strips extra information from an Exception
instance. This ensures that multi-line formatted errors can be returned.
Warnings are always returned as array, to be printed on the caller.

Includes testing.

Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
 data/PVE/API2/ClusterConfig.pm    |  40 +++++++++++-
 data/PVE/Cluster/Setup.pm         |  23 ++++++-
 data/PVE/Corosync.pm              | 104 +++++++++++++++++++++++++++++-
 data/test/corosync_parser_test.pl |  29 +++++++++
 4 files changed, 193 insertions(+), 3 deletions(-)

diff --git a/data/PVE/API2/ClusterConfig.pm b/data/PVE/API2/ClusterConfig.pm
index c426a30..e05fd55 100644
--- a/data/PVE/API2/ClusterConfig.pm
+++ b/data/PVE/API2/ClusterConfig.pm
@@ -3,6 +3,7 @@ package PVE::API2::ClusterConfig;
 use strict;
 use warnings;
 
+use PVE::Exception;
 use PVE::Tools;
 use PVE::SafeSyslog;
 use PVE::RESTHandler;
@@ -219,7 +220,13 @@ __PACKAGE__->register_method ({
 	    },
 	    corosync_conf => {
 		type => 'string',
-	    }
+	    },
+	    warnings => {
+		type => 'array',
+		items => {
+		    type => 'string',
+		},
+	    },
 	},
     },
     code => sub {
@@ -227,11 +234,17 @@ __PACKAGE__->register_method ({
 
 	PVE::Cluster::check_cfs_quorum();
 
+	my $vc_errors;
+	my $vc_warnings;
+
 	my $code = sub {
 	    my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
 	    my $nodelist = PVE::Corosync::nodelist($conf);
 	    my $totem_cfg = PVE::Corosync::totem_config($conf);
 
+	    ($vc_errors, $vc_warnings) = PVE::Corosync::verify_conf($conf);
+	    die if scalar(@$vc_errors);
+
 	    my $name = $param->{node};
 
 	    # ensure we do not reuse an address, that can crash the whole cluster!
@@ -317,11 +330,36 @@ __PACKAGE__->register_method ({
 	};
 
 	$config_change_lock->($code);
+
+	# If vc_errors is set, we died because of verify_conf.
+	# Raise this error, since it contains more information than just a
+	# single-line string.
+	if (defined($vc_errors) && scalar(@$vc_errors)) {
+	    my $err_hash = {};
+	    my $add_errs = sub {
+		my $type = shift;
+		my @arr = @_;
+		return if !scalar(@arr);
+
+		my %newhash = map { $type . $_ => $arr[$_] } 0..$#arr;
+		$err_hash = {
+		    %$err_hash,
+		    %newhash,
+		};
+	    };
+
+	    $add_errs->("warning", @$vc_warnings);
+	    $add_errs->("error", @$vc_errors);
+
+	    PVE::Exception::raise("invalid corosync.conf\n", errors => $err_hash);
+	}
+
 	die $@ if $@;
 
 	my $res = {
 	    corosync_authkey => PVE::Tools::file_get_contents($authfile),
 	    corosync_conf => PVE::Tools::file_get_contents($clusterconf),
+	    warnings => $vc_warnings,
 	};
 
 	return $res;
diff --git a/data/PVE/Cluster/Setup.pm b/data/PVE/Cluster/Setup.pm
index f863885..b115d45 100644
--- a/data/PVE/Cluster/Setup.pm
+++ b/data/PVE/Cluster/Setup.pm
@@ -685,7 +685,28 @@ sub join {
     $args->{link1} = $param->{link1} if defined($param->{link1});
 
     print "Request addition of this node\n";
-    my $res = $conn->post("/cluster/config/nodes/$nodename", $args);
+    my $res = eval { $conn->post("/cluster/config/nodes/$nodename", $args); };
+    if (my $err = $@) {
+	if (ref($err) && $err->isa('PVE::APIClient::Exception')) {
+	    # we received additional info about the error, show the user
+	    chomp $err->{msg};
+	    warn "An error occured on the cluster node: $err->{msg}\n";
+	    foreach my $key (sort keys %{$err->{errors}}) {
+		my $symbol = ($key =~ m/^warning/) ? '*' : '!';
+		warn "$symbol $err->{errors}->{$key}\n";
+	    }
+
+	    die "Cluster join aborted!\n";
+	}
+
+	die $@;
+    }
+
+    if (defined($res->{warnings})) {
+	foreach my $warn (@{$res->{warnings}}) {
+	    warn "cluster: $warn\n";
+	}
+    }
 
     print "Join request OK, finishing setup locally\n";
 
diff --git a/data/PVE/Corosync.pm b/data/PVE/Corosync.pm
index 3208a6b..d282aed 100644
--- a/data/PVE/Corosync.pm
+++ b/data/PVE/Corosync.pm
@@ -15,6 +15,7 @@ use PVE::Tools;
 use PVE::Tools qw($IPV4RE $IPV6RE);
 
 my $basedir = "/etc/pve";
+our $link_addr_re = qw/^(ring|link)(\d+)_addr$/;
 
 my $conf_array_sections = {
     node => 1,
@@ -292,6 +293,107 @@ sub create_conf {
     return { main => $conf };
 }
 
+# returns (\@errors, \@warnings) to the caller, does *not* 'die' or 'warn'
+# verification was successful if \@errors is empty
+sub verify_conf {
+    my ($conf) = @_;
+
+    my @errors = ();
+    my @warnings = ();
+
+    my $nodelist = nodelist($conf);
+    if (!$nodelist) {
+	push @errors, "no nodes found";
+	return (\@errors, \@warnings);
+    }
+
+    my $totem = $conf->{main}->{totem};
+    if (!$totem) {
+	push @errors, "no totem found";
+	return (\@errors, \@warnings);
+    }
+
+    push @warnings, "warning: authentication/encryption is not explicitly enabled"
+	. " (secauth / crypto_cipher / crypto_hash)"
+	if (!defined($totem->{secauth}) || $totem->{secauth} ne 'on') &&
+	   (!defined($totem->{crypto_cipher}) || $totem->{crypto_cipher} eq 'none');
+
+    my $interfaces = $totem->{interface};
+
+    my $verify_link_ip = sub {
+	my ($key, $link, $node) = @_;
+	my ($resolved_ip, undef) = resolve_hostname_like_corosync($link, $conf);
+	if (defined($resolved_ip)) {
+	    push @warnings, "warning: $key '$link' for node '$node' resolves to"
+		. " '$resolved_ip' - consider replacing it with the currently"
+		. " resolved IP address for stability"
+		if $resolved_ip ne $link;
+	} else {
+	    push @warnings, "warning: unable to resolve $key '$link' for node '$node'"
+	      . " to an IP address according to Corosync's resolve strategy -"
+	      . " cluster could fail on restart!";
+	}
+    };
+
+    # sort for output order stability
+    my @node_names = sort keys %$nodelist;
+
+    my $node_links = {};
+    foreach my $node (@node_names) {
+	my $options = $nodelist->{$node};
+	foreach my $opt (keys %$options) {
+	    next if $opt !~ $link_addr_re;
+	    $node_links->{$node}->{$2} = {
+		name => "${1}${2}_addr",
+		addr => $options->{$opt},
+	    };
+	}
+    }
+
+    if (%$interfaces) {
+	# if interfaces are defined, *all* links must have a matching interface
+	# definition, and vice versa
+	for my $link (0..1) {
+	    my $have_interface = defined($interfaces->{$link});
+	    foreach my $node (@node_names) {
+		my $linkdef = $node_links->{$node}->{$link};
+		if (defined($linkdef)) {
+		    $verify_link_ip->($linkdef->{name}, $linkdef->{addr}, $node);
+		    push @errors, "node '$node' has '$linkdef->{name}', but there"
+				. " is no interface number $link configured"
+			if !$have_interface;
+		} else {
+		    push @errors, "node '$node' is missing address for interface"
+				. " number $link"
+			if $have_interface;
+		}
+	    }
+	}
+    } else {
+	# without interfaces, only check that links are consistent among nodes
+	for my $link (0..1) {
+	    my $nodes_with_link = {};
+	    foreach my $node (@node_names) {
+		my $linkdef = $node_links->{$node}->{$link};
+		if (defined($linkdef)) {
+		    $verify_link_ip->($linkdef->{name}, $linkdef->{addr}, $node);
+		    $nodes_with_link->{$node} = 1;
+		}
+	    }
+
+	    if (%$nodes_with_link) {
+		foreach my $node (@node_names) {
+		    push @errors, "node '$node' is missing link $link, which"
+				. " is configured on other nodes"
+			if !defined($nodes_with_link->{$node});
+		}
+	    }
+	}
+    }
+
+    return (\@errors, \@warnings);
+}
+
 sub for_all_corosync_addresses {
     my ($corosync_conf, $ip_version, $func) = @_;
 
@@ -302,7 +404,7 @@ sub for_all_corosync_addresses {
     foreach my $node_name (sort keys %$nodelist) {
 	my $node_config = $nodelist->{$node_name};
 	foreach my $node_key (sort keys %$node_config) {
-	    if ($node_key =~ /^(ring|link)\d+_addr$/) {
+	    if ($node_key =~ $link_addr_re) {
 		my $node_address = $node_config->{$node_key};
 
 		my($ip, $version) = resolve_hostname_like_corosync($node_address, $corosync_conf);
diff --git a/data/test/corosync_parser_test.pl b/data/test/corosync_parser_test.pl
index 18ee4f7..6999d3a 100755
--- a/data/test/corosync_parser_test.pl
+++ b/data/test/corosync_parser_test.pl
@@ -5,10 +5,35 @@ use lib '..';
 use strict;
 use warnings;
 
+use Test::MockModule;
 use Test::More;
 
 use PVE::Corosync;
 
+my $known_hosts = {
+    "prox1" => "127.0.1.1",
+    "prox1-ring1" => "127.0.2.1",
+    "prox2" => "127.0.1.2",
+    "prox2-ring1" => "127.0.2.2",
+    "prox3" => "127.0.1.3",
+    "prox3-ring1" => "127.0.2.3",
+    "prox4" => "127.0.1.4",
+    "prox4-ring1" => "127.0.2.4",
+};
+
+sub mocked_resolve {
+    my ($hostname) = @_;
+
+    foreach my $host (keys %$known_hosts) {
+	return $known_hosts->{$host} if $hostname eq $host;
+    }
+
+    die "got unknown hostname '$hostname' during mocked resolve_hostname_like_corosync";
+}
+
+my $mocked_corosync_module = new Test::MockModule('PVE::Corosync');
+$mocked_corosync_module->mock('resolve_hostname_like_corosync', \&mocked_resolve);
+
 sub parser_self_check {
     my $cfg_fn = shift;
 
@@ -30,6 +55,10 @@ sub parser_self_check {
 	$config2 = PVE::Corosync::parse_conf(undef, $raw2);
     }; warn $@ if $@;
 
+    # test verify_config
+    my ($err, $warn) = PVE::Corosync::verify_conf($config1);
+    die "verify_conf failed: " . join(" ++ ", @$err) if scalar(@$err);
+
     # do not care for whitespace differences
     delete $config1->{digest};
     delete $config2->{digest};
-- 
2.20.1





More information about the pve-devel mailing list