[pve-devel] [PATCH v2 cluster 1/5] corosync: add verify_conf

Thomas Lamprecht t.lamprecht at proxmox.com
Thu Dec 12 16:21:36 CET 2019


On 12/4/19 3:04 PM, Stefan Reiter wrote:
> It does some basic sanity checking, warns the user about encryption
> settings and unresolved hostnames, and finally makes sure that all nodes
> have the same links configured (as well as comparing the configured
> links to specified interfaces, if there are any).
> 
> A corosync.conf that has been created and modified strictly through our
> API should *always* be valid.

True, but you also need to stay backward compatible with what our API
produced since 4.0 and what was allowed to be upgraded to 6.0.
Did you check if the possibilities then, with the pve-upgrade checker
were restrictive enough as you're here?

Also, I'd still like to avoid error-ing if the configuration could worked,
but was a bit garbled up.. This is then still seen as regression for a
lot, and pitch forks may be lit and raised, so I'd like to try hard to
not break those setups, even if we can argue that it's not directly
from our API. If it's at least somewhat reasonable it should not be
an error. (did not checked all checks to closely, so you may already be
flexible enough, so this is more asking if you checked :) )

> 
> verify_conf is called in 'addnode', warnings and errors are returned via
> the API to be displayed in the task log of the node asking to join. If a
> verification error occurs, it is handled specially via a "raise" outside
> of any lock code that strips extra information from an Exception
> instance. This ensures that multi-line formatted errors can be returned.
> Warnings are always returned as array, to be printed on the caller.
> 
> Includes testing.
> 
> Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
> ---
>  data/PVE/API2/ClusterConfig.pm    |  40 +++++++++++-
>  data/PVE/Cluster/Setup.pm         |  23 ++++++-
>  data/PVE/Corosync.pm              | 104 +++++++++++++++++++++++++++++-
>  data/test/corosync_parser_test.pl |  29 +++++++++
>  4 files changed, 193 insertions(+), 3 deletions(-)
> 
> diff --git a/data/PVE/API2/ClusterConfig.pm b/data/PVE/API2/ClusterConfig.pm
> index c426a30..e05fd55 100644
> --- a/data/PVE/API2/ClusterConfig.pm
> +++ b/data/PVE/API2/ClusterConfig.pm
> @@ -3,6 +3,7 @@ package PVE::API2::ClusterConfig;
>  use strict;
>  use warnings;
>  
> +use PVE::Exception;
>  use PVE::Tools;
>  use PVE::SafeSyslog;
>  use PVE::RESTHandler;
> @@ -219,7 +220,13 @@ __PACKAGE__->register_method ({
>  	    },
>  	    corosync_conf => {
>  		type => 'string',
> -	    }
> +	    },
> +	    warnings => {
> +		type => 'array',
> +		items => {
> +		    type => 'string',
> +		},
> +	    },
>  	},
>      },
>      code => sub {
> @@ -227,11 +234,17 @@ __PACKAGE__->register_method ({
>  
>  	PVE::Cluster::check_cfs_quorum();
>  
> +	my $vc_errors;
> +	my $vc_warnings;
> +
>  	my $code = sub {
>  	    my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
>  	    my $nodelist = PVE::Corosync::nodelist($conf);
>  	    my $totem_cfg = PVE::Corosync::totem_config($conf);
>  
> +	    ($vc_errors, $vc_warnings) = PVE::Corosync::verify_conf($conf);
> +	    die if scalar(@$vc_errors);
> +
>  	    my $name = $param->{node};
>  
>  	    # ensure we do not reuse an address, that can crash the whole cluster!
> @@ -317,11 +330,36 @@ __PACKAGE__->register_method ({
>  	};
>  
>  	$config_change_lock->($code);
> +
> +	# If vc_errors is set, we died because of verify_conf.
> +	# Raise this error, since it contains more information than just a
> +	# single-line string.
> +	if (defined($vc_errors) && scalar(@$vc_errors)) {
> +	    my $err_hash = {};
> +	    my $add_errs = sub {
> +		my $type = shift;
> +		my @arr = @_;
> +		return if !scalar(@arr);
> +
> +		my %newhash = map { $type . $_ => $arr[$_] } 0..$#arr;
> +		$err_hash = {
> +		    %$err_hash,
> +		    %newhash,
> +		};
> +	    };
> +
> +	    $add_errs->("warning", @$vc_warnings);
> +	    $add_errs->("error", @$vc_errors);
> +
> +	    PVE::Exception::raise("invalid corosync.conf\n", errors => $err_hash);
> +	}
> +
>  	die $@ if $@;
>  
>  	my $res = {
>  	    corosync_authkey => PVE::Tools::file_get_contents($authfile),
>  	    corosync_conf => PVE::Tools::file_get_contents($clusterconf),
> +	    warnings => $vc_warnings,
>  	};
>  
>  	return $res;
> diff --git a/data/PVE/Cluster/Setup.pm b/data/PVE/Cluster/Setup.pm
> index f863885..b115d45 100644
> --- a/data/PVE/Cluster/Setup.pm
> +++ b/data/PVE/Cluster/Setup.pm
> @@ -685,7 +685,28 @@ sub join {
>      $args->{link1} = $param->{link1} if defined($param->{link1});
>  
>      print "Request addition of this node\n";
> -    my $res = $conn->post("/cluster/config/nodes/$nodename", $args);
> +    my $res = eval { $conn->post("/cluster/config/nodes/$nodename", $args); };
> +    if (my $err = $@) {
> +	if (ref($err) && $err->isa('PVE::APIClient::Exception')) {
> +	    # we received additional info about the error, show the user
> +	    chomp $err->{msg};
> +	    warn "An error occured on the cluster node: $err->{msg}\n";
> +	    foreach my $key (sort keys %{$err->{errors}}) {
> +		my $symbol = ($key =~ m/^warning/) ? '*' : '!';
> +		warn "$symbol $err->{errors}->{$key}\n";
> +	    }
> +
> +	    die "Cluster join aborted!\n";
> +	}
> +
> +	die $@;
> +    }
> +
> +    if (defined($res->{warnings})) {
> +	foreach my $warn (@{$res->{warnings}}) {
> +	    warn "cluster: $warn\n";
> +	}
> +    }
>  
>      print "Join request OK, finishing setup locally\n";
>  
> diff --git a/data/PVE/Corosync.pm b/data/PVE/Corosync.pm
> index 3208a6b..d282aed 100644
> --- a/data/PVE/Corosync.pm
> +++ b/data/PVE/Corosync.pm
> @@ -15,6 +15,7 @@ use PVE::Tools;
>  use PVE::Tools qw($IPV4RE $IPV6RE);
>  
>  my $basedir = "/etc/pve";
> +our $link_addr_re = qw/^(ring|link)(\d+)_addr$/;
>  
>  my $conf_array_sections = {
>      node => 1,
> @@ -292,6 +293,107 @@ sub create_conf {
>      return { main => $conf };
>  }
>  
> +# returns (\@errors, \@warnings) to the caller, does *not* 'die' or 'warn'
> +# verification was successful if \@errors is empty
> +sub verify_conf {
> +    my ($conf) = @_;
> +
> +    my @errors = ();
> +    my @warnings = ();
> +
> +    my $nodelist = nodelist($conf);
> +    if (!$nodelist) {
> +	push @errors, "no nodes found";
> +	return (\@errors, \@warnings);
> +    }
> +
> +    my $totem = $conf->{main}->{totem};
> +    if (!$totem) {
> +	push @errors, "no totem found";
> +	return (\@errors, \@warnings);
> +    }
> +
> +    push @warnings, "warning: authentication/encryption is not explicitly enabled"
> +	. " (secauth / crypto_cipher / crypto_hash)"

you do not check the crypto_hash?

> +	if (!defined($totem->{secauth}) || $totem->{secauth} ne 'on') &&
> +	   (!defined($totem->{crypto_cipher}) || $totem->{crypto_cipher} eq 'none');

This style above is NAK'd ^^
Post-if only if the statement it guards is on a single, not long, line.

Either full-blown `if () { .. }` or split it up and do:

push @warnings, "warning: authentication (secauth) is not explicitly enabled"
    if (!defined($totem->{secauth}) || $totem->{secauth} ne 'on');

push @warnings, "warning: encryption (crypto_cipher) is not explicitly enabled"
    if (!defined($totem->{secauth}) || $totem->{secauth} ne 'on');


> +
> +    my $interfaces = $totem->{interface};
> +
> +    my $verify_link_ip = sub {
> +	my ($key, $link, $node) = @_;
> +	my ($resolved_ip, undef) = resolve_hostname_like_corosync($link, $conf);
> +	if (defined($resolved_ip)) {
> +	    push @warnings, "warning: $key '$link' for node '$node' resolves to"
> +		. " '$resolved_ip' - consider replacing it with the currently"
> +		. " resolved IP address for stability"
> +		if $resolved_ip ne $link;

same here, better to just use full-blown if (...) {…}

> +	} else {
> +	    push @warnings, "warning: unable to resolve $key '$link' for node '$node'"
> +	      . " to an IP address according to Corosync's resolve strategy -"
> +	      . " cluster could fail on restart!";
> +	}
> +    };
> +
> +    # sort for output order stability
> +    my @node_names = sort keys %$nodelist;
> +
> +    my $node_links = {};
> +    foreach my $node (@node_names) {
> +	my $options = $nodelist->{$node};
> +	foreach my $opt (keys %$options) {
> +	    next if $opt !~ $link_addr_re;

maybe a ("private") helper method which return both, type and id, would be nicer,
else one asks what $1, or $2 is:

my ($linktype, $linkid) = $parse_link_entry->($opt) or next;

(that with the next is guessed, needs to ensure it has no bad side-effects
in combination with the my ())

> +	    $node_links->{$node}->{$2} = {
> +		name => "${1}${2}_addr",
> +		addr => $options->{$opt},
> +	    };
> +	}
> +    }
> +
> +    if (%$interfaces) {
> +	# if interfaces are defined, *all* links must have a matching interface
> +	# definition, and vice versa
> +	for my $link (0..1) {
> +	    my $have_interface = defined($interfaces->{$link});
> +	    foreach my $node (@node_names) {
> +		my $linkdef = $node_links->{$node}->{$link};
> +		if (defined($linkdef)) {
> +		    $verify_link_ip->($linkdef->{name}, $linkdef->{addr}, $node);
> +		    push @errors, "node '$node' has '$linkdef->{name}', but there"
> +				. " is no interface number $link configured"
> +			if !$have_interface;
> +		} else {
> +		    push @errors, "node '$node' is missing address for interface"
> +				. " number $link"
> +			if $have_interface;
> +		}
> +	    }
> +	}
> +    } else {
> +	# without interfaces, only check that links are consistent among nodes
> +	for my $link (0..1) {
> +	    my $nodes_with_link = {};
> +	    foreach my $node (@node_names) {
> +		my $linkdef = $node_links->{$node}->{$link};
> +		if (defined($linkdef)) {
> +		    $verify_link_ip->($linkdef->{name}, $linkdef->{addr}, $node);
> +		    $nodes_with_link->{$node} = 1;
> +		}
> +	    }
> +
> +	    if (%$nodes_with_link) {
> +		foreach my $node (@node_names) {
> +		    push @errors, "node '$node' is missing link $link, which"
> +				. " is configured on other nodes"
> +			if !defined($nodes_with_link->{$node});
> +		}
> +	    }
> +	}
> +    }
> +
> +    return (\@errors, \@warnings);
> +}
> +
>  sub for_all_corosync_addresses {
>      my ($corosync_conf, $ip_version, $func) = @_;
>  
> @@ -302,7 +404,7 @@ sub for_all_corosync_addresses {
>      foreach my $node_name (sort keys %$nodelist) {
>  	my $node_config = $nodelist->{$node_name};
>  	foreach my $node_key (sort keys %$node_config) {
> -	    if ($node_key =~ /^(ring|link)\d+_addr$/) {
> +	    if ($node_key =~ $link_addr_re) {
>  		my $node_address = $node_config->{$node_key};
>  
>  		my($ip, $version) = resolve_hostname_like_corosync($node_address, $corosync_conf);
> diff --git a/data/test/corosync_parser_test.pl b/data/test/corosync_parser_test.pl
> index 18ee4f7..6999d3a 100755
> --- a/data/test/corosync_parser_test.pl
> +++ b/data/test/corosync_parser_test.pl
> @@ -5,10 +5,35 @@ use lib '..';
>  use strict;
>  use warnings;
>  
> +use Test::MockModule;
>  use Test::More;
>  
>  use PVE::Corosync;
>  
> +my $known_hosts = {
> +    "prox1" => "127.0.1.1",
> +    "prox1-ring1" => "127.0.2.1",
> +    "prox2" => "127.0.1.2",
> +    "prox2-ring1" => "127.0.2.2",
> +    "prox3" => "127.0.1.3",
> +    "prox3-ring1" => "127.0.2.3",
> +    "prox4" => "127.0.1.4",
> +    "prox4-ring1" => "127.0.2.4",
> +};
> +
> +sub mocked_resolve {
> +    my ($hostname) = @_;
> +
> +    foreach my $host (keys %$known_hosts) {
> +	return $known_hosts->{$host} if $hostname eq $host;
> +    }
> +
> +    die "got unknown hostname '$hostname' during mocked resolve_hostname_like_corosync";
> +}
> +
> +my $mocked_corosync_module = new Test::MockModule('PVE::Corosync');
> +$mocked_corosync_module->mock('resolve_hostname_like_corosync', \&mocked_resolve);
> +
>  sub parser_self_check {
>      my $cfg_fn = shift;
>  
> @@ -30,6 +55,10 @@ sub parser_self_check {
>  	$config2 = PVE::Corosync::parse_conf(undef, $raw2);
>      }; warn $@ if $@;
>  
> +    # test verify_config
> +    my ($err, $warn) = PVE::Corosync::verify_conf($config1);
> +    die "verify_conf failed: " . join(" ++ ", @$err) if scalar(@$err);
> +
>      # do not care for whitespace differences
>      delete $config1->{digest};
>      delete $config2->{digest};
> 





More information about the pve-devel mailing list