[pve-devel] [RFC cluster v2 02/10] node add: factor out code

Thomas Lamprecht t.lamprecht at proxmox.com
Wed Dec 6 16:34:25 CET 2017


On 12/06/2017 03:29 PM, Fabian Grünbichler wrote:
> some nits inline, most not introduced in the patch itself ;)
> 
> On Mon, Dec 04, 2017 at 12:11:09PM +0100, Thomas Lamprecht wrote:
>> Factor out common code, which will be used by the new API endpoint to
>> join a cluster and the old legacy SSH method which we will keep for a
>> bit.
>>
>> Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
>> ---
>>  data/PVE/CLI/pvecm.pm | 152 ++------------------------------------------------
>>  data/PVE/Cluster.pm   | 146 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 151 insertions(+), 147 deletions(-)
>>
>> diff --git a/data/PVE/CLI/pvecm.pm b/data/PVE/CLI/pvecm.pm
>> index a92f831..7aedd3d 100755
>> --- a/data/PVE/CLI/pvecm.pm
>> +++ b/data/PVE/CLI/pvecm.pm
>> @@ -25,40 +25,6 @@ my $backupdir = "/var/lib/pve-cluster/backup";
>>  my $dbfile = "$libdir/config.db";
>>  my $authfile = "/etc/corosync/authkey";
>>  
>> -sub backup_database {
>> -
>> -    print "backup old database\n";
>> -
>> -    mkdir $backupdir;
>> -
>> -    my $ctime = time();
>> -    my $cmd = [
>> -	['echo', '.dump'],
>> -	['sqlite3', $dbfile],
>> -	['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"],
>> -    ];
>> -
>> -    run_command($cmd, 'errmsg' => "cannot backup old database\n");
>> -
>> -    # purge older backup
>> -    my $maxfiles = 10;
>> -
>> -    my @bklist = ();
>> -    foreach my $fn (<$backupdir/config-*.sql.gz>) {
>> -	if ($fn =~ m!/config-(\d+)\.sql.gz$!) {
>> -	    push @bklist, [$fn, $1];
>> -	}
>> -    }
>> -
>> -    @bklist = sort { $b->[1] <=> $a->[1] } @bklist;
>> -
>> -    while (scalar (@bklist) >= $maxfiles) {
>> -	my $d = pop @bklist;
>> -	print "delete old backup '$d->[0]'\n";
>> -	unlink $d->[0];
>> -    }
>> -}
>> -
>>  
>>  __PACKAGE__->register_method ({
>>      name => 'keygen',
>> @@ -300,67 +266,10 @@ __PACKAGE__->register_method ({
>>  	PVE::Cluster::setup_rootsshconfig();
>>  	PVE::Cluster::setup_ssh_keys();
>>  
>> +	PVE::Cluster::assert_joinable($param->{ring0_addr}, $param->{ring1_addr}, $param->{force});
>> +
>>  	my $host = $param->{hostname};
>>  
>> -	my ($errors, $warnings) = ('', '');
>> -
>> -	my $error = sub {
>> -	    my ($msg, $suppress) = @_;
>> -
>> -	    if ($suppress) {
>> -		$warnings .= "* $msg\n";
>> -	    } else {
>> -		$errors .= "* $msg\n";
>> -	    }
>> -	};
>> -
>> -	if (!$param->{force}) {
>> -
>> -	    if (-f $authfile) {
>> -		&$error("authentication key '$authfile' already exists", $param->{force});
>> -	    }
>> -
>> -	    if (-f $clusterconf)  {
>> -		&$error("cluster config '$clusterconf' already exists", $param->{force});
>> -	    }
>> -
>> -	    my $vmlist = PVE::Cluster::get_vmlist();
>> -	    if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
>> -		&$error("this host already contains virtual guests", $param->{force});
>> -	    }
>> -
>> -	    if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) {
>> -		&$error("corosync is already running, is this node already in a cluster?!", $param->{force});
>> -	    }
>> -	}
>> -
>> -	# check if corosync ring IPs are configured on the current nodes interfaces
>> -	my $check_ip = sub {
>> -	    my $ip = shift;
>> -	    if (defined($ip)) {
>> -		if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
>> -		    my $host = $ip;
>> -		    eval { $ip = PVE::Network::get_ip_from_hostname($host); };
>> -		    if ($@) {
>> -			&$error("cannot use '$host': $@\n") ;
>> -			return;
>> -		    }
>> -		}
>> -
>> -		my $cidr = (Net::IP::ip_is_ipv6($ip)) ? "$ip/128" : "$ip/32";
>> -		my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
>> -
>> -		&$error("cannot use IP '$ip', it must be configured exactly once on local node!\n")
>> -		    if (scalar(@$configured_ips) != 1);
>> -	    }
>> -	};
>> -
>> -	&$check_ip($param->{ring0_addr});
>> -	&$check_ip($param->{ring1_addr});
>> -
>> -	warn "warning, ignore the following errors:\n$warnings" if $warnings;
>> -	die "detected the following error(s):\n$errors" if $errors;
>> -
>>  	# make sure known_hosts is on local filesystem
>>  	PVE::Cluster::ssh_unmerge_known_hosts();
>>  
>> @@ -372,11 +281,8 @@ __PACKAGE__->register_method ({
>>  		'pvecm', 'addnode', $nodename, '--force', 1];
>>  
>>  	push @$cmd, '--nodeid', $param->{nodeid} if $param->{nodeid};
>> -
>>  	push @$cmd, '--votes', $param->{votes} if defined($param->{votes});
>> -
>>  	push @$cmd, '--ring0_addr', $param->{ring0_addr} if defined($param->{ring0_addr});
>> -
>>  	push @$cmd, '--ring1_addr', $param->{ring1_addr} if defined($param->{ring1_addr});
>>  
>>  	if (system (@$cmd) != 0) {
>> @@ -394,58 +300,10 @@ __PACKAGE__->register_method ({
>>  
>>  	    system(@$cmd) == 0 || die "can't rsync data from host '$host'\n";
>>  
>> -	    mkdir "/etc/corosync";
>> -	    my $confbase = basename($clusterconf);
>> +	    my $corosync_conf = PVE::Tools::file_get_contents("$tmpdir/corosync.conf");
>> +	    my $corosync_authkey = PVE::Tools::file_get_contents("$tmpdir/authkey");
>>  
>> -	    $cmd = "cp '$tmpdir/$confbase' '/etc/corosync/$confbase'";
>> -	    system($cmd) == 0 || die "can't copy cluster configuration\n";
>> -
>> -	    my $keybase = basename($authfile);
>> -	    system ("cp '$tmpdir/$keybase' '$authfile'") == 0 ||
>> -		die "can't copy '$tmpdir/$keybase' to '$authfile'\n";
>> -
>> -	    print "stopping pve-cluster service\n";
>> -
>> -	    system("umount $basedir -f >/dev/null 2>&1");
>> -	    system("systemctl stop pve-cluster") == 0 ||
>> -		die "can't stop pve-cluster service\n";
>> -
>> -	    backup_database();
>> -
>> -	    unlink $dbfile;
>> -
>> -	    system("systemctl start pve-cluster") == 0 ||
>> -		die "starting pve-cluster failed\n";
>> -
>> -	    system("systemctl start corosync");
>> -
>> -	    # wait for quorum
>> -	    my $printqmsg = 1;
>> -	    while (!PVE::Cluster::check_cfs_quorum(1)) {
>> -		if ($printqmsg) {
>> -		    print "waiting for quorum...";
>> -		    STDOUT->flush();
>> -		    $printqmsg = 0;
>> -		}
>> -		sleep(1);
>> -	    }
>> -	    print "OK\n" if !$printqmsg;
>> -
>> -	    my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
>> -
>> -	    print "generating node certificates\n";
>> -	    PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address);
>> -
>> -	    print "merge known_hosts file\n";
>> -	    PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1);
>> -
>> -	    print "restart services\n";
>> -	    # restart pvedaemon (changed certs)
>> -	    system("systemctl restart pvedaemon");
>> -	    # restart pveproxy (changed certs)
>> -	    system("systemctl restart pveproxy");
>> -
>> -	    print "successfully added node '$nodename' to cluster.\n";
>> +	    PVE::Cluster::finish_join($host, $corosync_conf, $corosync_authkey);
>>  	};
>>  	my $err = $@;
>>  
>> diff --git a/data/PVE/Cluster.pm b/data/PVE/Cluster.pm
>> index 9a248ed..531a19d 100644
>> --- a/data/PVE/Cluster.pm
>> +++ b/data/PVE/Cluster.pm
>> @@ -1669,4 +1669,150 @@ sub ssh_info_to_command {
>>      return $cmd;
>>  }
>>  
>> +sub assert_joinable {
>> +    my ($ring0_addr, $ring1_addr, $force) = @_;
>> +
>> +    my $clusterconf = "/etc/pve/corosync.conf";
>> +    my $authfile = "/etc/corosync/authkey";
> 
> not sure if we want to re-use those somewhere else at some point and
> move them to the other paths in the beginning of the file?
> 

They where on top of the CLIHandler, so yes, probably.
As this was the single place where we used it, I put it initially here
and as it is only cosmetic I let it be for the RFC. AFAIK, the CLIHandler
could need still a cleanup after this series for such things, some may have
become unused.

>> +
>> +    my ($errors, $warnings) = ('', '');
>> +    my $error = sub {
>> +	my ($msg, $suppress) = @_;
>> +
>> +	if ($suppress) {
>> +	    $warnings .= "* $msg\n";
>> +	} else {
>> +	    $errors .= "* $msg\n";
>> +	}
>> +    };
>> +
>> +    if (-f $authfile) {
>> +	$error->("authentication key '$authfile' already exists", $force);
>> +    }
>> +
>> +    if (-f $clusterconf)  {
>> +	$error->("cluster config '$clusterconf' already exists", $force);
>> +    }
> 
> you changed the behaviour (probably to what was originally intended, but
> should be mentioned in the commit message ;))
> 

Oh, sorry, will mention it in the commit message in v3.

>> +
>> +    my $vmlist = PVE::Cluster::get_vmlist();
>> +    if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
>> +	$error->("this host already contains virtual guests", $force);
>> +    }
>> +
>> +    if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) {
>> +	$error->("corosync is already running, is this node already in a cluster?!", $force);
>> +    }
>> +
>> +    # check if corosync ring IPs are configured on the current nodes interfaces
>> +    my $check_ip = sub {
>> +	my $ip = shift // return;
>> +	if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
>> +	    my $host = $ip;
>> +	    eval { $ip = PVE::Network::get_ip_from_hostname($host); };
>> +	    if ($@) {
>> +		$error->("cannot use '$host': $@\n", 1) ;
>> +		return;
>> +	    }
>> +	}
>> +
>> +	my $cidr = (Net::IP::ip_is_ipv6($ip)) ? "$ip/128" : "$ip/32";
>> +	my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
>> +
>> +	$error->("cannot use IP '$ip', it must be configured exactly once on local node!\n")
>> +	    if (scalar(@$configured_ips) != 1);
>> +    };
>> +
>> +    $check_ip->($ring0_addr);
>> +    $check_ip->($ring1_addr);
>> +
>> +    warn "warning, ignore the following errors:\n$warnings" if $warnings;
>> +    die "detected the following error(s):\n$errors" if $errors;
>> +}
>> +
>> +my $backup_cfs_database = sub {
>> +    my ($dbfile) = @_;
>> +
>> +    print "backup old database\n";
>> +
>> +    my $backupdir = "/var/lib/pve-cluster/backup";
>> +    mkdir $backupdir;
>> +
>> +    my $ctime = time();
>> +    my $cmd = [
>> +	['echo', '.dump'],
>> +	['sqlite3', $dbfile],
>> +	['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"],
>> +    ];
>> +
>> +    PVE::Tools::run_command($cmd, 'errmsg' => "cannot backup old database\n");
>> +
>> +    # purge older backup
>> +    my $maxfiles = 10;
>> +
>> +    my @bklist = ();
>> +    foreach my $fn (<$backupdir/config-*.sql.gz>) {
>> +	if ($fn =~ m!/config-(\d+)\.sql.gz$!) {
>> +	    push @bklist, [$fn, $1];
>> +	}
>> +    }
>> +
>> +    @bklist = sort { $b->[1] <=> $a->[1] } @bklist;
>> +
>> +    while (scalar (@bklist) >= $maxfiles) {
>> +	my $d = pop @bklist;
>> +	print "delete old backup '$d->[0]'\n";
>> +	unlink $d->[0];
>> +    }
>> +};
>> +
>> +sub finish_join {
>> +    my ($nodename, $corosync_conf, $corosync_authkey) = @_;
>> +
>> +    my $dbfile = "/var/lib/pve-cluster/config.db";
>> +    my $localclusterconf = "/etc/corosync/corosync.conf";
>> +    my $authfile = "/etc/corosync/authkey";
> 
> see above?
> 
>> +
>> +    mkdir "/etc/corosync";
>> +    PVE::Tools::file_set_contents($authfile, $corosync_authkey);
>> +    PVE::Tools::file_set_contents($localclusterconf, $corosync_conf);
>> +
>> +    print "stopping pve-cluster service\n";
>> +
>> +    system("umount $basedir -f >/dev/null 2>&1");
> 
> this seems a bit harsh, maybe re-order it after stopping the service or
> drop altogether?
> 

Yes, this did not really caught my attention while copying the code.
I'd rather drop it, if the service could be stopped it really *shouldn't*
be running anymore

>> +    die "can't stop pve-cluster service\n" if system("systemctl stop pve-cluster") != 0;
>> +
>> +    $backup_cfs_database->($dbfile);
>> +    unlink $dbfile;
>> +
>> +    system("systemctl start pve-cluster") == 0 || die "starting pve-cluster failed\n";
>> +    system("systemctl start corosync");
>> +
>> +    # wait for quorum
>> +    my $printqmsg = 1;
>> +    while (!PVE::Cluster::check_cfs_quorum(1)) {
>> +	if ($printqmsg) {
>> +	    print "waiting for quorum...";
>> +	    STDOUT->flush();
>> +	    $printqmsg = 0;
>> +	}
>> +	sleep(1);
>> +    }
>> +    print "OK\n" if !$printqmsg;
>> +
>> +    my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
>> +
>> +    print "generating node certificates\n";
>> +    PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address);
>> +
>> +    print "merge known_hosts file\n";
>> +    PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1);
>> +
>> +    print "restart services\n";
>> +    # restart pvedaemon and pveproxy (changed certs)
>> +    system("systemctl restart pvedaemon pveproxy");
>> +
>> +    print "successfully added node '$nodename' to cluster.\n";
>> +}
>> +
>> +
>>  1;
>> -- 
>> 2.11.0






More information about the pve-devel mailing list