[pve-devel] [RFC cluster v2 02/10] node add: factor out code

Fabian Grünbichler f.gruenbichler at proxmox.com
Wed Dec 6 15:29:50 CET 2017


some nits inline, most not introduced in the patch itself ;)

On Mon, Dec 04, 2017 at 12:11:09PM +0100, Thomas Lamprecht wrote:
> Factor out common code, which will be used by the new API endpoint to
> join a cluster and the old legacy SSH method which we will keep for a
> bit.
> 
> Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
> ---
>  data/PVE/CLI/pvecm.pm | 152 ++------------------------------------------------
>  data/PVE/Cluster.pm   | 146 ++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 151 insertions(+), 147 deletions(-)
> 
> diff --git a/data/PVE/CLI/pvecm.pm b/data/PVE/CLI/pvecm.pm
> index a92f831..7aedd3d 100755
> --- a/data/PVE/CLI/pvecm.pm
> +++ b/data/PVE/CLI/pvecm.pm
> @@ -25,40 +25,6 @@ my $backupdir = "/var/lib/pve-cluster/backup";
>  my $dbfile = "$libdir/config.db";
>  my $authfile = "/etc/corosync/authkey";
>  
> -sub backup_database {
> -
> -    print "backup old database\n";
> -
> -    mkdir $backupdir;
> -
> -    my $ctime = time();
> -    my $cmd = [
> -	['echo', '.dump'],
> -	['sqlite3', $dbfile],
> -	['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"],
> -    ];
> -
> -    run_command($cmd, 'errmsg' => "cannot backup old database\n");
> -
> -    # purge older backup
> -    my $maxfiles = 10;
> -
> -    my @bklist = ();
> -    foreach my $fn (<$backupdir/config-*.sql.gz>) {
> -	if ($fn =~ m!/config-(\d+)\.sql.gz$!) {
> -	    push @bklist, [$fn, $1];
> -	}
> -    }
> -
> -    @bklist = sort { $b->[1] <=> $a->[1] } @bklist;
> -
> -    while (scalar (@bklist) >= $maxfiles) {
> -	my $d = pop @bklist;
> -	print "delete old backup '$d->[0]'\n";
> -	unlink $d->[0];
> -    }
> -}
> -
>  
>  __PACKAGE__->register_method ({
>      name => 'keygen',
> @@ -300,67 +266,10 @@ __PACKAGE__->register_method ({
>  	PVE::Cluster::setup_rootsshconfig();
>  	PVE::Cluster::setup_ssh_keys();
>  
> +	PVE::Cluster::assert_joinable($param->{ring0_addr}, $param->{ring1_addr}, $param->{force});
> +
>  	my $host = $param->{hostname};
>  
> -	my ($errors, $warnings) = ('', '');
> -
> -	my $error = sub {
> -	    my ($msg, $suppress) = @_;
> -
> -	    if ($suppress) {
> -		$warnings .= "* $msg\n";
> -	    } else {
> -		$errors .= "* $msg\n";
> -	    }
> -	};
> -
> -	if (!$param->{force}) {
> -
> -	    if (-f $authfile) {
> -		&$error("authentication key '$authfile' already exists", $param->{force});
> -	    }
> -
> -	    if (-f $clusterconf)  {
> -		&$error("cluster config '$clusterconf' already exists", $param->{force});
> -	    }
> -
> -	    my $vmlist = PVE::Cluster::get_vmlist();
> -	    if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
> -		&$error("this host already contains virtual guests", $param->{force});
> -	    }
> -
> -	    if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) {
> -		&$error("corosync is already running, is this node already in a cluster?!", $param->{force});
> -	    }
> -	}
> -
> -	# check if corosync ring IPs are configured on the current nodes interfaces
> -	my $check_ip = sub {
> -	    my $ip = shift;
> -	    if (defined($ip)) {
> -		if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
> -		    my $host = $ip;
> -		    eval { $ip = PVE::Network::get_ip_from_hostname($host); };
> -		    if ($@) {
> -			&$error("cannot use '$host': $@\n") ;
> -			return;
> -		    }
> -		}
> -
> -		my $cidr = (Net::IP::ip_is_ipv6($ip)) ? "$ip/128" : "$ip/32";
> -		my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
> -
> -		&$error("cannot use IP '$ip', it must be configured exactly once on local node!\n")
> -		    if (scalar(@$configured_ips) != 1);
> -	    }
> -	};
> -
> -	&$check_ip($param->{ring0_addr});
> -	&$check_ip($param->{ring1_addr});
> -
> -	warn "warning, ignore the following errors:\n$warnings" if $warnings;
> -	die "detected the following error(s):\n$errors" if $errors;
> -
>  	# make sure known_hosts is on local filesystem
>  	PVE::Cluster::ssh_unmerge_known_hosts();
>  
> @@ -372,11 +281,8 @@ __PACKAGE__->register_method ({
>  		'pvecm', 'addnode', $nodename, '--force', 1];
>  
>  	push @$cmd, '--nodeid', $param->{nodeid} if $param->{nodeid};
> -
>  	push @$cmd, '--votes', $param->{votes} if defined($param->{votes});
> -
>  	push @$cmd, '--ring0_addr', $param->{ring0_addr} if defined($param->{ring0_addr});
> -
>  	push @$cmd, '--ring1_addr', $param->{ring1_addr} if defined($param->{ring1_addr});
>  
>  	if (system (@$cmd) != 0) {
> @@ -394,58 +300,10 @@ __PACKAGE__->register_method ({
>  
>  	    system(@$cmd) == 0 || die "can't rsync data from host '$host'\n";
>  
> -	    mkdir "/etc/corosync";
> -	    my $confbase = basename($clusterconf);
> +	    my $corosync_conf = PVE::Tools::file_get_contents("$tmpdir/corosync.conf");
> +	    my $corosync_authkey = PVE::Tools::file_get_contents("$tmpdir/authkey");
>  
> -	    $cmd = "cp '$tmpdir/$confbase' '/etc/corosync/$confbase'";
> -	    system($cmd) == 0 || die "can't copy cluster configuration\n";
> -
> -	    my $keybase = basename($authfile);
> -	    system ("cp '$tmpdir/$keybase' '$authfile'") == 0 ||
> -		die "can't copy '$tmpdir/$keybase' to '$authfile'\n";
> -
> -	    print "stopping pve-cluster service\n";
> -
> -	    system("umount $basedir -f >/dev/null 2>&1");
> -	    system("systemctl stop pve-cluster") == 0 ||
> -		die "can't stop pve-cluster service\n";
> -
> -	    backup_database();
> -
> -	    unlink $dbfile;
> -
> -	    system("systemctl start pve-cluster") == 0 ||
> -		die "starting pve-cluster failed\n";
> -
> -	    system("systemctl start corosync");
> -
> -	    # wait for quorum
> -	    my $printqmsg = 1;
> -	    while (!PVE::Cluster::check_cfs_quorum(1)) {
> -		if ($printqmsg) {
> -		    print "waiting for quorum...";
> -		    STDOUT->flush();
> -		    $printqmsg = 0;
> -		}
> -		sleep(1);
> -	    }
> -	    print "OK\n" if !$printqmsg;
> -
> -	    my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
> -
> -	    print "generating node certificates\n";
> -	    PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address);
> -
> -	    print "merge known_hosts file\n";
> -	    PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1);
> -
> -	    print "restart services\n";
> -	    # restart pvedaemon (changed certs)
> -	    system("systemctl restart pvedaemon");
> -	    # restart pveproxy (changed certs)
> -	    system("systemctl restart pveproxy");
> -
> -	    print "successfully added node '$nodename' to cluster.\n";
> +	    PVE::Cluster::finish_join($host, $corosync_conf, $corosync_authkey);
>  	};
>  	my $err = $@;
>  
> diff --git a/data/PVE/Cluster.pm b/data/PVE/Cluster.pm
> index 9a248ed..531a19d 100644
> --- a/data/PVE/Cluster.pm
> +++ b/data/PVE/Cluster.pm
> @@ -1669,4 +1669,150 @@ sub ssh_info_to_command {
>      return $cmd;
>  }
>  
> +sub assert_joinable {
> +    my ($ring0_addr, $ring1_addr, $force) = @_;
> +
> +    my $clusterconf = "/etc/pve/corosync.conf";
> +    my $authfile = "/etc/corosync/authkey";

not sure if we want to re-use those somewhere else at some point and
move them to the other paths in the beginning of the file?

> +
> +    my ($errors, $warnings) = ('', '');
> +    my $error = sub {
> +	my ($msg, $suppress) = @_;
> +
> +	if ($suppress) {
> +	    $warnings .= "* $msg\n";
> +	} else {
> +	    $errors .= "* $msg\n";
> +	}
> +    };
> +
> +    if (-f $authfile) {
> +	$error->("authentication key '$authfile' already exists", $force);
> +    }
> +
> +    if (-f $clusterconf)  {
> +	$error->("cluster config '$clusterconf' already exists", $force);
> +    }

you changed the behaviour (probably to what was originally intended, but
should be mentioned in the commit message ;))

> +
> +    my $vmlist = PVE::Cluster::get_vmlist();
> +    if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
> +	$error->("this host already contains virtual guests", $force);
> +    }
> +
> +    if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) {
> +	$error->("corosync is already running, is this node already in a cluster?!", $force);
> +    }
> +
> +    # check if corosync ring IPs are configured on the current nodes interfaces
> +    my $check_ip = sub {
> +	my $ip = shift // return;
> +	if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
> +	    my $host = $ip;
> +	    eval { $ip = PVE::Network::get_ip_from_hostname($host); };
> +	    if ($@) {
> +		$error->("cannot use '$host': $@\n", 1) ;
> +		return;
> +	    }
> +	}
> +
> +	my $cidr = (Net::IP::ip_is_ipv6($ip)) ? "$ip/128" : "$ip/32";
> +	my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
> +
> +	$error->("cannot use IP '$ip', it must be configured exactly once on local node!\n")
> +	    if (scalar(@$configured_ips) != 1);
> +    };
> +
> +    $check_ip->($ring0_addr);
> +    $check_ip->($ring1_addr);
> +
> +    warn "warning, ignore the following errors:\n$warnings" if $warnings;
> +    die "detected the following error(s):\n$errors" if $errors;
> +}
> +
> +my $backup_cfs_database = sub {
> +    my ($dbfile) = @_;
> +
> +    print "backup old database\n";
> +
> +    my $backupdir = "/var/lib/pve-cluster/backup";
> +    mkdir $backupdir;
> +
> +    my $ctime = time();
> +    my $cmd = [
> +	['echo', '.dump'],
> +	['sqlite3', $dbfile],
> +	['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"],
> +    ];
> +
> +    PVE::Tools::run_command($cmd, 'errmsg' => "cannot backup old database\n");
> +
> +    # purge older backup
> +    my $maxfiles = 10;
> +
> +    my @bklist = ();
> +    foreach my $fn (<$backupdir/config-*.sql.gz>) {
> +	if ($fn =~ m!/config-(\d+)\.sql.gz$!) {
> +	    push @bklist, [$fn, $1];
> +	}
> +    }
> +
> +    @bklist = sort { $b->[1] <=> $a->[1] } @bklist;
> +
> +    while (scalar (@bklist) >= $maxfiles) {
> +	my $d = pop @bklist;
> +	print "delete old backup '$d->[0]'\n";
> +	unlink $d->[0];
> +    }
> +};
> +
> +sub finish_join {
> +    my ($nodename, $corosync_conf, $corosync_authkey) = @_;
> +
> +    my $dbfile = "/var/lib/pve-cluster/config.db";
> +    my $localclusterconf = "/etc/corosync/corosync.conf";
> +    my $authfile = "/etc/corosync/authkey";

see above?

> +
> +    mkdir "/etc/corosync";
> +    PVE::Tools::file_set_contents($authfile, $corosync_authkey);
> +    PVE::Tools::file_set_contents($localclusterconf, $corosync_conf);
> +
> +    print "stopping pve-cluster service\n";
> +
> +    system("umount $basedir -f >/dev/null 2>&1");

this seems a bit harsh, maybe re-order it after stopping the service or
drop altogether?

> +    die "can't stop pve-cluster service\n" if system("systemctl stop pve-cluster") != 0;
> +
> +    $backup_cfs_database->($dbfile);
> +    unlink $dbfile;
> +
> +    system("systemctl start pve-cluster") == 0 || die "starting pve-cluster failed\n";
> +    system("systemctl start corosync");
> +
> +    # wait for quorum
> +    my $printqmsg = 1;
> +    while (!PVE::Cluster::check_cfs_quorum(1)) {
> +	if ($printqmsg) {
> +	    print "waiting for quorum...";
> +	    STDOUT->flush();
> +	    $printqmsg = 0;
> +	}
> +	sleep(1);
> +    }
> +    print "OK\n" if !$printqmsg;
> +
> +    my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
> +
> +    print "generating node certificates\n";
> +    PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address);
> +
> +    print "merge known_hosts file\n";
> +    PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1);
> +
> +    print "restart services\n";
> +    # restart pvedaemon and pveproxy (changed certs)
> +    system("systemctl restart pvedaemon pveproxy");
> +
> +    print "successfully added node '$nodename' to cluster.\n";
> +}
> +
> +
>  1;
> -- 
> 2.11.0
> 
> 
> _______________________________________________
> pve-devel mailing list
> pve-devel at pve.proxmox.com
> https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel




More information about the pve-devel mailing list