[pve-devel] [RFC cluster v2 02/10] node add: factor out code
Thomas Lamprecht
t.lamprecht at proxmox.com
Wed Dec 6 16:34:25 CET 2017
On 12/06/2017 03:29 PM, Fabian Grünbichler wrote:
> some nits inline, most not introduced in the patch itself ;)
>
> On Mon, Dec 04, 2017 at 12:11:09PM +0100, Thomas Lamprecht wrote:
>> Factor out common code, which will be used by the new API endpoint to
>> join a cluster and the old legacy SSH method which we will keep for a
>> bit.
>>
>> Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
>> ---
>> data/PVE/CLI/pvecm.pm | 152 ++------------------------------------------------
>> data/PVE/Cluster.pm | 146 ++++++++++++++++++++++++++++++++++++++++++++++++
>> 2 files changed, 151 insertions(+), 147 deletions(-)
>>
>> diff --git a/data/PVE/CLI/pvecm.pm b/data/PVE/CLI/pvecm.pm
>> index a92f831..7aedd3d 100755
>> --- a/data/PVE/CLI/pvecm.pm
>> +++ b/data/PVE/CLI/pvecm.pm
>> @@ -25,40 +25,6 @@ my $backupdir = "/var/lib/pve-cluster/backup";
>> my $dbfile = "$libdir/config.db";
>> my $authfile = "/etc/corosync/authkey";
>>
>> -sub backup_database {
>> -
>> - print "backup old database\n";
>> -
>> - mkdir $backupdir;
>> -
>> - my $ctime = time();
>> - my $cmd = [
>> - ['echo', '.dump'],
>> - ['sqlite3', $dbfile],
>> - ['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"],
>> - ];
>> -
>> - run_command($cmd, 'errmsg' => "cannot backup old database\n");
>> -
>> - # purge older backup
>> - my $maxfiles = 10;
>> -
>> - my @bklist = ();
>> - foreach my $fn (<$backupdir/config-*.sql.gz>) {
>> - if ($fn =~ m!/config-(\d+)\.sql.gz$!) {
>> - push @bklist, [$fn, $1];
>> - }
>> - }
>> -
>> - @bklist = sort { $b->[1] <=> $a->[1] } @bklist;
>> -
>> - while (scalar (@bklist) >= $maxfiles) {
>> - my $d = pop @bklist;
>> - print "delete old backup '$d->[0]'\n";
>> - unlink $d->[0];
>> - }
>> -}
>> -
>>
>> __PACKAGE__->register_method ({
>> name => 'keygen',
>> @@ -300,67 +266,10 @@ __PACKAGE__->register_method ({
>> PVE::Cluster::setup_rootsshconfig();
>> PVE::Cluster::setup_ssh_keys();
>>
>> + PVE::Cluster::assert_joinable($param->{ring0_addr}, $param->{ring1_addr}, $param->{force});
>> +
>> my $host = $param->{hostname};
>>
>> - my ($errors, $warnings) = ('', '');
>> -
>> - my $error = sub {
>> - my ($msg, $suppress) = @_;
>> -
>> - if ($suppress) {
>> - $warnings .= "* $msg\n";
>> - } else {
>> - $errors .= "* $msg\n";
>> - }
>> - };
>> -
>> - if (!$param->{force}) {
>> -
>> - if (-f $authfile) {
>> - &$error("authentication key '$authfile' already exists", $param->{force});
>> - }
>> -
>> - if (-f $clusterconf) {
>> - &$error("cluster config '$clusterconf' already exists", $param->{force});
>> - }
>> -
>> - my $vmlist = PVE::Cluster::get_vmlist();
>> - if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
>> - &$error("this host already contains virtual guests", $param->{force});
>> - }
>> -
>> - if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) {
>> - &$error("corosync is already running, is this node already in a cluster?!", $param->{force});
>> - }
>> - }
>> -
>> - # check if corosync ring IPs are configured on the current nodes interfaces
>> - my $check_ip = sub {
>> - my $ip = shift;
>> - if (defined($ip)) {
>> - if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
>> - my $host = $ip;
>> - eval { $ip = PVE::Network::get_ip_from_hostname($host); };
>> - if ($@) {
>> - &$error("cannot use '$host': $@\n") ;
>> - return;
>> - }
>> - }
>> -
>> - my $cidr = (Net::IP::ip_is_ipv6($ip)) ? "$ip/128" : "$ip/32";
>> - my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
>> -
>> - &$error("cannot use IP '$ip', it must be configured exactly once on local node!\n")
>> - if (scalar(@$configured_ips) != 1);
>> - }
>> - };
>> -
>> - &$check_ip($param->{ring0_addr});
>> - &$check_ip($param->{ring1_addr});
>> -
>> - warn "warning, ignore the following errors:\n$warnings" if $warnings;
>> - die "detected the following error(s):\n$errors" if $errors;
>> -
>> # make sure known_hosts is on local filesystem
>> PVE::Cluster::ssh_unmerge_known_hosts();
>>
>> @@ -372,11 +281,8 @@ __PACKAGE__->register_method ({
>> 'pvecm', 'addnode', $nodename, '--force', 1];
>>
>> push @$cmd, '--nodeid', $param->{nodeid} if $param->{nodeid};
>> -
>> push @$cmd, '--votes', $param->{votes} if defined($param->{votes});
>> -
>> push @$cmd, '--ring0_addr', $param->{ring0_addr} if defined($param->{ring0_addr});
>> -
>> push @$cmd, '--ring1_addr', $param->{ring1_addr} if defined($param->{ring1_addr});
>>
>> if (system (@$cmd) != 0) {
>> @@ -394,58 +300,10 @@ __PACKAGE__->register_method ({
>>
>> system(@$cmd) == 0 || die "can't rsync data from host '$host'\n";
>>
>> - mkdir "/etc/corosync";
>> - my $confbase = basename($clusterconf);
>> + my $corosync_conf = PVE::Tools::file_get_contents("$tmpdir/corosync.conf");
>> + my $corosync_authkey = PVE::Tools::file_get_contents("$tmpdir/authkey");
>>
>> - $cmd = "cp '$tmpdir/$confbase' '/etc/corosync/$confbase'";
>> - system($cmd) == 0 || die "can't copy cluster configuration\n";
>> -
>> - my $keybase = basename($authfile);
>> - system ("cp '$tmpdir/$keybase' '$authfile'") == 0 ||
>> - die "can't copy '$tmpdir/$keybase' to '$authfile'\n";
>> -
>> - print "stopping pve-cluster service\n";
>> -
>> - system("umount $basedir -f >/dev/null 2>&1");
>> - system("systemctl stop pve-cluster") == 0 ||
>> - die "can't stop pve-cluster service\n";
>> -
>> - backup_database();
>> -
>> - unlink $dbfile;
>> -
>> - system("systemctl start pve-cluster") == 0 ||
>> - die "starting pve-cluster failed\n";
>> -
>> - system("systemctl start corosync");
>> -
>> - # wait for quorum
>> - my $printqmsg = 1;
>> - while (!PVE::Cluster::check_cfs_quorum(1)) {
>> - if ($printqmsg) {
>> - print "waiting for quorum...";
>> - STDOUT->flush();
>> - $printqmsg = 0;
>> - }
>> - sleep(1);
>> - }
>> - print "OK\n" if !$printqmsg;
>> -
>> - my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
>> -
>> - print "generating node certificates\n";
>> - PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address);
>> -
>> - print "merge known_hosts file\n";
>> - PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1);
>> -
>> - print "restart services\n";
>> - # restart pvedaemon (changed certs)
>> - system("systemctl restart pvedaemon");
>> - # restart pveproxy (changed certs)
>> - system("systemctl restart pveproxy");
>> -
>> - print "successfully added node '$nodename' to cluster.\n";
>> + PVE::Cluster::finish_join($host, $corosync_conf, $corosync_authkey);
>> };
>> my $err = $@;
>>
>> diff --git a/data/PVE/Cluster.pm b/data/PVE/Cluster.pm
>> index 9a248ed..531a19d 100644
>> --- a/data/PVE/Cluster.pm
>> +++ b/data/PVE/Cluster.pm
>> @@ -1669,4 +1669,150 @@ sub ssh_info_to_command {
>> return $cmd;
>> }
>>
>> +sub assert_joinable {
>> + my ($ring0_addr, $ring1_addr, $force) = @_;
>> +
>> + my $clusterconf = "/etc/pve/corosync.conf";
>> + my $authfile = "/etc/corosync/authkey";
>
> not sure if we want to re-use those somewhere else at some point and
> move them to the other paths in the beginning of the file?
>
They where on top of the CLIHandler, so yes, probably.
As this was the single place where we used it, I put it initially here
and as it is only cosmetic I let it be for the RFC. AFAIK, the CLIHandler
could need still a cleanup after this series for such things, some may have
become unused.
>> +
>> + my ($errors, $warnings) = ('', '');
>> + my $error = sub {
>> + my ($msg, $suppress) = @_;
>> +
>> + if ($suppress) {
>> + $warnings .= "* $msg\n";
>> + } else {
>> + $errors .= "* $msg\n";
>> + }
>> + };
>> +
>> + if (-f $authfile) {
>> + $error->("authentication key '$authfile' already exists", $force);
>> + }
>> +
>> + if (-f $clusterconf) {
>> + $error->("cluster config '$clusterconf' already exists", $force);
>> + }
>
> you changed the behaviour (probably to what was originally intended, but
> should be mentioned in the commit message ;))
>
Oh, sorry, will mention it in the commit message in v3.
>> +
>> + my $vmlist = PVE::Cluster::get_vmlist();
>> + if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
>> + $error->("this host already contains virtual guests", $force);
>> + }
>> +
>> + if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) {
>> + $error->("corosync is already running, is this node already in a cluster?!", $force);
>> + }
>> +
>> + # check if corosync ring IPs are configured on the current nodes interfaces
>> + my $check_ip = sub {
>> + my $ip = shift // return;
>> + if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
>> + my $host = $ip;
>> + eval { $ip = PVE::Network::get_ip_from_hostname($host); };
>> + if ($@) {
>> + $error->("cannot use '$host': $@\n", 1) ;
>> + return;
>> + }
>> + }
>> +
>> + my $cidr = (Net::IP::ip_is_ipv6($ip)) ? "$ip/128" : "$ip/32";
>> + my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
>> +
>> + $error->("cannot use IP '$ip', it must be configured exactly once on local node!\n")
>> + if (scalar(@$configured_ips) != 1);
>> + };
>> +
>> + $check_ip->($ring0_addr);
>> + $check_ip->($ring1_addr);
>> +
>> + warn "warning, ignore the following errors:\n$warnings" if $warnings;
>> + die "detected the following error(s):\n$errors" if $errors;
>> +}
>> +
>> +my $backup_cfs_database = sub {
>> + my ($dbfile) = @_;
>> +
>> + print "backup old database\n";
>> +
>> + my $backupdir = "/var/lib/pve-cluster/backup";
>> + mkdir $backupdir;
>> +
>> + my $ctime = time();
>> + my $cmd = [
>> + ['echo', '.dump'],
>> + ['sqlite3', $dbfile],
>> + ['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"],
>> + ];
>> +
>> + PVE::Tools::run_command($cmd, 'errmsg' => "cannot backup old database\n");
>> +
>> + # purge older backup
>> + my $maxfiles = 10;
>> +
>> + my @bklist = ();
>> + foreach my $fn (<$backupdir/config-*.sql.gz>) {
>> + if ($fn =~ m!/config-(\d+)\.sql.gz$!) {
>> + push @bklist, [$fn, $1];
>> + }
>> + }
>> +
>> + @bklist = sort { $b->[1] <=> $a->[1] } @bklist;
>> +
>> + while (scalar (@bklist) >= $maxfiles) {
>> + my $d = pop @bklist;
>> + print "delete old backup '$d->[0]'\n";
>> + unlink $d->[0];
>> + }
>> +};
>> +
>> +sub finish_join {
>> + my ($nodename, $corosync_conf, $corosync_authkey) = @_;
>> +
>> + my $dbfile = "/var/lib/pve-cluster/config.db";
>> + my $localclusterconf = "/etc/corosync/corosync.conf";
>> + my $authfile = "/etc/corosync/authkey";
>
> see above?
>
>> +
>> + mkdir "/etc/corosync";
>> + PVE::Tools::file_set_contents($authfile, $corosync_authkey);
>> + PVE::Tools::file_set_contents($localclusterconf, $corosync_conf);
>> +
>> + print "stopping pve-cluster service\n";
>> +
>> + system("umount $basedir -f >/dev/null 2>&1");
>
> this seems a bit harsh, maybe re-order it after stopping the service or
> drop altogether?
>
Yes, this did not really caught my attention while copying the code.
I'd rather drop it, if the service could be stopped it really *shouldn't*
be running anymore
>> + die "can't stop pve-cluster service\n" if system("systemctl stop pve-cluster") != 0;
>> +
>> + $backup_cfs_database->($dbfile);
>> + unlink $dbfile;
>> +
>> + system("systemctl start pve-cluster") == 0 || die "starting pve-cluster failed\n";
>> + system("systemctl start corosync");
>> +
>> + # wait for quorum
>> + my $printqmsg = 1;
>> + while (!PVE::Cluster::check_cfs_quorum(1)) {
>> + if ($printqmsg) {
>> + print "waiting for quorum...";
>> + STDOUT->flush();
>> + $printqmsg = 0;
>> + }
>> + sleep(1);
>> + }
>> + print "OK\n" if !$printqmsg;
>> +
>> + my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
>> +
>> + print "generating node certificates\n";
>> + PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address);
>> +
>> + print "merge known_hosts file\n";
>> + PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1);
>> +
>> + print "restart services\n";
>> + # restart pvedaemon and pveproxy (changed certs)
>> + system("systemctl restart pvedaemon pveproxy");
>> +
>> + print "successfully added node '$nodename' to cluster.\n";
>> +}
>> +
>> +
>> 1;
>> --
>> 2.11.0
More information about the pve-devel
mailing list