[pve-devel] [RFC cluster v2 02/10] node add: factor out code
Fabian Grünbichler
f.gruenbichler at proxmox.com
Wed Dec 6 15:29:50 CET 2017
some nits inline, most not introduced in the patch itself ;)
On Mon, Dec 04, 2017 at 12:11:09PM +0100, Thomas Lamprecht wrote:
> Factor out common code, which will be used by the new API endpoint to
> join a cluster and the old legacy SSH method which we will keep for a
> bit.
>
> Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
> ---
> data/PVE/CLI/pvecm.pm | 152 ++------------------------------------------------
> data/PVE/Cluster.pm | 146 ++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 151 insertions(+), 147 deletions(-)
>
> diff --git a/data/PVE/CLI/pvecm.pm b/data/PVE/CLI/pvecm.pm
> index a92f831..7aedd3d 100755
> --- a/data/PVE/CLI/pvecm.pm
> +++ b/data/PVE/CLI/pvecm.pm
> @@ -25,40 +25,6 @@ my $backupdir = "/var/lib/pve-cluster/backup";
> my $dbfile = "$libdir/config.db";
> my $authfile = "/etc/corosync/authkey";
>
> -sub backup_database {
> -
> - print "backup old database\n";
> -
> - mkdir $backupdir;
> -
> - my $ctime = time();
> - my $cmd = [
> - ['echo', '.dump'],
> - ['sqlite3', $dbfile],
> - ['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"],
> - ];
> -
> - run_command($cmd, 'errmsg' => "cannot backup old database\n");
> -
> - # purge older backup
> - my $maxfiles = 10;
> -
> - my @bklist = ();
> - foreach my $fn (<$backupdir/config-*.sql.gz>) {
> - if ($fn =~ m!/config-(\d+)\.sql.gz$!) {
> - push @bklist, [$fn, $1];
> - }
> - }
> -
> - @bklist = sort { $b->[1] <=> $a->[1] } @bklist;
> -
> - while (scalar (@bklist) >= $maxfiles) {
> - my $d = pop @bklist;
> - print "delete old backup '$d->[0]'\n";
> - unlink $d->[0];
> - }
> -}
> -
>
> __PACKAGE__->register_method ({
> name => 'keygen',
> @@ -300,67 +266,10 @@ __PACKAGE__->register_method ({
> PVE::Cluster::setup_rootsshconfig();
> PVE::Cluster::setup_ssh_keys();
>
> + PVE::Cluster::assert_joinable($param->{ring0_addr}, $param->{ring1_addr}, $param->{force});
> +
> my $host = $param->{hostname};
>
> - my ($errors, $warnings) = ('', '');
> -
> - my $error = sub {
> - my ($msg, $suppress) = @_;
> -
> - if ($suppress) {
> - $warnings .= "* $msg\n";
> - } else {
> - $errors .= "* $msg\n";
> - }
> - };
> -
> - if (!$param->{force}) {
> -
> - if (-f $authfile) {
> - &$error("authentication key '$authfile' already exists", $param->{force});
> - }
> -
> - if (-f $clusterconf) {
> - &$error("cluster config '$clusterconf' already exists", $param->{force});
> - }
> -
> - my $vmlist = PVE::Cluster::get_vmlist();
> - if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
> - &$error("this host already contains virtual guests", $param->{force});
> - }
> -
> - if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) {
> - &$error("corosync is already running, is this node already in a cluster?!", $param->{force});
> - }
> - }
> -
> - # check if corosync ring IPs are configured on the current nodes interfaces
> - my $check_ip = sub {
> - my $ip = shift;
> - if (defined($ip)) {
> - if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
> - my $host = $ip;
> - eval { $ip = PVE::Network::get_ip_from_hostname($host); };
> - if ($@) {
> - &$error("cannot use '$host': $@\n") ;
> - return;
> - }
> - }
> -
> - my $cidr = (Net::IP::ip_is_ipv6($ip)) ? "$ip/128" : "$ip/32";
> - my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
> -
> - &$error("cannot use IP '$ip', it must be configured exactly once on local node!\n")
> - if (scalar(@$configured_ips) != 1);
> - }
> - };
> -
> - &$check_ip($param->{ring0_addr});
> - &$check_ip($param->{ring1_addr});
> -
> - warn "warning, ignore the following errors:\n$warnings" if $warnings;
> - die "detected the following error(s):\n$errors" if $errors;
> -
> # make sure known_hosts is on local filesystem
> PVE::Cluster::ssh_unmerge_known_hosts();
>
> @@ -372,11 +281,8 @@ __PACKAGE__->register_method ({
> 'pvecm', 'addnode', $nodename, '--force', 1];
>
> push @$cmd, '--nodeid', $param->{nodeid} if $param->{nodeid};
> -
> push @$cmd, '--votes', $param->{votes} if defined($param->{votes});
> -
> push @$cmd, '--ring0_addr', $param->{ring0_addr} if defined($param->{ring0_addr});
> -
> push @$cmd, '--ring1_addr', $param->{ring1_addr} if defined($param->{ring1_addr});
>
> if (system (@$cmd) != 0) {
> @@ -394,58 +300,10 @@ __PACKAGE__->register_method ({
>
> system(@$cmd) == 0 || die "can't rsync data from host '$host'\n";
>
> - mkdir "/etc/corosync";
> - my $confbase = basename($clusterconf);
> + my $corosync_conf = PVE::Tools::file_get_contents("$tmpdir/corosync.conf");
> + my $corosync_authkey = PVE::Tools::file_get_contents("$tmpdir/authkey");
>
> - $cmd = "cp '$tmpdir/$confbase' '/etc/corosync/$confbase'";
> - system($cmd) == 0 || die "can't copy cluster configuration\n";
> -
> - my $keybase = basename($authfile);
> - system ("cp '$tmpdir/$keybase' '$authfile'") == 0 ||
> - die "can't copy '$tmpdir/$keybase' to '$authfile'\n";
> -
> - print "stopping pve-cluster service\n";
> -
> - system("umount $basedir -f >/dev/null 2>&1");
> - system("systemctl stop pve-cluster") == 0 ||
> - die "can't stop pve-cluster service\n";
> -
> - backup_database();
> -
> - unlink $dbfile;
> -
> - system("systemctl start pve-cluster") == 0 ||
> - die "starting pve-cluster failed\n";
> -
> - system("systemctl start corosync");
> -
> - # wait for quorum
> - my $printqmsg = 1;
> - while (!PVE::Cluster::check_cfs_quorum(1)) {
> - if ($printqmsg) {
> - print "waiting for quorum...";
> - STDOUT->flush();
> - $printqmsg = 0;
> - }
> - sleep(1);
> - }
> - print "OK\n" if !$printqmsg;
> -
> - my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
> -
> - print "generating node certificates\n";
> - PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address);
> -
> - print "merge known_hosts file\n";
> - PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1);
> -
> - print "restart services\n";
> - # restart pvedaemon (changed certs)
> - system("systemctl restart pvedaemon");
> - # restart pveproxy (changed certs)
> - system("systemctl restart pveproxy");
> -
> - print "successfully added node '$nodename' to cluster.\n";
> + PVE::Cluster::finish_join($host, $corosync_conf, $corosync_authkey);
> };
> my $err = $@;
>
> diff --git a/data/PVE/Cluster.pm b/data/PVE/Cluster.pm
> index 9a248ed..531a19d 100644
> --- a/data/PVE/Cluster.pm
> +++ b/data/PVE/Cluster.pm
> @@ -1669,4 +1669,150 @@ sub ssh_info_to_command {
> return $cmd;
> }
>
> +sub assert_joinable {
> + my ($ring0_addr, $ring1_addr, $force) = @_;
> +
> + my $clusterconf = "/etc/pve/corosync.conf";
> + my $authfile = "/etc/corosync/authkey";
not sure if we want to re-use those somewhere else at some point and
move them to the other paths in the beginning of the file?
> +
> + my ($errors, $warnings) = ('', '');
> + my $error = sub {
> + my ($msg, $suppress) = @_;
> +
> + if ($suppress) {
> + $warnings .= "* $msg\n";
> + } else {
> + $errors .= "* $msg\n";
> + }
> + };
> +
> + if (-f $authfile) {
> + $error->("authentication key '$authfile' already exists", $force);
> + }
> +
> + if (-f $clusterconf) {
> + $error->("cluster config '$clusterconf' already exists", $force);
> + }
you changed the behaviour (probably to what was originally intended, but
should be mentioned in the commit message ;))
> +
> + my $vmlist = PVE::Cluster::get_vmlist();
> + if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
> + $error->("this host already contains virtual guests", $force);
> + }
> +
> + if (system("corosync-quorumtool -l >/dev/null 2>&1") == 0) {
> + $error->("corosync is already running, is this node already in a cluster?!", $force);
> + }
> +
> + # check if corosync ring IPs are configured on the current nodes interfaces
> + my $check_ip = sub {
> + my $ip = shift // return;
> + if (!PVE::JSONSchema::pve_verify_ip($ip, 1)) {
> + my $host = $ip;
> + eval { $ip = PVE::Network::get_ip_from_hostname($host); };
> + if ($@) {
> + $error->("cannot use '$host': $@\n", 1) ;
> + return;
> + }
> + }
> +
> + my $cidr = (Net::IP::ip_is_ipv6($ip)) ? "$ip/128" : "$ip/32";
> + my $configured_ips = PVE::Network::get_local_ip_from_cidr($cidr);
> +
> + $error->("cannot use IP '$ip', it must be configured exactly once on local node!\n")
> + if (scalar(@$configured_ips) != 1);
> + };
> +
> + $check_ip->($ring0_addr);
> + $check_ip->($ring1_addr);
> +
> + warn "warning, ignore the following errors:\n$warnings" if $warnings;
> + die "detected the following error(s):\n$errors" if $errors;
> +}
> +
> +my $backup_cfs_database = sub {
> + my ($dbfile) = @_;
> +
> + print "backup old database\n";
> +
> + my $backupdir = "/var/lib/pve-cluster/backup";
> + mkdir $backupdir;
> +
> + my $ctime = time();
> + my $cmd = [
> + ['echo', '.dump'],
> + ['sqlite3', $dbfile],
> + ['gzip', '-', \ ">${backupdir}/config-${ctime}.sql.gz"],
> + ];
> +
> + PVE::Tools::run_command($cmd, 'errmsg' => "cannot backup old database\n");
> +
> + # purge older backup
> + my $maxfiles = 10;
> +
> + my @bklist = ();
> + foreach my $fn (<$backupdir/config-*.sql.gz>) {
> + if ($fn =~ m!/config-(\d+)\.sql.gz$!) {
> + push @bklist, [$fn, $1];
> + }
> + }
> +
> + @bklist = sort { $b->[1] <=> $a->[1] } @bklist;
> +
> + while (scalar (@bklist) >= $maxfiles) {
> + my $d = pop @bklist;
> + print "delete old backup '$d->[0]'\n";
> + unlink $d->[0];
> + }
> +};
> +
> +sub finish_join {
> + my ($nodename, $corosync_conf, $corosync_authkey) = @_;
> +
> + my $dbfile = "/var/lib/pve-cluster/config.db";
> + my $localclusterconf = "/etc/corosync/corosync.conf";
> + my $authfile = "/etc/corosync/authkey";
see above?
> +
> + mkdir "/etc/corosync";
> + PVE::Tools::file_set_contents($authfile, $corosync_authkey);
> + PVE::Tools::file_set_contents($localclusterconf, $corosync_conf);
> +
> + print "stopping pve-cluster service\n";
> +
> + system("umount $basedir -f >/dev/null 2>&1");
this seems a bit harsh, maybe re-order it after stopping the service or
drop altogether?
> + die "can't stop pve-cluster service\n" if system("systemctl stop pve-cluster") != 0;
> +
> + $backup_cfs_database->($dbfile);
> + unlink $dbfile;
> +
> + system("systemctl start pve-cluster") == 0 || die "starting pve-cluster failed\n";
> + system("systemctl start corosync");
> +
> + # wait for quorum
> + my $printqmsg = 1;
> + while (!PVE::Cluster::check_cfs_quorum(1)) {
> + if ($printqmsg) {
> + print "waiting for quorum...";
> + STDOUT->flush();
> + $printqmsg = 0;
> + }
> + sleep(1);
> + }
> + print "OK\n" if !$printqmsg;
> +
> + my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
> +
> + print "generating node certificates\n";
> + PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address);
> +
> + print "merge known_hosts file\n";
> + PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1);
> +
> + print "restart services\n";
> + # restart pvedaemon and pveproxy (changed certs)
> + system("systemctl restart pvedaemon pveproxy");
> +
> + print "successfully added node '$nodename' to cluster.\n";
> +}
> +
> +
> 1;
> --
> 2.11.0
>
>
> _______________________________________________
> pve-devel mailing list
> pve-devel at pve.proxmox.com
> https://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel
More information about the pve-devel
mailing list