[pve-devel] [PULL cluster v6 00/17] Allow adding/deleting nodes and cluster creation over API

Thomas Lamprecht <t.lamprecht@proxmox.com>
Wed Feb 7 10:17:41 CET 2018


The sixth iteration of this series, see [1] for the v5 cover letter.
This time it comes just as a pull request; not too much changed between
v5 and now (a diff is attached at the end) and I did not want to spam
the list.

I've addressed the problem where my 'joinee peer is outdated, tell the
user to update or fall back to SSH' detection failed.
It was caused by the fork_worker behavior, which just printed the error
from the worker code to the (in this case synchronous) output pipe, so
we lost the exception that we used to detect a '501 Method Not
Implemented' error.
I moved the join code out into a helper. This has the drawback that we
no longer get a task log when using pvecm, only the cluster log entry.
While it would be nice to have, we did not have it before either, so
nothing is lost, and this can always be addressed later on.  When
integrated into the WebUI it already gets the task log.
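
For reference, the pvecm CLI now calls the join helper directly under
the local lock and keeps the exception around for the fallback decision,
roughly like this (a sketch based on the diff at the end of this mail,
not the exact final code):

    my $local_cluster_lock = "/var/lock/pvecm.lock";
    # lock_file reports failures through $@ instead of dying
    PVE::Tools::lock_file($local_cluster_lock, 10, \&PVE::Cluster::join, $param);

    if (my $err = $@) {
        if (ref($err) eq 'PVE::APIClient::Exception' && $err->{code} == 501) {
            # peer does not implement the join API yet -> point the user
            # at updating the peer or at the --use_ssh fallback
        }
        die $err;
    }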

Further, I added local locking for cluster create and join, plus some
minor cleanups; as said, see the end of this mail for the diff.
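
Concretely, both the cluster create and the join API handlers now take
the local lock inside their forked worker, along the lines of the
following (again a sketch taken from the diff below; $code stands for
the respective handler body, $rpcenv and $authuser for the usual REST
environment values):

    my $local_cluster_lock = "/var/lock/pvecm.lock";

    my $worker = sub {
        # serialize concurrent create/join attempts on this node;
        # lock_file signals failure through $@, so re-raise it to have
        # the task marked as failed
        PVE::Tools::lock_file($local_cluster_lock, 10, $code);
        die $@ if $@;
    };

    return $rpcenv->fork_worker('clustercreate', '', $authuser, $worker);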

The following changes since commit fe883b1c6c0b6a5475a4a25477979ea362639884:

  bump version to 5.0-20 (2018-01-22 15:27:18 +0100)

are available in the git repository at:

  https://github.com/GamerSource/pve-cluster.git clus_add_split_up_v6

you can pull directly via:

  $ git pull --ff-only  https://github.com/GamerSource/pve-cluster.git clus_add_split_up_v6

for you to fetch changes up to e7775ff3d09d95908ff7aee0956350a0f7c4c988:

  lock locally on cluster create and join (2018-02-07 09:24:37 +0100)

[1]: https://pve.proxmox.com/pipermail/pve-devel/2018-January/030495.html

----------------------------------------------------------------
Thomas Lamprecht (17):
      move addnode/delnode from CLI to cluster config API
      tell cluster log when adding/deleting a node
      node add: factor out checks for joining
      node add: factor out local joining steps
      assert_joinable: simplify error and warning handling
      use run_command instead of system
      return cluster config and authkey in addnode API call
      api/cluster: add join endpoint
      pvecm add: use API by default to join cluster
      cluster create: factor out initial corosync config assembly
      cluster create: restart corosync & pmxfs in one go and say so
      move cluster create to API
      api/cluster: create cluster in forked worker
      factor out common parameter definitions
      api/cluster: add endpoint to GET cluster join information
      use resolved IP address for ring0_addr as default
      lock locally on cluster create and join

 data/PVE/API2/ClusterConfig.pm | 445 +++++++++++++++++++++++++++++++-
 data/PVE/CLI/pvecm.pm          | 571 +++--------------------------------------
 data/PVE/Cluster.pm            | 196 +++++++++++++-
 data/PVE/Corosync.pm           |  73 ++++++
 debian/control.in              |   4 +-
 5 files changed, 754 insertions(+), 535 deletions(-)


Changes v5 -> v6:
git diff clus_add_split_up_v5..clus_add_split_up_v6
--
diff --git a/data/PVE/API2/ClusterConfig.pm b/data/PVE/API2/ClusterConfig.pm
index 25f8965..37e7e3c 100644
--- a/data/PVE/API2/ClusterConfig.pm
+++ b/data/PVE/API2/ClusterConfig.pm
@@ -16,7 +16,7 @@ use base qw(PVE::RESTHandler);
 
 my $clusterconf = "/etc/pve/corosync.conf";
 my $authfile = "/etc/corosync/authkey";
-my $local_cluster_change_lock = "/var/lock/pvecm.lock";
+my $local_cluster_lock = "/var/lock/pvecm.lock";
 
 my $ring0_desc = {
     type => 'string', format => 'address',
@@ -146,7 +146,8 @@ __PACKAGE__->register_method ({
 	};
 
 	my $worker = sub {
-	    PVE::Tools::lock_file($local_cluster_change_lock, 10, $code);
+	    PVE::Tools::lock_file($local_cluster_lock, 10, $code);
+	    die $@ if $@;
 	};
 
 	return $rpcenv->fork_worker('clustercreate', '',  $authuser, $worker);
@@ -190,7 +191,7 @@ __PACKAGE__->register_method({
 my $config_change_lock = sub {
     my ($code) = @_;
 
-    PVE::Tools::lock_file($local_cluster_change_lock, 10, sub {
+    PVE::Tools::lock_file($local_cluster_lock, 10, sub {
 	PVE::Cluster::cfs_update(1);
 	my $members = PVE::Cluster::get_members();
 	if (scalar(keys %$members) > 1) {
@@ -494,10 +495,7 @@ __PACKAGE__->register_method ({
 		default => "IP resolved by node's hostname",
 	    }),
 	    ring1_addr => get_standard_option('corosync-ring1-addr'),
-	    fingerprint => get_standard_option('fingerprint-sha256', {
-		description => "SSL certificate fingerprint. Optional in CLI environment.",
-		optional => 1,
-	    }),
+	    fingerprint => get_standard_option('fingerprint-sha256'),
 	    password => {
 		description => "Superuser (root) password of peer node.",
 		type => 'string',
@@ -509,70 +507,12 @@ __PACKAGE__->register_method ({
     code => sub {
 	my ($param) = @_;
 
-	my $nodename = PVE::INotify::nodename();
 	my $rpcenv = PVE::RPCEnvironment::get();
 	my $authuser = $rpcenv->get_user();
 
-	my $code = sub {
-	    PVE::Cluster::setup_sshd_config();
-	    PVE::Cluster::setup_rootsshconfig();
-	    PVE::Cluster::setup_ssh_keys();
-
-	    # check if we can join with the given parameters and current node state
-	    my ($ring0_addr, $ring1_addr) = $param->@{'ring0_addr', 'ring1_addr'};
-	    PVE::Cluster::assert_joinable($ring0_addr, $ring1_addr, $param->{force});
-
-	    # make sure known_hosts is on local filesystem
-	    PVE::Cluster::ssh_unmerge_known_hosts();
-
-	    my $host = $param->{hostname};
-	    my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
-
-	    my $conn_args = {
-		username => 'root@pam',
-		password => $param->{password},
-		cookie_name => 'PVEAuthCookie',
-		protocol => 'https',
-		host => $host,
-		port => 8006,
-	    };
-
-	    if (my $fp = $param->{fingerprint}) {
-		$conn_args->{cached_fingerprints} = { uc($fp) => 1 };
-	    } elsif ($rpcenv->{type} eq 'cli') {
-		$conn_args->{manual_verification} = 1;
-	    } else {
-		raise_param_exc({
-		    fingerprint => "'fingerprint' only optional in interactive CLI environment."
-		});
-	    }
-
-	    print "Etablishing API connection with host '$host'\n";
-
-	    my $conn = PVE::APIClient::LWP->new(%$conn_args);
-	    $conn->login();
-
-	    # login raises an exception on failure, so if we get here we're good
-	    print "Login succeeded.\n";
-
-	    my $args = {};
-	    $args->{force} = $param->{force} if defined($param->{force});
-	    $args->{nodeid} = $param->{nodeid} if $param->{nodeid};
-	    $args->{votes} = $param->{votes} if defined($param->{votes});
-	    $args->{ring0_addr} = $ring0_addr // $local_ip_address;
-	    $args->{ring1_addr} = $ring1_addr if defined($ring1_addr);
-
-	    print "Request addition of this node\n";
-	    my $res = $conn->post("/cluster/config/nodes/$nodename", $args);
-
-	    print "Join request OK, finishing setup locally\n";
-
-	    # added successfuly - now prepare local node
-	    PVE::Cluster::finish_join($nodename, $res->{corosync_conf}, $res->{corosync_authkey});
-	};
-
 	my $worker = sub {
-	    PVE::Tools::lock_file($local_cluster_change_lock, 10, $code);
+	    PVE::Tools::lock_file($local_cluster_lock, 10, \&PVE::Cluster::join, $param);
+	    die $@ if $@;
 	};
 
 	return $rpcenv->fork_worker('clusterjoin', '',  $authuser, $worker);
diff --git a/data/PVE/CLI/pvecm.pm b/data/PVE/CLI/pvecm.pm
index aec27f5..9b98815 100755
--- a/data/PVE/CLI/pvecm.pm
+++ b/data/PVE/CLI/pvecm.pm
@@ -117,7 +117,8 @@ __PACKAGE__->register_method ({
 	    delete $param->{use_ssh};
 	    $param->{password} = $password;
 
-	    eval { PVE::API2::ClusterConfig->join($param) };
+	    my $local_cluster_lock = "/var/lock/pvecm.lock";
+	    PVE::Tools::lock_file($local_cluster_lock, 10, \&PVE::Cluster::join, $param);
 
 	    if (my $err = $@) {
 		if (ref($err) eq 'PVE::APIClient::Exception' && $err->{code} == 501) {
diff --git a/data/PVE/Cluster.pm b/data/PVE/Cluster.pm
index bba7d6d..f8de4a2 100644
--- a/data/PVE/Cluster.pm
+++ b/data/PVE/Cluster.pm
@@ -1705,7 +1705,7 @@ sub assert_joinable {
 	$error->("cluster config '$clusterconf' already exists");
     }
 
-    my $vmlist = PVE::Cluster::get_vmlist();
+    my $vmlist = get_vmlist();
     if ($vmlist && $vmlist->{ids} && scalar(keys %{$vmlist->{ids}})) {
 	$error->("this host already contains virtual guests");
     }
@@ -1774,6 +1774,65 @@ my $backup_cfs_database = sub {
     }
 };
 
+sub join {
+    my ($param) = @_;
+
+    my $nodename = PVE::INotify::nodename();
+
+    setup_sshd_config();
+    setup_rootsshconfig();
+    setup_ssh_keys();
+
+    # check if we can join with the given parameters and current node state
+    my ($ring0_addr, $ring1_addr) = $param->@{'ring0_addr', 'ring1_addr'};
+    assert_joinable($ring0_addr, $ring1_addr, $param->{force});
+
+    # make sure known_hosts is on local filesystem
+    ssh_unmerge_known_hosts();
+
+    my $host = $param->{hostname};
+    my $local_ip_address = remote_node_ip($nodename);
+
+    my $conn_args = {
+	username => 'root@pam',
+	password => $param->{password},
+	cookie_name => 'PVEAuthCookie',
+	protocol => 'https',
+	host => $host,
+	port => 8006,
+    };
+
+    if (my $fp = $param->{fingerprint}) {
+	$conn_args->{cached_fingerprints} = { uc($fp) => 1 };
+    } else {
+	# API schema ensures that we can only get here from CLI handler
+	$conn_args->{manual_verification} = 1;
+    }
+
+    print "Etablishing API connection with host '$host'\n";
+
+    my $conn = PVE::APIClient::LWP->new(%$conn_args);
+    $conn->login();
+
+    # login raises an exception on failure, so if we get here we're good
+    print "Login succeeded.\n";
+
+    my $args = {};
+    $args->{force} = $param->{force} if defined($param->{force});
+    $args->{nodeid} = $param->{nodeid} if $param->{nodeid};
+    $args->{votes} = $param->{votes} if defined($param->{votes});
+    $args->{ring0_addr} = $ring0_addr // $local_ip_address;
+    $args->{ring1_addr} = $ring1_addr if defined($ring1_addr);
+
+    print "Request addition of this node\n";
+    my $res = $conn->post("/cluster/config/nodes/$nodename", $args);
+
+    print "Join request OK, finishing setup locally\n";
+
+    # added successfuly - now prepare local node
+    finish_join($nodename, $res->{corosync_conf}, $res->{corosync_authkey});
+}
+
 sub finish_join {
     my ($nodename, $corosync_conf, $corosync_authkey) = @_;
 
@@ -1793,7 +1852,7 @@ sub finish_join {
 
     # wait for quorum
     my $printqmsg = 1;
-    while (!PVE::Cluster::check_cfs_quorum(1)) {
+    while (!check_cfs_quorum(1)) {
 	if ($printqmsg) {
 	    print "waiting for quorum...";
 	    STDOUT->flush();
@@ -1803,13 +1862,13 @@ sub finish_join {
     }
     print "OK\n" if !$printqmsg;
 
-    my $local_ip_address = PVE::Cluster::remote_node_ip($nodename);
+    my $local_ip_address = remote_node_ip($nodename);
 
     print "generating node certificates\n";
-    PVE::Cluster::gen_pve_node_files($nodename, $local_ip_address);
+    gen_pve_node_files($nodename, $local_ip_address);
 
     print "merge known_hosts file\n";
-    PVE::Cluster::ssh_merge_known_hosts($nodename, $local_ip_address, 1);
+    ssh_merge_known_hosts($nodename, $local_ip_address, 1);
 
     print "node certificate changed, restart pveproxy and pvedaemon services\n";
     run_command(['systemctl', 'reload-or-restart', 'pvedaemon', 'pveproxy']);



