[pve-devel] [PATCH v3 cluster] allow to add and delete qdevice to cluster

Thomas Lamprecht t.lamprecht at proxmox.com
Tue Feb 26 13:18:53 CET 2019


works OK, in general.

On 2/22/19 2:16 PM, Oguz Bektas wrote:
> v3 changes:
> * die if standalone mode
> * remove redundant quorum checks, as we already check if all nodes are
> online
> * remove 'ERROR' from print statements
> * move the cluster member even/odd count check near the start

please write up this changelog in the diffstat area below...

> 
> Signed-off-by: Oguz Bektas <o.bektas at proxmox.com>
> ---

... here, as everything between the '---' triple-dash above and the first "diff ..."
below is ignored by 'git am'. This version information is more important for
review, to see what you did since the last revision. The commit message itself
doesn't need to include the revision history (it may even seem like noise there);
it should be a description of why you do things the way you do them in the
current revision.

>  data/PVE/CLI/pvecm.pm | 250 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 250 insertions(+)
> 
> diff --git a/data/PVE/CLI/pvecm.pm b/data/PVE/CLI/pvecm.pm
> index 55c3f15..5d3b4a7 100755
> --- a/data/PVE/CLI/pvecm.pm
> +++ b/data/PVE/CLI/pvecm.pm
> @@ -63,6 +63,252 @@ __PACKAGE__->register_method ({
>  	return undef;
>      }});
>  
> +my $foreach_member = sub {
> +    my ($code, $noerr) = @_;
> +
> +    my $members = PVE::Cluster::get_members();
> +    foreach my $node (sort keys %$members) {
> +	if (my $ip = $members->{$node}->{ip}) {
> +	    $code->($node, $ip);
> +	} else {
> +	    die "cannot get the cluster IP for node '$node'.\n" if !$noerr;
> +	    warn "cannot get the cluster IP for node '$node'.\n";
> +	    return undef;
> +	}
> +    }
> +};
> +
> +__PACKAGE__->register_method ({
> +    name => 'setup_qdevice',
> +    path => 'setup_qdevice',
> +    method => 'PUT',
> +    description => "Setup the use of a QDevice",
> +    parameters => {
> +        additionalProperties => 0,
> +	properties => {
> +	    address => {
> +		type => 'string', format => 'ip',
> +		description => "Specifies the network address of an external corosync QDevice" ,
> +	    },
> +	    network => {
> +		type => 'string',
> +		format => 'CIDR',
> +		description => 'The network which should be used to connect to the external qdevice',
> +		optional => 1,
> +	    },
> +	    force => {
> +		type => 'boolean',
> +		description => "Do not throw error on possible dangerous operations.",
> +		optional => 1,
> +	    },
> +	},
> +    },
> +    returns => { type => 'null' },
> +
> +    code => sub {
> +	my ($param) = @_;
> +
> +	die "Node not in a cluster. Aborting.\n"
> +	    if !PVE::Corosync::check_conf_exists(1);
> +
> +	my $members = PVE::Cluster::get_members();
> +	foreach my $node (sort keys %$members) {
> +	    die "All nodes must be online! Node $node is offline, aborting.\n"
> +		if !$members->{$node}->{online};
> +	}
> +
> +	my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
> +
> +	die "QDevice already configured!\n"
> +	    if defined($conf->{main}->{quorum}->{device}) && !$param->{force};
> +
> +	my $network = $param->{network};
> +
> +	my $model = "net";
> +	my $algorithm = 'ffsplit';
> +	if (scalar($members) & 1) {
> +	    if ($param->{force}) {
> +		$algorithm = 'lms';
> +	    } else {
> +		die "Clusters with an odd node count are not officially supported!\n";
> +	    }
> +	}
> +
> +	my $qnetd_addr = $param->{address};
> +	my $base_dir = "/etc/corosync/qdevice/net";
> +	my $db_dir_qnetd = "/etc/corosync/qnetd/nssdb";
> +	my $db_dir_node = "$base_dir/nssdb";
> +	my $ca_export_base = "qnetd-cacert.crt";
> +	my $ca_export_file = "$db_dir_qnetd/$ca_export_base";
> +	my $crq_file_base = "qdevice-net-node.crq";
> +	my $p12_file_base = "qdevice-net-node.p12";
> +	my $qdevice_certutil = "corosync-qdevice-net-certutil";
> +	my $qnetd_certutil= "corosync-qnetd-certutil";
> +	my $clustername = $conf->{main}->{totem}->{cluster_name};
> +
> +	run_command(['ssh-copy-id', '-i', '/root/.ssh/id_rsa', "root\@$qnetd_addr"]);
> +
> +	if (-d $db_dir_node) {
> +	    # FIXME: check on all nodes?!
> +	    if ($param->{force}) {
> +		rmtree $db_dir_node;
> +	    } else {
> +		die "QDevice certificate store already initialised, set force to delete!\n";
> +	    }
> +	}
> +
> +	my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes', '-lroot'];
> +	my $scp_cmd = ['scp', '-o', 'BatchMode=yes'];
> +
> +	print "INFO: initializing qnetd server\n";
> +	run_command(
> +	    [@$ssh_cmd, $qnetd_addr, $qnetd_certutil, "-i"],
> +	    noerr => 1
> +	);
> +
> +	print "INFO: copying CA cert and initializing on all nodes\n";
> +	run_command([@$scp_cmd, "root\@$qnetd_addr:$ca_export_file", "/etc/pve/$ca_export_base"]);
> +	$foreach_member->(sub {
> +	    my ($node, $ip) = @_;

maybe add:
my $outsub = sub { print "node '$node': " . shift };

and pass it as the "outfunc" parameter to run_command below, so that one can map
the output to the node it originates from.

> +	    run_command(
> +		[@$ssh_cmd, $ip, $qdevice_certutil, "-i", "-c", "/etc/pve/$ca_export_base"],
> +		noerr => 1
> +	    );
> +	});
> +	unlink "/etc/pve/$ca_export_base";
> +
> +	print "INFO: generating cert request\n";
> +	run_command([$qdevice_certutil, "-r", "-n", $clustername]);
> +
> +	print "INFO: copying exported cert request to qnetd server\n";
> +	run_command([@$scp_cmd, "$db_dir_node/$crq_file_base", "root\@$qnetd_addr:/tmp"]);
> +
> +	print "INFO: sign and export cluster cert\n";
> +	run_command([
> +		@$ssh_cmd, $qnetd_addr, $qnetd_certutil, "-s", "-c",
> +		"/tmp/$crq_file_base", "-n", "$clustername"
> +	    ]);
> +
> +	print "INFO: copy exported CRT to master node\n";
> +	run_command([
> +		@$scp_cmd, "root\@$qnetd_addr:$db_dir_qnetd/cluster-$clustername.crt",
> +		"$db_dir_node"
> +	    ]);
> +
> +	print "INFO: import certificate\n";
> +	run_command(["$qdevice_certutil", "-M", "-c", "$db_dir_node/cluster-$clustername.crt"]);
> +
> +	print "INFO: copy and import pk12 cert to all nodes\n";
> +	run_command([@$scp_cmd, "$db_dir_node/$p12_file_base", "/etc/pve/"]);
> +	$foreach_member->(sub {
> +	    my ($node, $ip) = @_;

same with outfunc as above

> +	    run_command([
> +		    @$ssh_cmd, $ip, "$qdevice_certutil", "-m", "-c",
> +		    "/etc/pve/$p12_file_base"
> +		]);
> +	});
> +	unlink "/etc/pve/$p12_file_base";
> +
> +
> +	my $code = sub {
> +	    my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
> +	    my $quorum_section = $conf->{main}->{quorum};
> +
> +	    die "Qdevice already configured, must be deleted before setting up new one!\n"
> +		if defined($quorum_section->{device}); # must not be forced!
> +
> +	    my $qdev_section = {
> +		model => $model,
> +		"$model" => {
> +		    tls => 'on',
> +		    host => $qnetd_addr,
> +		    algorithm => $algorithm,
> +		}
> +	    };
> +	    $qdev_section->{votes} = 1 if $algorithm eq 'ffsplit';
> +
> +	    $quorum_section->{device} = $qdev_section;
> +
> +	    PVE::Corosync::atomic_write_conf($conf);
> +	};
> +
> +	print "INFO: add QDevice to cluster configuration\n";
> +	PVE::Cluster::cfs_lock_file('corosync.conf', 10, $code);
> +	die $@ if $@;
> +
> +	$foreach_member->(sub {
> +	    my ($node, $ip) = @_;
> +	    print "INFO: start and enable corosync qdevice daemon on node '$node'...\n";

same with outfunc as above

> +	    run_command([@$ssh_cmd, $ip, 'systemctl', 'start', 'corosync-qdevice']);
> +	    run_command([@$ssh_cmd, $ip, 'systemctl', 'enable', 'corosync-qdevice']);
> +	});
> +
> +	run_command(['corosync-cfgtool', '-R']); # do cluster wide config reload
> +
> +	print "Done\n";

I see two "Done" printed on finish when trying this out, why's that?

> +
> +	return undef;
> +}});
> +
> +__PACKAGE__->register_method ({
> +    name => 'delete_qdevice',
> +    path => 'delete_qdevice',
> +    method => 'DELETE',
> +    description => "Remove a configured QDevice",
> +    parameters => {
> +        additionalProperties => 0,
> +	properties => {},
> +    },
> +    returns => { type => 'null' },
> +
> +    code => sub {
> +	my ($param) = @_;
> +
> +	die "Node not in a cluster. Aborting.\n"
> +	    if !PVE::Corosync::check_conf_exists(1);
> +
> +	my $members = PVE::Cluster::get_members();
> +	foreach my $node (sort keys %$members) {
> +	    die "All nodes must be online! Node $node is offline, aborting.\n"
> +		if !$members->{$node}->{online};
> +	}
> +
> +	my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes', '-lroot'];
> +
> +	my $code = sub {
> +	    my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
> +	    my $quorum_section = $conf->{main}->{quorum};
> +
> +	    die "No QDevice configured!\n" if !defined($quorum_section->{device});
> +
> +	    delete $quorum_section->{device};
> +
> +	    PVE::Corosync::atomic_write_conf($conf);
> +
> +	    # cleanup qdev state (cert storage)
> +	    my $qdev_state_dir =  "/etc/corosync/qdevice";
> +	    #rmtree $qdev_state_dir;
> +
> +	    $foreach_member->(sub {
> +		my (undef, $ip) = @_;
> +		run_command([@$ssh_cmd, $ip, '--', 'rm', '-rf', $qdev_state_dir ]);
> +	    });
> +	};
> +
> +	PVE::Cluster::cfs_lock_file('corosync.conf', 10, $code);
> +	die $@ if $@;
> +
> +	$foreach_member->(sub {
> +	    my (undef, $ip) = @_;
> +	    run_command([@$ssh_cmd, $ip, 'systemctl', 'stop', 'corosync-qdevice']);
> +	    run_command([@$ssh_cmd, $ip, 'systemctl', 'disable', 'corosync-qdevice']);
> +	});
> +
> +	run_command(['corosync-cfgtool', '-R']);
> +
> +	return undef;
> +}});
> +
>  __PACKAGE__->register_method ({
>      name => 'add',
>      path => 'add',
> @@ -396,6 +642,10 @@ our $cmddef = {
>      expected => [ __PACKAGE__, 'expected', ['expected']],
>      updatecerts => [ __PACKAGE__, 'updatecerts', []],
>      mtunnel => [ __PACKAGE__, 'mtunnel', ['extra-args']],
> +    qdevice => {
> +	setup => [ __PACKAGE__, 'setup_qdevice', ['address']],
> +	delete => [ __PACKAGE__, 'delete_qdevice', []],
> +    }
>  };
>  
>  1;
> 





More information about the pve-devel mailing list