[pve-devel] [RFC v2 cluster] allow to add and delete qdevice to cluster

Thomas Lamprecht t.lamprecht at proxmox.com
Fri Feb 22 11:41:18 CET 2019


On 2/21/19 3:10 PM, Oguz Bektas wrote:
> Allows adding and deleting qdevice through pvecm.
> 
> Requirements:
> * All hosts need corosync-qdevice installed.
> * Box serving as QDevice needs corosync-qnetd installed.
> * Root SSH access from Proxmox host to QDevice
> 
> Original email with patch from Thomas:
> https://pve.proxmox.com/pipermail/pve-devel/2018-July/033041.html
> 
> v2 changes the following:
> * use modified methods from corosync-qdevice-net-certutil
> quick_start() instead, to avoid a two-way root ssh connection
> requirement
> * utilise /etc/pve to copy certificates during initialization
> * removed some functions/variables which are not needed anymore
> 
> Will have to take a look at the FIXME stuffs inside, so this is more
> of a POC as of now.
> 
> Signed-off-by: Oguz Bektas <o.bektas at proxmox.com>
> ---
>  data/PVE/CLI/pvecm.pm | 264 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 264 insertions(+)
> 
> diff --git a/data/PVE/CLI/pvecm.pm b/data/PVE/CLI/pvecm.pm
> index 55c3f15..09f81fc 100755
> --- a/data/PVE/CLI/pvecm.pm
> +++ b/data/PVE/CLI/pvecm.pm
> @@ -63,6 +63,266 @@ __PACKAGE__->register_method ({
>  	return undef;
>      }});
>  
> +my $foreach_member = sub {
> +    my ($code, $noerr) = @_;
> +
> +    my $members = PVE::Cluster::get_members();
> +    foreach my $node (sort keys %$members) {
> +	if (my $ip = $members->{$node}->{ip}) {
> +	    $code->($node, $ip);
> +	} else {
> +	    die "cannot get the cluster IP for node '$node'.\n" if !$noerr;
> +	    warn "cannot get the cluster IP for node '$node'.\n";
> +	    return undef;
> +	}
> +    }
> +};
> +
> +__PACKAGE__->register_method ({
> +    name => 'setup_qdevice',
> +    path => 'setup_qdevice',
> +    method => 'PUT',
> +    description => "Setup the use of a QDevice",
> +    parameters => {
> +        additionalProperties => 0,
> +	properties => {
> +	    address => {
> +		type => 'string', format => 'ip',
> +		description => "Specifies the network address of an external corosync QDevice" ,
> +	    },
> +	    network => {
> +		type => 'string',
> +		format => 'CIDR',
> +		description => 'The network which should be used to connect to the external qdevice',
> +		optional => 1,
> +	    },
> +	    force => {
> +		type => 'boolean',
> +		description => "Do not throw error on possible dangerous operations.",
> +		optional => 1,
> +	    },
> +	},
> +    },
> +    returns => { type => 'null' },
> +
> +    code => sub {
> +	my ($param) = @_;
> +
> +	PVE::Corosync::check_conf_exists(1);
> +
> +	if (!PVE::Cluster::check_cfs_quorum(1)) {
> +	    print "ERROR: cluster must have quorum, aborting\n";
> +	    return undef;
> +	}
> +
> +	my $conf = PVE::Cluster::cfs_read_file("corosync.conf");

Error out if this is empty? A QDevice makes no sense on a stand-alone node.

> +
> +	die "ERROR: QDevice already configured!\n"
> +	    if defined($conf->{main}->{quorum}->{device}) && !$param->{force};

s/ERROR// as we seldom do this and in the gui you'd get double "error" strings, IIRC.

> +
> +	my $network = $param->{network};
> +
> +	my $members = PVE::Cluster::get_members();
> +	foreach my $node (sort keys %$members) {
> +	    die "All nodes must be online! Node $node is offline, aborting.\n"
> +		if !$members->{$node}->{online};
> +	}
> +
> +	my $qnetd_addr = $param->{address};
> +	my $base_dir = "/etc/corosync/qdevice/net";
> +	my $db_dir_qnetd = "/etc/corosync/qnetd/nssdb";
> +	my $db_dir_node = "$base_dir/nssdb";
> +	my $ca_export_base = "qnetd-cacert.crt";
> +	my $ca_export_file = "$db_dir_qnetd/$ca_export_base";
> +	my $crq_file_base = "qdevice-net-node.crq";
> +	my $p12_file_base = "qdevice-net-node.p12";
> +	my $qdevice_certutil = "corosync-qdevice-net-certutil";
> +	my $qnetd_certutil= "corosync-qnetd-certutil";
> +	my $clustername = $conf->{main}->{totem}->{cluster_name};
> +
> +
> +	# copy SSH key to qdevice

unnecessary "what" comment.

> +	run_command(['ssh-copy-id', '-i', '/root/.ssh/id_rsa', "root\@$qnetd_addr"]);
> +
> +	if (-d $db_dir_node) {
> +	    # FIXME: check on all nodes?!

I know this comment is from me, but I'd just check it locally only, and if something exists on
the other nodes just overwrite it.

> +	    if ($param->{force}) {
> +		rmtree $db_dir_node;
> +	    } else {
> +		die "QDevice certificate store already initialised, set force to delete!\n";
> +	    }
> +	}
> +
> +	my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes'];
> +	my $scp_cmd = ['scp', '-o', 'BatchMode=yes'];
> +
> +	print "Setup certificates for secure connection\n";
> +	# initialize qnetd server
> +	print "INFO: initializing qnetd server\n";
> +	run_command([
> +		@$ssh_cmd, "root\@$qnetd_addr",
> +		$qnetd_certutil, "-i", 
> +		"||", "true" # avoid exit code if already initialized

just use noerr parameter for run_command if you want to ignore the exit code?

> +	]);
> +
> +	# copy CA cert to all nodes and initialize them

# "copy" is wrong above, it's already on /etc/pve thus all nodes have it already?

> +	print "INFO: copying CA cert and initializing on all nodes\n";
> +	run_command([@$scp_cmd, "root\@$qnetd_addr:$ca_export_file", "/etc/pve/$ca_export_base"]);
> +	$foreach_member->(sub {
> +	    my ($node, $ip) = @_;
> +	    run_command([
> +		@$ssh_cmd, "root\@$ip", $qdevice_certutil, "-i",

we could put "'-l', 'root'" into the base ssh command to make those calls a bit shorter.

> +		"-c", "/etc/pve/$ca_export_base"

I'd rather have a slightly longer line than break the same command over multiple lines,
at least if it isn't really, really long.

> +	    ]);
> +	});
> +	unlink "/etc/pve/$ca_export_base";
> +
> +	# generate cert request

this comment does not make any sense if the same info is in the print statement right below?

> +	print "INFO: generating cert request\n";
> +	run_command([$qdevice_certutil, "-r", "-n", "$clustername"]);
> +
> +	# copy exported cert request to qnetd server
same as above

> +	print "INFO: copying exported cert request to qnetd server\n";
> +	run_command([@$scp_cmd, "$db_dir_node/$crq_file_base", "root\@$qnetd_addr:/tmp"]);
> +
> +	# sign and export cluster certificate
same as above

> +	print "INFO: sign and export cluster cert\n";
> +	run_command([
> +		@$ssh_cmd, "root\@$qnetd_addr", "$qnetd_certutil", "-s", "-c",
> +		"/tmp/$crq_file_base", "-n", "$clustername"
> +	    ]);
> +
> +	# copy exported CRT to master node
same as above

> +	print "INFO: copy exported crt to master node\n";
> +	run_command([
> +		@$scp_cmd, "root\@$qnetd_addr:$db_dir_qnetd/cluster-$clustername.crt",
> +		"$db_dir_node"
> +	    ]);
> +
> +	# import certificate
same as above

> +	print "INFO: import certificate\n";
> +	run_command(["$qdevice_certutil", "-M", "-c", "$db_dir_node/cluster-$clustername.crt"]);
> +
> +	# copy pk12 cert to all nodes and import it
same as above

> +	print "INFO: copy and import pk12 cert to all nodes\n";
> +	run_command([@$scp_cmd, "$db_dir_node/$p12_file_base", "/etc/pve/"]);
> +	$foreach_member->(sub {
> +	    my ($node, $ip) = @_;
> +	    run_command([
> +		    @$ssh_cmd, "root\@$ip", "$qdevice_certutil", "-m", "-c",
> +		    "/etc/pve/$p12_file_base"
> +		]);
> +	});
> +	unlink "/etc/pve/$p12_file_base";
> +
> +	my $model = "net";
> +	my $algorithm = 'ffsplit';
> +	if (scalar($members) & 1) {

I know this is from me, but maybe we should check that early, before doing so much work.

> +	    if ($param->{force}) {
> +		$algorithm = 'lms';
> +	    } else {
> +		die "Clusters with an odd node count are not officially supported!\n";
> +	    }
> +	}
> +
> +	my $code = sub {
> +	    my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
> +	    my $quorum_section = $conf->{main}->{quorum};
> +
> +	    die "Qdevice already configured, must be deleted before setting up new one!\n"
> +		if defined($quorum_section->{device}); # must not be forced!
> +
> +	    my $qdev_section = {
> +		model => $model,
> +		"$model" => {
> +		    tls => 'on',
> +		    host => $qnetd_addr,
> +		    algorithm => $algorithm,
> +		}
> +	    };
> +	    $qdev_section->{votes} = 1 if $algorithm eq 'ffsplit';
> +
> +	    $quorum_section->{device} = $qdev_section;
> +
> +	    PVE::Corosync::atomic_write_conf($conf);
> +	};
> +
> +	print "INFO: add QDevice to cluster configuration\n";
> +	PVE::Cluster::cfs_lock_file('corosync.conf', 10, $code);
> +	die $@ if $@;
> +
> +	$foreach_member->(sub {
> +	    my ($node, $ip) = @_;
> +	    print "INFO: start and enable corosync qdevice daemon on node '$node'...\n";
> +	    run_command([@$ssh_cmd, $ip, 'systemctl', 'start', 'corosync-qdevice']);
> +	    run_command([@$ssh_cmd, $ip, 'systemctl', 'enable', 'corosync-qdevice']);
> +	});
> +
> +	run_command(['corosync-cfgtool', '-R']); # do cluster wide config reload
> +
> +	print "Done\n";
> +
> +	return undef;
> +}});
> +
> +__PACKAGE__->register_method ({
> +    name => 'delete_qdevice',
> +    path => 'delete_qdevice',
> +    method => 'DELETE',
> +    description => "Remove a configured QDevice",
> +    parameters => {
> +        additionalProperties => 0,
> +	properties => {},
> +    },
> +    returns => { type => 'null' },
> +
> +    code => sub {
> +	my ($param) = @_;
> +
> +	PVE::Corosync::check_conf_exists(1);
> +
> +	if (!PVE::Cluster::check_cfs_quorum(1)) {
> +	    # FIXME: *all* nodes must be online
> +	    print "ERROR: cluster must have quorum\n";
> +	    return undef;
> +	}
> +
> +	my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes'];
> +
> +	my $code = sub {
> +	    my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
> +	    my $quorum_section = $conf->{main}->{quorum};
> +
> +	    die "ERROR: No QDevice configured!\n" if !defined($quorum_section->{device});
> +
> +	    delete $quorum_section->{device};
> +
> +	    PVE::Corosync::atomic_write_conf($conf);
> +
> +	    # cleanup qdev state (cert storage)
> +	    my $qdev_state_dir =  "/etc/corosync/qdevice";
> +	    #rmtree $qdev_state_dir;
> +
> +	    $foreach_member->(sub {
> +		my (undef, $ip) = @_;
> +		run_command([@$ssh_cmd, $ip, '--', 'rm', '-rf', $qdev_state_dir ]);
> +	    });
> +	};
> +
> +	PVE::Cluster::cfs_lock_file('corosync.conf', 10, $code);
> +	die $@ if $@;
> +
> +	$foreach_member->(sub {
> +	    my (undef, $ip) = @_;
> +	    run_command([@$ssh_cmd, $ip, 'systemctl', 'stop', 'corosync-qdevice']);
> +	    run_command([@$ssh_cmd, $ip, 'systemctl', 'disable', 'corosync-qdevice']);
> +	});
> +
> +	run_command(['corosync-cfgtool', '-R']);
> +
> +	return undef;
> +}});
> +
>  __PACKAGE__->register_method ({
>      name => 'add',
>      path => 'add',
> @@ -396,6 +656,10 @@ our $cmddef = {
>      expected => [ __PACKAGE__, 'expected', ['expected']],
>      updatecerts => [ __PACKAGE__, 'updatecerts', []],
>      mtunnel => [ __PACKAGE__, 'mtunnel', ['extra-args']],
> +    qdevice => {
> +	setup => [ __PACKAGE__, 'setup_qdevice', ['address']],
> +	delete => [ __PACKAGE__, 'delete_qdevice', []],
> +    }
>  };
>  
>  1;
> 





More information about the pve-devel mailing list