[pve-devel] [RFC v2 cluster] allow to add and delete qdevice to cluster
Thomas Lamprecht
t.lamprecht at proxmox.com
Fri Feb 22 11:41:18 CET 2019
On 2/21/19 3:10 PM, Oguz Bektas wrote:
> Allows adding and deleting qdevice through pvecm.
>
> Requirements:
> * All hosts need corosync-qdevice installed.
> * Box serving as QDevice needs corosync-qnetd installed.
> * Root SSH access from Proxmox host to QDevice
>
> Original email with patch from Thomas:
> https://pve.proxmox.com/pipermail/pve-devel/2018-July/033041.html
>
> v2 changes the following:
> * use modified methods from corosync-qdevice-net-certutil
> quick_start() instead, to avoid a two-way root ssh connection
> requirement
> * utilise /etc/pve to copy certificates during initialization
> * removed some functions/variables which are not needed anymore
>
> Will have to take a look at the FIXME stuffs inside, so this is more
> of a POC as of now.
>
> Signed-off-by: Oguz Bektas <o.bektas at proxmox.com>
> ---
> data/PVE/CLI/pvecm.pm | 264 ++++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 264 insertions(+)
>
> diff --git a/data/PVE/CLI/pvecm.pm b/data/PVE/CLI/pvecm.pm
> index 55c3f15..09f81fc 100755
> --- a/data/PVE/CLI/pvecm.pm
> +++ b/data/PVE/CLI/pvecm.pm
> @@ -63,6 +63,266 @@ __PACKAGE__->register_method ({
> return undef;
> }});
>
> +my $foreach_member = sub {
> + my ($code, $noerr) = @_;
> +
> + my $members = PVE::Cluster::get_members();
> + foreach my $node (sort keys %$members) {
> + if (my $ip = $members->{$node}->{ip}) {
> + $code->($node, $ip);
> + } else {
> + die "cannot get the cluster IP for node '$node'.\n" if !$noerr;
> + warn "cannot get the cluster IP for node '$node'.\n";
> + return undef;
> + }
> + }
> +};
> +
> +__PACKAGE__->register_method ({
> + name => 'setup_qdevice',
> + path => 'setup_qdevice',
> + method => 'PUT',
> + description => "Setup the use of a QDevice",
> + parameters => {
> + additionalProperties => 0,
> + properties => {
> + address => {
> + type => 'string', format => 'ip',
> + description => "Specifies the network address of an external corosync QDevice" ,
> + },
> + network => {
> + type => 'string',
> + format => 'CIDR',
> + description => 'The network which should be used to connect to the external qdevice',
> + optional => 1,
> + },
> + force => {
> + type => 'boolean',
> + description => "Do not throw error on possible dangerous operations.",
> + optional => 1,
> + },
> + },
> + },
> + returns => { type => 'null' },
> +
> + code => sub {
> + my ($param) = @_;
> +
> + PVE::Corosync::check_conf_exists(1);
> +
> + if (!PVE::Cluster::check_cfs_quorum(1)) {
> + print "ERROR: cluster must have quorum, aborting\n";
> + return undef;
> + }
> +
> + my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
Error out if this is empty? A QDevice makes no sense on a stand-alone node.
> +
> + die "ERROR: QDevice already configured!\n"
> + if defined($conf->{main}->{quorum}->{device}) && !$param->{force};
s/ERROR// as we seldom do this and in the gui you'd get double "error" strings, IIRC.
> +
> + my $network = $param->{network};
> +
> + my $members = PVE::Cluster::get_members();
> + foreach my $node (sort keys %$members) {
> + die "All nodes must be online! Node $node is offline, aborting.\n"
> + if !$members->{$node}->{online};
> + }
> +
> + my $qnetd_addr = $param->{address};
> + my $base_dir = "/etc/corosync/qdevice/net";
> + my $db_dir_qnetd = "/etc/corosync/qnetd/nssdb";
> + my $db_dir_node = "$base_dir/nssdb";
> + my $ca_export_base = "qnetd-cacert.crt";
> + my $ca_export_file = "$db_dir_qnetd/$ca_export_base";
> + my $crq_file_base = "qdevice-net-node.crq";
> + my $p12_file_base = "qdevice-net-node.p12";
> + my $qdevice_certutil = "corosync-qdevice-net-certutil";
> + my $qnetd_certutil= "corosync-qnetd-certutil";
> + my $clustername = $conf->{main}->{totem}->{cluster_name};
> +
> +
> + # copy SSH key to qdevice
unnecessary "what" comment.
> + run_command(['ssh-copy-id', '-i', '/root/.ssh/id_rsa', "root\@$qnetd_addr"]);
> +
> + if (-d $db_dir_node) {
> + # FIXME: check on all nodes?!
I know this comment is from me, but I'd just check it locally, and if something exists on the
other nodes, just overwrite it.
> + if ($param->{force}) {
> + rmtree $db_dir_node;
> + } else {
> + die "QDevice certificate store already initialised, set force to delete!\n";
> + }
> + }
> +
> + my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes'];
> + my $scp_cmd = ['scp', '-o', 'BatchMode=yes'];
> +
> + print "Setup certificates for secure connection\n";
> + # initialize qnetd server
> + print "INFO: initializing qnetd server\n";
> + run_command([
> + @$ssh_cmd, "root\@$qnetd_addr",
> + $qnetd_certutil, "-i",
> + "||", "true" # avoid exit code if already initialized
just use noerr parameter for run_command if you want to ignore the exit code?
> + ]);
> +
> + # copy CA cert to all nodes and initialize them
"copy" is wrong above: it's already on /etc/pve, thus all nodes have it already?
> + print "INFO: copying CA cert and initializing on all nodes\n";
> + run_command([@$scp_cmd, "root\@$qnetd_addr:$ca_export_file", "/etc/pve/$ca_export_base"]);
> + $foreach_member->(sub {
> + my ($node, $ip) = @_;
> + run_command([
> + @$ssh_cmd, "root\@$ip", $qdevice_certutil, "-i",
We could put '-l', 'root' into the base ssh command to make those calls a bit shorter.
> + "-c", "/etc/pve/$ca_export_base"
I'd rather have a slightly longer line than break the same command over multiple lines,
at least if it isn't really, really long.
> + ]);
> + });
> + unlink "/etc/pve/$ca_export_base";
> +
> + # generate cert request
This comment does not make any sense if the same info is below in the print statement?
> + print "INFO: generating cert request\n";
> + run_command([$qdevice_certutil, "-r", "-n", "$clustername"]);
> +
> + # copy exported cert request to qnetd server
same as above
> + print "INFO: copying exported cert request to qnetd server\n";
> + run_command([@$scp_cmd, "$db_dir_node/$crq_file_base", "root\@$qnetd_addr:/tmp"]);
> +
> + # sign and export cluster certificate
same as above
> + print "INFO: sign and export cluster cert\n";
> + run_command([
> + @$ssh_cmd, "root\@$qnetd_addr", "$qnetd_certutil", "-s", "-c",
> + "/tmp/$crq_file_base", "-n", "$clustername"
> + ]);
> +
> + # copy exported CRT to master node
same as above
> + print "INFO: copy exported crt to master node\n";
> + run_command([
> + @$scp_cmd, "root\@$qnetd_addr:$db_dir_qnetd/cluster-$clustername.crt",
> + "$db_dir_node"
> + ]);
> +
> + # import certificate
same as above
> + print "INFO: import certificate\n";
> + run_command(["$qdevice_certutil", "-M", "-c", "$db_dir_node/cluster-$clustername.crt"]);
> +
> + # copy pk12 cert to all nodes and import it
same as above
> + print "INFO: copy and import pk12 cert to all nodes\n";
> + run_command([@$scp_cmd, "$db_dir_node/$p12_file_base", "/etc/pve/"]);
> + $foreach_member->(sub {
> + my ($node, $ip) = @_;
> + run_command([
> + @$ssh_cmd, "root\@$ip", "$qdevice_certutil", "-m", "-c",
> + "/etc/pve/$p12_file_base"
> + ]);
> + });
> + unlink "/etc/pve/$p12_file_base";
> +
> + my $model = "net";
> + my $algorithm = 'ffsplit';
> + if (scalar($members) & 1) {
I know this is from me, but maybe we should check that early, before doing so much work.
> + if ($param->{force}) {
> + $algorithm = 'lms';
> + } else {
> + die "Clusters with an odd node count are not officially supported!\n";
> + }
> + }
> +
> + my $code = sub {
> + my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
> + my $quorum_section = $conf->{main}->{quorum};
> +
> + die "Qdevice already configured, must be deleted before setting up new one!\n"
> + if defined($quorum_section->{device}); # must not be forced!
> +
> + my $qdev_section = {
> + model => $model,
> + "$model" => {
> + tls => 'on',
> + host => $qnetd_addr,
> + algorithm => $algorithm,
> + }
> + };
> + $qdev_section->{votes} = 1 if $algorithm eq 'ffsplit';
> +
> + $quorum_section->{device} = $qdev_section;
> +
> + PVE::Corosync::atomic_write_conf($conf);
> + };
> +
> + print "INFO: add QDevice to cluster configuration\n";
> + PVE::Cluster::cfs_lock_file('corosync.conf', 10, $code);
> + die $@ if $@;
> +
> + $foreach_member->(sub {
> + my ($node, $ip) = @_;
> + print "INFO: start and enable corosync qdevice daemon on node '$node'...\n";
> + run_command([@$ssh_cmd, $ip, 'systemctl', 'start', 'corosync-qdevice']);
> + run_command([@$ssh_cmd, $ip, 'systemctl', 'enable', 'corosync-qdevice']);
> + });
> +
> + run_command(['corosync-cfgtool', '-R']); # do cluster wide config reload
> +
> + print "Done\n";
> +
> + return undef;
> +}});
> +
> +__PACKAGE__->register_method ({
> + name => 'delete_qdevice',
> + path => 'delete_qdevice',
> + method => 'DELETE',
> + description => "Remove a configured QDevice",
> + parameters => {
> + additionalProperties => 0,
> + properties => {},
> + },
> + returns => { type => 'null' },
> +
> + code => sub {
> + my ($param) = @_;
> +
> + PVE::Corosync::check_conf_exists(1);
> +
> + if (!PVE::Cluster::check_cfs_quorum(1)) {
> + # FIXME: *all* nodes must be online
> + print "ERROR: cluster must have quorum\n";
> + return undef;
> + }
> +
> + my $ssh_cmd = ['ssh', '-o', 'BatchMode=yes'];
> +
> + my $code = sub {
> + my $conf = PVE::Cluster::cfs_read_file("corosync.conf");
> + my $quorum_section = $conf->{main}->{quorum};
> +
> + die "ERROR: No QDevice configured!\n" if !defined($quorum_section->{device});
> +
> + delete $quorum_section->{device};
> +
> + PVE::Corosync::atomic_write_conf($conf);
> +
> + # cleanup qdev state (cert storage)
> + my $qdev_state_dir = "/etc/corosync/qdevice";
> + #rmtree $qdev_state_dir;
> +
> + $foreach_member->(sub {
> + my (undef, $ip) = @_;
> + run_command([@$ssh_cmd, $ip, '--', 'rm', '-rf', $qdev_state_dir ]);
> + });
> + };
> +
> + PVE::Cluster::cfs_lock_file('corosync.conf', 10, $code);
> + die $@ if $@;
> +
> + $foreach_member->(sub {
> + my (undef, $ip) = @_;
> + run_command([@$ssh_cmd, $ip, 'systemctl', 'stop', 'corosync-qdevice']);
> + run_command([@$ssh_cmd, $ip, 'systemctl', 'disable', 'corosync-qdevice']);
> + });
> +
> + run_command(['corosync-cfgtool', '-R']);
> +
> + return undef;
> +}});
> +
> __PACKAGE__->register_method ({
> name => 'add',
> path => 'add',
> @@ -396,6 +656,10 @@ our $cmddef = {
> expected => [ __PACKAGE__, 'expected', ['expected']],
> updatecerts => [ __PACKAGE__, 'updatecerts', []],
> mtunnel => [ __PACKAGE__, 'mtunnel', ['extra-args']],
> + qdevice => {
> + setup => [ __PACKAGE__, 'setup_qdevice', ['address']],
> + delete => [ __PACKAGE__, 'delete_qdevice', []],
> + }
> };
>
> 1;
>
More information about the pve-devel
mailing list