[pve-devel] [PATCH manager v2 2/5] ceph: add MDS create/delete/list API
Thomas Lamprecht
t.lamprecht at proxmox.com
Thu Nov 22 20:34:19 CET 2018
Allow creating, listing and destroying a Ceph Metadata Server (MDS) over
the API and the CLI `pveceph` tool.
Besides setting up the local systemd service template and the MDS
data directory we also add a reference to the MDS in the ceph.conf.
We note the backing host (node) from the respective MDS and set up a
'mds standby for name' = 'pve' so that the PVE created ones are a
single group. If we decide to add integration for rank/path specific
MDS (possibly useful for CephFS with quite a bit of load) then this
may help as a starting point.
On create, check early if a reference already exists in ceph.conf and
abort in that case. If we only see existing data directories later
on we do not remove them, they could well be from an older manual
create - where it's possibly dangerous to just remove it. Let the
user handle it themselves in that case.
Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
Co-authored-by: Alwin Antreich <a.antreich at proxmox.com>
---
changes v1 -> v2:
* fix directory index, return more info
* added get_cluster_mds_state
PVE/API2/Ceph.pm | 7 ++
PVE/API2/Ceph/MDS.pm | 247 +++++++++++++++++++++++++++++++++++++++++
PVE/API2/Ceph/Makefile | 15 +++
PVE/API2/Makefile | 2 +-
PVE/CLI/pveceph.pm | 3 +
PVE/CephTools.pm | 140 ++++++++++++++++++++++-
6 files changed, 412 insertions(+), 2 deletions(-)
create mode 100644 PVE/API2/Ceph/MDS.pm
create mode 100644 PVE/API2/Ceph/Makefile
diff --git a/PVE/API2/Ceph.pm b/PVE/API2/Ceph.pm
index a6eec24a..d3e8d665 100644
--- a/PVE/API2/Ceph.pm
+++ b/PVE/API2/Ceph.pm
@@ -548,6 +548,7 @@ use PVE::RPCEnvironment;
use PVE::Storage;
use PVE::Tools qw(run_command file_get_contents file_set_contents);
+use PVE::API2::Ceph::MDS;
use PVE::API2::Storage::Config;
use base qw(PVE::RESTHandler);
@@ -559,6 +560,11 @@ __PACKAGE__->register_method ({
path => 'osd',
});
+__PACKAGE__->register_method ({
+ subclass => "PVE::API2::Ceph::MDS",
+ path => 'mds',
+});
+
__PACKAGE__->register_method ({
name => 'index',
path => '',
@@ -590,6 +596,7 @@ __PACKAGE__->register_method ({
{ name => 'mon' },
{ name => 'osd' },
{ name => 'pools' },
+ { name => 'mds' },
{ name => 'stop' },
{ name => 'start' },
{ name => 'status' },
diff --git a/PVE/API2/Ceph/MDS.pm b/PVE/API2/Ceph/MDS.pm
new file mode 100644
index 00000000..30d03285
--- /dev/null
+++ b/PVE/API2/Ceph/MDS.pm
@@ -0,0 +1,247 @@
+package PVE::API2::Ceph::MDS;
+
+use strict;
+use warnings;
+
+use PVE::CephTools;
+use PVE::INotify;
+use PVE::JSONSchema qw(get_standard_option);
+use PVE::RADOS;
+use PVE::RESTHandler;
+use PVE::RPCEnvironment;
+
+use base qw(PVE::RESTHandler);
+
+# API endpoint: list the MDS daemons known for this cluster.
+# Starts from the 'mds.<id>' sections of ceph.conf that carry a 'host' entry
+# (reported with state 'unknown') and then merges in the runtime data returned
+# by PVE::CephTools::get_cluster_mds_state().
+__PACKAGE__->register_method ({
+    name => 'index',
+    path => '',
+    method => 'GET',
+    description => "MDS directory index.",
+    permissions => {
+        check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
+    },
+    proxyto => 'node',
+    protected => 1,
+    parameters => {
+        additionalProperties => 0,
+        properties => {
+            node => get_standard_option('pve-node'),
+        },
+    },
+    returns => {
+        type => 'array',
+        items => {
+            type => "object",
+            properties => {
+                name => {
+                    type => 'string',
+                    description => "The name (ID) for the MDS",
+                },
+                addr => {
+                    type => 'string',
+                    optional => 1,
+                },
+                host => {
+                    type => 'string',
+                    optional => 1,
+                },
+                state => {
+                    type => 'string',
+                    description => 'State of the MDS',
+                },
+                standby_replay => {
+                    type => 'boolean',
+                    optional => 1,
+                    description => 'If true, the standby MDS is polling the active MDS for faster recovery (hot standby).',
+                },
+                rank => {
+                    type => 'integer',
+                    optional => 1,
+                },
+            },
+        },
+        links => [ { rel => 'child', href => "{name}" } ],
+    },
+    code => sub {
+        my ($param) = @_;
+
+        my $cfg = PVE::CephTools::parse_ceph_config();
+
+        my $mds_hash = {};
+
+        # collect the MDS referenced in the config, keyed by their ID
+        foreach my $section (keys %$cfg) {
+            my $d = $cfg->{$section};
+
+            if ($section =~ m/^mds\.(\S+)$/) {
+                my $mds_id = $1;
+                if (defined($d->{host})) {
+                    $mds_hash->{$mds_id} = {
+                        name => $mds_id,
+                        state => 'unknown',
+                        addr => $d->{host},
+                        host => $d->{host},
+                    };
+                }
+            }
+        }
+
+        # only ask the cluster if the config references at least one MDS
+        if (scalar(keys %$mds_hash) > 0) {
+            # does not include configured but stopped ones
+            my $mds_state = PVE::CephTools::get_cluster_mds_state();
+
+            foreach my $name (keys %$mds_state) {
+                my $d = $mds_state->{$name};
+                # just overwrite, this always provides more info
+                $mds_hash->{$name}->{$_} = $d->{$_} for keys %$d;
+            }
+        }
+
+        return PVE::RESTHandler::hash_to_array($mds_hash, 'name');
+    }
+});
+
+# API endpoint: create a Ceph Metadata Server (MDS) on the target node.
+# Inside a forked worker it adds an 'mds.<name>' section to ceph.conf and then
+# calls PVE::CephTools::create_mds(). If that call fails, the section written
+# here is removed from ceph.conf again and the error is re-thrown.
+# Aborts early when the section is already present in ceph.conf.
+__PACKAGE__->register_method ({
+    name => 'createmds',
+    path => '{name}',
+    method => 'POST',
+    description => "Create Ceph Metadata Server (MDS)",
+    proxyto => 'node',
+    protected => 1,
+    permissions => {
+        check => ['perm', '/', [ 'Sys.Modify' ]],
+    },
+    parameters => {
+        additionalProperties => 0,
+        properties => {
+            node => get_standard_option('pve-node'),
+            name => {
+                type => 'string',
+                optional => 1,
+                default => 'nodename',
+                pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
+                description => "The ID for the mds, when omitted the same as the nodename",
+            },
+            hotstandby => {
+                type => 'boolean',
+                optional => 1,
+                default => '0',
+                description => "Determines whether a ceph-mds daemon should poll and replay the log of an active MDS. ".
+                    "Faster switch on MDS failure, but needs more idle resources.",
+            },
+        },
+    },
+    returns => { type => 'string' },
+    code => sub {
+        my ($param) = @_;
+
+        PVE::CephTools::check_ceph_installed('ceph_mds');
+
+        PVE::CephTools::check_ceph_inited();
+
+        my $rpcenv = PVE::RPCEnvironment::get();
+        my $authuser = $rpcenv->get_user();
+
+        my $nodename = $param->{node};
+        # must be fully qualified: only PVE::INotify is loaded by this module,
+        # a plain 'INotify' package does not exist here
+        $nodename = PVE::INotify::nodename() if $nodename eq 'localhost';
+
+        my $mds_id = $param->{name} // $nodename;
+
+        my $worker = sub {
+            my $timeout = PVE::CephTools::get_config('long_rados_timeout');
+            my $rados = PVE::RADOS->new(timeout => $timeout);
+
+            my $cfg = PVE::CephTools::parse_ceph_config();
+
+            my $section = "mds.$mds_id";
+
+            if (defined($cfg->{$section})) {
+                die "MDS '$mds_id' already referenced in ceph config, abort!\n";
+            }
+
+            if (!defined($cfg->{mds}->{keyring})) {
+                # $id isn't a perl variable but a ceph metavariable
+                my $keyring = '/var/lib/ceph/mds/ceph-$id/keyring';
+
+                $cfg->{mds}->{keyring} = $keyring;
+            }
+
+            $cfg->{$section}->{host} = $nodename;
+            $cfg->{$section}->{"mds standby for name"} = 'pve';
+
+            if ($param->{hotstandby}) {
+                $cfg->{$section}->{"mds standby replay"} = 'true';
+            }
+
+            PVE::CephTools::write_ceph_config($cfg);
+
+            eval { PVE::CephTools::create_mds($mds_id, $rados) };
+            if (my $err = $@) {
+                # we abort early if the section is defined, so we know that we
+                # wrote it at this point. Do not auto remove the service, could
+                # do real harm for previously manual setup MDS
+                warn "Encountered error, remove '$section' from ceph.conf\n";
+                # re-read the config instead of reusing the stale copy from above
+                $cfg = PVE::CephTools::parse_ceph_config();
+                delete $cfg->{$section};
+                PVE::CephTools::write_ceph_config($cfg);
+
+                die "$err\n";
+            }
+        };
+
+        return $rpcenv->fork_worker('cephcreatemds', "mds.$mds_id", $authuser, $worker);
+    }
+});
+
+# API endpoint: destroy a Ceph Metadata Server (MDS).
+# The forked worker first drops the 'mds.<name>' section from ceph.conf (if
+# there is one) and afterwards calls PVE::CephTools::destroy_mds().
+__PACKAGE__->register_method ({
+    name => 'destroymds',
+    path => '{name}',
+    method => 'DELETE',
+    description => "Destroy Ceph Metadata Server",
+    permissions => {
+        check => ['perm', '/', [ 'Sys.Modify' ]],
+    },
+    proxyto => 'node',
+    protected => 1,
+    parameters => {
+        additionalProperties => 0,
+        properties => {
+            node => get_standard_option('pve-node'),
+            name => {
+                type => 'string',
+                pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
+                description => 'The name (ID) of the mds',
+            },
+        },
+    },
+    returns => { type => 'string' },
+    code => sub {
+        my ($param) = @_;
+
+        my $rpcenv = PVE::RPCEnvironment::get();
+        my $authuser = $rpcenv->get_user();
+
+        PVE::CephTools::check_ceph_inited();
+
+        my $mds_id = $param->{name};
+        # name of the ceph.conf section, also used as the worker ID
+        my $section = "mds.$mds_id";
+
+        my $task = sub {
+            my $timeout = PVE::CephTools::get_config('long_rados_timeout');
+            my $rados = PVE::RADOS->new(timeout => $timeout);
+
+            my $conf = PVE::CephTools::parse_ceph_config();
+
+            # only rewrite the config when it actually references this MDS
+            if (defined($conf->{$section})) {
+                delete $conf->{$section};
+                PVE::CephTools::write_ceph_config($conf);
+            }
+
+            PVE::CephTools::destroy_mds($mds_id, $rados);
+        };
+
+        return $rpcenv->fork_worker('cephdestroymds', $section, $authuser, $task);
+    }
+});
+
+1;
diff --git a/PVE/API2/Ceph/Makefile b/PVE/API2/Ceph/Makefile
new file mode 100644
index 00000000..be4d740c
--- /dev/null
+++ b/PVE/API2/Ceph/Makefile
@@ -0,0 +1,15 @@
+# Makefile for the Perl modules located in PVE/API2/Ceph.
+include ../../../defines.mk
+
+# Perl modules installed from this directory.
+PERLSOURCE= \
+ MDS.pm
+
+# Nothing to build.
+all:
+
+# Remove editor backup files.
+.PHONY: clean
+clean:
+ rm -rf *~
+
+# Create ${PERLLIBDIR}/PVE/API2/Ceph and copy the modules from PERLSOURCE there.
+.PHONY: install
+install: ${PERLSOURCE}
+ install -d ${PERLLIBDIR}/PVE/API2/Ceph
+ install -m 0644 ${PERLSOURCE} ${PERLLIBDIR}/PVE/API2/Ceph
diff --git a/PVE/API2/Makefile b/PVE/API2/Makefile
index a62bf909..c5868d7f 100644
--- a/PVE/API2/Makefile
+++ b/PVE/API2/Makefile
@@ -1,6 +1,6 @@
include ../../defines.mk
-SUBDIRS=Hardware
+SUBDIRS=Hardware Ceph
PERLSOURCE = \
Replication.pm \
diff --git a/PVE/CLI/pveceph.pm b/PVE/CLI/pveceph.pm
index a5a04949..90878d9e 100755
--- a/PVE/CLI/pveceph.pm
+++ b/PVE/CLI/pveceph.pm
@@ -19,6 +19,7 @@ use PVE::Tools qw(run_command);
use PVE::JSONSchema qw(get_standard_option);
use PVE::CephTools;
use PVE::API2::Ceph;
+use PVE::API2::Ceph::MDS;
use PVE::CLIHandler;
@@ -175,6 +176,8 @@ our $cmddef = {
destroymon => [ 'PVE::API2::Ceph', 'destroymon', ['monid'], { node => $nodename }, $upid_exit],
createmgr => [ 'PVE::API2::Ceph', 'createmgr', [], { node => $nodename }, $upid_exit],
destroymgr => [ 'PVE::API2::Ceph', 'destroymgr', ['id'], { node => $nodename }, $upid_exit],
+ createmds => [ 'PVE::API2::Ceph::MDS', 'createmds', [], { node => $nodename }, $upid_exit],
+ # the positional argument has to be named like the API parameter, which is 'name' for destroymds
+ destroymds => [ 'PVE::API2::Ceph::MDS', 'destroymds', ['name'], { node => $nodename }, $upid_exit],
start => [ 'PVE::API2::Ceph', 'start', ['service'], { node => $nodename }, $upid_exit],
stop => [ 'PVE::API2::Ceph', 'stop', ['service'], { node => $nodename }, $upid_exit],
install => [ __PACKAGE__, 'install', [] ],
diff --git a/PVE/CephTools.pm b/PVE/CephTools.pm
index 8a9afa84..da31ccae 100644
--- a/PVE/CephTools.pm
+++ b/PVE/CephTools.pm
@@ -18,12 +18,14 @@ my $pve_mon_key_path = "/etc/pve/priv/$ccname.mon.keyring";
my $pve_ckeyring_path = "/etc/pve/priv/$ccname.client.admin.keyring";
my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/$ccname.keyring";
my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/$ccname.keyring";
+my $ceph_mds_data_dir = '/var/lib/ceph/mds';
my $ceph_service = {
ceph_bin => "/usr/bin/ceph",
ceph_mon => "/usr/bin/ceph-mon",
ceph_mgr => "/usr/bin/ceph-mgr",
- ceph_osd => "/usr/bin/ceph-osd"
+ ceph_osd => "/usr/bin/ceph-osd",
+ ceph_mds => "/usr/bin/ceph-mds",
};
my $config_hash = {
@@ -33,6 +35,7 @@ my $config_hash = {
pve_ckeyring_path => $pve_ckeyring_path,
ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
+ ceph_mds_data_dir => $ceph_mds_data_dir,
long_rados_timeout => 60,
};
@@ -297,4 +300,139 @@ sub systemd_managed {
}
}
+# Return an array reference with the IDs of all MDS which have a data
+# directory named '<cluster name>-<id>' below the local MDS data directory.
+sub list_local_mds_ids {
+    my @found_ids;
+
+    # dir_glob_foreach passes the matched entry first, then the capture group
+    my $collect_id = sub {
+        my ($entry, $id) = @_;
+        push @found_ids, $id;
+    };
+
+    PVE::Tools::dir_glob_foreach($ceph_mds_data_dir, qr/$ccname-(\S+)/, $collect_id);
+
+    return \@found_ids;
+}
+
+# Ask the monitors ('mds stat') for the current MDS state and return a hash
+# reference mapping each MDS name to { addr, rank, standby_replay, state }.
+#
+# Includes the standby daemons and the daemons listed in the MDS map of every
+# filesystem. $rados is optional, a new PVE::RADOS connection is opened when
+# it is not passed.
+sub get_cluster_mds_state {
+    my ($rados) = @_;
+
+    my $mds_state = {};
+
+    if (!defined($rados)) {
+        $rados = PVE::RADOS->new();
+    }
+
+    # copy the fields we report from a single MDS info entry
+    my $add_state = sub {
+        my ($mds) = @_;
+
+        my $state = {};
+        $state->{addr} = $mds->{addr};
+        $state->{rank} = $mds->{rank};
+        # normalize to a plain 0/1 value
+        $state->{standby_replay} = $mds->{standby_replay} ? 1 : 0;
+        $state->{state} = $mds->{state};
+
+        $mds_state->{$mds->{name}} = $state;
+    };
+
+    my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
+    my $fsmap = $mds_dump->{fsmap};
+
+    foreach my $mds (@{$fsmap->{standbys}}) {
+        $add_state->($mds);
+    }
+
+    # walk all filesystems, not just the first one, else the MDS serving any
+    # additional filesystem would never get their state reported
+    foreach my $fs_info (@{$fsmap->{filesystems}}) {
+        my $active_mds = $fs_info->{mdsmap}->{info};
+
+        # normally there's only one active MDS, but we can have multiple active for
+        # different ranks (e.g., different cephs path hierarchy). So just add all.
+        foreach my $mds (values %$active_mds) {
+            $add_state->($mds);
+        }
+    }
+
+    return $mds_state;
+}
+
+# Set up and start a new MDS daemon with the given ID on the local node.
+#
+# Creates the data directory, requests the daemon's key from the monitors
+# ('auth get-or-create') and stores it as keyring, changes the owner of the
+# directory to ceph:ceph and finally enables and starts the service through
+# ceph_service_cmd().
+#
+# Dies if the ID is purely numeric, if the data directory already exists or if
+# it cannot be created. $rados is optional, a new PVE::RADOS connection is
+# opened when it is not passed.
+sub create_mds {
+    my ($id, $rados) = @_;
+
+    # `ceph fs status` fails with numeric only ID.
+    die "ID: $id, numeric only IDs are not supported\n"
+        if $id =~ /^\d+$/;
+
+    if (!defined($rados)) {
+        $rados = PVE::RADOS->new();
+    }
+
+    # same base directory as list_local_mds_ids() scans
+    my $service_dir = "$ceph_mds_data_dir/$ccname-$id";
+    my $service_keyring = "$service_dir/keyring";
+    my $service_name = "mds.$id";
+
+    # never touch an existing directory, it may belong to a manual setup
+    die "ceph MDS directory '$service_dir' already exists\n"
+        if -d $service_dir;
+
+    print "creating MDS directory '$service_dir'\n";
+    eval { File::Path::mkpath($service_dir) };
+    if (my $err = $@) {
+        # keep the underlying reason, else the failure is hard to diagnose
+        chomp $err;
+        die "creation MDS directory '$service_dir' failed: $err\n";
+    }
+
+    # http://docs.ceph.com/docs/luminous/install/manual-deployment/#adding-mds
+    my $priv = [
+        mon => 'allow profile mds',
+        osd => 'allow rwx',
+        mds => 'allow *',
+    ];
+
+    print "creating keys for '$service_name'\n";
+    my $output = $rados->mon_command({
+        prefix => 'auth get-or-create',
+        entity => $service_name,
+        caps => $priv,
+        format => 'plain',
+    });
+
+    PVE::Tools::file_set_contents($service_keyring, $output);
+
+    print "setting ceph as owner for service directory\n";
+    run_command(["chown", 'ceph:ceph', '-R', $service_dir]);
+
+    print "enabling service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('enable', $service_name);
+    print "starting service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('start', $service_name);
+
+    return undef;
+}
+
+# Tear down the MDS daemon with the given ID on the local node: disable and
+# stop its service through ceph_service_cmd(), remove its data directory (only
+# a warning is emitted if it does not exist) and delete its auth entity via the
+# monitors. $rados is optional, a new PVE::RADOS connection is opened when it
+# is not passed.
+sub destroy_mds {
+    my ($id, $rados) = @_;
+
+    $rados = PVE::RADOS->new() if !defined($rados);
+
+    my $entity = "mds.$id";
+    my $data_dir = "/var/lib/ceph/mds/$ccname-$id";
+
+    print "disabling service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('disable', $entity);
+    print "stopping service 'ceph-mds\@$id.service'\n";
+    ceph_service_cmd('stop', $entity);
+
+    if (!-d $data_dir) {
+        warn "cannot cleanup MDS $id directory, '$data_dir' not found\n";
+    } else {
+        print "removing ceph-mds directory '$data_dir'\n";
+        File::Path::remove_tree($data_dir);
+    }
+
+    print "removing ceph auth for '$entity'\n";
+    my $auth_del_cmd = {
+        prefix => 'auth del',
+        entity => $entity,
+        format => 'plain',
+    };
+    $rados->mon_command($auth_del_cmd);
+
+    return undef;
+}
+
1;
--
2.19.1
More information about the pve-devel
mailing list