[pve-devel] [PATCH manager v2 2/5] ceph: add MDS create/delete/list API
Dominik Csapak
d.csapak at proxmox.com
Fri Nov 23 11:02:00 CET 2018
One minor thing inline, the rest looks good.
On 11/22/18 8:34 PM, Thomas Lamprecht wrote:
> Allow to create, list and destroy a Ceph Metadata Server (MDS) over
> the API and the CLI `pveceph` tool.
>
> Besides setting up the local systemd service template and the MDS
> data directory we also add a reference to the MDS in the ceph.conf.
> We note the backing host (node) from the respective MDS and set up a
> 'mds standby for name' = 'pve' so that the PVE created ones are a
> single group. If we decide to add integration for rank/path specific
> MDS (possibly useful for CephFS with quite a bit of load) then this
> may help as a starting point.
>
> On create, check early if a reference already exists in ceph.conf and
> abort in that case. If we only see existing data directories later
> on we do not remove them, they could well be from an older manual
> create - where it's possibly dangerous to just remove it. Let the
> user handle it themselves in that case.
>
> Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
> Co-authored-by: Alwin Antreich <a.antreich at proxmox.com>
> ---
>
> changes v1 -> v2:
> * fix directory index, return more info
> * added get_cluster_mds_state
>
>
> PVE/API2/Ceph.pm | 7 ++
> PVE/API2/Ceph/MDS.pm | 247 +++++++++++++++++++++++++++++++++++++++++
> PVE/API2/Ceph/Makefile | 15 +++
> PVE/API2/Makefile | 2 +-
> PVE/CLI/pveceph.pm | 3 +
> PVE/CephTools.pm | 140 ++++++++++++++++++++++-
> 6 files changed, 412 insertions(+), 2 deletions(-)
> create mode 100644 PVE/API2/Ceph/MDS.pm
> create mode 100644 PVE/API2/Ceph/Makefile
>
> diff --git a/PVE/API2/Ceph.pm b/PVE/API2/Ceph.pm
> index a6eec24a..d3e8d665 100644
> --- a/PVE/API2/Ceph.pm
> +++ b/PVE/API2/Ceph.pm
> @@ -548,6 +548,7 @@ use PVE::RPCEnvironment;
> use PVE::Storage;
> use PVE::Tools qw(run_command file_get_contents file_set_contents);
>
> +use PVE::API2::Ceph::MDS;
> use PVE::API2::Storage::Config;
>
> use base qw(PVE::RESTHandler);
> @@ -559,6 +560,11 @@ __PACKAGE__->register_method ({
> path => 'osd',
> });
>
> +__PACKAGE__->register_method ({
> + subclass => "PVE::API2::Ceph::MDS",
> + path => 'mds',
> +});
> +
> __PACKAGE__->register_method ({
> name => 'index',
> path => '',
> @@ -590,6 +596,7 @@ __PACKAGE__->register_method ({
> { name => 'mon' },
> { name => 'osd' },
> { name => 'pools' },
> + { name => 'mds' },
> { name => 'stop' },
> { name => 'start' },
> { name => 'status' },
> diff --git a/PVE/API2/Ceph/MDS.pm b/PVE/API2/Ceph/MDS.pm
> new file mode 100644
> index 00000000..30d03285
> --- /dev/null
> +++ b/PVE/API2/Ceph/MDS.pm
> @@ -0,0 +1,247 @@
> +package PVE::API2::Ceph::MDS;
> +
> +use strict;
> +use warnings;
> +
> +use PVE::CephTools;
> +use PVE::INotify;
> +use PVE::JSONSchema qw(get_standard_option);
> +use PVE::RADOS;
> +use PVE::RESTHandler;
> +use PVE::RPCEnvironment;
> +
> +use base qw(PVE::RESTHandler);
> +
> +__PACKAGE__->register_method ({
> + name => 'index',
> + path => '',
> + method => 'GET',
> + description => "MDS directory index.",
> + permissions => {
> + check => ['perm', '/', [ 'Sys.Audit', 'Datastore.Audit' ], any => 1],
> + },
> + proxyto => 'node',
> + protected => 1,
> + parameters => {
> + additionalProperties => 0,
> + properties => {
> + node => get_standard_option('pve-node'),
> + },
> + },
> + returns => {
> + type => 'array',
> + items => {
> + type => "object",
> + properties => {
> + name => {
> + description => "The name (ID) for the MDS",
> + },
> + addr => {
> + type => 'string',
> + optional => 1,
> + },
> + host => {
> + type => 'string',
> + optional => 1,
> + },
> + state => {
> + type => 'string',
> + description => 'State of the MDS',
> + },
> + standby_replay => {
> + type => 'boolean',
> + optional => 1,
> + description => 'If true, the standby MDS is polling the active MDS for faster recovery (hot standby).',
> + },
> + rank => {
> + type => 'integer',
> + optional => 1,
> + },
> + },
> + },
> + links => [ { rel => 'child', href => "{name}" } ],
> + },
> + code => sub {
> + my ($param) = @_;
> +
> + my $res = [];
> +
> + my $cfg = PVE::CephTools::parse_ceph_config();
> +
> + my $mds_hash = {};
> +
> + foreach my $section (keys %$cfg) {
> + my $d = $cfg->{$section};
> +
> + if ($section =~ m/^mds\.(\S+)$/) {
> + my $mds_id = $1;
> + if (defined($d->{host})) {
> + $mds_hash->{$mds_id} = {
> + name => $mds_id,
> + state => 'unknown',
> + addr => $d->{host},
> + host => $d->{host},
> + };
> + }
> + }
> + }
> +
> + if (scalar(keys %$mds_hash) > 0) {
> + # does not include configured but stopped ones
> + my $mds_state = PVE::CephTools::get_cluster_mds_state();
> +
> + foreach my $name (keys %$mds_state) {
> + my $d = $mds_state->{$name};
> + # just overwrite, this always provides more info
> + map { $mds_hash->{$name}->{$_} = $d->{$_} } keys %$d;
> + }
> + }
> +
> + return PVE::RESTHandler::hash_to_array($mds_hash, 'name');
> + }
> +});
> +
> +__PACKAGE__->register_method ({
> + name => 'createmds',
> + path => '{name}',
> + method => 'POST',
> + description => "Create Ceph Metadata Server (MDS)",
> + proxyto => 'node',
> + protected => 1,
> + permissions => {
> + check => ['perm', '/', [ 'Sys.Modify' ]],
> + },
> + parameters => {
> + additionalProperties => 0,
> + properties => {
> + node => get_standard_option('pve-node'),
> + name => {
> + type => 'string',
> + optional => 1,
> + default => 'nodename',
> + pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
> + description => "The ID for the mds, when omitted the same as the nodename",
> + },
> + hotstandby => {
> + type => 'boolean',
> + optional => 1,
> + default => '0',
> + description => "Determines whether a ceph-mds daemon should poll and replay the log of an active MDS. ".
> + "Faster switch on MDS failure, but needs more idle resources.",
> + },
> + },
> + },
> + returns => { type => 'string' },
> + code => sub {
> + my ($param) = @_;
> +
> + PVE::CephTools::check_ceph_installed('ceph_mds');
> +
> + PVE::CephTools::check_ceph_inited();
> +
> + my $rpcenv = PVE::RPCEnvironment::get();
> + my $authuser = $rpcenv->get_user();
> +
> + my $nodename = $param->{node};
> + $nodename = INotify::nodename() if $nodename eq 'localhost';
> +
> + my $mds_id = $param->{name} // $nodename;
> +
> + my $worker = sub {
> + my $timeout = PVE::CephTools::get_config('long_rados_timeout');
> + my $rados = PVE::RADOS->new(timeout => $timeout);
> +
> + my $cfg = PVE::CephTools::parse_ceph_config();
> +
> + my $section = "mds.$mds_id";
> +
> + if (defined($cfg->{$section})) {
> + die "MDS '$mds_id' already referenced in ceph config, abort!\n"
> + }
> +
> + if (!defined($cfg->{mds}->{keyring})) {
> + # $id isn't a perl variable but a ceph metavariable
> + my $keyring = '/var/lib/ceph/mds/ceph-$id/keyring';
> +
> + $cfg->{mds}->{keyring} = $keyring;
> + }
> +
> + $cfg->{$section}->{host} = $nodename;
> + $cfg->{$section}->{"mds standby for name"} = 'pve';
> +
> + if ($param->{hotstandby}) {
> + $cfg->{$section}->{"mds standby replay"} = 'true';
> + }
> +
> + PVE::CephTools::write_ceph_config($cfg);
> +
> + eval { PVE::CephTools::create_mds($mds_id, $rados) };
> + if (my $err = $@) {
> + # we abort early if the section is defined, so we know that we
> + # wrote it at this point. Do not auto remove the service, could
> + # do real harm for previously manual setup MDS
> + warn "Encountered error, remove '$section' from ceph.conf\n";
> + $cfg = PVE::CephTools::parse_ceph_config();
> + delete $cfg->{$section};
> + PVE::CephTools::write_ceph_config($cfg);
> +
> + die "$err\n";
> + }
> + };
> +
> + return $rpcenv->fork_worker('cephcreatemds', "mds.$mds_id", $authuser, $worker);
> + }
> +});
> +
> +__PACKAGE__->register_method ({
> + name => 'destroymds',
> + path => '{name}',
> + method => 'DELETE',
> + description => "Destroy Ceph Metadata Server",
> + proxyto => 'node',
> + protected => 1,
> + permissions => {
> + check => ['perm', '/', [ 'Sys.Modify' ]],
> + },
> + parameters => {
> + additionalProperties => 0,
> + properties => {
> + node => get_standard_option('pve-node'),
> + name => {
> + description => 'The name (ID) of the mds',
> + type => 'string',
> + pattern => '[a-zA-Z0-9]([a-zA-Z0-9\-]*[a-zA-Z0-9])?',
> + },
> + },
> + },
> + returns => { type => 'string' },
> + code => sub {
> + my ($param) = @_;
> +
> + my $rpcenv = PVE::RPCEnvironment::get();
> +
> + my $authuser = $rpcenv->get_user();
> +
> + PVE::CephTools::check_ceph_inited();
> +
> + my $mds_id = $param->{name};
> +
> + my $worker = sub {
> + my $timeout = PVE::CephTools::get_config('long_rados_timeout');
> + my $rados = PVE::RADOS->new(timeout => $timeout);
> +
> + my $cfg = PVE::CephTools::parse_ceph_config();
> +
> + if (defined($cfg->{"mds.$mds_id"})) {
> + delete $cfg->{"mds.$mds_id"};
> + PVE::CephTools::write_ceph_config($cfg);
> + }
> +
> + PVE::CephTools::destroy_mds($mds_id, $rados);
> + };
> +
> + return $rpcenv->fork_worker('cephdestroymds', "mds.$mds_id", $authuser, $worker);
> + }
> +});
> +
> +1;
> diff --git a/PVE/API2/Ceph/Makefile b/PVE/API2/Ceph/Makefile
> new file mode 100644
> index 00000000..be4d740c
> --- /dev/null
> +++ b/PVE/API2/Ceph/Makefile
> @@ -0,0 +1,15 @@
> +include ../../../defines.mk
> +
> +PERLSOURCE= \
> + MDS.pm
> +
> +all:
> +
> +.PHONY: clean
> +clean:
> + rm -rf *~
> +
> +.PHONY: install
> +install: ${PERLSOURCE}
> + install -d ${PERLLIBDIR}/PVE/API2/Ceph
> + install -m 0644 ${PERLSOURCE} ${PERLLIBDIR}/PVE/API2/Ceph
> diff --git a/PVE/API2/Makefile b/PVE/API2/Makefile
> index a62bf909..c5868d7f 100644
> --- a/PVE/API2/Makefile
> +++ b/PVE/API2/Makefile
> @@ -1,6 +1,6 @@
> include ../../defines.mk
>
> -SUBDIRS=Hardware
> +SUBDIRS=Hardware Ceph
>
> PERLSOURCE = \
> Replication.pm \
> diff --git a/PVE/CLI/pveceph.pm b/PVE/CLI/pveceph.pm
> index a5a04949..90878d9e 100755
> --- a/PVE/CLI/pveceph.pm
> +++ b/PVE/CLI/pveceph.pm
> @@ -19,6 +19,7 @@ use PVE::Tools qw(run_command);
> use PVE::JSONSchema qw(get_standard_option);
> use PVE::CephTools;
> use PVE::API2::Ceph;
> +use PVE::API2::Ceph::MDS;
>
> use PVE::CLIHandler;
>
> @@ -175,6 +176,8 @@ our $cmddef = {
> destroymon => [ 'PVE::API2::Ceph', 'destroymon', ['monid'], { node => $nodename }, $upid_exit],
> createmgr => [ 'PVE::API2::Ceph', 'createmgr', [], { node => $nodename }, $upid_exit],
> destroymgr => [ 'PVE::API2::Ceph', 'destroymgr', ['id'], { node => $nodename }, $upid_exit],
> + createmds => [ 'PVE::API2::Ceph::MDS', 'createmds', [], { node => $nodename }, $upid_exit],
> + destroymds => [ 'PVE::API2::Ceph::MDS', 'destroymds', ['id'], { node => $nodename }, $upid_exit],
> start => [ 'PVE::API2::Ceph', 'start', ['service'], { node => $nodename }, $upid_exit],
> stop => [ 'PVE::API2::Ceph', 'stop', ['service'], { node => $nodename }, $upid_exit],
> install => [ __PACKAGE__, 'install', [] ],
> diff --git a/PVE/CephTools.pm b/PVE/CephTools.pm
> index 8a9afa84..da31ccae 100644
> --- a/PVE/CephTools.pm
> +++ b/PVE/CephTools.pm
> @@ -18,12 +18,14 @@ my $pve_mon_key_path = "/etc/pve/priv/$ccname.mon.keyring";
> my $pve_ckeyring_path = "/etc/pve/priv/$ccname.client.admin.keyring";
> my $ceph_bootstrap_osd_keyring = "/var/lib/ceph/bootstrap-osd/$ccname.keyring";
> my $ceph_bootstrap_mds_keyring = "/var/lib/ceph/bootstrap-mds/$ccname.keyring";
> +my $ceph_mds_data_dir = '/var/lib/ceph/mds';
>
> my $ceph_service = {
> ceph_bin => "/usr/bin/ceph",
> ceph_mon => "/usr/bin/ceph-mon",
> ceph_mgr => "/usr/bin/ceph-mgr",
> - ceph_osd => "/usr/bin/ceph-osd"
> + ceph_osd => "/usr/bin/ceph-osd",
> + ceph_mds => "/usr/bin/ceph-mds",
> };
>
> my $config_hash = {
> @@ -33,6 +35,7 @@ my $config_hash = {
> pve_ckeyring_path => $pve_ckeyring_path,
> ceph_bootstrap_osd_keyring => $ceph_bootstrap_osd_keyring,
> ceph_bootstrap_mds_keyring => $ceph_bootstrap_mds_keyring,
> + ceph_mds_data_dir => $ceph_mds_data_dir,
> long_rados_timeout => 60,
> };
>
> @@ -297,4 +300,139 @@ sub systemd_managed {
> }
> }
>
> +sub list_local_mds_ids {
> + my $mds_list = [];
> +
> + PVE::Tools::dir_glob_foreach($ceph_mds_data_dir, qr/$ccname-(\S+)/, sub {
> + my (undef, $mds_id) = @_;
> + push @$mds_list, $mds_id;
> + });
> +
> + return $mds_list;
> +}
This is not used as far as I can see? It could be removed in a fixup, of course.
> +
> +sub get_cluster_mds_state {
> + my ($rados) = @_;
> +
> + my $mds_state = {};
> +
> + if (!defined($rados)) {
> + $rados = PVE::RADOS->new();
> + }
> +
> + my $add_state = sub {
> + my ($mds) = @_;
> +
> + my $state = {};
> + $state->{addr} = $mds->{addr};
> + $state->{rank} = $mds->{rank};
> + $state->{standby_replay} = $mds->{standby_replay} ? 1 : 0;
> + $state->{state} = $mds->{state};
> +
> + $mds_state->{$mds->{name}} = $state;
> + };
> +
> + my $mds_dump = $rados->mon_command({ prefix => 'mds stat' });
> + my $fsmap = $mds_dump->{fsmap};
> +
> +
> + foreach my $mds (@{$fsmap->{standbys}}) {
> + $add_state->($mds);
> + }
> +
> + my $fs_info = $fsmap->{filesystems}->[0];
> + my $active_mds = $fs_info->{mdsmap}->{info};
> +
> + # normally there'S only one active MDS, but we can have multiple active for
> + # different ranks (e.g., different cephs path hierachy). So just add all.
> + foreach my $mds (values %$active_mds) {
> + $add_state->($mds);
> + }
> +
> + return $mds_state;
> +}
> +
> +sub create_mds {
> + my ($id, $rados) = @_;
> +
> + # `ceph fs status` fails with numeric only ID.
> + die "ID: $id, numeric only IDs are not supported\n"
> + if $id =~ /^\d+$/;
> +
> + if (!defined($rados)) {
> + $rados = PVE::RADOS->new();
> + }
> +
> + my $service_dir = "/var/lib/ceph/mds/$ccname-$id";
> + my $service_keyring = "$service_dir/keyring";
> + my $service_name = "mds.$id";
> +
> + die "ceph MDS directory '$service_dir' already exists\n"
> + if -d $service_dir;
> +
> + print "creating MDS directory '$service_dir'\n";
> + eval { File::Path::mkpath($service_dir) };
> + my $err = $@;
> + die "creation MDS directory '$service_dir' failed\n" if $err;
> +
> + # http://docs.ceph.com/docs/luminous/install/manual-deployment/#adding-mds
> + my $priv = [
> + mon => 'allow profile mds',
> + osd => 'allow rwx',
> + mds => 'allow *',
> + ];
> +
> + print "creating keys for '$service_name'\n";
> + my $output = $rados->mon_command({
> + prefix => 'auth get-or-create',
> + entity => $service_name,
> + caps => $priv,
> + format => 'plain',
> + });
> +
> + PVE::Tools::file_set_contents($service_keyring, $output);
> +
> + print "setting ceph as owner for service directory\n";
> + run_command(["chown", 'ceph:ceph', '-R', $service_dir]);
> +
> + print "enabling service 'ceph-mds\@$id.service'\n";
> + ceph_service_cmd('enable', $service_name);
> + print "starting service 'ceph-mds\@$id.service'\n";
> + ceph_service_cmd('start', $service_name);
> +
> + return undef;
> +};
> +
> +sub destroy_mds {
> + my ($id, $rados) = @_;
> +
> + if (!defined($rados)) {
> + $rados = PVE::RADOS->new();
> + }
> +
> + my $service_name = "mds.$id";
> + my $service_dir = "/var/lib/ceph/mds/$ccname-$id";
> +
> + print "disabling service 'ceph-mds\@$id.service'\n";
> + ceph_service_cmd('disable', $service_name);
> + print "stopping service 'ceph-mds\@$id.service'\n";
> + ceph_service_cmd('stop', $service_name);
> +
> + if (-d $service_dir) {
> + print "removing ceph-mds directory '$service_dir'\n";
> + File::Path::remove_tree($service_dir);
> + } else {
> + warn "cannot cleanup MDS $id directory, '$service_dir' not found\n"
> + }
> +
> + print "removing ceph auth for '$service_name'\n";
> + $rados->mon_command({
> + prefix => 'auth del',
> + entity => $service_name,
> + format => 'plain'
> + });
> +
> + return undef;
> +};
> +
> 1;
>
More information about the pve-devel
mailing list