[pve-devel] applied: [PATCH manager 5/5] ceph pools: allow to create erasure code pools
Thomas Lamprecht
t.lamprecht at proxmox.com
Thu Apr 28 20:31:03 CEST 2022
On 28.04.22 13:58, Aaron Lauterer wrote:
> To use erasure coded (EC) pools for RBD storages, we need two pools. One
> regular replicated pool that will hold the RBD omap and other metadata
> and the EC pool which will hold the image data.
>
> The coupling happens when an RBD image is created by adding the
> --data-pool parameter. This is why we have the 'data-pool' parameter in
> the storage configuration.
>
> To follow already established semantics, we will create a 'X-metadata'
> and 'X-data' pool. The storage configuration is always added as it is
> the only thing that links the two together (besides naming schemes).
>
> Different pg_num defaults are chosen for the replicated metadata pool as
> it will not hold a lot of data.
>
> Signed-off-by: Aaron Lauterer <a.lauterer at proxmox.com>
> ---
> we need some more follow ups as setting pool parameters on ec pools will
> currently fail when trying to set the size. In that case, we most likely
> should adapt the gui as well and make inon changeable settings, such as
> size, read only.
>
> changes since rfc:
>
> ec profile parameters need to be configured during pool creation now. or
> alternatively the name of an ec profile can be provided.
>
> cleanup of ec profile & crush rule on pool destruction
>
> PVE/API2/Ceph/Pools.pm | 120 ++++++++++++++++++++++++++++++++++++++---
> PVE/Ceph/Tools.pm | 11 ++--
> 2 files changed, 122 insertions(+), 9 deletions(-)
>
applied but with a bit API interface rework, mostly tested the non-ec case for not getting
broken, so you way want to recheck if I did nothing too stupid for the EC case.
>
> @@ -330,10 +332,41 @@ __PACKAGE__->register_method ({
> properties => {
> node => get_standard_option('pve-node'),
> add_storages => {
> - description => "Configure VM and CT storage using the new pool.",
> + description => "Configure VM and CT storage using the new pool. ".
> + "Always enabled for erasure coded pools.",
> type => 'boolean',
> optional => 1,
> },
> + k => {
> + type => 'integer',
> + description => "Number of data chunks. Will create an erasure coded pool plus a ".
> + "replicated pool for metadata.",
> + optional => 1,
> + },
> + m => {
> + type => 'integer',
> + description => "Number of coding chunks. Will create an erasure coded pool plus a ".
> + "replicated pool for metadata.",
> + optional => 1,
> + },
> + 'failure-domain' => {
> + type => 'string',
> + description => "CRUSH failure domain. Default is 'host'. Will create an erasure ".
> + "coded pool plus a replicated pool for metadata.",
> + optional => 1,
> + },
> + 'device-class' => {
> + type => 'string',
> + description => "CRUSH device class. Will create an erasure coded pool plus a ".
> + "replicated pool for metadata.",
> + optional => 1,
> + },
> + ecprofile => {
> + description => "Override the erasure code (EC) profile to use. Will create an ".
> + "erasure coded pool plus a replicated pool for metadata.",
> + type => 'string',
> + optional => 1,
> + },
moved to a format string 'erasurce-coded', that allows also to drop most of the param
existence checking as we can set the correct optional'ness in there.
Also avoids bloating the API to much for just this.
> %{ $ceph_pool_common_options->() },
> },
> },
> @@ -344,10 +377,31 @@ __PACKAGE__->register_method ({
> PVE::Cluster::check_cfs_quorum();
> PVE::Ceph::Tools::check_ceph_configured();
>
> - my $pool = extract_param($param, 'name');
> + my $pool = my $name = extract_param($param, 'name');
> my $node = extract_param($param, 'node');
> my $add_storages = extract_param($param, 'add_storages');
>
> + my $ec_k = extract_param($param, 'k');
> + my $ec_m = extract_param($param, 'm');
> + my $ec_failure_domain = extract_param($param, 'failure-domain');
> + my $ec_device_class = extract_param($param, 'device-class');
> +
> + my $is_ec = 0;
> +
> + my $ecprofile = extract_param($param, 'ecprofile');
> + die "Erasure code profile '$ecprofile' does not exist.\n"
> + if $ecprofile && !PVE::Ceph::Tools::ecprofile_exists($ecprofile);
> +
> + if ($ec_k || $ec_m || $ec_failure_domain || $ec_device_class) {
> + die "'k' and 'm' parameters are needed for an erasure coded pool\n"
> + if !$ec_k || !$ec_m;
> +
> + $is_ec = 1;
> + }
> +
> + $is_ec = 1 if $ecprofile;
> + $add_storages = 1 if $is_ec;
> +
> my $rpcenv = PVE::RPCEnvironment::get();
> my $user = $rpcenv->get_user();
>
> @@ -370,13 +424,47 @@ __PACKAGE__->register_method ({
> $param->{application} //= 'rbd';
> $param->{pg_autoscale_mode} //= 'warn';
>
> - my $worker = sub {
> + my $data_param = {};
> + my $data_pool = '';
> + if (!$ecprofile) {
> + $ecprofile = PVE::Ceph::Tools::get_ecprofile_name($pool);
> + eval {
> + PVE::Ceph::Tools::create_ecprofile(
> + $ecprofile,
> + $ec_k,
> + $ec_m,
> + $ec_failure_domain,
> + $ec_device_class,
> + );
Dominik is right, this can block and should happen in the worker (moved)
> + };
> + die "could not create erasure code profile '$ecprofile': $@\n" if $@;
> + }
> +
> + if ($is_ec) {
> + # copy all params, should be a flat hash
> + $data_param = { map { $_ => $param->{$_} } keys %$param };
>
> + $data_param->{pool_type} = 'erasure';
> + $data_param->{allow_ec_overwrites} = 'true';
> + $data_param->{erasure_code_profile} = $ecprofile;
> + delete $data_param->{size};
> + delete $data_param->{min_size};
> +
> + # metadata pool should be ok with 32 PGs
> + $param->{pg_num} = 32;
> +
> + $pool = "${name}-metadata";
> + $data_pool = "${name}-data";
> + }
> +
> + my $worker = sub {
> PVE::Ceph::Tools::create_pool($pool, $param);
>
> + PVE::Ceph::Tools::create_pool($data_pool, $data_param) if $is_ec;
> +
> if ($add_storages) {
> - eval { $add_storage->($pool, "${pool}") };
> - die "adding PVE storage for ceph pool '$pool' failed: $@\n" if $@;
> + eval { $add_storage->($pool, "${name}", $data_pool) };
> + die "adding PVE storage for ceph pool '$name' failed: $@\n" if $@;
> }
> };
>
> @@ -414,6 +502,12 @@ __PACKAGE__->register_method ({
> optional => 1,
> default => 0,
> },
> + remove_ecprofile => {
> + description => "Remove the erasure code profile. Used for erasure code pools. Default is true",
"Used for erasure code pools" is a bit redundant, I dropped that and mentioned that the default is true
"if applicable".
> + type => 'boolean',
> + optional => 1,
> + default => 1,
> + },
> },
> },
> returns => { type => 'string' },
> @@ -428,6 +522,7 @@ __PACKAGE__->register_method ({
> if $param->{remove_storages};
>
> my $pool = $param->{name};
> + my $remove_ecprofile = $param->{remove_ecprofile} // 1;
>
> my $worker = sub {
> my $storages = $get_storages->($pool);
> @@ -447,8 +542,21 @@ __PACKAGE__->register_method ({
> }
> }
>
added $rados reuse to all helpers and used it here for all calls.
> + my $pool_properties = PVE::Ceph::Tools::get_pool_properties($pool);
> +
> PVE::Ceph::Tools::destroy_pool($pool);
>
> + if (my $ecprofile = $pool_properties->{erasure_code_profile}) {
> + my $crush_rule = $pool_properties->{crush_rule};
> + eval { PVE::Ceph::Tools::destroy_crush_rule($crush_rule); };
> + warn "removing crush rule '${crush_rule}' failed: $@\n" if $@;
> +
> + if ($remove_ecprofile) {
> + eval { PVE::Ceph::Tools::destroy_ecprofile($ecprofile) };
> + warn "removing EC profile '${ecprofile}' failed: $@\n" if $@;
> + }
> + }
> +
> if ($param->{remove_storages}) {
> my $err;
> foreach my $storeid (keys %$storages) {
> diff --git a/PVE/Ceph/Tools.pm b/PVE/Ceph/Tools.pm
> index 7e94ee58..f50cec67 100644
> --- a/PVE/Ceph/Tools.pm
> +++ b/PVE/Ceph/Tools.pm
> @@ -8,7 +8,7 @@ use File::Basename;
> use IO::File;
> use JSON;
>
> -use PVE::Tools qw(run_command dir_glob_foreach);
> +use PVE::Tools qw(run_command dir_glob_foreach extract_param);
> use PVE::Cluster qw(cfs_read_file);
> use PVE::RADOS;
> use PVE::Ceph::Services;
> @@ -277,12 +277,17 @@ sub create_pool {
>
> my $pg_num = $param->{pg_num} || 128;
>
> - $rados->mon_command({
> + my $mon_params = {
> prefix => "osd pool create",
> pool => $pool,
> pg_num => int($pg_num),
> format => 'plain',
> - });
> + };
> + $mon_params->{pool_type} = extract_param($param, 'pool_type') if $param->{pool_type};
> + $mon_params->{erasure_code_profile} = extract_param($param, 'erasure_code_profile')
> + if $param->{erasure_code_profile};
> +
> + $rados->mon_command($mon_params);
>
> set_pool($pool, $param);
>
More information about the pve-devel
mailing list