[pve-devel] applied: [PATCH manager 5/5] ceph pools: allow to create erasure code pools

Thomas Lamprecht t.lamprecht at proxmox.com
Thu Apr 28 20:31:03 CEST 2022


On 28.04.22 13:58, Aaron Lauterer wrote:
> To use erasure coded (EC) pools for RBD storages, we need two pools. One
> regular replicated pool that will hold the RBD omap and other metadata
> and the EC pool which will hold the image data.
> 
> The coupling happens when an RBD image is created by adding the
> --data-pool parameter. This is why we have the 'data-pool' parameter in
> the storage configuration.
> 
> To follow already established semantics, we will create a 'X-metadata'
> and 'X-data' pool. The storage configuration is always added as it is
> the only thing that links the two together (besides naming schemes).
> 
> Different pg_num defaults are chosen for the replicated metadata pool as
> it will not hold a lot of data.
> 
> Signed-off-by: Aaron Lauterer <a.lauterer at proxmox.com>
> ---
> we need some more follow ups as setting pool parameters on ec pools will
> currently fail when trying to set the size. In that case, we most likely
> should adapt the gui as well and make inon changeable settings, such as
> size, read only.
> 
> changes since rfc:
> 
> ec profile parameters need to be configured during pool creation now. or
> alternatively the name of an ec profile can be provided.
> 
> cleanup of ec profile & crush rule on pool destruction
> 
>  PVE/API2/Ceph/Pools.pm | 120 ++++++++++++++++++++++++++++++++++++++---
>  PVE/Ceph/Tools.pm      |  11 ++--
>  2 files changed, 122 insertions(+), 9 deletions(-)
> 

applied but with a bit API interface rework, mostly tested the non-ec case for not getting
broken, so you way want to recheck if I did nothing too stupid for the EC case.

>  
> @@ -330,10 +332,41 @@ __PACKAGE__->register_method ({
>  	properties => {
>  	    node => get_standard_option('pve-node'),
>  	    add_storages => {
> -		description => "Configure VM and CT storage using the new pool.",
> +		description => "Configure VM and CT storage using the new pool. ".
> +				"Always enabled for erasure coded pools.",
>  		type => 'boolean',
>  		optional => 1,
>  	    },
> +	    k => {
> +		type => 'integer',
> +		description => "Number of data chunks. Will create an erasure coded pool plus a ".
> +				"replicated pool for metadata.",
> +		optional => 1,
> +	    },
> +	    m => {
> +		type => 'integer',
> +		description => "Number of coding chunks. Will create an erasure coded pool plus a ".
> +				"replicated pool for metadata.",
> +		optional => 1,
> +	    },
> +	    'failure-domain' => {
> +		type => 'string',
> +		description => "CRUSH failure domain. Default is 'host'. Will create an erasure ".
> +				"coded pool plus a replicated pool for metadata.",
> +		optional => 1,
> +	    },
> +	    'device-class' => {
> +		type => 'string',
> +		description => "CRUSH device class. Will create an erasure coded pool plus a ".
> +				"replicated pool for metadata.",
> +		optional => 1,
> +	    },
> +	    ecprofile => {
> +		description => "Override the erasure code (EC) profile to use. Will create an ".
> +				"erasure coded pool plus a replicated pool for metadata.",
> +		type => 'string',
> +		optional => 1,
> +	    },

moved to a format string 'erasurce-coded', that allows also to drop most of the param
existence checking as we can set the correct optional'ness in there.
Also avoids bloating the API to much for just this.

>  	    %{ $ceph_pool_common_options->() },
>  	},
>      },
> @@ -344,10 +377,31 @@ __PACKAGE__->register_method ({
>  	PVE::Cluster::check_cfs_quorum();
>  	PVE::Ceph::Tools::check_ceph_configured();
>  
> -	my $pool = extract_param($param, 'name');
> +	my $pool = my $name = extract_param($param, 'name');
>  	my $node = extract_param($param, 'node');
>  	my $add_storages = extract_param($param, 'add_storages');
>  
> +	my $ec_k = extract_param($param, 'k');
> +	my $ec_m = extract_param($param, 'm');
> +	my $ec_failure_domain = extract_param($param, 'failure-domain');
> +	my $ec_device_class = extract_param($param, 'device-class');
> +
> +	my $is_ec = 0;
> +
> +	my $ecprofile = extract_param($param, 'ecprofile');
> +	die "Erasure code profile '$ecprofile' does not exist.\n"
> +	    if $ecprofile && !PVE::Ceph::Tools::ecprofile_exists($ecprofile);
> +
> +	if ($ec_k || $ec_m || $ec_failure_domain || $ec_device_class) {
> +	    die "'k' and 'm' parameters are needed for an erasure coded pool\n"
> +		if !$ec_k || !$ec_m;
> +
> +	    $is_ec = 1;
> +	}
> +
> +	$is_ec = 1 if $ecprofile;
> +	$add_storages = 1 if $is_ec;
> +
>  	my $rpcenv = PVE::RPCEnvironment::get();
>  	my $user = $rpcenv->get_user();
>  
> @@ -370,13 +424,47 @@ __PACKAGE__->register_method ({
>  	$param->{application} //= 'rbd';
>  	$param->{pg_autoscale_mode} //= 'warn';
>  
> -	my $worker = sub {
> +	my $data_param = {};
> +	my $data_pool = '';
> +	if (!$ecprofile) {
> +	    $ecprofile = PVE::Ceph::Tools::get_ecprofile_name($pool);
> +	    eval {
> +		PVE::Ceph::Tools::create_ecprofile(
> +		    $ecprofile,
> +		    $ec_k,
> +		    $ec_m,
> +		    $ec_failure_domain,
> +		    $ec_device_class,
> +		);

Dominik is right, this can block and should happen in the worker (moved)

> +	    };
> +	    die "could not create erasure code profile '$ecprofile': $@\n" if $@;
> +	}
> +
> +	if ($is_ec) {
> +	    # copy all params, should be a flat hash
> +	    $data_param = { map { $_ => $param->{$_} } keys %$param };
>  
> +	    $data_param->{pool_type} = 'erasure';
> +	    $data_param->{allow_ec_overwrites} = 'true';
> +	    $data_param->{erasure_code_profile} = $ecprofile;
> +	    delete $data_param->{size};
> +	    delete $data_param->{min_size};
> +
> +	    # metadata pool should be ok with 32 PGs
> +	    $param->{pg_num} = 32;
> +
> +	    $pool = "${name}-metadata";
> +	    $data_pool = "${name}-data";
> +	}
> +
> +	my $worker = sub {
>  	    PVE::Ceph::Tools::create_pool($pool, $param);
>  
> +	    PVE::Ceph::Tools::create_pool($data_pool, $data_param) if $is_ec;
> +
>  	    if ($add_storages) {
> -		eval { $add_storage->($pool, "${pool}") };
> -		die "adding PVE storage for ceph pool '$pool' failed: $@\n" if $@;
> +		eval { $add_storage->($pool, "${name}", $data_pool) };
> +		die "adding PVE storage for ceph pool '$name' failed: $@\n" if $@;
>  	    }
>  	};
>  
> @@ -414,6 +502,12 @@ __PACKAGE__->register_method ({
>  		optional => 1,
>  		default => 0,
>  	    },
> +	    remove_ecprofile => {
> +		description => "Remove the erasure code profile. Used for erasure code pools. Default is true",

"Used for erasure code pools" is a bit redundant, I dropped that and mentioned that the default is true
"if applicable".

> +		type => 'boolean',
> +		optional => 1,
> +		default => 1,
> +	    },
>  	},
>      },
>      returns => { type => 'string' },
> @@ -428,6 +522,7 @@ __PACKAGE__->register_method ({
>  	    if $param->{remove_storages};
>  
>  	my $pool = $param->{name};
> +	my $remove_ecprofile = $param->{remove_ecprofile} // 1;
>  
>  	my $worker = sub {
>  	    my $storages = $get_storages->($pool);
> @@ -447,8 +542,21 @@ __PACKAGE__->register_method ({
>  		}
>  	    }
>  

added $rados reuse to all helpers and used it here for all calls.

> +	    my $pool_properties = PVE::Ceph::Tools::get_pool_properties($pool);
> +
>  	    PVE::Ceph::Tools::destroy_pool($pool);
>  
> +	    if (my $ecprofile = $pool_properties->{erasure_code_profile}) {
> +		my $crush_rule = $pool_properties->{crush_rule};
> +		eval { PVE::Ceph::Tools::destroy_crush_rule($crush_rule); };
> +		warn "removing crush rule '${crush_rule}' failed: $@\n" if $@;
> +
> +		if ($remove_ecprofile) {
> +		    eval { PVE::Ceph::Tools::destroy_ecprofile($ecprofile) };
> +		    warn "removing EC profile '${ecprofile}' failed: $@\n" if $@;
> +		}
> +	    }
> +
>  	    if ($param->{remove_storages}) {
>  		my $err;
>  		foreach my $storeid (keys %$storages) {
> diff --git a/PVE/Ceph/Tools.pm b/PVE/Ceph/Tools.pm
> index 7e94ee58..f50cec67 100644
> --- a/PVE/Ceph/Tools.pm
> +++ b/PVE/Ceph/Tools.pm
> @@ -8,7 +8,7 @@ use File::Basename;
>  use IO::File;
>  use JSON;
>  
> -use PVE::Tools qw(run_command dir_glob_foreach);
> +use PVE::Tools qw(run_command dir_glob_foreach extract_param);
>  use PVE::Cluster qw(cfs_read_file);
>  use PVE::RADOS;
>  use PVE::Ceph::Services;
> @@ -277,12 +277,17 @@ sub create_pool {
>  
>      my $pg_num = $param->{pg_num} || 128;
>  
> -    $rados->mon_command({
> +    my $mon_params = {
>  	prefix => "osd pool create",
>  	pool => $pool,
>  	pg_num => int($pg_num),
>  	format => 'plain',
> -    });
> +    };
> +    $mon_params->{pool_type} = extract_param($param, 'pool_type') if $param->{pool_type};
> +    $mon_params->{erasure_code_profile} = extract_param($param, 'erasure_code_profile')
> +	if $param->{erasure_code_profile};
> +
> +    $rados->mon_command($mon_params);
>  
>      set_pool($pool, $param);
>  






More information about the pve-devel mailing list