[pve-devel] [PATCH manager 11/20] ceph: osd: rework creation with ceph-volume
Alexandre DERUMIER
aderumier at odiso.com
Wed Jun 5 06:45:29 CEST 2019
Hi,
It would be great to have two other improvements:
add support for multiple OSDs per disk (can be useful for NVMe)
#ceph-volume lvm batch --osds-per-device <numberofosd> /dev/sdX
add support for encryption
#ceph-volume lvm ..... --dmcrypt
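For illustration only (device name and OSD count are made up here, and the exact flags should be double-checked against the installed ceph-volume version), those calls could look like:
#ceph-volume lvm batch --osds-per-device 4 /dev/nvme0n1
#ceph-volume lvm create --dmcrypt --data /dev/sdX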
----- Original Message -----
From: "Dominik Csapak" <d.csapak at proxmox.com>
To: "pve-devel" <pve-devel at pve.proxmox.com>
Sent: Tuesday, June 4, 2019 14:47:50
Subject: [pve-devel] [PATCH manager 11/20] ceph: osd: rework creation with ceph-volume
this completely rewrites the ceph osd creation api call using ceph-volume,
since ceph-disk is not available anymore
breaking changes:
no filestore anymore, journal_dev -> db_dev
it is now possible to give a specific size for db/wal; the default
is to read it from the ceph db/config, and the fallback is
10% of the osd size for block.db and 1% for block.wal
the reason is that ceph-volume does not autocreate those itself
(like ceph-disk did), you have to create them yourself
if the db/wal device has an lvm vg on it with the naming scheme 'ceph-UUID',
we use that vg and create a new lv on it
if we detect partitions, we create a new partition at the end
if the disk is not used at all, we create a pv/vg/lv for it
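as an illustration (the fsid, uuids and device names below are placeholders), with a separate db device the worker then ends up running something along the lines of:
#ceph-volume lvm create --cluster-fsid <fsid> --block.db ceph-<uuid>/osd-db-<uuid> --data /dev/sdX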
it is not possible to create osds on luminous with this api call anymore;
anyone needing this has to use ceph-disk directly
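assuming the module keeps its usual mount point under the node API (the path and values here are only an example), an OSD with a separate 20 GiB block.db could then be created with something like:
#pvesh create /nodes/<node>/ceph/osd --dev /dev/sdX --db_dev /dev/sdY --db_size 20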
Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
PVE/API2/Ceph/OSD.pm | 186 +++++++++++++++++++++++++++++++++++----------------
1 file changed, 130 insertions(+), 56 deletions(-)
diff --git a/PVE/API2/Ceph/OSD.pm b/PVE/API2/Ceph/OSD.pm
index adb0025c..ae938016 100644
--- a/PVE/API2/Ceph/OSD.pm
+++ b/PVE/API2/Ceph/OSD.pm
@@ -5,12 +5,14 @@ use warnings;
use Cwd qw(abs_path);
use IO::File;
+use UUID;
use PVE::Ceph::Tools;
use PVE::Ceph::Services;
use PVE::CephConfig;
use PVE::Cluster qw(cfs_read_file cfs_write_file);
use PVE::Diskmanage;
+use PVE::Storage::LVMPlugin;
use PVE::Exception qw(raise_param_exc);
use PVE::JSONSchema qw(get_standard_option);
use PVE::RADOS;
@@ -198,28 +200,39 @@ __PACKAGE__->register_method ({
description => "Block device name.",
type => 'string',
},
- journal_dev => {
- description => "Block device name for journal (filestore) or block.db (bluestore).",
+ db_dev => {
+ description => "Block device name for block.db.",
optional => 1,
type => 'string',
},
- wal_dev => {
- description => "Block device name for block.wal (bluestore only).",
+ db_size => {
+ description => "Size in GiB for block.db. ".
+ "If a block.db is requested but the size is not given, will be ".
+ "automatically selected by: bluestore_block_db_size from the ".
+ "ceph database (osd or global section) or config (osd or global section)".
+ "in that order. If this is not available, it will be sized 10% of the size ".
+ "of the OSD device. Fails if the available size is not enough.",
optional => 1,
- type => 'string',
+ type => 'number',
+ requires => 'db_dev',
+ minimum => 1.0,
},
- fstype => {
- description => "File system type (filestore only).",
- type => 'string',
- enum => ['xfs', 'ext4'],
- default => 'xfs',
+ wal_dev => {
+ description => "Block device name for block.wal.",
optional => 1,
+ type => 'string',
},
- bluestore => {
- description => "Use bluestore instead of filestore. This is the default.",
- type => 'boolean',
- default => 1,
+ wal_size => {
+ description => "Size in GiB for block.wal. ".
+ "If a block.wal is requested but the size is not given, will be ".
+ "automatically selected by: bluestore_block_wal_size from the ".
+ "ceph database (osd or global section) or config (osd or global section)".
+ "in that order. If this is not available, it will be sized 1% of the size ".
+ "of the OSD device. Fails if the available size is not enough.",
optional => 1,
+ minimum => 0.5,
+ requires => 'wal_dev',
+ type => 'number',
},
},
},
@@ -231,44 +244,53 @@ __PACKAGE__->register_method ({
my $authuser = $rpcenv->get_user();
- raise_param_exc({ 'bluestore' => "conflicts with parameter 'fstype'" })
- if (defined($param->{fstype}) && defined($param->{bluestore}) && $param->{bluestore});
-
PVE::Ceph::Tools::check_ceph_inited();
PVE::Ceph::Tools::setup_pve_symlinks();
PVE::Ceph::Tools::check_ceph_installed('ceph_osd');
+ PVE::Ceph::Tools::check_ceph_installed('ceph_volume');
- my $bluestore = $param->{bluestore} // 1;
-
- my $journal_dev;
+ my $dev;
+ my $db_dev;
+ my $db_devname;
+ my $db_size;
my $wal_dev;
+ my $wal_devname;
+ my $wal_size;
- if ($param->{journal_dev} && ($param->{journal_dev} ne $param->{dev})) {
- $journal_dev = PVE::Diskmanage::verify_blockdev_path($param->{journal_dev});
+ if ($param->{db_dev} && ($param->{db_dev} ne $param->{dev})) {
+ $db_dev = PVE::Diskmanage::verify_blockdev_path($param->{db_dev});
+ if (defined($param->{db_size})) {
+ $db_size = PVE::Tools::convert_size($param->{db_size}, 'gb' => 'b');
+ }
+ ($db_devname = $db_dev) =~ s|/dev/||;
}
if ($param->{wal_dev} &&
($param->{wal_dev} ne $param->{dev}) &&
- (!$param->{journal_dev} || $param->{wal_dev} ne $param->{journal_dev})) {
- raise_param_exc({ 'wal_dev' => "can only be set with paramater 'bluestore'"})
- if !$bluestore;
+ (!$param->{db_dev} || $param->{wal_dev} ne $param->{db_dev})) {
$wal_dev = PVE::Diskmanage::verify_blockdev_path($param->{wal_dev});
+ if (defined($param->{wal_size})) {
+ $wal_size = PVE::Tools::convert_size($param->{wal_size}, 'gb' => 'b');
+ }
+ ($wal_devname = $wal_dev) =~ s|/dev/||;
}
- $param->{dev} = PVE::Diskmanage::verify_blockdev_path($param->{dev});
+ $dev = PVE::Diskmanage::verify_blockdev_path($param->{dev});
- my $devname = $param->{dev};
- $devname =~ s|/dev/||;
+ (my $devname = $dev) =~ s|/dev/||;
+ my $devs = [$devname];
+ push @$devs, $db_devname if $db_devname;
+ push @$devs, $wal_devname if $wal_devname;
- my $disklist = PVE::Diskmanage::get_disks($devname, 1);
+ my $disklist = PVE::Diskmanage::get_disks($devs, 1);
my $diskinfo = $disklist->{$devname};
die "unable to get device info for '$devname'\n"
if !$diskinfo;
- die "device '$param->{dev}' is in use\n"
+ die "device '$dev' is in use\n"
if $diskinfo->{used};
my $devpath = $diskinfo->{devpath};
@@ -286,46 +308,98 @@ __PACKAGE__->register_method ({
file_set_contents($ceph_bootstrap_osd_keyring, $bindata);
};
- my $worker = sub {
- my $upid = shift;
+ my $create_part_or_lv = sub {
+ my ($dev, $size, $type) = @_;
+
+ if ($size =~ m/^(\d+)$/) {
+ $size = $1;
+ } else {
+ die "invalid size '$size'\n";
+ }
+
+ die "'$dev->{devpath}' is smaller than requested size '$size' bytes\n"
+ if $dev->{size} < $size;
+
+ if (!$dev->{used}) {
+ # create pv,vg,lv
+
+ my $vg = "ceph-" . UUID::uuid();
+ my $lv = $type . "-" . UUID::uuid();
+
+ PVE::Storage::LVMPlugin::lvm_create_volume_group($dev->{devpath}, $vg);
+ PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}b");
+
+ return "$vg/$lv";
- my $fstype = $param->{fstype} || 'xfs';
+ } elsif ($dev->{used} eq 'LVM') {
+ # check pv/vg and create lv
+ my $vgs = PVE::Storage::LVMPlugin::lvm_vgs(1);
+ my $vg;
+ for my $vgname ( sort keys %$vgs ) {
+ next if $vgname !~ /^ceph-/;
- my $ccname = PVE::Ceph::Tools::get_config('ccname');
+ for my $pv ( @{$vgs->{$vgname}->{pvs}} ) {
+ next if $pv->{name} ne $dev->{devpath};
+ $vg = $vgname;
+ last;
+ }
+ last if $vg;
+ }
+
+ die "no ceph vg found on '$dev->{devpath}'\n" if !$vg;
+ die "vg '$vg' has not enough free space\n" if $vgs->{$vg}->{free} < $size;
+
+ my $lv = $type . "-" . "012345";
+
+ PVE::Storage::LVMPlugin::lvcreate($vg, $lv, "${size}b");
+
+ return "$vg/$lv";
+
+ } elsif ($dev->{used} eq 'partitions') {
+ # create new partition at the end
+
+ return PVE::Diskmanage::append_partition($dev->{devpath}, $size);
+ }
+
+ die "cannot use '$dev->{devpath}' for '$type'\n";
+ };
+
+ my $worker = sub {
+ my $upid = shift;
+
+ PVE::Diskmanage::locked_disk_action(sub {
+ # get db/wal size
+ if (($db_dev && !defined($db_size)) || ($wal_dev && !defined($wal_size))) {
+ my $sizes = PVE::Ceph::Tools::get_db_wal_sizes();
+ $db_size //= $sizes->{db} // int($disklist->{$devname}->{size} / 10); # 10% of OSD
+ $wal_size //= $sizes->{wal} // int($disklist->{$devname}->{size} / 100); # 1% of OSD
+ }
- my $cmd = ['ceph-disk', 'prepare', '--zap-disk',
- '--cluster', $ccname, '--cluster-uuid', $fsid ];
+ my $cmd = ['ceph-volume', 'lvm', 'create', '--cluster-fsid', $fsid ];
- if ($bluestore) {
print "create OSD on $devpath (bluestore)\n";
- push @$cmd, '--bluestore';
- if ($journal_dev) {
- print "using device '$journal_dev' for block.db\n";
- push @$cmd, '--block.db', $journal_dev;
+ if ($db_dev) {
+ print "creating block.db on '$db_dev'\n";
+ my $part_or_lv = $create_part_or_lv->($disklist->{$db_devname}, $db_size, 'osd-db');
+ print "using '$part_or_lv' for block.db\n";
+ push @$cmd, '--block.db', $part_or_lv;
}
if ($wal_dev) {
- print "using device '$wal_dev' for block.wal\n";
- push @$cmd, '--block.wal', $wal_dev;
+ print "creating block.wal on '$wal_dev'\n";
+ my $part_or_lv = $create_part_or_lv->($disklist->{$wal_devname}, $wal_size, 'osd-wal');
+ print "using '$part_or_lv' for block.wal\n";
+ push @$cmd, '--block.wal', $part_or_lv;
}
- push @$cmd, $devpath;
- } else {
- print "create OSD on $devpath ($fstype)\n";
- push @$cmd, '--filestore', '--fs-type', $fstype;
- if ($journal_dev) {
- print "using device '$journal_dev' for journal\n";
- push @$cmd, '--journal-dev', $devpath, $journal_dev;
- } else {
- push @$cmd, $devpath;
- }
- }
+ push @$cmd, '--data', $devpath;
- PVE::Ceph::Tools::wipe_disks($devpath);
+ PVE::Ceph::Tools::wipe_disks($devpath);
- run_command($cmd);
+ run_command($cmd);
+ });
};
return $rpcenv->fork_worker('cephcreateosd', $devname, $authuser, $worker);
--
2.11.0