[pve-devel] [PATCH v3 manager 1/3] api ceph osd: add OSD index, metadata and lv-info
Dominik Csapak
d.csapak at proxmox.com
Mon Oct 24 11:38:23 CEST 2022
one small nit (inline) , otherwise the series looks good to me
On 10/20/22 15:36, Aaron Lauterer wrote:
> To get more details for a single OSD, we add two new endpoints:
> * nodes/{node}/ceph/osd/{osdid}/metadata
> * nodes/{node}/ceph/osd/{osdid}/lv-info
>
> The {osdid} endpoint itself gets a new GET handler to return the index.
>
> The metadata one provides various metadata regarding the OSD.
>
> Such as
> * process id
> * memory usage
> * info about devices used (bdev/block, db, wal)
> * size
> * disks used (sdX)
> ...
> * network addresses and ports used
> ...
>
> Memory usage and PID are retrieved from systemd while the rest can be
> retrieved from the metadata provided by Ceph.
>
> The second one (lv-info) returns the following infos for a logical
> volume:
> * creation time
> * lv name
> * lv path
> * lv size
> * lv uuid
> * vg name
>
> Possible volumes are:
> * block (default value if not provided)
> * db
> * wal
>
> 'ceph-volume' is used to gather the infos, except for the creation time
> of the LV which is retrieved via 'lvs'.
>
> Signed-off-by: Aaron Lauterer <a.lauterer at proxmox.com>
> ---
> changes since
> v2:
> - rephrased errormsgs on run_command
> - reworked systemctl show call and parsing of the results
> - expanded error msg if no LV info is found to mention that it could be,
> because the OSD is a bit older. This will hopefully reduce potential
> concerns if user encounter it
> - return array of devices instead of optionl bdev, db, and wal
> - add 'device' to the devices metadata (block, db, wal), used to be done
> in the UI
>
> v1:
> - squashed all API commits into one
> - moved all new API endpoints into sub endpoints to {osdid}
> - {osdid} itself returns the necessary index
> - incorporated other code improvements
>
>
> PVE/API2/Ceph/OSD.pm | 323 +++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 323 insertions(+)
>
> diff --git a/PVE/API2/Ceph/OSD.pm b/PVE/API2/Ceph/OSD.pm
> index 93433b3a..bac5c393 100644
> --- a/PVE/API2/Ceph/OSD.pm
> +++ b/PVE/API2/Ceph/OSD.pm
> @@ -5,6 +5,7 @@ use warnings;
>
> use Cwd qw(abs_path);
> use IO::File;
> +use JSON;
> use UUID;
>
> use PVE::Ceph::Tools;
> @@ -516,6 +517,328 @@ __PACKAGE__->register_method ({
> return $rpcenv->fork_worker('cephcreateosd', $devs->{dev}->{name}, $authuser, $worker);
> }});
>
> +my $OSD_DEV_RETURN_PROPS = {
> + device => {
> + type => 'string',
> + enum => ['block', 'db', 'wal'],
> + description => 'Kind of OSD device',
> + },
> + dev_node => {
> + type => 'string',
> + description => 'Device node',
> + },
> + devices => {
> + type => 'string',
> + description => 'Physical disks used',
> + },
> + size => {
> + type => 'integer',
> + description => 'Size in bytes',
> + },
> + support_discard => {
> + type => 'boolean',
> + description => 'Discard support of the physical device',
> + },
> + type => {
> + type => 'string',
> + description => 'Type of device. For example, hdd or ssd',
> + },
> +};
> +
> +__PACKAGE__->register_method ({
> + name => 'osdindex',
> + path => '{osdid}',
> + method => 'GET',
> + permissions => { user => 'all' },
> + description => "OSD index.",
> + parameters => {
> + additionalProperties => 0,
> + properties => {
> + node => get_standard_option('pve-node'),
> + osdid => {
> + description => 'OSD ID',
> + type => 'integer',
> + },
> + },
> + },
> + returns => {
> + type => 'array',
> + items => {
> + type => "object",
> + properties => {},
> + },
> + links => [ { rel => 'child', href => "{name}" } ],
> + },
> + code => sub {
> + my ($param) = @_;
> +
> + my $result = [
> + { name => 'metadata' },
> + { name => 'lv-info' },
> + ];
> +
> + return $result;
> + }});
> +
> +__PACKAGE__->register_method ({
> + name => 'osddetails',
> + path => '{osdid}/metadata',
> + method => 'GET',
> + description => "Get OSD details",
> + proxyto => 'node',
> + protected => 1,
> + permissions => {
> + check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
> + },
> + parameters => {
> + additionalProperties => 0,
> + properties => {
> + node => get_standard_option('pve-node'),
> + osdid => {
> + description => 'OSD ID',
> + type => 'integer',
> + },
> + },
> + },
> + returns => {
> + type => 'object',
> + properties => {
> + osd => {
> + type => 'object',
> + description => 'General information about the OSD',
> + properties => {
> + hostname => {
> + type => 'string',
> + description => 'Name of the host containing the OSD.',
> + },
> + id => {
> + type => 'integer',
> + description => 'ID of the OSD.',
> + },
> + mem_usage => {
> + type => 'integer',
> + description => 'Memory usage of the OSD service.',
> + },
> + osd_data => {
> + type => 'string',
> + description => "Path to the OSD's data directory.",
> + },
> + osd_objectstore => {
> + type => 'string',
> + description => 'The type of object store used.',
> + },
> + pid => {
> + type => 'integer',
> + description => 'OSD process ID.',
> + },
> + version => {
> + type => 'string',
> + description => 'Ceph version of the OSD service.',
> + },
> + front_addr => {
> + type => 'string',
> + description => 'Address and port used to talk to clients and monitors.',
> + },
> + back_addr => {
> + type => 'string',
> + description => 'Address and port used to talk to other OSDs.',
> + },
> + hb_front_addr => {
> + type => 'string',
> + description => 'Heartbeat address and port for clients and monitors.',
> + },
> + hb_back_addr => {
> + type => 'string',
> + description => 'Heartbeat address and port for other OSDs.',
> + },
> + },
> + },
> + devices => {
> + type => 'array',
> + description => 'Array containing data about devices',
> + items => {
> + type => "object",
> + properties => $OSD_DEV_RETURN_PROPS,
> + },
> + }
> + }
> + },
> + code => sub {
> + my ($param) = @_;
> +
> + PVE::Ceph::Tools::check_ceph_inited();
> +
> + my $osdid = $param->{osdid};
> + my $rados = PVE::RADOS->new();
> + my $metadata = $rados->mon_command({ prefix => 'osd metadata', id => int($osdid) });
> +
> + die "OSD '${osdid}' does not exists on host '${nodename}'\n"
> + if $nodename ne $metadata->{hostname};
> +
> + my $raw = '';
> + my $pid;
> + my $memory;
> + my $parser = sub {
> + my $line = shift;
> + if ($line =~ m/^MainPID=([0-9]*)$/) {
> + $pid = $1;
> + } elsif ($line =~ m/^MemoryCurrent=([0-9]*|\[not set\])$/) {
> + $memory = $1 eq "[not set]" ? 0 : $1;
> + } else {
> + die "got unexpected data from systemctl: '${line}'\n";
> + }
> + };
> +
i don't know if it can happen, but when the output does not contain the pid/memory
those are left undef und will trigger a
"Use of uninitialized value in int at" below
so i'd either error out here if one of these is not set, or set them with a
sensible default, or catch the defaultness below with e.g.
pid => defined($pid) ? int(pid) : undef,
> + my $cmd = [
> + '/bin/systemctl',
> + 'show',
> + "ceph-osd\@${osdid}.service",
> + '--property',
> + 'MainPID,MemoryCurrent',
> + ];
> + run_command($cmd, errmsg => 'fetching OSD PID and memory usage failed', outfunc => $parser);
> +
> + my $data = {
> + osd => {
> + hostname => $metadata->{hostname},
> + id => $metadata->{id},
> + mem_usage => int($memory),
> + osd_data => $metadata->{osd_data},
> + osd_objectstore => $metadata->{osd_objectstore},
> + pid => int($pid),
> + version => "$metadata->{ceph_version_short} ($metadata->{ceph_release})",
> + front_addr => $metadata->{front_addr},
> + back_addr => $metadata->{back_addr},
> + hb_front_addr => $metadata->{hb_front_addr},
> + hb_back_addr => $metadata->{hb_back_addr},
> + },
> + };
> +
> + $data->{devices} = [];
> +
> + my $get_data = sub {
> + my ($dev, $prefix, $device) = @_;
> + push (
> + @{$data->{devices}},
> + {
> + dev_node => $metadata->{"${prefix}_${dev}_dev_node"},
> + physical_device => $metadata->{"${prefix}_${dev}_devices"},
> + size => int($metadata->{"${prefix}_${dev}_size"}),
> + support_discard => int($metadata->{"${prefix}_${dev}_support_discard"}),
> + type => $metadata->{"${prefix}_${dev}_type"},
> + device => $device,
> + }
> + );
> + };
> +
> + $get_data->("bdev", "bluestore", "block");
> + $get_data->("db", "bluefs", "db") if $metadata->{bluefs_dedicated_db};
> + $get_data->("wal", "bluefs", "wal") if $metadata->{bluefs_dedicated_wal};
> +
> + return $data;
> + }});
> +
> +__PACKAGE__->register_method ({
> + name => 'osdvolume',
> + path => '{osdid}/lv-info',
> + method => 'GET',
> + description => "Get OSD volume details",
> + proxyto => 'node',
> + protected => 1,
> + permissions => {
> + check => ['perm', '/', [ 'Sys.Audit' ], any => 1],
> + },
> + parameters => {
> + additionalProperties => 0,
> + properties => {
> + node => get_standard_option('pve-node'),
> + osdid => {
> + description => 'OSD ID',
> + type => 'integer',
> + },
> + type => {
> + description => 'OSD device type',
> + type => 'string',
> + enum => ['block', 'db', 'wal'],
> + default => 'block',
> + optional => 1,
> + },
> + },
> + },
> + returns => {
> + type => 'object',
> + properties => {
> + creation_time => {
> + type => 'string',
> + description => "Creation time as reported by `lvs`.",
> + },
> + lv_name => {
> + type => 'string',
> + description => 'Name of the logical volume (LV).',
> + },
> + lv_path => {
> + type => 'string',
> + description => 'Path to the logical volume (LV).',
> + },
> + lv_size => {
> + type => 'integer',
> + description => 'Size of the logical volume (LV).',
> + },
> + lv_uuid => {
> + type => 'string',
> + description => 'UUID of the logical volume (LV).',
> + },
> + vg_name => {
> + type => 'string',
> + description => 'Name of the volume group (VG).',
> + },
> + },
> + },
> + code => sub {
> + my ($param) = @_;
> +
> + PVE::Ceph::Tools::check_ceph_inited();
> +
> + my $osdid = $param->{osdid};
> + my $type = $param->{type} // 'block';
> +
> + my $raw = '';
> + my $parser = sub { $raw .= shift };
> + my $cmd = ['/usr/sbin/ceph-volume', 'lvm', 'list', '--format', 'json'];
> + run_command($cmd, errmsg => 'listing Ceph LVM volumes failed', outfunc => $parser);
> +
> + my $result;
> + if ($raw =~ m/^(\{.*\})$/s) { #untaint
> + $result = JSON::decode_json($1);
> + } else {
> + die "got unexpected data from ceph-volume: '${raw}'\n";
> + }
> + if (!$result->{$osdid}) {
> + die "OSD '${osdid}' not found in 'ceph-volume lvm list' on node '${nodename}'.\n"
> + ."Maybe it was created before LVM became the default?\n";
> + }
> +
> + my $lv_data = { map { $_->{type} => $_ } @{$result->{$osdid}} };
> + my $volume = $lv_data->{$type} || die "volume type '${type}' not found for OSD ${osdid}\n";
> +
> + $raw = '';
> + $cmd = ['/sbin/lvs', $volume->{lv_path}, '--reportformat', 'json', '-o', 'lv_time'];
> + run_command($cmd, errmsg => 'listing logical volumes failed', outfunc => $parser);
> +
> + if ($raw =~ m/(\{.*\})$/s) { #untaint, lvs has whitespace at beginning
> + $result = JSON::decode_json($1);
> + } else {
> + die "got unexpected data from lvs: '${raw}'\n";
> + }
> +
> + my $data = { map { $_ => $volume->{$_} } qw(lv_name lv_path lv_uuid vg_name) };
> + $data->{lv_size} = int($volume->{lv_size});
> +
> + $data->{creation_time} = @{$result->{report}}[0]->{lv}[0]->{lv_time};
> +
> + return $data;
> + }});
> +
> # Check if $osdid belongs to $nodename
> # $tree ... rados osd tree (passing the tree makes it easy to test)
> sub osd_belongs_to_node {
More information about the pve-devel
mailing list