[pve-devel] applied: [PATCH v2 storage] rbd: fix #3969: add rbd dev paths with cluster info

Fabian Grünbichler f.gruenbichler at proxmox.com
Wed Apr 27 14:20:46 CEST 2022


with two small follow-ups:
- re-order conditions for the fallback to the old path, ideally avoiding
  one stat() (see the sketch below)
- drop aliased private sub
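
A minimal sketch of that reordering (assuming the follow-up keeps the
same semantics and just checks the new path first):

    # prefer the rbd-pve symlink; if it exists, the old path is never stat()ed
    return $pve_path if -e $pve_path;
    # fall back to the legacy path for images mapped before the
    # rbd-pve udev rule was installed
    return $path if -e $path;
    return $pve_path;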

please don't forget to submit the udev change upstream as well ;)

On April 13, 2022 11:43 am, Aaron Lauterer wrote:
> By adding our own customized rbd udev rules and ceph-rbdnamer we can
> create device paths that include the cluster fsid and avoid any
> ambiguity if the same pool and namespace combination is used in
> different clusters we connect to.
> 
> In addition to the '/dev/rbd/<pool>/...' paths we now have
> '/dev/rbd-pve/<cluster fsid>/<pool>/...' paths.
> 
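> For example, an image 'vm-100-disk-0' in pool 'rbd' without a namespace
> on a cluster with the (hypothetical) fsid
> 1e4f8a2c-59b3-4bd0-9f2e-8c3a7d61f0aa would additionally show up as
> '/dev/rbd-pve/1e4f8a2c-59b3-4bd0-9f2e-8c3a7d61f0aa/rbd/vm-100-disk-0'.
> 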
> The other half of the patch makes use of the new device paths in the RBD
> plugin.
> 
> The new 'get_rbd_dev_path' method returns the full device path. In case
> the image has been mapped before the rbd-pve udev rule was installed,
> it returns the old path.
> 
> The cluster fsid is read from the 'ceph.conf' file in the case of a
> hyperconverged setup. In the case of an external Ceph cluster we need
> to fetch it via a RADOS API call.
> 
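> In the hyperconverged case the value is taken from the '[global]'
> section, e.g. (hypothetical fsid):
> 
>     [global]
>         fsid = 1e4f8a2c-59b3-4bd0-9f2e-8c3a7d61f0aa
> 
> which matches what 'ceph fsid' prints for the cluster.
> 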
> Co-authored-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
> Signed-off-by: Aaron Lauterer <a.lauterer at proxmox.com>
> ---
> 
> Testing the migration phase can be done by starting a VM first, then
> installing the new package version. Don't forget to have the disk image
> on an rbd storage with krbd enabled.
> 
> `qm showcmd VMID --pretty` should still show the old rbd path for that
> disk.
> Add a new disk to the running VM and run showcmd again. It should show
> the new rbd path for the added disk.
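> 
> The difference shows up in the '-drive' arguments, roughly like this
> (options trimmed, hypothetical fsid):
> 
>     # disk mapped before the upgrade
>     -drive 'file=/dev/rbd/rbd/vm-100-disk-0,...'
>     # disk added afterwards
>     -drive 'file=/dev/rbd-pve/1e4f8a2c-59b3-4bd0-9f2e-8c3a7d61f0aa/rbd/vm-100-disk-1,...'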
> 
> 
> The udev rules seem to get applied automatically after installing the
> package, so it should be safe to assume that once this new code runs,
> newly mapped rbd images also show up under the new /dev/rbd-pve path.
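> 
> A quick sanity check after mapping an image (hypothetical fsid/pool):
> 
>     ls -l /dev/rbd-pve/1e4f8a2c-59b3-4bd0-9f2e-8c3a7d61f0aa/rbd/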
> 
> I also realized that the rescan logic (pct rescan, qm rescan) gets
> confused by having two pools with the same name on different clusters.
> Something we need to take a look at as well.
> 
> changes since v1:
> * reverted changes to get_rbd_path
> * added additional get_rbd_dev_path function that returns the full path
> 
> 
>  Makefile                   |  1 +
>  PVE/Storage/RBDPlugin.pm   | 60 ++++++++++++++++++++++++++++----------
>  udev-rbd/50-rbd-pve.rules  |  2 ++
>  udev-rbd/Makefile          | 21 +++++++++++++
>  udev-rbd/ceph-rbdnamer-pve | 24 +++++++++++++++
>  5 files changed, 92 insertions(+), 16 deletions(-)
>  create mode 100644 udev-rbd/50-rbd-pve.rules
>  create mode 100644 udev-rbd/Makefile
>  create mode 100755 udev-rbd/ceph-rbdnamer-pve
> 
> diff --git a/Makefile b/Makefile
> index 431db16..029b586 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -41,6 +41,7 @@ install: PVE pvesm.1 pvesm.bash-completion pvesm.zsh-completion
>  	install -d ${DESTDIR}${SBINDIR}
>  	install -m 0755 pvesm ${DESTDIR}${SBINDIR}
>  	make -C PVE install
> +	make -C udev-rbd install
>  	install -d ${DESTDIR}/usr/share/man/man1
>  	install -m 0644 pvesm.1 ${DESTDIR}/usr/share/man/man1/
>  	gzip -9 -n ${DESTDIR}/usr/share/man/man1/pvesm.1
> diff --git a/PVE/Storage/RBDPlugin.pm b/PVE/Storage/RBDPlugin.pm
> index e287e28..3293565 100644
> --- a/PVE/Storage/RBDPlugin.pm
> +++ b/PVE/Storage/RBDPlugin.pm
> @@ -8,6 +8,7 @@ use JSON;
>  use Net::IP;
>  
>  use PVE::CephConfig;
> +use PVE::Cluster qw(cfs_read_file);
>  use PVE::JSONSchema qw(get_standard_option);
>  use PVE::ProcFSTools;
>  use PVE::RADOS;
> @@ -22,6 +23,16 @@ my $get_parent_image_name = sub {
>      return $parent->{image} . "@" . $parent->{snapshot};
>  };
>  
> +my $librados_connect = sub {
> +    my ($scfg, $storeid, $options) = @_;
> +
> +    my $librados_config = PVE::CephConfig::ceph_connect_option($scfg, $storeid);
> +
> +    my $rados = PVE::RADOS->new(%$librados_config);
> +
> +    return $rados;
> +};
> +
>  my sub get_rbd_path {
>      my ($scfg, $volume) = @_;
>      my $path = $scfg->{pool} ? $scfg->{pool} : 'rbd';
> @@ -30,6 +41,32 @@ my sub get_rbd_path {
>      return $path;
>  };
>  
> +my sub get_rbd_dev_path {
> +    my ($scfg, $storeid, $volume) = @_;
> +
> +    my $cluster_id = '';
> +    if ($scfg->{monhost}) {
> +	my $rados = $librados_connect->($scfg, $storeid);
> +	$cluster_id = $rados->mon_command({ prefix => 'fsid', format => 'json' })->{fsid};
> +    } else {
> +	$cluster_id = cfs_read_file('ceph.conf')->{global}->{fsid};
> +    }
> +
> +    my $uuid_pattern = "([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})";
> +    if ($cluster_id =~ qr/^${uuid_pattern}$/is) {
> +	$cluster_id = $1; # use untainted value
> +    } else {
> +	die "cluster fsid has invalid format\n";
> +    }
> +
> +    my $rbd_path = get_rbd_path($scfg, $volume);
> +    my $pve_path = "/dev/rbd-pve/${cluster_id}/${rbd_path}";
> +    my $path = "/dev/rbd/${rbd_path}";
> +
> +    return $path if -e $path && !-e $pve_path; # mapped before rbd-pve udev rule existed
> +    return $pve_path;
> +}
> +
>  my $build_cmd = sub {
>      my ($binary, $scfg, $storeid, $op, @options) = @_;
>  
> @@ -70,16 +107,6 @@ my $rados_cmd = sub {
>      return $build_cmd->('/usr/bin/rados', $scfg, $storeid, $op, @options);
>  };
>  
> -my $librados_connect = sub {
> -    my ($scfg, $storeid, $options) = @_;
> -
> -    my $librados_config = PVE::CephConfig::ceph_connect_option($scfg, $storeid);
> -
> -    my $rados = PVE::RADOS->new(%$librados_config);
> -
> -    return $rados;
> -};
> -
>  # needed for volumes created using ceph jewel (or higher)
>  my $krbd_feature_update = sub {
>      my ($scfg, $storeid, $name) = @_;
> @@ -380,9 +407,10 @@ sub path {
>      my ($vtype, $name, $vmid) = $class->parse_volname($volname);
>      $name .= '@'.$snapname if $snapname;
>  
> -    my $rbd_path = get_rbd_path($scfg, $name);
> -    return ("/dev/rbd/${rbd_path}", $vmid, $vtype) if $scfg->{krbd};
> +    my $rbd_dev_path = get_rbd_dev_path($scfg, $storeid, $name);
> +    return ($rbd_dev_path, $vmid, $vtype) if $scfg->{krbd};
>  
> +    my $rbd_path = get_rbd_path($scfg, $name);
>      my $path = "rbd:${rbd_path}";
>  
>      $path .= ":conf=$cmd_option->{ceph_conf}" if $cmd_option->{ceph_conf};
> @@ -619,8 +647,8 @@ sub deactivate_storage {
>  }
>  
>  my sub get_kernel_device_path {
> -    my ($scfg, $name) = @_;
> -    return "/dev/rbd/" . get_rbd_path($scfg, $name);
> +    my ($scfg, $storeid, $name) = @_;
> +    return get_rbd_dev_path($scfg, $storeid, $name);
>  };
>  
>  sub map_volume {
> @@ -631,7 +659,7 @@ sub map_volume {
>      my $name = $img_name;
>      $name .= '@'.$snapname if $snapname;
>  
> -    my $kerneldev = get_kernel_device_path($scfg, $name);
> +    my $kerneldev = get_kernel_device_path($scfg, $storeid, $name);
>  
>      return $kerneldev if -b $kerneldev; # already mapped
>  
> @@ -650,7 +678,7 @@ sub unmap_volume {
>      my ($vtype, $name, $vmid) = $class->parse_volname($volname);
>      $name .= '@'.$snapname if $snapname;
>  
> -    my $kerneldev = get_kernel_device_path($scfg, $name);
> +    my $kerneldev = get_kernel_device_path($scfg, $storeid, $name);
>  
>      if (-b $kerneldev) {
>  	my $cmd = $rbd_cmd->($scfg, $storeid, 'unmap', $kerneldev);
> diff --git a/udev-rbd/50-rbd-pve.rules b/udev-rbd/50-rbd-pve.rules
> new file mode 100644
> index 0000000..79432df
> --- /dev/null
> +++ b/udev-rbd/50-rbd-pve.rules
> @@ -0,0 +1,2 @@
> +KERNEL=="rbd[0-9]*", ENV{DEVTYPE}=="disk", PROGRAM="/usr/libexec/ceph-rbdnamer-pve %k", SYMLINK+="rbd-pve/%c"
> +KERNEL=="rbd[0-9]*", ENV{DEVTYPE}=="partition", PROGRAM="/usr/libexec/ceph-rbdnamer-pve %k", SYMLINK+="rbd-pve/%c-part%n"
> diff --git a/udev-rbd/Makefile b/udev-rbd/Makefile
> new file mode 100644
> index 0000000..065933b
> --- /dev/null
> +++ b/udev-rbd/Makefile
> @@ -0,0 +1,21 @@
> +PACKAGE=libpve-storage-perl
> +
> +DESTDIR=
> +PREFIX=/usr
> +LIBEXECDIR=${PREFIX}/libexec
> +LIBDIR=${PREFIX}/lib
> +
> +all:
> +
> +.PHONY: install
> +install: 50-rbd-pve.rules ceph-rbdnamer-pve
> +	install -d ${DESTDIR}${LIBEXECDIR}
> +	install -m 0755 ceph-rbdnamer-pve ${DESTDIR}${LIBEXECDIR}
> +	install -d ${DESTDIR}${LIBDIR}/udev/rules.d
> +	install -m 0644 50-rbd-pve.rules ${DESTDIR}${LIBDIR}/udev/rules.d
> +
> +.PHONY: clean
> +clean:
> +
> +.PHONY: distclean
> +distclean: clean
> diff --git a/udev-rbd/ceph-rbdnamer-pve b/udev-rbd/ceph-rbdnamer-pve
> new file mode 100755
> index 0000000..23dd626
> --- /dev/null
> +++ b/udev-rbd/ceph-rbdnamer-pve
> @@ -0,0 +1,24 @@
> +#!/bin/sh
> +
> +DEV=$1
> +NUM=`echo $DEV | sed 's#p.*##g; s#[a-z]##g'`
> +POOL=`cat /sys/devices/rbd/$NUM/pool`
> +CLUSTER_FSID=`cat /sys/devices/rbd/$NUM/cluster_fsid`
> +
> +if [ -f /sys/devices/rbd/$NUM/pool_ns ]; then
> +    NAMESPACE=`cat /sys/devices/rbd/$NUM/pool_ns`
> +else
> +    NAMESPACE=""
> +fi
> +IMAGE=`cat /sys/devices/rbd/$NUM/name`
> +SNAP=`cat /sys/devices/rbd/$NUM/current_snap`
> +
> +echo -n "/$CLUSTER_FSID/$POOL"
> +
> +if [ -n "$NAMESPACE" ]; then
> +    echo -n "/$NAMESPACE"
> +fi
> +echo -n "/$IMAGE"
> +if [ "$SNAP" != "-" ]; then
> +    echo -n "@$SNAP"
> +fi
> -- 
> 2.30.2