[pve-devel] [PATCH container 02/20] add PVE::LXC::{CGroup, Command} submodules
w.bumiller at proxmox.com
w.bumiller at proxmox.com
Fri Apr 3 16:37:22 CEST 2020
From: Wolfgang Bumiller <w.bumiller at proxmox.com>
Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
---
src/PVE/LXC/CGroup.pm | 128 +++++++++++++++++++++++++++
src/PVE/LXC/Command.pm | 196 +++++++++++++++++++++++++++++++++++++++++
src/PVE/LXC/Makefile | 2 +
3 files changed, 326 insertions(+)
create mode 100644 src/PVE/LXC/CGroup.pm
create mode 100644 src/PVE/LXC/Command.pm
diff --git a/src/PVE/LXC/CGroup.pm b/src/PVE/LXC/CGroup.pm
new file mode 100644
index 0000000..7561fb2
--- /dev/null
+++ b/src/PVE/LXC/CGroup.pm
@@ -0,0 +1,128 @@
+# cgroup handler
+#
+# This package should deal with figuring out the right cgroup path for a
+# container (via the command socket), reading and writing cgroup values, and
+# handling cgroup v1 & v2 differences.
+#
+# Note that the long term plan is to have resource management functions instead
+# of dealing with cgroup files on the outside.
+
+package PVE::LXC::CGroup;
+
+use strict;
+use warnings;
+
+use PVE::LXC::Command;
+
+# We don't want to do a command socket round trip for every cgroup read/write,
+# so any cgroup function needs to have the container's path cached, so this
+# package has to be instantiated.
+#
+# LXC keeps separate paths by controller (although they're normally all the
+# same, in our case anyway), so we cache them by controller as well.
+sub new {
+    my ($class, $vmid) = @_;
+
+    # The only initial state is the container ID; get_subdir() adds its
+    # per-controller path cache to the object on first use.
+    return bless { vmid => $vmid }, $class;
+}
+
+my $CPUSET_BASE = undef;
+# Find the cpuset cgroup controller.
+#
+# This is a function, not a method!
+sub cpuset_controller_path() {
+    return $CPUSET_BASE if defined($CPUSET_BASE);
+
+    # Probe the known mount points in order, the first one that matches wins.
+    my @candidates = (
+        # legacy cpuset cgroup:
+        ['/sys/fs/cgroup/cpuset', 'cpuset.effective_cpus'],
+        # pure cgroupv2 environment:
+        ['/sys/fs/cgroup', 'cpuset.cpus.effective'],
+        # hybrid, with cpuset moved to cgroupv2
+        ['/sys/fs/cgroup/unified', 'cpuset.cpus.effective'],
+    );
+
+    for my $candidate (@candidates) {
+        my ($base, $probe) = @$candidate;
+        return ($CPUSET_BASE = $base) if -f "$base/$probe";
+    }
+    die "failed to find cpuset controller\n";
+}
+
+my $CGROUP_MODE = undef;
+# Figure out which cgroup mode we're operating under:
+#
+# Returns 1 if cgroupv1 controllers exist (hybrid or legacy mode), and 2 in a
+# cgroupv2-only environment.
+#
+# This is a function, not a method!
+sub cgroup_mode() {
+    return $CGROUP_MODE if defined($CGROUP_MODE);
+
+    my ($v1, $v2) = PVE::LXC::get_cgroup_subsystems();
+
+    # Any cgroupv1 controller means hybrid or legacy mode; only without them
+    # does a present cgroupv2 hierarchy count as a pure cgroupv2 setup.
+    my $mode = scalar(keys %$v1) ? 1
+        : $v2 ? 2
+        : undef;
+    die "unknown cgroup mode\n" if !defined($mode);
+
+    return ($CGROUP_MODE = $mode);
+}
+
+# Get a subdirectory (without the cgroup mount point) for a controller.
+#
+# If `$controller` is `undef`, get the unified (cgroupv2) path.
+#
+# Note that in cgroup v2, lxc uses the activated controller names
+# (`cgroup.controllers` file) as list of controllers for the unified hierarchy,
+# so this returns a result when a `controller` is provided even when using
+# a pure cgroupv2 setup.
+my sub get_subdir {
+    my ($self, $controller, $limiting) = @_;
+
+    # One cache entry per controller name ('unified' when none is given),
+    # holding the limiting and the namespaced path separately.
+    my $cache = ($self->{controllers}->{$controller || 'unified'} //= {});
+
+    my $slot = $limiting ? 'limit' : 'ns';
+    return $cache->{$slot} if defined($cache->{$slot});
+
+    # Not cached yet, so ask lxc via the command socket wrapper.
+    my $raw = PVE::LXC::Command::get_cgroup_path(
+        $self->{vmid},
+        $controller,
+        $limiting,
+    );
+    return undef if !$raw;
+
+    # Never accept a path containing '..' from lxc.
+    die "lxc returned suspicious path: '$raw'\n" if $raw =~ /\.\./;
+
+    # untaint:
+    my ($path) = ($raw =~ /^(.*)$/s);
+    $cache->{$slot} = $path;
+
+    return $path;
+}
+
+# Get a path for a controller.
+#
+# `$controller` may be `undef`, see get_subdir above for details.
+sub get_path {
+    my ($self, $controller) = @_;
+    my $subdir = get_subdir($self, $controller) or return undef;
+
+    # The main mount point is currently assumed to be in a standard location.
+    my $mount = '/sys/fs/cgroup';
+    if (cgroup_mode() != 2) {
+        $mount .= defined($controller) ? "/$controller" : '/unified';
+    }
+    return "$mount/$subdir";
+}
+
+1;
diff --git a/src/PVE/LXC/Command.pm b/src/PVE/LXC/Command.pm
new file mode 100644
index 0000000..2fd4e81
--- /dev/null
+++ b/src/PVE/LXC/Command.pm
@@ -0,0 +1,196 @@
+# LXC command socket client.
+#
+# For now this is only used to fetch the cgroup paths.
+# This can also be extended to replace a few more `lxc-*` CLI invocations.
+# (such as lxc-stop, info, freeze, unfreeze, or getting the init pid)
+
+package PVE::LXC::Command;
+
+use strict;
+use warnings;
+
+use IO::Socket::UNIX;
+use Socket qw(SOCK_STREAM SOL_SOCKET SO_PASSCRED);
+
+use base 'Exporter';
+
+use constant {
+ LXC_CMD_GET_CGROUP => 6,
+ LXC_CMD_GET_LIMITING_CGROUP => 19,
+};
+
+our @EXPORT_OK = qw(
+ raw_command_transaction
+ simple_command
+ get_cgroup_path
+);
+
+# Get the command socket for a container.
+my sub _get_command_socket($) {
+    my ($vmid) = @_;
+
+    # The leading NUL byte selects the abstract socket namespace, see unix(7).
+    my $sock = IO::Socket::UNIX->new(
+        Peer => "\0/var/lib/lxc/$vmid/command",
+        Type => SOCK_STREAM(),
+    );
+    return undef if !defined($sock) && $!{ECONNREFUSED};
+    die "failed to connect to command socket: $!\n" if !defined($sock);
+
+    # The documentation for this talks more about the receiving end, and it
+    # also *mostly* works without, but then the kernel *sometimes* fails to
+    # provide correct credentials.
+    if (!setsockopt($sock, SOL_SOCKET, SO_PASSCRED, 1)) {
+        die "failed to pass credentials to command socket: $!\n";
+    }
+
+    return $sock;
+}
+
+# Create an lxc_cmd_req struct.
+my sub _lxc_cmd_req($$) {
+    my ($cmd, $datalen) = @_;
+
+    # The request is laid out like the C struct:
+    #
+    #     struct lxc_cmd_req {
+    #         lxc_cmd_t cmd;
+    #         int datalen;
+    #         const void *data;
+    #     };
+    #
+    # The pointer is meaningless to the peer, so a NULL is sent in its place.
+    return pack('i!i!L!', $cmd, $datalen, 0);
+}
+
+# Unpack an lxc_cmd_rsp into its result and payload length.
+my sub _unpack_lxc_cmd_rsp($) {
+    my ($packet) = @_;
+
+    # The packet is laid out like the C struct:
+    #
+    #     struct lxc_cmd_rsp {
+    #         int ret; /* 0 on success, -errno on failure */
+    #         int datalen;
+    #         void *data;
+    #     };
+    # Only the two integers matter, the trailing pointer value is ignored.
+    my ($status, $payload_len) = unpack("i!i!L!", $packet);
+    return ($status, $payload_len);
+}
+
+# Send a complete packet:
+my sub _do_send($$) {
+    my ($sock, $data) = @_;
+    my $want = length($data); # anything less than this is a short write
+    my $sent = send($sock, $data, 0);
+    die "failed to send to command socket: $!\n" if !defined($sent);
+    die "short write on command socket ($sent != $want)\n" if $sent != $want;
+}
+
+# Receive a complete packet:
+my sub _do_recv($\$$) {
+    my ($sock, $scalar, $len) = @_; # $scalar is a reference to the caller's buffer
+    defined(recv($sock, $$scalar, $len, 0))
+        or die "failed to read from command socket: $!\n";
+    my $have = length($$scalar);
+    die "short read on command socket ($len != $have)\n" if $have != $len;
+}
+
+# Receive a response from an lxc command socket.
+#
+# Performs the return value check (negative errno values) and returns the
+# return value and payload in array context, or just the payload in scalar
+# context.
+my sub _recv_response($) {
+    my ($socket) = @_;
+
+    # The fixed-size header (struct lxc_cmd_rsp) comes first ...
+    my $header;
+    _do_recv($socket, $header, length(pack('i!i!L!', 0, 0, 0)));
+    my ($res, $datalen) = _unpack_lxc_cmd_rsp($header);
+
+    # ... followed by the payload, if the header announces one.
+    my $data;
+    _do_recv($socket, $data, $datalen) if $datalen > 0;
+    # A negative result is a negated errno, let perl render its message.
+    if ($res < 0) {
+        $! = -$res;
+        die "command failed: $!\n";
+    }
+    return wantarray ? ($res, $data) : $data;
+}
+
+# Perform a command transaction: Send command & payload, receive and unpack the
+# response.
+sub raw_command_transaction($$;$) {
+    my ($socket, $cmd, $data) = @_;
+
+    $data //= '';
+
+    # Send the header for the *requested* command (this used to be hard-coded
+    # to LXC_CMD_GET_CGROUP), followed by the payload if there is one.
+    _do_send($socket, _lxc_cmd_req($cmd, length($data)));
+    _do_send($socket, $data) if length($data) > 0;
+
+    # The caller's context (list or scalar) is passed on to _recv_response.
+    return _recv_response($socket);
+}
+
+# Perform a command transaction for a VMID where no command socket has been
+# established yet.
+#
+# Returns ($ret, $data):
+# $ret: numeric return value (typically 0)
+# $data: optional data returned for the command, if any, otherwise undef
+#
+# Returns undef if the container is not running, dies on errors.
+sub simple_command($$;$) {
+    my ($vmid, $cmd, $data) = @_;
+    # Without a command socket the container is not running.
+    my $socket = _get_command_socket($vmid);
+    return undef if !$socket;
+    return raw_command_transaction($socket, $cmd, $data);
+}
+
+# Retrieve the cgroup path for a running container.
+# If $limiting is set, get the payload path without the namespace subdirectory,
+# otherwise return the full namespaced path.
+#
+# Returns undef if the container is not running, dies on errors.
+sub get_cgroup_path($;$$) {
+    my ($vmid, $subsystem, $limiting) = @_;
+
+    # A subsystem name is sent as a zero-terminated C string. Without one no
+    # payload is sent at all, which is how the unified path is requested.
+    my $payload = defined($subsystem) ? pack('Z*', $subsystem) : undef;
+    my $cmd = $limiting ? LXC_CMD_GET_LIMITING_CGROUP : LXC_CMD_GET_CGROUP;
+
+    my ($res, $data) = simple_command($vmid, $cmd, $payload);
+    return undef if !defined $res;
+
+    # data is a zero-terminated string (it may be missing entirely):
+    return unpack('Z*', $data // '');
+}
+
+# Retrieve the limiting cgroup path for a running container.
+# This is the payload path without the namespace subdirectory, the same as
+# calling get_cgroup_path() with $limiting set.
+#
+# Returns undef if the container is not running, dies on errors.
+sub get_limiting_cgroup_path($;$) {
+    my ($vmid, $subsystem) = @_;
+
+    # A subsystem name is sent as a zero-terminated C string. Without one no
+    # payload is sent at all instead of a bogus empty name (plus a warning).
+    my $payload = defined($subsystem) ? pack('Z*', $subsystem) : undef;
+
+    my ($res, $data) =
+        simple_command($vmid, LXC_CMD_GET_LIMITING_CGROUP, $payload);
+    return undef if !defined $res;
+
+    # data is a zero-terminated string (it may be missing entirely):
+    return unpack('Z*', $data // '');
+}
+
+1;
diff --git a/src/PVE/LXC/Makefile b/src/PVE/LXC/Makefile
index d889204..f4f4dc1 100644
--- a/src/PVE/LXC/Makefile
+++ b/src/PVE/LXC/Makefile
@@ -1,4 +1,6 @@
SOURCES= \
+ CGroup.pm \
+ Command.pm \
Config.pm \
Create.pm \
Migrate.pm \
--
2.20.1
More information about the pve-devel
mailing list