[pve-devel] [PATCH container 02/20] add PVE::LXC::{CGroup, Command} submodules

w.bumiller at proxmox.com w.bumiller at proxmox.com
Fri Apr 3 16:37:22 CEST 2020


From: Wolfgang Bumiller <w.bumiller at proxmox.com>

Signed-off-by: Wolfgang Bumiller <w.bumiller at proxmox.com>
---
 src/PVE/LXC/CGroup.pm  | 128 +++++++++++++++++++++++++++
 src/PVE/LXC/Command.pm | 196 +++++++++++++++++++++++++++++++++++++++++
 src/PVE/LXC/Makefile   |   2 +
 3 files changed, 326 insertions(+)
 create mode 100644 src/PVE/LXC/CGroup.pm
 create mode 100644 src/PVE/LXC/Command.pm

diff --git a/src/PVE/LXC/CGroup.pm b/src/PVE/LXC/CGroup.pm
new file mode 100644
index 0000000..7561fb2
--- /dev/null
+++ b/src/PVE/LXC/CGroup.pm
@@ -0,0 +1,128 @@
+# cgroup handler
+#
+# This package should deal with figuring out the right cgroup path for a
+# container (via the command socket), reading and writing cgroup values, and
+# handling cgroup v1 & v2 differences.
+#
+# Note that the long term plan is to have resource management functions instead
+# of dealing with cgroup files on the outside.
+
+package PVE::LXC::CGroup;
+
+use strict;
+use warnings;
+
+use PVE::LXC::Command;
+
+# Constructor: PVE::LXC::CGroup->new($vmid).
+#
+# Cgroup helpers work on an instance rather than a bare VMID so that the
+# container's cgroup paths can be cached in the object; this avoids a command
+# socket round trip for every single cgroup read or write.
+#
+# LXC tracks a separate path per controller (even though they are normally all
+# the same, in our case anyway), so paths get cached per controller as well.
+sub new {
+    my ($class, $vmid) = @_;
+
+    return bless { vmid => $vmid }, $class;
+}
+
+my $CPUSET_BASE = undef;
+# Find the mount point of the cgroup hierarchy providing the cpuset controller.
+# The result is cached in $CPUSET_BASE after the first successful lookup.
+#
+# This is a function, not a method!
+sub cpuset_controller_path() {
+    return $CPUSET_BASE if defined($CPUSET_BASE);
+
+    my @candidates = (
+	# legacy cpuset cgroup:
+	['/sys/fs/cgroup/cpuset',  'cpuset.effective_cpus'],
+	# pure cgroupv2 environment:
+	['/sys/fs/cgroup',         'cpuset.cpus.effective'],
+	# hybrid, with cpuset moved to cgroupv2
+	['/sys/fs/cgroup/unified', 'cpuset.cpus.effective'],
+    );
+
+    for my $candidate (@candidates) {
+	my ($base, $probe) = @$candidate;
+	return $CPUSET_BASE = $base if -f "$base/$probe";
+    }
+    die "failed to find cpuset controller\n";
+}
+
+my $CGROUP_MODE = undef;
+# Determine which cgroup mode we're operating under. A successful result is
+# cached in $CGROUP_MODE, so the subsystem lookup is not repeated.
+#
+# Returns 1 if cgroupv1 controllers exist (hybrid or legacy mode), and 2 in a
+# cgroupv2-only environment.
+# Dies with "unknown cgroup mode" if neither could be detected.
+#
+# This is a function, not a method!
+sub cgroup_mode() {
+    return $CGROUP_MODE if defined($CGROUP_MODE);
+
+    my ($v1, $v2) = PVE::LXC::get_cgroup_subsystems();
+
+    # Any cgroupv1 controller means hybrid or legacy mode.
+    return $CGROUP_MODE = 1 if keys %$v1;
+    # Without v1 controllers we need cgroupv2 to be available.
+    return $CGROUP_MODE = 2 if $v2;
+
+    die "unknown cgroup mode\n";
+}
+
+# Get a controller's cgroup subdirectory, relative to the cgroup mount point.
+#
+# If `$controller` is `undef`, get the unified (cgroupv2) path. If `$limiting`
+# is set, the path without the namespace subdirectory is requested instead.
+#
+# Lookups are cached in `$self`. Returns `undef` if no path could be obtained.
+#
+# Note that in cgroup v2, lxc uses the activated controller names
+# (`cgroup.controllers` file) as list of controllers for the unified hierarchy,
+# so this returns a result when a `controller` is provided even when using
+# a pure cgroupv2 setup.
+my sub get_subdir {
+    my ($self, $controller, $limiting) = @_;
+
+    my $kind = $limiting ? 'limit' : 'ns';
+    my $cache = ($self->{controllers}->{$controller || 'unified'} //= {});
+
+    my $cached = $cache->{$kind};
+    return $cached if defined $cached;
+
+    my $path = PVE::LXC::Command::get_cgroup_path(
+	$self->{vmid},
+	$controller,
+	$limiting,
+    );
+    return undef if !$path;
+
+    # Refuse paths that could escape the cgroup mount point, then untaint:
+    die "lxc returned suspicious path: '$path'\n" if $path =~ /\.\./;
+    ($path) = ($path =~ /^(.*)$/s);
+
+    $cache->{$kind} = $path;
+
+    return $path;
+}
+
+# Get the absolute cgroup directory of the container for a controller.
+#
+# `$controller` may be `undef`, see get_subdir above for details.
+sub get_path {
+    my ($self, $controller) = @_;
+
+    my $subdir = get_subdir($self, $controller);
+    return undef if !$subdir;
+
+    # The main mount point we currently assume to be in a standard location.
+    return "/sys/fs/cgroup/$subdir" if cgroup_mode() == 2;
+    return "/sys/fs/cgroup/$controller/$subdir" if defined($controller);
+    return "/sys/fs/cgroup/unified/$subdir";
+}
+
+1;
diff --git a/src/PVE/LXC/Command.pm b/src/PVE/LXC/Command.pm
new file mode 100644
index 0000000..2fd4e81
--- /dev/null
+++ b/src/PVE/LXC/Command.pm
@@ -0,0 +1,196 @@
+# LXC command socket client.
+#
+# For now this is only used to fetch the cgroup paths.
+# This can also be extended to replace a few more `lxc-*` CLI invocations.
+# (such as lxc-stop, info, freeze, unfreeze, or getting the init pid)
+
+package PVE::LXC::Command;
+
+use strict;
+use warnings;
+
+use IO::Socket::UNIX;
+use Socket qw(SOCK_STREAM SOL_SOCKET SO_PASSCRED);
+
+use base 'Exporter';
+
+use constant {
+    LXC_CMD_GET_CGROUP => 6, # command code requesting the namespaced cgroup path
+    LXC_CMD_GET_LIMITING_CGROUP => 19, # command code requesting the limiting path
+}; # NOTE(review): values presumably mirror lxc's lxc_cmd_t enum - confirm
+
+our @EXPORT_OK = qw(
+    raw_command_transaction
+    simple_command
+    get_cgroup_path
+);
+
+# Get the command socket for a container (undef if the connection is refused).
+my sub _get_command_socket($) {
+    my ($vmid) = @_;
+
+    my $socket = IO::Socket::UNIX->new(
+	Peer => "\0/var/lib/lxc/$vmid/command",
+	Type => SOCK_STREAM(),
+    );
+    if (!defined($socket)) {
+	die "failed to connect to command socket: $!\n" if !$!{ECONNREFUSED};
+	return undef;
+    }
+
+    # SO_PASSCRED is mostly documented for the receiving end, and things also
+    # *mostly* work without it, but then the kernel *sometimes* fails to
+    # provide correct credentials.
+    setsockopt($socket, SOL_SOCKET, SO_PASSCRED, 1)
+	or die "failed to pass credentials to command socket: $!\n";
+
+    return $socket;
+}
+
+# Serialize an lxc_cmd_req header for the given command and payload length:
+#
+#     struct lxc_cmd_req {
+#         lxc_cmd_t cmd;
+#         int datalen;
+#         const void *data;
+#     };
+#
+# The `data` pointer is meaningless on the wire, so it is always sent as NULL
+# (packed as a native unsigned long).
+my sub _lxc_cmd_req($$) {
+    my ($cmd, $datalen) = @_;
+
+    return pack('i!i!L!', $cmd, $datalen, 0);
+}
+
+# Split a raw lxc_cmd_rsp header into its return value and payload length:
+#
+#     struct lxc_cmd_rsp {
+#         int ret; /* 0 on success, -errno on failure */
+#         int datalen;
+#         void *data;
+#     };
+#
+# The pointer member carries no information for us and is dropped.
+my sub _unpack_lxc_cmd_rsp($) {
+    my ($packet) = @_;
+
+    my @fields = unpack("i!i!L!", $packet);
+    return @fields[0, 1];
+}
+
+# Send a complete packet; dies on errors and on short writes.
+my sub _do_send($$) {
+    my ($sock, $data) = @_;
+    my $sent = send($sock, $data, 0);
+    die "failed to send to command socket: $!\n" if !defined($sent);
+    return if $sent == length($data);
+    die "short write on command socket ($sent != ".length($data).")\n";
+}
+
+# Receive a complete packet of $len bytes into the given scalar variable:
+my sub _do_recv($\$$) {
+    my ($sock, $scalar, $len) = @_;
+    defined(recv($sock, $$scalar, $len, 0))
+	or die "failed to read from command socket: $!\n";
+    return if length($$scalar) == $len;
+    die "short read on command socket ($len != ".length($$scalar).")\n";
+}
+
+# Read one complete response (header plus optional payload) from an lxc
+# command socket.
+#
+# Negative return values (negated errno) are turned into exceptions. Returns
+# ($ret, $payload) in list context, or just the payload in scalar context.
+my sub _recv_response($) {
+    my ($socket) = @_;
+
+    my $header = pack('i!i!L!', 0, 0, 0); # template sized like lxc_cmd_rsp
+    _do_recv($socket, $header, length($header));
+    my ($ret, $payload_len) = _unpack_lxc_cmd_rsp($header);
+
+    my $payload;
+    _do_recv($socket, $payload, $payload_len) if $payload_len > 0;
+
+    if ($ret < 0) {
+	$! = -$ret;
+	die "command failed: $!\n";
+    }
+
+    return ($ret, $payload) if wantarray;
+    return $payload;
+}
+
+# Perform a command transaction: send the request header for `$cmd` plus the
+# optional payload `$data`, then receive and unpack the response. See
+# _recv_response() for the return value and error handling.
+sub raw_command_transaction($$;$) {
+    my ($socket, $cmd, $data) = @_;
+
+    $data //= '';
+
+    # Announce the requested command together with the payload size.
+    _do_send($socket, _lxc_cmd_req($cmd, length($data)));
+    # An empty payload is never sent separately.
+    _do_send($socket, $data) if length($data) > 0;
+
+    return _recv_response($socket);
+}
+
+# One-shot command transaction: connects to the command socket of container
+# $vmid, runs $cmd with the optional payload $data and returns the response.
+#
+# Returns ($ret, $data):
+#    $ret: numeric return value (typically 0)
+#    $data: optional data returned for the command, if any, otherwise undef
+#
+# Returns undef if the container is not running, dies on errors.
+sub simple_command($$;$) {
+    my ($vmid, $cmd, $data) = @_;
+
+    my $socket = _get_command_socket($vmid);
+    return undef if !$socket;
+    return raw_command_transaction($socket, $cmd, $data);
+}
+
+# Retrieve the cgroup path for a running container.
+# If $limiting is set, get the payload path without the namespace subdirectory,
+# otherwise return the full namespaced path. An undefined $subsystem sends no
+# payload at all, asking lxc for the unified (cgroupv2) path.
+# Returns undef if the container is not running, dies on errors.
+sub get_cgroup_path($;$$) {
+    my ($vmid, $subsystem, $limiting) = @_;
+
+    # A subsystem name must be sent as a zero-terminated C string.
+    my ($res, $data) = simple_command(
+	$vmid,
+	$limiting ? LXC_CMD_GET_LIMITING_CGROUP : LXC_CMD_GET_CGROUP,
+	defined($subsystem) ? pack('Z*', $subsystem) : undef,
+    );
+    return undef if !defined $res;
+
+    # data is a zero-terminated string:
+    return unpack('Z*', $data);
+}
+
+# Retrieve the limiting cgroup path for a running container, i.e. the payload
+# path without the namespace subdirectory (the same command get_cgroup_path()
+# uses when $limiting is set). An undefined $subsystem sends no payload at all.
+#
+# Returns undef if the container is not running, dies on errors.
+sub get_limiting_cgroup_path($;$) {
+    my ($vmid, $subsystem) = @_;
+
+    # A subsystem name must be sent as a zero-terminated C string.
+    my ($res, $data) = simple_command(
+	$vmid,
+	LXC_CMD_GET_LIMITING_CGROUP,
+	defined($subsystem) ? pack('Z*', $subsystem) : undef,
+    );
+    return undef if !defined $res;
+
+    # data is a zero-terminated string:
+    return unpack('Z*', $data);
+}
+
+1;
diff --git a/src/PVE/LXC/Makefile b/src/PVE/LXC/Makefile
index d889204..f4f4dc1 100644
--- a/src/PVE/LXC/Makefile
+++ b/src/PVE/LXC/Makefile
@@ -1,4 +1,6 @@
 SOURCES= \
+	CGroup.pm \
+	Command.pm \
 	Config.pm \
 	Create.pm \
 	Migrate.pm \
-- 
2.20.1





More information about the pve-devel mailing list