[pve-devel] [PATCH lxc] add patches for cgroup handling in non-unified cgroup setups

Stoiko Ivanov s.ivanov at proxmox.com
Tue Jul 20 12:52:00 CEST 2021


I opened a PR at lxc-upstream with these changes [0].

Testing in my hybrid layout environment fixes the issue with
priviledged container reported in the forum.
(Note that the issue also occurs with unprivileged container, if they
have a `lxc.cgroup.devices.(allow|deny)` entry (which they don't in
our default config)

[0] https://github.com/lxc/lxc/pull/3911

Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
---
I quickly considered also updating lxc to 4.0.10 (which was released last
friday, but ran into some issues with mounting /sys/devices/virtual/net,
which I did not yet get to looking at.

 ...populate-hierarchy-for-device-cgroup.patch | 102 ++++++++++++++++++
 ...nneeded-variables-from-cgroup_tree_c.patch |  65 +++++++++++
 2 files changed, 167 insertions(+)
 create mode 100644 debian/patches/pve/0012-cgroups-populate-hierarchy-for-device-cgroup.patch
 create mode 100644 debian/patches/pve/0013-cgroups-remove-unneeded-variables-from-cgroup_tree_c.patch

diff --git a/debian/patches/pve/0012-cgroups-populate-hierarchy-for-device-cgroup.patch b/debian/patches/pve/0012-cgroups-populate-hierarchy-for-device-cgroup.patch
new file mode 100644
index 0000000..d24e45c
--- /dev/null
+++ b/debian/patches/pve/0012-cgroups-populate-hierarchy-for-device-cgroup.patch
@@ -0,0 +1,102 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stoiko Ivanov <s.ivanov at proxmox.com>
+Date: Mon, 19 Jul 2021 16:55:43 +0200
+Subject: [PATCH] cgroups: populate hierarchy for device cgroup
+
+With the changes introduced in:
+b7b1e3a34ce28b01206c48227930ff83d399e7b6
+the hierarchy-struct did not have the path_lim set anymore, which is
+needed by setup_limits_legacy to actually access the cgroup directory.
+
+The issue can be reproduced with a container config having
+```
+lxc.cgroup.devices.deny = a
+```
+(or any lxc.cgroup.devices entry) set on a system booted with
+systemd.unified_cgroup_hierarchy=0.
+
+This affects all privileged containers on PVE (due to the default
+devices.deny entry).
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ src/lxc/cgroups/cgfsng.c | 39 +++++++++++++++++++--------------------
+ 1 file changed, 19 insertions(+), 20 deletions(-)
+
+diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
+index 9e1ece5ca..e27baa625 100644
+--- a/src/lxc/cgroups/cgfsng.c
++++ b/src/lxc/cgroups/cgfsng.c
+@@ -794,8 +794,12 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		if (fd_limit < 0)
+ 			return syserror_ret(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+ 
++		limit_path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
++		h->dfd_lim = move_fd(fd_limit);
++		h->path_lim = move_ptr(limit_path);
++
+ 		TRACE("Created limit cgroup %d->%d(%s)",
+-		      fd_limit, h->dfd_base, cgroup_limit_dir);
++		      h->dfd_lim, h->dfd_base, cgroup_limit_dir);
+ 
+ 		/*
+ 		 * With isolation the devices legacy cgroup needs to be
+@@ -807,44 +811,39 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		    !ops->setup_limits_legacy(ops, conf, true))
+ 			return log_error(false, "Failed to setup legacy device limits");
+ 
+-		limit_path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+-		path = must_make_path(limit_path, cgroup_leaf, NULL);
++		path = must_make_path(h->path_lim, cgroup_leaf, NULL);
+ 
+ 		/*
+ 		 * If we use a separate limit cgroup, the leaf cgroup, i.e. the
+ 		 * cgroup the container actually resides in, is below fd_limit.
+ 		 */
+-		fd_final = __cgroup_tree_create(fd_limit, cgroup_leaf, 0755, cpuset_v1, false);
++		fd_final = __cgroup_tree_create(h->dfd_lim, cgroup_leaf, 0755, cpuset_v1, false);
+ 		if (fd_final < 0) {
+ 			/* Ensure we don't leave any garbage behind. */
+ 			if (cgroup_tree_prune(h->dfd_base, cgroup_limit_dir))
+ 				SYSWARN("Failed to destroy %d(%s)", h->dfd_base, cgroup_limit_dir);
+ 			else
+ 				TRACE("Removed cgroup tree %d(%s)", h->dfd_base, cgroup_limit_dir);
++			return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+ 		}
++		h->dfd_con = move_fd(fd_final);
++		h->path_con = move_ptr(path);
++
+ 	} else {
+ 		path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+ 
+ 		fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
+-	}
+-	if (fd_final < 0)
+-		return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+-
+-	if (payload) {
+-		h->dfd_con = move_fd(fd_final);
+-		h->path_con = move_ptr(path);
++		if (fd_final < 0)
++			return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+ 
+-		if (fd_limit < 0)
++		if (payload) {
++			h->dfd_con = move_fd(fd_final);
+ 			h->dfd_lim = h->dfd_con;
+-		else
+-			h->dfd_lim = move_fd(fd_limit);
+-
+-		if (limit_path)
+-			h->path_lim = move_ptr(limit_path);
+-		else
++			h->path_con = move_ptr(path);
+ 			h->path_lim = h->path_con;
+-	} else {
+-		h->dfd_mon = move_fd(fd_final);
++		} else {
++			h->dfd_mon = move_fd(fd_final);
++		}
+ 	}
+ 
+ 	return true;
diff --git a/debian/patches/pve/0013-cgroups-remove-unneeded-variables-from-cgroup_tree_c.patch b/debian/patches/pve/0013-cgroups-remove-unneeded-variables-from-cgroup_tree_c.patch
new file mode 100644
index 0000000..692233d
--- /dev/null
+++ b/debian/patches/pve/0013-cgroups-remove-unneeded-variables-from-cgroup_tree_c.patch
@@ -0,0 +1,65 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Stoiko Ivanov <s.ivanov at proxmox.com>
+Date: Tue, 20 Jul 2021 10:30:36 +0200
+Subject: [PATCH] cgroups: remove unneeded variables from cgroup_tree_create
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ src/lxc/cgroups/cgfsng.c | 13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+diff --git a/src/lxc/cgroups/cgfsng.c b/src/lxc/cgroups/cgfsng.c
+index e27baa625..35ba0fb9d 100644
+--- a/src/lxc/cgroups/cgfsng.c
++++ b/src/lxc/cgroups/cgfsng.c
+@@ -779,7 +779,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 			       const char *cgroup_leaf, bool payload)
+ {
+ 	__do_close int fd_limit = -EBADF, fd_final = -EBADF;
+-	__do_free char *path = NULL, *limit_path = NULL;
+ 	bool cpuset_v1 = false;
+ 
+ 	/*
+@@ -794,9 +793,8 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		if (fd_limit < 0)
+ 			return syserror_ret(false, "Failed to create limiting cgroup %d(%s)", h->dfd_base, cgroup_limit_dir);
+ 
+-		limit_path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
++		h->path_lim = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+ 		h->dfd_lim = move_fd(fd_limit);
+-		h->path_lim = move_ptr(limit_path);
+ 
+ 		TRACE("Created limit cgroup %d->%d(%s)",
+ 		      h->dfd_lim, h->dfd_base, cgroup_limit_dir);
+@@ -811,8 +809,6 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		    !ops->setup_limits_legacy(ops, conf, true))
+ 			return log_error(false, "Failed to setup legacy device limits");
+ 
+-		path = must_make_path(h->path_lim, cgroup_leaf, NULL);
+-
+ 		/*
+ 		 * If we use a separate limit cgroup, the leaf cgroup, i.e. the
+ 		 * cgroup the container actually resides in, is below fd_limit.
+@@ -827,11 +823,9 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 			return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+ 		}
+ 		h->dfd_con = move_fd(fd_final);
+-		h->path_con = move_ptr(path);
++		h->path_con = must_make_path(h->path_lim, cgroup_leaf, NULL);
+ 
+ 	} else {
+-		path = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
+-
+ 		fd_final = __cgroup_tree_create(h->dfd_base, cgroup_limit_dir, 0755, cpuset_v1, false);
+ 		if (fd_final < 0)
+ 			return syserror_ret(false, "Failed to create %s cgroup %d(%s)", payload ? "payload" : "monitor", h->dfd_base, cgroup_limit_dir);
+@@ -839,7 +833,8 @@ static bool cgroup_tree_create(struct cgroup_ops *ops, struct lxc_conf *conf,
+ 		if (payload) {
+ 			h->dfd_con = move_fd(fd_final);
+ 			h->dfd_lim = h->dfd_con;
+-			h->path_con = move_ptr(path);
++			h->path_con = make_cgroup_path(h, h->at_base, cgroup_limit_dir, NULL);
++
+ 			h->path_lim = h->path_con;
+ 		} else {
+ 			h->dfd_mon = move_fd(fd_final);
-- 
2.30.2






More information about the pve-devel mailing list