[pve-devel] [PATCH container 1/2] add rebalance function from pvestatd to PVE::LXC

Wolfgang Bumiller w.bumiller at proxmox.com
Thu Nov 17 11:27:02 CET 2016

pvestatd already depends on PVE::LXC and this way we can
use it at startup without having to import pvestatd.

Additionally, provide a way to get a new set of cores for a
not-yet-started container.
 src/PVE/LXC.pm | 135 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 135 insertions(+)

diff --git a/src/PVE/LXC.pm b/src/PVE/LXC.pm
index 3a284f7..598dff5 100644
--- a/src/PVE/LXC.pm
+++ b/src/PVE/LXC.pm
@@ -21,6 +21,7 @@ use PVE::Tools qw($IPV6RE $IPV4RE dir_glob_foreach lock_file lock_file_full O_PA
 use PVE::Network;
 use PVE::AccessControl;
 use PVE::ProcFSTools;
+use PVE::CpuSet;
 use PVE::LXC::Config;
 use Time::HiRes qw (gettimeofday);
@@ -1499,5 +1500,139 @@ sub userns_command {
     return [];
+# Rebalance the cpusets of all containers that have a cpuset cgroup and,
+# optionally, pick a cpuset for a container which is about to be started.
+#
+# Parameters:
+#   $new_cores - optional core count of a not-yet started container.
+#
+# Returns:
+#   - nothing if '/sys/fs/cgroup/cpuset/lxc' does not exist or if $new_cores
+#     is not set,
+#   - otherwise the PVE::CpuSet chosen for the new container ($new_cores is
+#     capped at the number of cpus available to the 'lxc' cgroup).
+#
+# Side effects: changed cpusets are written to the containers' cgroups and
+# logged via syslog. Failures to load or update a single container are
+# reported with warn and do not abort the run.
+sub rebalance_lxc_containers {
+    my ($new_cores) = @_;
+    return if !-d '/sys/fs/cgroup/cpuset/lxc'; # nothing to do...
+    my $all_cpus = PVE::CpuSet->new_from_cgroup('lxc', 'effective_cpus');
+    my @allowed_cpus = $all_cpus->members();
+    my $cpucount = scalar(@allowed_cpus);
+    my $max_cpuid = PVE::CpuSet::max_cpuid();
+    # Number of containers using each cpu, indexed by cpu id. Ids up to and
+    # including $max_cpuid are counted below, so we need $max_cpuid + 1 slots.
+    my @cpu_ctcount = (0) x ($max_cpuid + 1);
+    # List of [$vmid, $cpuset] pairs which are candidates for rebalancing.
+    my @balanced_cts;
+    my $ctlist = PVE::LXC::config_list();
+    foreach my $vmid (sort keys %$ctlist) {
+	# containers without a cpuset cgroup directory are skipped
+	next if ! -d "/sys/fs/cgroup/cpuset/lxc/$vmid";
+	my ($conf, $cpuset);
+	eval {
+	    $conf = PVE::LXC::Config->load_config($vmid);
+	    $cpuset = PVE::CpuSet->new_from_cgroup("lxc/$vmid");
+	};
+	if (my $err = $@) {
+	    warn $err;
+	    next;
+	}
+	my @cpuset_members = $cpuset->members();
+	# Containers with an explicit lxc.cgroup.cpuset.cpus entry are never
+	# modified, but their cpus are still counted further down.
+	if (!PVE::LXC::Config->has_lxc_entry($conf, 'lxc.cgroup.cpuset.cpus')) {
+	    my $cores = $conf->{cores} || $cpucount;
+	    $cores = $cpucount if $cores > $cpucount;
+	    # see if the number of cores was hot-reduced or
+	    # hasn't been enacted at all yet
+	    my $newset = PVE::CpuSet->new();
+	    if ($cores <  scalar(@cpuset_members)) {
+		$newset->insert(@cpuset_members[0..$cores-1]);
+	    } elsif ($cores > scalar(@cpuset_members)) {
+		# keep the current cpus and top up from the allowed ones
+		my $count = $newset->insert(@cpuset_members);
+		foreach my $cpu (@allowed_cpus) {
+		    $count += $newset->insert($cpu);
+		    last if $count >= $cores;
+		}
+	    } else {
+		$newset->insert(@cpuset_members);
+	    }
+	    # Apply hot-plugged changes if any:
+	    if (!$newset->is_equal($cpuset)) {
+		syslog('info', "detected changed cpu set for lxc/$vmid: " .
+		       $newset->short_string());
+		# The cgroup may vanish while we are running; do not let a
+		# single container abort the whole rebalance.
+		eval { $newset->write_to_cgroup("lxc/$vmid"); };
+		if (my $err = $@) {
+		    warn $err;
+		    next;
+		}
+		@cpuset_members = $newset->members();
+	    }
+	    # Note: no need to rebalance if we already use all cores
+	    push @balanced_cts, [$vmid, $newset]
+		if defined($conf->{cores}) && ($cores != $cpucount);
+	}
+	foreach my $cpu (@cpuset_members) {
+	    $cpu_ctcount[$cpu]++ if $cpu <= $max_cpuid;
+	}
+    }
+    # Return the cpu from @$cpulist which should replace $cpu, or $cpu itself.
+    # A candidate only wins if it is used by at least two containers fewer
+    # than the current choice; with a difference of one, moving would merely
+    # swap the two counts.
+    my $find_best_cpu = sub {
+	my ($cpulist, $cpu) = @_;
+	my $cur_cost = $cpu_ctcount[$cpu];
+	my $cur_cpu = $cpu;
+	foreach my $candidate (@$cpulist) {
+	    my $cost = $cpu_ctcount[$candidate];
+	    if ($cost < ($cur_cost -1)) {
+		$cur_cost = $cost;
+		$cur_cpu = $candidate;
+	    }
+	}
+	return $cur_cpu;
+    };
+    # Build a new cpuset by replacing each of @members with the best cpu
+    # from @$rest (if any is better), updating @cpu_ctcount along the way.
+    my $fill_cpus = sub {
+	my ($rest, @members) = @_;
+	# work on a private copy, the candidate list shrinks as we go
+	my @candidates = @$rest;
+	my $newset = PVE::CpuSet->new();
+	foreach my $cpu (@members) {
+	    my $best = &$find_best_cpu(\@candidates, $cpu);
+	    if ($best != $cpu) {
+		$cpu_ctcount[$best]++;
+		$cpu_ctcount[$cpu]--;
+		# $best now belongs to this set. Offering it to the remaining
+		# members again could map two of them onto the same cpu and
+		# thereby shrink the set.
+		@candidates = grep { $_ != $best } @candidates;
+	    }
+	    $newset->insert($best);
+	}
+	return $newset;
+    };
+    foreach my $bct (@balanced_cts) {
+	my ($vmid, $cpuset) = @$bct;
+	# candidates are all allowed cpus the container does not use yet
+	my $rest = [];
+	foreach my $cpu (@allowed_cpus) {
+	    next if $cpuset->has($cpu);
+	    push @$rest, $cpu;
+	}
+	my $newset = &$fill_cpus($rest, $cpuset->members());
+	if (!$newset->is_equal($cpuset)) {
+	    syslog('info', "modified cpu set for lxc/$vmid: " .
+		   $newset->short_string());
+	    eval { $newset->write_to_cgroup("lxc/$vmid"); };
+	    warn $@ if $@;
+	}
+    }
+    if ($new_cores) {
+	$new_cores = $cpucount if $cpucount < $new_cores;
+	# split allowed cpus into virtual lists of current vs rest cpus:
+	my @virtual_members = @allowed_cpus[0..$new_cores-1];
+	my $virtual_rest = [@allowed_cpus[$new_cores..$cpucount-1]];
+	# account for the cpus we're supposedly using:
+	$cpu_ctcount[$_]++ foreach @virtual_members;
+	return &$fill_cpus($virtual_rest, @virtual_members);
+    }
+    return;
+}

More information about the pve-devel mailing list