diff mbox series

[4/7] cgroup: Constrain 'sched_load_balance' flag when DL tasks are present

Message ID 1502918443-30169-5-git-send-email-mathieu.poirier@linaro.org
State New
Headers show
Series sched/deadline: fix cpusets bandwidth accounting | expand

Commit Message

Mathieu Poirier Aug. 16, 2017, 9:20 p.m. UTC
This patch prevents the 'sched_load_balance' flag from being flipped off
when DL tasks are present in a CPUset.  Otherwise we end up with the
DL tasks using CPUs belonging to different root domains, something that
breaks the mathematical model behind DL bandwidth management.

For example on a 4 core system CPUset "set1" has been created and CPUs
0 and 1 assigned to it.  A DL task has also been spun off.  By default
the DL task can use all the CPUs in the default CPUset.

If we set the base CPUset's cpuset.sched_load_balance to '0', CPU 0 and 1
are added to a newly created root domain while CPU 2 and 3 end up in the
default root domain.  But the DL task is still part of the base CPUset and
as such can use CPUs 0 to 3, spanning at the same time more than one root
domain.

Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>

---
 kernel/cgroup/cpuset.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

-- 
2.7.4
diff mbox series

Patch

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index f6d1e485dc2d..18df143b4013 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -447,6 +447,85 @@  static void free_trial_cpuset(struct cpuset *trial)
 	kfree(trial);
 }
 
+static bool cpuset_has_dl_tasks(struct cpuset *cs)
+{
+	bool dl_tasks = false;
+	struct css_task_iter it;
+	struct task_struct *task;
+
+	/* Walk tasks attached to @cs, stopping at the first DL task found */
+	css_task_iter_start(&cs->css, &it);
+
+	while (!dl_tasks && (task = css_task_iter_next(&it))) {
+		if (dl_task(task))
+			dl_tasks = true;
+	}
+
+	css_task_iter_end(&it);
+
+	return dl_tasks;
+}
+
+/*
+ * Assumes RCU read lock and cpuset_mutex are held by the caller.
+ */
+static int validate_dl_change(struct cpuset *cur, struct cpuset *trial)
+{
+	bool populated = false, dl_tasks = false;
+	int ret = -EBUSY;
+	struct cgroup_subsys_state *pos_css;
+	struct cpuset *cs;
+
+	/*
+	 * The cpuset.sched_load_balance flag is flipped off on
+	 * the current cpuset.
+	 */
+	if (is_sched_load_balance(cur) &&
+	    !is_sched_load_balance(trial)) {
+		/* See if at least one descendant cpuset is populated */
+		cpuset_for_each_descendant_pre(cs, pos_css, cur) {
+			/* Skip over ourselves */
+			if (cs == cur)
+				continue;
+
+			/* Empty cpusets are of no interest */
+			if (cpumask_empty(cs->cpus_allowed)) {
+				pos_css = css_rightmost_descendant(pos_css);
+				continue;
+			}
+
+			/*
+			 * @cur has at least one child and CPUs have been
+			 * assigned to it - there is no need to go further.
+			 */
+			populated = true;
+			break;
+		}
+
+		dl_tasks = cpuset_has_dl_tasks(cur);
+
+		/*
+		 * This CPUset has a child that is populated with (at least)
+		 * one CPU.  When the sched_load_balance flag gets flipped off
+		 * it will create a new root domain for the children CPUset,
+		 * and that new root domain will include the CPUs assigned to
+		 * the CPUset.
+		 *
+		 * Since the tasks in the current CPUset have not been assigned
+		 * to the children CPUset they will simply stay here and use
+		 * all the CPUs available in this set.  For DL tasks this can't
+		 * be allowed since they will be executing on CPUs associated
+		 * with more than one root domain.
+		 */
+		if (populated && dl_tasks)
+			goto out;
+	}
+
+	ret = 0;
+out:
+	return ret;
+}
+
 /*
  * validate_change() - Used to validate that any proposed cpuset change
  *		       follows the structural rules for cpusets.
@@ -481,6 +560,9 @@  static int validate_change(struct cpuset *cur, struct cpuset *trial)
 		if (!is_cpuset_subset(c, trial))
 			goto out;
 
+	if (validate_dl_change(cur, trial))
+		goto out;
+
 	/* Remaining checks don't apply to root cpuset */
 	ret = 0;
 	if (cur == &top_cpuset)