[RFC,v2,6/8] sched/tune: compute and keep track of per CPU boost value

Message ID 20161027174108.31139-7-patrick.bellasi@arm.com
State New
Headers show

Commit Message

Patrick Bellasi Oct. 27, 2016, 5:41 p.m.
When per task boosting is enabled, we can have multiple RUNNABLE tasks
which are concurrently scheduled on the same CPU but each one with a
different boost value.
For example, we could have a scenario like this:

  Task   SchedTune CGroup   Boost Value
    T1               root            0
    T2       low-priority           10
    T3        interactive           90

In these conditions we expect a CPU to be configured according to a
proper "aggregation" of the required boost values for all the tasks
currently RUNNABLE on this CPU.

A simple aggregation function is the one which tracks the MAX boost
value for all the tasks RUNNABLE on a CPU. This approach makes it
possible to always satisfy the most boost demanding task while at the
same time:
 a) boosting all its co-scheduled tasks, thus reducing potential
    side-effects on the most boost demanding tasks.
 b) reducing the number of frequency switches requested by schedutil,
    thus being friendlier to architectures with slow frequency
    switching times.

Every time a task enters/exits the RQ of a CPU the max boost value
should be updated considering all the boost groups currently "affecting"
that CPU, i.e. which have at least one RUNNABLE task currently allocated
on a RQ of that CPU.

This patch introduces the required support to keep track of the boost
groups currently affecting a CPU. Thanks to the limited number of boost
groups, a small and memory efficient per-cpu array of boost groups
values (cpu_boost_groups) is updated by schedtune_boostgroup_update()
but only when a schedtune CGroup boost value is changed. However, this
is expected to be an infrequent operation, perhaps done just once at
system boot time, or whenever user-space needs to tune the boost value
for a specific group of tasks (e.g. touch boost behavior on Android
systems).

Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Patrick Bellasi <patrick.bellasi@arm.com>

---
 kernel/sched/fair.c |  2 +-
 kernel/sched/tune.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/tune.h | 14 ++++++++++
 3 files changed, 88 insertions(+), 1 deletion(-)

-- 
2.10.1

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 26c3911..313a815 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5581,7 +5581,7 @@  schedtune_margin(unsigned long signal, unsigned int boost)
 static inline unsigned long
 schedtune_cpu_margin(unsigned long util, int cpu)
 {
-	unsigned int boost = get_sysctl_sched_cfs_boost();
+	unsigned int boost = schedtune_cpu_boost(cpu);
 
 	if (boost == 0)
 		return 0UL;
diff --git a/kernel/sched/tune.c b/kernel/sched/tune.c
index 4eaea1d..6a51a4d 100644
--- a/kernel/sched/tune.c
+++ b/kernel/sched/tune.c
@@ -104,6 +104,73 @@  struct boost_groups {
 /* Boost groups affecting each CPU in the system */
 DEFINE_PER_CPU(struct boost_groups, cpu_boost_groups);
 
+static void
+schedtune_cpu_update(int cpu)
+{
+	struct boost_groups *bg;
+	unsigned int boost_max;
+	int idx;
+
+	bg = &per_cpu(cpu_boost_groups, cpu);
+
+	/* The root boost group is always active */
+	boost_max = bg->group[0].boost;
+	for (idx = 1; idx < boostgroups_max; ++idx) {
+		/*
+		 * A boost group affects a CPU only if it has
+		 * RUNNABLE tasks on that CPU
+		 */
+		if (bg->group[idx].tasks == 0)
+			continue;
+		boost_max = max(boost_max, bg->group[idx].boost);
+	}
+
+	bg->boost_max = boost_max;
+}
+
+static void
+schedtune_boostgroup_update(int idx, int boost)
+{
+	struct boost_groups *bg;
+	int cur_boost_max;
+	int old_boost;
+	int cpu;
+
+	/* Update per CPU boost groups */
+	for_each_possible_cpu(cpu) {
+		bg = &per_cpu(cpu_boost_groups, cpu);
+
+		/*
+		 * Keep track of current boost values to compute the per CPU
+		 * maximum only when it has been affected by the new value of
+		 * the updated boost group
+		 */
+		cur_boost_max = bg->boost_max;
+		old_boost = bg->group[idx].boost;
+
+		/* Update the boost value of this boost group */
+		bg->group[idx].boost = boost;
+
+	/* Check if this update increases the current max */
+		if (boost > cur_boost_max && bg->group[idx].tasks) {
+			bg->boost_max = boost;
+			continue;
+		}
+
+		/* Check if this update has decreased current max */
+		if (cur_boost_max == old_boost && old_boost > boost)
+			schedtune_cpu_update(cpu);
+	}
+}
+
+int schedtune_cpu_boost(int cpu)
+{
+	struct boost_groups *bg;
+
+	bg = &per_cpu(cpu_boost_groups, cpu);
+	return bg->boost_max;
+}
+
 static u64
 boost_read(struct cgroup_subsys_state *css, struct cftype *cft)
 {
@@ -123,6 +190,9 @@  boost_write(struct cgroup_subsys_state *css, struct cftype *cft,
 	st->boost = boost;
 	if (css == &root_schedtune.css)
 		sysctl_sched_cfs_boost = boost;
+	/* Update CPU boost */
+	schedtune_boostgroup_update(st->idx, st->boost);
+
 	return 0;
 }
 
@@ -199,6 +269,9 @@  schedtune_css_alloc(struct cgroup_subsys_state *parent_css)
 static void
 schedtune_boostgroup_release(struct schedtune *st)
 {
+	/* Reset this boost group */
+	schedtune_boostgroup_update(st->idx, 0);
+
 	/* Keep track of allocated boost groups */
 	allocated_group[st->idx] = NULL;
 }
diff --git a/kernel/sched/tune.h b/kernel/sched/tune.h
index 515d02a..e936b91 100644
--- a/kernel/sched/tune.h
+++ b/kernel/sched/tune.h
@@ -10,4 +10,18 @@ 
 
 extern struct reciprocal_value schedtune_spc_rdiv;
 
+#ifdef CONFIG_CGROUP_SCHED_TUNE
+
+int schedtune_cpu_boost(int cpu);
+
+#else /* CONFIG_CGROUP_SCHED_TUNE */
+
+#define schedtune_cpu_boost(cpu)  get_sysctl_sched_cfs_boost()
+
+#endif /* CONFIG_CGROUP_SCHED_TUNE */
+
+#else /* CONFIG_SCHED_TUNE */
+
+#define schedtune_cpu_boost(cpu)  0
+
 #endif /* CONFIG_SCHED_TUNE */