diff mbox

[RFC,13/16] sched: Take task wakeups into account in energy estimates

Message ID 1400869003-27769-14-git-send-email-morten.rasmussen@arm.com
State New
Headers show

Commit Message

Morten Rasmussen May 23, 2014, 6:16 p.m. UTC
The energy cost of waking a cpu and sending it back to sleep can be
quite significant for short running frequently waking tasks if placed on
an idle cpu in a deep sleep state. By factoring in task wakeups, such
tasks can be placed on cpus where the wakeup energy cost is lower. For
example, partly utilized cpus in a shallower idle state, or cpus in a
cluster/die that is already awake.

The current utilization of the target cpu is factored in to guess how many
task wakeups translate into cpu wakeups (idle exits). It is a very
naive approach, but it is virtually impossible to get an accurate estimate.

wake_energy(task) = unused_util(cpu) * wakeups(task) * wakeup_energy(cpu)

There is no per cpu wakeup tracking, so we can't estimate the energy
savings when removing tasks from a cpu. It is also nearly impossible to
figure out which task is the cause of cpu wakeups if multiple tasks are
scheduled on the same cpu.

Support for multiple idle-states per sched_group (e.g. WFI and core
shutdown on ARM) is not implemented yet. wakeup_energy in struct
sched_energy needs to be a table instead, and cpuidle needs to tell us
which state is the most likely.

Signed-off-by: Morten Rasmussen <morten.rasmussen@arm.com>
---
 kernel/sched/fair.c |   19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)
diff mbox

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 39e9cd8..5a52467 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4271,11 +4271,13 @@  static void find_max_util(const struct cpumask *mask, int cpu, int util,
  *				+ 1-curr_util(sg) * idle_power(sg)
  *	energy_after = new_util(sg) * busy_power(sg)
  *				+ 1-new_util(sg) * idle_power(sg)
+ *				+ 1-new_util(sg) * task_wakeups
+ *							* wakeup_energy(sg)
  *	energy_diff += energy_before - energy_after
  * }
  *
  */
-static int energy_diff_util(int cpu, int util)
+static int energy_diff_util(int cpu, int util, int wakeups)
 {
 	struct sched_domain *sd;
 	int i;
@@ -4368,7 +4370,8 @@  static int energy_diff_util(int cpu, int util)
 		 * The utilization change has no impact at this level (or any
 		 * parent level).
 		 */
-		if (aff_util_bef == aff_util_aft && curr_cap_idx == new_cap_idx)
+		if (aff_util_bef == aff_util_aft && curr_cap_idx == new_cap_idx
+				&& unused_util_aft < 100)
 			goto unlock;
 
 		/* Energy before */
@@ -4380,6 +4383,13 @@  static int energy_diff_util(int cpu, int util)
 		energy_diff += (aff_util_aft*new_state->power)/new_state->cap;
 		energy_diff += (unused_util_aft * sge->idle_power)
 				/new_state->cap;
+		/*
+		 * Estimate how many of the wakeups happen while the cpu is
+		 * idle, assuming they are uniformly distributed. Wakeups
+		 * caused by other tasks are ignored.
+		 */
+		energy_diff += (wakeups * sge->wakeup_energy >> 10)
+				* unused_util_aft/new_state->cap;
 	}
 
 	/*
@@ -4410,6 +4420,8 @@  static int energy_diff_util(int cpu, int util)
 		energy_diff += (aff_util_aft*new_state->power)/new_state->cap;
 		energy_diff += (unused_util_aft * sse->idle_power)
 				/new_state->cap;
+		energy_diff += (wakeups * sse->wakeup_energy >> 10)
+				* unused_util_aft/new_state->cap;
 	}
 
 unlock:
@@ -4420,7 +4432,8 @@  unlock:
 
 static int energy_diff_task(int cpu, struct task_struct *p)
 {
-	return energy_diff_util(cpu, p->se.avg.load_avg_contrib);
+	return energy_diff_util(cpu, p->se.avg.load_avg_contrib,
+			p->se.avg.wakeup_avg_sum);
 }
 
 #else