[RFC,1/1] Weighted approach to gather and use history in TEO governor

Message ID 20200511141055.43029-2-psampat@linux.ibm.com
State New
Headers show
Series
  • [RFC,1/1] Weighted approach to gather and use history in TEO governor
Related show

Commit Message

Pratik R. Sampat May 11, 2020, 2:10 p.m.
Complementing the current self correcting window algorithm, an alternate
approach is devised to leverage history based on probability of
occurrence of states

Each CPU maintains a matrix wherein each idle state maintains a
probability distribution for itself and the other corresponding states.

The probability distribution is nothing but a n*n matrix, where
n = drv->state_count.
Each entry in the array signifies a weight for that row.
The weights can vary from the range [0-10000].

For example:
state_mat[2][1] = 3000 means that when state 2 is entered idle with, the
probability that the interval will last long enough to satisfy state 1's
residency is 30%.
The trailing zeros correspond to having more resolution while increasing
or reducing the weights for correction.

Initially the weights are distributed in a way such that the index of
that state in question has a higher probability of choosing itself, as
we have no reason to believe otherwise yet. Initial bias to itself is
60% and the rest 40% is equally distributed to the rest of the states.

Selection of an idle state:
When the TEO governor chooses an idle state, the probability
distribution for that state is looked at. A weighted random number
generator is used using the weights as bias to choose the next idle
state. The algorithm leans to choose that or a shallower state than that
for its next prediction

Correction of the probability distribution:
On wakeup, the weights are updated. The state which it should have woken
up with (could be the hit / miss / early hit state) is increased in
weight by the "LEARNING_RATE" % and the rest of the states for that
index are reduced by the same factor.
The LEARNING RATE is experimentally chosen to be 10 %

Signed-off-by: Pratik Rajesh Sampat <psampat@linux.ibm.com>
---
 drivers/cpuidle/governors/teo.c | 96 +++++++++++++++++++++++++++++++--
 1 file changed, 91 insertions(+), 5 deletions(-)

Patch

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index de7e706efd46..84058d797b43 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -50,6 +50,7 @@ 
 #include <linux/kernel.h>
 #include <linux/sched/clock.h>
 #include <linux/tick.h>
+#include <linux/random.h>
 
 /*
  * The PULSE value is added to metrics when they grow and the DECAY_SHIFT value
@@ -64,6 +65,12 @@ 
  */
 #define INTERVALS	8
 
+/*
+ * Percentage of the amount of weight to be shifted in the idle state weight
+ * distribution for correction
+ */
+#define LEARNING_RATE	10
+
 /**
  * struct teo_idle_state - Idle state data used by the TEO cpuidle governor.
  * @early_hits: "Early" CPU wakeups "matching" this state.
@@ -98,6 +105,8 @@  struct teo_idle_state {
  * @states: Idle states data corresponding to this CPU.
  * @interval_idx: Index of the most recent saved idle interval.
  * @intervals: Saved idle duration values.
+ * @state_mat: Each idle state maintains a weights corresponding to that
+ * state, storing the probability distribution of occurrence for that state
  */
 struct teo_cpu {
 	u64 time_span_ns;
@@ -105,6 +114,7 @@  struct teo_cpu {
 	struct teo_idle_state states[CPUIDLE_STATE_MAX];
 	int interval_idx;
 	u64 intervals[INTERVALS];
+	int state_mat[CPUIDLE_STATE_MAX][CPUIDLE_STATE_MAX];
 };
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
@@ -117,7 +127,8 @@  static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	int i, idx_hit = -1, idx_timer = -1;
+	int i, idx_hit = -1, idx_timer = -1, idx = -1;
+	int last_idx = dev->last_state_idx;
 	u64 measured_ns;
 
 	if (cpu_data->time_span_ns >= cpu_data->sleep_length_ns) {
@@ -183,16 +194,50 @@  static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 
 		if (idx_timer > idx_hit) {
 			misses += PULSE;
-			if (idx_hit >= 0)
+			idx = idx_timer;
+			if (idx_hit >= 0) {
 				cpu_data->states[idx_hit].early_hits += PULSE;
+				idx = idx_hit;
+			}
 		} else {
 			hits += PULSE;
+			idx = last_idx;
 		}
 
 		cpu_data->states[idx_timer].misses = misses;
 		cpu_data->states[idx_timer].hits = hits;
 	}
 
+	/*
+	 * Rearrange the weight distribution of the state, increase the weight
+	 * by the LEARNING RATE % for the idle state that was supposed to be
+	 * chosen and reduce by the same amount for rest of the states
+	 *
+	 * If the weights are greater than (100 - LEARNING_RATE) % or lesser
+	 * than LEARNING_RATE %, do not increase or decrease the confidence
+	 * respectively
+	 */
+	for (i = 0; i < drv->state_count; i++) {
+		unsigned int delta;
+
+		if (idx == -1)
+			break;
+		if (i ==  idx) {
+			delta = (LEARNING_RATE * cpu_data->state_mat[last_idx][i]) / 100;
+			if (cpu_data->state_mat[last_idx][i] + delta >=
+			    (100 - LEARNING_RATE) * 100)
+				continue;
+			cpu_data->state_mat[last_idx][i] += delta;
+			continue;
+		}
+		delta = (LEARNING_RATE * cpu_data->state_mat[last_idx][i]) /
+			((drv->state_count - 1) * 100);
+		if (cpu_data->state_mat[last_idx][i] - delta <=
+		    LEARNING_RATE * 100)
+			continue;
+		cpu_data->state_mat[last_idx][i] -= delta;
+	}
+
 	/*
 	 * Save idle duration values corresponding to non-timer wakeups for
 	 * pattern detection.
@@ -244,7 +289,7 @@  static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	s64 latency_req = cpuidle_governor_latency_req(dev->cpu);
 	u64 duration_ns;
 	unsigned int hits, misses, early_hits;
-	int max_early_idx, prev_max_early_idx, constraint_idx, idx, i;
+	int max_early_idx, prev_max_early_idx, constraint_idx, idx, i, og_idx;
 	ktime_t delta_tick;
 
 	if (dev->last_state_idx >= 0) {
@@ -374,10 +419,14 @@  static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	if (constraint_idx < idx)
 		idx = constraint_idx;
 
+	og_idx = idx;
+
 	if (idx < 0) {
 		idx = 0; /* No states enabled. Must use 0. */
 	} else if (idx > 0) {
-		unsigned int count = 0;
+		unsigned int weights_list[CPUIDLE_STATE_MAX];
+		unsigned int i, j = 0, rnd_wt, rnd_num = 0;
+		unsigned int count = 0, sum_weights = 0;
 		u64 sum = 0;
 
 		/*
@@ -412,6 +461,28 @@  static int teo_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 								       idx, avg_ns);
 			}
 		}
+		/*
+		 * In case, the recent history yields a shallower state, then
+		 * the probability distribution is looked at.
+		 * The weighted random number generator uses the weights as a
+		 * bias to choose the next idle state
+		 */
+		if (og_idx != idx) {
+			for (i = 0; i <= idx; i++) {
+				if (dev->states_usage[i].disable)
+					continue;
+				sum_weights += cpu_data->state_mat[idx][i];
+				weights_list[j++] = sum_weights;
+			}
+			get_random_bytes(&rnd_num, sizeof(rnd_num));
+			rnd_num = rnd_num % 100;
+			rnd_wt = (rnd_num * sum_weights) / 100;
+			for (i = 0; i < j; i++) {
+				if (rnd_wt < weights_list[i])
+					break;
+			}
+			idx = i;
+		}
 	}
 
 	/*
@@ -468,13 +539,28 @@  static int teo_enable_device(struct cpuidle_driver *drv,
 			     struct cpuidle_device *dev)
 {
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
-	int i;
+	int i, j;
 
 	memset(cpu_data, 0, sizeof(*cpu_data));
 
 	for (i = 0; i < INTERVALS; i++)
 		cpu_data->intervals[i] = U64_MAX;
 
+	/*
+	 * Populate initial weights for each state
+	 * The stop state is initially more biased for itself.
+	 *
+	 * Currently the initial distribution of probabilities are 70%-30%.
+	 * The trailing 0s are for increased resolution.
+	 */
+	for (i = 0; i < drv->state_count; i++) {
+		for (j = 0; j < drv->state_count; j++) {
+			if (i == j)
+				cpu_data->state_mat[i][j] = 6000;
+			else
+				cpu_data->state_mat[i][j] = 4000 / (drv->state_count - 1);
+		}
+	}
 	return 0;
 }