
[RFC,2/2] cpufreq/schedutil: Remove iowait boost

Message ID 20240304201625.100619-3-christian.loehle@arm.com
State: New
Series: Introduce per-task io utilization boost

Commit Message

Christian Loehle March 4, 2024, 8:16 p.m. UTC
The previous commit provides a new cpu_util_cfs_boost_io interface for
schedutil which uses the io boosted utilization of the per-task
tracking strategy. Schedutil iowait boosting is therefore no longer
necessary, so remove it.

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 kernel/sched/cpufreq_schedutil.c | 152 +------------------------------
 1 file changed, 5 insertions(+), 147 deletions(-)

Comments

Rafael J. Wysocki March 18, 2024, 2:07 p.m. UTC | #1
On Mon, Mar 4, 2024 at 9:17 PM Christian Loehle
<christian.loehle@arm.com> wrote:
>
> The previous commit provides a new cpu_util_cfs_boost_io interface for
> schedutil which uses the io boosted utilization of the per-task
> tracking strategy. Schedutil iowait boosting is therefore no longer
> necessary, so remove it.

I'm wondering about the cases when schedutil is used without EAS.

Are they still going to be handled as before after this change?
Rafael J. Wysocki March 18, 2024, 5:08 p.m. UTC | #2
On Mon, Mar 18, 2024 at 5:40 PM Christian Loehle
<christian.loehle@arm.com> wrote:
>
> On 18/03/2024 14:07, Rafael J. Wysocki wrote:
> > On Mon, Mar 4, 2024 at 9:17 PM Christian Loehle
> > <christian.loehle@arm.com> wrote:
> >>
> >> The previous commit provides a new cpu_util_cfs_boost_io interface for
> >> schedutil which uses the io boosted utilization of the per-task
> >> tracking strategy. Schedutil iowait boosting is therefore no longer
> >> necessary, so remove it.
> >
> > I'm wondering about the cases when schedutil is used without EAS.
> >
> > Are they still going to be handled as before after this change?
>
> Well, they should still get boosted (under the new conditions), and
> according to my tests that does work.

OK

> Anything in particular you're worried about?

It is not particularly clear to me how exactly the boost is taken into
account without EAS.

> So in terms of throughput I see similar results with EAS and CAS+sugov.
> I'm happy to include numbers in the cover letter for future versions, too.
> So far my intuition was that nobody would care enough for them to be
> worth including (as long as it generally still works).

Well, IMV clear understanding of the changes is more important.
Christian Loehle March 19, 2024, 1:58 p.m. UTC | #3
On 18/03/2024 17:08, Rafael J. Wysocki wrote:
> On Mon, Mar 18, 2024 at 5:40 PM Christian Loehle
> <christian.loehle@arm.com> wrote:
>>
>> On 18/03/2024 14:07, Rafael J. Wysocki wrote:
>>> On Mon, Mar 4, 2024 at 9:17 PM Christian Loehle
>>> <christian.loehle@arm.com> wrote:
>>>>
>>>> The previous commit provides a new cpu_util_cfs_boost_io interface for
>>>> schedutil which uses the io boosted utilization of the per-task
>>>> tracking strategy. Schedutil iowait boosting is therefore no longer
>>>> necessary, so remove it.
>>>
>>> I'm wondering about the cases when schedutil is used without EAS.
>>>
>>> Are they still going to be handled as before after this change?
>>
>> Well, they should still get boosted (under the new conditions), and
>> according to my tests that does work.
> 
> OK
> 
>> Anything in particular you're worried about?
> 
> It is not particularly clear to me how exactly the boost is taken into
> account without EAS.

So here's a quick rundown for now; I'll try to include something along
these lines in future versions, too.
Every task_struct carries an io_boost_level in the range [0..8].
The boost is in units of utilization (w.r.t. SCHED_CAPACITY_SCALE,
independent of the CPU the task is currently enqueued on).
The boost is taken into account for:
1. sugov frequency selection with
io_boost = cpu_util_io_boost(sg_cpu->cpu);
util = max(util, io_boost);

The io boost of all tasks enqueued on the rq is max-aggregated with the
util here (see cfs_rq->io_boost_tasks; a sketch of the aggregation
follows this list).

2. Task placement: for EAS in feec(); otherwise in select_idle_sibling() /
select_idle_capacity(), to ensure the chosen CPU satisfies the requested
io_boost of the task to be enqueued.
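To make 1. concrete, here is a minimal sketch of the aggregation. This
is illustrative only: cfs_rq->io_boost_tasks is from the previous patch,
but io_boost_util() and the io_boost_node list linkage are made-up names
for this example, not the actual patch code.

/*
 * Illustrative sketch: max-aggregate the io boost of all tasks
 * enqueued on a CPU, in utilization units (0..SCHED_CAPACITY_SCALE).
 */
static unsigned long cpu_util_io_boost(int cpu)
{
	struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
	struct task_struct *p;
	unsigned long boost = 0;

	/* Tasks on this rq with a nonzero io_boost_level */
	list_for_each_entry(p, &cfs_rq->io_boost_tasks, io_boost_node)
		boost = max(boost, io_boost_util(p));

	return boost;
}

sugov then simply takes util = max(util, io_boost) before computing the
effective performance, as in the sugov_get_util() hunk below.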

Determining the io_boost_level is a bit more involved than in sugov's
implementation and happens in dequeue_io_boost(); hopefully that part
is reasonably understandable from the code. An illustrative sketch of
a possible level-to-utilization mapping follows.
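For intuition only, a plausible mapping from io_boost_level to
utilization might look like the below; the actual mapping is defined in
the previous patch, and both the linear ramp and the helper name here
are assumptions for illustration:

/*
 * Illustrative assumption: a linear ramp of SCHED_CAPACITY_SCALE / 8
 * (i.e. 128) utilization per level, so level 8 maps to the full scale
 * of 1024. Compare the sugov mechanism removed below, which doubled a
 * per-CPU boost from IOWAIT_BOOST_MIN (also 128) up to 1024 and halved
 * it again whenever no new boost was requested.
 */
static unsigned long io_boost_util(struct task_struct *p)
{
	return p->io_boost_level * (SCHED_CAPACITY_SCALE / 8);
}

The important difference is that this value travels with the task, so
after a migration the boost is applied on the destination CPU rather
than lingering on the source CPU.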

Hope that helps.

Kind Regards,
Christian


> 
>> So in terms of throughput I see similar results with EAS and CAS+sugov.
>> I'm happy to include numbers in the cover letter for future versions, too.
>> So far my intuition was that nobody would care enough for them to be
>> worth including (as long as it generally still works).
> 
> Well, IMV clear understanding of the changes is more important.

Patch

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index cd0ca3cbd212..ed9fc88a74fc 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -6,8 +6,6 @@ 
  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
  */
 
-#define IOWAIT_BOOST_MIN	(SCHED_CAPACITY_SCALE / 8)
-
 struct sugov_tunables {
 	struct gov_attr_set	attr_set;
 	unsigned int		rate_limit_us;
@@ -42,10 +40,6 @@  struct sugov_cpu {
 	struct sugov_policy	*sg_policy;
 	unsigned int		cpu;
 
-	bool			iowait_boost_pending;
-	unsigned int		iowait_boost;
-	u64			last_update;
-
 	unsigned long		util;
 	unsigned long		bw_min;
 
@@ -195,141 +189,17 @@  unsigned long sugov_effective_cpu_perf(int cpu, unsigned long actual,
 	return max(min, max);
 }
 
-static void sugov_get_util(struct sugov_cpu *sg_cpu, unsigned long boost)
+static void sugov_get_util(struct sugov_cpu *sg_cpu)
 {
 	unsigned long min, max, util = cpu_util_cfs_boost(sg_cpu->cpu);
 	unsigned long io_boost = cpu_util_io_boost(sg_cpu->cpu);
 
-	/*
-	 * XXX: This already includes io boost now, makes little sense with
-	 * sugov iowait boost on top
-	 */
 	util = max(util, io_boost);
 	util = effective_cpu_util(sg_cpu->cpu, util, &min, &max);
-	util = max(util, boost);
 	sg_cpu->bw_min = min;
 	sg_cpu->util = sugov_effective_cpu_perf(sg_cpu->cpu, util, min, max);
 }
 
-/**
- * sugov_iowait_reset() - Reset the IO boost status of a CPU.
- * @sg_cpu: the sugov data for the CPU to boost
- * @time: the update time from the caller
- * @set_iowait_boost: true if an IO boost has been requested
- *
- * The IO wait boost of a task is disabled after a tick since the last update
- * of a CPU. If a new IO wait boost is requested after more then a tick, then
- * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy
- * efficiency by ignoring sporadic wakeups from IO.
- */
-static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time,
-			       bool set_iowait_boost)
-{
-	s64 delta_ns = time - sg_cpu->last_update;
-
-	/* Reset boost only if a tick has elapsed since last request */
-	if (delta_ns <= TICK_NSEC)
-		return false;
-
-	sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0;
-	sg_cpu->iowait_boost_pending = set_iowait_boost;
-
-	return true;
-}
-
-/**
- * sugov_iowait_boost() - Updates the IO boost status of a CPU.
- * @sg_cpu: the sugov data for the CPU to boost
- * @time: the update time from the caller
- * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait
- *
- * Each time a task wakes up after an IO operation, the CPU utilization can be
- * boosted to a certain utilization which doubles at each "frequent and
- * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization
- * of the maximum OPP.
- *
- * To keep doubling, an IO boost has to be requested at least once per tick,
- * otherwise we restart from the utilization of the minimum OPP.
- */
-static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
-			       unsigned int flags)
-{
-	bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT;
-
-	/* Reset boost if the CPU appears to have been idle enough */
-	if (sg_cpu->iowait_boost &&
-	    sugov_iowait_reset(sg_cpu, time, set_iowait_boost))
-		return;
-
-	/* Boost only tasks waking up after IO */
-	if (!set_iowait_boost)
-		return;
-
-	/* Ensure boost doubles only one time at each request */
-	if (sg_cpu->iowait_boost_pending)
-		return;
-	sg_cpu->iowait_boost_pending = true;
-
-	/* Double the boost at each request */
-	if (sg_cpu->iowait_boost) {
-		sg_cpu->iowait_boost =
-			min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE);
-		return;
-	}
-
-	/* First wakeup after IO: start with minimum boost */
-	sg_cpu->iowait_boost = IOWAIT_BOOST_MIN;
-}
-
-/**
- * sugov_iowait_apply() - Apply the IO boost to a CPU.
- * @sg_cpu: the sugov data for the cpu to boost
- * @time: the update time from the caller
- * @max_cap: the max CPU capacity
- *
- * A CPU running a task which woken up after an IO operation can have its
- * utilization boosted to speed up the completion of those IO operations.
- * The IO boost value is increased each time a task wakes up from IO, in
- * sugov_iowait_apply(), and it's instead decreased by this function,
- * each time an increase has not been requested (!iowait_boost_pending).
- *
- * A CPU which also appears to have been idle for at least one tick has also
- * its IO boost utilization reset.
- *
- * This mechanism is designed to boost high frequently IO waiting tasks, while
- * being more conservative on tasks which does sporadic IO operations.
- */
-static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time,
-			       unsigned long max_cap)
-{
-	/* No boost currently required */
-	if (!sg_cpu->iowait_boost)
-		return 0;
-
-	/* Reset boost if the CPU appears to have been idle enough */
-	if (sugov_iowait_reset(sg_cpu, time, false))
-		return 0;
-
-	if (!sg_cpu->iowait_boost_pending) {
-		/*
-		 * No boost pending; reduce the boost value.
-		 */
-		sg_cpu->iowait_boost >>= 1;
-		if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) {
-			sg_cpu->iowait_boost = 0;
-			return 0;
-		}
-	}
-
-	sg_cpu->iowait_boost_pending = false;
-
-	/*
-	 * sg_cpu->util is already in capacity scale; convert iowait_boost
-	 * into the same scale so we can compare.
-	 */
-	return (sg_cpu->iowait_boost * max_cap) >> SCHED_CAPACITY_SHIFT;
-}
-
 #ifdef CONFIG_NO_HZ_COMMON
 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
 {
@@ -357,18 +227,12 @@  static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
 					      u64 time, unsigned long max_cap,
 					      unsigned int flags)
 {
-	unsigned long boost;
-
-	sugov_iowait_boost(sg_cpu, time, flags);
-	sg_cpu->last_update = time;
-
 	ignore_dl_rate_limit(sg_cpu);
 
 	if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
 		return false;
 
-	boost = sugov_iowait_apply(sg_cpu, time, max_cap);
-	sugov_get_util(sg_cpu, boost);
+	sugov_get_util(sg_cpu);
 
 	return true;
 }
@@ -458,7 +322,7 @@  static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
 	sg_cpu->sg_policy->last_freq_update_time = time;
 }
 
-static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
+static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
@@ -469,11 +333,8 @@  static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 
 	for_each_cpu(j, policy->cpus) {
 		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
-		unsigned long boost;
-
-		boost = sugov_iowait_apply(j_sg_cpu, time, max_cap);
-		sugov_get_util(j_sg_cpu, boost);
 
+		sugov_get_util(j_sg_cpu);
 		util = max(j_sg_cpu->util, util);
 	}
 
@@ -489,13 +350,10 @@  sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
 
 	raw_spin_lock(&sg_policy->update_lock);
 
-	sugov_iowait_boost(sg_cpu, time, flags);
-	sg_cpu->last_update = time;
-
 	ignore_dl_rate_limit(sg_cpu);
 
 	if (sugov_should_update_freq(sg_policy, time)) {
-		next_f = sugov_next_freq_shared(sg_cpu, time);
+		next_f = sugov_next_freq_shared(sg_cpu);
 
 		if (!sugov_update_next_freq(sg_policy, time, next_f))
 			goto unlock;