diff mbox

[4/4] sched: Idle task shortcut optimization

Message ID 1389949444-14821-4-git-send-email-daniel.lezcano@linaro.org
State New
Headers show

Commit Message

Daniel Lezcano Jan. 17, 2014, 9:04 a.m. UTC
With the previous patch, there is no ambiguity about going to idle. So we can
directly pick the idle task instead of looking up all the domains, which would
in any case return no task except the idle_task.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 kernel/sched/core.c      |   43 ++++++++++++++++++++++++++++++++++++++-----
 kernel/sched/idle_task.c |   15 +++++----------
 2 files changed, 43 insertions(+), 15 deletions(-)

Comments

Daniel Lezcano Jan. 17, 2014, 3:06 p.m. UTC | #1
On 01/17/2014 03:26 PM, Peter Zijlstra wrote:
> On Fri, Jan 17, 2014 at 10:04:04AM +0100, Daniel Lezcano wrote:
>> @@ -2679,11 +2715,8 @@ need_resched:
>>
>>   	pre_schedule(rq, prev);
>>
>> -	if (unlikely(!rq->nr_running))
>> -		rq->idle_stamp = idle_balance(rq) ? 0 : rq_clock(rq);
>> -
>>   	put_prev_task(rq, prev);
>> -	next = pick_next_task(rq);
>> +	next = pick_next_task_or_idle(rq);
>>   	clear_tsk_need_resched(prev);
>>   	clear_preempt_need_resched();
>>   	rq->skip_clock_update = 0;
>
> I have vague memories that we need to have idle_balance() before
> put_prev_task(), but I can't recollect why this would be so.
>
> That said, if I resurrect these patches:
>
>    https://lkml.org/lkml/2012/6/14/271
>
> I suppose we could write something like:
>
> struct task_struct *pick_next_task(struct rq *rq, struct task_struct *prev)
> {
> 	const struct sched_class *class;
> 	struct task_struct *p;
>
> again:
> 	if (likely(rq->nr_running)) {
>
> 		if (likely(rq->nr_running == rq->cfs.h_nr_running))
> 			return fair_sched_class.pick_next_task(rq, prev);
>
> 		for_each_class(class) {
> 			p = class->pick_next_task(rq, prev);
> 			if (p)
> 				return p;
> 		}
> 	}
>
> 	if (idle_balance(rq))
> 		goto again;
>
> 	rq->idle_stamp = rq_clock(rq);
>
> 	return idle_sched_class.pick_next_task(rq, prev);
> }
>
> Which keeps idle_balance() before put_prev_task(), and by using
> idle_sched_class.pick_next_task() doesn't rape the idle class interface
> like you did :-)

But put_prev_task is called before pick_next_task, so idle_balance() is 
called after now, no?
Daniel Lezcano Jan. 17, 2014, 3:09 p.m. UTC | #2
On 01/17/2014 03:09 PM, Peter Zijlstra wrote:
> On Fri, Jan 17, 2014 at 10:04:04AM +0100, Daniel Lezcano wrote:
>> +	schedstat_inc(rq, sched_goidle);
>> +#ifdef CONFIG_SMP
>> +	/* Trigger the post schedule to do an idle_enter for CFS */
>> +	rq->post_schedule = 1;
>> +#endif
>> +	return rq->idle;
>
> Urgh, that retains the stupid idle crap like it is.
>
> I've not yet tested this, but is there a reason something like the below
> couldn't work?

I tested it by running hackbench and it seems to work as expected.

> ---
> Subject: sched: Clean up idle task SMP logic
> From: Peter Zijlstra <peterz@infradead.org>
> Date: Fri Jan 17 14:54:02 CET 2014
>
> The idle post_schedule hook is just a vile waste of time, fix it proper.
>
> Signed-off-by: Peter Zijlstra <peterz@infradead.org>
> ---
>   kernel/sched/fair.c      |    5 +++--
>   kernel/sched/idle_task.c |   21 ++++++---------------
>   2 files changed, 9 insertions(+), 17 deletions(-)
>
> Index: linux-2.6/kernel/sched/fair.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched/fair.c
> +++ linux-2.6/kernel/sched/fair.c
> @@ -2416,7 +2416,8 @@ void idle_exit_fair(struct rq *this_rq)
>   	update_rq_runnable_avg(this_rq, 0);
>   }
>
> -#else
> +#else /* CONFIG_SMP */
> +
>   static inline void update_entity_load_avg(struct sched_entity *se,
>   					  int update_cfs_rq) {}
>   static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {}
> @@ -2428,7 +2429,7 @@ static inline void dequeue_entity_load_a
>   					   int sleep) {}
>   static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq,
>   					      int force_update) {}
> -#endif
> +#endif /* CONFIG_SMP */
>
>   static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
>   {
> Index: linux-2.6/kernel/sched/idle_task.c
> ===================================================================
> --- linux-2.6.orig/kernel/sched/idle_task.c
> +++ linux-2.6/kernel/sched/idle_task.c
> @@ -13,18 +13,8 @@ select_task_rq_idle(struct task_struct *
>   {
>   	return task_cpu(p); /* IDLE tasks as never migrated */
>   }
> -
> -static void pre_schedule_idle(struct rq *rq, struct task_struct *prev)
> -{
> -	idle_exit_fair(rq);
> -	rq_last_tick_reset(rq);
> -}
> -
> -static void post_schedule_idle(struct rq *rq)
> -{
> -	idle_enter_fair(rq);
> -}
>   #endif /* CONFIG_SMP */
> +
>   /*
>    * Idle tasks are unconditionally rescheduled:
>    */
> @@ -37,8 +27,7 @@ static struct task_struct *pick_next_tas
>   {
>   	schedstat_inc(rq, sched_goidle);
>   #ifdef CONFIG_SMP
> -	/* Trigger the post schedule to do an idle_enter for CFS */
> -	rq->post_schedule = 1;
> +	idle_enter_fair(rq);
>   #endif
>   	return rq->idle;
>   }
> @@ -58,6 +47,10 @@ dequeue_task_idle(struct rq *rq, struct
>
>   static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
>   {
> +#ifdef CONFIG_SMP
> +	idle_exit_fair(rq);
> +	rq_last_tick_reset(rq);
> +#endif
>   }
>
>   static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
> @@ -101,8 +94,6 @@ const struct sched_class idle_sched_clas
>
>   #ifdef CONFIG_SMP
>   	.select_task_rq		= select_task_rq_idle,
> -	.pre_schedule		= pre_schedule_idle,
> -	.post_schedule		= post_schedule_idle,
>   #endif
>
>   	.set_curr_task          = set_curr_task_idle,
>
Daniel Lezcano Jan. 17, 2014, 3:26 p.m. UTC | #3
On 01/17/2014 04:23 PM, Peter Zijlstra wrote:
> On Fri, Jan 17, 2014 at 04:06:45PM +0100, Daniel Lezcano wrote:
>>> I suppose we could write something like:
>>>
>>> struct task_struct *pick_next_task(struct rq *rq, struct task_struct *prev)
>>> {
>>> 	const struct sched_class *class;
>>> 	struct task_struct *p;
>>>
>>> again:
>>> 	if (likely(rq->nr_running)) {
>>>
>>> 		if (likely(rq->nr_running == rq->cfs.h_nr_running))
>>> 			return fair_sched_class.pick_next_task(rq, prev);
>>>
>>> 		for_each_class(class) {
>>> 			p = class->pick_next_task(rq, prev);
>>> 			if (p)
>>> 				return p;
>>> 		}
>>> 	}
>>>
>>> 	if (idle_balance(rq))
>>> 		goto again;
>>>
>>> 	rq->idle_stamp = rq_clock(rq);
>>>
>>> 	return idle_sched_class.pick_next_task(rq, prev);
>>> }
>>>
>>> Which keeps idle_balance() before put_prev_task(), and by using
>>> idle_sched_class.pick_next_task() doesn't rape the idle class interface
>>> like you did :-)
>>
>> But put_prev_task is called before pick_next_task, so idle_balance() is
>> called after now, no  ?
>
> No, put_prev_task() is called by the pick_next_task() that returns a
> task. So in the idle case above, the idle_sched_class.pick_next_task()
> will do the required put_prev_task().

Ah, ok. Let me try it.
Daniel Lezcano Jan. 17, 2014, 4:06 p.m. UTC | #4
On 01/17/2014 04:33 PM, Peter Zijlstra wrote:
> On Fri, Jan 17, 2014 at 04:26:13PM +0100, Daniel Lezcano wrote:
>>
>> Ah, ok. Let me try it.
>>
>
> http://programming.kicks-ass.net/sekrit/patches.tar.bz2
>
> has a queue that applies to tip/master.
>
> The patches as on lkml need a little help in applying.
>
> They've not been near a compiler yet though :/

Thanks a lot for the series. That makes my life easier :)

I did a small compilation fix with the pick_next_task function for the 
deadline scheduler and currently hackbench is running. I will give you 
the results in a moment.
Daniel Lezcano Jan. 17, 2014, 4:37 p.m. UTC | #5
On 01/17/2014 04:33 PM, Peter Zijlstra wrote:
> On Fri, Jan 17, 2014 at 04:26:13PM +0100, Daniel Lezcano wrote:
>>
>> Ah, ok. Let me try it.
>>
>
> http://programming.kicks-ass.net/sekrit/patches.tar.bz2
>
> has a queue that applies to tip/master.
>
> The patches as on lkml need a little help in applying.
>
> They've not been near a compiler yet though :/
>

Here are the results:

Col1 : tip/sched/core
Col2 : the patchset above
Col3 : the patchset above + idle task shortcut

hackbench -s 4096 -l 1000 -g 10 -f 40 -T
  33.306 32.720 31.902
  32.344	32.139 32.214
  33.342	33.281 33.056
  33.319	33.421 31.789
  32.325	32.540 31.941
  33.701	32.978 32.229
  34.981	32.418 30.218
  32.379	31.656 31.717
  32.135	32.241 32.812
  32.531	32.790 31.967

avg:
  33.036 32.618 31.984

hackbench -p -s 4096 -l 1000 -g 10 -f 40
  27.595 27.601 26.331
  25.192	29.336 30.222
  26.057	28.579 28.351
  28.397	29.419 28.554
  27.628	25.045 30.470
  28.976	28.027 29.823
  28.764	29.361 26.902
  27.632	30.101 26.285
  29.129	29.248 26.975
  26.020	25.047 29.324

avg:
  27.539 28.176 28.324

hackbench -P -s 4096 -l 1000 -g 10 -f 40
  32.788 32.057 30.691
  33.158	33.251 34.315
  32.713	33.076 32.880
  32.488	32.878 32.792
  31.240	33.003 32.958
  32.493	33.096 32.244
  33.940	33.660 31.550
  34.733	33.777 31.829
  32.598	34.117 33.714
  34.091	33.196 31.336
avg:
  33.024 33.211 32.431
Daniel Lezcano Jan. 21, 2014, 8:41 a.m. UTC | #6
On 01/17/2014 04:33 PM, Peter Zijlstra wrote:
> On Fri, Jan 17, 2014 at 04:26:13PM +0100, Daniel Lezcano wrote:
>>
>> Ah, ok. Let me try it.
>>
>
> http://programming.kicks-ass.net/sekrit/patches.tar.bz2
>
> has a queue that applies to tip/master.
>
> The patches as on lkml need a little help in applying.
>
> They've not been near a compiler yet though :/

Hi Peter,

do you want me to resend the full series with your patchset included?

Thanks
   -- Daniel
Daniel Lezcano Jan. 21, 2014, 9:28 a.m. UTC | #7
On 01/21/2014 10:06 AM, Peter Zijlstra wrote:
> On Tue, Jan 21, 2014 at 09:41:31AM +0100, Daniel Lezcano wrote:
>> On 01/17/2014 04:33 PM, Peter Zijlstra wrote:
>>> On Fri, Jan 17, 2014 at 04:26:13PM +0100, Daniel Lezcano wrote:
>>>>
>>>> Ah, ok. Let me try it.
>>>>
>>>
>>> http://programming.kicks-ass.net/sekrit/patches.tar.bz2
>>>
>>> has a queue that applies to tip/master.
>>>
>>> The patches as on lkml need a little help in applying.
>>>
>>> They've not been near a compiler yet though :/
>>
>> Hi Peter,
>>
>> do you want me to resend the full serie with your patchset included ?
>
> Nah, I'll send it out, I wanted PJT or Ben to look over one more patch I
> did, meant to do that yesterday but you know how those things go :-)

Yeah :)

Thanks !

   -- Daniel
diff mbox

Patch

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b7e3635..c7a8f4e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2586,7 +2586,43 @@  pick_next_task(struct rq *rq)
 			return p;
 	}
 
-	BUG(); /* the idle class will always have a runnable task */
+	/*
+	 * We must have at least one task to run, the idle task is
+	 * returned by the shortcut in pick_next_task_or_idle. If we
+	 * fall here, we don't have any task to run, so something is
+	 * wrong when we thought the cpu was not going to idle.
+	 */
+	BUG();
+}
+
+static inline struct task_struct *pick_next_task_or_idle(struct rq *rq)
+{
+	if (likely(rq->nr_running))
+		return pick_next_task(rq);
+
+	rq->idle_stamp = 0;
+
+	/*
+	 * If there is a task balanced on this cpu, pick the next task,
+	 * otherwise fall in the optimization by picking the idle task
+	 * directly.
+	 */
+	if (idle_balance(rq))
+		return pick_next_task(rq);
+
+	rq->idle_stamp = rq_clock(rq);
+
+	/*
+	 * Optimization: pick_next_task will return rq->idle in any case but
+	 * after walking through the different sched domains. Let's add a
+	 * shortcut to directly return the idle task.
+	 */
+	schedstat_inc(rq, sched_goidle);
+#ifdef CONFIG_SMP
+	/* Trigger the post schedule to do an idle_enter for CFS */
+	rq->post_schedule = 1;
+#endif
+	return rq->idle;
 }
 
 /*
@@ -2679,11 +2715,8 @@  need_resched:
 
 	pre_schedule(rq, prev);
 
-	if (unlikely(!rq->nr_running))
-		rq->idle_stamp = idle_balance(rq) ? 0 : rq_clock(rq);
-
 	put_prev_task(rq, prev);
-	next = pick_next_task(rq);
+	next = pick_next_task_or_idle(rq);
 	clear_tsk_need_resched(prev);
 	clear_preempt_need_resched();
 	rq->skip_clock_update = 0;
diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c
index 516c3d9..0b4c94b 100644
--- a/kernel/sched/idle_task.c
+++ b/kernel/sched/idle_task.c
@@ -33,16 +33,6 @@  static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
 	resched_task(rq->idle);
 }
 
-static struct task_struct *pick_next_task_idle(struct rq *rq)
-{
-	schedstat_inc(rq, sched_goidle);
-#ifdef CONFIG_SMP
-	/* Trigger the post schedule to do an idle_enter for CFS */
-	rq->post_schedule = 1;
-#endif
-	return rq->idle;
-}
-
 /*
  * It is not legal to sleep in the idle task - print a warning
  * message if some code attempts to do it:
@@ -60,6 +50,11 @@  static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 {
 }
 
+static struct task_struct *pick_next_task_idle(struct rq *rq)
+{
+	return NULL;
+}
+
 static void task_tick_idle(struct rq *rq, struct task_struct *curr, int queued)
 {
 }