sched/fair: Rearrange select_task_rq_fair() to optimize it

Message ID 8a34a16da90b9f83ffe60316a074a5e4d05b59b0.1524479666.git.viresh.kumar@linaro.org
State New
Headers show
Series
  • sched/fair: Rearrange select_task_rq_fair() to optimize it
Related show

Commit Message

Viresh Kumar April 23, 2018, 10:38 a.m.
Rearrange select_task_rq_fair() a bit to avoid executing some
conditional statements in few specific code-paths. That gets rid of the
goto as well.

This shouldn't result in any functional changes.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

---
 kernel/sched/fair.c | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

-- 
2.15.0.194.g9af6a3dea062

Comments

Valentin Schneider April 24, 2018, 10:02 a.m. | #1
Hi,

On 23/04/18 11:38, Viresh Kumar wrote:
> Rearrange select_task_rq_fair() a bit to avoid executing some

> conditional statements in few specific code-paths. That gets rid of the

> goto as well.

> 


I'd argue making things easier to read is a non-negligible part as well.

> This shouldn't result in any functional changes.

> 

> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

> ---

>  kernel/sched/fair.c | 24 +++++++++---------------

>  1 file changed, 9 insertions(+), 15 deletions(-)

> 

> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

> index 54dc31e7ab9b..cacee15076a4 100644

> --- a/kernel/sched/fair.c

> +++ b/kernel/sched/fair.c

> @@ -6636,6 +6636,7 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f

>  		 */

>  		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&

>  		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {

> +			sd = NULL; /* Prefer wake_affine over balance flags */

>  			affine_sd = tmp;

>  			break;

>  		}

> @@ -6646,33 +6647,26 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f

>  			break;

>  	}

>  

> -	if (affine_sd) {

> -		sd = NULL; /* Prefer wake_affine over balance flags */

> -		if (cpu == prev_cpu)

> -			goto pick_cpu;

> -

> -		new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);

> -	}

> -

> -	if (sd && !(sd_flag & SD_BALANCE_FORK)) {

> +	if (sd) {

>  		/*

>  		 * We're going to need the task's util for capacity_spare_wake

>  		 * in find_idlest_group. Sync it up to prev_cpu's

>  		 * last_update_time.

>  		 */

> -		sync_entity_load_avg(&p->se);

> -	}

> +		if (!(sd_flag & SD_BALANCE_FORK))

> +			sync_entity_load_avg(&p->se);

> +

> +		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);

> +	} else {

> +		if (affine_sd && cpu != prev_cpu)

> +			new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);

>  

> -	if (!sd) {

> -pick_cpu:

>  		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */

>  			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);

>  

>  			if (want_affine)

>  				current->recent_used_cpu = cpu;

>  		}

> -	} else {

> -		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);

>  	}

>  	rcu_read_unlock();

>  

> 


I stared at it for a bit and don't see anything wrong. I was just thinking
that the original flow made it a bit clearer to follow the 'wake_affine' path.

What about this ? It re-bumps up the number of conditionals and adds an indent
level in the domain loop (that could be prevented by hiding the 
cpu != prev_cpu check in wake_affine()), which isn't great, but you get to
squash some more if's.

---------------------->8-------------------------

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index cacee15..ad09b67 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6613,7 +6613,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 static int
 select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
 {
-	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+	struct sched_domain *tmp, *sd = NULL;
 	int cpu = smp_processor_id();
 	int new_cpu = prev_cpu;
 	int want_affine = 0;
@@ -6636,8 +6636,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 		 */
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+			if (cpu != prev_cpu)
+				new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
+
 			sd = NULL; /* Prefer wake_affine over balance flags */
-			affine_sd = tmp;
 			break;
 		}
 
@@ -6657,16 +6659,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 			sync_entity_load_avg(&p->se);
 
 		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
-	} else {
-		if (affine_sd && cpu != prev_cpu)
-			new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);
+	} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
+		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
-		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
-			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
-
-			if (want_affine)
-				current->recent_used_cpu = cpu;
-		}
+		if (want_affine)
+			current->recent_used_cpu = cpu;
 	}
 	rcu_read_unlock();
Viresh Kumar April 24, 2018, 10:30 a.m. | #2
On 24-04-18, 11:02, Valentin Schneider wrote:
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

> index cacee15..ad09b67 100644

> --- a/kernel/sched/fair.c

> +++ b/kernel/sched/fair.c

> @@ -6613,7 +6613,7 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)

>  static int

>  select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)

>  {

> -	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;

> +	struct sched_domain *tmp, *sd = NULL;

>  	int cpu = smp_processor_id();

>  	int new_cpu = prev_cpu;

>  	int want_affine = 0;

> @@ -6636,8 +6636,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f

>  		 */

>  		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&

>  		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {

> +			if (cpu != prev_cpu)

> +				new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);

> +

>  			sd = NULL; /* Prefer wake_affine over balance flags */

> -			affine_sd = tmp;

>  			break;

>  		}

>  

> @@ -6657,16 +6659,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f

>  			sync_entity_load_avg(&p->se);

>  

>  		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);

> -	} else {

> -		if (affine_sd && cpu != prev_cpu)

> -			new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);

> +	} else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */

> +		new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);

>  

> -		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */

> -			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);

> -

> -			if (want_affine)

> -				current->recent_used_cpu = cpu;

> -		}

> +		if (want_affine)

> +			current->recent_used_cpu = cpu;

>  	}

>  	rcu_read_unlock();


LGTM.

I will merge it as part of the current patch, but maybe wait for a few
days before sending V2.

-- 
viresh
Peter Zijlstra April 24, 2018, 10:43 a.m. | #3
On Tue, Apr 24, 2018 at 11:02:26AM +0100, Valentin Schneider wrote:
> I'd argue making things easier to read is a non-negligible part as well.


Right, so I don't object to either of these (I think); but it would be
good to see this in combination with that proposed EAS change.

I think you (valentin) wanted to side-step the entire domain loop in
that case or something.

But yes, getting this code more readable is definitely useful.
Valentin Schneider April 24, 2018, 11:19 a.m. | #4
On 24/04/18 11:43, Peter Zijlstra wrote:
> On Tue, Apr 24, 2018 at 11:02:26AM +0100, Valentin Schneider wrote:

>> I'd argue making things easier to read is a non-negligible part as well.

> 

> Right, so I don't object to either of these (I think); but it would be

> good to see this in combination with that proposed EAS change.

> 


True, I would've said the call to find_energy_efficient_cpu() ([1]) could
simply be added to the if (sd) {} case, but...

> I think you (valentin) wanted to side-step the entire domain loop in

> that case or something.

> 


...this would change more things. Admittedly I've been sort of out of the loop
(no pun intended) lately, but this doesn't ring a bell. That might have been
the other frenchie (Quentin) :)

> But yes, getting this code more readable is definitely useful.

> 


[1]: See [RFC PATCH v2 5/6] sched/fair: Select an energy-efficient CPU on task wake-up
@ https://lkml.org/lkml/2018/4/6/856
Peter Zijlstra April 24, 2018, 12:35 p.m. | #5
On Tue, Apr 24, 2018 at 12:19:07PM +0100, Valentin Schneider wrote:
> On 24/04/18 11:43, Peter Zijlstra wrote:

> > On Tue, Apr 24, 2018 at 11:02:26AM +0100, Valentin Schneider wrote:

> >> I'd argue making things easier to read is a non-negligible part as well.

> > 

> > Right, so I don't object to either of these (I think); but it would be

> > good to see this in combination with that proposed EAS change.

> > 

> 

> True, I would've said the call to find_energy_efficient_cpu() ([1]) could

> simply be added to the if (sd) {} case, but...


I think the proposal was to put it before the for_each_domain() loop
entirely, however...

> > I think you (valentin) wanted to side-step the entire domain loop in

> > that case or something.

> > 

> 

> ...this would change more things. Admittedly I've been sort of out of the loop

> (no pun intended) lately, but this doesn't ring a bell. That might have been

> the other frenchie (Quentin) :)


It does indeed appear I confused the two of you, it was Quentin playing
with that.

In any case, if there are not going to be conflicts here, this all looks
good.
Joel Fernandes April 24, 2018, 3:46 p.m. | #6
On Tue, Apr 24, 2018 at 5:35 AM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, Apr 24, 2018 at 12:19:07PM +0100, Valentin Schneider wrote:

>> On 24/04/18 11:43, Peter Zijlstra wrote:

>> > On Tue, Apr 24, 2018 at 11:02:26AM +0100, Valentin Schneider wrote:

>> >> I'd argue making things easier to read is a non-negligible part as well.

>> >

>> > Right, so I don't object to either of these (I think); but it would be

>> > good to see this in combination with that proposed EAS change.

>> >

>>

>> True, I would've said the call to find_energy_efficient_cpu() ([1]) could

>> simply be added to the if (sd) {} case, but...

>

> I think the proposal was to put it before the for_each_domain() loop

> entirely, however...

>

>> > I think you (valentin) wanted to side-step the entire domain loop in

>> > that case or something.

>> >

>>

>> ...this would change more things. Admittedly I've been sort of out of the loop

>> (no pun intended) lately, but this doesn't ring a bell. That might have been

>> the other frenchie (Quentin) :)

>

> It does indeed appear I confused the two of you, it was Quentin playing

> with that.

>

> In any case, if there not going to be conflicts here, this all looks

> good.


Both Viresh's and Valentin's patches look lovely to me too. I couldn't
spot anything wrong with them either. One suggestion I was thinking
of is can we add better comments to this code (at least label fast
path vs slow path) ?

Also, annotate the conditions for the fast/slow path with
likely/unlikely since fast path is the common case? so like:

if (unlikely(sd)) {
  /* Fast path, common case */
  ...
} else if (...) {
  /* Slow path */
}


thanks,

- Joel
Joel Fernandes April 24, 2018, 3:47 p.m. | #7
On Tue, Apr 24, 2018 at 8:46 AM, Joel Fernandes <joel.opensrc@gmail.com> wrote:
> On Tue, Apr 24, 2018 at 5:35 AM, Peter Zijlstra <peterz@infradead.org> wrote:

>> On Tue, Apr 24, 2018 at 12:19:07PM +0100, Valentin Schneider wrote:

>>> On 24/04/18 11:43, Peter Zijlstra wrote:

>>> > On Tue, Apr 24, 2018 at 11:02:26AM +0100, Valentin Schneider wrote:

>>> >> I'd argue making things easier to read is a non-negligible part as well.

>>> >

>>> > Right, so I don't object to either of these (I think); but it would be

>>> > good to see this in combination with that proposed EAS change.

>>> >

>>>

>>> True, I would've said the call to find_energy_efficient_cpu() ([1]) could

>>> simply be added to the if (sd) {} case, but...

>>

>> I think the proposal was to put it before the for_each_domain() loop

>> entirely, however...

>>

>>> > I think you (valentin) wanted to side-step the entire domain loop in

>>> > that case or something.

>>> >

>>>

>>> ...this would change more things. Admittedly I've been sort of out of the loop

>>> (no pun intended) lately, but this doesn't ring a bell. That might have been

>>> the other frenchie (Quentin) :)

>>

>> It does indeed appear I confused the two of you, it was Quentin playing

>> with that.

>>

>> In any case, if there not going to be conflicts here, this all looks

>> good.

>

> Both Viresh's and Valentin's patch looks lovely to me too. I couldn't

> spot anything wrong with them either. One suggestion I was thinking

> off is can we add better comments to this code (atleast label fast

> path vs slow path) ?

>

> Also, annotate the conditions for the fast/slow path with

> likely/unlikely since fast path is the common case? so like:

>

> if (unlikely(sd)) {

>   /* Fast path, common case */

>   ...

> } else if (...) {

>   /* Slow path */

> }


Aargh, I messed that up, I meant:

if (unlikely(sd)) {
   /* Slow path */
   ...
} else if (...) {
   /* Fast path */
}


thanks, :-)

- Joel
Rohit Jain April 24, 2018, 10:34 p.m. | #8
On 04/24/2018 08:47 AM, Joel Fernandes wrote:
> On Tue, Apr 24, 2018 at 8:46 AM, Joel Fernandes <joel.opensrc@gmail.com> wrote:

>> On Tue, Apr 24, 2018 at 5:35 AM, Peter Zijlstra <peterz@infradead.org> wrote:

>>> On Tue, Apr 24, 2018 at 12:19:07PM +0100, Valentin Schneider wrote:

>>>> On 24/04/18 11:43, Peter Zijlstra wrote:

>>>>> On Tue, Apr 24, 2018 at 11:02:26AM +0100, Valentin Schneider wrote:

>>>>>> I'd argue making things easier to read is a non-negligible part as well.

>>>>> Right, so I don't object to either of these (I think); but it would be

>>>>> good to see this in combination with that proposed EAS change.

>>>>>

>>>> True, I would've said the call to find_energy_efficient_cpu() ([1]) could

>>>> simply be added to the if (sd) {} case, but...

>>> I think the proposal was to put it before the for_each_domain() loop

>>> entirely, however...

>>>

>>>>> I think you (valentin) wanted to side-step the entire domain loop in

>>>>> that case or something.

>>>>>

>>>> ...this would change more things. Admittedly I've been sort of out of the loop

>>>> (no pun intended) lately, but this doesn't ring a bell. That might have been

>>>> the other frenchie (Quentin) :)

>>> It does indeed appear I confused the two of you, it was Quentin playing

>>> with that.

>>>

>>> In any case, if there not going to be conflicts here, this all looks

>>> good.

>> Both Viresh's and Valentin's patch looks lovely to me too. I couldn't

>> spot anything wrong with them either. One suggestion I was thinking

>> off is can we add better comments to this code (atleast label fast

>> path vs slow path) ?

>>

>> Also, annotate the conditions for the fast/slow path with

>> likely/unlikely since fast path is the common case? so like:

>>

>> if (unlikely(sd)) {

>>    /* Fast path, common case */

>>    ...

>> } else if (...) {

>>    /* Slow path */

>> }

> Aargh, I messed that up, I meant:

>

> if (unlikely(sd)) {

>     /* Slow path */

>     ...

> } else if (...) {

>     /* Fast path */

> }


Including the "unlikely" suggestion and the original patch, as expected
with a quick hackbench test on a 44 core 2 socket x86 machine causes no
change in performance.

Thanks,
Rohit

<snip>
Viresh Kumar April 25, 2018, 2:51 a.m. | #9
On 24-04-18, 15:34, Rohit Jain wrote:
> Including the "unlikely" suggestion and the original patch, as expected

> with a quick hackbench test on a 44 core 2 socket x86 machine causes no

> change in performance.


Want me to include your Tested-by in next version ?

-- 
viresh
Viresh Kumar April 25, 2018, 5:15 a.m. | #10
On 24-04-18, 14:35, Peter Zijlstra wrote:
> In any case, if there not going to be conflicts here, this all looks

> good.


Thanks Peter.

I also had another patch and wasn't sure if that would be the right
thing to do. The main purpose of this is to avoid calling
sync_entity_load_avg() unnecessarily.

+++ b/kernel/sched/fair.c
@@ -6196,9 +6196,6 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
 {
        int new_cpu = cpu;
 
-       if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed))
-               return prev_cpu;
-
        while (sd) {
                struct sched_group *group;
                struct sched_domain *tmp;
@@ -6652,15 +6649,19 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
        if (unlikely(sd)) {
                /* Slow path */
 
-               /*
-                * We're going to need the task's util for capacity_spare_wake
-                * in find_idlest_group. Sync it up to prev_cpu's
-                * last_update_time.
-                */
-               if (!(sd_flag & SD_BALANCE_FORK))
-                       sync_entity_load_avg(&p->se);
+               if (!cpumask_intersects(sched_domain_span(sd), &p->cpus_allowed)) {
+                       new_cpu = prev_cpu;
+               } else {
+                       /*
+                        * We're going to need the task's util for
+                        * capacity_spare_wake in find_idlest_group. Sync it up
+                        * to prev_cpu's last_update_time.
+                        */
+                       if (!(sd_flag & SD_BALANCE_FORK))
+                               sync_entity_load_avg(&p->se);
 
-               new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+                       new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+               }
        } else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
                /* Fast path */

-- 
viresh
Quentin Perret April 25, 2018, 8:12 a.m. | #11
On Tuesday 24 Apr 2018 at 14:35:23 (+0200), Peter Zijlstra wrote:
> On Tue, Apr 24, 2018 at 12:19:07PM +0100, Valentin Schneider wrote:

> > On 24/04/18 11:43, Peter Zijlstra wrote:

> > > On Tue, Apr 24, 2018 at 11:02:26AM +0100, Valentin Schneider wrote:

> > >> I'd argue making things easier to read is a non-negligible part as well.

> > > 

> > > Right, so I don't object to either of these (I think); but it would be

> > > good to see this in combination with that proposed EAS change.

> > > 

> > 

> > True, I would've said the call to find_energy_efficient_cpu() ([1]) could

> > simply be added to the if (sd) {} case, but...

> 

> I think the proposal was to put it before the for_each_domain() loop

> entirely, however...

> 

> > > I think you (valentin) wanted to side-step the entire domain loop in

> > > that case or something.

> > > 

> > 

> > ...this would change more things. Admittedly I've been sort of out of the loop

> > (no pun intended) lately, but this doesn't ring a bell. That might have been

> > the other frenchie (Quentin) :)

> 

> It does indeed appear I confused the two of you, it was Quentin playing

> with that.

> 

> In any case, if there not going to be conflicts here, this all looks

> good.


So, the proposal was to re-use the loop to find a non-overutilized sched
domain in which we can use EAS. But yes, I don't see why this would
conflict with this patch so I don't have objections against it.

Thanks,
Quentin
Quentin Perret April 25, 2018, 8:13 a.m. | #12
On Wednesday 25 Apr 2018 at 10:45:09 (+0530), Viresh Kumar wrote:
> On 24-04-18, 14:35, Peter Zijlstra wrote:

> > In any case, if there not going to be conflicts here, this all looks

> > good.

> 

> Thanks Peter.

> 

> I also had another patch and wasn't sure if that would be the right

> thing to do. The main purpose of this is to avoid calling

> sync_entity_load_avg() unnecessarily.


While you're at it, you could probably remove the one in wake_cap() ? I
think having just one in select_task_rq_fair() should be enough.

Thanks,
Quentin
Viresh Kumar April 25, 2018, 9:03 a.m. | #13
On 25-04-18, 09:13, Quentin Perret wrote:
> While you're at it, you could probably remove the one in wake_cap() ? I

> think having just one in select_task_rq_fair() should be enough.


Just to make it clear, you are asking me to remove sync_entity_load_avg()
in wake_cap() ? But aren't we required to do that, as in the very next
line we call task_util(p) ?

-- 
viresh
Quentin Perret April 25, 2018, 9:39 a.m. | #14
On Wednesday 25 Apr 2018 at 14:33:27 (+0530), Viresh Kumar wrote:
> On 25-04-18, 09:13, Quentin Perret wrote:

> > While you're at it, you could probably remove the one in wake_cap() ? I

> > think having just one in select_task_rq_fair() should be enough.

> 

> Just make it clear, you are asking me to remove sync_entity_load_avg()

> in wake_cap() ? But aren't we required to do that, as in the very next

> line we call task_util(p) ?


Right, we do need to call sync_entity_load_avg() at some point before
calling task_util(), but we don't need to re-call it in strf()
after in this case. So my point was just that if you want to re-work
the wake-up path and make sure we don't call sync_entity_load_avg()
if not needed then this might need fixing as well ... Or maybe we don't
care since re-calling sync_entity_load_avg() should be really cheap ...
Viresh Kumar April 25, 2018, 10:13 a.m. | #15
On 25-04-18, 10:39, Quentin Perret wrote:
> On Wednesday 25 Apr 2018 at 14:33:27 (+0530), Viresh Kumar wrote:

> > On 25-04-18, 09:13, Quentin Perret wrote:

> > > While you're at it, you could probably remove the one in wake_cap() ? I

> > > think having just one in select_task_rq_fair() should be enough.

> > 

> > Just make it clear, you are asking me to remove sync_entity_load_avg()

> > in wake_cap() ? But aren't we required to do that, as in the very next

> > line we call task_util(p) ?

> 

> Right, we do need to call sync_entity_load_avg() at some point before

> calling task_util(), but we don't need to re-call it in strf()

> after in this case. So my point was just that if you want to re-work

> the wake-up path and make sure we don't call sync_entity_load_avg()

> if not needed then this might need fixing as well ... Or maybe we don't

> care since re-calling sync_entity_load_avg() should be really cheap ...


These are in two very different paths and I am not sure of a clean way
to avoid calling sync_entity_load_avg() again. Maybe will leave it as
is for now.

-- 
viresh
Quentin Perret April 25, 2018, 10:55 a.m. | #16
On Wednesday 25 Apr 2018 at 15:43:13 (+0530), Viresh Kumar wrote:
> On 25-04-18, 10:39, Quentin Perret wrote:

> > On Wednesday 25 Apr 2018 at 14:33:27 (+0530), Viresh Kumar wrote:

> > > On 25-04-18, 09:13, Quentin Perret wrote:

> > > > While you're at it, you could probably remove the one in wake_cap() ? I

> > > > think having just one in select_task_rq_fair() should be enough.

> > > 

> > > Just make it clear, you are asking me to remove sync_entity_load_avg()

> > > in wake_cap() ? But aren't we required to do that, as in the very next

> > > line we call task_util(p) ?

> > 

> > Right, we do need to call sync_entity_load_avg() at some point before

> > calling task_util(), but we don't need to re-call it in strf()

> > after in this case. So my point was just that if you want to re-work

> > the wake-up path and make sure we don't call sync_entity_load_avg()

> > if not needed then this might need fixing as well ... Or maybe we don't

> > care since re-calling sync_entity_load_avg() should be really cheap ...

> 

> These are in two very different paths and I am not sure of a clean way

> to avoid calling sync_entity_load_avg() again. Maybe will leave it as

> is for now.


Fair enough, I don't really like this double call but, looking into more
details, I'm not sure how to avoid it cleanly either ...
Rohit Jain April 25, 2018, 4:48 p.m. | #17
On 04/24/2018 07:51 PM, Viresh Kumar wrote:
> On 24-04-18, 15:34, Rohit Jain wrote:

>> Including the "unlikely" suggestion and the original patch, as expected

>> with a quick hackbench test on a 44 core 2 socket x86 machine causes no

>> change in performance.

> Want me to include your Tested-by in next version ?

>


Please feel free to include it.

I was not sure if this is too small a test to say tested by.

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 54dc31e7ab9b..cacee15076a4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6636,6 +6636,7 @@  select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 		 */
 		if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+			sd = NULL; /* Prefer wake_affine over balance flags */
 			affine_sd = tmp;
 			break;
 		}
@@ -6646,33 +6647,26 @@  select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 			break;
 	}
 
-	if (affine_sd) {
-		sd = NULL; /* Prefer wake_affine over balance flags */
-		if (cpu == prev_cpu)
-			goto pick_cpu;
-
-		new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);
-	}
-
-	if (sd && !(sd_flag & SD_BALANCE_FORK)) {
+	if (sd) {
 		/*
 		 * We're going to need the task's util for capacity_spare_wake
 		 * in find_idlest_group. Sync it up to prev_cpu's
 		 * last_update_time.
 		 */
-		sync_entity_load_avg(&p->se);
-	}
+		if (!(sd_flag & SD_BALANCE_FORK))
+			sync_entity_load_avg(&p->se);
+
+		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
+	} else {
+		if (affine_sd && cpu != prev_cpu)
+			new_cpu = wake_affine(affine_sd, p, cpu, prev_cpu, sync);
 
-	if (!sd) {
-pick_cpu:
 		if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
 			new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
 
 			if (want_affine)
 				current->recent_used_cpu = cpu;
 		}
-	} else {
-		new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
 	}
 	rcu_read_unlock();