Message ID | b477ac75a2b163048bdaeb37f57b4c3f04f75a31.1559631700.git.viresh.kumar@linaro.org |
State | Accepted |
Commit | 60e17f5cef838e9ca7946ced208ceddcec6c315d |
Series | sched/fair: Introduce fits_capacity() |
On Tue, Jun 4, 2019 at 12:02 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > The same formula to check utilization against capacity (after > considering capacity_margin) is already used at 5 different locations. > > This patch creates a new macro, fits_capacity(), which can be used from > all these locations without exposing the details of it and hence > simplify code. > > All the 5 code locations are updated as well to use it.. > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > --- > kernel/sched/fair.c | 14 +++++++------- > 1 file changed, 7 insertions(+), 7 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 7f8d477f90fe..db3a218b7928 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > * (default: ~20%) > */ > static unsigned int capacity_margin = 1280; > + > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) Any reason to have this as a macro and not as an inline function? > #endif > > #ifdef CONFIG_CFS_BANDWIDTH > @@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) > > static inline int task_fits_capacity(struct task_struct *p, long capacity) > { > - return capacity * 1024 > task_util_est(p) * capacity_margin; > + return fits_capacity(task_util_est(p), capacity); > } > > static inline void update_misfit_status(struct task_struct *p, struct rq *rq) > @@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu); > > static inline bool cpu_overutilized(int cpu) > { > - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); > + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); > } > > static inline void update_overutilized_status(struct rq *rq) > @@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) > /* Skip CPUs that will be overutilized. */ > util = cpu_util_next(cpu, p, cpu); > cpu_cap = capacity_of(cpu); > - if (cpu_cap * 1024 < util * capacity_margin) > + if (!fits_capacity(util, cpu_cap)) > continue; > > /* Always use prev_cpu as a candidate. */ > @@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) > static inline bool > group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > { > - return sg->sgc->min_capacity * capacity_margin < > - ref->sgc->min_capacity * 1024; > + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); > } > > /* > @@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > static inline bool > group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > { > - return sg->sgc->max_capacity * capacity_margin < > - ref->sgc->max_capacity * 1024; > + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); > } > > static inline enum > -- > 2.21.0.rc0.269.g1a574e7a288b >
Hi Viresh, On Tuesday 04 Jun 2019 at 12:31:52 (+0530), Viresh Kumar wrote: > The same formula to check utilization against capacity (after > considering capacity_margin) is already used at 5 different locations. > > This patch creates a new macro, fits_capacity(), which can be used from > all these locations without exposing the details of it and hence > simplify code. > > All the 5 code locations are updated as well to use it.. > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > --- > kernel/sched/fair.c | 14 +++++++------- > 1 file changed, 7 insertions(+), 7 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 7f8d477f90fe..db3a218b7928 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > * (default: ~20%) > */ > static unsigned int capacity_margin = 1280; > + > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) > #endif > > #ifdef CONFIG_CFS_BANDWIDTH > @@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) > > static inline int task_fits_capacity(struct task_struct *p, long capacity) > { > - return capacity * 1024 > task_util_est(p) * capacity_margin; > + return fits_capacity(task_util_est(p), capacity); > } > > static inline void update_misfit_status(struct task_struct *p, struct rq *rq) > @@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu); > > static inline bool cpu_overutilized(int cpu) > { > - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); > + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); This ... > } > > static inline void update_overutilized_status(struct rq *rq) > @@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) > /* Skip CPUs that will be overutilized. */ > util = cpu_util_next(cpu, p, cpu); > cpu_cap = capacity_of(cpu); > - if (cpu_cap * 1024 < util * capacity_margin) > + if (!fits_capacity(util, cpu_cap)) ... and this isn't _strictly_ equivalent to the existing code but I guess we can live with the difference :-) > continue; > > /* Always use prev_cpu as a candidate. */ > @@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) > static inline bool > group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > { > - return sg->sgc->min_capacity * capacity_margin < > - ref->sgc->min_capacity * 1024; > + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); > } > > /* > @@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > static inline bool > group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > { > - return sg->sgc->max_capacity * capacity_margin < > - ref->sgc->max_capacity * 1024; > + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); > } > > static inline enum > -- > 2.21.0.rc0.269.g1a574e7a288b > Also, since we're talking about making the capacity_margin code more consistent, one small thing I had in mind: we have a capacity margin in sugov too, which happens to be 1.25 has well (see map_util_freq()). Conceptually, capacity_margin in fair.c and the sugov margin are both about answering: "do I have enough CPU capacity to serve X of util, or do I need more ?" So perhaps we should factorize the capacity_margin code some more to use it in both places in a consistent way ? This could be done in a separate patch, though. Thanks, Quentin
On 05-06-19, 10:16, Quentin Perret wrote: > Hi Viresh, > > On Tuesday 04 Jun 2019 at 12:31:52 (+0530), Viresh Kumar wrote: > > The same formula to check utilization against capacity (after > > considering capacity_margin) is already used at 5 different locations. > > > > This patch creates a new macro, fits_capacity(), which can be used from > > all these locations without exposing the details of it and hence > > simplify code. > > > > All the 5 code locations are updated as well to use it.. > > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > --- > > kernel/sched/fair.c | 14 +++++++------- > > 1 file changed, 7 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > > index 7f8d477f90fe..db3a218b7928 100644 > > --- a/kernel/sched/fair.c > > +++ b/kernel/sched/fair.c > > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > > * (default: ~20%) > > */ > > static unsigned int capacity_margin = 1280; > > + > > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) > > #endif > > > > #ifdef CONFIG_CFS_BANDWIDTH > > @@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) > > > > static inline int task_fits_capacity(struct task_struct *p, long capacity) > > { > > - return capacity * 1024 > task_util_est(p) * capacity_margin; > > + return fits_capacity(task_util_est(p), capacity); > > } > > > > static inline void update_misfit_status(struct task_struct *p, struct rq *rq) > > @@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu); > > > > static inline bool cpu_overutilized(int cpu) > > { > > - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); > > + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); > > This ... > > > } > > > > static inline void update_overutilized_status(struct rq *rq) > > @@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) > > /* Skip CPUs that will be overutilized. */ > > util = cpu_util_next(cpu, p, cpu); > > cpu_cap = capacity_of(cpu); > > - if (cpu_cap * 1024 < util * capacity_margin) > > + if (!fits_capacity(util, cpu_cap)) > > ... and this isn't _strictly_ equivalent to the existing code but I > guess we can live with the difference :-) Yes, I missed the == part it seems. Good catch. Though as you said, maybe we don't need to take that into account and can live with the new macro :) > > > continue; > > > > /* Always use prev_cpu as a candidate. 
*/ > > @@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) > > static inline bool > > group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > { > > - return sg->sgc->min_capacity * capacity_margin < > > - ref->sgc->min_capacity * 1024; > > + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); > > } > > > > /* > > @@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > static inline bool > > group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > { > > - return sg->sgc->max_capacity * capacity_margin < > > - ref->sgc->max_capacity * 1024; > > + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); > > } > > > > static inline enum > > -- > > 2.21.0.rc0.269.g1a574e7a288b > > > > Also, since we're talking about making the capacity_margin code more > consistent, one small thing I had in mind: we have a capacity margin > in sugov too, which happens to be 1.25 has well (see map_util_freq()). > Conceptually, capacity_margin in fair.c and the sugov margin are both > about answering: "do I have enough CPU capacity to serve X of util, or > do I need more ?" > > So perhaps we should factorize the capacity_margin code some more to use > it in both places in a consistent way ? This could be done in a separate > patch, though. Hmm, even if the values are same currently I am not sure if we want the same for ever. I will write a patch for it though, if Peter/Rafael feel the same as you. Thanks Quentin. -- viresh
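To make the difference Quentin points out above concrete: the two negated uses (cpu_overutilized() and the check in find_energy_efficient_cpu()) now treat exact equality as "does not fit", while the old code treated it as "fits". A worked example with illustrative numbers, capacity_margin = 1280, capacity = 1000, util = 800:

    util * capacity_margin == capacity * 1024 == 1024000
    old check:  (capacity * 1024) < (util * capacity_margin)     -> false, CPU not marked overutilized
    new check:  !((util * capacity_margin) < (capacity * 1024))  -> true,  CPU marked overutilized

The remaining conversions in the patch (task_fits_capacity() and the two group_smaller_*_cpu_capacity() helpers) are strict rewrites of the same comparison and behave identically.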
On 04-06-19, 08:59, Peter Oskolkov wrote: > On Tue, Jun 4, 2019 at 12:02 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > The same formula to check utilization against capacity (after > > considering capacity_margin) is already used at 5 different locations. > > > > This patch creates a new macro, fits_capacity(), which can be used from > > all these locations without exposing the details of it and hence > > simplify code. > > > > All the 5 code locations are updated as well to use it.. > > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > --- > > kernel/sched/fair.c | 14 +++++++------- > > 1 file changed, 7 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > > index 7f8d477f90fe..db3a218b7928 100644 > > --- a/kernel/sched/fair.c > > +++ b/kernel/sched/fair.c > > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > > * (default: ~20%) > > */ > > static unsigned int capacity_margin = 1280; > > + > > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) > > Any reason to have this as a macro and not as an inline function? I don't have any strong preference here, I used a macro as I didn't feel that type-checking is really required on the parameters and eventually this will get open coded anyway. Though I would be fine to make it a routine if maintainers want it that way. Thanks Peter. -- viresh
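For reference, the inline-function alternative discussed above would look roughly like the sketch below, sitting next to the capacity_margin definition in fair.c (illustrative only; it is not part of the posted patch, which keeps the macro):

/* Sketch of the inline-function form; not what was actually merged. */
static inline bool fits_capacity(unsigned long cap, unsigned long max)
{
	return cap * capacity_margin < max * 1024;
}

One practical difference is that the function form pins the argument types (task_fits_capacity() passes a long capacity, the other callers pass unsigned long), which is the type-checking trade-off Viresh refers to above.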
On Thursday 06 Jun 2019 at 08:22:04 (+0530), Viresh Kumar wrote: > On 05-06-19, 10:16, Quentin Perret wrote: > > Hi Viresh, > > > > On Tuesday 04 Jun 2019 at 12:31:52 (+0530), Viresh Kumar wrote: > > > The same formula to check utilization against capacity (after > > > considering capacity_margin) is already used at 5 different locations. > > > > > > This patch creates a new macro, fits_capacity(), which can be used from > > > all these locations without exposing the details of it and hence > > > simplify code. > > > > > > All the 5 code locations are updated as well to use it.. > > > > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > > --- > > > kernel/sched/fair.c | 14 +++++++------- > > > 1 file changed, 7 insertions(+), 7 deletions(-) > > > > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > > > index 7f8d477f90fe..db3a218b7928 100644 > > > --- a/kernel/sched/fair.c > > > +++ b/kernel/sched/fair.c > > > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > > > * (default: ~20%) > > > */ > > > static unsigned int capacity_margin = 1280; > > > + > > > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) > > > #endif > > > > > > #ifdef CONFIG_CFS_BANDWIDTH > > > @@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) > > > > > > static inline int task_fits_capacity(struct task_struct *p, long capacity) > > > { > > > - return capacity * 1024 > task_util_est(p) * capacity_margin; > > > + return fits_capacity(task_util_est(p), capacity); > > > } > > > > > > static inline void update_misfit_status(struct task_struct *p, struct rq *rq) > > > @@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu); > > > > > > static inline bool cpu_overutilized(int cpu) > > > { > > > - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); > > > + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); > > > > This ... > > > > > } > > > > > > static inline void update_overutilized_status(struct rq *rq) > > > @@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) > > > /* Skip CPUs that will be overutilized. */ > > > util = cpu_util_next(cpu, p, cpu); > > > cpu_cap = capacity_of(cpu); > > > - if (cpu_cap * 1024 < util * capacity_margin) > > > + if (!fits_capacity(util, cpu_cap)) > > > > ... and this isn't _strictly_ equivalent to the existing code but I > > guess we can live with the difference :-) > > Yes, I missed the == part it seems. Good catch. Though as you said, > maybe we don't need to take that into account and can live with the > new macro :) > > > > > > continue; > > > > > > /* Always use prev_cpu as a candidate. 
*/ > > > @@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) > > > static inline bool > > > group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > > { > > > - return sg->sgc->min_capacity * capacity_margin < > > > - ref->sgc->min_capacity * 1024; > > > + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); > > > } > > > > > > /* > > > @@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > > static inline bool > > > group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > > { > > > - return sg->sgc->max_capacity * capacity_margin < > > > - ref->sgc->max_capacity * 1024; > > > + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); > > > } > > > > > > static inline enum > > > -- > > > 2.21.0.rc0.269.g1a574e7a288b > > > > > > > Also, since we're talking about making the capacity_margin code more > > consistent, one small thing I had in mind: we have a capacity margin > > in sugov too, which happens to be 1.25 has well (see map_util_freq()). > > Conceptually, capacity_margin in fair.c and the sugov margin are both > > about answering: "do I have enough CPU capacity to serve X of util, or > > do I need more ?" > > > > So perhaps we should factorize the capacity_margin code some more to use > > it in both places in a consistent way ? This could be done in a separate > > patch, though. > > Hmm, even if the values are same currently I am not sure if we want > the same for ever. Right, that's a good point. It is cheaper to raise the freq on a CPU than to migrate a task, so perhaps there could be a case for different thresholds ... > I will write a patch for it though, if Peter/Rafael > feel the same as you. Sounds good, thanks ! Quentin
On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > Hmm, even if the values are same currently I am not sure if we want > the same for ever. I will write a patch for it though, if Peter/Rafael > feel the same as you. Is it really the same variable or just two numbers that happen to be the same?
+Rafael On 17-06-19, 17:02, Peter Zijlstra wrote: > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > Hmm, even if the values are same currently I am not sure if we want > > the same for ever. I will write a patch for it though, if Peter/Rafael > > feel the same as you. > > Is it really the same variable or just two numbers that happen to be the > same? In both cases we are trying to keep the load under 80% of what can be supported. But I am not sure of the answer to your question. Maybe Rafael knows :) -- viresh
On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > +Rafael > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > Hmm, even if the values are same currently I am not sure if we want > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > feel the same as you. > > > > Is it really the same variable or just two numbers that happen to be the > > same? > > In both cases we are trying to keep the load under 80% of what can be supported. > But I am not sure of the answer to your question. > > Maybe Rafael knows :) Which variable?
On 18-06-19, 09:26, Rafael J. Wysocki wrote: > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > +Rafael > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > Hmm, even if the values are same currently I am not sure if we want > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > feel the same as you. > > > > > > Is it really the same variable or just two numbers that happen to be the > > > same? > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > But I am not sure of the answer to your question. > > > > Maybe Rafael knows :) > > Which variable? Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) to get enough room for more load and similar thing is done in fair.c at several places to see if the new task can fit in a runqueue without overloading it. Quentin suggested to use common code for this calculation and that is what is getting discussed here. -- viresh
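For context, the two hardcoded 1.25 margins being compared here, side by side in a standalone sketch (CAPACITY_MARGIN and map_util_freq_sketch() are illustrative names; the second helper only mirrors the shape of schedutil's map_util_freq(), it is not the kernel code itself):

#define CAPACITY_MARGIN	1280UL

/* fair.c-style check: cap "fits" max if it stays below max * 1024 / 1280, i.e. ~80% of max. */
#define fits_capacity(cap, max)	((cap) * CAPACITY_MARGIN < (max) * 1024UL)

/* schedutil-style scaling: freq + freq/4 is the same 1.25 factor, applied on the frequency side. */
static inline unsigned long map_util_freq_sketch(unsigned long util,
						 unsigned long freq,
						 unsigned long max)
{
	return (freq + (freq >> 2)) * util / max;
}

Both use the same 1.25 factor, which is what prompts the consolidation question; whether they mean the same thing is discussed below.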
On Tue, Jun 18, 2019 at 9:47 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > +Rafael > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > feel the same as you. > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > same? > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > But I am not sure of the answer to your question. > > > > > > Maybe Rafael knows :) > > > > Which variable? > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > to get enough room for more load and similar thing is done in fair.c at several > places to see if the new task can fit in a runqueue without overloading it. For the schedutil part, see the changelog of the commit that introduced it: 9bdcb44e391d cpufreq: schedutil: New governor based on scheduler utilization data As for the other places, I don't know about the exact reasoning. > Quentin suggested to use common code for this calculation and that is what is > getting discussed here. I guess if the rationale for the formula is the same in all cases, it would be good to consolidate that code and document the rationale while at it. Otherwise, I'm not sure.
On Tuesday 18 Jun 2019 at 13:17:28 (+0530), Viresh Kumar wrote: > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > +Rafael > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > feel the same as you. > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > same? > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > But I am not sure of the answer to your question. > > > > > > Maybe Rafael knows :) > > > > Which variable? > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > to get enough room for more load and similar thing is done in fair.c at several > places to see if the new task can fit in a runqueue without overloading it. > > Quentin suggested to use common code for this calculation and that is what is > getting discussed here. Right, sugov and load balance happen to use the same margin (1.25) to check if a given util fits in a given capacity, though the thresholds are hardcoded in different places (see map_util_freq() and capacity_margin). So my suggestion was to unify the capacity_margin code for frequency selection and CPU selection, for clarity and consistency. But again, this is a small thing and FWIW Viresh's patch LGTM as-is so no objection from my end if you guys would like to merge it. Thanks, Quentin
On Tuesday 18 Jun 2019 at 10:10:48 (+0200), Rafael J. Wysocki wrote: > On Tue, Jun 18, 2019 at 9:47 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > > > +Rafael > > > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > > feel the same as you. > > > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > > same? > > > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > > But I am not sure of the answer to your question. > > > > > > > > Maybe Rafael knows :) > > > > > > Which variable? > > > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > > to get enough room for more load and similar thing is done in fair.c at several > > places to see if the new task can fit in a runqueue without overloading it. > > For the schedutil part, see the changelog of the commit that introduced it: > > 9bdcb44e391d cpufreq: schedutil: New governor based on scheduler > utilization data > > As for the other places, I don't know about the exact reasoning. > > > Quentin suggested to use common code for this calculation and that is what is > > getting discussed here. > > I guess if the rationale for the formula is the same in all cases, it > would be good to consolidate that code and document the rationale > while at it. I _think_ it is, but I guess others could correct me if this is incorrect. When choosing a CPU or a frequency using a util value, we look for a capacity that will provide us with 20% of idle time. And in both case we use the same threshold, just hardcoded in different places. Hence the suggestion to unify things. I hope that makes sense :-) Thanks, Quentin
On Tue, Jun 18, 2019 at 10:25 AM Quentin Perret <quentin.perret@arm.com> wrote: > > On Tuesday 18 Jun 2019 at 10:10:48 (+0200), Rafael J. Wysocki wrote: > > On Tue, Jun 18, 2019 at 9:47 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > > > > > +Rafael > > > > > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > > > feel the same as you. > > > > > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > > > same? > > > > > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > > > But I am not sure of the answer to your question. > > > > > > > > > > Maybe Rafael knows :) > > > > > > > > Which variable? > > > > > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > > > to get enough room for more load and similar thing is done in fair.c at several > > > places to see if the new task can fit in a runqueue without overloading it. > > > > For the schedutil part, see the changelog of the commit that introduced it: > > > > 9bdcb44e391d cpufreq: schedutil: New governor based on scheduler > > utilization data > > > > As for the other places, I don't know about the exact reasoning. > > > > > Quentin suggested to use common code for this calculation and that is what is > > > getting discussed here. > > > > I guess if the rationale for the formula is the same in all cases, it > > would be good to consolidate that code and document the rationale > > while at it. > > I _think_ it is, but I guess others could correct me if this is > incorrect. > > When choosing a CPU or a frequency using a util value, we look for a > capacity that will provide us with 20% of idle time. And in both case we > use the same threshold, just hardcoded in different places. Hence the > suggestion to unify things. > > I hope that makes sense :-) Well, for schedutil, the 1.25 value comes from the case when utilization is not frequency-invariant the next-frequency formula is recursive (the next frequency is proportional to the current one). It is chosen to get the new frequency equal to the old one if (util / max) is .8. That translates to the "capacity that will provide 20% more of idle time" in the frequency-invariant utilization case, but the original rationale was different.
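A short worked version of that rationale, with illustrative numbers: with next = 1.25 * freq * util / max and non-frequency-invariant util (so freq is the current frequency and util scales with it), the update is a fixed point exactly when 1.25 * util / max == 1, i.e. util / max == 0.8. For example, at freq = 1 GHz and util / max = 0.8, next = 1.25 * 1 GHz * 0.8 = 1 GHz (hold); at util / max = 0.9, next = 1.125 GHz (ramp up). With frequency-invariant util the same factor reads as "choose a capacity that leaves ~20% idle", which is the interpretation fits_capacity() encodes in fair.c.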
On Tuesday 18 Jun 2019 at 10:36:56 (+0200), Rafael J. Wysocki wrote: > On Tue, Jun 18, 2019 at 10:25 AM Quentin Perret <quentin.perret@arm.com> wrote: > > > > On Tuesday 18 Jun 2019 at 10:10:48 (+0200), Rafael J. Wysocki wrote: > > > On Tue, Jun 18, 2019 at 9:47 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > > > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > > > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > > > > > > > +Rafael > > > > > > > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > > > > feel the same as you. > > > > > > > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > > > > same? > > > > > > > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > > > > But I am not sure of the answer to your question. > > > > > > > > > > > > Maybe Rafael knows :) > > > > > > > > > > Which variable? > > > > > > > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > > > > to get enough room for more load and similar thing is done in fair.c at several > > > > places to see if the new task can fit in a runqueue without overloading it. > > > > > > For the schedutil part, see the changelog of the commit that introduced it: > > > > > > 9bdcb44e391d cpufreq: schedutil: New governor based on scheduler > > > utilization data > > > > > > As for the other places, I don't know about the exact reasoning. > > > > > > > Quentin suggested to use common code for this calculation and that is what is > > > > getting discussed here. > > > > > > I guess if the rationale for the formula is the same in all cases, it > > > would be good to consolidate that code and document the rationale > > > while at it. > > > > I _think_ it is, but I guess others could correct me if this is > > incorrect. > > > > When choosing a CPU or a frequency using a util value, we look for a > > capacity that will provide us with 20% of idle time. And in both case we > > use the same threshold, just hardcoded in different places. Hence the > > suggestion to unify things. > > > > I hope that makes sense :-) > > Well, for schedutil, the 1.25 value comes from the case when > utilization is not frequency-invariant the next-frequency formula is > recursive (the next frequency is proportional to the current one). It > is chosen to get the new frequency equal to the old one if (util / > max) is .8. That translates to the "capacity that will provide 20% > more of idle time" in the frequency-invariant utilization case, but > the original rationale was different. OK, thanks, I wasn't aware of this. I understood it the other way around, but re-reading the commit message you shared earlier this makes sense. I guess it is also worth mentioning that, to the best of my knowledge, the vast majority of real-world sugov users are in fact frequency invariant. So perhaps there is still a case for the code factorization suggested earlier. But in the end it is really just a cleanup to help maintainability, so if you guys don't buy in there is no point pushing further :-) Thanks, Quentin
On 04-06-19, 12:31, Viresh Kumar wrote: > The same formula to check utilization against capacity (after > considering capacity_margin) is already used at 5 different locations. > > This patch creates a new macro, fits_capacity(), which can be used from > all these locations without exposing the details of it and hence > simplify code. > > All the 5 code locations are updated as well to use it.. > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > --- > kernel/sched/fair.c | 14 +++++++------- > 1 file changed, 7 insertions(+), 7 deletions(-) @Peter: Do you suggest any modifications to this patch? Do I need to resend it ? -- viresh
On Wed, Jul 17, 2019 at 04:03:56PM +0530, Viresh Kumar wrote: > On 04-06-19, 12:31, Viresh Kumar wrote: > > The same formula to check utilization against capacity (after > > considering capacity_margin) is already used at 5 different locations. > > > > This patch creates a new macro, fits_capacity(), which can be used from > > all these locations without exposing the details of it and hence > > simplify code. > > > > All the 5 code locations are updated as well to use it.. > > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > --- > > kernel/sched/fair.c | 14 +++++++------- > > 1 file changed, 7 insertions(+), 7 deletions(-) > > @Peter: Do you suggest any modifications to this patch? Do I need to > resend it ? Ah, I've picked it up with a few (small) edits. Thanks! --- --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -96,12 +96,12 @@ int __weak arch_asym_cpu_priority(int cp } /* - * The margin used when comparing utilization with CPU capacity: - * util * margin < capacity * 1024 + * The margin used when comparing utilization with CPU capacity. * * (default: ~20%) */ -static unsigned int capacity_margin = 1280; +#define fits_capacity(cap, max) ((cap) * 1280 < (max) * 1024) + #endif #ifdef CONFIG_CFS_BANDWIDTH @@ -3754,7 +3754,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, static inline int task_fits_capacity(struct task_struct *p, long capacity) { - return capacity * 1024 > task_util_est(p) * capacity_margin; + return fits_capacity(task_util_est(p), capacity); } static inline void update_misfit_status(struct task_struct *p, struct rq *rq) @@ -5181,7 +5181,7 @@ static inline unsigned long cpu_util(int static inline bool cpu_overutilized(int cpu) { - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); } static inline void update_overutilized_status(struct rq *rq) @@ -6402,7 +6402,7 @@ static int find_energy_efficient_cpu(str /* Skip CPUs that will be overutilized. */ util = cpu_util_next(cpu, p, cpu); cpu_cap = capacity_of(cpu); - if (cpu_cap * 1024 < util * capacity_margin) + if (!fits_capacity(util, cpu_cap)) continue; /* Always use prev_cpu as a candidate. */ @@ -7957,8 +7957,7 @@ group_is_overloaded(struct lb_env *env, static inline bool group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) { - return sg->sgc->min_capacity * capacity_margin < - ref->sgc->min_capacity * 1024; + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); } /* @@ -7968,8 +7967,7 @@ group_smaller_min_cpu_capacity(struct sc static inline bool group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) { - return sg->sgc->max_capacity * capacity_margin < - ref->sgc->max_capacity * 1024; + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); } static inline enum
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7f8d477f90fe..db3a218b7928 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu)
  * (default: ~20%)
  */
 static unsigned int capacity_margin = 1280;
+
+#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024)
 #endif
 
 #ifdef CONFIG_CFS_BANDWIDTH
@@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
 
 static inline int task_fits_capacity(struct task_struct *p, long capacity)
 {
-	return capacity * 1024 > task_util_est(p) * capacity_margin;
+	return fits_capacity(task_util_est(p), capacity);
 }
 
 static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
@@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu);
 
 static inline bool cpu_overutilized(int cpu)
 {
-	return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
+	return !fits_capacity(cpu_util(cpu), capacity_of(cpu));
 }
 
 static inline void update_overutilized_status(struct rq *rq)
@@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		/* Skip CPUs that will be overutilized. */
 		util = cpu_util_next(cpu, p, cpu);
 		cpu_cap = capacity_of(cpu);
-		if (cpu_cap * 1024 < util * capacity_margin)
+		if (!fits_capacity(util, cpu_cap))
 			continue;
 
 		/* Always use prev_cpu as a candidate. */
@@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
 static inline bool
 group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 {
-	return sg->sgc->min_capacity * capacity_margin <
-				ref->sgc->min_capacity * 1024;
+	return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity);
 }
 
 /*
@@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 static inline bool
 group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 {
-	return sg->sgc->max_capacity * capacity_margin <
-				ref->sgc->max_capacity * 1024;
+	return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity);
 }
 
 static inline enum
The same formula to check utilization against capacity (after
considering capacity_margin) is already used at 5 different locations.

This patch creates a new macro, fits_capacity(), which can be used from
all these locations without exposing the details of it and hence
simplifies the code.

All the 5 code locations are updated as well to use it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 kernel/sched/fair.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

-- 
2.21.0.rc0.269.g1a574e7a288b
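As a closing illustration of the threshold the fits_capacity() helper implements, a small standalone sanity check (userspace sketch, not kernel code; the margin is folded in as 1280, matching the value above, and the numbers are illustrative):

#include <assert.h>
#include <stdio.h>

/* Same shape as the fair.c macro: cap fits max if cap < max * 1024 / 1280 (~80% of max). */
#define fits_capacity(cap, max)	((cap) * 1280UL < (max) * 1024UL)

int main(void)
{
	assert(fits_capacity(800UL, 1024UL));	/* ~78% of max: fits          */
	assert(fits_capacity(819UL, 1024UL));	/* just under 80%: still fits */
	assert(!fits_capacity(820UL, 1024UL));	/* above 80%: does not fit    */
	assert(!fits_capacity(800UL, 1000UL));	/* exactly 80%: does not fit  */

	printf("fits_capacity() threshold checks passed\n");
	return 0;
}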