Message ID | b477ac75a2b163048bdaeb37f57b4c3f04f75a31.1559631700.git.viresh.kumar@linaro.org |
State | Accepted |
Commit | 60e17f5cef838e9ca7946ced208ceddcec6c315d |
Series | sched/fair: Introduce fits_capacity() |
On Tue, Jun 4, 2019 at 12:02 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > The same formula to check utilization against capacity (after > considering capacity_margin) is already used at 5 different locations. > > This patch creates a new macro, fits_capacity(), which can be used from > all these locations without exposing the details of it and hence > simplify code. > > All the 5 code locations are updated as well to use it.. > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > --- > kernel/sched/fair.c | 14 +++++++------- > 1 file changed, 7 insertions(+), 7 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 7f8d477f90fe..db3a218b7928 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > * (default: ~20%) > */ > static unsigned int capacity_margin = 1280; > + > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) Any reason to have this as a macro and not as an inline function? > #endif > > #ifdef CONFIG_CFS_BANDWIDTH > @@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) > > static inline int task_fits_capacity(struct task_struct *p, long capacity) > { > - return capacity * 1024 > task_util_est(p) * capacity_margin; > + return fits_capacity(task_util_est(p), capacity); > } > > static inline void update_misfit_status(struct task_struct *p, struct rq *rq) > @@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu); > > static inline bool cpu_overutilized(int cpu) > { > - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); > + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); > } > > static inline void update_overutilized_status(struct rq *rq) > @@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) > /* Skip CPUs that will be overutilized. */ > util = cpu_util_next(cpu, p, cpu); > cpu_cap = capacity_of(cpu); > - if (cpu_cap * 1024 < util * capacity_margin) > + if (!fits_capacity(util, cpu_cap)) > continue; > > /* Always use prev_cpu as a candidate. */ > @@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) > static inline bool > group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > { > - return sg->sgc->min_capacity * capacity_margin < > - ref->sgc->min_capacity * 1024; > + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); > } > > /* > @@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > static inline bool > group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > { > - return sg->sgc->max_capacity * capacity_margin < > - ref->sgc->max_capacity * 1024; > + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); > } > > static inline enum > -- > 2.21.0.rc0.269.g1a574e7a288b >
Hi Viresh, On Tuesday 04 Jun 2019 at 12:31:52 (+0530), Viresh Kumar wrote: > The same formula to check utilization against capacity (after > considering capacity_margin) is already used at 5 different locations. > > This patch creates a new macro, fits_capacity(), which can be used from > all these locations without exposing the details of it and hence > simplify code. > > All the 5 code locations are updated as well to use it.. > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > --- > kernel/sched/fair.c | 14 +++++++------- > 1 file changed, 7 insertions(+), 7 deletions(-) > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 7f8d477f90fe..db3a218b7928 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > * (default: ~20%) > */ > static unsigned int capacity_margin = 1280; > + > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) > #endif > > #ifdef CONFIG_CFS_BANDWIDTH > @@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) > > static inline int task_fits_capacity(struct task_struct *p, long capacity) > { > - return capacity * 1024 > task_util_est(p) * capacity_margin; > + return fits_capacity(task_util_est(p), capacity); > } > > static inline void update_misfit_status(struct task_struct *p, struct rq *rq) > @@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu); > > static inline bool cpu_overutilized(int cpu) > { > - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); > + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); This ... > } > > static inline void update_overutilized_status(struct rq *rq) > @@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) > /* Skip CPUs that will be overutilized. */ > util = cpu_util_next(cpu, p, cpu); > cpu_cap = capacity_of(cpu); > - if (cpu_cap * 1024 < util * capacity_margin) > + if (!fits_capacity(util, cpu_cap)) ... and this isn't _strictly_ equivalent to the existing code but I guess we can live with the difference :-) > continue; > > /* Always use prev_cpu as a candidate. */ > @@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) > static inline bool > group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > { > - return sg->sgc->min_capacity * capacity_margin < > - ref->sgc->min_capacity * 1024; > + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); > } > > /* > @@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > static inline bool > group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > { > - return sg->sgc->max_capacity * capacity_margin < > - ref->sgc->max_capacity * 1024; > + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); > } > > static inline enum > -- > 2.21.0.rc0.269.g1a574e7a288b > Also, since we're talking about making the capacity_margin code more consistent, one small thing I had in mind: we have a capacity margin in sugov too, which happens to be 1.25 has well (see map_util_freq()). Conceptually, capacity_margin in fair.c and the sugov margin are both about answering: "do I have enough CPU capacity to serve X of util, or do I need more ?" So perhaps we should factorize the capacity_margin code some more to use it in both places in a consistent way ? This could be done in a separate patch, though. Thanks, Quentin
On 05-06-19, 10:16, Quentin Perret wrote: > Hi Viresh, > > On Tuesday 04 Jun 2019 at 12:31:52 (+0530), Viresh Kumar wrote: > > The same formula to check utilization against capacity (after > > considering capacity_margin) is already used at 5 different locations. > > > > This patch creates a new macro, fits_capacity(), which can be used from > > all these locations without exposing the details of it and hence > > simplify code. > > > > All the 5 code locations are updated as well to use it.. > > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > --- > > kernel/sched/fair.c | 14 +++++++------- > > 1 file changed, 7 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > > index 7f8d477f90fe..db3a218b7928 100644 > > --- a/kernel/sched/fair.c > > +++ b/kernel/sched/fair.c > > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > > * (default: ~20%) > > */ > > static unsigned int capacity_margin = 1280; > > + > > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) > > #endif > > > > #ifdef CONFIG_CFS_BANDWIDTH > > @@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) > > > > static inline int task_fits_capacity(struct task_struct *p, long capacity) > > { > > - return capacity * 1024 > task_util_est(p) * capacity_margin; > > + return fits_capacity(task_util_est(p), capacity); > > } > > > > static inline void update_misfit_status(struct task_struct *p, struct rq *rq) > > @@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu); > > > > static inline bool cpu_overutilized(int cpu) > > { > > - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); > > + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); > > This ... > > > } > > > > static inline void update_overutilized_status(struct rq *rq) > > @@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) > > /* Skip CPUs that will be overutilized. */ > > util = cpu_util_next(cpu, p, cpu); > > cpu_cap = capacity_of(cpu); > > - if (cpu_cap * 1024 < util * capacity_margin) > > + if (!fits_capacity(util, cpu_cap)) > > ... and this isn't _strictly_ equivalent to the existing code but I > guess we can live with the difference :-) Yes, I missed the == part it seems. Good catch. Though as you said, maybe we don't need to take that into account and can live with the new macro :) > > > continue; > > > > /* Always use prev_cpu as a candidate. 
*/ > > @@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) > > static inline bool > > group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > { > > - return sg->sgc->min_capacity * capacity_margin < > > - ref->sgc->min_capacity * 1024; > > + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); > > } > > > > /* > > @@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > static inline bool > > group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > { > > - return sg->sgc->max_capacity * capacity_margin < > > - ref->sgc->max_capacity * 1024; > > + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); > > } > > > > static inline enum > > -- > > 2.21.0.rc0.269.g1a574e7a288b > > > > Also, since we're talking about making the capacity_margin code more > consistent, one small thing I had in mind: we have a capacity margin > in sugov too, which happens to be 1.25 has well (see map_util_freq()). > Conceptually, capacity_margin in fair.c and the sugov margin are both > about answering: "do I have enough CPU capacity to serve X of util, or > do I need more ?" > > So perhaps we should factorize the capacity_margin code some more to use > it in both places in a consistent way ? This could be done in a separate > patch, though. Hmm, even if the values are same currently I am not sure if we want the same for ever. I will write a patch for it though, if Peter/Rafael feel the same as you. Thanks Quentin. -- viresh
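To make the difference Quentin points out above concrete: the two negated uses (cpu_overutilized() and the check in find_energy_efficient_cpu()) now treat exact equality as "does not fit", while the old code treated it as "fits". A worked example with illustrative numbers, capacity_margin = 1280, capacity = 1000, util = 800:

    util * capacity_margin == capacity * 1024 == 1024000
    old check:  (capacity * 1024) < (util * capacity_margin)     -> false, CPU not marked overutilized
    new check:  !((util * capacity_margin) < (capacity * 1024))  -> true,  CPU marked overutilized

The remaining conversions in the patch (task_fits_capacity() and the two group_smaller_*_cpu_capacity() helpers) are strict rewrites of the same comparison and behave identically.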
On 04-06-19, 08:59, Peter Oskolkov wrote: > On Tue, Jun 4, 2019 at 12:02 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > The same formula to check utilization against capacity (after > > considering capacity_margin) is already used at 5 different locations. > > > > This patch creates a new macro, fits_capacity(), which can be used from > > all these locations without exposing the details of it and hence > > simplify code. > > > > All the 5 code locations are updated as well to use it.. > > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > --- > > kernel/sched/fair.c | 14 +++++++------- > > 1 file changed, 7 insertions(+), 7 deletions(-) > > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > > index 7f8d477f90fe..db3a218b7928 100644 > > --- a/kernel/sched/fair.c > > +++ b/kernel/sched/fair.c > > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > > * (default: ~20%) > > */ > > static unsigned int capacity_margin = 1280; > > + > > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) > > Any reason to have this as a macro and not as an inline function? I don't have any strong preference here, I used a macro as I didn't feel that type-checking is really required on the parameters and eventually this will get open coded anyway. Though I would be fine to make it a routine if maintainers want it that way. Thanks Peter. -- viresh
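For reference, the inline-function alternative discussed above would look roughly like the sketch below, sitting next to the capacity_margin definition in fair.c (illustrative only; it is not part of the posted patch, which keeps the macro):

/* Sketch of the inline-function form; not what was actually merged. */
static inline bool fits_capacity(unsigned long cap, unsigned long max)
{
	return cap * capacity_margin < max * 1024;
}

One practical difference is that the function form pins the argument types (task_fits_capacity() passes a long capacity, the other callers pass unsigned long), which is the type-checking trade-off Viresh refers to above.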
On Thursday 06 Jun 2019 at 08:22:04 (+0530), Viresh Kumar wrote: > On 05-06-19, 10:16, Quentin Perret wrote: > > Hi Viresh, > > > > On Tuesday 04 Jun 2019 at 12:31:52 (+0530), Viresh Kumar wrote: > > > The same formula to check utilization against capacity (after > > > considering capacity_margin) is already used at 5 different locations. > > > > > > This patch creates a new macro, fits_capacity(), which can be used from > > > all these locations without exposing the details of it and hence > > > simplify code. > > > > > > All the 5 code locations are updated as well to use it.. > > > > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > > --- > > > kernel/sched/fair.c | 14 +++++++------- > > > 1 file changed, 7 insertions(+), 7 deletions(-) > > > > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > > > index 7f8d477f90fe..db3a218b7928 100644 > > > --- a/kernel/sched/fair.c > > > +++ b/kernel/sched/fair.c > > > @@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu) > > > * (default: ~20%) > > > */ > > > static unsigned int capacity_margin = 1280; > > > + > > > +#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024) > > > #endif > > > > > > #ifdef CONFIG_CFS_BANDWIDTH > > > @@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) > > > > > > static inline int task_fits_capacity(struct task_struct *p, long capacity) > > > { > > > - return capacity * 1024 > task_util_est(p) * capacity_margin; > > > + return fits_capacity(task_util_est(p), capacity); > > > } > > > > > > static inline void update_misfit_status(struct task_struct *p, struct rq *rq) > > > @@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu); > > > > > > static inline bool cpu_overutilized(int cpu) > > > { > > > - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); > > > + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); > > > > This ... > > > > > } > > > > > > static inline void update_overutilized_status(struct rq *rq) > > > @@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) > > > /* Skip CPUs that will be overutilized. */ > > > util = cpu_util_next(cpu, p, cpu); > > > cpu_cap = capacity_of(cpu); > > > - if (cpu_cap * 1024 < util * capacity_margin) > > > + if (!fits_capacity(util, cpu_cap)) > > > > ... and this isn't _strictly_ equivalent to the existing code but I > > guess we can live with the difference :-) > > Yes, I missed the == part it seems. Good catch. Though as you said, > maybe we don't need to take that into account and can live with the > new macro :) > > > > > > continue; > > > > > > /* Always use prev_cpu as a candidate. 
*/ > > > @@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs) > > > static inline bool > > > group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > > { > > > - return sg->sgc->min_capacity * capacity_margin < > > > - ref->sgc->min_capacity * 1024; > > > + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); > > > } > > > > > > /* > > > @@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > > static inline bool > > > group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) > > > { > > > - return sg->sgc->max_capacity * capacity_margin < > > > - ref->sgc->max_capacity * 1024; > > > + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); > > > } > > > > > > static inline enum > > > -- > > > 2.21.0.rc0.269.g1a574e7a288b > > > > > > > Also, since we're talking about making the capacity_margin code more > > consistent, one small thing I had in mind: we have a capacity margin > > in sugov too, which happens to be 1.25 has well (see map_util_freq()). > > Conceptually, capacity_margin in fair.c and the sugov margin are both > > about answering: "do I have enough CPU capacity to serve X of util, or > > do I need more ?" > > > > So perhaps we should factorize the capacity_margin code some more to use > > it in both places in a consistent way ? This could be done in a separate > > patch, though. > > Hmm, even if the values are same currently I am not sure if we want > the same for ever. Right, that's a good point. It is cheaper to raise the freq on a CPU than to migrate a task, so perhaps there could be a case for different thresholds ... > I will write a patch for it though, if Peter/Rafael > feel the same as you. Sounds good, thanks ! Quentin
On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > Hmm, even if the values are same currently I am not sure if we want > the same for ever. I will write a patch for it though, if Peter/Rafael > feel the same as you. Is it really the same variable or just two numbers that happen to be the same?
+Rafael On 17-06-19, 17:02, Peter Zijlstra wrote: > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > Hmm, even if the values are same currently I am not sure if we want > > the same for ever. I will write a patch for it though, if Peter/Rafael > > feel the same as you. > > Is it really the same variable or just two numbers that happen to be the > same? In both cases we are trying to keep the load under 80% of what can be supported. But I am not sure of the answer to your question. Maybe Rafael knows :) -- viresh
On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > +Rafael > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > Hmm, even if the values are same currently I am not sure if we want > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > feel the same as you. > > > > Is it really the same variable or just two numbers that happen to be the > > same? > > In both cases we are trying to keep the load under 80% of what can be supported. > But I am not sure of the answer to your question. > > Maybe Rafael knows :) Which variable?
On 18-06-19, 09:26, Rafael J. Wysocki wrote: > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > +Rafael > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > Hmm, even if the values are same currently I am not sure if we want > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > feel the same as you. > > > > > > Is it really the same variable or just two numbers that happen to be the > > > same? > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > But I am not sure of the answer to your question. > > > > Maybe Rafael knows :) > > Which variable? Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) to get enough room for more load and similar thing is done in fair.c at several places to see if the new task can fit in a runqueue without overloading it. Quentin suggested to use common code for this calculation and that is what is getting discussed here. -- viresh
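For context, the two hardcoded 1.25 margins being compared here, side by side in a standalone sketch (CAPACITY_MARGIN and map_util_freq_sketch() are illustrative names; the second helper only mirrors the shape of schedutil's map_util_freq(), it is not the kernel code itself):

#define CAPACITY_MARGIN	1280UL

/* fair.c-style check: cap "fits" max if it stays below max * 1024 / 1280, i.e. ~80% of max. */
#define fits_capacity(cap, max)	((cap) * CAPACITY_MARGIN < (max) * 1024UL)

/* schedutil-style scaling: freq + freq/4 is the same 1.25 factor, applied on the frequency side. */
static inline unsigned long map_util_freq_sketch(unsigned long util,
						 unsigned long freq,
						 unsigned long max)
{
	return (freq + (freq >> 2)) * util / max;
}

Both use the same 1.25 factor, which is what prompts the consolidation question; whether they mean the same thing is discussed below.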
On Tue, Jun 18, 2019 at 9:47 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > +Rafael > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > feel the same as you. > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > same? > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > But I am not sure of the answer to your question. > > > > > > Maybe Rafael knows :) > > > > Which variable? > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > to get enough room for more load and similar thing is done in fair.c at several > places to see if the new task can fit in a runqueue without overloading it. For the schedutil part, see the changelog of the commit that introduced it: 9bdcb44e391d cpufreq: schedutil: New governor based on scheduler utilization data As for the other places, I don't know about the exact reasoning. > Quentin suggested to use common code for this calculation and that is what is > getting discussed here. I guess if the rationale for the formula is the same in all cases, it would be good to consolidate that code and document the rationale while at it. Otherwise, I'm not sure.
On Tuesday 18 Jun 2019 at 13:17:28 (+0530), Viresh Kumar wrote: > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > +Rafael > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > feel the same as you. > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > same? > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > But I am not sure of the answer to your question. > > > > > > Maybe Rafael knows :) > > > > Which variable? > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > to get enough room for more load and similar thing is done in fair.c at several > places to see if the new task can fit in a runqueue without overloading it. > > Quentin suggested to use common code for this calculation and that is what is > getting discussed here. Right, sugov and load balance happen to use the same margin (1.25) to check if a given util fits in a given capacity, though the thresholds are hardcoded in different places (see map_util_freq() and capacity_margin). So my suggestion was to unify the capacity_margin code for frequency selection and CPU selection, for clarity and consistency. But again, this is a small thing and FWIW Viresh's patch LGTM as-is so no objection from my end if you guys would like to merge it. Thanks, Quentin
On Tuesday 18 Jun 2019 at 10:10:48 (+0200), Rafael J. Wysocki wrote: > On Tue, Jun 18, 2019 at 9:47 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > > > +Rafael > > > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > > feel the same as you. > > > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > > same? > > > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > > But I am not sure of the answer to your question. > > > > > > > > Maybe Rafael knows :) > > > > > > Which variable? > > > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > > to get enough room for more load and similar thing is done in fair.c at several > > places to see if the new task can fit in a runqueue without overloading it. > > For the schedutil part, see the changelog of the commit that introduced it: > > 9bdcb44e391d cpufreq: schedutil: New governor based on scheduler > utilization data > > As for the other places, I don't know about the exact reasoning. > > > Quentin suggested to use common code for this calculation and that is what is > > getting discussed here. > > I guess if the rationale for the formula is the same in all cases, it > would be good to consolidate that code and document the rationale > while at it. I _think_ it is, but I guess others could correct me if this is incorrect. When choosing a CPU or a frequency using a util value, we look for a capacity that will provide us with 20% of idle time. And in both case we use the same threshold, just hardcoded in different places. Hence the suggestion to unify things. I hope that makes sense :-) Thanks, Quentin
On Tue, Jun 18, 2019 at 10:25 AM Quentin Perret <quentin.perret@arm.com> wrote: > > On Tuesday 18 Jun 2019 at 10:10:48 (+0200), Rafael J. Wysocki wrote: > > On Tue, Jun 18, 2019 at 9:47 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > > > > > +Rafael > > > > > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > > > feel the same as you. > > > > > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > > > same? > > > > > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > > > But I am not sure of the answer to your question. > > > > > > > > > > Maybe Rafael knows :) > > > > > > > > Which variable? > > > > > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > > > to get enough room for more load and similar thing is done in fair.c at several > > > places to see if the new task can fit in a runqueue without overloading it. > > > > For the schedutil part, see the changelog of the commit that introduced it: > > > > 9bdcb44e391d cpufreq: schedutil: New governor based on scheduler > > utilization data > > > > As for the other places, I don't know about the exact reasoning. > > > > > Quentin suggested to use common code for this calculation and that is what is > > > getting discussed here. > > > > I guess if the rationale for the formula is the same in all cases, it > > would be good to consolidate that code and document the rationale > > while at it. > > I _think_ it is, but I guess others could correct me if this is > incorrect. > > When choosing a CPU or a frequency using a util value, we look for a > capacity that will provide us with 20% of idle time. And in both case we > use the same threshold, just hardcoded in different places. Hence the > suggestion to unify things. > > I hope that makes sense :-) Well, for schedutil, the 1.25 value comes from the case when utilization is not frequency-invariant the next-frequency formula is recursive (the next frequency is proportional to the current one). It is chosen to get the new frequency equal to the old one if (util / max) is .8. That translates to the "capacity that will provide 20% more of idle time" in the frequency-invariant utilization case, but the original rationale was different.
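A short worked version of that rationale, with illustrative numbers: with next = 1.25 * freq * util / max and non-frequency-invariant util (so freq is the current frequency and util scales with it), the update is a fixed point exactly when 1.25 * util / max == 1, i.e. util / max == 0.8. For example, at freq = 1 GHz and util / max = 0.8, next = 1.25 * 1 GHz * 0.8 = 1 GHz (hold); at util / max = 0.9, next = 1.125 GHz (ramp up). With frequency-invariant util the same factor reads as "choose a capacity that leaves ~20% idle", which is the interpretation fits_capacity() encodes in fair.c.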
On Tuesday 18 Jun 2019 at 10:36:56 (+0200), Rafael J. Wysocki wrote: > On Tue, Jun 18, 2019 at 10:25 AM Quentin Perret <quentin.perret@arm.com> wrote: > > > > On Tuesday 18 Jun 2019 at 10:10:48 (+0200), Rafael J. Wysocki wrote: > > > On Tue, Jun 18, 2019 at 9:47 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > > > On 18-06-19, 09:26, Rafael J. Wysocki wrote: > > > > > On Tue, Jun 18, 2019 at 5:12 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > > > > > > > > > > > +Rafael > > > > > > > > > > > > On 17-06-19, 17:02, Peter Zijlstra wrote: > > > > > > > On Thu, Jun 06, 2019 at 08:22:04AM +0530, Viresh Kumar wrote: > > > > > > > > Hmm, even if the values are same currently I am not sure if we want > > > > > > > > the same for ever. I will write a patch for it though, if Peter/Rafael > > > > > > > > feel the same as you. > > > > > > > > > > > > > > Is it really the same variable or just two numbers that happen to be the > > > > > > > same? > > > > > > > > > > > > In both cases we are trying to keep the load under 80% of what can be supported. > > > > > > But I am not sure of the answer to your question. > > > > > > > > > > > > Maybe Rafael knows :) > > > > > > > > > > Which variable? > > > > > > > > Schedutil multiplies the target frequency by 1.25 (20% more capacity eventually) > > > > to get enough room for more load and similar thing is done in fair.c at several > > > > places to see if the new task can fit in a runqueue without overloading it. > > > > > > For the schedutil part, see the changelog of the commit that introduced it: > > > > > > 9bdcb44e391d cpufreq: schedutil: New governor based on scheduler > > > utilization data > > > > > > As for the other places, I don't know about the exact reasoning. > > > > > > > Quentin suggested to use common code for this calculation and that is what is > > > > getting discussed here. > > > > > > I guess if the rationale for the formula is the same in all cases, it > > > would be good to consolidate that code and document the rationale > > > while at it. > > > > I _think_ it is, but I guess others could correct me if this is > > incorrect. > > > > When choosing a CPU or a frequency using a util value, we look for a > > capacity that will provide us with 20% of idle time. And in both case we > > use the same threshold, just hardcoded in different places. Hence the > > suggestion to unify things. > > > > I hope that makes sense :-) > > Well, for schedutil, the 1.25 value comes from the case when > utilization is not frequency-invariant the next-frequency formula is > recursive (the next frequency is proportional to the current one). It > is chosen to get the new frequency equal to the old one if (util / > max) is .8. That translates to the "capacity that will provide 20% > more of idle time" in the frequency-invariant utilization case, but > the original rationale was different. OK, thanks, I wasn't aware of this. I understood it the other way around, but re-reading the commit message you shared earlier this makes sense. I guess it is also worth mentioning that, to the best of my knowledge, the vast majority of real-world sugov users are in fact frequency invariant. So perhaps there is still a case for the code factorization suggested earlier. But in the end it is really just a cleanup to help maintainability, so if you guys don't buy in there is no point pushing further :-) Thanks, Quentin
On 04-06-19, 12:31, Viresh Kumar wrote: > The same formula to check utilization against capacity (after > considering capacity_margin) is already used at 5 different locations. > > This patch creates a new macro, fits_capacity(), which can be used from > all these locations without exposing the details of it and hence > simplify code. > > All the 5 code locations are updated as well to use it.. > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > --- > kernel/sched/fair.c | 14 +++++++------- > 1 file changed, 7 insertions(+), 7 deletions(-) @Peter: Do you suggest any modifications to this patch? Do I need to resend it ? -- viresh
On Wed, Jul 17, 2019 at 04:03:56PM +0530, Viresh Kumar wrote: > On 04-06-19, 12:31, Viresh Kumar wrote: > > The same formula to check utilization against capacity (after > > considering capacity_margin) is already used at 5 different locations. > > > > This patch creates a new macro, fits_capacity(), which can be used from > > all these locations without exposing the details of it and hence > > simplify code. > > > > All the 5 code locations are updated as well to use it.. > > > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> > > --- > > kernel/sched/fair.c | 14 +++++++------- > > 1 file changed, 7 insertions(+), 7 deletions(-) > > @Peter: Do you suggest any modifications to this patch? Do I need to > resend it ? Ah, I've picked it up with a few (small) edits. Thanks! --- --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -96,12 +96,12 @@ int __weak arch_asym_cpu_priority(int cp } /* - * The margin used when comparing utilization with CPU capacity: - * util * margin < capacity * 1024 + * The margin used when comparing utilization with CPU capacity. * * (default: ~20%) */ -static unsigned int capacity_margin = 1280; +#define fits_capacity(cap, max) ((cap) * 1280 < (max) * 1024) + #endif #ifdef CONFIG_CFS_BANDWIDTH @@ -3754,7 +3754,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, static inline int task_fits_capacity(struct task_struct *p, long capacity) { - return capacity * 1024 > task_util_est(p) * capacity_margin; + return fits_capacity(task_util_est(p), capacity); } static inline void update_misfit_status(struct task_struct *p, struct rq *rq) @@ -5181,7 +5181,7 @@ static inline unsigned long cpu_util(int static inline bool cpu_overutilized(int cpu) { - return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin); + return !fits_capacity(cpu_util(cpu), capacity_of(cpu)); } static inline void update_overutilized_status(struct rq *rq) @@ -6402,7 +6402,7 @@ static int find_energy_efficient_cpu(str /* Skip CPUs that will be overutilized. */ util = cpu_util_next(cpu, p, cpu); cpu_cap = capacity_of(cpu); - if (cpu_cap * 1024 < util * capacity_margin) + if (!fits_capacity(util, cpu_cap)) continue; /* Always use prev_cpu as a candidate. */ @@ -7957,8 +7957,7 @@ group_is_overloaded(struct lb_env *env, static inline bool group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref) { - return sg->sgc->min_capacity * capacity_margin < - ref->sgc->min_capacity * 1024; + return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity); } /* @@ -7968,8 +7967,7 @@ group_smaller_min_cpu_capacity(struct sc static inline bool group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref) { - return sg->sgc->max_capacity * capacity_margin < - ref->sgc->max_capacity * 1024; + return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity); } static inline enum
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7f8d477f90fe..db3a218b7928 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -102,6 +102,8 @@ int __weak arch_asym_cpu_priority(int cpu)
  * (default: ~20%)
  */
 static unsigned int capacity_margin = 1280;
+
+#define fits_capacity(cap, max) ((cap) * capacity_margin < (max) * 1024)
 #endif
 
 #ifdef CONFIG_CFS_BANDWIDTH
@@ -3727,7 +3729,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
 
 static inline int task_fits_capacity(struct task_struct *p, long capacity)
 {
-	return capacity * 1024 > task_util_est(p) * capacity_margin;
+	return fits_capacity(task_util_est(p), capacity);
 }
 
 static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
@@ -5143,7 +5145,7 @@ static inline unsigned long cpu_util(int cpu);
 
 static inline bool cpu_overutilized(int cpu)
 {
-	return (capacity_of(cpu) * 1024) < (cpu_util(cpu) * capacity_margin);
+	return !fits_capacity(cpu_util(cpu), capacity_of(cpu));
 }
 
 static inline void update_overutilized_status(struct rq *rq)
@@ -6304,7 +6306,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		/* Skip CPUs that will be overutilized. */
 		util = cpu_util_next(cpu, p, cpu);
 		cpu_cap = capacity_of(cpu);
-		if (cpu_cap * 1024 < util * capacity_margin)
+		if (!fits_capacity(util, cpu_cap))
 			continue;
 
 		/* Always use prev_cpu as a candidate. */
@@ -7853,8 +7855,7 @@ group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
 static inline bool
 group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 {
-	return sg->sgc->min_capacity * capacity_margin <
-				ref->sgc->min_capacity * 1024;
+	return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity);
 }
 
 /*
@@ -7864,8 +7865,7 @@ group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 static inline bool
 group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
 {
-	return sg->sgc->max_capacity * capacity_margin <
-				ref->sgc->max_capacity * 1024;
+	return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity);
 }
 
 static inline enum
The same formula to check utilization against capacity (after
considering capacity_margin) is already used at 5 different locations.

This patch creates a new macro, fits_capacity(), which can be used from
all these locations without exposing the details of it and hence
simplifies the code.

All the 5 code locations are updated as well to use it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 kernel/sched/fair.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

-- 
2.21.0.rc0.269.g1a574e7a288b
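As a closing illustration of the threshold the fits_capacity() helper implements, a small standalone sanity check (userspace sketch, not kernel code; the margin is folded in as 1280, matching the value above, and the numbers are illustrative):

#include <assert.h>
#include <stdio.h>

/* Same shape as the fair.c macro: cap fits max if cap < max * 1024 / 1280 (~80% of max). */
#define fits_capacity(cap, max)	((cap) * 1280UL < (max) * 1024UL)

int main(void)
{
	assert(fits_capacity(800UL, 1024UL));	/* ~78% of max: fits          */
	assert(fits_capacity(819UL, 1024UL));	/* just under 80%: still fits */
	assert(!fits_capacity(820UL, 1024UL));	/* above 80%: does not fit    */
	assert(!fits_capacity(800UL, 1000UL));	/* exactly 80%: does not fit  */

	printf("fits_capacity() threshold checks passed\n");
	return 0;
}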