
[v5,03/10] cpufreq/schedutil: add rt utilization tracking

Message ID: 1527253951-22709-4-git-send-email-vincent.guittot@linaro.org
State: Superseded
Series: track CPU utilization

Commit Message

Vincent Guittot May 25, 2018, 1:12 p.m. UTC
Add both cfs and rt utilization when selecting an OPP for cfs tasks as rt
can preempt and steal cfs's running time.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

---
 kernel/sched/cpufreq_schedutil.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

-- 
2.7.4

Comments

Viresh Kumar May 30, 2018, 7:03 a.m. UTC | #1
On 25-05-18, 15:12, Vincent Guittot wrote:
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
>  	/*
>  	 * Utilization required by DEADLINE must always be granted while, for
> @@ -197,7 +205,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
>  	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
>  	 * ready for such an interface. So, we only do the latter for now.
>  	 */
> -	return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs));
> +	return min(sg_cpu->max, util);

Need to update comment above this line to include RT in that?

>  }

-- 
viresh
Vincent Guittot May 30, 2018, 8:23 a.m. UTC | #2
On 30 May 2018 at 09:03, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 25-05-18, 15:12, Vincent Guittot wrote:
>> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
>>       /*
>>        * Utilization required by DEADLINE must always be granted while, for
>> @@ -197,7 +205,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
>>        * util_cfs + util_dl as requested freq. However, cpufreq is not yet
>>        * ready for such an interface. So, we only do the latter for now.
>>        */
>> -     return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs));
>> +     return min(sg_cpu->max, util);
>
> Need to update comment above this line to include RT in that?

yes good point

>
>>  }
>
> --
> viresh
Patrick Bellasi May 30, 2018, 9:40 a.m. UTC | #3
On 25-May 15:12, Vincent Guittot wrote:
> Add both cfs and rt utilization when selecting an OPP for cfs tasks as rt
> can preempt and steal cfs's running time.
> 
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
>  kernel/sched/cpufreq_schedutil.c | 14 +++++++++++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 28592b6..a84b5a5 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -56,6 +56,7 @@ struct sugov_cpu {
>  	/* The fields below are only needed when sharing a policy: */
>  	unsigned long		util_cfs;
>  	unsigned long		util_dl;
> +	unsigned long		util_rt;
>  	unsigned long		max;
>  
>  	/* The field below is for single-CPU policies only: */
> @@ -178,14 +179,21 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
>  	sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
>  	sg_cpu->util_cfs = cpu_util_cfs(rq);
>  	sg_cpu->util_dl  = cpu_util_dl(rq);
> +	sg_cpu->util_rt  = cpu_util_rt(rq);
>  }
>  
>  static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
>  {
>  	struct rq *rq = cpu_rq(sg_cpu->cpu);
> +	unsigned long util;
>  
> -	if (rq->rt.rt_nr_running)
> -		return sg_cpu->max;
> +	if (rq->rt.rt_nr_running) {
> +		util = sg_cpu->max;

Why not just adding the following lines while keeping the return in
for the rq->rt.rt_nr_running case?

> +	} else {
> +		util = sg_cpu->util_dl;
> +		util += sg_cpu->util_cfs;
> +		util += sg_cpu->util_rt;
> +	}
>  
>  	/*
>  	 * Utilization required by DEADLINE must always be granted while, for
> @@ -197,7 +205,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
>  	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
>  	 * ready for such an interface. So, we only do the latter for now.
>  	 */
> -	return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs));
> +	return min(sg_cpu->max, util);

... for the rq->rt.rt_nr_running case we don't really need to min
clamp util = sg_cpu->max with itself...

>  }
>  
>  static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
> -- 
> 2.7.4
> 

-- 
#include <best/regards.h>

Patrick Bellasi
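
One possible shape of the alternative Patrick is suggesting, as a sketch built on the
names in the posted patch (not code from the series): keep the early return so
sg_cpu->max is never min-clamped with itself, and only the summed utilization goes
through the clamp.

	static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
	{
		struct rq *rq = cpu_rq(sg_cpu->cpu);
		unsigned long util;

		/* A runnable RT task still drives the frequency to the max. */
		if (rq->rt.rt_nr_running)
			return sg_cpu->max;

		/* Otherwise sum the per-class utilization signals ... */
		util  = sg_cpu->util_dl;
		util += sg_cpu->util_cfs;
		util += sg_cpu->util_rt;

		/* ... and clamp the sum to the CPU's capacity. */
		return min(sg_cpu->max, util);
	}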
Vincent Guittot May 30, 2018, 9:53 a.m. UTC | #4
On 30 May 2018 at 11:40, Patrick Bellasi <patrick.bellasi@arm.com> wrote:
> On 25-May 15:12, Vincent Guittot wrote:
>> Add both cfs and rt utilization when selecting an OPP for cfs tasks as rt
>> can preempt and steal cfs's running time.
>>
>> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
>> ---
>>  kernel/sched/cpufreq_schedutil.c | 14 +++++++++++---
>>  1 file changed, 11 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
>> index 28592b6..a84b5a5 100644
>> --- a/kernel/sched/cpufreq_schedutil.c
>> +++ b/kernel/sched/cpufreq_schedutil.c
>> @@ -56,6 +56,7 @@ struct sugov_cpu {
>>       /* The fields below are only needed when sharing a policy: */
>>       unsigned long           util_cfs;
>>       unsigned long           util_dl;
>> +     unsigned long           util_rt;
>>       unsigned long           max;
>>
>>       /* The field below is for single-CPU policies only: */
>> @@ -178,14 +179,21 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
>>       sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
>>       sg_cpu->util_cfs = cpu_util_cfs(rq);
>>       sg_cpu->util_dl  = cpu_util_dl(rq);
>> +     sg_cpu->util_rt  = cpu_util_rt(rq);
>>  }
>>
>>  static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
>>  {
>>       struct rq *rq = cpu_rq(sg_cpu->cpu);
>> +     unsigned long util;
>>
>> -     if (rq->rt.rt_nr_running)
>> -             return sg_cpu->max;
>> +     if (rq->rt.rt_nr_running) {
>> +             util = sg_cpu->max;
>
> Why not just adding the following lines while keeping the return in
> for the rq->rt.rt_nr_running case?
>
>> +     } else {
>> +             util = sg_cpu->util_dl;
>> +             util += sg_cpu->util_cfs;
>> +             util += sg_cpu->util_rt;
>> +     }
>>
>>       /*
>>        * Utilization required by DEADLINE must always be granted while, for
>> @@ -197,7 +205,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
>>        * util_cfs + util_dl as requested freq. However, cpufreq is not yet
>>        * ready for such an interface. So, we only do the latter for now.
>>        */
>> -     return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs));
>> +     return min(sg_cpu->max, util);
>
> ... for the rq->rt.rt_nr_running case we don't really need to min
> clamp util = sg_cpu->max with itself...

yes good point

>
>>  }
>>
>>  static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
>> --
>> 2.7.4
>>
>
> --
> #include <best/regards.h>
>
> Patrick Bellasi
Quentin Perret May 30, 2018, 4:46 p.m. UTC | #5
Hi Vincent,

On Friday 25 May 2018 at 15:12:24 (+0200), Vincent Guittot wrote:
> Add both cfs and rt utilization when selecting an OPP for cfs tasks as rt
> can preempt and steal cfs's running time.
> 
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
>  kernel/sched/cpufreq_schedutil.c | 14 +++++++++++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 28592b6..a84b5a5 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -56,6 +56,7 @@ struct sugov_cpu {
>  	/* The fields below are only needed when sharing a policy: */
>  	unsigned long		util_cfs;
>  	unsigned long		util_dl;
> +	unsigned long		util_rt;
>  	unsigned long		max;
>  
>  	/* The field below is for single-CPU policies only: */
> @@ -178,14 +179,21 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
>  	sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
>  	sg_cpu->util_cfs = cpu_util_cfs(rq);
>  	sg_cpu->util_dl  = cpu_util_dl(rq);
> +	sg_cpu->util_rt  = cpu_util_rt(rq);
>  }
>  
>  static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
>  {
>  	struct rq *rq = cpu_rq(sg_cpu->cpu);
> +	unsigned long util;
>  
> -	if (rq->rt.rt_nr_running)
> -		return sg_cpu->max;
> +	if (rq->rt.rt_nr_running) {
> +		util = sg_cpu->max;

So I understand why we want to go to max freq when a RT task is running,
but I think there are use cases where we might want to be more conservative
and use the util_avg of the RT rq instead. The first use case is
battery-powered devices where going to max isn't really affordable from
an energy standpoint. Android, for example, has been using a RT
utilization signal to select OPPs for quite a while now, because going
to max blindly is _very_ expensive.

And the second use-case is thermal pressure. On some modern CPUs, going to
max freq can lead to stringent thermal capping very quickly, at the
point where your CPUs might not have enough capacity to serve your tasks
properly. And that can ultimately hurt the very RT tasks you originally
tried to run fast. In these systems, in the long term, you'd be better off
not asking for more than what you really need ...

So what about having a sched_feature to select between going to max and
using the RT util_avg? Obviously the default should keep the current
behaviour.

Thanks,
Quentin
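
A minimal sketch of what such a sched_feature could look like (the feature name and
its wiring are assumptions made for illustration, not part of the posted series): the
knob only gates the go-to-max shortcut, and the summed class utilization is used
otherwise.

	/* kernel/sched/features.h -- hypothetical knob; default keeps today's behaviour */
	SCHED_FEAT(SUGOV_RT_GO_MAX, true)

	/* kernel/sched/cpufreq_schedutil.c */
	static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
	{
		struct rq *rq = cpu_rq(sg_cpu->cpu);
		unsigned long util;

		/* Jump to the highest OPP only while the feature is enabled. */
		if (sched_feat(SUGOV_RT_GO_MAX) && rq->rt.rt_nr_running)
			return sg_cpu->max;

		/* Otherwise the RT PELT signal contributes like CFS and DL. */
		util  = sg_cpu->util_dl;
		util += sg_cpu->util_cfs;
		util += sg_cpu->util_rt;

		return min(sg_cpu->max, util);
	}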
Juri Lelli May 31, 2018, 8:46 a.m. UTC | #6
On 30/05/18 17:46, Quentin Perret wrote:
> Hi Vincent,
> 
> On Friday 25 May 2018 at 15:12:24 (+0200), Vincent Guittot wrote:
> > Add both cfs and rt utilization when selecting an OPP for cfs tasks as rt
> > can preempt and steal cfs's running time.
> > 
> > Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> > ---
> >  kernel/sched/cpufreq_schedutil.c | 14 +++++++++++---
> >  1 file changed, 11 insertions(+), 3 deletions(-)
> > 
> > diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> > index 28592b6..a84b5a5 100644
> > --- a/kernel/sched/cpufreq_schedutil.c
> > +++ b/kernel/sched/cpufreq_schedutil.c
> > @@ -56,6 +56,7 @@ struct sugov_cpu {
> >  	/* The fields below are only needed when sharing a policy: */
> >  	unsigned long		util_cfs;
> >  	unsigned long		util_dl;
> > +	unsigned long		util_rt;
> >  	unsigned long		max;
> >  
> >  	/* The field below is for single-CPU policies only: */
> > @@ -178,14 +179,21 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
> >  	sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
> >  	sg_cpu->util_cfs = cpu_util_cfs(rq);
> >  	sg_cpu->util_dl  = cpu_util_dl(rq);
> > +	sg_cpu->util_rt  = cpu_util_rt(rq);
> >  }
> >  
> >  static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
> >  {
> >  	struct rq *rq = cpu_rq(sg_cpu->cpu);
> > +	unsigned long util;
> >  
> > -	if (rq->rt.rt_nr_running)
> > -		return sg_cpu->max;
> > +	if (rq->rt.rt_nr_running) {
> > +		util = sg_cpu->max;
> 
> So I understand why we want to go to max freq when a RT task is running,
> but I think there are use cases where we might want to be more conservative
> and use the util_avg of the RT rq instead. The first use case is
> battery-powered devices where going to max isn't really affordable from
> an energy standpoint. Android, for example, has been using a RT
> utilization signal to select OPPs for quite a while now, because going
> to max blindly is _very_ expensive.
> 
> And the second use-case is thermal pressure. On some modern CPUs, going to
> max freq can lead to stringent thermal capping very quickly, at the
> point where your CPUs might not have enough capacity to serve your tasks
> properly. And that can ultimately hurt the very RT tasks you originally
> tried to run fast. In these systems, in the long term, you'd be better off
> not asking for more than what you really need ...

Proposed the same at last LPC. Peter NAKed it (since RT is all about
meeting deadlines, and when using FIFO/RR we don't really know how fast
the CPU should go to meet them, so going to max is the only safe decision).

> So what about having a sched_feature to select between going to max and
> using the RT util_avg? Obviously the default should keep the current
> behaviour.

Peter, would SCHED_FEAT make a difference? :)

Or Patrick's utilization capping applied to RT..
Peter Zijlstra June 1, 2018, 4:23 p.m. UTC | #7
On Thu, May 31, 2018 at 10:46:07AM +0200, Juri Lelli wrote:
> On 30/05/18 17:46, Quentin Perret wrote:
> > So I understand why we want to go to max freq when a RT task is running,
> > but I think there are use cases where we might want to be more conservative
> > and use the util_avg of the RT rq instead. The first use case is
> > battery-powered devices where going to max isn't really affordable from
> > an energy standpoint. Android, for example, has been using a RT
> > utilization signal to select OPPs for quite a while now, because going
> > to max blindly is _very_ expensive.
> > 
> > And the second use-case is thermal pressure. On some modern CPUs, going to
> > max freq can lead to stringent thermal capping very quickly, at the
> > point where your CPUs might not have enough capacity to serve your tasks
> > properly. And that can ultimately hurt the very RT tasks you originally
> > tried to run fast. In these systems, in the long term, you'd be better off
> > not asking for more than what you really need ...
> 
> Proposed the same at last LPC. Peter NAKed it (since RT is all about
> meeting deadlines, and when using FIFO/RR we don't really know how fast
> the CPU should go to meet them, so going to max is the only safe decision).
> 
> > So what about having a sched_feature to select between going to max and
> > using the RT util_avg? Obviously the default should keep the current
> > behaviour.
> 
> Peter, would SCHED_FEAT make a difference? :)

Hurmph...

> Or Patrick's utilization capping applied to RT..

There might be something there, IIRC that tracks the max potential
utilization for the running tasks. So at that point we can set a
frequency to minimize idle time.

It's not perfect, because while the clamping thing effectively sets a
per-task bandwidth, the max filter is wrong. Also there's no CBS to
enforce anything.

With RT servers we could aggregate the group bandwidth and limit from
that...
Patrick Bellasi June 1, 2018, 5:23 p.m. UTC | #8
On 01-Jun 18:23, Peter Zijlstra wrote:
> On Thu, May 31, 2018 at 10:46:07AM +0200, Juri Lelli wrote:
> > On 30/05/18 17:46, Quentin Perret wrote:
> 
> > > So I understand why we want to go to max freq when a RT task is running,
> > > but I think there are use cases where we might want to be more conservative
> > > and use the util_avg of the RT rq instead. The first use case is
> > > battery-powered devices where going to max isn't really affordable from
> > > an energy standpoint. Android, for example, has been using a RT
> > > utilization signal to select OPPs for quite a while now, because going
> > > to max blindly is _very_ expensive.
> > > 
> > > And the second use-case is thermal pressure. On some modern CPUs, going to
> > > max freq can lead to stringent thermal capping very quickly, at the
> > > point where your CPUs might not have enough capacity to serve your tasks
> > > properly. And that can ultimately hurt the very RT tasks you originally
> > > tried to run fast. In these systems, in the long term, you'd be better off
> > > not asking for more than what you really need ...
> > 
> > Proposed the same at last LPC. Peter NAKed it (since RT is all about
> > meeting deadlines, and when using FIFO/RR we don't really know how fast
> > the CPU should go to meet them, so going to max is the only safe decision).
> > 
> > > So what about having a sched_feature to select between going to max and
> > > using the RT util_avg? Obviously the default should keep the current
> > > behaviour.
> > 
> > Peter, would SCHED_FEAT make a difference? :)
> 
> Hurmph...
> 
> > Or Patrick's utilization capping applied to RT..
> 
> There might be something there, IIRC that tracks the max potential
> utilization for the running tasks. So at that point we can set a
> frequency to minimize idle time.

Or we can do the opposite: we go to max by default (as it is now) and
if you think that some RT tasks don't need the full speed, you can
apply a util_max to them.

That way, when a RT task is running alone on a CPU, we can run it
only at a custom max freq which is known to be ok according to your
latency requirements.

If instead it's running with other CFS tasks, we already add the CFS
utilization, which will result in a speedup of the RT task to give
back the CPU to CFS.

> It's not perfect, because while the clamping thing effectively sets a
> per-task bandwidth, the max filter is wrong. Also there's no CBS to
> enforce anything.

Right, well... from user-space potentially if you carefully set the RT
cpu's controller (both bandwidth and clamping) and keep track of the
allocated bandwidth, you can still ensure that all your RT tasks will
be able to run, according to their prio.

> With RT servers we could aggregate the group bandwidth and limit from
> that...

What we certainly miss, I think, is the EDF scheduler: it's not
possible to run certain RT tasks before others irrespective of their
relative priority.

-- 
#include <best/regards.h>

Patrick Bellasi
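
A rough illustration of the clamping idea above (uclamp_rt_max() is a made-up helper
standing in for the utilization-clamping interface being discussed; nothing like it
exists in the posted series): the RT request is capped by a user-space maximum, and
the other class signals are still added on top so CFS gets the CPU back quickly.

	if (rq->rt.rt_nr_running) {
		/*
		 * uclamp_rt_max() is hypothetical: the per-rq RT util_max set
		 * from user-space, defaulting to full capacity (go to max).
		 */
		util = min(sg_cpu->max, uclamp_rt_max(rq));
	} else {
		util = sg_cpu->util_rt;
	}
	/* CFS and DL demand still speed things up further. */
	util += sg_cpu->util_cfs + sg_cpu->util_dl;

	return min(sg_cpu->max, util);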
Quentin Perret June 4, 2018, 10:17 a.m. UTC | #9
On Friday 01 Jun 2018 at 18:23:59 (+0100), Patrick Bellasi wrote:
> On 01-Jun 18:23, Peter Zijlstra wrote:
> > On Thu, May 31, 2018 at 10:46:07AM +0200, Juri Lelli wrote:
> > > On 30/05/18 17:46, Quentin Perret wrote:
> > 
> > > > So I understand why we want to go to max freq when a RT task is running,
> > > > but I think there are use cases where we might want to be more conservative
> > > > and use the util_avg of the RT rq instead. The first use case is
> > > > battery-powered devices where going to max isn't really affordable from
> > > > an energy standpoint. Android, for example, has been using a RT
> > > > utilization signal to select OPPs for quite a while now, because going
> > > > to max blindly is _very_ expensive.
> > > > 
> > > > And the second use-case is thermal pressure. On some modern CPUs, going to
> > > > max freq can lead to stringent thermal capping very quickly, at the
> > > > point where your CPUs might not have enough capacity to serve your tasks
> > > > properly. And that can ultimately hurt the very RT tasks you originally
> > > > tried to run fast. In these systems, in the long term, you'd be better off
> > > > not asking for more than what you really need ...
> > > 
> > > Proposed the same at last LPC. Peter NAKed it (since RT is all about
> > > meeting deadlines, and when using FIFO/RR we don't really know how fast
> > > the CPU should go to meet them, so going to max is the only safe decision).
> > > 
> > > > So what about having a sched_feature to select between going to max and
> > > > using the RT util_avg? Obviously the default should keep the current
> > > > behaviour.
> > > 
> > > Peter, would SCHED_FEAT make a difference? :)
> > 
> > Hurmph...

:)

> > 
> > > Or Patrick's utilization capping applied to RT..
> > 
> > There might be something there, IIRC that tracks the max potential
> > utilization for the running tasks. So at that point we can set a
> > frequency to minimize idle time.
> 
> Or we can do the opposite: we go to max by default (as it is now) and
> if you think that some RT tasks don't need the full speed, you can
> apply a util_max to them.
> 
> That way, when a RT task is running alone on a CPU, we can run it
> only at a custom max freq which is known to be ok according to your
> latency requirements.
> 
> If instead it's running with other CFS tasks, we already add the CFS
> utilization, which will result in a speedup of the RT task to give
> back the CPU to CFS.

Hmmm, why not set a util_min constraint instead? The default for a
RT task should be a util_min of 1024, which means go to max. And then
if userspace has knowledge about the tasks it could decide to lower the
util_min value. That way, you would still let the util_avg grow if a
RT task runs flat out for a long time, and you would still be able to go
to higher frequencies. But if the util of the RT tasks is very low, you
wouldn't necessarily run at max freq, you would run at the freq matching
the util_min constraint.

So you probably want to: 1) forbid setting max_util constraints for RT;
2) have schedutil look at the min-capped RT util if rt_nr_running > 0;
and 3) have schedutil look at the non-capped RT util if rt_nr_running == 0.

Does that make any sense?

Thanks,
Quentin

> 
> > It's not perfect, because while the clamping thing effectively sets a
> > per-task bandwidth, the max filter is wrong. Also there's no CBS to
> > enforce anything.
> 
> Right, well... from user-space potentially if you carefully set the RT
> cpu's controller (both bandwidth and clamping) and keep track of the
> allocated bandwidth, you can still ensure that all your RT tasks will
> be able to run, according to their prio.
> 
> > With RT servers we could aggregate the group bandwidth and limit from
> > that...
> 
> What we certainly miss, I think, is the EDF scheduler: it's not
> possible to run certain RT tasks before others irrespective of their
> relative priority.
> 
> -- 
> #include <best/regards.h>
> 
> Patrick Bellasi
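
As an illustration of the three numbered points above (uclamp_rt_min() is an invented
helper for the sketch; the utilization-clamping series that could provide something
like it was still under review at the time):

	unsigned long rt_util = sg_cpu->util_rt;

	/*
	 * While an RT task is runnable, honour its util_min floor: a default
	 * of 1024 preserves today's go-to-max behaviour, while a lower value
	 * set by user-space requests a smaller, but still guaranteed, OPP.
	 * uclamp_rt_min() is hypothetical.
	 */
	if (rq->rt.rt_nr_running)
		rt_util = max(rt_util, uclamp_rt_min(rq));

	util = sg_cpu->util_dl + sg_cpu->util_cfs + rt_util;
	return min(sg_cpu->max, util);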
Patrick Bellasi June 4, 2018, 3:16 p.m. UTC | #10
On 04-Jun 11:17, Quentin Perret wrote:
> On Friday 01 Jun 2018 at 18:23:59 (+0100), Patrick Bellasi wrote:
> > On 01-Jun 18:23, Peter Zijlstra wrote:
> > > On Thu, May 31, 2018 at 10:46:07AM +0200, Juri Lelli wrote:
> > > > On 30/05/18 17:46, Quentin Perret wrote:

[...]

> > > There might be something there, IIRC that tracks the max potential
> > > utilization for the running tasks. So at that point we can set a
> > > frequency to minimize idle time.
> > 
> > Or we can do the opposite: we go to max by default (as it is now) and
> > if you think that some RT tasks don't need the full speed, you can
> > apply a util_max to them.
> > 
> > That way, when a RT task is running alone on a CPU, we can run it
> > only at a custom max freq which is known to be ok according to your
> > latency requirements.
> > 
> > If instead it's running with other CFS tasks, we already add the CFS
> > utilization, which will result in a speedup of the RT task to give
> > back the CPU to CFS.
> 
> Hmmm, why not set a util_min constraint instead? The default for a
> RT task should be a util_min of 1024, which means go to max. And then
> if userspace has knowledge about the tasks it could decide to lower the
> util_min value. That way, you would still let the util_avg grow if a
> RT task runs flat out for a long time, and you would still be able to go
> to higher frequencies. But if the util of the RT tasks is very low, you
> wouldn't necessarily run at max freq, you would run at the freq matching
> the util_min constraint.
> 
> So you probably want to: 1) forbid setting max_util constraints for RT;
> 2) have schedutil look at the min-capped RT util if rt_nr_running > 0;
> and 3) have schedutil look at the non-capped RT util if rt_nr_running == 0.
> 
> Does that make any sense?

I would say that it "could" make sense... it really depends on
user-space IMO. You could have long-running RT tasks which you still
don't want to run at max OPP for power/energy concerns, maybe?

Anyway, the good point is that this is a user-space policy.

Right now we do not do anything special for RT tasks from the
util_clamp side. User-space is in charge of configuring it
correctly, and it could also very well decide to use different clamps
for different RT tasks, maybe.
Thus, I would probably avoid the special cases you describe in the
above last two points.

-- 
#include <best/regards.h>

Patrick Bellasi

Patch

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 28592b6..a84b5a5 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -56,6 +56,7 @@  struct sugov_cpu {
 	/* The fields below are only needed when sharing a policy: */
 	unsigned long		util_cfs;
 	unsigned long		util_dl;
+	unsigned long		util_rt;
 	unsigned long		max;
 
 	/* The field below is for single-CPU policies only: */
@@ -178,14 +179,21 @@  static void sugov_get_util(struct sugov_cpu *sg_cpu)
 	sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
 	sg_cpu->util_cfs = cpu_util_cfs(rq);
 	sg_cpu->util_dl  = cpu_util_dl(rq);
+	sg_cpu->util_rt  = cpu_util_rt(rq);
 }
 
 static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 {
 	struct rq *rq = cpu_rq(sg_cpu->cpu);
+	unsigned long util;
 
-	if (rq->rt.rt_nr_running)
-		return sg_cpu->max;
+	if (rq->rt.rt_nr_running) {
+		util = sg_cpu->max;
+	} else {
+		util = sg_cpu->util_dl;
+		util += sg_cpu->util_cfs;
+		util += sg_cpu->util_rt;
+	}
 
 	/*
 	 * Utilization required by DEADLINE must always be granted while, for
@@ -197,7 +205,7 @@  static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
 	 * ready for such an interface. So, we only do the latter for now.
 	 */
-	return min(sg_cpu->max, (sg_cpu->util_dl + sg_cpu->util_cfs));
+	return min(sg_cpu->max, util);
 }
 
 static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
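
To see what the new aggregation does in practice, here is a small worked example with
made-up numbers (they are not taken from the thread):

	/*
	 * Illustrative values only. Assume max = 1024 (full capacity) and
	 * tracked signals util_cfs = 300, util_rt = 150, util_dl = 50.
	 *
	 * No runnable RT task:  util = 50 + 300 + 150 = 500
	 *                       min(1024, 500) = 500  -> mid-range OPP
	 * Runnable RT task:     util = sg_cpu->max     -> highest OPP
	 *
	 * Before this patch the RT contribution (150 here) was ignored
	 * whenever no RT task happened to be runnable at the sample point.
	 */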