[07/11] cpufreq/schedutil: take into account interrupt

Message ID 1530200714-4504-8-git-send-email-vincent.guittot@linaro.org
State New
Headers show
Series
  • track CPU utilization
Related show

Commit Message

Vincent Guittot June 28, 2018, 3:45 p.m.
The time spent under interrupt can be significant but it is not reflected
in the utilization of CPU when deciding to choose an OPP. Now that we have
access to this metric, schedutil can take it into account when selecting
the OPP for a CPU.
The rqs' utilization doesn't see the time spent in interrupt context and is
reported in the normal context time window. We need to compensate for this
when adding the interrupt utilization.

The CPU utilization is :
  irq util_avg + (1 - irq util_avg / max capacity) * Sum(rq util_avg)

A test with iperf on hikey (octo arm64) gives:
iperf -c server_address -r -t 5

w/o patch		w/ patch
Tx 276 Mbits/sec        304 Mbits/sec +10%
Rx 299 Mbits/sec        328 Mbits/sec +09%

8 iterations
stdev is lower than 1%
Only the WFI idle state is enabled (shallowest idle state)

Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

---
 kernel/sched/cpufreq_schedutil.c | 25 +++++++++++++++++++++----
 kernel/sched/sched.h             | 13 +++++++++++++
 2 files changed, 34 insertions(+), 4 deletions(-)

-- 
2.7.4

Comments

Viresh Kumar July 6, 2018, 6 a.m. | #1
On 28-06-18, 17:45, Vincent Guittot wrote:
> The time spent under interrupt can be significant but it is not reflected

> in the utilization of CPU when deciding to choose an OPP. Now that we have

> access to this metric, schedutil can take it into account when selecting

> the OPP for a CPU.

> rqs utilization don't see the time spend under interrupt context and report

> their value in the normal context time window. We need to compensate this when

> adding interrupt utilization

> 

> The CPU utilization is :

>   irq util_avg + (1 - irq util_avg / max capacity ) * /Sum rq util_avg

> 

> A test with iperf on hikey (octo arm64) gives:

> iperf -c server_address -r -t 5

> 

> w/o patch		w/ patch

> Tx 276 Mbits/sec        304 Mbits/sec +10%

> Rx 299 Mbits/sec        328 Mbits/sec +09%

> 

> 8 iterations

> stdev is lower than 1%

> Only WFI idle state is enable (shallowest diel state)

> 

> Cc: Ingo Molnar <mingo@redhat.com>

> Cc: Peter Zijlstra <peterz@infradead.org>

> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---

>  kernel/sched/cpufreq_schedutil.c | 25 +++++++++++++++++++++----

>  kernel/sched/sched.h             | 13 +++++++++++++

>  2 files changed, 34 insertions(+), 4 deletions(-)

> 

> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c

> index edfbfc1..b77bfef 100644

> --- a/kernel/sched/cpufreq_schedutil.c

> +++ b/kernel/sched/cpufreq_schedutil.c

> @@ -58,6 +58,7 @@ struct sugov_cpu {

>  	unsigned long		util_dl;

>  	unsigned long		bw_dl;

>  	unsigned long		util_rt;

> +	unsigned long		util_irq;

>  	unsigned long		max;

>  

>  	/* The field below is for single-CPU policies only: */

> @@ -190,21 +191,30 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)

>  	sg_cpu->util_dl  = cpu_util_dl(rq);

>  	sg_cpu->bw_dl    = cpu_bw_dl(rq);

>  	sg_cpu->util_rt  = cpu_util_rt(rq);

> +	sg_cpu->util_irq = cpu_util_irq(rq);

>  }

>  

>  static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)

>  {

>  	struct rq *rq = cpu_rq(sg_cpu->cpu);

> -	unsigned long util;

> +	unsigned long util, max = sg_cpu->max;

>  

>  	if (rq->rt.rt_nr_running)

>  		return sg_cpu->max;

>  

> +	if (unlikely(sg_cpu->util_irq >= max))

> +		return max;

> +

> +	/* Sum rq utilization */

>  	util = sg_cpu->util_cfs;

>  	util += sg_cpu->util_rt;

>  

> -	if ((util + sg_cpu->util_dl) >= sg_cpu->max)

> -		return sg_cpu->max;

> +	/*

> +	 * Interrupt time is not seen by rqs utilization nso we can compare


                                                         nso ?

> +	 * them with the CPU capacity

> +	 */

> +	if ((util + sg_cpu->util_dl) >= max)

> +		return max;

>  

>  	/*

>  	 * As there is still idle time on the CPU, we need to compute the

> @@ -220,10 +230,17 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)

>  	 * ready for such an interface. So, we only do the latter for now.

>  	 */

>  

> +	/* Weight rqs utilization to normal context window */

> +	util *= (max - sg_cpu->util_irq);

> +	util /= max;

> +

> +	/* Add interrupt utilization */

> +	util += sg_cpu->util_irq;

> +

>  	/* Add DL bandwidth requirement */

>  	util += sg_cpu->bw_dl;

>  

> -	return min(sg_cpu->max, util);

> +	return min(max, util);

>  }

>  

>  /**

> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

> index 377be2b..9438e68 100644

> --- a/kernel/sched/sched.h

> +++ b/kernel/sched/sched.h

> @@ -2221,4 +2221,17 @@ static inline unsigned long cpu_util_rt(struct rq *rq)

>  {

>  	return rq->avg_rt.util_avg;

>  }

> +

> +#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)

> +static inline unsigned long cpu_util_irq(struct rq *rq)

> +{

> +	return rq->avg_irq.util_avg;

> +}

> +#else

> +static inline unsigned long cpu_util_irq(struct rq *rq)

> +{

> +	return 0;

> +}

> +

> +#endif

>  #endif


Acked-by: Viresh Kumar <viresh.kumar@linaro.org>


-- 
viresh
Peter Zijlstra July 6, 2018, 9:14 a.m. | #2
On Fri, Jul 06, 2018 at 11:30:33AM +0530, Viresh Kumar wrote:
> On 28-06-18, 17:45, Vincent Guittot wrote:

> > The time spent under interrupt can be significant but it is not reflected

> > in the utilization of CPU when deciding to choose an OPP. Now that we have

> > access to this metric, schedutil can take it into account when selecting

> > the OPP for a CPU.

> > rqs utilization don't see the time spend under interrupt context and report

> > their value in the normal context time window. We need to compensate this when

> > adding interrupt utilization

> > 

> > The CPU utilization is :

> >   irq util_avg + (1 - irq util_avg / max capacity ) * /Sum rq util_avg

> > 

> > A test with iperf on hikey (octo arm64) gives:

> > iperf -c server_address -r -t 5

> > 

> > w/o patch		w/ patch

> > Tx 276 Mbits/sec        304 Mbits/sec +10%

> > Rx 299 Mbits/sec        328 Mbits/sec +09%

> > 

> > 8 iterations

> > stdev is lower than 1%

> > Only WFI idle state is enable (shallowest diel state)


Also s/diel/idle/

> > +	/*

> > +	 * Interrupt time is not seen by rqs utilization nso we can compare

> 

>                                                          nso ?

> 

> > +	 * them with the CPU capacity

> > +	 */


Already fixed ;-)
Vincent Guittot July 6, 2018, 9:21 a.m. | #3
On Fri, 6 Jul 2018 at 11:14, Peter Zijlstra <peterz@infradead.org> wrote:
>

> On Fri, Jul 06, 2018 at 11:30:33AM +0530, Viresh Kumar wrote:

> > On 28-06-18, 17:45, Vincent Guittot wrote:

> > > The time spent under interrupt can be significant but it is not reflected

> > > in the utilization of CPU when deciding to choose an OPP. Now that we have

> > > access to this metric, schedutil can take it into account when selecting

> > > the OPP for a CPU.

> > > rqs utilization don't see the time spend under interrupt context and report

> > > their value in the normal context time window. We need to compensate this when

> > > adding interrupt utilization

> > >

> > > The CPU utilization is :

> > >   irq util_avg + (1 - irq util_avg / max capacity ) * /Sum rq util_avg

> > >

> > > A test with iperf on hikey (octo arm64) gives:

> > > iperf -c server_address -r -t 5

> > >

> > > w/o patch           w/ patch

> > > Tx 276 Mbits/sec        304 Mbits/sec +10%

> > > Rx 299 Mbits/sec        328 Mbits/sec +09%

> > >

> > > 8 iterations

> > > stdev is lower than 1%

> > > Only WFI idle state is enable (shallowest diel state)

>

> Also s/diel/idle/

>

> > > +   /*

> > > +    * Interrupt time is not seen by rqs utilization nso we can compare

> >

> >                                                          nso ?

> >

> > > +    * them with the CPU capacity

> > > +    */

>

> Already fixed ;-)


Thanks

Patch

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index edfbfc1..b77bfef 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -58,6 +58,7 @@  struct sugov_cpu {
 	unsigned long		util_dl;
 	unsigned long		bw_dl;
 	unsigned long		util_rt;
+	unsigned long		util_irq;
 	unsigned long		max;
 
 	/* The field below is for single-CPU policies only: */
@@ -190,21 +191,30 @@  static void sugov_get_util(struct sugov_cpu *sg_cpu)
 	sg_cpu->util_dl  = cpu_util_dl(rq);
 	sg_cpu->bw_dl    = cpu_bw_dl(rq);
 	sg_cpu->util_rt  = cpu_util_rt(rq);
+	sg_cpu->util_irq = cpu_util_irq(rq);
 }
 
 static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 {
 	struct rq *rq = cpu_rq(sg_cpu->cpu);
-	unsigned long util;
+	unsigned long util, max = sg_cpu->max;
 
 	if (rq->rt.rt_nr_running)
 		return sg_cpu->max;
 
+	if (unlikely(sg_cpu->util_irq >= max))
+		return max;
+
+	/* Sum rq utilization */
 	util = sg_cpu->util_cfs;
 	util += sg_cpu->util_rt;
 
-	if ((util + sg_cpu->util_dl) >= sg_cpu->max)
-		return sg_cpu->max;
+	/*
+	 * Interrupt time is not seen by rqs utilization so we can compare
+	 * them with the CPU capacity
+	 */
+	if ((util + sg_cpu->util_dl) >= max)
+		return max;
 
 	/*
 	 * As there is still idle time on the CPU, we need to compute the
@@ -220,10 +230,17 @@  static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 	 * ready for such an interface. So, we only do the latter for now.
 	 */
 
+	/* Weight rqs utilization to normal context window */
+	util *= (max - sg_cpu->util_irq);
+	util /= max;
+
+	/* Add interrupt utilization */
+	util += sg_cpu->util_irq;
+
 	/* Add DL bandwidth requirement */
 	util += sg_cpu->bw_dl;
 
-	return min(sg_cpu->max, util);
+	return min(max, util);
 }
 
 /**
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 377be2b..9438e68 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2221,4 +2221,17 @@  static inline unsigned long cpu_util_rt(struct rq *rq)
 {
 	return rq->avg_rt.util_avg;
 }
+
+#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
+static inline unsigned long cpu_util_irq(struct rq *rq)
+{
+	return rq->avg_irq.util_avg;
+}
+#else
+static inline unsigned long cpu_util_irq(struct rq *rq)
+{
+	return 0;
+}
+
+#endif
 #endif