[v2] sched/rt: fix call to cpufreq_update_util

Message ID: 1526541403-16380-1-git-send-email-vincent.guittot@linaro.org
State: New
Series: [v2] sched/rt: fix call to cpufreq_update_util

Commit Message

Vincent Guittot May 17, 2018, 7:16 a.m. UTC
With commit 8f111bc357aa ("cpufreq/schedutil: Rewrite CPUFREQ_RT support")
the schedutil governor uses rq->rt.rt_nr_running to detect whether an RT task is
currently running on the CPU and to set the frequency to max if necessary.
cpufreq_update_util() is called in enqueue/dequeue_top_rt_rq(), but
rq->rt.rt_nr_running has not been updated yet when dequeue_top_rt_rq() is
called, so schedutil still considers that an RT task is running when the
last task is dequeued. The update of rq->rt.rt_nr_running happens later
in dequeue_rt_stack().
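
For illustration, the ordering can be reproduced with a small self-contained
C model (a sketch only: the names mirror kernel/sched/rt.c and
kernel/sched/cpufreq_schedutil.c, but this is not the kernel code):

#include <stdio.h>

/* Toy rt_rq with just the fields that matter for the ordering. */
struct rt_rq {
	int rt_nr_running;
	int rt_queued;
};

/* Stand-in for schedutil reading rq->rt.rt_nr_running. */
static void cpufreq_update_util(struct rt_rq *rt_rq)
{
	printf("schedutil sees rt_nr_running=%d -> %s frequency\n",
	       rt_rq->rt_nr_running,
	       rt_rq->rt_nr_running ? "max" : "normal");
}

/* Mirrors dequeue_top_rt_rq(): the cpufreq kick happens here ... */
static void dequeue_top_rt_rq(struct rt_rq *rt_rq)
{
	rt_rq->rt_queued = 0;
	cpufreq_update_util(rt_rq);	/* rt_nr_running is not updated yet */
}

/* ... while the counter only drops later, in dequeue_rt_stack(). */
static void dequeue_rt_stack(struct rt_rq *rt_rq)
{
	dequeue_top_rt_rq(rt_rq);
	rt_rq->rt_nr_running--;		/* too late for the kick above */
}

int main(void)
{
	struct rt_rq rt = { .rt_nr_running = 1, .rt_queued = 1 };

	dequeue_rt_stack(&rt);		/* last RT task leaves the CPU */
	return 0;
}

The model prints "max frequency" although the last RT task has just been
dequeued, which is the behaviour this patch addresses.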

Fixes: 8f111bc357aa ('cpufreq/schedutil: Rewrite CPUFREQ_RT support')
Cc: <stable@vger.kernel.org> # v4.16+
Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

---
- v2:
  - remove blank line

 kernel/sched/rt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

-- 
2.7.4

Comments

Peter Zijlstra May 17, 2018, 9:05 a.m. UTC | #1
On Thu, May 17, 2018 at 09:16:43AM +0200, Vincent Guittot wrote:
> With commit
>
>  8f111bc357aa ("cpufreq/schedutil: Rewrite CPUFREQ_RT support")
>
> schedutil governor uses rq->rt.rt_nr_running to detect whether an RT task is
> currently running on the CPU and to set frequency to max if necessary.
>
> cpufreq_update_util() is called in enqueue/dequeue_top_rt_rq() but
> rq->rt.rt_nr_running has not been updated yet when dequeue_top_rt_rq() is
> called so schedutil still considers that an RT task is running when the
> last task is dequeued. The update of rq->rt.rt_nr_running happens later
> in dequeue_rt_stack().
> 
> Fixes: 8f111bc357aa ('cpufreq/schedutil: Rewrite CPUFREQ_RT support')
> Cc: <stable@vger.kernel.org> # v4.16+
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
> - v2:
>   - remove blank line
> 
>  kernel/sched/rt.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 7aef6b4..a9f1119 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -1000,9 +1000,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
>  
>  	sub_nr_running(rq, rt_rq->rt_nr_running);
>  	rt_rq->rt_queued = 0;
> -
> -	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
> -	cpufreq_update_util(rq, 0);
>  }
>  
>  static void
> @@ -1288,6 +1285,9 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
>  		if (on_rt_rq(rt_se))
>  			__dequeue_rt_entity(rt_se, flags);
>  	}
> +
> +	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
> +	cpufreq_update_util(rq_of_rt_rq(rt_rq_of_se(back)), 0);
>  }

Hurm.. I think this is also wrong. See how dequeue_rt_stack() is also
called from the enqueue path. Also, nothing calls cpufreq_update_util()
on the throttle path now.

And talking about throttle; I think we wanted to check rt_queued in that
case.
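
For illustration, both points can be reproduced with a small self-contained C
model of the call paths as they look with the proposed v2 placement (a sketch
only: simplified stand-ins for the functions in kernel/sched/rt.c, not the
actual kernel code):

#include <stdio.h>

struct rt_rq {
	int rt_nr_running;
	int rt_queued;
};

static void cpufreq_update_util(const char *caller, struct rt_rq *rt_rq)
{
	printf("%s: cpufreq kick, rt_nr_running=%d\n",
	       caller, rt_rq->rt_nr_running);
}

/* With the v2 patch the kick is gone from here ... */
static void dequeue_top_rt_rq(struct rt_rq *rt_rq)
{
	rt_rq->rt_queued = 0;
}

/* ... and sits at the end of dequeue_rt_stack() instead. */
static void dequeue_rt_stack(struct rt_rq *rt_rq)
{
	dequeue_top_rt_rq(rt_rq);
	rt_rq->rt_nr_running = 0;	/* entities dequeued */
	cpufreq_update_util("dequeue_rt_stack", rt_rq);
}

/* (1) enqueue_rt_entity() starts by dequeuing the whole stack, so the
 *     relocated kick also fires on every enqueue, before the task is
 *     accounted again.                                                 */
static void enqueue_rt_entity(struct rt_rq *rt_rq)
{
	dequeue_rt_stack(rt_rq);
	rt_rq->rt_nr_running = 1;	/* entities re-enqueued */
}

/* (2) Throttling the root rt_rq calls dequeue_top_rt_rq() directly
 *     (see sched_rt_rq_dequeue()), so that path gets no kick at all. */
static void throttle_root_rt_rq(struct rt_rq *rt_rq)
{
	dequeue_top_rt_rq(rt_rq);
}

int main(void)
{
	struct rt_rq rt = { .rt_nr_running = 0, .rt_queued = 0 };

	enqueue_rt_entity(&rt);		/* premature kick, count still 0 */
	throttle_root_rt_rq(&rt);	/* no kick although RT load changes */
	return 0;
}

The enqueue path reports a kick while the count is still zero, and the
root-rt_rq throttle path produces no kick at all.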

How about the below?

---
 kernel/sched/cpufreq_schedutil.c |  2 +-
 kernel/sched/rt.c                | 24 ++++++++++++++----------
 kernel/sched/sched.h             |  5 +++++
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e13df951aca7..1751f9630e49 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -185,7 +185,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 	struct rq *rq = cpu_rq(sg_cpu->cpu);
 	unsigned long util;
 
-	if (rq->rt.rt_nr_running) {
+	if (rt_rq_is_runnable(&rq->rt)) {
 		util = sg_cpu->max;
 	} else {
 		util = sg_cpu->util_dl;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7aef6b4e885a..a1108a7da777 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -979,8 +979,14 @@ static void update_curr_rt(struct rq *rq)
 		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
 			raw_spin_lock(&rt_rq->rt_runtime_lock);
 			rt_rq->rt_time += delta_exec;
-			if (sched_rt_runtime_exceeded(rt_rq))
+			if (sched_rt_runtime_exceeded(rt_rq)) {
+				struct rq *rq = rq_of_rt_rq(rt_rq);
+
+				if (&rq->rt == rt_rq)
+					cpufreq_update_util(rq, 0);
+
 				resched_curr(rq);
+			}
 			raw_spin_unlock(&rt_rq->rt_runtime_lock);
 		}
 	}
@@ -1000,9 +1006,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
 
 	sub_nr_running(rq, rt_rq->rt_nr_running);
 	rt_rq->rt_queued = 0;
-
-	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-	cpufreq_update_util(rq, 0);
 }
 
 static void
@@ -1019,9 +1022,6 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
 
 	add_nr_running(rq, rt_rq->rt_nr_running);
 	rt_rq->rt_queued = 1;
-
-	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-	cpufreq_update_util(rq, 0);
 }
 
 #if defined CONFIG_SMP
@@ -1330,6 +1330,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
+
+	cpufreq_update_util(rq, 0);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
@@ -1340,6 +1342,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 	dequeue_rt_entity(rt_se, flags);
 
 	dequeue_pushable_task(rq, p);
+
+	cpufreq_update_util(rq, 0);
 }
 
 /*
@@ -1533,6 +1537,9 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	struct task_struct *p;
 	struct rt_rq *rt_rq = &rq->rt;
 
+	if (!rt_rq->rt_queued)
+		return NULL;
+
 	if (need_pull_rt_task(rq, prev)) {
 		/*
 		 * This is OK, because current is on_cpu, which avoids it being
@@ -1560,9 +1567,6 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 	if (prev->sched_class == &rt_sched_class)
 		update_curr_rt(rq);
 
-	if (!rt_rq->rt_queued)
-		return NULL;
-
 	put_prev_task(rq, prev);
 
 	p = _pick_next_task_rt(rq);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c9895d35c5f7..bec7f2eecaf3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -609,6 +609,11 @@ struct rt_rq {
 #endif
 };
 
+static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
+{
+	return rt_rq->rt_queued && rt_rq->rt_nr_running;
+}
+
 /* Deadline class' related fields in a runqueue */
 struct dl_rq {
 	/* runqueue is an rbtree, ordered by deadline */
Peter Zijlstra May 17, 2018, 9:32 a.m. UTC | #2
On Thu, May 17, 2018 at 11:05:30AM +0200, Peter Zijlstra wrote:
> Hurm.. I think this is also wrong. See how dequeue_rt_stack() is also
> called from the enqueue path. Also, nothing calls cpufreq_update_util()
> on the throttle path now.
> 
> And talking about throttle; I think we wanted to check rt_queued in that
> case.

Bah, clearly you also want to update on unthrottle.. but I think there's
more buggered wrt throttling.
Vincent Guittot May 17, 2018, 8:12 p.m. UTC | #3
On Thursday 17 May 2018 at 11:32:06 (+0200), Peter Zijlstra wrote:
> On Thu, May 17, 2018 at 11:05:30AM +0200, Peter Zijlstra wrote:
> > Hurm.. I think this is also wrong. See how dequeue_rt_stack() is also
> > called from the enqueue path. Also, nothing calls cpufreq_update_util()
> > on the throttle path now.

Yes I missed the point that rt_entities were dequeued then re-enqueued when
a task was either enqueued or dequeued.

That also means that enqueue_top_rt_rq() is always called when a task is
enqueued or dequeued and also when groups are throttled or unthrottled.
In fact, the only point where it's not called is when the root rt_rq is
throttled. So I have prepared the patch below, which calls cpufreq_update_util()
in enqueue_top_rt_rq() and also when we throttle the root rt_rq.
According to the tests I have done, it seems to work for the enqueue/dequeue and
throttle/unthrottle use cases.

I have re-used your rt_rq_is_runnable(), which takes into account rt_queued.

---
 kernel/sched/cpufreq_schedutil.c |  2 +-
 kernel/sched/rt.c                | 16 ++++++++++------
 kernel/sched/sched.h             |  5 +++++
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index e13df95..1751f96 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -185,7 +185,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
 	struct rq *rq = cpu_rq(sg_cpu->cpu);
 	unsigned long util;
 
-	if (rq->rt.rt_nr_running) {
+	if (rt_rq_is_runnable(&rq->rt)) {
 		util = sg_cpu->max;
 	} else {
 		util = sg_cpu->util_dl;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7aef6b4..d6b9517 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
 
 	rt_se = rt_rq->tg->rt_se[cpu];
 
-	if (!rt_se)
+	if (!rt_se) {
 		dequeue_top_rt_rq(rt_rq);
+		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
+	}
 	else if (on_rt_rq(rt_se))
 		dequeue_rt_entity(rt_se, 0);
 }
@@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
 	sub_nr_running(rq, rt_rq->rt_nr_running);
 	rt_rq->rt_queued = 0;
 
-	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-	cpufreq_update_util(rq, 0);
 }
 
 static void
@@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
 
 	if (rt_rq->rt_queued)
 		return;
-	if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running)
+
+	if (rt_rq_throttled(rt_rq))
 		return;
 
-	add_nr_running(rq, rt_rq->rt_nr_running);
-	rt_rq->rt_queued = 1;
+	if (rt_rq->rt_nr_running) {
+		add_nr_running(rq, rt_rq->rt_nr_running);
+		rt_rq->rt_queued = 1;
+	}
 
 	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
 	cpufreq_update_util(rq, 0);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c9895d3..bbebcfe 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -609,6 +609,11 @@ struct rt_rq {
 #endif
 };
 
+static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
+{
+	return rt_rq->rt_queued && rt_rq->rt_nr_running;
+}
+
 /* Deadline class' related fields in a runqueue */
 struct dl_rq {
 	/* runqueue is an rbtree, ordered by deadline */
-- 
2.7.4

> > 
> > And talking about throttle; I think we wanted to check rt_queued in that
> > case.
> 
> Bah, clearly you also want to update on unthrottle.. but I think there's
> more buggered wrt throttling.
Vincent Guittot June 1, 2018, 2:37 p.m. UTC | #4
Hi Peter,

On 17 May 2018 at 22:12, Vincent Guittot <vincent.guittot@linaro.org> wrote:
> On Thursday 17 May 2018 at 11:32:06 (+0200), Peter Zijlstra wrote:
>> On Thu, May 17, 2018 at 11:05:30AM +0200, Peter Zijlstra wrote:
>> > Hurm.. I think this is also wrong. See how dequeue_rt_stack() is also
>> > called from the enqueue path. Also, nothing calls cpufreq_update_util()
>> > on the throttle path now.
>
> Yes I missed the point that rt_entities were dequeued then re-enqueued when
> a task was either enqueued or dequeued.
>
> That also means that enqueue_top_rt_rq() is always called when a task is
> enqueued or dequeued and also when groups are throttled or unthrottled.
> In fact, the only point where it's not called is when the root rt_rq is
> throttled. So I have prepared the patch below, which calls cpufreq_update_util()
> in enqueue_top_rt_rq() and also when we throttle the root rt_rq.
> According to the tests I have done, it seems to work for the enqueue/dequeue and
> throttle/unthrottle use cases.
>
> I have re-used your rt_rq_is_runnable(), which takes into account rt_queued.

What is the status of this problem?

The patch below conflicts with
b29bbbbff0d6 ('sched/cpufreq: always consider blocked FAIR utilization'),
which has been merged into tip/sched/core.

Do you want me to rebase it?

Regards,
Vincent

>
> ---
>  kernel/sched/cpufreq_schedutil.c |  2 +-
>  kernel/sched/rt.c                | 16 ++++++++++------
>  kernel/sched/sched.h             |  5 +++++
>  3 files changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index e13df95..1751f96 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -185,7 +185,7 @@ static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
>         struct rq *rq = cpu_rq(sg_cpu->cpu);
>         unsigned long util;
>
> -       if (rq->rt.rt_nr_running) {
> +       if (rt_rq_is_runnable(&rq->rt)) {
>                 util = sg_cpu->max;
>         } else {
>                 util = sg_cpu->util_dl;
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 7aef6b4..d6b9517 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -508,8 +508,11 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
>
>         rt_se = rt_rq->tg->rt_se[cpu];
>
> -       if (!rt_se)
> +       if (!rt_se) {
>                 dequeue_top_rt_rq(rt_rq);
> +               /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
> +               cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
> +       }
>         else if (on_rt_rq(rt_se))
>                 dequeue_rt_entity(rt_se, 0);
>  }
> @@ -1001,8 +1004,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
>         sub_nr_running(rq, rt_rq->rt_nr_running);
>         rt_rq->rt_queued = 0;
>
> -       /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
> -       cpufreq_update_util(rq, 0);
>  }
>
>  static void
> @@ -1014,11 +1015,14 @@ enqueue_top_rt_rq(struct rt_rq *rt_rq)
>
>         if (rt_rq->rt_queued)
>                 return;
> -       if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running)
> +
> +       if (rt_rq_throttled(rt_rq))
>                 return;
>
> -       add_nr_running(rq, rt_rq->rt_nr_running);
> -       rt_rq->rt_queued = 1;
> +       if (rt_rq->rt_nr_running) {
> +               add_nr_running(rq, rt_rq->rt_nr_running);
> +               rt_rq->rt_queued = 1;
> +       }
>
>         /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
>         cpufreq_update_util(rq, 0);
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index c9895d3..bbebcfe 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -609,6 +609,11 @@ struct rt_rq {
>  #endif
>  };
>
> +static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
> +{
> +       return rt_rq->rt_queued && rt_rq->rt_nr_running;
> +}
> +
>  /* Deadline class' related fields in a runqueue */
>  struct dl_rq {
>         /* runqueue is an rbtree, ordered by deadline */
> --
> 2.7.4
>
>> >
>> > And talking about throttle; I think we wanted to check rt_queued in that
>> > case.
>>
>> Bah, clearly you also want to update on unthrottle.. but I think there's
>> more buggered wrt throttling.

Patch

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 7aef6b4..a9f1119 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1000,9 +1000,6 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq)
 
 	sub_nr_running(rq, rt_rq->rt_nr_running);
 	rt_rq->rt_queued = 0;
-
-	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-	cpufreq_update_util(rq, 0);
 }
 
 static void
@@ -1288,6 +1285,9 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
 		if (on_rt_rq(rt_se))
 			__dequeue_rt_entity(rt_se, flags);
 	}
+
+	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
+	cpufreq_update_util(rq_of_rt_rq(rt_rq_of_se(back)), 0);
 }
 
 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)