[RESEND,v4,2/3] sched/rt: Fix wrong SMP scheduler behavior for equal prio cases

Message ID 1425886348-3191-2-git-send-email-xlpang@126.com
State New
Headers show

Commit Message

Xunlei Pang March 9, 2015, 7:32 a.m.
From: Xunlei Pang <pang.xunlei@linaro.org>

Currently, SMP RT scheduler has some trouble in dealing with
equal prio cases.

For example, in check_preempt_equal_prio():
When RT1(current task) gets preempted by RT2, if there is a
migratable RT3 with same prio, RT3 will be pushed away instead
of RT1 afterwards, because RT1 will be enqueued to the tail of
the pushable list when going through succeeding put_prev_task_rt()
triggered by resched. This broke FIFO.

Furthermore, this is also problematic for normal preempted cases
if there're some rt tasks queued with the same prio as current.
Because current will be put behind these tasks in the pushable
queue.

So, if a task is running and gets preempted by a higher priority
task (or even with same priority for migrating), this patch ensures
that it is put ahead of any existing task with the same priority in
the pushable queue.

Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
---
 kernel/sched/rt.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

Comments

pang.xunlei March 27, 2015, 2:39 p.m. | #1
Ping Steve

> From: Xunlei Pang <pang.xunlei@linaro.org>
>
> Currently, SMP RT scheduler has some trouble in dealing with
> equal prio cases.
>
> For example, in check_preempt_equal_prio():
> When RT1(current task) gets preempted by RT2, if there is a
> migratable RT3 with same prio, RT3 will be pushed away instead
> of RT1 afterwards, because RT1 will be enqueued to the tail of
> the pushable list when going through succeeding put_prev_task_rt()
> triggered by resched. This broke FIFO.
>
> Furthermore, this is also problematic for normal preempted cases
> if there're some rt tasks queued with the same prio as current.
> Because current will be put behind these tasks in the pushable
> queue.
>
> So, if a task is running and gets preempted by a higher priority
> task (or even with same priority for migrating), this patch ensures
> that it is put ahead of any existing task with the same priority in
> the pushable queue.
>
> Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
> ---
>  kernel/sched/rt.c | 23 ++++++++++++++++-------
>  1 file changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index f4d4b07..86cd79f 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -347,11 +347,15 @@ static inline void set_post_schedule(struct rq *rq)
>         rq->post_schedule = has_pushable_tasks(rq);
>  }
>
> -static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
> +static void enqueue_pushable_task(struct rq *rq,
> +                               struct task_struct *p, bool head)
>  {
>         plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
>         plist_node_init(&p->pushable_tasks, p->prio);
> -       plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
> +       if (head)
> +               plist_add_head(&p->pushable_tasks, &rq->rt.pushable_tasks);
> +       else
> +               plist_add_tail(&p->pushable_tasks, &rq->rt.pushable_tasks);
>
>         /* Update the highest prio pushable task */
>         if (p->prio < rq->rt.highest_prio.next)
> @@ -373,7 +377,8 @@ static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
>
>  #else
>
> -static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
> +static inline void enqueue_pushable_task(struct rq *rq,
> +                                       struct task_struct *p, bool head)
>  {
>  }
>
> @@ -1248,7 +1253,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
>         enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
>
>         if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
> -               enqueue_pushable_task(rq, p);
> +               enqueue_pushable_task(rq, p, false);
>  }
>
>  static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
> @@ -1494,8 +1499,12 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
>          * The previous task needs to be made eligible for pushing
>          * if it is still active
>          */
> -       if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
> -               enqueue_pushable_task(rq, p);
> +       if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) {
> +               if (task_running(rq, p) && (preempt_count() & PREEMPT_ACTIVE))
> +                       enqueue_pushable_task(rq, p, true);
> +               else
> +                       enqueue_pushable_task(rq, p, false);
> +       }
>  }
>
>  #ifdef CONFIG_SMP
> @@ -1914,7 +1923,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
>                 rq->rt.rt_nr_migratory--;
>         } else {
>                 if (!task_current(rq, p))
> -                       enqueue_pushable_task(rq, p);
> +                       enqueue_pushable_task(rq, p, false);
>                 rq->rt.rt_nr_migratory++;
>         }
>
> --
> 1.9.1
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
pang.xunlei April 1, 2015, 2:41 a.m. | #2
On 27 March 2015 at 23:28, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Mon,  9 Mar 2015 15:32:27 +0800
> Xunlei Pang <xlpang@126.com> wrote:
>
>> From: Xunlei Pang <pang.xunlei@linaro.org>
>>
>> Currently, SMP RT scheduler has some trouble in dealing with
>> equal prio cases.
>>
>> For example, in check_preempt_equal_prio():
>> When RT1(current task) gets preempted by RT2, if there is a
>> migratable RT3 with same prio, RT3 will be pushed away instead
>> of RT1 afterwards, because RT1 will be enqueued to the tail of
>> the pushable list when going through succeeding put_prev_task_rt()
>> triggered by resched. This broke FIFO.
>>
>> Furthermore, this is also problematic for normal preempted cases
>> if there're some rt tasks queued with the same prio as current.
>> Because current will be put behind these tasks in the pushable
>> queue.
>>
>> So, if a task is running and gets preempted by a higher priority
>> task (or even with same priority for migrating), this patch ensures
>> that it is put ahead of any existing task with the same priority in
>> the pushable queue.
>>
>> Signed-off-by: Xunlei Pang <pang.xunlei@linaro.org>
>> ---
>>  kernel/sched/rt.c | 23 ++++++++++++++++-------
>>  1 file changed, 16 insertions(+), 7 deletions(-)
>>
>> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
>> index f4d4b07..86cd79f 100644
>> --- a/kernel/sched/rt.c
>> +++ b/kernel/sched/rt.c
>> @@ -347,11 +347,15 @@ static inline void set_post_schedule(struct rq *rq)
>>       rq->post_schedule = has_pushable_tasks(rq);
>>  }
>>
>> -static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
>> +static void enqueue_pushable_task(struct rq *rq,
>> +                             struct task_struct *p, bool head)
>
> Nit.
>
> static void
> enqueue_pushable_task(struct rq *rq, struct task_struct *p, bool head)
>
> Is a better breaking of the line.

I'll adjust it to be one line.

>
>>  {
>>       plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
>>       plist_node_init(&p->pushable_tasks, p->prio);
>> -     plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
>> +     if (head)
>> +             plist_add_head(&p->pushable_tasks, &rq->rt.pushable_tasks);
>> +     else
>> +             plist_add_tail(&p->pushable_tasks, &rq->rt.pushable_tasks);
>>
>>       /* Update the highest prio pushable task */
>>       if (p->prio < rq->rt.highest_prio.next)
>> @@ -373,7 +377,8 @@ static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
>>
>>  #else
>>
>> -static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
>> +static inline void enqueue_pushable_task(struct rq *rq,
>> +                                     struct task_struct *p, bool head)
>
> Same here.
>
>>  {
>>  }
>>
>> @@ -1248,7 +1253,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
>>       enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
>>
>>       if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
>> -             enqueue_pushable_task(rq, p);
>> +             enqueue_pushable_task(rq, p, false);
>>  }
>>
>>  static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
>> @@ -1494,8 +1499,12 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
>>        * The previous task needs to be made eligible for pushing
>>        * if it is still active
>>        */
>> -     if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
>> -             enqueue_pushable_task(rq, p);
>> +     if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) {
>> +             if (task_running(rq, p) && (preempt_count() & PREEMPT_ACTIVE))
>
> put_prev_task_rt() is called by put_prev_task() which is called by
> several functions: rt_mutex_setprio(), __sched_setscheduler(),
> sched_setnuma(), migrate_tasks(), and sched_move_task(). It's not part
> of being preempted.
>
> Now it is also called by pick_next_task_rt() which I'm assuming is what
> you want it to affect.
>
> The above definitely needs a comment about what it is doing. Also, I'm
> not so sure we care about testing task_running(). I'm thinking the
> check for PREEMPT_ACTIVE is good enough, as that would only be set from
> being called within preempt_schedule().

Indeed.

>
> Also, we could get rid of the if statement and do:
>
>         enqueue_pushable_task(rq, p, !!(preempt_count() & PREEMPT_ACTIVE));

Agree, will do. Thanks.

>
>
> -- Steve
>
>> +                     enqueue_pushable_task(rq, p, true);
>> +             else
>> +                     enqueue_pushable_task(rq, p, false);
>> +     }
>>  }
>>
>>  #ifdef CONFIG_SMP
>> @@ -1914,7 +1923,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
>>               rq->rt.rt_nr_migratory--;
>>       } else {
>>               if (!task_current(rq, p))
>> -                     enqueue_pushable_task(rq, p);
>> +                     enqueue_pushable_task(rq, p, false);
>>               rq->rt.rt_nr_migratory++;
>>       }
>>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Patch

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index f4d4b07..86cd79f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -347,11 +347,15 @@  static inline void set_post_schedule(struct rq *rq)
 	rq->post_schedule = has_pushable_tasks(rq);
 }
 
-static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
+static void enqueue_pushable_task(struct rq *rq,
+				struct task_struct *p, bool head)
 {
 	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
 	plist_node_init(&p->pushable_tasks, p->prio);
-	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
+	if (head)
+		plist_add_head(&p->pushable_tasks, &rq->rt.pushable_tasks);
+	else
+		plist_add_tail(&p->pushable_tasks, &rq->rt.pushable_tasks);
 
 	/* Update the highest prio pushable task */
 	if (p->prio < rq->rt.highest_prio.next)
@@ -373,7 +377,8 @@  static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
 
 #else
 
-static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
+static inline void enqueue_pushable_task(struct rq *rq,
+					struct task_struct *p, bool head)
 {
 }
 
@@ -1248,7 +1253,7 @@  enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
-		enqueue_pushable_task(rq, p);
+		enqueue_pushable_task(rq, p, false);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
@@ -1494,8 +1499,12 @@  static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
-		enqueue_pushable_task(rq, p);
+	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) {
+		if (task_running(rq, p) && (preempt_count() & PREEMPT_ACTIVE))
+			enqueue_pushable_task(rq, p, true);
+		else
+			enqueue_pushable_task(rq, p, false);
+	}
 }
 
 #ifdef CONFIG_SMP
@@ -1914,7 +1923,7 @@  static void set_cpus_allowed_rt(struct task_struct *p,
 		rq->rt.rt_nr_migratory--;
 	} else {
 		if (!task_current(rq, p))
-			enqueue_pushable_task(rq, p);
+			enqueue_pushable_task(rq, p, false);
 		rq->rt.rt_nr_migratory++;
 	}