
[RFC,4/4] sched, rt: support schedstat for RT sched class

Message ID 20201119035230.45330-5-laoar.shao@gmail.com
State New
Series sched: support schedstat for RT sched class

Commit Message

Yafang Shao Nov. 19, 2020, 3:52 a.m. UTC
We want to measure the latency of RT tasks in our production
environment with the schedstat facility, but currently schedstat is
only supported for the fair sched class. This patch enables it for the
RT sched class as well.

The schedstat statistics are defined in struct sched_entity, which is a
member of struct task_struct, so we can reuse them for the RT sched class.

The schedstat usage in the RT sched class is similar to that in the fair
sched class, for example:
		fair				RT
enqueue		update_stats_enqueue_fair	update_stats_enqueue_rt
dequeue		update_stats_dequeue_fair	update_stats_dequeue_rt
put_prev_task	update_stats_wait_start		update_stats_wait_start
set_next_task	update_stats_wait_end		update_stats_wait_end
show		/proc/[pid]/sched		/proc/[pid]/sched

The sched:sched_stats_* tracepoints can be used to trace RT tasks as
well.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 kernel/sched/rt.c    | 61 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |  2 ++
 2 files changed, 63 insertions(+)
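
As a usage illustration (not part of the patch): with CONFIG_SCHEDSTATS built
in and schedstat enabled at runtime, the statistics for an RT task show up in
/proc/[pid]/sched just as they do for a fair task. Below is a minimal
userspace sketch that dumps those fields; the "se.statistics." field prefix
is an assumption based on the typical /proc/[pid]/sched output of this kernel
generation, so verify it against your kernel.

/* Minimal sketch: print the schedstat fields of a task. */
#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
	char path[64], line[256];
	FILE *fp;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <pid>\n", argv[0]);
		return 1;
	}

	snprintf(path, sizeof(path), "/proc/%s/sched", argv[1]);
	fp = fopen(path, "r");
	if (!fp) {
		perror("fopen");
		return 1;
	}

	/* Keep only the schedstat lines, e.g. se.statistics.wait_sum. */
	while (fgets(line, sizeof(line), fp)) {
		if (strstr(line, "se.statistics."))
			fputs(line, stdout);
	}

	fclose(fp);
	return 0;
}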

Comments

jun qian Nov. 20, 2020, 2:39 a.m. UTC | #1
Yafang Shao <laoar.shao@gmail.com> wrote on Thu, Nov 19, 2020 at 11:55 AM:
>
> [...]

Should the deadline sched class be considered as well?

thanks

Yafang Shao Nov. 21, 2020, 4:36 a.m. UTC | #2
On Fri, Nov 20, 2020 at 10:39 AM jun qian <qianjun.kernel@gmail.com> wrote:
>
> [...]
>
> Should the deadline sched class be considered as well?
>

The deadline sched class can be supported as well, per my understanding;
I think we can do that later.
This patchset only aims to support the RT sched class.
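
(Not part of the thread, purely for illustration: a hypothetical sketch of
what an analogous deadline-class enqueue hook could look like, mirroring
update_stats_enqueue_rt() from this patch. Neither update_stats_enqueue_dl()
nor a dl_rq->curr field exists today; both are assumptions made only for
this sketch.)

static inline void
update_stats_enqueue_dl(struct rq *rq, struct sched_entity *se,
			struct sched_dl_entity *dl_se, int flags)
{
	struct dl_rq *dl_rq = &rq->dl;

	if (!schedstat_enabled())
		return;

	/* dl_rq->curr is assumed here, analogous to the new rt_rq->curr */
	if (dl_se != dl_rq->curr)
		update_stats_wait_start(rq, se);

	if (flags & ENQUEUE_WAKEUP)
		update_stats_enqueue_sleeper(rq, se);
}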

-- 
Thanks
Yafang

Patch

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index b9ec886702a1..a318236b7166 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1246,6 +1246,46 @@  void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	dec_rt_group(rt_se, rt_rq);
 }
 
+static inline void
+update_stats_enqueue_rt(struct rq *rq, struct sched_entity *se,
+			struct sched_rt_entity *rt_se, int flags)
+{
+	struct rt_rq *rt_rq = &rq->rt;
+
+	if (!schedstat_enabled())
+		return;
+
+	if (rt_se != rt_rq->curr)
+		update_stats_wait_start(rq, se);
+
+	if (flags & ENQUEUE_WAKEUP)
+		update_stats_enqueue_sleeper(rq, se);
+}
+
+static inline void
+update_stats_dequeue_rt(struct rq *rq, struct sched_entity *se,
+			struct sched_rt_entity *rt_se, int flags)
+{
+	struct rt_rq *rt_rq = &rq->rt;
+
+	if (!schedstat_enabled())
+		return;
+
+	if (rt_se != rt_rq->curr)
+		update_stats_wait_end(rq, se);
+
+	if ((flags & DEQUEUE_SLEEP) && rt_entity_is_task(rt_se)) {
+		struct task_struct *tsk = rt_task_of(rt_se);
+
+		if (tsk->state & TASK_INTERRUPTIBLE)
+			__schedstat_set(se->statistics.sleep_start,
+					rq_clock(rq));
+		if (tsk->state & TASK_UNINTERRUPTIBLE)
+			__schedstat_set(se->statistics.block_start,
+					rq_clock(rq));
+	}
+}
+
 /*
  * Change rt_se->run_list location unless SAVE && !MOVE
  *
@@ -1275,6 +1315,7 @@  static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
 	struct rt_prio_array *array = &rt_rq->active;
 	struct rt_rq *group_rq = group_rt_rq(rt_se);
 	struct list_head *queue = array->queue + rt_se_prio(rt_se);
+	struct task_struct *task = rt_task_of(rt_se);
 
 	/*
 	 * Don't enqueue the group if its throttled, or when empty.
@@ -1288,6 +1329,8 @@  static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
 		return;
 	}
 
+	update_stats_enqueue_rt(rq_of_rt_rq(rt_rq), &task->se, rt_se, flags);
+
 	if (move_entity(flags)) {
 		WARN_ON_ONCE(rt_se->on_list);
 		if (flags & ENQUEUE_HEAD)
@@ -1307,7 +1350,9 @@  static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag
 {
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
+	struct task_struct *task = rt_task_of(rt_se);
 
+	update_stats_dequeue_rt(rq_of_rt_rq(rt_rq), &task->se, rt_se, flags);
 	if (move_entity(flags)) {
 		WARN_ON_ONCE(!rt_se->on_list);
 		__delist_rt_entity(rt_se, array);
@@ -1374,6 +1419,7 @@  enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
+	check_schedstat_required();
 	enqueue_rt_entity(rt_se, flags);
 
 	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
@@ -1574,6 +1620,12 @@  static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flag
 
 static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
 {
+	struct sched_rt_entity *rt_se = &p->rt;
+	struct rt_rq *rt_rq = &rq->rt;
+
+	if (on_rt_rq(&p->rt))
+		update_stats_wait_end(rq, &p->se);
+
 	update_stats_curr_start(rq, &p->se);
 
 	/* The running task is never eligible for pushing */
@@ -1591,6 +1643,8 @@  static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool f
 		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
 
 	rt_queue_push_tasks(rq);
+
+	rt_rq->curr = rt_se;
 }
 
 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -1638,6 +1692,11 @@  static struct task_struct *pick_next_task_rt(struct rq *rq)
 
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
+	struct rt_rq *rt_rq = &rq->rt;
+
+	if (on_rt_rq(&p->rt))
+		update_stats_wait_start(rq, &p->se);
+
 	update_curr_rt(rq);
 
 	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);
@@ -1648,6 +1707,8 @@  static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	 */
 	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
+
+	rt_rq->curr = NULL;
 }
 
 #ifdef CONFIG_SMP
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 28986736ced9..7787afbd5723 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -649,6 +649,8 @@  struct rt_rq {
 	struct rq		*rq;
 	struct task_group	*tg;
 #endif
+
+	struct sched_rt_entity  *curr;
 };
 
 static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
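
(Not part of the patch: a minimal userspace sketch of turning the schedstat
facility on at runtime, assuming the standard kernel.sched_schedstats sysctl
exposed at /proc/sys/kernel/sched_schedstats; CONFIG_SCHEDSTATS must be
enabled in the kernel config.)

#include <stdio.h>

int main(void)
{
	/* Equivalent to: sysctl kernel.sched_schedstats=1 */
	FILE *fp = fopen("/proc/sys/kernel/sched_schedstats", "w");

	if (!fp) {
		perror("fopen");
		return 1;
	}
	fputs("1\n", fp);
	fclose(fp);
	return 0;
}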