diff mbox series

[2/5] sched/fair: rename sum_nr_running to sum_h_nr_running

Message ID 1563523105-24673-3-git-send-email-vincent.guittot@linaro.org
State New
Headers show
Series sched/fair: rework the CFS load balance | expand

Commit Message

Vincent Guittot July 19, 2019, 7:58 a.m. UTC
sum_nr_running will track rq->nr_running tasks and sum_h_nr_running
will track cfs->h_nr_running, so we can use both to detect when other
scheduling classes are running and preempt CFS.

There are no functional changes.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

---
 kernel/sched/fair.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

-- 
2.7.4

Comments

Peter Zijlstra July 19, 2019, 12:51 p.m. UTC | #1
On Fri, Jul 19, 2019 at 09:58:22AM +0200, Vincent Guittot wrote:
> sum_nr_running will track rq->nr_running task and sum_h_nr_running

> will track cfs->h_nr_running so we can use both to detect when other

> scheduling class are running and preempt CFS.

> 

> There is no functional changes.

> 

> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---

>  kernel/sched/fair.c | 31 +++++++++++++++++--------------

>  1 file changed, 17 insertions(+), 14 deletions(-)

> 

> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

> index 7a530fd..67f0acd 100644

> --- a/kernel/sched/fair.c

> +++ b/kernel/sched/fair.c

> @@ -7650,6 +7650,7 @@ struct sg_lb_stats {

>  	unsigned long group_capacity;

>  	unsigned long group_util; /* Total utilization of the group */

>  	unsigned int sum_nr_running; /* Nr tasks running in the group */

> +	unsigned int sum_h_nr_running; /* Nr tasks running in the group */

>  	unsigned int idle_cpus;

>  	unsigned int group_weight;

>  	enum group_type group_type;


> @@ -8000,6 +8002,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,

>  

>  		sgs->group_load += cpu_runnable_load(rq);

>  		sgs->group_util += cpu_util(i);

> +		sgs->sum_h_nr_running += rq->cfs.h_nr_running;

>  		sgs->sum_nr_running += rq->cfs.h_nr_running;

>  

>  		nr_running = rq->nr_running;


Maybe completely remove sum_nr_running in this patch, and introduce it
again later when you change what it counts.
Vincent Guittot July 19, 2019, 1:44 p.m. UTC | #2
On Fri, 19 Jul 2019 at 14:51, Peter Zijlstra <peterz@infradead.org> wrote:
>

> On Fri, Jul 19, 2019 at 09:58:22AM +0200, Vincent Guittot wrote:

> > sum_nr_running will track rq->nr_running task and sum_h_nr_running

> > will track cfs->h_nr_running so we can use both to detect when other

> > scheduling class are running and preempt CFS.

> >

> > There is no functional changes.

> >

> > Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

> > ---

> >  kernel/sched/fair.c | 31 +++++++++++++++++--------------

> >  1 file changed, 17 insertions(+), 14 deletions(-)

> >

> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

> > index 7a530fd..67f0acd 100644

> > --- a/kernel/sched/fair.c

> > +++ b/kernel/sched/fair.c

> > @@ -7650,6 +7650,7 @@ struct sg_lb_stats {

> >       unsigned long group_capacity;

> >       unsigned long group_util; /* Total utilization of the group */

> >       unsigned int sum_nr_running; /* Nr tasks running in the group */

> > +     unsigned int sum_h_nr_running; /* Nr tasks running in the group */

> >       unsigned int idle_cpus;

> >       unsigned int group_weight;

> >       enum group_type group_type;

>

> > @@ -8000,6 +8002,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,

> >

> >               sgs->group_load += cpu_runnable_load(rq);

> >               sgs->group_util += cpu_util(i);

> > +             sgs->sum_h_nr_running += rq->cfs.h_nr_running;

> >               sgs->sum_nr_running += rq->cfs.h_nr_running;

> >

> >               nr_running = rq->nr_running;

>

> Maybe completely remove sum_nr_running in this patch, and introduce it

> again later when you change what it counts.


yes
Srikar Dronamraju July 26, 2019, 2:17 a.m. UTC | #3
* Vincent Guittot <vincent.guittot@linaro.org> [2019-07-19 09:58:22]:

> sum_nr_running will track rq->nr_running task and sum_h_nr_running

> will track cfs->h_nr_running so we can use both to detect when other

> scheduling class are running and preempt CFS.

> 

> There is no functional changes.

> 

> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---

>  

>  	ld_moved = 0;

> -	if (busiest->nr_running > 1) {

> +	if (busiest->cfs.h_nr_running > 1) {


We should be looking at nr_running here.
There could be only one CFS task, but it may not be the currently running
task, so it could be a good one to pick for load balancing.

No?

>  		/*

>  		 * Attempt to move tasks. If find_busiest_group has found

>  		 * an imbalance but busiest->nr_running <= 1, the group is

> -- 

> 2.7.4

> 


-- 
Thanks and Regards
Srikar Dronamraju
Vincent Guittot July 26, 2019, 8:41 a.m. UTC | #4
On Fri, 26 Jul 2019 at 04:17, Srikar Dronamraju
<srikar@linux.vnet.ibm.com> wrote:
>

> * Vincent Guittot <vincent.guittot@linaro.org> [2019-07-19 09:58:22]:

>

> > sum_nr_running will track rq->nr_running task and sum_h_nr_running

> > will track cfs->h_nr_running so we can use both to detect when other

> > scheduling class are running and preempt CFS.

> >

> > There is no functional changes.

> >

> > Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

> > ---

> >

> >       ld_moved = 0;

> > -     if (busiest->nr_running > 1) {

> > +     if (busiest->cfs.h_nr_running > 1) {

>

> We should be looking for nr_running here.

> There could be only one cfs task but that may not be the current running

> task, so it could be a good one to be picked for load balancing.

>

> No?


Yes, you're right.
That's what I have done in the new version that I'm preparing.


>

> >               /*

> >                * Attempt to move tasks. If find_busiest_group has found

> >                * an imbalance but busiest->nr_running <= 1, the group is

> > --

> > 2.7.4

> >

>

> --

> Thanks and Regards

> Srikar Dronamraju

>
diff mbox series

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a530fd..67f0acd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7650,6 +7650,7 @@  struct sg_lb_stats {
 	unsigned long group_capacity;
 	unsigned long group_util; /* Total utilization of the group */
 	unsigned int sum_nr_running; /* Nr tasks running in the group */
+	unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */
 	unsigned int idle_cpus;
 	unsigned int group_weight;
 	enum group_type group_type;
@@ -7695,6 +7696,7 @@  static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
 		.busiest_stat = {
 			.avg_load = 0UL,
 			.sum_nr_running = 0,
+			.sum_h_nr_running = 0,
 			.group_type = group_other,
 		},
 	};
@@ -7885,7 +7887,7 @@  static inline int sg_imbalanced(struct sched_group *group)
 static inline bool
 group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
 {
-	if (sgs->sum_nr_running < sgs->group_weight)
+	if (sgs->sum_h_nr_running < sgs->group_weight)
 		return true;
 
 	if ((sgs->group_capacity * 100) >
@@ -7906,7 +7908,7 @@  group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
 static inline bool
 group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
 {
-	if (sgs->sum_nr_running <= sgs->group_weight)
+	if (sgs->sum_h_nr_running <= sgs->group_weight)
 		return false;
 
 	if ((sgs->group_capacity * 100) <
@@ -8000,6 +8002,7 @@  static inline void update_sg_lb_stats(struct lb_env *env,
 
 		sgs->group_load += cpu_runnable_load(rq);
 		sgs->group_util += cpu_util(i);
+		sgs->sum_h_nr_running += rq->cfs.h_nr_running;
 		sgs->sum_nr_running += rq->cfs.h_nr_running;
 
 		nr_running = rq->nr_running;
@@ -8030,8 +8033,8 @@  static inline void update_sg_lb_stats(struct lb_env *env,
 	sgs->group_capacity = group->sgc->capacity;
 	sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
 
-	if (sgs->sum_nr_running)
-		sgs->load_per_task = sgs->group_load / sgs->sum_nr_running;
+	if (sgs->sum_h_nr_running)
+		sgs->load_per_task = sgs->group_load / sgs->sum_h_nr_running;
 
 	sgs->group_weight = group->group_weight;
 
@@ -8088,7 +8091,7 @@  static bool update_sd_pick_busiest(struct lb_env *env,
 	 * capable CPUs may harm throughput. Maximize throughput,
 	 * power/energy consequences are not considered.
 	 */
-	if (sgs->sum_nr_running <= sgs->group_weight &&
+	if (sgs->sum_h_nr_running <= sgs->group_weight &&
 	    group_smaller_min_cpu_capacity(sds->local, sg))
 		return false;
 
@@ -8119,7 +8122,7 @@  static bool update_sd_pick_busiest(struct lb_env *env,
 	 * perform better since they share less core resources.  Hence when we
 	 * have idle threads, we want them to be the higher ones.
 	 */
-	if (sgs->sum_nr_running &&
+	if (sgs->sum_h_nr_running &&
 	    sched_asym_prefer(env->dst_cpu, sg->asym_prefer_cpu)) {
 		sgs->group_asym_capacity = 1;
 		if (!sds->busiest)
@@ -8137,9 +8140,9 @@  static bool update_sd_pick_busiest(struct lb_env *env,
 #ifdef CONFIG_NUMA_BALANCING
 static inline enum fbq_type fbq_classify_group(struct sg_lb_stats *sgs)
 {
-	if (sgs->sum_nr_running > sgs->nr_numa_running)
+	if (sgs->sum_h_nr_running > sgs->nr_numa_running)
 		return regular;
-	if (sgs->sum_nr_running > sgs->nr_preferred_running)
+	if (sgs->sum_h_nr_running > sgs->nr_preferred_running)
 		return remote;
 	return all;
 }
@@ -8214,7 +8217,7 @@  static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		 */
 		if (prefer_sibling && sds->local &&
 		    group_has_capacity(env, local) &&
-		    (sgs->sum_nr_running > local->sum_nr_running + 1)) {
+		    (sgs->sum_h_nr_running > local->sum_h_nr_running + 1)) {
 			sgs->group_no_capacity = 1;
 			sgs->group_type = group_classify(sg, sgs);
 		}
@@ -8226,7 +8229,7 @@  static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 
 next_group:
 		/* Now, start updating sd_lb_stats */
-		sds->total_running += sgs->sum_nr_running;
+		sds->total_running += sgs->sum_h_nr_running;
 		sds->total_load += sgs->group_load;
 		sds->total_capacity += sgs->group_capacity;
 
@@ -8280,7 +8283,7 @@  void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
 	local = &sds->local_stat;
 	busiest = &sds->busiest_stat;
 
-	if (!local->sum_nr_running)
+	if (!local->sum_h_nr_running)
 		local->load_per_task = cpu_avg_load_per_task(env->dst_cpu);
 	else if (busiest->load_per_task > local->load_per_task)
 		imbn = 1;
@@ -8378,7 +8381,7 @@  static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
 	 */
 	if (busiest->group_type == group_overloaded &&
 	    local->group_type   == group_overloaded) {
-		load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE;
+		load_above_capacity = busiest->sum_h_nr_running * SCHED_CAPACITY_SCALE;
 		if (load_above_capacity > busiest->group_capacity) {
 			load_above_capacity -= busiest->group_capacity;
 			load_above_capacity *= scale_load_down(NICE_0_LOAD);
@@ -8459,7 +8462,7 @@  static struct sched_group *find_busiest_group(struct lb_env *env)
 		goto force_balance;
 
 	/* There is no busy sibling group to pull tasks from */
-	if (!sds.busiest || busiest->sum_nr_running == 0)
+	if (!sds.busiest || busiest->sum_h_nr_running == 0)
 		goto out_balanced;
 
 	/* XXX broken for overlapping NUMA groups */
@@ -8781,7 +8784,7 @@  static int load_balance(int this_cpu, struct rq *this_rq,
 	env.src_rq = busiest;
 
 	ld_moved = 0;
-	if (busiest->nr_running > 1) {
+	if (busiest->cfs.h_nr_running > 1) {
 		/*
 		 * Attempt to move tasks. If find_busiest_group has found
 		 * an imbalance but busiest->nr_running <= 1, the group is