@@ -7650,6 +7650,7 @@ struct sg_lb_stats {
unsigned long group_capacity;
unsigned long group_util; /* Total utilization of the group */
unsigned int sum_nr_running; /* Nr tasks running in the group */
+ unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */
unsigned int idle_cpus;
unsigned int group_weight;
enum group_type group_type;
@@ -7695,6 +7696,7 @@ static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
.busiest_stat = {
.avg_load = 0UL,
.sum_nr_running = 0,
+ .sum_h_nr_running = 0,
.group_type = group_other,
},
};
@@ -7885,7 +7887,7 @@ static inline int sg_imbalanced(struct sched_group *group)
static inline bool
group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
{
- if (sgs->sum_nr_running < sgs->group_weight)
+ if (sgs->sum_h_nr_running < sgs->group_weight)
return true;
if ((sgs->group_capacity * 100) >
@@ -7906,7 +7908,7 @@ group_has_capacity(struct lb_env *env, struct sg_lb_stats *sgs)
static inline bool
group_is_overloaded(struct lb_env *env, struct sg_lb_stats *sgs)
{
- if (sgs->sum_nr_running <= sgs->group_weight)
+ if (sgs->sum_h_nr_running <= sgs->group_weight)
return false;
if ((sgs->group_capacity * 100) <
@@ -8000,6 +8002,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->group_load += cpu_runnable_load(rq);
sgs->group_util += cpu_util(i);
+ sgs->sum_h_nr_running += rq->cfs.h_nr_running;
sgs->sum_nr_running += rq->cfs.h_nr_running;
nr_running = rq->nr_running;
@@ -8030,8 +8033,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->group_capacity = group->sgc->capacity;
sgs->avg_load = (sgs->group_load*SCHED_CAPACITY_SCALE) / sgs->group_capacity;
- if (sgs->sum_nr_running)
- sgs->load_per_task = sgs->group_load / sgs->sum_nr_running;
+ if (sgs->sum_h_nr_running)
+ sgs->load_per_task = sgs->group_load / sgs->sum_h_nr_running;
sgs->group_weight = group->group_weight;
@@ -8088,7 +8091,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
* capable CPUs may harm throughput. Maximize throughput,
* power/energy consequences are not considered.
*/
- if (sgs->sum_nr_running <= sgs->group_weight &&
+ if (sgs->sum_h_nr_running <= sgs->group_weight &&
group_smaller_min_cpu_capacity(sds->local, sg))
return false;
@@ -8119,7 +8122,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
* perform better since they share less core resources. Hence when we
* have idle threads, we want them to be the higher ones.
*/
- if (sgs->sum_nr_running &&
+ if (sgs->sum_h_nr_running &&
sched_asym_prefer(env->dst_cpu, sg->asym_prefer_cpu)) {
sgs->group_asym_capacity = 1;
if (!sds->busiest)
@@ -8137,9 +8140,9 @@ static bool update_sd_pick_busiest(struct lb_env *env,
#ifdef CONFIG_NUMA_BALANCING
static inline enum fbq_type fbq_classify_group(struct sg_lb_stats *sgs)
{
- if (sgs->sum_nr_running > sgs->nr_numa_running)
+ if (sgs->sum_h_nr_running > sgs->nr_numa_running)
return regular;
- if (sgs->sum_nr_running > sgs->nr_preferred_running)
+ if (sgs->sum_h_nr_running > sgs->nr_preferred_running)
return remote;
return all;
}
@@ -8214,7 +8217,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
*/
if (prefer_sibling && sds->local &&
group_has_capacity(env, local) &&
- (sgs->sum_nr_running > local->sum_nr_running + 1)) {
+ (sgs->sum_h_nr_running > local->sum_h_nr_running + 1)) {
sgs->group_no_capacity = 1;
sgs->group_type = group_classify(sg, sgs);
}
@@ -8226,7 +8229,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
next_group:
/* Now, start updating sd_lb_stats */
- sds->total_running += sgs->sum_nr_running;
+ sds->total_running += sgs->sum_h_nr_running;
sds->total_load += sgs->group_load;
sds->total_capacity += sgs->group_capacity;
@@ -8280,7 +8283,7 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
local = &sds->local_stat;
busiest = &sds->busiest_stat;
- if (!local->sum_nr_running)
+ if (!local->sum_h_nr_running)
local->load_per_task = cpu_avg_load_per_task(env->dst_cpu);
else if (busiest->load_per_task > local->load_per_task)
imbn = 1;
@@ -8378,7 +8381,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
*/
if (busiest->group_type == group_overloaded &&
local->group_type == group_overloaded) {
- load_above_capacity = busiest->sum_nr_running * SCHED_CAPACITY_SCALE;
+ load_above_capacity = busiest->sum_h_nr_running * SCHED_CAPACITY_SCALE;
if (load_above_capacity > busiest->group_capacity) {
load_above_capacity -= busiest->group_capacity;
load_above_capacity *= scale_load_down(NICE_0_LOAD);
@@ -8459,7 +8462,7 @@ static struct sched_group *find_busiest_group(struct lb_env *env)
goto force_balance;
/* There is no busy sibling group to pull tasks from */
- if (!sds.busiest || busiest->sum_nr_running == 0)
+ if (!sds.busiest || busiest->sum_h_nr_running == 0)
goto out_balanced;
/* XXX broken for overlapping NUMA groups */
@@ -8781,7 +8784,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
env.src_rq = busiest;
ld_moved = 0;
- if (busiest->nr_running > 1) {
+ if (busiest->cfs.h_nr_running > 1) {
/*
* Attempt to move tasks. If find_busiest_group has found
* an imbalance but busiest->nr_running <= 1, the group is
sum_nr_running will track rq->nr_running tasks and sum_h_nr_running
will track cfs->h_nr_running so we can use both to detect when other
scheduling classes are running and preempting CFS.

There are no functional changes.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/fair.c | 31 +++++++++++++++++--------------
 1 file changed, 17 insertions(+), 14 deletions(-)

--
2.7.4
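
Not part of the patch: a minimal user-space sketch of the idea behind keeping
both counts side by side - if the per-CPU total runnable count exceeds the CFS
hierarchy count, some non-CFS (e.g. RT/DL) task is runnable and may be
preempting CFS. The names below (struct toy_rq, has_non_cfs_tasks) are
illustrative only, not kernel APIs.

#include <stdio.h>
#include <stdbool.h>

/* Toy model of the two counters the patch tracks per group:
 * nr_running counts runnable tasks of all classes, while
 * h_nr_running counts only the CFS (fair-class) tasks.
 */
struct toy_rq {
	unsigned int nr_running;	/* all classes: CFS + RT + DL ... */
	unsigned int cfs_h_nr_running;	/* CFS hierarchy only */
};

/* If the two counts differ, a non-CFS task is runnable on this CPU and
 * may be preempting CFS - the condition the commit message says the
 * load balancer wants to be able to detect.
 */
static bool has_non_cfs_tasks(const struct toy_rq *rq)
{
	return rq->nr_running > rq->cfs_h_nr_running;
}

int main(void)
{
	struct toy_rq rq = { .nr_running = 3, .cfs_h_nr_running = 2 };

	printf("non-CFS activity: %s\n",
	       has_non_cfs_tasks(&rq) ? "yes" : "no");
	return 0;
}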