diff mbox series

[v4,07/11] sched/fair: evenly spread tasks when not overloaded

Message ID 1571405198-27570-8-git-send-email-vincent.guittot@linaro.org
State New
Headers show
Series sched/fair: rework the CFS load balance | expand

Commit Message

Vincent Guittot Oct. 18, 2019, 1:26 p.m. UTC
When there is only 1 cpu per group, using the idle cpus to evenly spread
tasks doesn't make sense and nr_running is a better metrics.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

---
 kernel/sched/fair.c | 40 ++++++++++++++++++++++++++++------------
 1 file changed, 28 insertions(+), 12 deletions(-)

-- 
2.7.4

Comments

Mel Gorman Oct. 30, 2019, 4:03 p.m. UTC | #1
On Fri, Oct 18, 2019 at 03:26:34PM +0200, Vincent Guittot wrote:
> When there is only 1 cpu per group, using the idle cpus to evenly spread

> tasks doesn't make sense and nr_running is a better metrics.

> 

> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---

>  kernel/sched/fair.c | 40 ++++++++++++++++++++++++++++------------

>  1 file changed, 28 insertions(+), 12 deletions(-)

> 

> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

> index 9ac2264..9b8e20d 100644

> --- a/kernel/sched/fair.c

> +++ b/kernel/sched/fair.c

> @@ -8601,18 +8601,34 @@ static struct sched_group *find_busiest_group(struct lb_env *env)

>  	    busiest->sum_nr_running > local->sum_nr_running + 1)

>  		goto force_balance;

>  

> -	if (busiest->group_type != group_overloaded &&

> -	     (env->idle == CPU_NOT_IDLE ||

> -	      local->idle_cpus <= (busiest->idle_cpus + 1)))

> -		/*

> -		 * If the busiest group is not overloaded

> -		 * and there is no imbalance between this and busiest group

> -		 * wrt idle CPUs, it is balanced. The imbalance

> -		 * becomes significant if the diff is greater than 1 otherwise

> -		 * we might end up to just move the imbalance on another

> -		 * group.

> -		 */

> -		goto out_balanced;

> +	if (busiest->group_type != group_overloaded) {

> +		if (env->idle == CPU_NOT_IDLE)

> +			/*

> +			 * If the busiest group is not overloaded (and as a

> +			 * result the local one too) but this cpu is already

> +			 * busy, let another idle cpu try to pull task.

> +			 */

> +			goto out_balanced;

> +

> +		if (busiest->group_weight > 1 &&

> +		    local->idle_cpus <= (busiest->idle_cpus + 1))

> +			/*

> +			 * If the busiest group is not overloaded

> +			 * and there is no imbalance between this and busiest

> +			 * group wrt idle CPUs, it is balanced. The imbalance

> +			 * becomes significant if the diff is greater than 1

> +			 * otherwise we might end up to just move the imbalance

> +			 * on another group. Of course this applies only if

> +			 * there is more than 1 CPU per group.

> +			 */

> +			goto out_balanced;

> +

> +		if (busiest->sum_h_nr_running == 1)

> +			/*

> +			 * busiest doesn't have any tasks waiting to run

> +			 */

> +			goto out_balanced;

> +	}

>  


While outside the scope of this patch, it appears that this would still
allow slight imbalances in idle CPUs to pull tasks across NUMA domains
too easily.

-- 
Mel Gorman
SUSE Labs
diff mbox series

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9ac2264..9b8e20d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8601,18 +8601,34 @@  static struct sched_group *find_busiest_group(struct lb_env *env)
 	    busiest->sum_nr_running > local->sum_nr_running + 1)
 		goto force_balance;
 
-	if (busiest->group_type != group_overloaded &&
-	     (env->idle == CPU_NOT_IDLE ||
-	      local->idle_cpus <= (busiest->idle_cpus + 1)))
-		/*
-		 * If the busiest group is not overloaded
-		 * and there is no imbalance between this and busiest group
-		 * wrt idle CPUs, it is balanced. The imbalance
-		 * becomes significant if the diff is greater than 1 otherwise
-		 * we might end up to just move the imbalance on another
-		 * group.
-		 */
-		goto out_balanced;
+	if (busiest->group_type != group_overloaded) {
+		if (env->idle == CPU_NOT_IDLE)
+			/*
+			 * If the busiest group is not overloaded (and as a
+			 * result the local one too) but this cpu is already
+			 * busy, let another idle cpu try to pull task.
+			 */
+			goto out_balanced;
+
+		if (busiest->group_weight > 1 &&
+		    local->idle_cpus <= (busiest->idle_cpus + 1))
+			/*
+			 * If the busiest group is not overloaded
+			 * and there is no imbalance between this and busiest
+			 * group wrt idle CPUs, it is balanced. The imbalance
+			 * becomes significant if the diff is greater than 1
+			 * otherwise we might end up to just move the imbalance
+			 * on another group. Of course this applies only if
+			 * there is more than 1 CPU per group.
+			 */
+			goto out_balanced;
+
+		if (busiest->sum_h_nr_running == 1)
+			/*
+			 * busiest doesn't have any tasks waiting to run
+			 */
+			goto out_balanced;
+	}
 
 force_balance:
 	/* Looks like there is an imbalance. Compute it */