
[v4,10/24] sched/fair: Use IPCC scores to select a busiest runqueue

Message ID 20230613042422.5344-11-ricardo.neri-calderon@linux.intel.com
State New
Series sched: Introduce classes of tasks for load balance

Commit Message

Ricardo Neri June 13, 2023, 4:24 a.m. UTC
Use IPCC scores to break a tie between two runqueues with the same priority
and number of running tasks: select the runqueue whose last-enqueued task
would get the higher IPC boost when migrated to the destination CPU. (These
tasks are migrated first during load balance.)

For now, restrict the use of IPCC scores to scheduling domains marked with
the SD_ASYM_PACKING flag.
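
To make the tie-break concrete, here is a minimal user-space sketch of the
selection rule (only an illustration, not part of the patch; the runqueues,
CPU numbers and scores are made up): among runqueues tied on nr_running, pick
the one whose last-enqueued task would gain the most from moving to dst_cpu,
i.e. the one with the largest score_dst - score_src.

#include <limits.h>
#include <stdio.h>

/*
 * Hypothetical per-runqueue data: nr_running plus the IPCC scores that the
 * last-enqueued task would get on its current CPU and on dst_cpu.
 */
struct toy_rq {
	int cpu;
	unsigned int nr_running;
	long score_src;		/* score on the runqueue's own CPU */
	long score_dst;		/* score on dst_cpu */
};

int main(void)
{
	struct toy_rq rqs[] = {
		{ .cpu = 2, .nr_running = 3, .score_src = 60, .score_dst = 70 },
		{ .cpu = 3, .nr_running = 3, .score_src = 50, .score_dst = 80 },
		{ .cpu = 4, .nr_running = 2, .score_src = 40, .score_dst = 90 },
	};
	long busiest_ipcc_delta = LONG_MIN;
	unsigned int busiest_nr = 0;
	int busiest_cpu = -1;

	for (unsigned int i = 0; i < sizeof(rqs) / sizeof(rqs[0]); i++) {
		long delta = rqs[i].score_dst - rqs[i].score_src;

		if (busiest_nr < rqs[i].nr_running) {
			/* A higher nr_running always wins; remember its delta. */
			busiest_nr = rqs[i].nr_running;
			busiest_ipcc_delta = delta;
			busiest_cpu = rqs[i].cpu;
		} else if (busiest_nr == rqs[i].nr_running &&
			   busiest_ipcc_delta < delta) {
			/* Tied on nr_running: prefer the bigger IPC boost. */
			busiest_ipcc_delta = delta;
			busiest_cpu = rqs[i].cpu;
		}
	}

	/* Picks CPU 3: tied with CPU 2 on nr_running, but +30 beats +10. */
	printf("busiest: cpu%d (IPCC delta %ld)\n", busiest_cpu, busiest_ipcc_delta);
	return 0;
}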

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Perry Yuan <Perry.Yuan@amd.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Zhao Liu <zhao1.liu@linux.intel.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
Changes since v3:
 * Do not compute the IPCC stats using the current tasks of runqueues.
   Instead, use the tasks at the back of the queue. These are the tasks
   that will be pulled first during load balance. (Vincent)

Changes since v2:
 * Only use IPCC scores to break ties if the sched domain uses
   asym_packing. (Ionela)
 * Handle errors of arch_get_ipcc_score(). (Ionela)

Changes since v1:
 * Fixed a bug when selecting a busiest runqueue: when comparing two
   runqueues with equal nr_running, we must compute the IPCC score delta
   of both.
 * Renamed local variables to improve the layout of the code block.
   (PeterZ)
 * Used the new interface names.
---
 kernel/sched/fair.c | 61 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

Comments

Ionela Voinescu June 22, 2023, 9:03 a.m. UTC | #1
On Monday 12 Jun 2023 at 21:24:08 (-0700), Ricardo Neri wrote:
> [...]
> @@ -10885,6 +10926,26 @@ static struct rq *find_busiest_queue(struct lb_env *env,
>  			if (busiest_nr < nr_running) {
>  				busiest_nr = nr_running;
>  				busiest = rq;
> +
> +				/*
> +				 * Remember the IPCC score delta of the busiest
> +				 * runqueue. We may need it to break a tie with
> +				 * other queues with equal nr_running.
> +				 */
> +				busiest_ipcc_delta = ipcc_score_delta(busiest, env);
> +			/*
> +			 * For ties, select @rq if doing so would give its last
> +			 * queued task a bigger IPC boost when migrated to
> +			 * dst_cpu.
> +			 */
> +			} else if (busiest_nr == nr_running) {
> +				long delta = ipcc_score_delta(rq, env);
> +
> +				if (busiest_ipcc_delta < delta) {
> +					busiest_ipcc_delta = delta;
> +					busiest_nr = nr_running;

nit: there's no need for this assignment, as busiest_nr is already equal to
nr_running.

Ionela.

> +					busiest = rq;
> +				}
>  			}
>  			break;
>  
Ricardo Neri June 24, 2023, 12:25 a.m. UTC | #2
On Thu, Jun 22, 2023 at 10:03:17AM +0100, Ionela Voinescu wrote:
> On Monday 12 Jun 2023 at 21:24:08 (-0700), Ricardo Neri wrote:
> > [...]
> > +			/*
> > +			 * For ties, select @rq if doing so would give its last
> > +			 * queued task a bigger IPC boost when migrated to
> > +			 * dst_cpu.
> > +			 */
> > +			} else if (busiest_nr == nr_running) {
> > +				long delta = ipcc_score_delta(rq, env);
> > +
> > +				if (busiest_ipcc_delta < delta) {
> > +					busiest_ipcc_delta = delta;
> > +					busiest_nr = nr_running;
> 
> nit: there's no need for this assignment, as busiest_nr is already equal to
> nr_running.

True! I will remove this pointless assignment.

Thanks and BR,
Ricardo
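
The agreed follow-up would presumably amount to just dropping that assignment
from the tie-break branch, along these lines (a sketch of the change, not code
from a posted revision):

 				if (busiest_ipcc_delta < delta) {
 					busiest_ipcc_delta = delta;
-					busiest_nr = nr_running;
 					busiest = rq;
 				}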

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fcec791ede4f..da3e009eef42 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9564,6 +9564,41 @@  static bool sched_asym_ipcc_pick(struct sched_group *a,
 	return sched_asym_ipcc_prefer(a_stats, b_stats);
 }
 
+/**
+ * ipcc_score_delta - Get the IPCC score delta wrt the load balance's dst_cpu
+ * @rq:		A runqueue
+ * @env:	Load balancing environment
+ *
+ * Returns: The IPCC score delta that the last task enqueued in @rq would get
+ * if placed on the destination CPU of @env, or LONG_MIN to indicate that the
+ * delta should not be used.
+ */
+static long ipcc_score_delta(struct rq *rq, struct lb_env *env)
+{
+	unsigned long score_src, score_dst;
+	unsigned short ipcc;
+
+	if (!sched_ipcc_enabled())
+		return LONG_MIN;
+
+	/* Only asym_packing uses IPCC scores at the moment. */
+	if (!(env->sd->flags & SD_ASYM_PACKING))
+		return LONG_MIN;
+
+	if (rq_last_task_ipcc(env->dst_cpu, rq, &ipcc))
+		return LONG_MIN;
+
+	score_dst = arch_get_ipcc_score(ipcc, env->dst_cpu);
+	if (IS_ERR_VALUE(score_dst))
+		return LONG_MIN;
+
+	score_src = arch_get_ipcc_score(ipcc, cpu_of(rq));
+	if (IS_ERR_VALUE(score_src))
+		return LONG_MIN;
+
+	return score_dst - score_src;
+}
+
 #else /* CONFIG_IPC_CLASSES */
 static void update_sg_lb_ipcc_stats(int dst_cpu, struct sg_lb_stats *sgs,
 				    struct rq *rq)
@@ -9594,6 +9629,11 @@  static bool sched_asym_ipcc_pick(struct sched_group *a,
 	return false;
 }
 
+static long ipcc_score_delta(struct rq *rq, struct lb_env *env)
+{
+	return LONG_MIN;
+}
+
 #endif /* CONFIG_IPC_CLASSES */
 
 /**
@@ -10769,6 +10809,7 @@  static struct rq *find_busiest_queue(struct lb_env *env,
 {
 	struct rq *busiest = NULL, *rq;
 	unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
+	long busiest_ipcc_delta = LONG_MIN;
 	unsigned int busiest_nr = 0;
 	int i;
 
@@ -10885,6 +10926,26 @@  static struct rq *find_busiest_queue(struct lb_env *env,
 			if (busiest_nr < nr_running) {
 				busiest_nr = nr_running;
 				busiest = rq;
+
+				/*
+				 * Remember the IPCC score delta of the busiest
+				 * runqueue. We may need it to break a tie with
+				 * other queues with equal nr_running.
+				 */
+				busiest_ipcc_delta = ipcc_score_delta(busiest, env);
+			/*
+			 * For ties, select @rq if doing so would give its last
+			 * queued task a bigger IPC boost when migrated to
+			 * dst_cpu.
+			 */
+			} else if (busiest_nr == nr_running) {
+				long delta = ipcc_score_delta(rq, env);
+
+				if (busiest_ipcc_delta < delta) {
+					busiest_ipcc_delta = delta;
+					busiest_nr = nr_running;
+					busiest = rq;
+				}
 			}
 			break;