diff mbox series

[v3,13/24] thermal: intel: hfi: Store per-CPU IPCC scores

Message ID 20230207051105.11575-14-ricardo.neri-calderon@linux.intel.com
State New
Headers show
Series sched: Introduce classes of tasks for load balance | expand

Commit Message

Ricardo Neri Feb. 7, 2023, 5:10 a.m. UTC
The scheduler reads the IPCC scores when balancing load. These reads can
be quite frequent. Hardware can also update the HFI table frequently.
Concurrent access may cause a lot of lock contention. It gets worse as the
number of CPUs increases.

Instead, create separate per-CPU IPCC scores that the scheduler can read
without the HFI table lock.

Cc: Ben Segall <bsegall@google.com>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Ionela Voinescu <ionela.voinescu@arm.com>
Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Lukasz Luba <lukasz.luba@arm.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tim C. Chen <tim.c.chen@intel.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: x86@kernel.org
Cc: linux-pm@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
Changes since v2:
 * Only create these per-CPU variables when Intel Thread Director is
   supported.

Changes since v1:
 * Added this patch.
---
 drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

Comments

Rafael J. Wysocki March 27, 2023, 4:37 p.m. UTC | #1
On Tue, Feb 7, 2023 at 6:02 AM Ricardo Neri
<ricardo.neri-calderon@linux.intel.com> wrote:
>
> The scheduler reads the IPCC scores when balancing load. These reads can
> be quite frequent. Hardware can also update the HFI table frequently.
> Concurrent access may cause a lot of lock contention. It gets worse as the
> number of CPUs increases.
>
> Instead, create separate per-CPU IPCC scores that the scheduler can read
> without the HFI table lock.
>
> Cc: Ben Segall <bsegall@google.com>
> Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> Cc: Ionela Voinescu <ionela.voinescu@arm.com>
> Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Lukasz Luba <lukasz.luba@arm.com>
> Cc: Mel Gorman <mgorman@suse.de>
> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> Cc: Tim C. Chen <tim.c.chen@intel.com>
> Cc: Valentin Schneider <vschneid@redhat.com>
> Cc: x86@kernel.org
> Cc: linux-pm@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> ---
> Changes since v2:
>  * Only create these per-CPU variables when Intel Thread Director is
>    supported.
>
> Changes since v1:
>  * Added this patch.
> ---
>  drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
>  1 file changed, 46 insertions(+)
>
> diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> index 2527ae3836c7..b06021828892 100644
> --- a/drivers/thermal/intel/intel_hfi.c
> +++ b/drivers/thermal/intel/intel_hfi.c
> @@ -29,6 +29,7 @@
>  #include <linux/kernel.h>
>  #include <linux/math.h>
>  #include <linux/mutex.h>
> +#include <linux/percpu.h>
>  #include <linux/percpu-defs.h>
>  #include <linux/printk.h>
>  #include <linux/processor.h>
> @@ -170,6 +171,43 @@ static struct workqueue_struct *hfi_updates_wq;
>  #define HFI_UPDATE_INTERVAL            HZ
>  #define HFI_MAX_THERM_NOTIFY_COUNT     16
>
> +#ifdef CONFIG_IPC_CLASSES

It would be good to provide a (concise) description of this variable.

> +static int __percpu *hfi_ipcc_scores;
> +
> +static int alloc_hfi_ipcc_scores(void)
> +{
> +       if (!cpu_feature_enabled(X86_FEATURE_ITD))
> +               return 0;
> +
> +       hfi_ipcc_scores = __alloc_percpu(sizeof(*hfi_ipcc_scores) *
> +                                        hfi_features.nr_classes,
> +                                        sizeof(*hfi_ipcc_scores));
> +
> +       return !hfi_ipcc_scores;

I would do

if (!hfi_ipcc_scores)
        return -ENOMEM;

return 0;

Or make the function return bool.

> +}
> +
> +static void set_hfi_ipcc_score(void *caps, int cpu)
> +{
> +       int i, *hfi_class;
> +
> +       if (!cpu_feature_enabled(X86_FEATURE_ITD))
> +               return;
> +
> +       hfi_class = per_cpu_ptr(hfi_ipcc_scores, cpu);
> +
> +       for (i = 0;  i < hfi_features.nr_classes; i++) {
> +               struct hfi_cpu_data *class_caps;
> +
> +               class_caps = caps + i * hfi_features.class_stride;
> +               WRITE_ONCE(hfi_class[i], class_caps->perf_cap);

As it stands, it is unclear why WRITE_ONCE() is needed here.

> +       }
> +}
> +
> +#else
> +static int alloc_hfi_ipcc_scores(void) { return 0; }
> +static void set_hfi_ipcc_score(void *caps, int cpu) { }
> +#endif /* CONFIG_IPC_CLASSES */
> +
>  static void get_hfi_caps(struct hfi_instance *hfi_instance,
>                          struct thermal_genl_cpu_caps *cpu_caps)
>  {
> @@ -192,6 +230,8 @@ static void get_hfi_caps(struct hfi_instance *hfi_instance,
>                 cpu_caps[i].efficiency = caps->ee_cap << 2;
>
>                 ++i;
> +
> +               set_hfi_ipcc_score(caps, cpu);
>         }
>         raw_spin_unlock_irq(&hfi_instance->table_lock);
>  }
> @@ -580,8 +620,14 @@ void __init intel_hfi_init(void)
>         if (!hfi_updates_wq)
>                 goto err_nomem;
>
> +       if (alloc_hfi_ipcc_scores())
> +               goto err_ipcc;
> +
>         return;
>
> +err_ipcc:
> +       destroy_workqueue(hfi_updates_wq);
> +
>  err_nomem:
>         for (j = 0; j < i; ++j) {
>                 hfi_instance = &hfi_instances[j];
> --
Ricardo Neri March 28, 2023, 11:43 p.m. UTC | #2
On Mon, Mar 27, 2023 at 06:37:32PM +0200, Rafael J. Wysocki wrote:
> On Tue, Feb 7, 2023 at 6:02 AM Ricardo Neri
> <ricardo.neri-calderon@linux.intel.com> wrote:
> >
> > The scheduler reads the IPCC scores when balancing load. These reads can
> > be quite frequent. Hardware can also update the HFI table frequently.
> > Concurrent access may cause a lot of lock contention. It gets worse as the
> > number of CPUs increases.
> >
> > Instead, create separate per-CPU IPCC scores that the scheduler can read
> > without the HFI table lock.
> >
> > Cc: Ben Segall <bsegall@google.com>
> > Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> > Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> > Cc: Ionela Voinescu <ionela.voinescu@arm.com>
> > Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
> > Cc: Len Brown <len.brown@intel.com>
> > Cc: Lukasz Luba <lukasz.luba@arm.com>
> > Cc: Mel Gorman <mgorman@suse.de>
> > Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> > Cc: Steven Rostedt <rostedt@goodmis.org>
> > Cc: Tim C. Chen <tim.c.chen@intel.com>
> > Cc: Valentin Schneider <vschneid@redhat.com>
> > Cc: x86@kernel.org
> > Cc: linux-pm@vger.kernel.org
> > Cc: linux-kernel@vger.kernel.org
> > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> > ---
> > Changes since v2:
> >  * Only create these per-CPU variables when Intel Thread Director is
> >    supported.
> >
> > Changes since v1:
> >  * Added this patch.
> > ---
> >  drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
> >  1 file changed, 46 insertions(+)
> >
> > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> > index 2527ae3836c7..b06021828892 100644
> > --- a/drivers/thermal/intel/intel_hfi.c
> > +++ b/drivers/thermal/intel/intel_hfi.c
> > @@ -29,6 +29,7 @@
> >  #include <linux/kernel.h>
> >  #include <linux/math.h>
> >  #include <linux/mutex.h>
> > +#include <linux/percpu.h>
> >  #include <linux/percpu-defs.h>
> >  #include <linux/printk.h>
> >  #include <linux/processor.h>
> > @@ -170,6 +171,43 @@ static struct workqueue_struct *hfi_updates_wq;
> >  #define HFI_UPDATE_INTERVAL            HZ
> >  #define HFI_MAX_THERM_NOTIFY_COUNT     16
> >
> > +#ifdef CONFIG_IPC_CLASSES
> 
> It would be good to provide a (concise) description of this variable.
> 
> > +static int __percpu *hfi_ipcc_scores;

Do you mean hfi_ipcc_scores or CONFIG_IPC_CLASSES?

> > +
> > +static int alloc_hfi_ipcc_scores(void)
> > +{
> > +       if (!cpu_feature_enabled(X86_FEATURE_ITD))
> > +               return 0;
> > +
> > +       hfi_ipcc_scores = __alloc_percpu(sizeof(*hfi_ipcc_scores) *
> > +                                        hfi_features.nr_classes,
> > +                                        sizeof(*hfi_ipcc_scores));
> > +
> > +       return !hfi_ipcc_scores;
> 
> I would do
> 
> if (!hfi_ipcc_scores)
>         return -ENOMEM;
> 
> return 0;
> 
> Or make the function return bool.

Sure, I can make this function return -ENOMEM when the allocation fails
(and 0 otherwise).

> 
> > +}
> > +
> > +static void set_hfi_ipcc_score(void *caps, int cpu)
> > +{
> > +       int i, *hfi_class;
> > +
> > +       if (!cpu_feature_enabled(X86_FEATURE_ITD))
> > +               return;
> > +
> > +       hfi_class = per_cpu_ptr(hfi_ipcc_scores, cpu);
> > +
> > +       for (i = 0;  i < hfi_features.nr_classes; i++) {
> > +               struct hfi_cpu_data *class_caps;
> > +
> > +               class_caps = caps + i * hfi_features.class_stride;
> > +               WRITE_ONCE(hfi_class[i], class_caps->perf_cap);
> 
> As it stands, it is unclear why WRITE_ONCE() is needed here.

The CPU handling the HFI interrupt will update all the per-CPU IPCC
scores, while other CPUs may read those scores concurrently without
taking the HFI table lock. My intention is to ensure that the write of a
given IPCC score is completed before another CPU reads that score. The
corresponding READ_ONCE() happens in patch 15.
Rafael J. Wysocki March 29, 2023, 12:08 p.m. UTC | #3
On Wed, Mar 29, 2023 at 1:32 AM Ricardo Neri
<ricardo.neri-calderon@linux.intel.com> wrote:
>
> On Mon, Mar 27, 2023 at 06:37:32PM +0200, Rafael J. Wysocki wrote:
> > On Tue, Feb 7, 2023 at 6:02 AM Ricardo Neri
> > <ricardo.neri-calderon@linux.intel.com> wrote:
> > >
> > > The scheduler reads the IPCC scores when balancing load. These reads can
> > > be quite frequent. Hardware can also update the HFI table frequently.
> > > Concurrent access may cause a lot of lock contention. It gets worse as the
> > > number of CPUs increases.
> > >
> > > Instead, create separate per-CPU IPCC scores that the scheduler can read
> > > without the HFI table lock.
> > >
> > > Cc: Ben Segall <bsegall@google.com>
> > > Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> > > Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> > > Cc: Ionela Voinescu <ionela.voinescu@arm.com>
> > > Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > Cc: Len Brown <len.brown@intel.com>
> > > Cc: Lukasz Luba <lukasz.luba@arm.com>
> > > Cc: Mel Gorman <mgorman@suse.de>
> > > Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > Cc: Tim C. Chen <tim.c.chen@intel.com>
> > > Cc: Valentin Schneider <vschneid@redhat.com>
> > > Cc: x86@kernel.org
> > > Cc: linux-pm@vger.kernel.org
> > > Cc: linux-kernel@vger.kernel.org
> > > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> > > ---
> > > Changes since v2:
> > >  * Only create these per-CPU variables when Intel Thread Director is
> > >    supported.
> > >
> > > Changes since v1:
> > >  * Added this patch.
> > > ---
> > >  drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
> > >  1 file changed, 46 insertions(+)
> > >
> > > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> > > index 2527ae3836c7..b06021828892 100644
> > > --- a/drivers/thermal/intel/intel_hfi.c
> > > +++ b/drivers/thermal/intel/intel_hfi.c
> > > @@ -29,6 +29,7 @@
> > >  #include <linux/kernel.h>
> > >  #include <linux/math.h>
> > >  #include <linux/mutex.h>
> > > +#include <linux/percpu.h>
> > >  #include <linux/percpu-defs.h>
> > >  #include <linux/printk.h>
> > >  #include <linux/processor.h>
> > > @@ -170,6 +171,43 @@ static struct workqueue_struct *hfi_updates_wq;
> > >  #define HFI_UPDATE_INTERVAL            HZ
> > >  #define HFI_MAX_THERM_NOTIFY_COUNT     16
> > >
> > > +#ifdef CONFIG_IPC_CLASSES
> >
> > It would be good to provide a (concise) description of this variable.
> >
> > > +static int __percpu *hfi_ipcc_scores;
>
> Do you mean hfi_ipcc_scores or CONFIG_IPC_CLASSES?

hfi_ipcc_scores (as the latter is not a variable).
Ricardo Neri March 30, 2023, 2:15 a.m. UTC | #4
On Wed, Mar 29, 2023 at 02:08:30PM +0200, Rafael J. Wysocki wrote:
> On Wed, Mar 29, 2023 at 1:32 AM Ricardo Neri
> <ricardo.neri-calderon@linux.intel.com> wrote:
> >
> > On Mon, Mar 27, 2023 at 06:37:32PM +0200, Rafael J. Wysocki wrote:
> > > On Tue, Feb 7, 2023 at 6:02 AM Ricardo Neri
> > > <ricardo.neri-calderon@linux.intel.com> wrote:
> > > >
> > > > The scheduler reads the IPCC scores when balancing load. These reads can
> > > > be quite frequent. Hardware can also update the HFI table frequently.
> > > > Concurrent access may cause a lot of lock contention. It gets worse as the
> > > > number of CPUs increases.
> > > >
> > > > Instead, create separate per-CPU IPCC scores that the scheduler can read
> > > > without the HFI table lock.
> > > >
> > > > Cc: Ben Segall <bsegall@google.com>
> > > > Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
> > > > Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
> > > > Cc: Ionela Voinescu <ionela.voinescu@arm.com>
> > > > Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
> > > > Cc: Len Brown <len.brown@intel.com>
> > > > Cc: Lukasz Luba <lukasz.luba@arm.com>
> > > > Cc: Mel Gorman <mgorman@suse.de>
> > > > Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > > > Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> > > > Cc: Steven Rostedt <rostedt@goodmis.org>
> > > > Cc: Tim C. Chen <tim.c.chen@intel.com>
> > > > Cc: Valentin Schneider <vschneid@redhat.com>
> > > > Cc: x86@kernel.org
> > > > Cc: linux-pm@vger.kernel.org
> > > > Cc: linux-kernel@vger.kernel.org
> > > > Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > > > Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> > > > ---
> > > > Changes since v2:
> > > >  * Only create these per-CPU variables when Intel Thread Director is
> > > >    supported.
> > > >
> > > > Changes since v1:
> > > >  * Added this patch.
> > > > ---
> > > >  drivers/thermal/intel/intel_hfi.c | 46 +++++++++++++++++++++++++++++++
> > > >  1 file changed, 46 insertions(+)
> > > >
> > > > diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
> > > > index 2527ae3836c7..b06021828892 100644
> > > > --- a/drivers/thermal/intel/intel_hfi.c
> > > > +++ b/drivers/thermal/intel/intel_hfi.c
> > > > @@ -29,6 +29,7 @@
> > > >  #include <linux/kernel.h>
> > > >  #include <linux/math.h>
> > > >  #include <linux/mutex.h>
> > > > +#include <linux/percpu.h>
> > > >  #include <linux/percpu-defs.h>
> > > >  #include <linux/printk.h>
> > > >  #include <linux/processor.h>
> > > > @@ -170,6 +171,43 @@ static struct workqueue_struct *hfi_updates_wq;
> > > >  #define HFI_UPDATE_INTERVAL            HZ
> > > >  #define HFI_MAX_THERM_NOTIFY_COUNT     16
> > > >
> > > > +#ifdef CONFIG_IPC_CLASSES
> > >
> > > It would be good to provide a (concise) description of this variable.
> > >
> > > > +static int __percpu *hfi_ipcc_scores;
> >
> > Do you mean hfi_ipcc_scores or CONFIG_IPC_CLASSES?
> 
> hfi_ipcc_scores (as the latter is not a variable).

I thought so. Thank you for clarifying.
diff mbox series

Patch

diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
index 2527ae3836c7..b06021828892 100644
--- a/drivers/thermal/intel/intel_hfi.c
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -29,6 +29,7 @@ 
 #include <linux/kernel.h>
 #include <linux/math.h>
 #include <linux/mutex.h>
+#include <linux/percpu.h>
 #include <linux/percpu-defs.h>
 #include <linux/printk.h>
 #include <linux/processor.h>
@@ -170,6 +171,43 @@  static struct workqueue_struct *hfi_updates_wq;
 #define HFI_UPDATE_INTERVAL		HZ
 #define HFI_MAX_THERM_NOTIFY_COUNT	16
 
+#ifdef CONFIG_IPC_CLASSES
+static int __percpu *hfi_ipcc_scores;
+
+static int alloc_hfi_ipcc_scores(void)
+{
+	if (!cpu_feature_enabled(X86_FEATURE_ITD))
+		return 0;
+
+	hfi_ipcc_scores = __alloc_percpu(sizeof(*hfi_ipcc_scores) *
+					 hfi_features.nr_classes,
+					 sizeof(*hfi_ipcc_scores));
+
+	return !hfi_ipcc_scores;
+}
+
+static void set_hfi_ipcc_score(void *caps, int cpu)
+{
+	int i, *hfi_class;
+
+	if (!cpu_feature_enabled(X86_FEATURE_ITD))
+		return;
+
+	hfi_class = per_cpu_ptr(hfi_ipcc_scores, cpu);
+
+	for (i = 0;  i < hfi_features.nr_classes; i++) {
+		struct hfi_cpu_data *class_caps;
+
+		class_caps = caps + i * hfi_features.class_stride;
+		WRITE_ONCE(hfi_class[i], class_caps->perf_cap);
+	}
+}
+
+#else
+static int alloc_hfi_ipcc_scores(void) { return 0; }
+static void set_hfi_ipcc_score(void *caps, int cpu) { }
+#endif /* CONFIG_IPC_CLASSES */
+
 static void get_hfi_caps(struct hfi_instance *hfi_instance,
 			 struct thermal_genl_cpu_caps *cpu_caps)
 {
@@ -192,6 +230,8 @@  static void get_hfi_caps(struct hfi_instance *hfi_instance,
 		cpu_caps[i].efficiency = caps->ee_cap << 2;
 
 		++i;
+
+		set_hfi_ipcc_score(caps, cpu);
 	}
 	raw_spin_unlock_irq(&hfi_instance->table_lock);
 }
@@ -580,8 +620,14 @@  void __init intel_hfi_init(void)
 	if (!hfi_updates_wq)
 		goto err_nomem;
 
+	if (alloc_hfi_ipcc_scores())
+		goto err_ipcc;
+
 	return;
 
+err_ipcc:
+	destroy_workqueue(hfi_updates_wq);
+
 err_nomem:
 	for (j = 0; j < i; ++j) {
 		hfi_instance = &hfi_instances[j];