[2/2] cpufreq: intel_pstate: Process HWP Guaranteed change notification

Message ID 20210820024006.2347720-2-srinivas.pandruvada@linux.intel.com
State Accepted
Commit d0e936adbd2250cb03f2e840c6651d18edc22ace
Series [1/2] thermal: intel: Allow processing of HWP interrupt

Commit Message

Srinivas Pandruvada Aug. 20, 2021, 2:40 a.m. UTC
It is possible that the HWP guaranteed ratio changes in response to a
change in power and thermal limits. For example, when the Intel Speed
Select performance profile changes or the TDP changes, hardware can
send notifications. In particular, the guaranteed ratio may be
increased. This creates an issue when turbo is disabled: the old
limits set in MSR_HWP_REQUEST are still lower, and hardware will clip
to those older limits.

This change enables the HWP interrupt and processes HWP interrupts.
When the guaranteed ratio changes, it calls cpufreq_update_policy() so
that the driver callbacks are invoked to update to the new HWP limits.
The update is done from delayed work with a 10 ms delay to avoid
frequent updates.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
 drivers/cpufreq/intel_pstate.c | 39 ++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
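
As background for the notification mechanism described above (an
editor's sketch, not code from this patch): the guaranteed ratio that
these interrupts report on is published in MSR_HWP_CAPABILITIES, and
cpufreq_update_policy() ends up making the driver re-read it. Assuming
the kernel's usual MSR helpers:

/*
 * Sketch: re-reading the guaranteed performance ratio after a
 * notification.  Per the SDM, MSR_HWP_CAPABILITIES carries the
 * guaranteed ratio in bits 15:8.
 */
static int hwp_read_guaranteed_ratio(int cpu)
{
	u64 cap;

	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
	return (cap >> 8) & 0xff;
}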

Comments

Daniel Lezcano Aug. 20, 2021, 1:10 p.m. UTC | #1
On 20/08/2021 04:40, Srinivas Pandruvada wrote:
> It is possible that the HWP guaranteed ratio changes in response to a
> change in power and thermal limits. For example, when the Intel Speed
> Select performance profile changes or the TDP changes, hardware can
> send notifications. In particular, the guaranteed ratio may be
> increased. This creates an issue when turbo is disabled: the old
> limits set in MSR_HWP_REQUEST are still lower, and hardware will clip
> to those older limits.
> 
> This change enables the HWP interrupt and processes HWP interrupts.
> When the guaranteed ratio changes, it calls cpufreq_update_policy() so
> that the driver callbacks are invoked to update to the new HWP limits.
> The update is done from delayed work with a 10 ms delay to avoid
> frequent updates.
> 
> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

Does this patch depend on 1/2 ?

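For context on the dependency: this patch includes
../drivers/thermal/intel/thermal_interrupt.h, which is added by [1/2],
and defines a non-static notify_hwp_interrupt() for the thermal code to
call. A sketch of what such a header would contain (the actual [1/2]
diff is authoritative; the real header may carry more declarations):

/* Sketch of thermal_interrupt.h as added by patch [1/2]. */
#ifndef _INTEL_THERMAL_INTERRUPT_H
#define _INTEL_THERMAL_INTERRUPT_H

/* Called from the Intel thermal vector handler when an HWP
 * notification interrupt fires; implemented by intel_pstate. */
extern void notify_hwp_interrupt(void);

#endif /* _INTEL_THERMAL_INTERRUPT_H */
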
Rafael J. Wysocki Aug. 25, 2021, 6:11 p.m. UTC | #2
On Fri, Aug 20, 2021 at 4:40 AM Srinivas Pandruvada
<srinivas.pandruvada@linux.intel.com> wrote:
>
> It is possible that the HWP guaranteed ratio changes in response to a
> change in power and thermal limits. For example, when the Intel Speed
> Select performance profile changes or the TDP changes, hardware can
> send notifications. In particular, the guaranteed ratio may be
> increased. This creates an issue when turbo is disabled: the old
> limits set in MSR_HWP_REQUEST are still lower, and hardware will clip
> to those older limits.
>
> This change enables the HWP interrupt and processes HWP interrupts.
> When the guaranteed ratio changes, it calls cpufreq_update_policy() so
> that the driver callbacks are invoked to update to the new HWP limits.
> The update is done from delayed work with a 10 ms delay to avoid
> frequent updates.
>
> Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>

Applied along with the [1/2] as 5.15 material, thanks!

Srinivas Pandruvada Sept. 8, 2021, 1:54 a.m. UTC | #3
Hi Wang,

I think this is a Lenovo system. The FW sends some interrupts
prematurely, before the driver is ready.

There is a fix available, but we have decided to revert the commit.

Please try the patch attached here:
https://bugzilla.kernel.org/show_bug.cgi?id=214329

Thanks,
Srinivas

On Wed, 2021-09-08 at 02:13 +0800, Xi Ruoyao wrote:
> Hi Srinivas,
>
> Sorry for disturbing, but the mainline kernel panics on my system
> during boot. Reverting this commit seems to fix the problem.
>
> I don't have kdump set up on the system, so I could only take a photo
> containing (a part of :( ) the panic message.  The system is a laptop
> with a Core i7-1065G7.  My kernel config is also attached.
>
> Not sure if there is something wrong in this commit, or there is a
> firmware bug from the vendor of my laptop.  In the latter case, can we
> have something in the kernel config or on the cmdline to disable HWP
> as a workaround?

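The crash mode described above (an HWP interrupt delivered before
intel_pstate has allocated its per-CPU data) suggests the handler needs
a guard. A minimal defensive sketch, for illustration only and using
the same names as the patch; the actual fix is the one attached to the
bugzilla link above:

/*
 * Sketch only: ignore notifications that arrive before the driver has
 * set up all_cpu_data, matching the premature-FW-interrupt theory.
 */
void notify_hwp_interrupt(void)
{
	unsigned int this_cpu = smp_processor_id();
	struct cpudata *cpudata;
	u64 value;

	if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
		return;

	rdmsrl(MSR_HWP_STATUS, value);
	if (!(value & 0x01))
		return;

	/* FW can raise the interrupt before the driver is ready; the
	 * per-CPU data may not exist yet, so ack and drop. */
	if (!all_cpu_data || !all_cpu_data[this_cpu]) {
		wrmsrl(MSR_HWP_STATUS, 0);
		return;
	}

	cpudata = all_cpu_data[this_cpu];
	schedule_delayed_work_on(this_cpu, &cpudata->hwp_notify_work,
				 msecs_to_jiffies(10));
}

As for the command-line question: booting with intel_pstate=no_hwp
keeps HWP, and therefore this notification path, disabled.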
Xi Ruoyao Sept. 8, 2021, 5:45 a.m. UTC | #4
On Tue, 2021-09-07 at 18:54 -0700, Srinivas Pandruvada wrote:
> Hi Wang,
>
> I think this is a Lenovo system. The FW sends some interrupts
> prematurely, before the driver is ready.

It's not Lenovo, but I think this kind of FW bug exists in the wild.

> There is a fix available, but we have decided to revert the commit.
>
> Please try the patch attached here:
> https://bugzilla.kernel.org/show_bug.cgi?id=214329

I can confirm it fixed the issue (at least on my machine :).

Thanks!

-- 
Xi Ruoyao <xry111@mengyan1223.wang>
School of Aerospace Science and Technology, Xidian University

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index bb4549959b11..0fd2375c1f1e 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -32,6 +32,7 @@
 #include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 #include <asm/intel-family.h>
+#include "../drivers/thermal/intel/thermal_interrupt.h"
 
 #define INTEL_PSTATE_SAMPLING_INTERVAL	(10 * NSEC_PER_MSEC)
 
@@ -219,6 +220,7 @@ struct global_params {
  * @sched_flags:	Store scheduler flags for possible cross CPU update
  * @hwp_boost_min:	Last HWP boosted min performance
  * @suspended:		Whether or not the driver has been suspended.
+ * @hwp_notify_work:	workqueue for HWP notifications.
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -257,6 +259,7 @@ struct cpudata {
 	unsigned int sched_flags;
 	u32 hwp_boost_min;
 	bool suspended;
+	struct delayed_work hwp_notify_work;
 };
 
 static struct cpudata **all_cpu_data;
@@ -1625,6 +1628,40 @@ static void intel_pstate_sysfs_hide_hwp_dynamic_boost(void)
 
 /************************** sysfs end ************************/
 
+static void intel_pstate_notify_work(struct work_struct *work)
+{
+	mutex_lock(&intel_pstate_driver_lock);
+	cpufreq_update_policy(smp_processor_id());
+	wrmsrl(MSR_HWP_STATUS, 0);
+	mutex_unlock(&intel_pstate_driver_lock);
+}
+
+void notify_hwp_interrupt(void)
+{
+	unsigned int this_cpu = smp_processor_id();
+	struct cpudata *cpudata;
+	u64 value;
+
+	if (!hwp_active || !boot_cpu_has(X86_FEATURE_HWP_NOTIFY))
+		return;
+
+	rdmsrl(MSR_HWP_STATUS, value);
+	if (!(value & 0x01))
+		return;
+
+	cpudata = all_cpu_data[this_cpu];
+	schedule_delayed_work_on(this_cpu, &cpudata->hwp_notify_work, msecs_to_jiffies(10));
+}
+
+static void intel_pstate_enable_hwp_interrupt(struct cpudata *cpudata)
+{
+	/* Enable HWP notification interrupt for guaranteed performance change */
+	if (boot_cpu_has(X86_FEATURE_HWP_NOTIFY)) {
+		INIT_DELAYED_WORK(&cpudata->hwp_notify_work, intel_pstate_notify_work);
+		wrmsrl_on_cpu(cpudata->cpu, MSR_HWP_INTERRUPT, 0x01);
+	}
+}
+
 static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 {
 	/* First disable HWP notification interrupt as we don't process them */
@@ -1634,6 +1671,8 @@ static void intel_pstate_hwp_enable(struct cpudata *cpudata)
 	wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1);
 	if (cpudata->epp_default == -EINVAL)
 		cpudata->epp_default = intel_pstate_get_epp(cpudata, 0);
+
+	intel_pstate_enable_hwp_interrupt(cpudata);
 }
 
 static int atom_get_min_pstate(void)
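
A gloss on the 0x01 literals above (editor's note; the names below are
illustrative, not necessarily the kernel's): per the SDM, bit 0 of
MSR_HWP_INTERRUPT enables the guaranteed-performance-change
notification, and bit 0 of MSR_HWP_STATUS latches it, which is why
notify_hwp_interrupt() tests value & 0x01 and the work item clears the
status MSR afterwards.

/* Illustrative symbolic names for the literals used in the patch. */
#define HWP_INT_EN_GUARANTEED_CHANGE	BIT(0)	/* MSR_HWP_INTERRUPT */
#define HWP_STATUS_GUARANTEED_CHANGE	BIT(0)	/* MSR_HWP_STATUS */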