diff mbox series

[v4,02/13] cpufreq: amd-pstate: enable AMD Precision Boost mode switch

Message ID b3db7981e407d5f111eeb27a8504a4ed7979ba60.1657876961.git.Perry.Yuan@amd.com
State New
Headers show
Series AMD Pstate Enhancement And Issue Fixs | expand

Commit Message

Yuan, Perry July 15, 2022, 10:04 a.m. UTC
Add support to switch AMD precision boost state to scale cpu max
frequency that will help to improve the processor throughput.

When the boost state is enabled with the command below, the CPU can reach
its absolute maximum performance level, i.e. the highest perf that the CPU
physically supports. This performance level may not be sustainable for
long durations, but it helps to improve IO workload tasks.

* turn on CPU boost state under root
  echo 1 > /sys/devices/system/cpu/cpufreq/boost

If the user turns boost off, the CPU can reach at most the maximum sustained
performance level of the processor, i.e. the level the processor can maintain
while working continuously, and it definitely saves some power compared to
boost-on mode.

* turn off CPU boost state under root
  echo 0 > /sys/devices/system/cpu/cpufreq/boost

Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
---
 arch/x86/include/asm/msr-index.h |  2 ++
 drivers/cpufreq/amd-pstate.c     | 22 +++++++++++++++++++---
 2 files changed, 21 insertions(+), 3 deletions(-)

Comments

Huang Rui July 19, 2022, 12:45 a.m. UTC | #1
On Fri, Jul 15, 2022 at 06:04:21PM +0800, Yuan, Perry wrote:
> Add support to switch AMD precision boost state to scale cpu max
> frequency that will help to improve the processor throughput.
> 
> when set boost state to be enabled, user will need to execute below commands,
> the CPU will reach absolute maximum performance level or the highest perf which
> CPU physical support. This performance level may not be sustainable for
> long durations, it will help to improve the IO workload tasks.
> 
> * turn on CPU boost state under root
>   echo 1 > /sys/devices/system/cpu/cpufreq/boost
> 
> If user set boost off,the CPU can reach to the maximum sustained
> performance level of the process, that level is the process can maintain
> continously working and definitely it can save some power compared to
> boost on mode.
> 
> * turn off CPU boost state under root
>   echo 0 > /sys/devices/system/cpu/cpufreq/boost
> 
> Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> ---
>  arch/x86/include/asm/msr-index.h |  2 ++
>  drivers/cpufreq/amd-pstate.c     | 22 +++++++++++++++++++---
>  2 files changed, 21 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
> index 869508de8269..b952fd6d6916 100644
> --- a/arch/x86/include/asm/msr-index.h
> +++ b/arch/x86/include/asm/msr-index.h
> @@ -559,6 +559,8 @@
>  #define AMD_CPPC_MIN_PERF(x)		(((x) & 0xff) << 8)
>  #define AMD_CPPC_DES_PERF(x)		(((x) & 0xff) << 16)
>  #define AMD_CPPC_ENERGY_PERF_PREF(x)	(((x) & 0xff) << 24)
> +#define AMD_CPPC_PRECISION_BOOST_BIT	25
> +#define AMD_CPPC_PRECISION_BOOST_ENABLED	BIT_ULL(AMD_CPPC_PRECISION_BOOST_BIT)

Bit 25 (CpbDis) of MSRC001_0015 [Hardware Configuration] is the core
performance boost disable flag.

Please see the section 17.2 Core Performance Boost of PPR:

https://www.amd.com/system/files/TechDocs/40332.pdf

Core performance boost (CPB) dynamically monitors processor activity to
create an estimate of power consumption. If the estimated processor
consumption is below an internally defined power limit and software has
requested P0 on a given core, hardware may transition the core to a
frequency and voltage beyond those defined for P0. If the estimated power
consumption exceeds the defined power limit, some or all cores are limited
to the frequency and voltage defined by P0.

The boost state is designed for the legacy ACPI P-State function, where it is
used to request a frequency beyond the P0 state (which is equal to the nominal
frequency in CPPC), and we already have an operation for it, such as
MSR_K7_HWCR_CPB_DIS in the acpi-cpufreq driver. In CPPC, however, we can set
the performance hint beyond the nominal perf to reach the same goal, so this
control is no longer needed. Furthermore, this function for legacy ACPI
P-State should not be mixed up with the CPPC policy. We should prevent this
flag from taking effect in CPPC.

Thanks,
Ray

>  
>  /* AMD Performance Counter Global Status and Control MSRs */
>  #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS	0xc0000300
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 9ac75c1cde9c..188e055e24a2 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -122,6 +122,7 @@ struct amd_cpudata {
>  
>  	u64 freq;
>  	bool	boost_supported;
> +	u64 	cppc_hw_conf_cached;
>  };
>  
>  static inline int pstate_enable(bool enable)
> @@ -438,18 +439,27 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
>  {
>  	struct amd_cpudata *cpudata = policy->driver_data;
>  	int ret;
> +	u64 value;
>  
>  	if (!cpudata->boost_supported) {
>  		pr_err("Boost mode is not supported by this processor or SBIOS\n");
>  		return -EINVAL;
>  	}
>  
> -	if (state)
> +	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> +	if (ret)
> +		return ret;
> +
> +	if (state) {
> +		value |= AMD_CPPC_PRECISION_BOOST_ENABLED;
>  		policy->cpuinfo.max_freq = cpudata->max_freq;
> -	else
> +	} else {
> +		value &= ~AMD_CPPC_PRECISION_BOOST_ENABLED;
>  		policy->cpuinfo.max_freq = cpudata->nominal_freq;
> -
> +	}
>  	policy->max = policy->cpuinfo.max_freq;
> +	WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
> +	wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, value);
>  
>  	ret = freq_qos_update_request(&cpudata->req[1],
>  				      policy->cpuinfo.max_freq);
> @@ -478,6 +488,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>  	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
>  	struct device *dev;
>  	struct amd_cpudata *cpudata;
> +	u64 value;
>  
>  	dev = get_cpu_device(policy->cpu);
>  	if (!dev)
> @@ -542,6 +553,11 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>  
>  	policy->driver_data = cpudata;
>  
> +	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> +	if (ret)
> +		return ret;
> +	WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
> +
>  	amd_pstate_boost_init(cpudata);
>  
>  	return 0;
> -- 
> 2.32.0
>
Yuan, Perry July 21, 2022, 9:15 a.m. UTC | #2
[AMD Official Use Only - General]

Hi Ray. 

> -----Original Message-----
> From: Huang, Ray <Ray.Huang@amd.com>
> Sent: Tuesday, July 19, 2022 8:46 AM
> To: Yuan, Perry <Perry.Yuan@amd.com>
> Cc: rafael.j.wysocki@intel.com; viresh.kumar@linaro.org; Sharma, Deepak
> <Deepak.Sharma@amd.com>; Limonciello, Mario
> <Mario.Limonciello@amd.com>; Fontenot, Nathan
> <Nathan.Fontenot@amd.com>; Deucher, Alexander
> <Alexander.Deucher@amd.com>; Su, Jinzhou (Joe) <Jinzhou.Su@amd.com>;
> Huang, Shimmer <Shimmer.Huang@amd.com>; Du, Xiaojian
> <Xiaojian.Du@amd.com>; Meng, Li (Jassmine) <Li.Meng@amd.com>; linux-
> pm@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH v4 02/13] cpufreq: amd-pstate: enable AMD Precision Boost
> mode switch
> 
> On Fri, Jul 15, 2022 at 06:04:21PM +0800, Yuan, Perry wrote:
> > Add support to switch AMD precision boost state to scale cpu max
> > frequency that will help to improve the processor throughput.
> >
> > when set boost state to be enabled, user will need to execute below
> > commands, the CPU will reach absolute maximum performance level or the
> > highest perf which CPU physical support. This performance level may
> > not be sustainable for long durations, it will help to improve the IO workload
> tasks.
> >
> > * turn on CPU boost state under root
> >   echo 1 > /sys/devices/system/cpu/cpufreq/boost
> >
> > If user set boost off,the CPU can reach to the maximum sustained
> > performance level of the process, that level is the process can
> > maintain continously working and definitely it can save some power
> > compared to boost on mode.
> >
> > * turn off CPU boost state under root
> >   echo 0 > /sys/devices/system/cpu/cpufreq/boost
> >
> > Signed-off-by: Perry Yuan <Perry.Yuan@amd.com>
> > ---
> >  arch/x86/include/asm/msr-index.h |  2 ++
> >  drivers/cpufreq/amd-pstate.c     | 22 +++++++++++++++++++---
> >  2 files changed, 21 insertions(+), 3 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/msr-index.h
> > b/arch/x86/include/asm/msr-index.h
> > index 869508de8269..b952fd6d6916 100644
> > --- a/arch/x86/include/asm/msr-index.h
> > +++ b/arch/x86/include/asm/msr-index.h
> > @@ -559,6 +559,8 @@
> >  #define AMD_CPPC_MIN_PERF(x)		(((x) & 0xff) << 8)
> >  #define AMD_CPPC_DES_PERF(x)		(((x) & 0xff) << 16)
> >  #define AMD_CPPC_ENERGY_PERF_PREF(x)	(((x) & 0xff) << 24)
> > +#define AMD_CPPC_PRECISION_BOOST_BIT	25
> > +#define AMD_CPPC_PRECISION_BOOST_ENABLED
> 	BIT_ULL(AMD_CPPC_PRECISION_BOOST_BIT)
> 
> The bit 25 (CpbDis) of MSRC001_0015 [Hardware Configuration] indicates the
> core performance boost disable flag.
> 
> Please see the section 17.2 Core Performance Boost of PPR:
> 
> https://www.amd.com/system/files/TechDocs/40332.pdf
> 
> Core performance boost (CPB) dynamically monitors processor activity to create
> an estimate of power consumption. If the estimated processor consumption is
> below an internally defined power limit and software has requested P0 on a
> given core, hardware may transition the core to a frequency and voltage beyond
> those defined for P0. If the estimated power consumption exceeds the defined
> power limit, some or all cores are limited to the frequency and voltage defined
> by P0.
> 
> The boost state is designed for legacy ACPI P-State function which is to request
> higher frequency beyond P0 State (it's equal to nominal frequency in CPPC), and
> we already have the operation like MSR_K7_HWCR_CPB_DIS in acpi-cpufreq
> driver. However, in CPPC, we can modify the performance hint beyond the
> nominal perf to reach the goal. That won't need this control anymore. And
> furthermore, this function for legacy ACPI P-State should not be mixed them up
> with CPPC policy. We should prevent the effect for this flag in CPPC.
> 
> Thanks,
> Ray

I did not notice that acpi_cpufreq already uses this bit to control performance boost.
It seems the patch is not needed for CPPC, as you said.
I will drop the patch in V5 and instead use the target perf value to pass the desired perf level to firmware.
That will achieve the same thing: limiting the perf level and power consumption.

Thanks for your feedback. Will send V5 soon.

Perry .

> 
> >
> >  /* AMD Performance Counter Global Status and Control MSRs */
> >  #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS	0xc0000300
> > diff --git a/drivers/cpufreq/amd-pstate.c
> > b/drivers/cpufreq/amd-pstate.c index 9ac75c1cde9c..188e055e24a2 100644
> > --- a/drivers/cpufreq/amd-pstate.c
> > +++ b/drivers/cpufreq/amd-pstate.c
> > @@ -122,6 +122,7 @@ struct amd_cpudata {
> >
> >  	u64 freq;
> >  	bool	boost_supported;
> > +	u64 	cppc_hw_conf_cached;
> >  };
> >
> >  static inline int pstate_enable(bool enable) @@ -438,18 +439,27 @@
> > static int amd_pstate_set_boost(struct cpufreq_policy *policy, int
> > state)  {
> >  	struct amd_cpudata *cpudata = policy->driver_data;
> >  	int ret;
> > +	u64 value;
> >
> >  	if (!cpudata->boost_supported) {
> >  		pr_err("Boost mode is not supported by this processor or
> SBIOS\n");
> >  		return -EINVAL;
> >  	}
> >
> > -	if (state)
> > +	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> > +	if (ret)
> > +		return ret;
> > +
> > +	if (state) {
> > +		value |= AMD_CPPC_PRECISION_BOOST_ENABLED;
> >  		policy->cpuinfo.max_freq = cpudata->max_freq;
> > -	else
> > +	} else {
> > +		value &= ~AMD_CPPC_PRECISION_BOOST_ENABLED;
> >  		policy->cpuinfo.max_freq = cpudata->nominal_freq;
> > -
> > +	}
> >  	policy->max = policy->cpuinfo.max_freq;
> > +	WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
> > +	wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, value);
> >
> >  	ret = freq_qos_update_request(&cpudata->req[1],
> >  				      policy->cpuinfo.max_freq);
> > @@ -478,6 +488,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy
> *policy)
> >  	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> >  	struct device *dev;
> >  	struct amd_cpudata *cpudata;
> > +	u64 value;
> >
> >  	dev = get_cpu_device(policy->cpu);
> >  	if (!dev)
> > @@ -542,6 +553,11 @@ static int amd_pstate_cpu_init(struct
> > cpufreq_policy *policy)
> >
> >  	policy->driver_data = cpudata;
> >
> > +	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
> > +	if (ret)
> > +		return ret;
> > +	WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
> > +
> >  	amd_pstate_boost_init(cpudata);
> >
> >  	return 0;
> > --
> > 2.32.0
> >
diff mbox series

Patch

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 869508de8269..b952fd6d6916 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -559,6 +559,8 @@ 
 #define AMD_CPPC_MIN_PERF(x)		(((x) & 0xff) << 8)
 #define AMD_CPPC_DES_PERF(x)		(((x) & 0xff) << 16)
 #define AMD_CPPC_ENERGY_PERF_PREF(x)	(((x) & 0xff) << 24)
+#define AMD_CPPC_PRECISION_BOOST_BIT	25
+#define AMD_CPPC_PRECISION_BOOST_ENABLED	BIT_ULL(AMD_CPPC_PRECISION_BOOST_BIT)
 
 /* AMD Performance Counter Global Status and Control MSRs */
 #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS	0xc0000300
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 9ac75c1cde9c..188e055e24a2 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -122,6 +122,7 @@  struct amd_cpudata {
 
 	u64 freq;
 	bool	boost_supported;
+	u64 	cppc_hw_conf_cached;
 };
 
 static inline int pstate_enable(bool enable)
@@ -438,18 +439,27 @@  static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
 	int ret;
+	u64 value;
 
 	if (!cpudata->boost_supported) {
 		pr_err("Boost mode is not supported by this processor or SBIOS\n");
 		return -EINVAL;
 	}
 
-	if (state)
+	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
+	if (ret)
+		return ret;
+
+	if (state) {
+		value |= AMD_CPPC_PRECISION_BOOST_ENABLED;
 		policy->cpuinfo.max_freq = cpudata->max_freq;
-	else
+	} else {
+		value &= ~AMD_CPPC_PRECISION_BOOST_ENABLED;
 		policy->cpuinfo.max_freq = cpudata->nominal_freq;
-
+	}
 	policy->max = policy->cpuinfo.max_freq;
+	WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
+	wrmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, value);
 
 	ret = freq_qos_update_request(&cpudata->req[1],
 				      policy->cpuinfo.max_freq);
@@ -478,6 +488,7 @@  static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
 	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
 	struct device *dev;
 	struct amd_cpudata *cpudata;
+	u64 value;
 
 	dev = get_cpu_device(policy->cpu);
 	if (!dev)
@@ -542,6 +553,11 @@  static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	policy->driver_data = cpudata;
 
+	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_HW_CTL, &value);
+	if (ret)
+		return ret;
+	WRITE_ONCE(cpudata->cppc_hw_conf_cached, value);
+
 	amd_pstate_boost_init(cpudata);
 
 	return 0;