diff mbox series

[V5,5/7] cpufreq: amd-pstate: Update amd-pstate preferred core ranking dynamically

Message ID 20230905015116.2268926-6-li.meng@amd.com
State New
Headers show
Series amd-pstate preferred core | expand

Commit Message

Meng, Li (Jassmine) Sept. 5, 2023, 1:51 a.m. UTC
Preferred core rankings can be changed dynamically by the
platform based on the workload and platform conditions and
accounting for thermals and aging.
When this occurs, the cpu priority needs to be set.

Signed-off-by: Meng Li <li.meng@amd.com>
Reviewed-by: Wyes Karny <wyes.karny@amd.com>
---
 drivers/cpufreq/amd-pstate.c | 36 ++++++++++++++++++++++++++++++++++--
 include/linux/amd-pstate.h   | 11 +++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

Comments

Huang Rui Sept. 6, 2023, 1:57 p.m. UTC | #1
On Tue, Sep 05, 2023 at 09:51:14AM +0800, Meng, Li (Jassmine) wrote:
> Preferred core rankings can be changed dynamically by the
> platform based on the workload and platform conditions and
> accounting for thermals and aging.
> When this occurs, cpu priority need to be set.
> 
> Signed-off-by: Meng Li <li.meng@amd.com>
> Reviewed-by: Wyes Karny <wyes.karny@amd.com>
> ---
>  drivers/cpufreq/amd-pstate.c | 36 ++++++++++++++++++++++++++++++++++--
>  include/linux/amd-pstate.h   | 11 +++++++++++
>  2 files changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 454eb6e789e7..8c19e1d50d29 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -318,6 +318,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
>  	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
>  	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
>  	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
> +	WRITE_ONCE(cpudata->cppc_highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
>  
>  	return 0;
>  }
> @@ -339,6 +340,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
>  	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
>  		   cppc_perf.lowest_nonlinear_perf);
>  	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
> +	WRITE_ONCE(cpudata->cppc_highest_perf, cppc_perf.highest_perf);
>  
>  	if (cppc_state == AMD_PSTATE_ACTIVE)
>  		return 0;
> @@ -545,7 +547,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
>  	if (target_perf < capacity)
>  		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
>  
> -	min_perf = READ_ONCE(cpudata->highest_perf);
> +	min_perf = READ_ONCE(cpudata->lowest_perf);
>  	if (_min_perf < capacity)
>  		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
>  
> @@ -748,6 +750,34 @@ static void amd_pstate_init_prefcore(void)
>  	schedule_work(&sched_prefcore_work);
>  }
>  
> +static void amd_pstate_update_highest_perf(unsigned int cpu)
> +{
> +	struct cpufreq_policy *policy;
> +	struct amd_cpudata *cpudata;
> +	u32 prev_high = 0, cur_high = 0;
> +	u64 highest_perf;
> +	int ret;
> +
> +	if (!prefcore)
> +		return;
> +
> +	ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
> +	if (ret)
> +		return;
> +
> +	policy = cpufreq_cpu_get(cpu);
> +	cpudata = policy->driver_data;
> +	cur_high = highest_perf;
> +	prev_high = READ_ONCE(cpudata->cppc_highest_perf);
> +
> +	if (prev_high != cur_high) {
> +		WRITE_ONCE(cpudata->cppc_highest_perf, cur_high);
> +		sched_set_itmt_core_prio(cur_high, cpu);
> +	}
> +
> +	cpufreq_cpu_put(policy);
> +}
> +
>  static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>  {
>  	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> @@ -912,7 +942,7 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
>  	u32 perf;
>  	struct amd_cpudata *cpudata = policy->driver_data;
>  
> -	perf = READ_ONCE(cpudata->highest_perf);
> +	perf = READ_ONCE(cpudata->cppc_highest_perf);
>  
>  	return sysfs_emit(buf, "%u\n", perf);
>  }
> @@ -1479,6 +1509,7 @@ static struct cpufreq_driver amd_pstate_driver = {
>  	.suspend	= amd_pstate_cpu_suspend,
>  	.resume		= amd_pstate_cpu_resume,
>  	.set_boost	= amd_pstate_set_boost,
> +	.update_highest_perf	= amd_pstate_update_highest_perf,
>  	.name		= "amd-pstate",
>  	.attr		= amd_pstate_attr,
>  };
> @@ -1493,6 +1524,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
>  	.online		= amd_pstate_epp_cpu_online,
>  	.suspend	= amd_pstate_epp_suspend,
>  	.resume		= amd_pstate_epp_resume,
> +	.update_highest_perf	= amd_pstate_update_highest_perf,
>  	.name		= "amd-pstate-epp",
>  	.attr		= amd_pstate_epp_attr,
>  };
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index 446394f84606..2159fd5693fe 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -31,6 +31,11 @@ struct amd_aperf_mperf {
>  	u64 mperf;
>  	u64 tsc;
>  };
> +	/* For platforms that do not support the preferred core feature, the
> +	 * highest_pef may be configured with 166 or 255, to avoid max frequency
> +	 * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
> +	 * the default max perf.
> +	 */

This seems to be a duplicate comment?

Thanks,
Ray

>  
>  /**
>   * struct amd_cpudata - private CPU data for AMD P-State
> @@ -39,11 +44,16 @@ struct amd_aperf_mperf {
>   * @cppc_req_cached: cached performance request hints
>   * @highest_perf: the maximum performance an individual processor may reach,
>   *		  assuming ideal conditions
> + *		  For platforms that do not support the preferred core feature, the
> + *		  highest_pef may be configured with 166 or 255, to avoid max frequency
> + *		  calculated wrongly. we take the fixed value as the highest_perf.
>   * @nominal_perf: the maximum sustained performance level of the processor,
>   *		  assuming ideal operating conditions
>   * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
>   *			   savings are achieved
>   * @lowest_perf: the absolute lowest performance level of the processor
> + * @cppc_highest_perf: the maximum performance an individual processor may reach,
> + *		  assuming ideal conditions
>   * @max_freq: the frequency that mapped to highest_perf
>   * @min_freq: the frequency that mapped to lowest_perf
>   * @nominal_freq: the frequency that mapped to nominal_perf
> @@ -70,6 +80,7 @@ struct amd_cpudata {
>  	u32	nominal_perf;
>  	u32	lowest_nonlinear_perf;
>  	u32	lowest_perf;
> +	u32     cppc_highest_perf;
>  
>  	u32	max_freq;
>  	u32	min_freq;
> -- 
> 2.34.1
>
Wyes Karny Sept. 6, 2023, 5:49 p.m. UTC | #2
Hi Meng Li,

On 05 Sep 09:51, Meng Li wrote:
> Preferred core rankings can be changed dynamically by the
> platform based on the workload and platform conditions and
> accounting for thermals and aging.
> When this occurs, cpu priority need to be set.
> 
> Signed-off-by: Meng Li <li.meng@amd.com>
> Reviewed-by: Wyes Karny <wyes.karny@amd.com>
> ---
>  drivers/cpufreq/amd-pstate.c | 36 ++++++++++++++++++++++++++++++++++--
>  include/linux/amd-pstate.h   | 11 +++++++++++
>  2 files changed, 45 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 454eb6e789e7..8c19e1d50d29 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -318,6 +318,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
>  	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
>  	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
>  	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
> +	WRITE_ONCE(cpudata->cppc_highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));

Is there any reason to change this variable name from
`prefcore_highest_perf`(in v3)  to `cppc_highest_perf`?  I feel
`cppc_highest_perf` is a bit confusing as there is already `highest_perf`
variable present. How about something like `prefcore_ranking` variable
name?

Thanks,
Wyes

>  
>  	return 0;
>  }
> @@ -339,6 +340,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
>  	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
>  		   cppc_perf.lowest_nonlinear_perf);
>  	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
> +	WRITE_ONCE(cpudata->cppc_highest_perf, cppc_perf.highest_perf);
>  
>  	if (cppc_state == AMD_PSTATE_ACTIVE)
>  		return 0;
> @@ -545,7 +547,7 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
>  	if (target_perf < capacity)
>  		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
>  
> -	min_perf = READ_ONCE(cpudata->highest_perf);
> +	min_perf = READ_ONCE(cpudata->lowest_perf);
>  	if (_min_perf < capacity)
>  		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
>  
> @@ -748,6 +750,34 @@ static void amd_pstate_init_prefcore(void)
>  	schedule_work(&sched_prefcore_work);
>  }
>  
> +static void amd_pstate_update_highest_perf(unsigned int cpu)
> +{
> +	struct cpufreq_policy *policy;
> +	struct amd_cpudata *cpudata;
> +	u32 prev_high = 0, cur_high = 0;
> +	u64 highest_perf;
> +	int ret;
> +
> +	if (!prefcore)
> +		return;
> +
> +	ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
> +	if (ret)
> +		return;
> +
> +	policy = cpufreq_cpu_get(cpu);
> +	cpudata = policy->driver_data;
> +	cur_high = highest_perf;
> +	prev_high = READ_ONCE(cpudata->cppc_highest_perf);
> +
> +	if (prev_high != cur_high) {
> +		WRITE_ONCE(cpudata->cppc_highest_perf, cur_high);
> +		sched_set_itmt_core_prio(cur_high, cpu);
> +	}
> +
> +	cpufreq_cpu_put(policy);
> +}
> +
>  static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>  {
>  	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
> @@ -912,7 +942,7 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
>  	u32 perf;
>  	struct amd_cpudata *cpudata = policy->driver_data;
>  
> -	perf = READ_ONCE(cpudata->highest_perf);
> +	perf = READ_ONCE(cpudata->cppc_highest_perf);
>  
>  	return sysfs_emit(buf, "%u\n", perf);
>  }
> @@ -1479,6 +1509,7 @@ static struct cpufreq_driver amd_pstate_driver = {
>  	.suspend	= amd_pstate_cpu_suspend,
>  	.resume		= amd_pstate_cpu_resume,
>  	.set_boost	= amd_pstate_set_boost,
> +	.update_highest_perf	= amd_pstate_update_highest_perf,
>  	.name		= "amd-pstate",
>  	.attr		= amd_pstate_attr,
>  };
> @@ -1493,6 +1524,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
>  	.online		= amd_pstate_epp_cpu_online,
>  	.suspend	= amd_pstate_epp_suspend,
>  	.resume		= amd_pstate_epp_resume,
> +	.update_highest_perf	= amd_pstate_update_highest_perf,
>  	.name		= "amd-pstate-epp",
>  	.attr		= amd_pstate_epp_attr,
>  };
> diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
> index 446394f84606..2159fd5693fe 100644
> --- a/include/linux/amd-pstate.h
> +++ b/include/linux/amd-pstate.h
> @@ -31,6 +31,11 @@ struct amd_aperf_mperf {
>  	u64 mperf;
>  	u64 tsc;
>  };
> +	/* For platforms that do not support the preferred core feature, the
> +	 * highest_pef may be configured with 166 or 255, to avoid max frequency
> +	 * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
> +	 * the default max perf.
> +	 */
>  
>  /**
>   * struct amd_cpudata - private CPU data for AMD P-State
> @@ -39,11 +44,16 @@ struct amd_aperf_mperf {
>   * @cppc_req_cached: cached performance request hints
>   * @highest_perf: the maximum performance an individual processor may reach,
>   *		  assuming ideal conditions
> + *		  For platforms that do not support the preferred core feature, the
> + *		  highest_pef may be configured with 166 or 255, to avoid max frequency
> + *		  calculated wrongly. we take the fixed value as the highest_perf.
>   * @nominal_perf: the maximum sustained performance level of the processor,
>   *		  assuming ideal operating conditions
>   * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
>   *			   savings are achieved
>   * @lowest_perf: the absolute lowest performance level of the processor
> + * @cppc_highest_perf: the maximum performance an individual processor may reach,
> + *		  assuming ideal conditions
>   * @max_freq: the frequency that mapped to highest_perf
>   * @min_freq: the frequency that mapped to lowest_perf
>   * @nominal_freq: the frequency that mapped to nominal_perf
> @@ -70,6 +80,7 @@ struct amd_cpudata {
>  	u32	nominal_perf;
>  	u32	lowest_nonlinear_perf;
>  	u32	lowest_perf;
> +	u32     cppc_highest_perf;
>  
>  	u32	max_freq;
>  	u32	min_freq;
> -- 
> 2.34.1
>
Peter Zijlstra Sept. 8, 2023, 1:27 p.m. UTC | #3
On Tue, Sep 05, 2023 at 09:51:14AM +0800, Meng Li wrote:

> +static void amd_pstate_update_highest_perf(unsigned int cpu)
> +{
> +	struct cpufreq_policy *policy;
> +	struct amd_cpudata *cpudata;
> +	u32 prev_high = 0, cur_high = 0;
> +	u64 highest_perf;
> +	int ret;
> +
> +	if (!prefcore)
> +		return;
> +
> +	ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
> +	if (ret)
> +		return;
> +
> +	policy = cpufreq_cpu_get(cpu);
> +	cpudata = policy->driver_data;
> +	cur_high = highest_perf;
> +	prev_high = READ_ONCE(cpudata->cppc_highest_perf);
> +
> +	if (prev_high != cur_high) {
> +		WRITE_ONCE(cpudata->cppc_highest_perf, cur_high);
> +		sched_set_itmt_core_prio(cur_high, cpu);

I just noticed, your cur_high is explicitly 'u32', but
sched_set_itmt_core_prio() and the rest of the scheduler use 'int' (aka
s32). If you somehow get that top bit set things might not work out as
expected.

Please double check.

> +	}
> +
> +	cpufreq_cpu_put(policy);
> +}
diff mbox series

Patch

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 454eb6e789e7..8c19e1d50d29 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -318,6 +318,7 @@  static int pstate_init_perf(struct amd_cpudata *cpudata)
 	WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
 	WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
 	WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+	WRITE_ONCE(cpudata->cppc_highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
 
 	return 0;
 }
@@ -339,6 +340,7 @@  static int cppc_init_perf(struct amd_cpudata *cpudata)
 	WRITE_ONCE(cpudata->lowest_nonlinear_perf,
 		   cppc_perf.lowest_nonlinear_perf);
 	WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+	WRITE_ONCE(cpudata->cppc_highest_perf, cppc_perf.highest_perf);
 
 	if (cppc_state == AMD_PSTATE_ACTIVE)
 		return 0;
@@ -545,7 +547,7 @@  static void amd_pstate_adjust_perf(unsigned int cpu,
 	if (target_perf < capacity)
 		des_perf = DIV_ROUND_UP(cap_perf * target_perf, capacity);
 
-	min_perf = READ_ONCE(cpudata->highest_perf);
+	min_perf = READ_ONCE(cpudata->lowest_perf);
 	if (_min_perf < capacity)
 		min_perf = DIV_ROUND_UP(cap_perf * _min_perf, capacity);
 
@@ -748,6 +750,48 @@  static void amd_pstate_init_prefcore(void)
 	schedule_work(&sched_prefcore_work);
 }
 
+/**
+ * amd_pstate_update_highest_perf - refresh the cached highest perf of a CPU
+ * @cpu: CPU whose highest perf value should be re-read
+ *
+ * Does nothing unless preferred core support (prefcore) is enabled. Reads
+ * the current highest perf value through amd_pstate_get_highest_perf() and,
+ * when it differs from the cached cppc_highest_perf, stores the new value
+ * and passes it to sched_set_itmt_core_prio() as the priority of @cpu.
+ */
+static void amd_pstate_update_highest_perf(unsigned int cpu)
+{
+	struct cpufreq_policy *policy;
+	struct amd_cpudata *cpudata;
+	u32 prev_high = 0, cur_high = 0;
+	u64 highest_perf;
+	int ret;
+
+	if (!prefcore)
+		return;
+
+	ret = amd_pstate_get_highest_perf(cpu, &highest_perf);
+	if (ret)
+		return;
+
+	/* cpufreq_cpu_get() returns NULL when no policy exists for @cpu */
+	policy = cpufreq_cpu_get(cpu);
+	if (!policy)
+		return;
+
+	cpudata = policy->driver_data;
+	cur_high = highest_perf;
+	prev_high = READ_ONCE(cpudata->cppc_highest_perf);
+
+	if (prev_high != cur_high) {
+		WRITE_ONCE(cpudata->cppc_highest_perf, cur_high);
+		sched_set_itmt_core_prio(cur_high, cpu);
+	}
+
+	/* drop the reference taken by cpufreq_cpu_get() */
+	cpufreq_cpu_put(policy);
+}
+
 static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
 {
 	int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
@@ -912,7 +942,7 @@  static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
 	u32 perf;
 	struct amd_cpudata *cpudata = policy->driver_data;
 
-	perf = READ_ONCE(cpudata->highest_perf);
+	perf = READ_ONCE(cpudata->cppc_highest_perf);
 
 	return sysfs_emit(buf, "%u\n", perf);
 }
@@ -1479,6 +1509,7 @@  static struct cpufreq_driver amd_pstate_driver = {
 	.suspend	= amd_pstate_cpu_suspend,
 	.resume		= amd_pstate_cpu_resume,
 	.set_boost	= amd_pstate_set_boost,
+	.update_highest_perf	= amd_pstate_update_highest_perf,
 	.name		= "amd-pstate",
 	.attr		= amd_pstate_attr,
 };
@@ -1493,6 +1524,7 @@  static struct cpufreq_driver amd_pstate_epp_driver = {
 	.online		= amd_pstate_epp_cpu_online,
 	.suspend	= amd_pstate_epp_suspend,
 	.resume		= amd_pstate_epp_resume,
+	.update_highest_perf	= amd_pstate_update_highest_perf,
 	.name		= "amd-pstate-epp",
 	.attr		= amd_pstate_epp_attr,
 };
diff --git a/include/linux/amd-pstate.h b/include/linux/amd-pstate.h
index 446394f84606..2159fd5693fe 100644
--- a/include/linux/amd-pstate.h
+++ b/include/linux/amd-pstate.h
@@ -31,6 +31,11 @@  struct amd_aperf_mperf {
 	u64 mperf;
 	u64 tsc;
 };
+	/* For platforms that do not support the preferred core feature, the
+	 * highest_perf may be configured with 166 or 255, to avoid max frequency
+	 * calculated wrongly. We take the AMD_CPPC_HIGHEST_PERF(cap1) value as
+	 * the default max perf.
+	 */
 
 /**
  * struct amd_cpudata - private CPU data for AMD P-State
@@ -39,11 +44,16 @@  struct amd_aperf_mperf {
  * @cppc_req_cached: cached performance request hints
  * @highest_perf: the maximum performance an individual processor may reach,
  *		  assuming ideal conditions
+ *		  For platforms that do not support the preferred core feature, the
+ *		  highest_perf may be configured with 166 or 255, to avoid max frequency
+ *		  calculated wrongly. We take the fixed value as the highest_perf.
  * @nominal_perf: the maximum sustained performance level of the processor,
  *		  assuming ideal operating conditions
  * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
  *			   savings are achieved
  * @lowest_perf: the absolute lowest performance level of the processor
+ * @cppc_highest_perf: the maximum performance an individual processor may reach,
+ *		  assuming ideal conditions
  * @max_freq: the frequency that mapped to highest_perf
  * @min_freq: the frequency that mapped to lowest_perf
  * @nominal_freq: the frequency that mapped to nominal_perf
@@ -70,6 +80,7 @@  struct amd_cpudata {
 	u32	nominal_perf;
 	u32	lowest_nonlinear_perf;
 	u32	lowest_perf;
+	u32     cppc_highest_perf;
 
 	u32	max_freq;
 	u32	min_freq;