diff mbox series

[v2,3/5] cpufreq: intel_pstate: Add ->offline and ->online callbacks

Message ID 2786976.RHVxHup3hB@kreacher
State Superseded
Headers show
Series [v2,1/5] cpufreq: intel_pstate: Refuse to turn off with HWP enabled | expand

Commit Message

Rafael J. Wysocki Aug. 24, 2020, 5:43 p.m. UTC
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>

Add ->offline and ->online driver callbacks to prepare for taking a
CPU offline and to restore its working configuration when it goes
back online, respectively, to avoid invoking the ->init callback on
every CPU online which is quite a bit of unnecessary overhead.

Define ->offline and ->online so that they can be used in the
passive mode as well as in the active mode and because ->offline
will do the majority of ->stop_cpu work, the passive mode does
not need that callback any more, so drop it.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

-> v2: Typo fixes and changelog edits (Doug).

---
 drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

Comments

Jinjie Ruan Nov. 6, 2023, 3:03 a.m. UTC | #1
On 2023/11/3 23:56, Doug Smythies wrote:
> On Fri, Nov 3, 2023 at 3:57 AM Jinjie Ruan <ruanjinjie@huawei.com> wrote:
>> On 2020/8/25 1:43, Rafael J. Wysocki wrote:
>>> From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
>>>
>>> Add ->offline and ->online driver callbacks to prepare for taking a
>>> CPU offline and to restore its working configuration when it goes
>>> back online, respectively, to avoid invoking the ->init callback on
>>> every CPU online which is quite a bit of unnecessary overhead.
>>>
>>> Define ->offline and ->online so that they can be used in the
>>> passive mode as well as in the active mode and because ->offline
>>> will do the majority of ->stop_cpu work, the passive mode does
>>> not need that callback any more, so drop it.
>>>
>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>> ---
>>>
>>> -> v2: Typo fixes and changelog edits (Doug).
>>>
>>> ---
>>>  drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++------
>>>  1 file changed, 32 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
>>> index 3d18934fa975..98836ac299db 100644
>>> --- a/drivers/cpufreq/intel_pstate.c
>>> +++ b/drivers/cpufreq/intel_pstate.c
>>> @@ -2297,28 +2297,51 @@ static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
>>>       return 0;
>>>  }
>>>
>>> -static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
>>> +static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
>>>  {
>>> +     pr_debug("CPU %d going offline\n", policy->cpu);
>>> +
>>> +     intel_pstate_exit_perf_limits(policy);
>>> +
>>> +     /*
>>> +      * If the CPU is an SMT thread and it goes offline with the performance
>>> +      * settings different from the minimum, it will prevent its sibling
>>> +      * from getting to lower performance levels, so force the minimum
>>> +      * performance on CPU offline to prevent that from happening.
>>> +      */
>>>       if (hwp_active)
>>>               intel_pstate_hwp_force_min_perf(policy->cpu);
>>>       else
>>>               intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
>>> +{
>>> +     pr_debug("CPU %d going online\n", policy->cpu);
>>> +
>>> +     intel_pstate_init_acpi_perf_limits(policy);
>>> +
>>> +     if (hwp_active)
>>> +             wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
>>> +                           all_cpu_data[policy->cpu]->hwp_req_cached);
>>> +
>>> +     return 0;
>>>  }
>>
>> On Ice Lake server, there seems a bug when CONFIG_X86_INTEL_PSTATE=y and
>> not configure intel_pstate=xxx in command line.
>>
>> Although the Performance tuner is used, the CPU have the lowest
>> frequency in scaling_cur_freq after the CPU goes offline and then goes
>> online, running the same infinite loop load.
>>
>> How to produce:
>>
>> echo performance > /sys/devices/system/cpu/cpu12/cpufreq/scaling_governor
>>
>> cat while_true.c
>> #include <stdio.h>
>> void main(void)
>> {
>>         while(1);
>> }
>>
>>
>> [root@localhost freq_test]# cat test.sh
>> #!/bin/bash
>>
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_governor
>> taskset -c ${1} ./while_true &
>> sleep 1s
>>
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> echo 0 > /sys/devices/system/cpu/cpu${1}/online
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> sleep 1s
>>
>> echo 1 > /sys/devices/system/cpu/cpu${1}/online
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> taskset -c ${1} ./while_true &
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>> sleep 1s
>> cat /sys/devices/system/cpu/cpu${1}/cpufreq/scaling_cur_freq
>>
>>
>> [root@localhost freq_test]# sh test.sh 40
>> 2300000
>> performance
>> 2299977
>> cat: /sys/devices/system/cpu/cpu40/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 2300000
>> 2300022
>> 2300000
>> 2299953
>> [root@localhost freq_test]# sh test.sh 50
>> 2300000
>> performance
>> 2300000
>> cat: /sys/devices/system/cpu/cpu50/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 2300000
>> 2299977
>> 2300022
>> 2299977
>> [root@localhost freq_test]# sh test.sh 20
>> 2300000
>> performance
>> 2299977
>> cat: /sys/devices/system/cpu/cpu20/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 800000
>> 800000
>> 800000
>> 799992
>> [root@localhost freq_test]# sh test.sh 21
>> 2300000
>> performance
>> 2300000
>> cat: /sys/devices/system/cpu/cpu21/cpufreq/scaling_cur_freq: Device or
>> resource busy
>> 800000
>> 800000
>> 800000
>> 800000
>>
>> [root@localhost freq_test]# cat
>> /sys/devices/system/cpu/cpu21/cpufreq/scaling_max_freq
>> 2300000
>> [root@localhost freq_test]# cat
>> /sys/devices/system/cpu/cpu21/cpufreq/scaling_min_freq
>> 800000
> 
> Hi,
> 
> I followed your "how to reproduce" notes exactly.
> So far, I have been unable to reproduce your issue.

It seems that this issue is platform-specific.

The following CPU family has the issue:

1、Products formerly Haswell

2、Model name:            Intel(R) Xeon(R) Platinum 8380 CPU @
2.30GHz(Ice Lake server)

But the following CPU family do not have the issue:

1、Model name:                         Intel(R) Xeon(R) CPU E5-2620 v2 @
2.10GHz

2、Model name:     Intel(R) Xeon(R) CPU E5-2698 v3 @ 2.30GHz

> 
> I am using kernel 6.6.
> My processor is:
> Intel(R) Core(TM) i5-10600K CPU @ 4.10GHz
> 
> Results:
> root@s19:/home/doug/pstate# ./test.sh 8
> 800000
> performance
> 4799994
> cat: /sys/devices/system/cpu/cpu8/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799999
> 4800000
> 4800001
> 4799996
> root@s19:/home/doug/pstate# ./test.sh 7
> 800000
> performance
> 4800001
> cat: /sys/devices/system/cpu/cpu7/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799967
> 4800028
> 4800006
> 4799997
> root@s19:/home/doug/pstate# ./test.sh 6
> 800000
> performance
> 4800001
> cat: /sys/devices/system/cpu/cpu6/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799983
> 4800001
> 4799993
> 4800002
> root@s19:/home/doug/pstate# ./test.sh 5
> 800000
> performance
> 4799990
> cat: /sys/devices/system/cpu/cpu5/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4800006
> 4800002
> 4800011
> 4799980
> root@s19:/home/doug/pstate# ./test.sh 4
> 4799940
> performance
> 4799985
> cat: /sys/devices/system/cpu/cpu4/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799975
> 4799994
> 4799984
> 4799996
> root@s19:/home/doug/pstate# ./test.sh 3
> 4799986
> performance
> 4799990
> cat: /sys/devices/system/cpu/cpu3/cpufreq/scaling_cur_freq: Device or
> resource busy
> 4799976
> 4800015
> 4800000
> 4799995
> 
> ... Doug
>
diff mbox series

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3d18934fa975..98836ac299db 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -2297,28 +2297,51 @@  static int intel_pstate_verify_policy(struct cpufreq_policy_data *policy)
 	return 0;
 }
 
-static void intel_cpufreq_stop_cpu(struct cpufreq_policy *policy)
+static int intel_pstate_cpu_offline(struct cpufreq_policy *policy)
 {
+	pr_debug("CPU %d going offline\n", policy->cpu);
+
+	intel_pstate_exit_perf_limits(policy);
+
+	/*
+	 * If the CPU is an SMT thread and it goes offline with the performance
+	 * settings different from the minimum, it will prevent its sibling
+	 * from getting to lower performance levels, so force the minimum
+	 * performance on CPU offline to prevent that from happening.
+	 */
 	if (hwp_active)
 		intel_pstate_hwp_force_min_perf(policy->cpu);
 	else
 		intel_pstate_set_min_pstate(all_cpu_data[policy->cpu]);
+
+	return 0;
+}
+
+static int intel_pstate_cpu_online(struct cpufreq_policy *policy)
+{
+	pr_debug("CPU %d going online\n", policy->cpu);
+
+	intel_pstate_init_acpi_perf_limits(policy);
+
+	if (hwp_active)
+		wrmsrl_on_cpu(policy->cpu, MSR_HWP_REQUEST,
+			      all_cpu_data[policy->cpu]->hwp_req_cached);
+
+	return 0;
 }
 
 static void intel_pstate_stop_cpu(struct cpufreq_policy *policy)
 {
-	pr_debug("CPU %d exiting\n", policy->cpu);
+	pr_debug("CPU %d stopping\n", policy->cpu);
 
 	intel_pstate_clear_update_util_hook(policy->cpu);
 	if (hwp_active)
 		intel_pstate_hwp_save_state(policy);
-
-	intel_cpufreq_stop_cpu(policy);
 }
 
 static int intel_pstate_cpu_exit(struct cpufreq_policy *policy)
 {
-	intel_pstate_exit_perf_limits(policy);
+	pr_debug("CPU %d exiting\n", policy->cpu);
 
 	policy->fast_switch_possible = false;
 
@@ -2398,6 +2421,8 @@  static struct cpufreq_driver intel_pstate = {
 	.init		= intel_pstate_cpu_init,
 	.exit		= intel_pstate_cpu_exit,
 	.stop_cpu	= intel_pstate_stop_cpu,
+	.offline	= intel_pstate_cpu_offline,
+	.online		= intel_pstate_cpu_online,
 	.update_limits	= intel_pstate_update_limits,
 	.name		= "intel_pstate",
 };
@@ -2652,7 +2677,8 @@  static struct cpufreq_driver intel_cpufreq = {
 	.fast_switch	= intel_cpufreq_fast_switch,
 	.init		= intel_cpufreq_cpu_init,
 	.exit		= intel_cpufreq_cpu_exit,
-	.stop_cpu	= intel_cpufreq_stop_cpu,
+	.offline	= intel_pstate_cpu_offline,
+	.online		= intel_pstate_cpu_online,
 	.update_limits	= intel_pstate_update_limits,
 	.name		= "intel_cpufreq",
 };