
[v2] cpufreq: schedutil: Move max CPU capacity to sugov_policy

Message ID: 20220816130629.3178-1-lukasz.luba@arm.com
State: Accepted
Commit: 6d5afdc97ea71958287364a1f1d07e59ef151b11
Series: [v2] cpufreq: schedutil: Move max CPU capacity to sugov_policy

Commit Message

Lukasz Luba Aug. 16, 2022, 1:06 p.m. UTC
There is no need to keep the max CPU capacity in the per_cpu instance.
Furthermore, there is no need to check and update that variable
(sg_cpu->max) on every frequency change request, which is part of the
hot path. Instead, use struct sugov_policy to store that information.
Initialize the max CPU capacity during the setup and start callback.
We can do that since all CPUs in the same frequency domain have the
same max capacity (capacity setup and thermal pressure are based on
that).

Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
---
Changes v2:
- collected ACK from Viresh
- rebased on top of the latest mainline, where the previously
  conflicting change is now merged

 kernel/sched/cpufreq_schedutil.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)
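
The change hinges on the invariant stated in the commit message: every
CPU in a policy's frequency domain reports the same
arch_scale_cpu_capacity(), so reading it once for the first CPU of the
policy at start time is enough. As a minimal sketch (a hypothetical
helper, not part of this patch), a defensive check of that invariant
could look like this:

	/*
	 * Hypothetical sanity check, not part of the patch: warn once if
	 * any CPU in the policy reports a max capacity different from
	 * that of the first CPU, i.e. if the assumption behind caching
	 * sg_policy->max ever breaks.
	 */
	static void sugov_check_max_capacity(struct cpufreq_policy *policy)
	{
		unsigned long max = arch_scale_cpu_capacity(cpumask_first(policy->cpus));
		unsigned int cpu;

		for_each_cpu(cpu, policy->cpus)
			WARN_ON_ONCE(arch_scale_cpu_capacity(cpu) != max);
	}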

Comments

Rafael J. Wysocki Aug. 23, 2022, 6:04 p.m. UTC | #1
On Tue, Aug 16, 2022 at 3:06 PM Lukasz Luba <lukasz.luba@arm.com> wrote:
>
> There is no need to keep the max CPU capacity in the per_cpu instance.
> [...]

Applied as 6.1 material, thanks!

Russell Haley Oct. 3, 2022, 8:41 a.m. UTC | #2
> We can do that since all CPUs in the same frequency domain have the
> same max capacity

Do they? The Intel Alder Lake datasheet [1] says that a single power
rail supplies all IA ("Intel Architecture") cores, which includes both
P-cores and E-cores.

I don't have anything that new, but on Haswell and Skylake, even though
each CPU has a separate policy that lists only itself in affected_cpus,
with the userspace governor I find that every core runs at the highest
frequency set among all cores. For clarity:

$ grep . cpufreq/policy*/scaling_{setspeed,cur_freq}
cpufreq/policy0/scaling_setspeed:3000000
cpufreq/policy1/scaling_setspeed:2000000
cpufreq/policy0/scaling_cur_freq:2999997
cpufreq/policy1/scaling_cur_freq:3000001

It seems that these cores are in the same frequency domain, even if
cpufreq doesn't know about it. I don't know if this affects the behavior
of the governors in any way, but it might be a bug in intel_pstate that
could one day be fixed. If it is, then any heterogeneous-uarch chip
with both CPU types sharing a voltage rail would have CPUs with
different max capacities in the same frequency domain.
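
For reference, on arm and arm64 the kernel (via arch_topology) exposes
each CPU's capacity in sysfs, so the assumption can be checked directly
by comparing cpu_capacity across a policy's related_cpus:

$ cat /sys/devices/system/cpu/cpufreq/policy0/related_cpus
$ grep . /sys/devices/system/cpu/cpu*/cpu_capacity

All CPUs listed in a policy's related_cpus should then report the same
cpu_capacity value.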

This might present a problem for any future attempt to harmonize
treatment of big.LITTLE between ARM and x86.

[1]:
https://edc.intel.com/content/www/us/en/design/ipla/software-development-platforms/client/platforms/alder-lake-desktop/12th-generation-intel-core-processors-datasheet-volume-1-of-2/processor-power-rails_1/

Patch

diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 1207c78f85c1..9161d1136d01 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -25,6 +25,9 @@ struct sugov_policy {
 	unsigned int		next_freq;
 	unsigned int		cached_raw_freq;
 
+	/* max CPU capacity, which is equal for all CPUs in freq. domain */
+	unsigned long		max;
+
 	/* The next fields are only needed if fast switch cannot be used: */
 	struct			irq_work irq_work;
 	struct			kthread_work work;
@@ -48,7 +51,6 @@ struct sugov_cpu {
 
 	unsigned long		util;
 	unsigned long		bw_dl;
-	unsigned long		max;
 
 	/* The field below is for single-CPU policies only: */
 #ifdef CONFIG_NO_HZ_COMMON
@@ -158,7 +160,6 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
 {
 	struct rq *rq = cpu_rq(sg_cpu->cpu);
 
-	sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu);
 	sg_cpu->bw_dl = cpu_bw_dl(rq);
 	sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu),
 					  FREQUENCY_UTIL, NULL);
@@ -253,6 +254,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
  */
 static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
 {
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	unsigned long boost;
 
 	/* No boost currently required */
@@ -280,7 +282,8 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
 	 * sg_cpu->util is already in capacity scale; convert iowait_boost
 	 * into the same scale so we can compare.
 	 */
-	boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
+	boost = sg_cpu->iowait_boost * sg_policy->max;
+	boost >>= SCHED_CAPACITY_SHIFT;
 	boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
 	if (sg_cpu->util < boost)
 		sg_cpu->util = boost;
@@ -337,7 +340,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
 	if (!sugov_update_single_common(sg_cpu, time, flags))
 		return;
 
-	next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
+	next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max);
 	/*
 	 * Do not reduce the frequency if the CPU has not been idle
 	 * recently, as the reduction is likely to be premature then.
@@ -373,6 +376,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
 				     unsigned int flags)
 {
 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	unsigned long prev_util = sg_cpu->util;
 
 	/*
@@ -399,7 +403,8 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
 		sg_cpu->util = prev_util;
 
 	cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
-				   map_util_perf(sg_cpu->util), sg_cpu->max);
+				   map_util_perf(sg_cpu->util),
+				   sg_policy->max);
 
 	sg_cpu->sg_policy->last_freq_update_time = time;
 }
@@ -408,25 +413,19 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
 {
 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
 	struct cpufreq_policy *policy = sg_policy->policy;
-	unsigned long util = 0, max = 1;
+	unsigned long util = 0;
 	unsigned int j;
 
 	for_each_cpu(j, policy->cpus) {
 		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
-		unsigned long j_util, j_max;
 
 		sugov_get_util(j_sg_cpu);
 		sugov_iowait_apply(j_sg_cpu, time);
-		j_util = j_sg_cpu->util;
-		j_max = j_sg_cpu->max;
 
-		if (j_util * max > j_max * util) {
-			util = j_util;
-			max = j_max;
-		}
+		util = max(j_sg_cpu->util, util);
 	}
 
-	return get_next_freq(sg_policy, util, max);
+	return get_next_freq(sg_policy, util, sg_policy->max);
 }
 
 static void
@@ -752,7 +751,7 @@ static int sugov_start(struct cpufreq_policy *policy)
 {
 	struct sugov_policy *sg_policy = policy->governor_data;
 	void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
-	unsigned int cpu;
+	unsigned int cpu = cpumask_first(policy->cpus);
 
 	sg_policy->freq_update_delay_ns	= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
 	sg_policy->last_freq_update_time	= 0;
@@ -760,6 +759,7 @@ static int sugov_start(struct cpufreq_policy *policy)
 	sg_policy->work_in_progress		= false;
 	sg_policy->limits_changed		= false;
 	sg_policy->cached_raw_freq		= 0;
+	sg_policy->max				= arch_scale_cpu_capacity(cpu);
 
 	sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
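
A note on the sugov_next_freq_shared() hunk above: the old loop
condition "j_util * max > j_max * util" selects the CPU with the
largest j_util / j_max ratio, written multiplicatively to avoid a
division. Once every j_max in the domain equals sg_policy->max, that
comparison reduces to "j_util > util", which is exactly the new
"util = max(j_sg_cpu->util, util)".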