CPUfreq lockdep issue

Message ID 20160219091723.GB22812@vireshk-i7
State New
Headers show

Commit Message

Viresh Kumar Feb. 19, 2016, 9:17 a.m.
Adding the maintainers of the driver in cc now.

On 19-02-16, 10:50, Joonas Lahtinen wrote:
> On to, 2016-02-18 at 17:04 +0530, Viresh Kumar wrote:

> > On 18-02-16, 13:06, Joonas Lahtinen wrote:

> > > Hi,

> > > 

> > > The Intel P-state driver has a lockdep issue as described below. It

> > > could in theory cause a deadlock if initialization and suspend were to

> > > be performed simultaneously. Conflicting calling paths are as follows:

> > > 

> > > intel_pstate_init(...)

> > > 	...cpufreq_online(...)

> > > 		down_write(&policy->rwsem); // Locks policy->rwsem

> > > 		...

> > > 		cpufreq_init_policy(policy);

> > > 			...intel_pstate_hwp_set();

> > > 				get_online_cpus(); // Temporarily locks cpu_hotplug.lock

> > 

> > Why is this one required?

> 

> Otherwise CPUs could be added or removed during the execution of

> intel_pstate_hwp_set(), which has the following code:

> 

>        	get_online_cpus();

>         for_each_online_cpu(cpu) {

> 		...

> 		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);

> 	}

> > 

> > > 		...

> > > 		up_write(&policy->rwsem);

> > > 

> > > pm_suspend(...)

> > > 	...disable_nonboot_cpus()

> > > 		_cpu_down()

> > > 			cpu_hotplug_begin(); // Locks cpu_hotplug.lock

> > > 			__cpu_notify(CPU_DOWN_PREPARE, ...);

> > > 				...cpufreq_offline_prepare();

> > > 					down_write(&policy->rwsem); // Locks policy->rwsem

> > > 

> > > Quickly looking at the code, some refactoring has to be done to fix the

> > > issue. I think it would a good idea to document some of the driver

> > > callbacks related to what locks are held etc. in order to avoid future

> > > situations like this.

> > > 

> > > Because get_online_cpus() is of recursive nature and the way it

> > > currently works, adding wider get_online_cpus() scope up around

> > > cpufreq_online() does not fix the issue because it only momentarily

> > > locks cpu_hotplug.lock and proceeds to do so again at next call.

> > > 

> > > Moving get_online_cpus() completely away from pstate_hwp_set() and

> > > assuring it is called higher in the call chain might be a viable

> > > solution. Then it could be made sure get_online_cpus() is not called

> > > while policy->rwsem is being held already.


Hi Guys,

Joonas reported a lockdep between cpufreq and intel-pstate driver and
we are looking for probable solutions.

I failed to understand why should we run intel_pstate_hwp_set() for
each online CPU, while the frequency is changed only for a group of
CPUs that belong to a policy. Ofcourse intel_pstate_hwp_set() is
required to be run for all CPUs, while the sysfs files are touched.
And so, do we have a problem with below diff?

-- 
viresh

-------------------------8<-------------------------
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index d6061be2c542..a3c1788daab2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -287,7 +287,7 @@  static inline void update_turbo_state(void)
 		 cpu->pstate.max_pstate == cpu->pstate.turbo_pstate);
 }
 
-static void intel_pstate_hwp_set(void)
+static void intel_pstate_hwp_set(const struct cpumask *cpumask)
 {
 	int min, hw_min, max, hw_max, cpu, range, adj_range;
 	u64 value, cap;
@@ -297,9 +297,7 @@  static void intel_pstate_hwp_set(void)
 	hw_max = HWP_HIGHEST_PERF(cap);
 	range = hw_max - hw_min;
 
-	get_online_cpus();
-
-	for_each_online_cpu(cpu) {
+	for_each_cpu(cpu, cpumask) {
 		rdmsrl_on_cpu(cpu, MSR_HWP_REQUEST, &value);
 		adj_range = limits->min_perf_pct * range / 100;
 		min = hw_min + adj_range;
@@ -318,7 +316,12 @@  static void intel_pstate_hwp_set(void)
 		value |= HWP_MAX_PERF(max);
 		wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 	}
+}
 
+static void intel_pstate_hwp_set_online_cpus(void)
+{
+	get_online_cpus();
+	intel_pstate_hwp_set(cpu_online_mask);
 	put_online_cpus();
 }
 
@@ -440,7 +443,7 @@  static ssize_t store_no_turbo(struct kobject *a, struct attribute *b,
 	limits->no_turbo = clamp_t(int, input, 0, 1);
 
 	if (hwp_active)
-		intel_pstate_hwp_set();
+		intel_pstate_hwp_set_online_cpus();
 
 	return count;
 }
@@ -466,7 +469,7 @@  static ssize_t store_max_perf_pct(struct kobject *a, struct attribute *b,
 				  int_tofp(100));
 
 	if (hwp_active)
-		intel_pstate_hwp_set();
+		intel_pstate_hwp_set_online_cpus();
 	return count;
 }
 
@@ -491,7 +494,7 @@  static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 				  int_tofp(100));
 
 	if (hwp_active)
-		intel_pstate_hwp_set();
+		intel_pstate_hwp_set_online_cpus();
 	return count;
 }
 
@@ -1112,7 +1115,7 @@  static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		pr_debug("intel_pstate: set performance\n");
 		limits = &performance_limits;
 		if (hwp_active)
-			intel_pstate_hwp_set();
+			intel_pstate_hwp_set(policy->cpus);
 		return 0;
 	}
 
@@ -1144,7 +1147,7 @@  static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 				  int_tofp(100));
 
 	if (hwp_active)
-		intel_pstate_hwp_set();
+		intel_pstate_hwp_set(policy->cpus);
 
 	return 0;
 }