@@ -501,6 +501,13 @@ void cpufreq_enable_fast_switch(struct cpufreq_policy *policy)
if (!policy->fast_switch_possible)
return;
+ /**
+ * It's not expected driver's fast_switch callback is not set
+ * even fast_switch_possible is true.
+ */
+ if (!cpufreq_driver_has_fast_switch())
+ pr_alert_once("fast_switch callback is not set\n");
+
mutex_lock(&cpufreq_fast_switch_lock);
if (cpufreq_fast_switch_count >= 0) {
cpufreq_fast_switch_count++;
@@ -2143,6 +2150,17 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
}
EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
+/**
+ * cpufreq_driver_has_fast_switch - Check "fast switch" callback.
+ *
+ * Return 'true' if the ->fast_switch callback is present for the
+ * current driver or 'false' otherwise.
+ */
+bool cpufreq_driver_has_fast_switch(void)
+{
+ return !!cpufreq_driver->fast_switch;
+}
+
/**
* cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
* @cpu: Target CPU.
@@ -340,7 +340,9 @@ struct cpufreq_driver {
/*
* ->fast_switch() replacement for drivers that use an internal
* representation of performance levels and can pass hints other than
- * the target performance level to the hardware.
+ * the target performance level to the hardware. If driver is setting this,
+ * then it needs to set fast_switch also. Because in certain scenario scale
+ * invariance could be disabled and governor can switch back to fast_switch.
*/
void (*adjust_perf)(unsigned int cpu,
unsigned long min_perf,
@@ -604,6 +606,7 @@ struct cpufreq_governor {
/* Pass a target to the cpufreq driver */
unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
unsigned int target_freq);
+bool cpufreq_driver_has_fast_switch(void);
void cpufreq_driver_adjust_perf(unsigned int cpu,
unsigned long min_perf,
unsigned long target_perf,
If fast_switch_possible flag is set by the scaling driver, the governor is free to select fast_switch function even if adjust_perf is set. When the frequency invariance is disabled due to some reason governor fallbacks to fast_switch if fast_switch_possible is set. This could crash the kernel if the driver didn't set the fast_switch function pointer. This issue becomes apparent when aperf/mperf overflow occurs with amd_pstate (passive) + schedutil. When this happens, kernel disables frequency invariance calculation which causes schedutil to fallback to sugov_update_single_freq which currently relies on the fast_switch callback. Normal flow: sugov_update_single_perf cpufreq_driver_adjust_perf cpufreq_driver->adjust_perf Error case flow: sugov_update_single_perf sugov_update_single_freq <-- This is chosen because the freq invariant is disabled due to aperf/mperf overflow cpufreq_driver_fast_switch cpufreq_driver->fast_switch <-- Here NULL pointer dereference is happening, because fast_switch is not set Put up a warning message if the driver sets fast_switch_possible flag but not fast_switch. Signed-off-by: Wyes Karny <wyes.karny@amd.com> --- drivers/cpufreq/cpufreq.c | 18 ++++++++++++++++++ include/linux/cpufreq.h | 5 ++++- 2 files changed, 22 insertions(+), 1 deletion(-)