diff mbox series

[V3,3/3] cpufreq: Specify default governor on command line

Message ID 7eb38608b2b32c0c72dfb160c51206ec42e74e35.1593143118.git.viresh.kumar@linaro.org
State Superseded
Headers show
Series cpufreq: Allow default governor on cmdline and fix locking issues | expand

Commit Message

Viresh Kumar June 26, 2020, 3:51 a.m. UTC
From: Quentin Perret <qperret@google.com>


Currently, the only way to specify the default CPUfreq governor is via
Kconfig options, which suits users who can build the kernel themselves
perfectly.

However, for those who use a distro-like kernel (such as Android, with
the Generic Kernel Image project), the only way to use a different
default is to boot to userspace, and to then switch using the sysfs
interface. Being able to specify the default governor on the command
line, like is the case for cpuidle, would enable those users to specify
their governor of choice earlier on, and to simplify slighlty the
userspace boot procedure.

To support this use-case, add a kernel command line parameter enabling
to specify a default governor for CPUfreq, which takes precedence over
the builtin default.

This implementation has one notable limitation: the default governor
must be registered before the driver. This is solved for builtin
governors and drivers using appropriate *_initcall() functions. And in
the modular case, this must be reflected as a constraint on the module
loading order.

Signed-off-by: Quentin Perret <qperret@google.com>

[ Viresh: Converted 'default_governor' to a string and parsing it only
	  at initcall level, and several updates to
	  cpufreq_init_policy(). ]
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

---
 .../admin-guide/kernel-parameters.txt         |  5 +++
 Documentation/admin-guide/pm/cpufreq.rst      |  6 ++--
 drivers/cpufreq/cpufreq.c                     | 36 ++++++++++++++-----
 3 files changed, 35 insertions(+), 12 deletions(-)

-- 
2.25.0.rc1.19.g042ed3e048af

Comments

Quentin Perret June 26, 2020, 3:57 p.m. UTC | #1
On Friday 26 Jun 2020 at 09:21:44 (+0530), Viresh Kumar wrote:
> index e798a1193bdf..93c6399c1a42 100644

> --- a/drivers/cpufreq/cpufreq.c

> +++ b/drivers/cpufreq/cpufreq.c

> @@ -50,6 +50,9 @@ static LIST_HEAD(cpufreq_governor_list);

>  #define for_each_governor(__governor)				\

>  	list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)

>  

> +static char cpufreq_param_governor[CPUFREQ_NAME_LEN];

> +static char default_governor[CPUFREQ_NAME_LEN];

> +

>  /**

>   * The "cpufreq driver" - the arch- or hardware-dependent low

>   * level driver of CPUFreq support, and its spinlock. This lock

> @@ -1061,7 +1064,6 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void)

>  

>  static int cpufreq_init_policy(struct cpufreq_policy *policy)

>  {

> -	struct cpufreq_governor *def_gov = cpufreq_default_governor();

>  	struct cpufreq_governor *gov = NULL;

>  	unsigned int pol = CPUFREQ_POLICY_UNKNOWN;

>  	bool put_governor = false;

> @@ -1071,22 +1073,29 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)

>  		/* Update policy governor to the one used before hotplug. */

>  		gov = get_governor(policy->last_governor);

>  		if (gov) {

> -			put_governor = true;

>  			pr_debug("Restoring governor %s for cpu %d\n",

> -				 policy->governor->name, policy->cpu);

> -		} else if (def_gov) {

> -			gov = def_gov;

> +				 gov->name, policy->cpu);

>  		} else {

> -			return -ENODATA;

> +			gov = get_governor(default_governor);

> +		}

> +

> +		if (gov) {

> +			put_governor = true;

> +		} else {

> +			gov = cpufreq_default_governor();

> +			if (!gov)

> +				return -ENODATA;

>  		}


As mentioned on patch 01, doing put_module() below if gov != NULL would
avoid this dance with put_governor, but this works too, so no strong
opinion.

> +

>  	} else {

> +

>  		/* Use the default policy if there is no last_policy. */

>  		if (policy->last_policy) {

>  			pol = policy->last_policy;

> -		} else if (def_gov) {

> -			pol = cpufreq_parse_policy(def_gov->name);

> +		} else {

> +			pol = cpufreq_parse_policy(default_governor);

>  			/*

> -			 * In case the default governor is neiter "performance"

> +			 * In case the default governor is neither "performance"

>  			 * nor "powersave", fall back to the initial policy

>  			 * value set by the driver.

>  			 */

> @@ -2796,13 +2805,22 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

>  

>  static int __init cpufreq_core_init(void)

>  {

> +	struct cpufreq_governor *gov = cpufreq_default_governor();

> +	char *name = gov->name;

> +

>  	if (cpufreq_disabled())

>  		return -ENODEV;

>  

>  	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);

>  	BUG_ON(!cpufreq_global_kobject);

>  

> +	if (strlen(cpufreq_param_governor))

> +		name = cpufreq_param_governor;

> +

> +	strncpy(default_governor, name, CPUFREQ_NAME_LEN);


Do we need both cpufreq_param_governor and default_governor?
Could we move everything to only one of them? Something a little bit
like that maybe?

---8<---
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 93c6399c1a42..a0e90b567e1e 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -50,7 +50,6 @@ static LIST_HEAD(cpufreq_governor_list);
 #define for_each_governor(__governor)                          \
        list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)
 
-static char cpufreq_param_governor[CPUFREQ_NAME_LEN];
 static char default_governor[CPUFREQ_NAME_LEN];
 
 /**
@@ -2814,13 +2813,11 @@ static int __init cpufreq_core_init(void)
        cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
        BUG_ON(!cpufreq_global_kobject);
 
-       if (strlen(cpufreq_param_governor))
-               name = cpufreq_param_governor;
-
-       strncpy(default_governor, name, CPUFREQ_NAME_LEN);
+       if (!strlen(default_governor))
+               strncpy(default_governor, name, CPUFREQ_NAME_LEN);
 
        return 0;
 }
 module_param(off, int, 0444);
-module_param_string(default_governor, cpufreq_param_governor, CPUFREQ_NAME_LEN, 0444);
+module_param_string(default_governor, default_governor, CPUFREQ_NAME_LEN, 0444);
--->8---

Also, one thing to keep in mind with this version (or the one you
suggested) is that if the command line parameter is not valid, we will
not fallback on the builtin default for the ->setpolicy() case. But I
suppose one might argue this is a reasonable behaviour, so no objection
from me.

Otherwise, apart from these nits, I gave this a go on my setup, with
builtin and modular governors & drivers, and everything worked exactly
as expected.

Thanks Viresh for the help!
Quentin
Viresh Kumar June 29, 2020, 2:08 a.m. UTC | #2
On 26-06-20, 16:57, Quentin Perret wrote:
> On Friday 26 Jun 2020 at 09:21:44 (+0530), Viresh Kumar wrote:

> > index e798a1193bdf..93c6399c1a42 100644

> > --- a/drivers/cpufreq/cpufreq.c

> > +++ b/drivers/cpufreq/cpufreq.c

> > @@ -50,6 +50,9 @@ static LIST_HEAD(cpufreq_governor_list);

> >  #define for_each_governor(__governor)				\

> >  	list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)

> >  

> > +static char cpufreq_param_governor[CPUFREQ_NAME_LEN];

> > +static char default_governor[CPUFREQ_NAME_LEN];

> > +

> >  /**

> >   * The "cpufreq driver" - the arch- or hardware-dependent low

> >   * level driver of CPUFreq support, and its spinlock. This lock

> > @@ -1061,7 +1064,6 @@ __weak struct cpufreq_governor *cpufreq_default_governor(void)

> >  

> >  static int cpufreq_init_policy(struct cpufreq_policy *policy)

> >  {

> > -	struct cpufreq_governor *def_gov = cpufreq_default_governor();

> >  	struct cpufreq_governor *gov = NULL;

> >  	unsigned int pol = CPUFREQ_POLICY_UNKNOWN;

> >  	bool put_governor = false;

> > @@ -1071,22 +1073,29 @@ static int cpufreq_init_policy(struct cpufreq_policy *policy)

> >  		/* Update policy governor to the one used before hotplug. */

> >  		gov = get_governor(policy->last_governor);

> >  		if (gov) {

> > -			put_governor = true;

> >  			pr_debug("Restoring governor %s for cpu %d\n",

> > -				 policy->governor->name, policy->cpu);

> > -		} else if (def_gov) {

> > -			gov = def_gov;

> > +				 gov->name, policy->cpu);

> >  		} else {

> > -			return -ENODATA;

> > +			gov = get_governor(default_governor);

> > +		}

> > +

> > +		if (gov) {

> > +			put_governor = true;

> > +		} else {

> > +			gov = cpufreq_default_governor();

> > +			if (!gov)

> > +				return -ENODATA;

> >  		}

> 

> As mentioned on patch 01, doing put_module() below if gov != NULL would

> avoid this dance with put_governor, but this works too, so no strong

> opinion.


I did it this way because the code looks buggy otherwise, even though
it isn't as put_module() handles it just fine. And so I would like to
keep it this way, unless there are two votes against mine :)

> > +

> >  	} else {

> > +

> >  		/* Use the default policy if there is no last_policy. */

> >  		if (policy->last_policy) {

> >  			pol = policy->last_policy;

> > -		} else if (def_gov) {

> > -			pol = cpufreq_parse_policy(def_gov->name);

> > +		} else {

> > +			pol = cpufreq_parse_policy(default_governor);

> >  			/*

> > -			 * In case the default governor is neiter "performance"

> > +			 * In case the default governor is neither "performance"

> >  			 * nor "powersave", fall back to the initial policy

> >  			 * value set by the driver.

> >  			 */

> > @@ -2796,13 +2805,22 @@ EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);

> >  

> >  static int __init cpufreq_core_init(void)

> >  {

> > +	struct cpufreq_governor *gov = cpufreq_default_governor();

> > +	char *name = gov->name;

> > +

> >  	if (cpufreq_disabled())

> >  		return -ENODEV;

> >  

> >  	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);

> >  	BUG_ON(!cpufreq_global_kobject);

> >  

> > +	if (strlen(cpufreq_param_governor))

> > +		name = cpufreq_param_governor;

> > +

> > +	strncpy(default_governor, name, CPUFREQ_NAME_LEN);

> 

> Do we need both cpufreq_param_governor and default_governor?

> Could we move everything to only one of them? Something a little bit

> like that maybe?


No because we want to fallback to the default governor when the
governor shown by the cpufreq_param_governor is valid but missing.

> Also, one thing to keep in mind with this version (or the one you

> suggested) is that if the command line parameter is not valid, we will

> not fallback on the builtin default for the ->setpolicy() case. But I

> suppose one might argue this is a reasonable behaviour, so no objection

> from me.


Right, I did that on purpose.

> Otherwise, apart from these nits, I gave this a go on my setup, with

> builtin and modular governors & drivers, and everything worked exactly

> as expected.


Thanks for testing it out Quentin.

-- 
viresh
Quentin Perret June 29, 2020, 8:03 a.m. UTC | #3
On Monday 29 Jun 2020 at 07:38:43 (+0530), Viresh Kumar wrote:
> On 26-06-20, 16:57, Quentin Perret wrote:

> > Do we need both cpufreq_param_governor and default_governor?

> > Could we move everything to only one of them? Something a little bit

> > like that maybe?

> 

> No because we want to fallback to the default governor when the

> governor shown by the cpufreq_param_governor is valid but missing.


But that would still work with my suggestion no? You still fallback to
calling cpufreq_default_governor() in cpufreq_init_policy() if
get_governor(default_governor) doesn't succeed, so we should be covered.

Thanks,
Quentin
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index fb95fad81c79..8deb5a89328a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -703,6 +703,11 @@ 
 	cpufreq.off=1	[CPU_FREQ]
 			disable the cpufreq sub-system
 
+	cpufreq.default_governor=
+			[CPU_FREQ] Name of the default cpufreq governor or
+			policy to use. This governor must be registered in the
+			kernel before the cpufreq driver probes.
+
 	cpu_init_udelay=N
 			[X86] Delay for N microsec between assert and de-assert
 			of APIC INIT to start processors.  This delay occurs
diff --git a/Documentation/admin-guide/pm/cpufreq.rst b/Documentation/admin-guide/pm/cpufreq.rst
index 0c74a7784964..368e612145d2 100644
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -147,9 +147,9 @@  CPUs in it.
 
 The next major initialization step for a new policy object is to attach a
 scaling governor to it (to begin with, that is the default scaling governor
-determined by the kernel configuration, but it may be changed later
-via ``sysfs``).  First, a pointer to the new policy object is passed to the
-governor's ``->init()`` callback which is expected to initialize all of the
+determined by the kernel command line or configuration, but it may be changed
+later via ``sysfs``).  First, a pointer to the new policy object is passed to
+the governor's ``->init()`` callback which is expected to initialize all of the
 data structures necessary to handle the given policy and, possibly, to add
 a governor ``sysfs`` interface to it.  Next, the governor is started by
 invoking its ``->start()`` callback.
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index e798a1193bdf..93c6399c1a42 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -50,6 +50,9 @@  static LIST_HEAD(cpufreq_governor_list);
 #define for_each_governor(__governor)				\
 	list_for_each_entry(__governor, &cpufreq_governor_list, governor_list)
 
+static char cpufreq_param_governor[CPUFREQ_NAME_LEN];
+static char default_governor[CPUFREQ_NAME_LEN];
+
 /**
  * The "cpufreq driver" - the arch- or hardware-dependent low
  * level driver of CPUFreq support, and its spinlock. This lock
@@ -1061,7 +1064,6 @@  __weak struct cpufreq_governor *cpufreq_default_governor(void)
 
 static int cpufreq_init_policy(struct cpufreq_policy *policy)
 {
-	struct cpufreq_governor *def_gov = cpufreq_default_governor();
 	struct cpufreq_governor *gov = NULL;
 	unsigned int pol = CPUFREQ_POLICY_UNKNOWN;
 	bool put_governor = false;
@@ -1071,22 +1073,29 @@  static int cpufreq_init_policy(struct cpufreq_policy *policy)
 		/* Update policy governor to the one used before hotplug. */
 		gov = get_governor(policy->last_governor);
 		if (gov) {
-			put_governor = true;
 			pr_debug("Restoring governor %s for cpu %d\n",
-				 policy->governor->name, policy->cpu);
-		} else if (def_gov) {
-			gov = def_gov;
+				 gov->name, policy->cpu);
 		} else {
-			return -ENODATA;
+			gov = get_governor(default_governor);
+		}
+
+		if (gov) {
+			put_governor = true;
+		} else {
+			gov = cpufreq_default_governor();
+			if (!gov)
+				return -ENODATA;
 		}
+
 	} else {
+
 		/* Use the default policy if there is no last_policy. */
 		if (policy->last_policy) {
 			pol = policy->last_policy;
-		} else if (def_gov) {
-			pol = cpufreq_parse_policy(def_gov->name);
+		} else {
+			pol = cpufreq_parse_policy(default_governor);
 			/*
-			 * In case the default governor is neiter "performance"
+			 * In case the default governor is neither "performance"
 			 * nor "powersave", fall back to the initial policy
 			 * value set by the driver.
 			 */
@@ -2796,13 +2805,22 @@  EXPORT_SYMBOL_GPL(cpufreq_unregister_driver);
 
 static int __init cpufreq_core_init(void)
 {
+	struct cpufreq_governor *gov = cpufreq_default_governor();
+	char *name = gov->name;
+
 	if (cpufreq_disabled())
 		return -ENODEV;
 
 	cpufreq_global_kobject = kobject_create_and_add("cpufreq", &cpu_subsys.dev_root->kobj);
 	BUG_ON(!cpufreq_global_kobject);
 
+	if (strlen(cpufreq_param_governor))
+		name = cpufreq_param_governor;
+
+	strncpy(default_governor, name, CPUFREQ_NAME_LEN);
+
 	return 0;
 }
 module_param(off, int, 0444);
+module_param_string(default_governor, cpufreq_param_governor, CPUFREQ_NAME_LEN, 0444);
 core_initcall(cpufreq_core_init);