diff mbox series

[V2,9/9] cpufreq: scmi: Use .register_em() callback

Message ID 6094d891b4cb0cba3357e2894c8a4431c4c65e67.1628682874.git.viresh.kumar@linaro.org
State New
Headers show
Series cpufreq: Auto-register with energy model | expand

Commit Message

Viresh Kumar Aug. 11, 2021, 11:58 a.m. UTC
Set the newly added .register_em() callback to register with the EM
after the cpufreq policy is properly initialized.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

---
 drivers/cpufreq/scmi-cpufreq.c | 55 ++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 23 deletions(-)

-- 
2.31.1.272.g89b43f80a514

Comments

Quentin Perret Aug. 11, 2021, 1:17 p.m. UTC | #1
On Wednesday 11 Aug 2021 at 17:28:47 (+0530), Viresh Kumar wrote:
> Set the newly added .register_em() callback to register with the EM

> after the cpufreq policy is properly initialized.

> 

> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

> ---

>  drivers/cpufreq/scmi-cpufreq.c | 55 ++++++++++++++++++++--------------

>  1 file changed, 32 insertions(+), 23 deletions(-)

> 

> diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c

> index 75f818d04b48..b916c9e22921 100644

> --- a/drivers/cpufreq/scmi-cpufreq.c

> +++ b/drivers/cpufreq/scmi-cpufreq.c

> @@ -22,7 +22,9 @@

>  

>  struct scmi_data {

>  	int domain_id;

> +	int nr_opp;

>  	struct device *cpu_dev;

> +	cpumask_var_t opp_shared_cpus;


Can we use policy->related_cpus and friends directly in the callback
instead? That should simplify the patch a bit.

Also, we can probably afford calling dev_pm_opp_get_opp_count() from the
em_register callback as it is not a hot path, which would avoid wasting
some 'resident' memory here that is only used during init.

Thanks,
Quentin
Lukasz Luba Aug. 11, 2021, 2:09 p.m. UTC | #2
On 8/11/21 2:17 PM, Quentin Perret wrote:
> On Wednesday 11 Aug 2021 at 17:28:47 (+0530), Viresh Kumar wrote:

>> Set the newly added .register_em() callback to register with the EM

>> after the cpufreq policy is properly initialized.

>>

>> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

>> ---

>>   drivers/cpufreq/scmi-cpufreq.c | 55 ++++++++++++++++++++--------------

>>   1 file changed, 32 insertions(+), 23 deletions(-)

>>

>> diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c

>> index 75f818d04b48..b916c9e22921 100644

>> --- a/drivers/cpufreq/scmi-cpufreq.c

>> +++ b/drivers/cpufreq/scmi-cpufreq.c

>> @@ -22,7 +22,9 @@

>>   

>>   struct scmi_data {

>>   	int domain_id;

>> +	int nr_opp;

>>   	struct device *cpu_dev;

>> +	cpumask_var_t opp_shared_cpus;

> 

> Can we use policy->related_cpus and friends directly in the callback


Unfortunately not. This tricky setup code was introduced because we may
have a platform with per-CPU policy, so single bit set in
policy->related_cpus, but we want EAS to be still working on set
of CPUs. That's why we construct temporary cpumask and pass it to EM.

> instead? That should simplify the patch a bit.

> 

> Also, we can probably afford calling dev_pm_opp_get_opp_count() from the

> em_register callback as it is not a hot path, which would avoid wasting

> some 'resident' memory here that is only used during init.

> 

> Thanks,

> Quentin

>
Quentin Perret Aug. 11, 2021, 2:39 p.m. UTC | #3
On Wednesday 11 Aug 2021 at 15:09:13 (+0100), Lukasz Luba wrote:
> 

> 

> On 8/11/21 2:17 PM, Quentin Perret wrote:

> > On Wednesday 11 Aug 2021 at 17:28:47 (+0530), Viresh Kumar wrote:

> > > Set the newly added .register_em() callback to register with the EM

> > > after the cpufreq policy is properly initialized.

> > > 

> > > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

> > > ---

> > >   drivers/cpufreq/scmi-cpufreq.c | 55 ++++++++++++++++++++--------------

> > >   1 file changed, 32 insertions(+), 23 deletions(-)

> > > 

> > > diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c

> > > index 75f818d04b48..b916c9e22921 100644

> > > --- a/drivers/cpufreq/scmi-cpufreq.c

> > > +++ b/drivers/cpufreq/scmi-cpufreq.c

> > > @@ -22,7 +22,9 @@

> > >   struct scmi_data {

> > >   	int domain_id;

> > > +	int nr_opp;

> > >   	struct device *cpu_dev;

> > > +	cpumask_var_t opp_shared_cpus;

> > 

> > Can we use policy->related_cpus and friends directly in the callback

> 

> Unfortunately not. This tricky setup code was introduced because we may

> have a platform with per-CPU policy, so single bit set in

> policy->related_cpus, but we want EAS to be still working on set

> of CPUs. That's why we construct temporary cpumask and pass it to EM.


Aha, I see this now. Hmm, those platforms better have AMUs then,
otherwise PELT signals will be wonky ...

I was going to suggest using dev_pm_opp_get_sharing_cpus() from the
callback instead, but maybe that's overkill as we'd need to allocate a
temporary cpumask and all. So n/m this patch should be fine as is.
Lukasz Luba Aug. 11, 2021, 3:52 p.m. UTC | #4
On 8/11/21 3:39 PM, Quentin Perret wrote:
> On Wednesday 11 Aug 2021 at 15:09:13 (+0100), Lukasz Luba wrote:

>>

>>

>> On 8/11/21 2:17 PM, Quentin Perret wrote:

>>> On Wednesday 11 Aug 2021 at 17:28:47 (+0530), Viresh Kumar wrote:

>>>> Set the newly added .register_em() callback to register with the EM

>>>> after the cpufreq policy is properly initialized.

>>>>

>>>> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

>>>> ---

>>>>    drivers/cpufreq/scmi-cpufreq.c | 55 ++++++++++++++++++++--------------

>>>>    1 file changed, 32 insertions(+), 23 deletions(-)

>>>>

>>>> diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c

>>>> index 75f818d04b48..b916c9e22921 100644

>>>> --- a/drivers/cpufreq/scmi-cpufreq.c

>>>> +++ b/drivers/cpufreq/scmi-cpufreq.c

>>>> @@ -22,7 +22,9 @@

>>>>    struct scmi_data {

>>>>    	int domain_id;

>>>> +	int nr_opp;

>>>>    	struct device *cpu_dev;

>>>> +	cpumask_var_t opp_shared_cpus;

>>>

>>> Can we use policy->related_cpus and friends directly in the callback

>>

>> Unfortunately not. This tricky setup code was introduced because we may

>> have a platform with per-CPU policy, so single bit set in

>> policy->related_cpus, but we want EAS to be still working on set

>> of CPUs. That's why we construct temporary cpumask and pass it to EM.

> 

> Aha, I see this now. Hmm, those platforms better have AMUs then,

> otherwise PELT signals will be wonky ...


That's the plan, to have the AMUs. We suggest that, but reality would
tell... ;)

> 

> I was going to suggest using dev_pm_opp_get_sharing_cpus() from the

> callback instead, but maybe that's overkill as we'd need to allocate a

> temporary cpumask and all. So n/m this patch should be fine as is.

>
Lukasz Luba Aug. 11, 2021, 4:32 p.m. UTC | #5
On 8/11/21 12:58 PM, Viresh Kumar wrote:
> Set the newly added .register_em() callback to register with the EM

> after the cpufreq policy is properly initialized.

> 

> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

> ---

>   drivers/cpufreq/scmi-cpufreq.c | 55 ++++++++++++++++++++--------------

>   1 file changed, 32 insertions(+), 23 deletions(-)


>   

> +static void scmi_cpufreq_register_em(struct cpufreq_policy *policy)

> +{

> +	struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power);

> +	bool power_scale_mw = perf_ops->power_scale_mw_get(ph);

> +	struct scmi_data *priv = policy->driver_data;

> +

> +	em_dev_register_perf_domain(get_cpu_device(policy->cpu), priv->nr_opp,

> +				    &em_cb, priv->opp_shared_cpus,

> +				    power_scale_mw);


I would free the priv->opp_shared_cpus mask here, since we don't
need it anymore and memory can be reclaimed. Don't forget this
setup would be called N CPUs times, on this per-CPU policy platform.

If freed here, then also there wouldn't be a need to free it in
scmi_cpufreq_exit() so you can remove it from there.

> +}

> +

>   static struct cpufreq_driver scmi_cpufreq_driver = {

>   	.name	= "scmi",

>   	.flags	= CPUFREQ_HAVE_GOVERNOR_PER_POLICY |

> @@ -261,6 +269,7 @@ static struct cpufreq_driver scmi_cpufreq_driver = {

>   	.get	= scmi_cpufreq_get_rate,

>   	.init	= scmi_cpufreq_init,

>   	.exit	= scmi_cpufreq_exit,

> +	.register_em	= scmi_cpufreq_register_em,

>   };

>   

>   static int scmi_cpufreq_probe(struct scmi_device *sdev)

> 


I will test&review this patch on Monday when I re-flash custom FW to my
Juno (just to be sure that this per-CPU cpufreq + shared EM/EAS works).
Viresh Kumar Aug. 12, 2021, 3:53 a.m. UTC | #6
On 11-08-21, 14:17, Quentin Perret wrote:
> Also, we can probably afford calling dev_pm_opp_get_opp_count() from the

> em_register callback as it is not a hot path, which would avoid wasting

> some 'resident' memory here that is only used during init.


We also need to make sure that OPPs are available in init(), else we
fail. So, we can't really move that out.

-- 
viresh
Viresh Kumar Aug. 12, 2021, 4:22 a.m. UTC | #7
On 11-08-21, 17:32, Lukasz Luba wrote:
> 

> 

> On 8/11/21 12:58 PM, Viresh Kumar wrote:

> > Set the newly added .register_em() callback to register with the EM

> > after the cpufreq policy is properly initialized.

> > 

> > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

> > ---

> >   drivers/cpufreq/scmi-cpufreq.c | 55 ++++++++++++++++++++--------------

> >   1 file changed, 32 insertions(+), 23 deletions(-)

> 

> > +static void scmi_cpufreq_register_em(struct cpufreq_policy *policy)

> > +{

> > +	struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power);

> > +	bool power_scale_mw = perf_ops->power_scale_mw_get(ph);

> > +	struct scmi_data *priv = policy->driver_data;

> > +

> > +	em_dev_register_perf_domain(get_cpu_device(policy->cpu), priv->nr_opp,

> > +				    &em_cb, priv->opp_shared_cpus,

> > +				    power_scale_mw);

> 

> I would free the priv->opp_shared_cpus mask here, since we don't

> need it anymore and memory can be reclaimed.


Yes, we don't need it anymore, but this isn't a good place to undo
what init() has done. Moreover, it is possible that register_em() may
not get called at all, if some error has occurred after init() has
successfully returned. It is always better to use exit() for such
things. It won't hurt a lot to keep this around anyway.

> Don't forget this

> setup would be called N CPUs times, on this per-CPU policy platform.


Yes, but EM will just ignore this call. Though I have made a change
here now to check for non-zero nr_opp to avoid the unnecessary call.

> If freed here, then also there wouldn't be a need to free it in

> scmi_cpufreq_exit() so you can remove it from there.


-- 
viresh
diff mbox series

Patch

diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 75f818d04b48..b916c9e22921 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -22,7 +22,9 @@ 
 
 struct scmi_data {
 	int domain_id;
+	int nr_opp;
 	struct device *cpu_dev;
+	cpumask_var_t opp_shared_cpus;
 };
 
 static struct scmi_protocol_handle *ph;
@@ -123,9 +125,6 @@  static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	struct device *cpu_dev;
 	struct scmi_data *priv;
 	struct cpufreq_frequency_table *freq_table;
-	struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power);
-	cpumask_var_t opp_shared_cpus;
-	bool power_scale_mw;
 
 	cpu_dev = get_cpu_device(policy->cpu);
 	if (!cpu_dev) {
@@ -133,9 +132,15 @@  static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 		return -ENODEV;
 	}
 
-	if (!zalloc_cpumask_var(&opp_shared_cpus, GFP_KERNEL))
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
 		return -ENOMEM;
 
+	if (!zalloc_cpumask_var(&priv->opp_shared_cpus, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto out_free_priv;
+	}
+
 	/* Obtain CPUs that share SCMI performance controls */
 	ret = scmi_get_sharing_cpus(cpu_dev, policy->cpus);
 	if (ret) {
@@ -148,14 +153,14 @@  static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	 * The OPP 'sharing cpus' info may come from DT through an empty opp
 	 * table and opp-shared.
 	 */
-	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, opp_shared_cpus);
-	if (ret || !cpumask_weight(opp_shared_cpus)) {
+	ret = dev_pm_opp_of_get_sharing_cpus(cpu_dev, priv->opp_shared_cpus);
+	if (ret || !cpumask_weight(priv->opp_shared_cpus)) {
 		/*
 		 * Either opp-table is not set or no opp-shared was found.
 		 * Use the CPU mask from SCMI to designate CPUs sharing an OPP
 		 * table.
 		 */
-		cpumask_copy(opp_shared_cpus, policy->cpus);
+		cpumask_copy(priv->opp_shared_cpus, policy->cpus);
 	}
 
 	 /*
@@ -180,7 +185,7 @@  static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 			goto out_free_opp;
 		}
 
-		ret = dev_pm_opp_set_sharing_cpus(cpu_dev, opp_shared_cpus);
+		ret = dev_pm_opp_set_sharing_cpus(cpu_dev, priv->opp_shared_cpus);
 		if (ret) {
 			dev_err(cpu_dev, "%s: failed to mark OPPs as shared: %d\n",
 				__func__, ret);
@@ -188,21 +193,13 @@  static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 			goto out_free_opp;
 		}
 
-		power_scale_mw = perf_ops->power_scale_mw_get(ph);
-		em_dev_register_perf_domain(cpu_dev, nr_opp, &em_cb,
-					    opp_shared_cpus, power_scale_mw);
-	}
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		ret = -ENOMEM;
-		goto out_free_opp;
+		priv->nr_opp = nr_opp;
 	}
 
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
-		goto out_free_priv;
+		goto out_free_opp;
 	}
 
 	priv->cpu_dev = cpu_dev;
@@ -223,17 +220,16 @@  static int scmi_cpufreq_init(struct cpufreq_policy *policy)
 	policy->fast_switch_possible =
 		perf_ops->fast_switch_possible(ph, cpu_dev);
 
-	free_cpumask_var(opp_shared_cpus);
 	return 0;
 
-out_free_priv:
-	kfree(priv);
-
 out_free_opp:
 	dev_pm_opp_remove_all_dynamic(cpu_dev);
 
 out_free_cpumask:
-	free_cpumask_var(opp_shared_cpus);
+	free_cpumask_var(priv->opp_shared_cpus);
+
+out_free_priv:
+	kfree(priv);
 
 	return ret;
 }
@@ -244,11 +240,23 @@  static int scmi_cpufreq_exit(struct cpufreq_policy *policy)
 
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
 	dev_pm_opp_remove_all_dynamic(priv->cpu_dev);
+	free_cpumask_var(priv->opp_shared_cpus);
 	kfree(priv);
 
 	return 0;
 }
 
+static void scmi_cpufreq_register_em(struct cpufreq_policy *policy)
+{
+	struct em_data_callback em_cb = EM_DATA_CB(scmi_get_cpu_power);
+	bool power_scale_mw = perf_ops->power_scale_mw_get(ph);
+	struct scmi_data *priv = policy->driver_data;
+
+	em_dev_register_perf_domain(get_cpu_device(policy->cpu), priv->nr_opp,
+				    &em_cb, priv->opp_shared_cpus,
+				    power_scale_mw);
+}
+
 static struct cpufreq_driver scmi_cpufreq_driver = {
 	.name	= "scmi",
 	.flags	= CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
@@ -261,6 +269,7 @@  static struct cpufreq_driver scmi_cpufreq_driver = {
 	.get	= scmi_cpufreq_get_rate,
 	.init	= scmi_cpufreq_init,
 	.exit	= scmi_cpufreq_exit,
+	.register_em	= scmi_cpufreq_register_em,
 };
 
 static int scmi_cpufreq_probe(struct scmi_device *sdev)