diff mbox series

[3/5] thermal: devfreq_cooling: add new registration functions with Energy Model

Message ID 20200921122007.29610-4-lukasz.luba@arm.com
State New
Headers show
Series [1/5] thermal: devfreq_cooling: change tracing function and arguments | expand

Commit Message

Lukasz Luba Sept. 21, 2020, 12:20 p.m. UTC
The Energy Model (EM) framework supports devices such as Devfreq. Create
new registration functions which automatically register EM for the thermal
devfreq_cooling devices. This patch prepares the code for coming changes
which are going to replace old power model with the new EM.

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
---
 drivers/thermal/devfreq_cooling.c | 99 ++++++++++++++++++++++++++++++-
 include/linux/devfreq_cooling.h   | 22 +++++++
 2 files changed, 120 insertions(+), 1 deletion(-)

Comments

Ionela Voinescu Oct. 7, 2020, 12:07 p.m. UTC | #1
Hi Lukasz,

On Monday 21 Sep 2020 at 13:20:05 (+0100), Lukasz Luba wrote:
> The Energy Model (EM) framework supports devices such as Devfreq. Create

> new registration functions which automatically register EM for the thermal

> devfreq_cooling devices. This patch prepares the code for coming changes

> which are going to replace old power model with the new EM.

> 

> Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>

> ---

>  drivers/thermal/devfreq_cooling.c | 99 ++++++++++++++++++++++++++++++-

>  include/linux/devfreq_cooling.h   | 22 +++++++

>  2 files changed, 120 insertions(+), 1 deletion(-)

> 

> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c

> index cf045bd4d16b..7e091e795284 100644

> --- a/drivers/thermal/devfreq_cooling.c

> +++ b/drivers/thermal/devfreq_cooling.c

> @@ -50,6 +50,8 @@ static DEFINE_IDA(devfreq_ida);

>   * @capped_state:	index to cooling state with in dynamic power budget

>   * @req_max_freq:	PM QoS request for limiting the maximum frequency

>   *			of the devfreq device.

> + * @em:		Energy Model which represents the associated Devfreq device

                                     ^^^^^^^^^^^^^^^^
				     for
> + * @em_registered:	Devfreq cooling registered the EM and should free it.

>   */

>  struct devfreq_cooling_device {

>  	int id;

> @@ -63,6 +65,8 @@ struct devfreq_cooling_device {

>  	u32 res_util;

>  	int capped_state;

>  	struct dev_pm_qos_request req_max_freq;

> +	struct em_perf_domain *em;

> +	bool em_registered;

>  };

>  

>  static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,

> @@ -586,22 +590,115 @@ struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)

>  }

>  EXPORT_SYMBOL_GPL(devfreq_cooling_register);

>  

> +/**

> + * devfreq_cooling_em_register_power() - Register devfreq cooling device with

> + *		power information and attempt to register Energy Model (EM)


It took me a while to understand the differences between devfreq
register functions and it left me with a nagging feeling that we don't
need all of them. Also, looking over the cpufreq cooling devices, they
keep their registering interfaces quite simple.

With the functions added by this patch, the devfreq cooling devices will have:
 - old:
       of_devfreq_cooling_register_power
       of_devfreq_cooling_register
       devfreq_cooling_register
       devfreq_cooling_unregister
 - new:
       devfreq_cooling_em_register_power
       devfreq_cooling_em_register

My question is whether we actually need the two new
devfreq_cooling_em_register_power() and devfreq_cooling_em_register()?

The power_ops and the em are dependent on one another, so could we
extend the of_devfreq_cooling_register_power() to do the additional em
registration. We only need a way to pass the em_cb and I think that
could fit nicely in devfreq_cooling_power.

To be noted that I've reviewed these interfaces in the context of the
final state of devfreq_cooling.c, after the changes in 4/5.

> + * @df:		Pointer to devfreq device.

> + * @dfc_power:	Pointer to devfreq_cooling_power.

> + * @em_cb:	Callback functions providing the data of the EM

> + *

> + * Register a devfreq cooling device and attempt to register Energy Model. The

> + * available OPPs must be registered for the device.

> + *

> + * If @dfc_power is provided, the cooling device is registered with the

> + * power extensions. If @em_cb is provided it will be called for each OPP to

> + * calculate power value and cost. If @em_cb is not provided then simple Energy

> + * Model is going to be used, which requires "dynamic-power-coefficient" a

> + * devicetree property.

> + */

> +struct thermal_cooling_device *

> +devfreq_cooling_em_register_power(struct devfreq *df,

> +				  struct devfreq_cooling_power *dfc_power,

> +				  struct em_data_callback *em_cb)

> +{

> +	struct thermal_cooling_device *cdev;

> +	struct devfreq_cooling_device *dfc;

> +	struct device_node *np = NULL;

> +	struct device *dev;

> +	int nr_opp, ret;

> +

> +	if (IS_ERR_OR_NULL(df))

> +		return ERR_PTR(-EINVAL);

> +

> +	dev = df->dev.parent;

> +

> +	if (em_cb) {

> +		nr_opp = dev_pm_opp_get_opp_count(dev);

> +		if (nr_opp <= 0) {

> +			dev_err(dev, "No valid OPPs found\n");

> +			return ERR_PTR(-EINVAL);

> +		}

> +

> +		ret = em_dev_register_perf_domain(dev, nr_opp, em_cb, NULL);

> +	} else {

> +		ret = dev_pm_opp_of_register_em(dev, NULL);

> +	}

> +

> +	if (ret)

> +		dev_warn(dev, "Unable to register EM for devfreq cooling device (%d)\n",

> +			 ret);

> +

> +	if (dev->of_node)

> +		np = of_node_get(dev->of_node);

> +

> +	cdev = of_devfreq_cooling_register_power(np, df, dfc_power);

> +

> +	if (np)

> +		of_node_put(np);

> +

> +	if (IS_ERR_OR_NULL(cdev)) {

> +		if (!ret)

> +			em_dev_unregister_perf_domain(dev);

> +	} else {

> +		dfc = cdev->devdata;

> +		dfc->em_registered = !ret;

> +	}

> +

> +	return cdev;

> +}

> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register_power);

> +

> +/**

> + * devfreq_cooling_em_register() - Register devfreq cooling device together

> + *				with Energy Model.

> + * @df:		Pointer to devfreq device.

> + * @em_cb:	Callback functions providing the data of the Energy Model

> + *

> + * This function attempts to register Energy Model for devfreq device and then

> + * register the devfreq cooling device.

> + */

> +struct thermal_cooling_device *

> +devfreq_cooling_em_register(struct devfreq *df, struct em_data_callback *em_cb)

> +{

> +	return devfreq_cooling_em_register_power(df, NULL, em_cb);

> +}

> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);

> +

>  /**

>   * devfreq_cooling_unregister() - Unregister devfreq cooling device.

>   * @cdev: Pointer to devfreq cooling device to unregister.

> + *

> + * Unregisters devfreq cooling device and related Energy Model if it was

> + * present.

>   */

>  void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)

>  {

>  	struct devfreq_cooling_device *dfc;

> +	struct device *dev;

>  

> -	if (!cdev)

> +	if (IS_ERR_OR_NULL(cdev))

>  		return;

>  

>  	dfc = cdev->devdata;

> +	dev = dfc->devfreq->dev.parent;

>  

>  	thermal_cooling_device_unregister(dfc->cdev);

>  	ida_simple_remove(&devfreq_ida, dfc->id);

>  	dev_pm_qos_remove_request(&dfc->req_max_freq);

> +

> +	if (dfc->em_registered)

> +		em_dev_unregister_perf_domain(dev);


Nit: Isn't it enough to check if dev->em_pd != NULL to be able to
unregister the perf_domain? That would remove the need for
dfc->em_registered.

I suppose one could say that's using implementation details on how the
EM is built and stored and we should not rely on it, so it's up to you
if you want to change it.

Kind regards,
Ionela.

> +

>  	kfree(dfc->power_table);

>  	kfree(dfc->freq_table);

>  

> diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h

> index 9df2dfca68dd..19868fb922f1 100644

> --- a/include/linux/devfreq_cooling.h

> +++ b/include/linux/devfreq_cooling.h

> @@ -11,6 +11,7 @@

>  #define __DEVFREQ_COOLING_H__

>  

>  #include <linux/devfreq.h>

> +#include <linux/energy_model.h>

>  #include <linux/thermal.h>

>  

>  

> @@ -65,6 +66,13 @@ struct thermal_cooling_device *

>  of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);

>  struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df);

>  void devfreq_cooling_unregister(struct thermal_cooling_device *dfc);

> +struct thermal_cooling_device *

> +devfreq_cooling_em_register_power(struct devfreq *df,

> +				  struct devfreq_cooling_power *dfc_power,

> +				  struct em_data_callback *em_cb);

> +struct thermal_cooling_device *

> +devfreq_cooling_em_register(struct devfreq *df,

> +			    struct em_data_callback *em_cb);

>  

>  #else /* !CONFIG_DEVFREQ_THERMAL */

>  

> @@ -87,6 +95,20 @@ devfreq_cooling_register(struct devfreq *df)

>  	return ERR_PTR(-EINVAL);

>  }

>  

> +static inline struct thermal_cooling_device *

> +devfreq_cooling_em_register_power(struct devfreq *df,

> +				  struct devfreq_cooling_power *dfc_power,

> +				  struct em_data_callback *em_cb)

> +{

> +	return ERR_PTR(-EINVAL);

> +}

> +

> +static inline struct thermal_cooling_device *

> +devfreq_cooling_em_register(struct devfreq *df,	struct em_data_callback *em_cb)

> +{

> +	return ERR_PTR(-EINVAL);

> +}

> +

>  static inline void

>  devfreq_cooling_unregister(struct thermal_cooling_device *dfc)

>  {

> -- 

> 2.17.1

> 

> _______________________________________________

> dri-devel mailing list

> dri-devel@lists.freedesktop.org

> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Lukasz Luba Oct. 22, 2020, 11:17 a.m. UTC | #2
On 10/7/20 1:07 PM, Ionela Voinescu wrote:
> Hi Lukasz,
> 
> On Monday 21 Sep 2020 at 13:20:05 (+0100), Lukasz Luba wrote:
>> The Energy Model (EM) framework supports devices such as Devfreq. Create
>> new registration functions which automatically register EM for the thermal
>> devfreq_cooling devices. This patch prepares the code for coming changes
>> which are going to replace old power model with the new EM.
>>
>> Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
>> ---
>>   drivers/thermal/devfreq_cooling.c | 99 ++++++++++++++++++++++++++++++-
>>   include/linux/devfreq_cooling.h   | 22 +++++++
>>   2 files changed, 120 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
>> index cf045bd4d16b..7e091e795284 100644
>> --- a/drivers/thermal/devfreq_cooling.c
>> +++ b/drivers/thermal/devfreq_cooling.c
>> @@ -50,6 +50,8 @@ static DEFINE_IDA(devfreq_ida);
>>    * @capped_state:	index to cooling state with in dynamic power budget
>>    * @req_max_freq:	PM QoS request for limiting the maximum frequency
>>    *			of the devfreq device.
>> + * @em:		Energy Model which represents the associated Devfreq device
>                                       ^^^^^^^^^^^^^^^^
> 				     for

I will change it.

>> + * @em_registered:	Devfreq cooling registered the EM and should free it.
>>    */
>>   struct devfreq_cooling_device {
>>   	int id;
>> @@ -63,6 +65,8 @@ struct devfreq_cooling_device {
>>   	u32 res_util;
>>   	int capped_state;
>>   	struct dev_pm_qos_request req_max_freq;
>> +	struct em_perf_domain *em;
>> +	bool em_registered;
>>   };
>>   
>>   static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
>> @@ -586,22 +590,115 @@ struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
>>   }
>>   EXPORT_SYMBOL_GPL(devfreq_cooling_register);
>>   
>> +/**
>> + * devfreq_cooling_em_register_power() - Register devfreq cooling device with
>> + *		power information and attempt to register Energy Model (EM)
> 
> It took me a while to understand the differences between devfreq
> register functions and it left me with a nagging feeling that we don't
> need all of them. Also, looking over the cpufreq cooling devices, they
> keep their registering interfaces quite simple.

This was discussed in previous series, related to EM core changes.
It was requested to have a helper registration function which would
create EM automatically.

> 
> With the functions added by this patch, the devfreq cooling devices will have:
>   - old:
>         of_devfreq_cooling_register_power
>         of_devfreq_cooling_register
>         devfreq_cooling_register
>         devfreq_cooling_unregister
>   - new:
>         devfreq_cooling_em_register_power
>         devfreq_cooling_em_register
> 
> My question is whether we actually need the two new
> devfreq_cooling_em_register_power() and devfreq_cooling_em_register()?

It is just for consistency, with older scheme. It is only a wrapper, one
line, with default NULL. This scheme is common in thermal and some other
frameworks.

> 
> The power_ops and the em are dependent on one another, so could we
> extend the of_devfreq_cooling_register_power() to do the additional em
> registration. We only need a way to pass the em_cb and I think that
> could fit nicely in devfreq_cooling_power.

No, they aren't 'dependent on one another'. The EM usage doesn't depend
on presence of power_ops. Drivers might not support power_ops, but want
the framework still use EM and do power estimation.

> 
> To be noted that I've reviewed these interfaces in the context of the
> final state of devfreq_cooling.c, after the changes in 4/5.
> 
>> + * @df:		Pointer to devfreq device.
>> + * @dfc_power:	Pointer to devfreq_cooling_power.
>> + * @em_cb:	Callback functions providing the data of the EM
>> + *
>> + * Register a devfreq cooling device and attempt to register Energy Model. The
>> + * available OPPs must be registered for the device.
>> + *
>> + * If @dfc_power is provided, the cooling device is registered with the
>> + * power extensions. If @em_cb is provided it will be called for each OPP to
>> + * calculate power value and cost. If @em_cb is not provided then simple Energy
>> + * Model is going to be used, which requires "dynamic-power-coefficient" a
>> + * devicetree property.
>> + */
>> +struct thermal_cooling_device *
>> +devfreq_cooling_em_register_power(struct devfreq *df,
>> +				  struct devfreq_cooling_power *dfc_power,
>> +				  struct em_data_callback *em_cb)
>> +{
>> +	struct thermal_cooling_device *cdev;
>> +	struct devfreq_cooling_device *dfc;
>> +	struct device_node *np = NULL;
>> +	struct device *dev;
>> +	int nr_opp, ret;
>> +
>> +	if (IS_ERR_OR_NULL(df))
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	dev = df->dev.parent;
>> +
>> +	if (em_cb) {
>> +		nr_opp = dev_pm_opp_get_opp_count(dev);
>> +		if (nr_opp <= 0) {
>> +			dev_err(dev, "No valid OPPs found\n");
>> +			return ERR_PTR(-EINVAL);
>> +		}
>> +
>> +		ret = em_dev_register_perf_domain(dev, nr_opp, em_cb, NULL);
>> +	} else {
>> +		ret = dev_pm_opp_of_register_em(dev, NULL);
>> +	}
>> +
>> +	if (ret)
>> +		dev_warn(dev, "Unable to register EM for devfreq cooling device (%d)\n",
>> +			 ret);
>> +
>> +	if (dev->of_node)
>> +		np = of_node_get(dev->of_node);
>> +
>> +	cdev = of_devfreq_cooling_register_power(np, df, dfc_power);
>> +
>> +	if (np)
>> +		of_node_put(np);
>> +
>> +	if (IS_ERR_OR_NULL(cdev)) {
>> +		if (!ret)
>> +			em_dev_unregister_perf_domain(dev);
>> +	} else {
>> +		dfc = cdev->devdata;
>> +		dfc->em_registered = !ret;
>> +	}
>> +
>> +	return cdev;
>> +}
>> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register_power);
>> +
>> +/**
>> + * devfreq_cooling_em_register() - Register devfreq cooling device together
>> + *				with Energy Model.
>> + * @df:		Pointer to devfreq device.
>> + * @em_cb:	Callback functions providing the data of the Energy Model
>> + *
>> + * This function attempts to register Energy Model for devfreq device and then
>> + * register the devfreq cooling device.
>> + */
>> +struct thermal_cooling_device *
>> +devfreq_cooling_em_register(struct devfreq *df, struct em_data_callback *em_cb)
>> +{
>> +	return devfreq_cooling_em_register_power(df, NULL, em_cb);
>> +}
>> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
>> +
>>   /**
>>    * devfreq_cooling_unregister() - Unregister devfreq cooling device.
>>    * @cdev: Pointer to devfreq cooling device to unregister.
>> + *
>> + * Unregisters devfreq cooling device and related Energy Model if it was
>> + * present.
>>    */
>>   void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>>   {
>>   	struct devfreq_cooling_device *dfc;
>> +	struct device *dev;
>>   
>> -	if (!cdev)
>> +	if (IS_ERR_OR_NULL(cdev))
>>   		return;
>>   
>>   	dfc = cdev->devdata;
>> +	dev = dfc->devfreq->dev.parent;
>>   
>>   	thermal_cooling_device_unregister(dfc->cdev);
>>   	ida_simple_remove(&devfreq_ida, dfc->id);
>>   	dev_pm_qos_remove_request(&dfc->req_max_freq);
>> +
>> +	if (dfc->em_registered)
>> +		em_dev_unregister_perf_domain(dev);
> 
> Nit: Isn't it enough to check if dev->em_pd != NULL to be able to
> unregister the perf_domain? That would remove the need for
> dfc->em_registered.

The devfreq cooling may only unregister the EM if it has registered it.
If any other code did the registration, it should unregister when it
finished using it.

> 
> I suppose one could say that's using implementation details on how the
> EM is built and stored and we should not rely on it, so it's up to you
> if you want to change it.
> 
> Kind regards,
> Ionela.
> 
>> +
>>   	kfree(dfc->power_table);
>>   	kfree(dfc->freq_table);
>>   
>> diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
>> index 9df2dfca68dd..19868fb922f1 100644
>> --- a/include/linux/devfreq_cooling.h
>> +++ b/include/linux/devfreq_cooling.h
>> @@ -11,6 +11,7 @@
>>   #define __DEVFREQ_COOLING_H__
>>   
>>   #include <linux/devfreq.h>
>> +#include <linux/energy_model.h>
>>   #include <linux/thermal.h>
>>   
>>   
>> @@ -65,6 +66,13 @@ struct thermal_cooling_device *
>>   of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
>>   struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df);
>>   void devfreq_cooling_unregister(struct thermal_cooling_device *dfc);
>> +struct thermal_cooling_device *
>> +devfreq_cooling_em_register_power(struct devfreq *df,
>> +				  struct devfreq_cooling_power *dfc_power,
>> +				  struct em_data_callback *em_cb);
>> +struct thermal_cooling_device *
>> +devfreq_cooling_em_register(struct devfreq *df,
>> +			    struct em_data_callback *em_cb);
>>   
>>   #else /* !CONFIG_DEVFREQ_THERMAL */
>>   
>> @@ -87,6 +95,20 @@ devfreq_cooling_register(struct devfreq *df)
>>   	return ERR_PTR(-EINVAL);
>>   }
>>   
>> +static inline struct thermal_cooling_device *
>> +devfreq_cooling_em_register_power(struct devfreq *df,
>> +				  struct devfreq_cooling_power *dfc_power,
>> +				  struct em_data_callback *em_cb)
>> +{
>> +	return ERR_PTR(-EINVAL);
>> +}
>> +
>> +static inline struct thermal_cooling_device *
>> +devfreq_cooling_em_register(struct devfreq *df,	struct em_data_callback *em_cb)
>> +{
>> +	return ERR_PTR(-EINVAL);
>> +}
>> +
>>   static inline void
>>   devfreq_cooling_unregister(struct thermal_cooling_device *dfc)
>>   {
>> -- 
>> 2.17.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
Ionela Voinescu Dec. 1, 2020, 2:05 p.m. UTC | #3
Hi,

On Thursday 22 Oct 2020 at 12:17:31 (+0100), Lukasz Luba wrote:
[..]

> > > +/**

> > > + * devfreq_cooling_em_register_power() - Register devfreq cooling device with

> > > + *		power information and attempt to register Energy Model (EM)

> > 

> > It took me a while to understand the differences between devfreq

> > register functions and it left me with a nagging feeling that we don't

> > need all of them. Also, looking over the cpufreq cooling devices, they

> > keep their registering interfaces quite simple.

> 

> This was discussed in previous series, related to EM core changes.

> It was requested to have a helper registration function which would

> create EM automatically.

> 

> > 

> > With the functions added by this patch, the devfreq cooling devices will have:

> >   - old:

> >         of_devfreq_cooling_register_power

> >         of_devfreq_cooling_register

> >         devfreq_cooling_register

> >         devfreq_cooling_unregister

> >   - new:

> >         devfreq_cooling_em_register_power

> >         devfreq_cooling_em_register

> > 

> > My question is whether we actually need the two new

> > devfreq_cooling_em_register_power() and devfreq_cooling_em_register()?

> 

> It is just for consistency, with older scheme. It is only a wrapper, one

> line, with default NULL. This scheme is common in thermal and some other

> frameworks.

> 

> > 

> > The power_ops and the em are dependent on one another, so could we

> > extend the of_devfreq_cooling_register_power() to do the additional em

> > registration. We only need a way to pass the em_cb and I think that

> > could fit nicely in devfreq_cooling_power.

> 

> No, they aren't 'dependent on one another'. The EM usage doesn't depend

> on presence of power_ops. Drivers might not support power_ops, but want

> the framework still use EM and do power estimation.

> 


Okay, wrong choice of words. There's only a one way dependency: you can't
use power_ops without an em, according to
of_devfreq_cooling_register_power().

Correct me if I'm wrong, but I see this as being okay as you still need
an em to give you the maximum power of a device in a certain state.

With this in mind, and taking in detail the possible calls of the
devfreq cooling register functions:

1. Register devfreq cooling device with energy model.
   (used in patch 5/5)

 -> devfreq_cooling_em_register()
    -> devfreq_cooling_em_register_power(dfc_power = NULL, em obtained
                                      through various methods)
      -> of_devfreq_cooling_register_power(same as above)

2. Register devfreq cooling device with power_ops and em:
   (not used)

 -> devfreq_cooling_em_register_power(dfc_power != NULL, em obtained
                                     through various methods)
   -> of_devfreq_cooling_register_power(same as above)

3. Register a devfreq cooling devices with power_ops but no em
   (not used)

 -> of_devfreq_cooling_register_power(dfc_power != NULL)


4. Register a devfreq cooling devices without any kind of power
   information (em or dfc_power/power_ops)

 -> devfreq_cooling_register() or of_devfreq_cooling_register()
   -> of_devfreq_cooling_register_power(dfc_power = NULL)


Given this, aren't we ending up with some possible calls to these
registration functions that don't make sense? That is case 3, as
of_devfreq_cooling_register_power() could not assign and later use
power_ops without an em. For this usecase, 2 should be used instead.

Therefore, can't the same be achieved by collapsing
devfreq_cooling_em_register_power() into
of_devfreq_cooling_register_power()? (with the user having the
possibility to provide the em callback similarly to how get_real_power()
is provided - in devfreq_cooling_power).

IMO is cleaner to unify the functionality (registration and callbacks)
of cooling devices with power capabilities (based on em alone or together
with power_ops). Otherwise we just create confusion for users registering
cooling devices not knowing which function to call.

If this has been discussed previously and I'm missing some details,
please provide some links to the discussions.

Thank you for the patience :).

Ionela.
Lukasz Luba Dec. 1, 2020, 2:37 p.m. UTC | #4
On 12/1/20 2:05 PM, Ionela Voinescu wrote:
> Hi,

> 

> On Thursday 22 Oct 2020 at 12:17:31 (+0100), Lukasz Luba wrote:

> [..]

> 

>>>> +/**

>>>> + * devfreq_cooling_em_register_power() - Register devfreq cooling device with

>>>> + *		power information and attempt to register Energy Model (EM)

>>>

>>> It took me a while to understand the differences between devfreq

>>> register functions and it left me with a nagging feeling that we don't

>>> need all of them. Also, looking over the cpufreq cooling devices, they

>>> keep their registering interfaces quite simple.

>>

>> This was discussed in previous series, related to EM core changes.

>> It was requested to have a helper registration function which would

>> create EM automatically.

>>

>>>

>>> With the functions added by this patch, the devfreq cooling devices will have:

>>>    - old:

>>>          of_devfreq_cooling_register_power

>>>          of_devfreq_cooling_register

>>>          devfreq_cooling_register

>>>          devfreq_cooling_unregister

>>>    - new:

>>>          devfreq_cooling_em_register_power

>>>          devfreq_cooling_em_register

>>>

>>> My question is whether we actually need the two new

>>> devfreq_cooling_em_register_power() and devfreq_cooling_em_register()?

>>

>> It is just for consistency, with older scheme. It is only a wrapper, one

>> line, with default NULL. This scheme is common in thermal and some other

>> frameworks.

>>

>>>

>>> The power_ops and the em are dependent on one another, so could we

>>> extend the of_devfreq_cooling_register_power() to do the additional em

>>> registration. We only need a way to pass the em_cb and I think that

>>> could fit nicely in devfreq_cooling_power.

>>

>> No, they aren't 'dependent on one another'. The EM usage doesn't depend

>> on presence of power_ops. Drivers might not support power_ops, but want

>> the framework still use EM and do power estimation.

>>

> 

> Okay, wrong choice of words. There's only a one way dependency: you can't

> use power_ops without an em, according to

> of_devfreq_cooling_register_power().

> 

> Correct me if I'm wrong, but I see this as being okay as you still need

> an em to give you the maximum power of a device in a certain state.

> 

> With this in mind, and taking in detail the possible calls of the

> devfreq cooling register functions:

> 

> 1. Register devfreq cooling device with energy model.

>     (used in patch 5/5)

> 

>   -> devfreq_cooling_em_register()

>      -> devfreq_cooling_em_register_power(dfc_power = NULL, em obtained

>                                        through various methods)

>        -> of_devfreq_cooling_register_power(same as above)

> 

> 2. Register devfreq cooling device with power_ops and em:

>     (not used)

> 

>   -> devfreq_cooling_em_register_power(dfc_power != NULL, em obtained

>                                       through various methods)

>     -> of_devfreq_cooling_register_power(same as above)

> 

> 3. Register a devfreq cooling devices with power_ops but no em

>     (not used)

> 

>   -> of_devfreq_cooling_register_power(dfc_power != NULL)

> 

> 

> 4. Register a devfreq cooling devices without any kind of power

>     information (em or dfc_power/power_ops)

> 

>   -> devfreq_cooling_register() or of_devfreq_cooling_register()

>     -> of_devfreq_cooling_register_power(dfc_power = NULL)

> 

> 

> Given this, aren't we ending up with some possible calls to these

> registration functions that don't make sense? That is case 3, as

> of_devfreq_cooling_register_power() could not assign and later use

> power_ops without an em. For this usecase, 2 should be used instead.


In use case 3. you missed that the driver could registered EM by itself.
Maybe wanted to manage the EM internally, for various reasons. Then this
registration use case 3. makes sense.

> 

> Therefore, can't the same be achieved by collapsing

> devfreq_cooling_em_register_power() into

> of_devfreq_cooling_register_power()? (with the user having the

> possibility to provide the em callback similarly to how get_real_power()

> is provided - in devfreq_cooling_power).

> 

> IMO is cleaner to unify the functionality (registration and callbacks)

> of cooling devices with power capabilities (based on em alone or together

> with power_ops). Otherwise we just create confusion for users registering

> cooling devices not knowing which function to call.


I don't want to add the code from devfreq_cooling_em_register_power()
into the of_devfreq_cooling_register_power(), these are pretty dense
functions with complicated error handling paths.
In this shape and a few wrappers, which help users to register according
to their needs, it looks OK.

There will be always a review of the coming drivers which would like to
register.

> 

> If this has been discussed previously and I'm missing some details,

> please provide some links to the discussions.

> 

> Thank you for the patience :).

> 

> Ionela.

>
Ionela Voinescu Dec. 1, 2020, 3:02 p.m. UTC | #5
On Tuesday 01 Dec 2020 at 14:37:58 (+0000), Lukasz Luba wrote:
> 

> 

> On 12/1/20 2:05 PM, Ionela Voinescu wrote:

> > Hi,

> > 

> > On Thursday 22 Oct 2020 at 12:17:31 (+0100), Lukasz Luba wrote:

> > [..]

> > 

> > > > > +/**

> > > > > + * devfreq_cooling_em_register_power() - Register devfreq cooling device with

> > > > > + *		power information and attempt to register Energy Model (EM)

> > > > 

> > > > It took me a while to understand the differences between devfreq

> > > > register functions and it left me with a nagging feeling that we don't

> > > > need all of them. Also, looking over the cpufreq cooling devices, they

> > > > keep their registering interfaces quite simple.

> > > 

> > > This was discussed in previous series, related to EM core changes.

> > > It was requested to have a helper registration function which would

> > > create EM automatically.

> > > 

> > > > 

> > > > With the functions added by this patch, the devfreq cooling devices will have:

> > > >    - old:

> > > >          of_devfreq_cooling_register_power

> > > >          of_devfreq_cooling_register

> > > >          devfreq_cooling_register

> > > >          devfreq_cooling_unregister

> > > >    - new:

> > > >          devfreq_cooling_em_register_power

> > > >          devfreq_cooling_em_register

> > > > 

> > > > My question is whether we actually need the two new

> > > > devfreq_cooling_em_register_power() and devfreq_cooling_em_register()?

> > > 

> > > It is just for consistency, with older scheme. It is only a wrapper, one

> > > line, with default NULL. This scheme is common in thermal and some other

> > > frameworks.

> > > 

> > > > 

> > > > The power_ops and the em are dependent on one another, so could we

> > > > extend the of_devfreq_cooling_register_power() to do the additional em

> > > > registration. We only need a way to pass the em_cb and I think that

> > > > could fit nicely in devfreq_cooling_power.

> > > 

> > > No, they aren't 'dependent on one another'. The EM usage doesn't depend

> > > on presence of power_ops. Drivers might not support power_ops, but want

> > > the framework still use EM and do power estimation.

> > > 

> > 

> > Okay, wrong choice of words. There's only a one way dependency: you can't

> > use power_ops without an em, according to

> > of_devfreq_cooling_register_power().

> > 

> > Correct me if I'm wrong, but I see this as being okay as you still need

> > an em to give you the maximum power of a device in a certain state.

> > 

> > With this in mind, and taking in detail the possible calls of the

> > devfreq cooling register functions:

> > 

> > 1. Register devfreq cooling device with energy model.

> >     (used in patch 5/5)

> > 

> >   -> devfreq_cooling_em_register()

> >      -> devfreq_cooling_em_register_power(dfc_power = NULL, em obtained

> >                                        through various methods)

> >        -> of_devfreq_cooling_register_power(same as above)

> > 

> > 2. Register devfreq cooling device with power_ops and em:

> >     (not used)

> > 

> >   -> devfreq_cooling_em_register_power(dfc_power != NULL, em obtained

> >                                       through various methods)

> >     -> of_devfreq_cooling_register_power(same as above)

> > 

> > 3. Register a devfreq cooling devices with power_ops but no em

> >     (not used)

> > 

> >   -> of_devfreq_cooling_register_power(dfc_power != NULL)

> > 

> > 

> > 4. Register a devfreq cooling devices without any kind of power

> >     information (em or dfc_power/power_ops)

> > 

> >   -> devfreq_cooling_register() or of_devfreq_cooling_register()

> >     -> of_devfreq_cooling_register_power(dfc_power = NULL)

> > 

> > 

> > Given this, aren't we ending up with some possible calls to these

> > registration functions that don't make sense? That is case 3, as

> > of_devfreq_cooling_register_power() could not assign and later use

> > power_ops without an em. For this usecase, 2 should be used instead.

> 

> In use case 3. you missed that the driver could registered EM by itself.

> Maybe wanted to manage the EM internally, for various reasons. Then this

> registration use case 3. makes sense.

> 


Yes, the code allows it but it would be unlikely.

> > 

> > Therefore, can't the same be achieved by collapsing

> > devfreq_cooling_em_register_power() into

> > of_devfreq_cooling_register_power()? (with the user having the

> > possibility to provide the em callback similarly to how get_real_power()

> > is provided - in devfreq_cooling_power).

> > 

> > IMO is cleaner to unify the functionality (registration and callbacks)

> > of cooling devices with power capabilities (based on em alone or together

> > with power_ops). Otherwise we just create confusion for users registering

> > cooling devices not knowing which function to call.

> 

> I don't want to add the code from devfreq_cooling_em_register_power()

> into the of_devfreq_cooling_register_power(), these are pretty dense

> functions with complicated error handling paths.

> In this shape and a few wrappers, which help users to register according

> to their needs, it looks OK.

> 

> There will be always a review of the coming drivers which would like to

> register.

> 


Okay, no other arguments from my part.

I'll now take a look over v2. I just wanted to get some of these design
choices out of the way first.

Thanks,
Ionela.

> > 

> > If this has been discussed previously and I'm missing some details,

> > please provide some links to the discussions.

> > 

> > Thank you for the patience :).

> > 

> > Ionela.

> >
diff mbox series

Patch

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index cf045bd4d16b..7e091e795284 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -50,6 +50,8 @@  static DEFINE_IDA(devfreq_ida);
  * @capped_state:	index to cooling state with in dynamic power budget
  * @req_max_freq:	PM QoS request for limiting the maximum frequency
  *			of the devfreq device.
+ * @em:		Energy Model which represents the associated Devfreq device
+ * @em_registered:	Devfreq cooling registered the EM and should free it.
  */
 struct devfreq_cooling_device {
 	int id;
@@ -63,6 +65,8 @@  struct devfreq_cooling_device {
 	u32 res_util;
 	int capped_state;
 	struct dev_pm_qos_request req_max_freq;
+	struct em_perf_domain *em;
+	bool em_registered;
 };
 
 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
@@ -586,22 +590,115 @@  struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
 }
 EXPORT_SYMBOL_GPL(devfreq_cooling_register);
 
+/**
+ * devfreq_cooling_em_register_power() - Register devfreq cooling device with
+ *		power information and attempt to register Energy Model (EM)
+ * @df:		Pointer to devfreq device.
+ * @dfc_power:	Pointer to devfreq_cooling_power.
+ * @em_cb:	Callback functions providing the data of the EM
+ *
+ * Register a devfreq cooling device and attempt to register Energy Model. The
+ * available OPPs must be registered for the device.
+ *
+ * If @dfc_power is provided, the cooling device is registered with the
+ * power extensions. If @em_cb is provided it will be called for each OPP to
+ * calculate power value and cost. If @em_cb is not provided then simple Energy
+ * Model is going to be used, which requires "dynamic-power-coefficient" a
+ * devicetree property.
+ */
+struct thermal_cooling_device *
+devfreq_cooling_em_register_power(struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power,
+				  struct em_data_callback *em_cb)
+{
+	struct thermal_cooling_device *cdev;
+	struct devfreq_cooling_device *dfc;
+	struct device_node *np = NULL;
+	struct device *dev;
+	int nr_opp, ret;
+
+	if (IS_ERR_OR_NULL(df))
+		return ERR_PTR(-EINVAL);
+
+	dev = df->dev.parent;
+
+	if (em_cb) {
+		nr_opp = dev_pm_opp_get_opp_count(dev);
+		if (nr_opp <= 0) {
+			dev_err(dev, "No valid OPPs found\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		ret = em_dev_register_perf_domain(dev, nr_opp, em_cb, NULL);
+	} else {
+		ret = dev_pm_opp_of_register_em(dev, NULL);
+	}
+
+	if (ret)
+		dev_warn(dev, "Unable to register EM for devfreq cooling device (%d)\n",
+			 ret);
+
+	if (dev->of_node)
+		np = of_node_get(dev->of_node);
+
+	cdev = of_devfreq_cooling_register_power(np, df, dfc_power);
+
+	if (np)
+		of_node_put(np);
+
+	if (IS_ERR_OR_NULL(cdev)) {
+		if (!ret)
+			em_dev_unregister_perf_domain(dev);
+	} else {
+		dfc = cdev->devdata;
+		dfc->em_registered = !ret;
+	}
+
+	return cdev;
+}
+EXPORT_SYMBOL_GPL(devfreq_cooling_em_register_power);
+
+/**
+ * devfreq_cooling_em_register() - Register devfreq cooling device together
+ *				with Energy Model.
+ * @df:		Pointer to devfreq device.
+ * @em_cb:	Callback functions providing the data of the Energy Model
+ *
+ * This function attempts to register Energy Model for devfreq device and then
+ * register the devfreq cooling device.
+ */
+struct thermal_cooling_device *
+devfreq_cooling_em_register(struct devfreq *df, struct em_data_callback *em_cb)
+{
+	return devfreq_cooling_em_register_power(df, NULL, em_cb);
+}
+EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
+
 /**
  * devfreq_cooling_unregister() - Unregister devfreq cooling device.
  * @cdev: Pointer to devfreq cooling device to unregister.
+ *
+ * Unregisters devfreq cooling device and related Energy Model if it was
+ * present.
  */
 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
 	struct devfreq_cooling_device *dfc;
+	struct device *dev;
 
-	if (!cdev)
+	if (IS_ERR_OR_NULL(cdev))
 		return;
 
 	dfc = cdev->devdata;
+	dev = dfc->devfreq->dev.parent;
 
 	thermal_cooling_device_unregister(dfc->cdev);
 	ida_simple_remove(&devfreq_ida, dfc->id);
 	dev_pm_qos_remove_request(&dfc->req_max_freq);
+
+	if (dfc->em_registered)
+		em_dev_unregister_perf_domain(dev);
+
 	kfree(dfc->power_table);
 	kfree(dfc->freq_table);
 
diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
index 9df2dfca68dd..19868fb922f1 100644
--- a/include/linux/devfreq_cooling.h
+++ b/include/linux/devfreq_cooling.h
@@ -11,6 +11,7 @@ 
 #define __DEVFREQ_COOLING_H__
 
 #include <linux/devfreq.h>
+#include <linux/energy_model.h>
 #include <linux/thermal.h>
 
 
@@ -65,6 +66,13 @@  struct thermal_cooling_device *
 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
 struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df);
 void devfreq_cooling_unregister(struct thermal_cooling_device *dfc);
+struct thermal_cooling_device *
+devfreq_cooling_em_register_power(struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power,
+				  struct em_data_callback *em_cb);
+struct thermal_cooling_device *
+devfreq_cooling_em_register(struct devfreq *df,
+			    struct em_data_callback *em_cb);
 
 #else /* !CONFIG_DEVFREQ_THERMAL */
 
@@ -87,6 +95,20 @@  devfreq_cooling_register(struct devfreq *df)
 	return ERR_PTR(-EINVAL);
 }
 
+static inline struct thermal_cooling_device *
+devfreq_cooling_em_register_power(struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power,
+				  struct em_data_callback *em_cb)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct thermal_cooling_device *
+devfreq_cooling_em_register(struct devfreq *df,	struct em_data_callback *em_cb)
+{
+	return ERR_PTR(-EINVAL);
+}
+
 static inline void
 devfreq_cooling_unregister(struct thermal_cooling_device *dfc)
 {