diff mbox

ARM: cpuidle: Support asymmetric idle definition

Message ID 1495212343-24873-1-git-send-email-daniel.lezcano@linaro.org
State Superseded
Headers show

Commit Message

Daniel Lezcano May 19, 2017, 4:45 p.m. UTC
Some hardware has clusters with different idle states. The current code does
not support this and fails, as it expects all the idle states to be identical.

Because of this, the Mediatek mtk8173 had to create the same idle state for a
big.LITTLE system, and now the HiSilicon 960 is facing the same situation.

Solve this by simply assuming that multiple drivers will be needed for all the
platforms using the ARM generic cpuidle driver, which makes sense because of the
different topologies we can support with a single kernel for ARM32 or ARM64.

Tested on:
 - 96boards: Hikey 620
 - 96boards: Hikey 960
 - 96boards: dragonboard410c
 - Mediatek 8173

Tested-by: Leo Yan <leo.yan@linaro.org>

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

---
 drivers/cpuidle/Kconfig.arm   |  1 +
 drivers/cpuidle/cpuidle-arm.c | 55 +++++++++++++++++++++++++++++--------------
 2 files changed, 38 insertions(+), 18 deletions(-)

-- 
2.7.4

Comments

Sudeep Holla May 22, 2017, 10:11 a.m. UTC | #1
On 19/05/17 17:45, Daniel Lezcano wrote:
> Some hardware have clusters with different idle states. The current code does

> not support this and fails as it expects all the idle states to be identical.

> 

> Because of this, the Mediatek mtk8173 had to create the same idle state for a

> big.Little system and now the Hisilicon 960 is facing the same situation.

> 


While I agree that we don't support them today, it's better to benchmark
and record/compare the gain we get with the support for cluster-based
idle states.

> Solve this by simply assuming the multiple driver will be needed for all the

> platforms using the ARM generic cpuidle driver which makes sense because of the

> different topologies we can support with a single kernel for ARM32 or ARM64.

> 


Unfortunately, it's not always true and for sure will break with the new
ARM DynamIQ [1]

> Tested on:

>  - 96boards: Hikey 620

>  - 96boards: Hikey 960

>  - 96boards: dragonboard410c

>  - Mediatek 8173

> 

> Tested-by: Leo Yan <leo.yan@linaro.org>

> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

> ---

>  drivers/cpuidle/Kconfig.arm   |  1 +

>  drivers/cpuidle/cpuidle-arm.c | 55 +++++++++++++++++++++++++++++--------------

>  2 files changed, 38 insertions(+), 18 deletions(-)

> 

> diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm

> index 21340e0..f521448 100644

> --- a/drivers/cpuidle/Kconfig.arm

> +++ b/drivers/cpuidle/Kconfig.arm

> @@ -4,6 +4,7 @@

>  config ARM_CPUIDLE

>          bool "Generic ARM/ARM64 CPU idle Driver"

>          select DT_IDLE_STATES

> +	select CPU_IDLE_MULTIPLE_DRIVERS

>          help

>            Select this to enable generic cpuidle driver for ARM.

>            It provides a generic idle driver whose idle states are configured

> diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c

> index f440d38..bec31d5 100644

> --- a/drivers/cpuidle/cpuidle-arm.c

> +++ b/drivers/cpuidle/cpuidle-arm.c

> @@ -18,6 +18,7 @@

>  #include <linux/module.h>

>  #include <linux/of.h>

>  #include <linux/slab.h>

> +#include <linux/topology.h>

>  

>  #include <asm/cpuidle.h>

>  

> @@ -44,7 +45,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev,

>  	return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);

>  }

>  

> -static struct cpuidle_driver arm_idle_driver = {

> +static struct cpuidle_driver arm_idle_driver __initdata = {

>  	.name = "arm_idle",

>  	.owner = THIS_MODULE,

>  	/*

> @@ -80,23 +81,40 @@ static const struct of_device_id arm_idle_state_match[] __initconst = {

>  static int __init arm_idle_init(void)

>  {

>  	int cpu, ret;

> -	struct cpuidle_driver *drv = &arm_idle_driver;

> +	struct cpuidle_driver *drv = NULL;

>  	struct cpuidle_device *dev;

>  

> -	/*

> -	 * Initialize idle states data, starting at index 1.

> -	 * This driver is DT only, if no DT idle states are detected (ret == 0)

> -	 * let the driver initialization fail accordingly since there is no

> -	 * reason to initialize the idle driver if only wfi is supported.

> -	 */

> -	ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);

> -	if (ret <= 0)

> -		return ret ? : -ENODEV;

> -

> -	ret = cpuidle_register_driver(drv);

> -	if (ret) {

> -		pr_err("Failed to register cpuidle driver\n");

> -		return ret;

> +	for_each_possible_cpu(cpu) {

> +

> +		if (drv && cpumask_test_cpu(cpu, drv->cpumask))

> +			continue;

> +

> +		ret = -ENOMEM;

> +

> +		drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);

> +		if (!drv)

> +			goto out_fail;

> +

> +		drv->cpumask = &cpu_topology[cpu].core_sibling;

> +


This is not always true and not architecturally guaranteed. So instead
of introducing this broken dependency, better to extract information
from the device tree.

-- 
Regards,
Sudeep

[1]
https://community.arm.com/processors/b/blog/posts/arm-dynamiq-technology-for-the-next-era-of-compute
Daniel Lezcano May 22, 2017, 10:20 a.m. UTC | #2
Hi Sudeep,


On 22/05/2017 12:11, Sudeep Holla wrote:
> 

> 

> On 19/05/17 17:45, Daniel Lezcano wrote:

>> Some hardware have clusters with different idle states. The current code does

>> not support this and fails as it expects all the idle states to be identical.

>>

>> Because of this, the Mediatek mtk8173 had to create the same idle state for a

>> big.Little system and now the Hisilicon 960 is facing the same situation.

>>

> 

> While I agree the we don't support them today, it's better to benchmark

> and record/compare the gain we get with the support for cluster based

> idle states.


Sorry, I don't get what you are talking about. What do you want to
benchmark ? Cluster idling ?

>> Solve this by simply assuming the multiple driver will be needed for all the

>> platforms using the ARM generic cpuidle driver which makes sense because of the

>> different topologies we can support with a single kernel for ARM32 or ARM64.

>>

> 

> Unfortunately, it's not true always and for sure will break with the new

> ARM DynamIQ [1]


We will see when a platform will be available for testing :)

>> Tested on:

>>  - 96boards: Hikey 620

>>  - 96boards: Hikey 960

>>  - 96boards: dragonboard410c

>>  - Mediatek 8173

>>

>> Tested-by: Leo Yan <leo.yan@linaro.org>

>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

>> ---

>>  drivers/cpuidle/Kconfig.arm   |  1 +

>>  drivers/cpuidle/cpuidle-arm.c | 55 +++++++++++++++++++++++++++++--------------

>>  2 files changed, 38 insertions(+), 18 deletions(-)

>>

>> diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm

>> index 21340e0..f521448 100644

>> --- a/drivers/cpuidle/Kconfig.arm

>> +++ b/drivers/cpuidle/Kconfig.arm

>> @@ -4,6 +4,7 @@

>>  config ARM_CPUIDLE

>>          bool "Generic ARM/ARM64 CPU idle Driver"

>>          select DT_IDLE_STATES

>> +	select CPU_IDLE_MULTIPLE_DRIVERS

>>          help

>>            Select this to enable generic cpuidle driver for ARM.

>>            It provides a generic idle driver whose idle states are configured

>> diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c

>> index f440d38..bec31d5 100644

>> --- a/drivers/cpuidle/cpuidle-arm.c

>> +++ b/drivers/cpuidle/cpuidle-arm.c

>> @@ -18,6 +18,7 @@

>>  #include <linux/module.h>

>>  #include <linux/of.h>

>>  #include <linux/slab.h>

>> +#include <linux/topology.h>

>>  

>>  #include <asm/cpuidle.h>

>>  

>> @@ -44,7 +45,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev,

>>  	return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);

>>  }

>>  

>> -static struct cpuidle_driver arm_idle_driver = {

>> +static struct cpuidle_driver arm_idle_driver __initdata = {

>>  	.name = "arm_idle",

>>  	.owner = THIS_MODULE,

>>  	/*

>> @@ -80,23 +81,40 @@ static const struct of_device_id arm_idle_state_match[] __initconst = {

>>  static int __init arm_idle_init(void)

>>  {

>>  	int cpu, ret;

>> -	struct cpuidle_driver *drv = &arm_idle_driver;

>> +	struct cpuidle_driver *drv = NULL;

>>  	struct cpuidle_device *dev;

>>  

>> -	/*

>> -	 * Initialize idle states data, starting at index 1.

>> -	 * This driver is DT only, if no DT idle states are detected (ret == 0)

>> -	 * let the driver initialization fail accordingly since there is no

>> -	 * reason to initialize the idle driver if only wfi is supported.

>> -	 */

>> -	ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);

>> -	if (ret <= 0)

>> -		return ret ? : -ENODEV;

>> -

>> -	ret = cpuidle_register_driver(drv);

>> -	if (ret) {

>> -		pr_err("Failed to register cpuidle driver\n");

>> -		return ret;

>> +	for_each_possible_cpu(cpu) {

>> +

>> +		if (drv && cpumask_test_cpu(cpu, drv->cpumask))

>> +			continue;

>> +

>> +		ret = -ENOMEM;

>> +

>> +		drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);

>> +		if (!drv)

>> +			goto out_fail;

>> +

>> +		drv->cpumask = &cpu_topology[cpu].core_sibling;

>> +

> 

> This is not always true and not architecturally guaranteed. So instead

> of introducing this broken dependency, better to extract information

> from the device tree.


Can you give an example of a broken dependency ?

The cpu topology information is extracted from the device tree. So if
the topology is broken, the DT is broken also. Otherwise, the topology
code must fix the broken dependency from the DT.





-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
Sudeep Holla May 22, 2017, 10:32 a.m. UTC | #3
On 22/05/17 11:20, Daniel Lezcano wrote:
> 

> Hi Sudeep,

> 

> 

> On 22/05/2017 12:11, Sudeep Holla wrote:

>>

>>

>> On 19/05/17 17:45, Daniel Lezcano wrote:

>>> Some hardware have clusters with different idle states. The current code does

>>> not support this and fails as it expects all the idle states to be identical.

>>>

>>> Because of this, the Mediatek mtk8173 had to create the same idle state for a

>>> big.Little system and now the Hisilicon 960 is facing the same situation.

>>>

>>

>> While I agree the we don't support them today, it's better to benchmark

>> and record/compare the gain we get with the support for cluster based

>> idle states.

> 

> Sorry, I don't get what you are talking about. What do you want to

> benchmark ? Cluster idling ?

> 


OK, I was not so clear. I had a brief chat with Lorenzo; we have a few
reasons to have this support:
1. Different number of states between clusters
2. Different latencies (this is the one I was referring to above; generally
   we keep worst case timings here and wanted to see if any platform
   measured improvements with different latencies in the idle states)

>>> Solve this by simply assuming the multiple driver will be needed for all the

>>> platforms using the ARM generic cpuidle driver which makes sense because of the

>>> different topologies we can support with a single kernel for ARM32 or ARM64.

>>>

>>

>> Unfortunately, it's not true always and for sure will break with the new

>> ARM DynamIQ [1]

> 

> We will see when a platform will be available for testing :)

> 


May not be too late ;), though I have no clue yet.

>>> Tested on:

>>>  - 96boards: Hikey 620

>>>  - 96boards: Hikey 960

>>>  - 96boards: dragonboard410c

>>>  - Mediatek 8173

>>>

>>> Tested-by: Leo Yan <leo.yan@linaro.org>

>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

>>> ---

>>>  drivers/cpuidle/Kconfig.arm   |  1 +

>>>  drivers/cpuidle/cpuidle-arm.c | 55 +++++++++++++++++++++++++++++--------------

>>>  2 files changed, 38 insertions(+), 18 deletions(-)

>>>

>>> diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm

>>> index 21340e0..f521448 100644

>>> --- a/drivers/cpuidle/Kconfig.arm

>>> +++ b/drivers/cpuidle/Kconfig.arm

>>> @@ -4,6 +4,7 @@

>>>  config ARM_CPUIDLE

>>>          bool "Generic ARM/ARM64 CPU idle Driver"

>>>          select DT_IDLE_STATES

>>> +	select CPU_IDLE_MULTIPLE_DRIVERS

>>>          help

>>>            Select this to enable generic cpuidle driver for ARM.

>>>            It provides a generic idle driver whose idle states are configured

>>> diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c

>>> index f440d38..bec31d5 100644

>>> --- a/drivers/cpuidle/cpuidle-arm.c

>>> +++ b/drivers/cpuidle/cpuidle-arm.c

>>> @@ -18,6 +18,7 @@

>>>  #include <linux/module.h>

>>>  #include <linux/of.h>

>>>  #include <linux/slab.h>

>>> +#include <linux/topology.h>

>>>  

>>>  #include <asm/cpuidle.h>

>>>  

>>> @@ -44,7 +45,7 @@ static int arm_enter_idle_state(struct cpuidle_device *dev,

>>>  	return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);

>>>  }

>>>  

>>> -static struct cpuidle_driver arm_idle_driver = {

>>> +static struct cpuidle_driver arm_idle_driver __initdata = {

>>>  	.name = "arm_idle",

>>>  	.owner = THIS_MODULE,

>>>  	/*

>>> @@ -80,23 +81,40 @@ static const struct of_device_id arm_idle_state_match[] __initconst = {

>>>  static int __init arm_idle_init(void)

>>>  {

>>>  	int cpu, ret;

>>> -	struct cpuidle_driver *drv = &arm_idle_driver;

>>> +	struct cpuidle_driver *drv = NULL;

>>>  	struct cpuidle_device *dev;

>>>  

>>> -	/*

>>> -	 * Initialize idle states data, starting at index 1.

>>> -	 * This driver is DT only, if no DT idle states are detected (ret == 0)

>>> -	 * let the driver initialization fail accordingly since there is no

>>> -	 * reason to initialize the idle driver if only wfi is supported.

>>> -	 */

>>> -	ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);

>>> -	if (ret <= 0)

>>> -		return ret ? : -ENODEV;

>>> -

>>> -	ret = cpuidle_register_driver(drv);

>>> -	if (ret) {

>>> -		pr_err("Failed to register cpuidle driver\n");

>>> -		return ret;

>>> +	for_each_possible_cpu(cpu) {

>>> +

>>> +		if (drv && cpumask_test_cpu(cpu, drv->cpumask))

>>> +			continue;

>>> +

>>> +		ret = -ENOMEM;

>>> +

>>> +		drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);

>>> +		if (!drv)

>>> +			goto out_fail;

>>> +

>>> +		drv->cpumask = &cpu_topology[cpu].core_sibling;

>>> +

>>

>> This is not always true and not architecturally guaranteed. So instead

>> of introducing this broken dependency, better to extract information

>> from the device tree.

> 

> Can you give an example of a broken dependency ?

> 

> The cpu topology information is extracted from the device tree. So

> if the topology is broken, the DT is broken also. Otherwise, the

> topology code must fix the broken dependency from the DT.

> 


No, I meant there's no guarantee that all designs must follow this rule.
I don't mean the CPU topology code or binding is broken. What I meant is
that linking CPU topology to CPU power domains is wrong. We should make use
of the DT to infer this information, as it's already there. Topology bindings
make no reference to power, and hence you simply can't infer that
information from them.


-- 
Regards,
Sudeep
Daniel Lezcano May 22, 2017, 12:41 p.m. UTC | #4
On 22/05/2017 12:32, Sudeep Holla wrote:
> 

> 

> On 22/05/17 11:20, Daniel Lezcano wrote:

>>

>> Hi Sudeep,

>>

>>

>> On 22/05/2017 12:11, Sudeep Holla wrote:

>>>

>>>

>>> On 19/05/17 17:45, Daniel Lezcano wrote:

>>>> Some hardware have clusters with different idle states. The current code does

>>>> not support this and fails as it expects all the idle states to be identical.

>>>>

>>>> Because of this, the Mediatek mtk8173 had to create the same idle state for a

>>>> big.Little system and now the Hisilicon 960 is facing the same situation.

>>>>

>>>

>>> While I agree the we don't support them today, it's better to benchmark

>>> and record/compare the gain we get with the support for cluster based

>>> idle states.

>>

>> Sorry, I don't get what you are talking about. What do you want to

>> benchmark ? Cluster idling ?

>>

> 

> OK, I was not so clear. I had a brief chat with Lorenzo, we have few

> reason to have this support:

> 1. Different number of states between clusters

> 2. Different latencies(this is the one I was referring above, generally

>    we keep worst case timings here and wanted to see if any platform

>    measured improvements with different latencies in the idle states)


I don't see the point. Are you calling into question the big.LITTLE design?

[ ... ]

>>>> +	for_each_possible_cpu(cpu) {

>>>> +

>>>> +		if (drv && cpumask_test_cpu(cpu, drv->cpumask))

>>>> +			continue;

>>>> +

>>>> +		ret = -ENOMEM;

>>>> +

>>>> +		drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);

>>>> +		if (!drv)

>>>> +			goto out_fail;

>>>> +

>>>> +		drv->cpumask = &cpu_topology[cpu].core_sibling;

>>>> +

>>>

>>> This is not always true and not architecturally guaranteed. So instead

>>> of introducing this broken dependency, better to extract information

>>> from the device tree.

>>

>> Can you give an example of a broken dependency ?

>>

>> The cpu topology information is extracted from the device tree. So

>> if the topology is broken, the DT is broken also. Otherwise, the

>> topology code must fix the broken dependency from the DT.

>>

> 

> No, I meant there's no guarantee that all designs must follow this rule.

> I don't mean CPU topology code or binding is broken. What I meant is

> linking CPU topology to CPU power domains is wrong. We should make use

> of DT you infer this information as it's already there. Topology bindings

> makes no reference to power and hence you simply can't infer that

> information from it.


Ok, I will have a look at how power domains can fit into this.

However, I'm curious to know of a platform with a cluster idle state
powering down only a subset of the CPUs belonging to the cluster.


-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
Sudeep Holla May 22, 2017, 1:02 p.m. UTC | #5
On 22/05/17 13:41, Daniel Lezcano wrote:
> On 22/05/2017 12:32, Sudeep Holla wrote:

>>

>>

>> On 22/05/17 11:20, Daniel Lezcano wrote:

>>>

>>> Hi Sudeep,

>>>

>>>

>>> On 22/05/2017 12:11, Sudeep Holla wrote:

>>>>

>>>>

>>>> On 19/05/17 17:45, Daniel Lezcano wrote:

>>>>> Some hardware have clusters with different idle states. The current code does

>>>>> not support this and fails as it expects all the idle states to be identical.

>>>>>

>>>>> Because of this, the Mediatek mtk8173 had to create the same idle state for a

>>>>> big.Little system and now the Hisilicon 960 is facing the same situation.

>>>>>

>>>>

>>>> While I agree the we don't support them today, it's better to benchmark

>>>> and record/compare the gain we get with the support for cluster based

>>>> idle states.

>>>

>>> Sorry, I don't get what you are talking about. What do you want to

>>> benchmark ? Cluster idling ?

>>>

>>

>> OK, I was not so clear. I had a brief chat with Lorenzo, we have few

>> reason to have this support:

>> 1. Different number of states between clusters

>> 2. Different latencies(this is the one I was referring above, generally

>>    we keep worst case timings here and wanted to see if any platform

>>    measured improvements with different latencies in the idle states)

> 

> I don't see the point. Are you putting into question the big little design?

>


Not exactly. Since they are generally worst case numbers, I wanted to
check if someone saw a real benefit with 2 different sets of values.
Anyway, that's not important or blocking; I just raised the point so that
we can attach some benchmarking results to this.

> [ ... ]

> 

>>>>> +	for_each_possible_cpu(cpu) {

>>>>> +

>>>>> +		if (drv && cpumask_test_cpu(cpu, drv->cpumask))

>>>>> +			continue;

>>>>> +

>>>>> +		ret = -ENOMEM;

>>>>> +

>>>>> +		drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);

>>>>> +		if (!drv)

>>>>> +			goto out_fail;

>>>>> +

>>>>> +		drv->cpumask = &cpu_topology[cpu].core_sibling;

>>>>> +

>>>>

>>>> This is not always true and not architecturally guaranteed. So instead

>>>> of introducing this broken dependency, better to extract information

>>>> from the device tree.

>>>

>>> Can you give an example of a broken dependency ?

>>>

>>> The cpu topology information is extracted from the device tree. So

>>> if the topology is broken, the DT is broken also. Otherwise, the

>>> topology code must fix the broken dependency from the DT.

>>>

>>

>> No, I meant there's no guarantee that all designs must follow this rule.

>> I don't mean CPU topology code or binding is broken. What I meant is

>> linking CPU topology to CPU power domains is wrong. We should make use

>> of DT you infer this information as it's already there. Topology bindings

>> makes no reference to power and hence you simply can't infer that

>> information from it.

> 

> Ok, I will have a look how power domains can fit in this.

> 

> However I'm curious to know a platform with a cluster idle state

> powering down only a subset of CPUs belonging to the cluster.

> 


We can't reuse the CPU topology for power domains:
1. As I mentioned earlier, for sure it won't be the same with ARM DynamIQ.
2. Topology bindings strictly restrict themselves to topology and are not
connected with power domains. We also have separate power domain
bindings.

We need to separate topology and power domains. We have some dependencies
like this in the big.LITTLE drivers (both CPUFreq and CPUIdle), but those
dependencies must be removed as they are not architecturally guaranteed.
Lorenzo had a patch[1] to solve this issue; I can post the latest
version of it again and continue the discussion after some basic
rebase/testing.

-- 
Regards,
Sudeep

[1] http://www.spinics.net/lists/devicetree/msg76596.html
Leo Yan May 22, 2017, 1:34 p.m. UTC | #6
Hi Sudeep,

On Mon, May 22, 2017 at 02:02:12PM +0100, Sudeep Holla wrote:

[...]

> >>>> On 19/05/17 17:45, Daniel Lezcano wrote:

> >>>>> Some hardware have clusters with different idle states. The current code does

> >>>>> not support this and fails as it expects all the idle states to be identical.

> >>>>>

> >>>>> Because of this, the Mediatek mtk8173 had to create the same idle state for a

> >>>>> big.Little system and now the Hisilicon 960 is facing the same situation.

> >>>>>

> >>>>

> >>>> While I agree the we don't support them today, it's better to benchmark

> >>>> and record/compare the gain we get with the support for cluster based

> >>>> idle states.

> >>>

> >>> Sorry, I don't get what you are talking about. What do you want to

> >>> benchmark ? Cluster idling ?

> >>>

> >>

> >> OK, I was not so clear. I had a brief chat with Lorenzo, we have few

> >> reason to have this support:

> >> 1. Different number of states between clusters

> >> 2. Different latencies(this is the one I was referring above, generally

> >>    we keep worst case timings here and wanted to see if any platform

> >>    measured improvements with different latencies in the idle states)

> > 

> > I don't see the point. Are you putting into question the big little design?

> >

> 

> Not exactly. Since they are generally worst case number, I wanted to

> check if someone saw real benefit with 2 different set of values.

> Anyways that's not important or blocking, just raised a point, so that

> we can stick some benchmarking results with this.


In case you are interested in the Hikey960 idle states, you can see that
the two clusters have different idle states:
http://termbin.com/d7ed

[...]

Thanks,
Leo Yan
Sudeep Holla May 22, 2017, 1:59 p.m. UTC | #7
On 22/05/17 14:34, Leo Yan wrote:
> Hi Sudeep,

> 

> On Mon, May 22, 2017 at 02:02:12PM +0100, Sudeep Holla wrote:

> 

> [...]

> 

>>>>>> On 19/05/17 17:45, Daniel Lezcano wrote:

>>>>>>> Some hardware have clusters with different idle states. The current code does

>>>>>>> not support this and fails as it expects all the idle states to be identical.

>>>>>>>

>>>>>>> Because of this, the Mediatek mtk8173 had to create the same idle state for a

>>>>>>> big.Little system and now the Hisilicon 960 is facing the same situation.

>>>>>>>

>>>>>>

>>>>>> While I agree the we don't support them today, it's better to benchmark

>>>>>> and record/compare the gain we get with the support for cluster based

>>>>>> idle states.

>>>>>

>>>>> Sorry, I don't get what you are talking about. What do you want to

>>>>> benchmark ? Cluster idling ?

>>>>>

>>>>

>>>> OK, I was not so clear. I had a brief chat with Lorenzo, we have few

>>>> reason to have this support:

>>>> 1. Different number of states between clusters

>>>> 2. Different latencies(this is the one I was referring above, generally

>>>>    we keep worst case timings here and wanted to see if any platform

>>>>    measured improvements with different latencies in the idle states)

>>>

>>> I don't see the point. Are you putting into question the big little design?

>>>

>>

>> Not exactly. Since they are generally worst case number, I wanted to

>> check if someone saw real benefit with 2 different set of values.

>> Anyways that's not important or blocking, just raised a point, so that

>> we can stick some benchmarking results with this.

> 

> In case you are interesting for Hikey960 idle states, you could see

> the two clustsers have different idle states:

> http://termbin.com/d7ed


Probably this is off-topic, but I found CPU_NAP_0 really interesting:

1. CPU_NAP_0 has arm,psci-suspend-param as 0x00000000, which is generally
   not recommended, to avoid conflict with WFI

2. Initially I assumed CPU_NAP_0 is a retention/standby state, but then I
   saw "local-timer-stop", which indicates it's a power down state, which
   means arm,psci-suspend-param is wrong.

-- 
Regards,
Sudeep
Daniel Lezcano May 22, 2017, 2:48 p.m. UTC | #8
On 22/05/2017 15:02, Sudeep Holla wrote:

[ ... ]

>>>>>> +		drv->cpumask = &cpu_topology[cpu].core_sibling;

>>>>>> +

>>>>>

>>>>> This is not always true and not architecturally guaranteed. So instead

>>>>> of introducing this broken dependency, better to extract information

>>>>> from the device tree.

>>>>

>>>> Can you give an example of a broken dependency ?

>>>>

>>>> The cpu topology information is extracted from the device tree. So

>>>> if the topology is broken, the DT is broken also. Otherwise, the

>>>> topology code must fix the broken dependency from the DT.

>>>>

>>>

>>> No, I meant there's no guarantee that all designs must follow this rule.

>>> I don't mean CPU topology code or binding is broken. What I meant is

>>> linking CPU topology to CPU power domains is wrong. We should make use

>>> of DT you infer this information as it's already there. Topology bindings

>>> makes no reference to power and hence you simply can't infer that

>>> information from it.

>>

>> Ok, I will have a look how power domains can fit in this.

>>

>> However I'm curious to know a platform with a cluster idle state

>> powering down only a subset of CPUs belonging to the cluster.

>>

> 

> We can't reuse CPU topology for power domains:

> 1. As I mentioned earlier for sure, it won't be same with ARM DynamIQ.

> 2. Topology bindings strictly restrict themselves with topology and not

> connected with power-domains. We also have separate power domain

> bindings.


Yes, the theory is valid, but in practice, nowadays I don't see where we
have a cluster defined by a topology with a different cluster power domain.

By the way, do you have any pointer to documentation for the DynamIQ PM
design? I would be interested to have a look.

> We need to separate topology and power domains. We have some dependency

> like this in big little drivers(both CPUfreq and CPUIdle) but that

> dependencies must be removed as they are not architecturally guaranteed.

> Lorenzo had a patch[1] to solve this issue, I can post the latest

> version of it again and continue the discussion after some basic

> rebase/testing.


Actually, I am not convinced by the approach proposed in this patch.

Let me have a look at the idle power domains first; I do believe we can
do something much simpler.

Thanks.

  -- Daniel


-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
Sudeep Holla May 22, 2017, 3:02 p.m. UTC | #9
On 22/05/17 15:48, Daniel Lezcano wrote:
> On 22/05/2017 15:02, Sudeep Holla wrote:

> 

> [ ... ]

> 

>>>>>>> +		drv->cpumask = &cpu_topology[cpu].core_sibling;

>>>>>>> +

>>>>>>

>>>>>> This is not always true and not architecturally guaranteed. So instead

>>>>>> of introducing this broken dependency, better to extract information

>>>>>> from the device tree.

>>>>>

>>>>> Can you give an example of a broken dependency ?

>>>>>

>>>>> The cpu topology information is extracted from the device tree. So

>>>>> if the topology is broken, the DT is broken also. Otherwise, the

>>>>> topology code must fix the broken dependency from the DT.

>>>>>

>>>>

>>>> No, I meant there's no guarantee that all designs must follow this rule.

>>>> I don't mean CPU topology code or binding is broken. What I meant is

>>>> linking CPU topology to CPU power domains is wrong. We should make use

>>>> of DT you infer this information as it's already there. Topology bindings

>>>> makes no reference to power and hence you simply can't infer that

>>>> information from it.

>>>

>>> Ok, I will have a look how power domains can fit in this.

>>>

>>> However I'm curious to know a platform with a cluster idle state

>>> powering down only a subset of CPUs belonging to the cluster.

>>>

>>

>> We can't reuse CPU topology for power domains:

>> 1. As I mentioned earlier for sure, it won't be same with ARM DynamIQ.

>> 2. Topology bindings strictly restrict themselves with topology and not

>> connected with power-domains. We also have separate power domain

>> bindings.

> 

> Yes, the theory is valid, but practically nowadays I don't see where we

> have a cluster defined by a topology with a different cluster power domain.

> 


While I agree that it's true in current practice, in the past we have
seen "innovative designs". We initially had 2 clusters (big and little),
then we saw a 3rd cluster (big, little and tiny, or whatever you want to
call it). So, as it's not architecturally guaranteed, it's not nice to make
this assumption in a generic driver.

> By the way, if you have any pointer to documentation for DynamIQ PM and

> design? I would be interested to have a look.

> 


I don't have anything in detail. Excerpts from the link I sent earlier
indicate that it's possible and highly likely.

"DynamIQ supports multiple, configurable, performance domains within a
single cluster. These domains, consisting of single or multiple ARM
CPUs, can scale in performance and power with finer granularity than
previous quad-core clusters."

>> We need to separate topology and power domains. We have some dependency

>> like this in big little drivers(both CPUfreq and CPUIdle) but that

>> dependencies must be removed as they are not architecturally guaranteed.

>> Lorenzo had a patch[1] to solve this issue, I can post the latest

>> version of it again and continue the discussion after some basic

>> rebase/testing.

> 

> Actually, I am not convinced by the approach proposed in this patch.

> 

> Let me have a look at the idle power domain before, I do believe we can

> do something much more simple.

> 


OK, if you think so.

-- 
Regards,
Sudeep
Daniel Lezcano May 31, 2017, 4:40 p.m. UTC | #10
On 22/05/2017 17:02, Sudeep Holla wrote:
> 

> 

> On 22/05/17 15:48, Daniel Lezcano wrote:

>> On 22/05/2017 15:02, Sudeep Holla wrote:

>>

>> [ ... ]

>>

>>>>>>>> +		drv->cpumask = &cpu_topology[cpu].core_sibling;

>>>>>>>> +

>>>>>>>

>>>>>>> This is not always true and not architecturally guaranteed. So instead

>>>>>>> of introducing this broken dependency, better to extract information

>>>>>>> from the device tree.

>>>>>>

>>>>>> Can you give an example of a broken dependency ?

>>>>>>

>>>>>> The cpu topology information is extracted from the device tree. So

>>>>>> if the topology is broken, the DT is broken also. Otherwise, the

>>>>>> topology code must fix the broken dependency from the DT.

>>>>>>

>>>>>

>>>>> No, I meant there's no guarantee that all designs must follow this rule.

>>>>> I don't mean CPU topology code or binding is broken. What I meant is

>>>>> linking CPU topology to CPU power domains is wrong. We should make use

>>>>> of DT to infer this information as it's already there. Topology bindings

>>>>> makes no reference to power and hence you simply can't infer that

>>>>> information from it.

>>>>

>>>> Ok, I will have a look how power domains can fit in this.

>>>>

>>>> However I'm curious to know a platform with a cluster idle state

>>>> powering down only a subset of CPUs belonging to the cluster.

>>>>

>>>

>>> We can't reuse CPU topology for power domains:

>>> 1. As I mentioned earlier for sure, it won't be same with ARM DynamIQ.

>>> 2. Topology bindings strictly restrict themselves with topology and not

>>> connected with power-domains. We also have separate power domain

>>> bindings.

>>

>> Yes, the theory is valid, but practically nowadays I don't see where we

>> have a cluster defined by a topology with a different cluster power domain.

>>

> 

> While I agree that it's true in current practice, but in past we have

> seen "innovative designs". We initially had 2 clusters(big and little)

> then we saw 3 cluster(big little and tiny or whatever you want to call)

> So as it's not architecturally guaranteed, it's not nice to make this

> assumption in a generic driver.

> 

>> By the way, if you have any pointer to documentation for DynamIQ PM and

>> design? I would be interested to have a look.

>>

> 

> I don't have anything in detail. Excerpts from the link I sent earlier

> indicate that it's possible and highly likely.

> 

> "DynamIQ supports multiple, configurable, performance domains within a

> single cluster. These domains, consisting of single or multiple ARM

> CPUs, can scale in performance and power with finer granularity than

> previous quad-core clusters."

> 

>>> We need to separate topology and power domains. We have some dependency

>>> like this in big little drivers(both CPUfreq and CPUIdle) but that

>>> dependencies must be removed as they are not architecturally guaranteed.

>>> Lorenzo had a patch[1] to solve this issue, I can post the latest

>>> version of it again and continue the discussion after some basic

>>> rebase/testing.

>>

>> Actually, I am not convinced by the approach proposed in this patch.

>>

>> Let me have a look at the idle power domain before, I do believe we can

>> do something much more simple.

>>

> 

> OK, if you think so.


Hi Sudeep, Lorenzo,

I have been thinking and looking at the domain-idle-state and I don't
see an obvious connection between what is describing the power domain,
the cpu idle driver and what we are trying to achieve.

I would like to suggest something much more simple, register a cpuidle
driver per cpu, so every cpu can have its own idle definitions, that
should work for dynamiQ, smp and hmp. The impact on the driver will be
minimal.



-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
Daniel Lezcano June 1, 2017, 7:37 a.m. UTC | #11
On 31/05/2017 19:33, Sudeep Holla wrote:
> 

> 

> On 31/05/17 17:40, Daniel Lezcano wrote:

> 

>> Hi Sudeep, Lorenzo,

>>

>> I have been thinking and looking at the domain-idle-state and I don't

>> see an obvious connection between what is describing the power domain,

>> the cpu idle driver and what we are trying to achieve.

>>

> 

> I am not sure what you mean by *connection* above.

> 

> 1. With old flat list of idle states, we should get the cpumask sharing

>    the idle states from the phandle or something similar.

> 2. With new domain-idle-state and hierarchical DT binding, you just need

>    to infer that from the hierarchy.

> 

>> I would like to suggest something much more simple, register a cpuidle

>> driver per cpu, so every cpu can have its own idle definitions, that

>> should work for dynamiQ, smp and hmp. The impact on the driver will be

>> minimal.

>>

> 

> Sounds simple, but not sure if it's scalable on platforms with

> relatively large number of CPUs like 48 or 96 (e.g. Cavium ThunderX)


I'm pretty sure it scales, we had the idle states per cpuidle devices
and nobody was complaining. But I agree it could be optimized.


-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
diff mbox

Patch

diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
index 21340e0..f521448 100644
--- a/drivers/cpuidle/Kconfig.arm
+++ b/drivers/cpuidle/Kconfig.arm
@@ -4,6 +4,7 @@ 
 config ARM_CPUIDLE
         bool "Generic ARM/ARM64 CPU idle Driver"
         select DT_IDLE_STATES
+	select CPU_IDLE_MULTIPLE_DRIVERS
         help
           Select this to enable generic cpuidle driver for ARM.
           It provides a generic idle driver whose idle states are configured
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
index f440d38..bec31d5 100644
--- a/drivers/cpuidle/cpuidle-arm.c
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -18,6 +18,7 @@ 
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/slab.h>
+#include <linux/topology.h>
 
 #include <asm/cpuidle.h>
 
@@ -44,7 +45,7 @@  static int arm_enter_idle_state(struct cpuidle_device *dev,
 	return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);
 }
 
-static struct cpuidle_driver arm_idle_driver = {
+static struct cpuidle_driver arm_idle_driver __initdata = {
 	.name = "arm_idle",
 	.owner = THIS_MODULE,
 	/*
@@ -80,23 +81,40 @@  static const struct of_device_id arm_idle_state_match[] __initconst = {
 static int __init arm_idle_init(void)
 {
 	int cpu, ret;
-	struct cpuidle_driver *drv = &arm_idle_driver;
+	struct cpuidle_driver *drv = NULL;
 	struct cpuidle_device *dev;
 
-	/*
-	 * Initialize idle states data, starting at index 1.
-	 * This driver is DT only, if no DT idle states are detected (ret == 0)
-	 * let the driver initialization fail accordingly since there is no
-	 * reason to initialize the idle driver if only wfi is supported.
-	 */
-	ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
-	if (ret <= 0)
-		return ret ? : -ENODEV;
-
-	ret = cpuidle_register_driver(drv);
-	if (ret) {
-		pr_err("Failed to register cpuidle driver\n");
-		return ret;
+	for_each_possible_cpu(cpu) {
+
+		if (drv && cpumask_test_cpu(cpu, drv->cpumask))
+			continue;
+
+		ret = -ENOMEM;
+
+		drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);
+		if (!drv)
+			goto out_fail;
+
+		drv->cpumask = &cpu_topology[cpu].core_sibling;
+
+		/*
+		 * Initialize idle states data, starting at index 1.  This
+		 * driver is DT only, if no DT idle states are detected (ret
+		 * == 0) let the driver initialization fail accordingly since
+		 * there is no reason to initialize the idle driver if only
+		 * wfi is supported.
+		 */
+		ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
+		if (ret <= 0) {
+			ret = ret ? : -ENODEV;
+			goto out_fail;
+		}
+
+		ret = cpuidle_register_driver(drv);
+		if (ret) {
+			pr_err("Failed to register cpuidle driver\n");
+			goto out_fail;
+		}
 	}
 
 	/*
@@ -141,10 +159,11 @@  static int __init arm_idle_init(void)
 		dev = per_cpu(cpuidle_devices, cpu);
 		cpuidle_unregister_device(dev);
 		kfree(dev);
+		drv = cpuidle_get_driver();
+		cpuidle_unregister_driver(drv);
+		kfree(drv);
 	}
 
-	cpuidle_unregister_driver(drv);
-
 	return ret;
 }
 device_initcall(arm_idle_init);