[4/4] cpuidle - support multiple drivers

Message ID 1348526634-19029-5-git-send-email-daniel.lezcano@linaro.org
State New
Headers show

Commit Message

Daniel Lezcano Sept. 24, 2012, 10:43 p.m.
With the tegra3 and the big.LITTLE [1] new architectures, several cpus
with different characteristics (latencies and states) can co-exists on the
system.

The cpuidle framework has the limitation of handling only identical cpus.

This patch removes this limitation by introducing the multiple driver support
for cpuidle.

This option is configurable at compile time and should be enabled for the
architectures mentioned above. So there is no impact for the other platforms
if the option is disabled. The option defaults to 'n'. Note the multiple drivers
support is also compatible with the existing drivers, even if just one driver is
needed, all the cpu will be tied to this driver using an extra small chunk of
processor memory.

The multiple driver support use a per-cpu driver pointer instead of a global
variable and the accessor to this variable are done from a cpu context.

In order to keep the compatibility with the existing drivers, the function
'cpuidle_register_driver' and 'cpuidle_unregister_driver' will register
the specified driver for all the cpus.

The sysfs output for the 'current_driver' is changed when this option is
set by giving the drivers per cpu.

eg.
cpu0: acpi_idle
cpu1: acpi_idle

but if the option is disabled, the output will remain the same.

[1] http://lwn.net/Articles/481055/


Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/cpuidle/Kconfig   |    9 +++
 drivers/cpuidle/cpuidle.c |   24 ++++++--
 drivers/cpuidle/driver.c  |  136 +++++++++++++++++++++++++++++++++++++++++---
 drivers/cpuidle/sysfs.c   |   50 +++++++++++++----
 include/linux/cpuidle.h   |    8 ++-
 5 files changed, 197 insertions(+), 30 deletions(-)

Comments

Francesco Lavra Sept. 29, 2012, 9:41 a.m. | #1
Hi,

On 09/25/2012 12:43 AM, Daniel Lezcano wrote:
...
> diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
> index 5f809e3..2596422 100644
> --- a/drivers/cpuidle/sysfs.c
> +++ b/drivers/cpuidle/sysfs.c
> @@ -43,21 +43,46 @@ out:
>  	return i;
>  }
>  
> +struct cbarg {
> +	char *buf;
> +	ssize_t count;
> +};
> +
> +static int each_driver_cb(int cpu, struct cpuidle_driver *drv, void *data)
> +{
> +	int ret;
> +	struct cbarg *cbarg = data;
> +
> +	if ((drv && (strlen(drv->name) + cbarg->count) >= PAGE_SIZE) ||
> +	    (!drv && (strlen("none") + cbarg->count) >= PAGE_SIZE))
> +		return -EOVERFLOW;
> +
> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> +	ret = sprintf(cbarg->buf + cbarg->count, "cpu%d: %s\n",
> +		      cpu, drv ? drv->name : "none" );
> +#else
> +	ret = sprintf(cbarg->buf, "%s\n", drv ? drv->name : "none");
> +#endif
> +	if (ret < 0)
> +		return ret;
> +
> +	cbarg->count += ret;
> +
> +	return 0;
> +}
> +
>  static ssize_t show_current_driver(struct device *dev,
>  				   struct device_attribute *attr,
>  				   char *buf)
>  {
> -	ssize_t ret;
> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
> +	struct cbarg cbarg = { .buf = buf };

cbarg.count should be initialized to 0.

> +	int ret;
>  
> -	spin_lock(&cpuidle_driver_lock);
> -	if (cpuidle_driver)
> -		ret = sprintf(buf, "%s\n", cpuidle_driver->name);
> -	else
> -		ret = sprintf(buf, "none\n");
> -	spin_unlock(&cpuidle_driver_lock);
> +	ret = cpuidle_for_each_driver(each_driver_cb, &cbarg);
> +	if (ret < 0)
> +		return ret;
>  
> -	return ret;
> +	return cbarg.count;
>  }

--
Francesco
Daniel Lezcano Sept. 29, 2012, 5:44 p.m. | #2
On 09/29/2012 11:41 AM, Francesco Lavra wrote:
> Hi,

Hi Francesco,

thanks for reviewing the patch.

>>  static ssize_t show_current_driver(struct device *dev,
>>  				   struct device_attribute *attr,
>>  				   char *buf)
>>  {
>> -	ssize_t ret;
>> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
>> +	struct cbarg cbarg = { .buf = buf };
> 
> cbarg.count should be initialized to 0.

Actually, with this initialization, all the fields will be initialized
to 0, except 'buf'.

Thanks
  -- Daniel

>> +	int ret;
>>  
>> -	spin_lock(&cpuidle_driver_lock);
>> -	if (cpuidle_driver)
>> -		ret = sprintf(buf, "%s\n", cpuidle_driver->name);
>> -	else
>> -		ret = sprintf(buf, "none\n");
>> -	spin_unlock(&cpuidle_driver_lock);
>> +	ret = cpuidle_for_each_driver(each_driver_cb, &cbarg);
>> +	if (ret < 0)
>> +		return ret;
>>  
>> -	return ret;
>> +	return cbarg.count;
>>  }
> 
> --
> Francesco
Rafael J. Wysocki Sept. 29, 2012, 10:07 p.m. | #3
On Saturday, September 29, 2012, Daniel Lezcano wrote:
> On 09/29/2012 11:41 AM, Francesco Lavra wrote:
> > Hi,
> 
> Hi Francesco,
> 
> thanks for reviewing the patch.
> 
> >>  static ssize_t show_current_driver(struct device *dev,
> >>  				   struct device_attribute *attr,
> >>  				   char *buf)
> >>  {
> >> -	ssize_t ret;
> >> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
> >> +	struct cbarg cbarg = { .buf = buf };
> > 
> > cbarg.count should be initialized to 0.
> 
> Actually, with this initialization, all the fields will be initialized
> to 0, except 'buf'.

However, it would be good to initialize count explicitly so as to show
that we care about the initial value of it.

Thanks,
Rafael
Daniel Lezcano Sept. 30, 2012, 4:34 p.m. | #4
On 09/30/2012 12:07 AM, Rafael J. Wysocki wrote:
> On Saturday, September 29, 2012, Daniel Lezcano wrote:
>> On 09/29/2012 11:41 AM, Francesco Lavra wrote:
>>> Hi,
>> Hi Francesco,
>>
>> thanks for reviewing the patch.
>>
>>>>  static ssize_t show_current_driver(struct device *dev,
>>>>  				   struct device_attribute *attr,
>>>>  				   char *buf)
>>>>  {
>>>> -	ssize_t ret;
>>>> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
>>>> +	struct cbarg cbarg = { .buf = buf };
>>> cbarg.count should be initialized to 0.
>> Actually, with this initialization, all the fields will be initialized
>> to 0, except 'buf'.
> However, it would be good to initialize count explicitly so as to show
> that we care about the initial value of it.

Ok, I will change that in a V2.

Rafael, you're right, this approach makes more sense IMO.

Thanks
-- Daniel
Rafael J. Wysocki Oct. 2, 2012, 2:08 a.m. | #5
On Sunday 30 of September 2012 18:34:31 Daniel Lezcano wrote:
> On 09/30/2012 12:07 AM, Rafael J. Wysocki wrote:
> > On Saturday, September 29, 2012, Daniel Lezcano wrote:
> >> On 09/29/2012 11:41 AM, Francesco Lavra wrote:
> >>> Hi,
> >> Hi Francesco,
> >>
> >> thanks for reviewing the patch.
> >>
> >>>>  static ssize_t show_current_driver(struct device *dev,
> >>>>  				   struct device_attribute *attr,
> >>>>  				   char *buf)
> >>>>  {
> >>>> -	ssize_t ret;
> >>>> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
> >>>> +	struct cbarg cbarg = { .buf = buf };
> >>> cbarg.count should be initialized to 0.
> >> Actually, with this initialization, all the fields will be initialized
> >> to 0, except 'buf'.
> > However, it would be good to initialize count explicitly so as to show
> > that we care about the initial value of it.
> 
> Ok, I will change that in a V2.
> 
> Rafael, you're right, this approach makes more sense IMO.

Well, I'm glad that you agree. :-)

I'm sorry I haven't posted any comments about the latest series,
I'll do my best to review it later this week.

Thanks,
Rafael
Daniel Lezcano Oct. 2, 2012, 12:27 p.m. | #6
On 10/02/2012 04:08 AM, Rafael J. Wysocki wrote:
> On Sunday 30 of September 2012 18:34:31 Daniel Lezcano wrote:
>> On 09/30/2012 12:07 AM, Rafael J. Wysocki wrote:
>>> On Saturday, September 29, 2012, Daniel Lezcano wrote:
>>>> On 09/29/2012 11:41 AM, Francesco Lavra wrote:
>>>>> Hi,
>>>> Hi Francesco,
>>>>
>>>> thanks for reviewing the patch.
>>>>
>>>>>>  static ssize_t show_current_driver(struct device *dev,
>>>>>>  				   struct device_attribute *attr,
>>>>>>  				   char *buf)
>>>>>>  {
>>>>>> -	ssize_t ret;
>>>>>> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
>>>>>> +	struct cbarg cbarg = { .buf = buf };
>>>>> cbarg.count should be initialized to 0.
>>>> Actually, with this initialization, all the fields will be initialized
>>>> to 0, except 'buf'.
>>> However, it would be good to initialize count explicitly so as to show
>>> that we care about the initial value of it.
>>
>> Ok, I will change that in a V2.
>>
>> Rafael, you're right, this approach makes more sense IMO.
> 
> Well, I'm glad that you agree. :-)
> 
> I'm sorry I haven't posted any comments about the latest series,
> I'll do my best to review it later this week.

Ok, cool. Thanks!

I guess it is too late to have this material for v3.7, right ?

  -- Daniel
Rafael J. Wysocki Oct. 2, 2012, 2:02 p.m. | #7
On Tuesday 02 of October 2012 14:27:17 Daniel Lezcano wrote:
> On 10/02/2012 04:08 AM, Rafael J. Wysocki wrote:
> > On Sunday 30 of September 2012 18:34:31 Daniel Lezcano wrote:
> >> On 09/30/2012 12:07 AM, Rafael J. Wysocki wrote:
> >>> On Saturday, September 29, 2012, Daniel Lezcano wrote:
> >>>> On 09/29/2012 11:41 AM, Francesco Lavra wrote:
> >>>>> Hi,
> >>>> Hi Francesco,
> >>>>
> >>>> thanks for reviewing the patch.
> >>>>
> >>>>>>  static ssize_t show_current_driver(struct device *dev,
> >>>>>>  				   struct device_attribute *attr,
> >>>>>>  				   char *buf)
> >>>>>>  {
> >>>>>> -	ssize_t ret;
> >>>>>> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
> >>>>>> +	struct cbarg cbarg = { .buf = buf };
> >>>>> cbarg.count should be initialized to 0.
> >>>> Actually, with this initialization, all the fields will be initialized
> >>>> to 0, except 'buf'.
> >>> However, it would be good to initialize count explicitly so as to show
> >>> that we care about the initial value of it.
> >>
> >> Ok, I will change that in a V2.
> >>
> >> Rafael, you're right, this approach makes more sense IMO.
> > 
> > Well, I'm glad that you agree. :-)
> > 
> > I'm sorry I haven't posted any comments about the latest series,
> > I'll do my best to review it later this week.
> 
> Ok, cool. Thanks!
> 
> I guess it is too late to have this material for v3.7, right ?

Yes, it is too late.

Thanks,
Rafael


---
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
Peter De Schrijver Oct. 4, 2012, 2:14 p.m. | #8
On Tue, Sep 25, 2012 at 12:43:54AM +0200, Daniel Lezcano wrote:
> With the tegra3 and the big.LITTLE [1] new architectures, several cpus
> with different characteristics (latencies and states) can co-exists on the
> system.
> 
> The cpuidle framework has the limitation of handling only identical cpus.
> 
> This patch removes this limitation by introducing the multiple driver support
> for cpuidle.
> 
> This option is configurable at compile time and should be enabled for the
> architectures mentioned above. So there is no impact for the other platforms
> if the option is disabled. The option defaults to 'n'. Note the multiple drivers
> support is also compatible with the existing drivers, even if just one driver is
> needed, all the cpu will be tied to this driver using an extra small chunk of
> processor memory.
> 
> The multiple driver support use a per-cpu driver pointer instead of a global
> variable and the accessor to this variable are done from a cpu context.
> 
> In order to keep the compatibility with the existing drivers, the function
> 'cpuidle_register_driver' and 'cpuidle_unregister_driver' will register
> the specified driver for all the cpus.
> 
> The sysfs output for the 'current_driver' is changed when this option is
> set by giving the drivers per cpu.
> 
> eg.
> cpu0: acpi_idle
> cpu1: acpi_idle
> 

Is this allowed? I thought sysfs files can output only 1 line of data?

Cheers,

Peter.
Daniel Lezcano Oct. 4, 2012, 2:26 p.m. | #9
On 10/04/2012 04:14 PM, Peter De Schrijver wrote:
> On Tue, Sep 25, 2012 at 12:43:54AM +0200, Daniel Lezcano wrote:
>> With the tegra3 and the big.LITTLE [1] new architectures, several cpus
>> with different characteristics (latencies and states) can co-exists on the
>> system.
>>
>> The cpuidle framework has the limitation of handling only identical cpus.
>>
>> This patch removes this limitation by introducing the multiple driver support
>> for cpuidle.
>>
>> This option is configurable at compile time and should be enabled for the
>> architectures mentioned above. So there is no impact for the other platforms
>> if the option is disabled. The option defaults to 'n'. Note the multiple drivers
>> support is also compatible with the existing drivers, even if just one driver is
>> needed, all the cpu will be tied to this driver using an extra small chunk of
>> processor memory.
>>
>> The multiple driver support use a per-cpu driver pointer instead of a global
>> variable and the accessor to this variable are done from a cpu context.
>>
>> In order to keep the compatibility with the existing drivers, the function
>> 'cpuidle_register_driver' and 'cpuidle_unregister_driver' will register
>> the specified driver for all the cpus.
>>
>> The sysfs output for the 'current_driver' is changed when this option is
>> set by giving the drivers per cpu.
>>
>> eg.
>> cpu0: acpi_idle
>> cpu1: acpi_idle
>>
> 
> Is this allowed? I thought sysfs files can output only 1 line of data?

Ah yeah :)

Does it make sense to put the following files:

/sys/devices/system/cpu/cpu0/cpuidle/driver
 => acpi_idle
/sys/devices/system/cpu/cpu1/cpuidle/driver
 => acpi_idle

and the content of the file:

/sys/devices/system/cpu/cpuidle/current_driver will show the driver
associated with current cpu ?
Peter De Schrijver Oct. 4, 2012, 2:34 p.m. | #10
> >>
> >> eg.
> >> cpu0: acpi_idle
> >> cpu1: acpi_idle
> >>
> > 
> > Is this allowed? I thought sysfs files can output only 1 line of data?
> 
> Ah yeah :)
> 
> Does it make sense to put the following files:
> 
> /sys/devices/system/cpu/cpu0/cpuidle/driver
>  => acpi_idle
> /sys/devices/system/cpu/cpu1/cpuidle/driver
>  => acpi_idle
> 

Sounds like a reasonable idea.

> and the content of the file:
> 
> /sys/devices/system/cpu/cpuidle/current_driver will show the driver
> associated with current cpu ?
> 

I think that's ok.

Cheers,

Peter.
Rafael J. Wysocki Oct. 7, 2012, 9:09 p.m. | #11
On Thursday 04 of October 2012 17:34:42 Peter De Schrijver wrote:
> > >>
> > >> eg.
> > >> cpu0: acpi_idle
> > >> cpu1: acpi_idle
> > >>
> > > 
> > > Is this allowed? I thought sysfs files can output only 1 line of data?
> > 
> > Ah yeah :)
> > 
> > Does it make sense to put the following files:
> > 
> > /sys/devices/system/cpu/cpu0/cpuidle/driver
> >  => acpi_idle
> > /sys/devices/system/cpu/cpu1/cpuidle/driver
> >  => acpi_idle
> > 
> 
> Sounds like a reasonable idea.
> 
> > and the content of the file:
> > 
> > /sys/devices/system/cpu/cpuidle/current_driver will show the driver
> > associated with current cpu ?
> > 
> 
> I think that's ok.

Yes, that sounds good.

Thanks,
Rafael
Rafael J. Wysocki Oct. 7, 2012, 9:26 p.m. | #12
On Tuesday 25 of September 2012 00:43:54 Daniel Lezcano wrote:
> With the tegra3 and the big.LITTLE [1] new architectures, several cpus
> with different characteristics (latencies and states) can co-exists on the
> system.
> 
> The cpuidle framework has the limitation of handling only identical cpus.
> 
> This patch removes this limitation by introducing the multiple driver support
> for cpuidle.
> 
> This option is configurable at compile time and should be enabled for the
> architectures mentioned above. So there is no impact for the other platforms
> if the option is disabled. The option defaults to 'n'. Note the multiple drivers
> support is also compatible with the existing drivers, even if just one driver is
> needed, all the cpu will be tied to this driver using an extra small chunk of
> processor memory.
> 
> The multiple driver support use a per-cpu driver pointer instead of a global
> variable and the accessor to this variable are done from a cpu context.
> 
> In order to keep the compatibility with the existing drivers, the function
> 'cpuidle_register_driver' and 'cpuidle_unregister_driver' will register
> the specified driver for all the cpus.
> 
> The sysfs output for the 'current_driver' is changed when this option is
> set by giving the drivers per cpu.
> 
> eg.
> cpu0: acpi_idle
> cpu1: acpi_idle
> 
> but if the option is disabled, the output will remain the same.
> 
> [1] http://lwn.net/Articles/481055/
> 
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>  drivers/cpuidle/Kconfig   |    9 +++
>  drivers/cpuidle/cpuidle.c |   24 ++++++--
>  drivers/cpuidle/driver.c  |  136 +++++++++++++++++++++++++++++++++++++++++---
>  drivers/cpuidle/sysfs.c   |   50 +++++++++++++----
>  include/linux/cpuidle.h   |    8 ++-
>  5 files changed, 197 insertions(+), 30 deletions(-)
> 
> diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
> index a76b689..234ae65 100644
> --- a/drivers/cpuidle/Kconfig
> +++ b/drivers/cpuidle/Kconfig
> @@ -9,6 +9,15 @@ config CPU_IDLE
>  
>  	  If you're using an ACPI-enabled platform, you should say Y here.
>  
> +config CPU_IDLE_MULTIPLE_DRIVERS
> +        bool "Support multiple cpuidle drivers"
> +        depends on CPU_IDLE
> +        default n
> +        help
> +         Allows the cpuidle framework to use different drivers for each CPU.
> +         This is useful if you have a system with different CPU latencies and
> +         states. If unsure say N.
> +
>  config CPU_IDLE_GOV_LADDER
>  	bool
>  	depends on CPU_IDLE
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index e28f6ea..c581b99 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -68,7 +68,7 @@ static cpuidle_enter_t cpuidle_enter_ops;
>  int cpuidle_play_dead(void)
>  {
>  	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
> -	struct cpuidle_driver *drv = cpuidle_get_driver();
> +	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev->cpu);

It would be better to change that to be called as cpuidle_get_cpu_driver(dev),
since dev is a cpuidle_device already.

>  	int i, dead_state = -1;
>  	int power_usage = -1;
>  
> @@ -128,7 +128,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
>  int cpuidle_idle_call(void)
>  {
>  	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
> -	struct cpuidle_driver *drv = cpuidle_get_driver();
> +	struct cpuidle_driver *drv;
>  	int next_state, entered_state;
>  
>  	if (off)
> @@ -141,6 +141,8 @@ int cpuidle_idle_call(void)
>  	if (!dev || !dev->enabled)
>  		return -EBUSY;
>  
> +	drv = cpuidle_get_cpu_driver(dev->cpu);
> +
>  	/* ask the governor for the next state */
>  	next_state = cpuidle_curr_governor->select(drv, dev);
>  	if (need_resched()) {
> @@ -308,15 +310,19 @@ static void poll_idle_init(struct cpuidle_driver *drv) {}
>  int cpuidle_enable_device(struct cpuidle_device *dev)
>  {
>  	int ret, i;
> -	struct cpuidle_driver *drv = cpuidle_get_driver();
> +	struct cpuidle_driver *drv;
>  
>  	if (!dev)
>  		return -EINVAL;
>  
>  	if (dev->enabled)
>  		return 0;
> +
> +	drv = cpuidle_get_cpu_driver(dev->cpu);
> +
>  	if (!drv || !cpuidle_curr_governor)
>  		return -EIO;
> +
>  	if (!dev->state_count)
>  		dev->state_count = drv->state_count;
>  
> @@ -368,15 +374,18 @@ EXPORT_SYMBOL_GPL(cpuidle_enable_device);
>   */
>  void cpuidle_disable_device(struct cpuidle_device *dev)
>  {
> +	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev->cpu);
> +
>  	if (!dev->enabled)
>  		return;
> -	if (!cpuidle_get_driver() || !cpuidle_curr_governor)
> +
> +	if (!drv || !cpuidle_curr_governor)
>  		return;
>  
>  	dev->enabled = 0;
>  
>  	if (cpuidle_curr_governor->disable)
> -		cpuidle_curr_governor->disable(cpuidle_get_driver(), dev);
> +		cpuidle_curr_governor->disable(drv, dev);
>  
>  	cpuidle_remove_state_sysfs(dev);
>  	enabled_devices--;
> @@ -395,7 +404,8 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
>  {
>  	int ret;
>  	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
> +	struct cpuidle_driver *cpuidle_driver =
> +		cpuidle_get_cpu_driver(dev->cpu);
>  
>  	if (!try_module_get(cpuidle_driver->owner))
>  		return -EINVAL;
> @@ -461,7 +471,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device);
>  void cpuidle_unregister_device(struct cpuidle_device *dev)
>  {
>  	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
> +	struct cpuidle_driver *cpuidle_driver = cpuidle_get_cpu_driver(dev->cpu);
>  
>  	if (dev->registered == 0)
>  		return;
> diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
> index 391f80f..6529b91 100644
> --- a/drivers/cpuidle/driver.c
> +++ b/drivers/cpuidle/driver.c
> @@ -14,7 +14,11 @@
>  
>  #include "cpuidle.h"
>  
> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> +DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers);
> +#else
>  static struct cpuidle_driver *cpuidle_curr_driver;
> +#endif
>  DEFINE_SPINLOCK(cpuidle_driver_lock);
>  
>  static void set_power_states(struct cpuidle_driver *drv)
> @@ -47,12 +51,25 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv)
>  		set_power_states(drv);
>  }
>  
> -static void cpuidle_set_driver(struct cpuidle_driver *drv)
> +static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
>  {
> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> +	per_cpu(cpuidle_drivers, cpu) = drv;
> +#else
>  	cpuidle_curr_driver = drv;
> +#endif

I'm not a big fan of #ifdef blocks inside of functions.  In my opinion it's
better to put entire functions under #ifdef blocks.  So, for example, in this
particular case I would do"

#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
{
	per_cpu(cpuidle_drivers, cpu) = drv;
}
#else
static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
{
	cpuidle_curr_driver = drv;
}
#endif

>  }
>  
> -static int __cpuidle_register_driver(struct cpuidle_driver *drv)
> +static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
> +{
> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> +	return per_cpu(cpuidle_drivers, cpu);
> +#else
> +	return cpuidle_curr_driver;
> +#endif
> +}

And here analogously.

> +
> +static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu)
>  {
>  	if (!drv || !drv->state_count)
>  		return -EINVAL;
> @@ -60,26 +77,102 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv)
>  	if (cpuidle_disabled())
>  		return -ENODEV;
>  
> -	if (cpuidle_get_driver())
> +	if (__cpuidle_get_cpu_driver(cpu))
>  		return -EBUSY;
>  
>  	__cpuidle_driver_init(drv);
>  
> -	cpuidle_set_driver(drv);
> +	__cpuidle_set_cpu_driver(drv, cpu);
>  
>  	return 0;
>  }
>  
> -static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
> +static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu)
>  {
> -	if (drv != cpuidle_get_driver()) {
> +	if (drv != __cpuidle_get_cpu_driver(cpu)) {
>  		WARN(1, "invalid cpuidle_unregister_driver(%s)\n",
>  			drv->name);
>  		return;
>  	}
>  
>  	if (!WARN_ON(drv->refcnt > 0))
> -		cpuidle_set_driver(NULL);
> +		__cpuidle_set_cpu_driver(NULL, cpu);
> +}
> +
> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> +static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv)
> +{
> +	int cpu;
> +	for_each_present_cpu(cpu)
> +		__cpuidle_unregister_driver(drv, cpu);
> +}
> +
> +static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv)
> +{
> +	int ret = 0;
> +	int i, cpu;
> +
> +	for_each_present_cpu(cpu) {
> +		ret = __cpuidle_register_driver(drv, cpu);
> +		if (!ret)
> +			continue;
> +		for (i = cpu - 1; i >= 0; i--)

I wonder if this is going to work in all cases.  For example, is there any
guarantee that the CPU numbers start from 0 and that there are no gaps?

> +			__cpuidle_unregister_driver(drv, i);
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +static int __cpuidle_for_each_driver(int (*cb)(int, struct cpuidle_driver *,
> +					       void *), void *data)
> +{
> +	struct cpuidle_driver *drv;
> +	int cpu, ret = 0;
> +
> +	for_each_present_cpu(cpu) {
> +		drv = __cpuidle_get_cpu_driver(cpu);
> +		ret = cb(cpu, drv, data);
> +		if (ret < 0)
> +			break;
> +	}
> +	return ret;
> +}
> +
> +int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu)
> +{
> +	int ret;
> +
> +	spin_lock(&cpuidle_driver_lock);
> +	ret = __cpuidle_register_driver(drv, cpu);
> +	spin_unlock(&cpuidle_driver_lock);
> +
> +	return ret;
> +}
> +
> +void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu)
> +{
> +	spin_lock(&cpuidle_driver_lock);
> +	__cpuidle_unregister_driver(drv, cpu);
> +	spin_unlock(&cpuidle_driver_lock);
> +}
> +#endif
> +
> +int cpuidle_for_each_driver(int (*cb)(int, struct cpuidle_driver *, void *),
> +			    void *data)
> +{
> +	int ret;
> +
> +	spin_lock(&cpuidle_driver_lock);
> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> +	ret = __cpuidle_for_each_driver(cb, data);
> +#else
> +	ret = cb(smp_processor_id(),
> +		 __cpuidle_get_cpu_driver(smp_processor_id()), data);
> +#endif

Here I'd make the definition of __cpuidle_for_each_driver() depend on
whether or not CONFIG_CPU_IDLE_MULTIPLE_DRIVERS is set.

> +	spin_unlock(&cpuidle_driver_lock);
> +
> +	return ret;
>  }
>  
>  /**
> @@ -91,7 +184,11 @@ int cpuidle_register_driver(struct cpuidle_driver *drv)
>  	int ret;
>  
>  	spin_lock(&cpuidle_driver_lock);
> -	ret = __cpuidle_register_driver(drv);
> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> +	ret = __cpuidle_register_all_cpu_driver(drv);
> +#else
> +	ret = __cpuidle_register_driver(drv, smp_processor_id());
> +#endif

And analogously here.

>  	spin_unlock(&cpuidle_driver_lock);
>  
>  	return ret;
> @@ -103,18 +200,37 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver);
>   */
>  struct cpuidle_driver *cpuidle_get_driver(void)
>  {
> -	return cpuidle_curr_driver;
> +	return __cpuidle_get_cpu_driver(smp_processor_id());
>  }
>  EXPORT_SYMBOL_GPL(cpuidle_get_driver);
>  
>  /**
> + * cpuidle_get_cpu_driver - return the driver tied with a cpu
> + */
> +struct cpuidle_driver *cpuidle_get_cpu_driver(int cpu)
> +{
> +	struct cpuidle_driver *drv;
> +
> +	spin_lock(&cpuidle_driver_lock);
> +	drv = __cpuidle_get_cpu_driver(cpu);
> +	spin_unlock(&cpuidle_driver_lock);
> +
> +	return drv;
> +}
> +EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver);
> +
> +/**
>   * cpuidle_unregister_driver - unregisters a driver
>   * @drv: the driver
>   */
>  void cpuidle_unregister_driver(struct cpuidle_driver *drv)
>  {
>  	spin_lock(&cpuidle_driver_lock);
> -	__cpuidle_unregister_driver(drv);
> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> +	__cpuidle_unregister_all_cpu_driver(drv);
> +#else
> +	__cpuidle_unregister_driver(drv, smp_processor_id());
> +#endif

I'm slightly cautious about using smp_processor_id() above.
get_cpu()/put_cpu() is the preferred way of doing this nowadays (although
this particular code is under the spinlock, so it should be OK).

>  	spin_unlock(&cpuidle_driver_lock);
>  }
>  EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
> diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
> index 5f809e3..2596422 100644
> --- a/drivers/cpuidle/sysfs.c
> +++ b/drivers/cpuidle/sysfs.c
> @@ -43,21 +43,46 @@ out:
>  	return i;
>  }
>  
> +struct cbarg {
> +	char *buf;
> +	ssize_t count;
> +};
> +
> +static int each_driver_cb(int cpu, struct cpuidle_driver *drv, void *data)
> +{
> +	int ret;
> +	struct cbarg *cbarg = data;
> +
> +	if ((drv && (strlen(drv->name) + cbarg->count) >= PAGE_SIZE) ||
> +	    (!drv && (strlen("none") + cbarg->count) >= PAGE_SIZE))
> +		return -EOVERFLOW;
> +
> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> +	ret = sprintf(cbarg->buf + cbarg->count, "cpu%d: %s\n",
> +		      cpu, drv ? drv->name : "none" );
> +#else
> +	ret = sprintf(cbarg->buf, "%s\n", drv ? drv->name : "none");
> +#endif
> +	if (ret < 0)
> +		return ret;
> +
> +	cbarg->count += ret;
> +
> +	return 0;
> +}
> +
>  static ssize_t show_current_driver(struct device *dev,
>  				   struct device_attribute *attr,
>  				   char *buf)
>  {
> -	ssize_t ret;
> -	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
> +	struct cbarg cbarg = { .buf = buf };
> +	int ret;
>  
> -	spin_lock(&cpuidle_driver_lock);
> -	if (cpuidle_driver)
> -		ret = sprintf(buf, "%s\n", cpuidle_driver->name);
> -	else
> -		ret = sprintf(buf, "none\n");
> -	spin_unlock(&cpuidle_driver_lock);
> +	ret = cpuidle_for_each_driver(each_driver_cb, &cbarg);
> +	if (ret < 0)
> +		return ret;
>  
> -	return ret;
> +	return cbarg.count;
>  }
>  
>  static ssize_t show_current_governor(struct device *dev,
> @@ -363,10 +388,10 @@ int cpuidle_add_state_sysfs(struct cpuidle_device *device)
>  {
>  	int i, ret = -ENOMEM;
>  	struct cpuidle_state_kobj *kobj;
> -	struct cpuidle_driver *drv = cpuidle_get_driver();
> +	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device->cpu);
>  
>  	/* state statistics */
> -	for (i = 0; i < device->state_count; i++) {
> +	for (i = 0; i < drv->state_count; i++) {
>  		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
>  		if (!kobj)
>  			goto error_state;
> @@ -374,7 +399,8 @@ int cpuidle_add_state_sysfs(struct cpuidle_device *device)
>  		kobj->state_usage = &device->states_usage[i];
>  		init_completion(&kobj->kobj_unregister);
>  
> -		ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, &device->kobj,
> +		ret = kobject_init_and_add(&kobj->kobj,
> +					   &ktype_state_cpuidle, &device->kobj,
>  					   "state%d", i);
>  		if (ret) {
>  			kfree(kobj);
> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> index a4ff9f8..0e0b0ad 100644
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -164,6 +164,13 @@ extern int cpuidle_wrap_enter(struct cpuidle_device *dev,
>  					struct cpuidle_driver *drv, int index));
>  extern int cpuidle_play_dead(void);
>  
> +extern int cpuidle_for_each_driver(
> +	int (*cb)(int, struct cpuidle_driver *, void *), void *data);
> +
> +extern struct cpuidle_driver *cpuidle_get_cpu_driver(int cpu);
> +extern int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu);
> +extern void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu);
> +
>  #else
>  static inline void disable_cpuidle(void) { }
>  static inline int cpuidle_idle_call(void) { return -ENODEV; }
> @@ -190,7 +197,6 @@ static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>  					struct cpuidle_driver *drv, int index))
>  { return -ENODEV; }
>  static inline int cpuidle_play_dead(void) {return -ENODEV; }
> -
>  #endif
>  
>  #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
> 

Thanks,
Rafael
Daniel Lezcano Oct. 11, 2012, 10:04 a.m. | #13
On 10/07/2012 11:26 PM, Rafael J. Wysocki wrote:
> On Tuesday 25 of September 2012 00:43:54 Daniel Lezcano wrote:
>> With the tegra3 and the big.LITTLE [1] new architectures, several cpus
>> with different characteristics (latencies and states) can co-exists on the
>> system.
>>
>> The cpuidle framework has the limitation of handling only identical cpus.
>>
>> This patch removes this limitation by introducing the multiple driver support
>> for cpuidle.
>>
>> This option is configurable at compile time and should be enabled for the
>> architectures mentioned above. So there is no impact for the other platforms
>> if the option is disabled. The option defaults to 'n'. Note the multiple drivers
>> support is also compatible with the existing drivers, even if just one driver is
>> needed, all the cpu will be tied to this driver using an extra small chunk of
>> processor memory.
>>
>> The multiple driver support use a per-cpu driver pointer instead of a global
>> variable and the accessor to this variable are done from a cpu context.

Thanks Rafael for the review.

I took into account all your remarks for the V2.

[ cut ]

>> +static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv)
>> +{
>> +	int ret = 0;
>> +	int i, cpu;
>> +
>> +	for_each_present_cpu(cpu) {
>> +		ret = __cpuidle_register_driver(drv, cpu);
>> +		if (!ret)
>> +			continue;
>> +		for (i = cpu - 1; i >= 0; i--)
> I wonder if this is going to work in all cases.  For example, is there any
> guarantee that the CPU numbers start from 0 and that there are no gaps?

AFAICS, the cpumask.h is not assuming the cpu numbers start from zero
and they are contiguous.

I will fix this reverse loop, thanks for spotting this.

[ cut ]

>>  void cpuidle_unregister_driver(struct cpuidle_driver *drv)
>>  {
>>  	spin_lock(&cpuidle_driver_lock);
>> -	__cpuidle_unregister_driver(drv);
>> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
>> +	__cpuidle_unregister_all_cpu_driver(drv);
>> +#else
>> +	__cpuidle_unregister_driver(drv, smp_processor_id());
>> +#endif
> I'm slightly cautious about using smp_processor_id() above.
> get_cpu()/put_cpu() is the preferred way of doing this nowadays (although
> this particular code is under the spinlock, so it should be OK).
yes, get_cpu does preempt_disable() and smp_processor_id()
As spin_lock does also preempt_disable() that should be ok.
But I changed the code to use the preferred way.

Thanks
-- Daniel
Rafael J. Wysocki Oct. 11, 2012, 5:21 p.m. | #14
On Thursday 11 of October 2012 12:04:37 Daniel Lezcano wrote:
> On 10/07/2012 11:26 PM, Rafael J. Wysocki wrote:
> > On Tuesday 25 of September 2012 00:43:54 Daniel Lezcano wrote:
> >> With the tegra3 and the big.LITTLE [1] new architectures, several cpus
> >> with different characteristics (latencies and states) can co-exists on the
> >> system.
> >>
> >> The cpuidle framework has the limitation of handling only identical cpus.
> >>
> >> This patch removes this limitation by introducing the multiple driver support
> >> for cpuidle.
> >>
> >> This option is configurable at compile time and should be enabled for the
> >> architectures mentioned above. So there is no impact for the other platforms
> >> if the option is disabled. The option defaults to 'n'. Note the multiple drivers
> >> support is also compatible with the existing drivers, even if just one driver is
> >> needed, all the cpu will be tied to this driver using an extra small chunk of
> >> processor memory.
> >>
> >> The multiple driver support use a per-cpu driver pointer instead of a global
> >> variable and the accessor to this variable are done from a cpu context.
> 
> Thanks Rafael for the review.
> 
> I took into account all your remarks for the V2.
> 
> [ cut ]
> 
> >> +static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv)
> >> +{
> >> +	int ret = 0;
> >> +	int i, cpu;
> >> +
> >> +	for_each_present_cpu(cpu) {
> >> +		ret = __cpuidle_register_driver(drv, cpu);
> >> +		if (!ret)
> >> +			continue;
> >> +		for (i = cpu - 1; i >= 0; i--)
> > I wonder if this is going to work in all cases.  For example, is there any
> > guarantee that the CPU numbers start from 0 and that there are no gaps?
> 
> AFAICS, the cpumask.h is not assuming the cpu numbers start from zero
> and they are contiguous.
> 
> I will fix this reverse loop, thanks for spotting this.
> 
> [ cut ]
> 
> >>  void cpuidle_unregister_driver(struct cpuidle_driver *drv)
> >>  {
> >>  	spin_lock(&cpuidle_driver_lock);
> >> -	__cpuidle_unregister_driver(drv);
> >> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
> >> +	__cpuidle_unregister_all_cpu_driver(drv);
> >> +#else
> >> +	__cpuidle_unregister_driver(drv, smp_processor_id());
> >> +#endif
> > I'm slightly cautious about using smp_processor_id() above.
> > get_cpu()/put_cpu() is the preferred way of doing this nowadays (although
> > this particular code is under the spinlock, so it should be OK).
> yes, get_cpu does preempt_disable() and smp_processor_id()
> As spin_lock does also preempt_disable() that should be ok.
> But I changed the code to use the preferred way.

Cool, thanks!

I've seen your new version and I'm going to look deeper into it in the next
few days, but I will be travelling from tomorrow through the end of the next
week, so I may be even slower to respond than usually. Sorry about that.

Thanks,
Rafael
Daniel Lezcano Oct. 11, 2012, 5:39 p.m. | #15
On 10/11/2012 07:21 PM, Rafael J. Wysocki wrote:
> On Thursday 11 of October 2012 12:04:37 Daniel Lezcano wrote:
>> On 10/07/2012 11:26 PM, Rafael J. Wysocki wrote:
>>> On Tuesday 25 of September 2012 00:43:54 Daniel Lezcano wrote:
>>>> With the tegra3 and the big.LITTLE [1] new architectures, several cpus
>>>> with different characteristics (latencies and states) can co-exists on the
>>>> system.
>>>>
>>>> The cpuidle framework has the limitation of handling only identical cpus.
>>>>
>>>> This patch removes this limitation by introducing the multiple driver support
>>>> for cpuidle.
>>>>
>>>> This option is configurable at compile time and should be enabled for the
>>>> architectures mentioned above. So there is no impact for the other platforms
>>>> if the option is disabled. The option defaults to 'n'. Note the multiple drivers
>>>> support is also compatible with the existing drivers, even if just one driver is
>>>> needed, all the cpu will be tied to this driver using an extra small chunk of
>>>> processor memory.
>>>>
>>>> The multiple driver support use a per-cpu driver pointer instead of a global
>>>> variable and the accessor to this variable are done from a cpu context.
>> Thanks Rafael for the review.
>>
>> I took into account all your remarks for the V2.
>>
>> [ cut ]
>>
>>>> +static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv)
>>>> +{
>>>> +	int ret = 0;
>>>> +	int i, cpu;
>>>> +
>>>> +	for_each_present_cpu(cpu) {
>>>> +		ret = __cpuidle_register_driver(drv, cpu);
>>>> +		if (!ret)
>>>> +			continue;
>>>> +		for (i = cpu - 1; i >= 0; i--)
>>> I wonder if this is going to work in all cases.  For example, is there any
>>> guarantee that the CPU numbers start from 0 and that there are no gaps?
>> AFAICS, the cpumask.h is not assuming the cpu numbers start from zero
>> and they are contiguous.
>>
>> I will fix this reverse loop, thanks for spotting this.
>>
>> [ cut ]
>>
>>>>  void cpuidle_unregister_driver(struct cpuidle_driver *drv)
>>>>  {
>>>>  	spin_lock(&cpuidle_driver_lock);
>>>> -	__cpuidle_unregister_driver(drv);
>>>> +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
>>>> +	__cpuidle_unregister_all_cpu_driver(drv);
>>>> +#else
>>>> +	__cpuidle_unregister_driver(drv, smp_processor_id());
>>>> +#endif
>>> I'm slightly cautious about using smp_processor_id() above.
>>> get_cpu()/put_cpu() is the preferred way of doing this nowadays (although
>>> this particular code is under the spinlock, so it should be OK).
>> yes, get_cpu does preempt_disable() and smp_processor_id()
>> As spin_lock does also preempt_disable() that should be ok.
>> But I changed the code to use the preferred way.
> Cool, thanks!
>
> I've seen your new version 

Actually, the patchset is just sysfs cleanup for the multiple drivers
support.
The patches are trivial IMO. If you think they are ok and they are
merged, I will send the multiple drivers patchset V2 on top of them.

> and I'm going to look deeper into it in the next
> few days, but I will be travelling from tomorrow through the end of the next
> week, so I may be even slower to respond than usually. Sorry about that.

No problem, I understand.

Thanks
-- Daniel

Patch

diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index a76b689..234ae65 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -9,6 +9,15 @@  config CPU_IDLE
 
 	  If you're using an ACPI-enabled platform, you should say Y here.
 
+config CPU_IDLE_MULTIPLE_DRIVERS
+        bool "Support multiple cpuidle drivers"
+        depends on CPU_IDLE
+        default n
+        help
+         Allows the cpuidle framework to use different drivers for each CPU.
+         This is useful if you have a system with different CPU latencies and
+         states. If unsure say N.
+
 config CPU_IDLE_GOV_LADDER
 	bool
 	depends on CPU_IDLE
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index e28f6ea..c581b99 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -68,7 +68,7 @@  static cpuidle_enter_t cpuidle_enter_ops;
 int cpuidle_play_dead(void)
 {
 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
-	struct cpuidle_driver *drv = cpuidle_get_driver();
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev->cpu);
 	int i, dead_state = -1;
 	int power_usage = -1;
 
@@ -128,7 +128,7 @@  int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 int cpuidle_idle_call(void)
 {
 	struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
-	struct cpuidle_driver *drv = cpuidle_get_driver();
+	struct cpuidle_driver *drv;
 	int next_state, entered_state;
 
 	if (off)
@@ -141,6 +141,8 @@  int cpuidle_idle_call(void)
 	if (!dev || !dev->enabled)
 		return -EBUSY;
 
+	drv = cpuidle_get_cpu_driver(dev->cpu);
+
 	/* ask the governor for the next state */
 	next_state = cpuidle_curr_governor->select(drv, dev);
 	if (need_resched()) {
@@ -308,15 +310,19 @@  static void poll_idle_init(struct cpuidle_driver *drv) {}
 int cpuidle_enable_device(struct cpuidle_device *dev)
 {
 	int ret, i;
-	struct cpuidle_driver *drv = cpuidle_get_driver();
+	struct cpuidle_driver *drv;
 
 	if (!dev)
 		return -EINVAL;
 
 	if (dev->enabled)
 		return 0;
+
+	drv = cpuidle_get_cpu_driver(dev->cpu);
+
 	if (!drv || !cpuidle_curr_governor)
 		return -EIO;
+
 	if (!dev->state_count)
 		dev->state_count = drv->state_count;
 
@@ -368,15 +374,18 @@  EXPORT_SYMBOL_GPL(cpuidle_enable_device);
  */
 void cpuidle_disable_device(struct cpuidle_device *dev)
 {
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev->cpu);
+
 	if (!dev->enabled)
 		return;
-	if (!cpuidle_get_driver() || !cpuidle_curr_governor)
+
+	if (!drv || !cpuidle_curr_governor)
 		return;
 
 	dev->enabled = 0;
 
 	if (cpuidle_curr_governor->disable)
-		cpuidle_curr_governor->disable(cpuidle_get_driver(), dev);
+		cpuidle_curr_governor->disable(drv, dev);
 
 	cpuidle_remove_state_sysfs(dev);
 	enabled_devices--;
@@ -395,7 +404,8 @@  static int __cpuidle_register_device(struct cpuidle_device *dev)
 {
 	int ret;
 	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
-	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
+	struct cpuidle_driver *cpuidle_driver =
+		cpuidle_get_cpu_driver(dev->cpu);
 
 	if (!try_module_get(cpuidle_driver->owner))
 		return -EINVAL;
@@ -461,7 +471,7 @@  EXPORT_SYMBOL_GPL(cpuidle_register_device);
 void cpuidle_unregister_device(struct cpuidle_device *dev)
 {
 	struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
-	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
+	struct cpuidle_driver *cpuidle_driver = cpuidle_get_cpu_driver(dev->cpu);
 
 	if (dev->registered == 0)
 		return;
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index 391f80f..6529b91 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -14,7 +14,11 @@ 
 
 #include "cpuidle.h"
 
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers);
+#else
 static struct cpuidle_driver *cpuidle_curr_driver;
+#endif
 DEFINE_SPINLOCK(cpuidle_driver_lock);
 
 static void set_power_states(struct cpuidle_driver *drv)
@@ -47,12 +51,25 @@  static void __cpuidle_driver_init(struct cpuidle_driver *drv)
 		set_power_states(drv);
 }
 
-static void cpuidle_set_driver(struct cpuidle_driver *drv)
+static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu)
 {
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+	per_cpu(cpuidle_drivers, cpu) = drv;
+#else
 	cpuidle_curr_driver = drv;
+#endif
 }
 
-static int __cpuidle_register_driver(struct cpuidle_driver *drv)
+static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
+{
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+	return per_cpu(cpuidle_drivers, cpu);
+#else
+	return cpuidle_curr_driver;
+#endif
+}
+
+static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu)
 {
 	if (!drv || !drv->state_count)
 		return -EINVAL;
@@ -60,26 +77,102 @@  static int __cpuidle_register_driver(struct cpuidle_driver *drv)
 	if (cpuidle_disabled())
 		return -ENODEV;
 
-	if (cpuidle_get_driver())
+	if (__cpuidle_get_cpu_driver(cpu))
 		return -EBUSY;
 
 	__cpuidle_driver_init(drv);
 
-	cpuidle_set_driver(drv);
+	__cpuidle_set_cpu_driver(drv, cpu);
 
 	return 0;
 }
 
-static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
+static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu)
 {
-	if (drv != cpuidle_get_driver()) {
+	if (drv != __cpuidle_get_cpu_driver(cpu)) {
 		WARN(1, "invalid cpuidle_unregister_driver(%s)\n",
 			drv->name);
 		return;
 	}
 
 	if (!WARN_ON(drv->refcnt > 0))
-		cpuidle_set_driver(NULL);
+		__cpuidle_set_cpu_driver(NULL, cpu);
+}
+
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv)
+{
+	int cpu;
+	for_each_present_cpu(cpu)
+		__cpuidle_unregister_driver(drv, cpu);
+}
+
+static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv)
+{
+	int ret = 0;
+	int i, cpu;
+
+	for_each_present_cpu(cpu) {
+		ret = __cpuidle_register_driver(drv, cpu);
+		if (!ret)
+			continue;
+		for (i = cpu - 1; i >= 0; i--)
+			__cpuidle_unregister_driver(drv, i);
+		break;
+	}
+
+	return ret;
+}
+
+static int __cpuidle_for_each_driver(int (*cb)(int, struct cpuidle_driver *,
+					       void *), void *data)
+{
+	struct cpuidle_driver *drv;
+	int cpu, ret = 0;
+
+	for_each_present_cpu(cpu) {
+		drv = __cpuidle_get_cpu_driver(cpu);
+		ret = cb(cpu, drv, data);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu)
+{
+	int ret;
+
+	spin_lock(&cpuidle_driver_lock);
+	ret = __cpuidle_register_driver(drv, cpu);
+	spin_unlock(&cpuidle_driver_lock);
+
+	return ret;
+}
+
+void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu)
+{
+	spin_lock(&cpuidle_driver_lock);
+	__cpuidle_unregister_driver(drv, cpu);
+	spin_unlock(&cpuidle_driver_lock);
+}
+#endif
+
+int cpuidle_for_each_driver(int (*cb)(int, struct cpuidle_driver *, void *),
+			    void *data)
+{
+	int ret;
+
+	spin_lock(&cpuidle_driver_lock);
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+	ret = __cpuidle_for_each_driver(cb, data);
+#else
+	ret = cb(smp_processor_id(),
+		 __cpuidle_get_cpu_driver(smp_processor_id()), data);
+#endif
+	spin_unlock(&cpuidle_driver_lock);
+
+	return ret;
 }
 
 /**
@@ -91,7 +184,11 @@  int cpuidle_register_driver(struct cpuidle_driver *drv)
 	int ret;
 
 	spin_lock(&cpuidle_driver_lock);
-	ret = __cpuidle_register_driver(drv);
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+	ret = __cpuidle_register_all_cpu_driver(drv);
+#else
+	ret = __cpuidle_register_driver(drv, smp_processor_id());
+#endif
 	spin_unlock(&cpuidle_driver_lock);
 
 	return ret;
@@ -103,18 +200,37 @@  EXPORT_SYMBOL_GPL(cpuidle_register_driver);
  */
 struct cpuidle_driver *cpuidle_get_driver(void)
 {
-	return cpuidle_curr_driver;
+	return __cpuidle_get_cpu_driver(smp_processor_id());
 }
 EXPORT_SYMBOL_GPL(cpuidle_get_driver);
 
 /**
+ * cpuidle_get_cpu_driver - return the driver tied with a cpu
+ */
+struct cpuidle_driver *cpuidle_get_cpu_driver(int cpu)
+{
+	struct cpuidle_driver *drv;
+
+	spin_lock(&cpuidle_driver_lock);
+	drv = __cpuidle_get_cpu_driver(cpu);
+	spin_unlock(&cpuidle_driver_lock);
+
+	return drv;
+}
+EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver);
+
+/**
  * cpuidle_unregister_driver - unregisters a driver
  * @drv: the driver
  */
 void cpuidle_unregister_driver(struct cpuidle_driver *drv)
 {
 	spin_lock(&cpuidle_driver_lock);
-	__cpuidle_unregister_driver(drv);
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+	__cpuidle_unregister_all_cpu_driver(drv);
+#else
+	__cpuidle_unregister_driver(drv, smp_processor_id());
+#endif
 	spin_unlock(&cpuidle_driver_lock);
 }
 EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 5f809e3..2596422 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -43,21 +43,46 @@  out:
 	return i;
 }
 
+struct cbarg {
+	char *buf;
+	ssize_t count;
+};
+
+static int each_driver_cb(int cpu, struct cpuidle_driver *drv, void *data)
+{
+	int ret;
+	struct cbarg *cbarg = data;
+
+	if ((drv && (strlen(drv->name) + cbarg->count) >= PAGE_SIZE) ||
+	    (!drv && (strlen("none") + cbarg->count) >= PAGE_SIZE))
+		return -EOVERFLOW;
+
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+	ret = sprintf(cbarg->buf + cbarg->count, "cpu%d: %s\n",
+		      cpu, drv ? drv->name : "none" );
+#else
+	ret = sprintf(cbarg->buf, "%s\n", drv ? drv->name : "none");
+#endif
+	if (ret < 0)
+		return ret;
+
+	cbarg->count += ret;
+
+	return 0;
+}
+
 static ssize_t show_current_driver(struct device *dev,
 				   struct device_attribute *attr,
 				   char *buf)
 {
-	ssize_t ret;
-	struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver();
+	struct cbarg cbarg = { .buf = buf };
+	int ret;
 
-	spin_lock(&cpuidle_driver_lock);
-	if (cpuidle_driver)
-		ret = sprintf(buf, "%s\n", cpuidle_driver->name);
-	else
-		ret = sprintf(buf, "none\n");
-	spin_unlock(&cpuidle_driver_lock);
+	ret = cpuidle_for_each_driver(each_driver_cb, &cbarg);
+	if (ret < 0)
+		return ret;
 
-	return ret;
+	return cbarg.count;
 }
 
 static ssize_t show_current_governor(struct device *dev,
@@ -363,10 +388,10 @@  int cpuidle_add_state_sysfs(struct cpuidle_device *device)
 {
 	int i, ret = -ENOMEM;
 	struct cpuidle_state_kobj *kobj;
-	struct cpuidle_driver *drv = cpuidle_get_driver();
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device->cpu);
 
 	/* state statistics */
-	for (i = 0; i < device->state_count; i++) {
+	for (i = 0; i < drv->state_count; i++) {
 		kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
 		if (!kobj)
 			goto error_state;
@@ -374,7 +399,8 @@  int cpuidle_add_state_sysfs(struct cpuidle_device *device)
 		kobj->state_usage = &device->states_usage[i];
 		init_completion(&kobj->kobj_unregister);
 
-		ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, &device->kobj,
+		ret = kobject_init_and_add(&kobj->kobj,
+					   &ktype_state_cpuidle, &device->kobj,
 					   "state%d", i);
 		if (ret) {
 			kfree(kobj);
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index a4ff9f8..0e0b0ad 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -164,6 +164,13 @@  extern int cpuidle_wrap_enter(struct cpuidle_device *dev,
 					struct cpuidle_driver *drv, int index));
 extern int cpuidle_play_dead(void);
 
+extern int cpuidle_for_each_driver(
+	int (*cb)(int, struct cpuidle_driver *, void *), void *data);
+
+extern struct cpuidle_driver *cpuidle_get_cpu_driver(int cpu);
+extern int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu);
+extern void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu);
+
 #else
 static inline void disable_cpuidle(void) { }
 static inline int cpuidle_idle_call(void) { return -ENODEV; }
@@ -190,7 +197,6 @@  static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
 					struct cpuidle_driver *drv, int index))
 { return -ENODEV; }
 static inline int cpuidle_play_dead(void) {return -ENODEV; }
-
 #endif
 
 #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED