[v5,1/9] cpuidle: Add commonly used functionality for consolidation

Message ID 1330318063-25460-2-git-send-email-rob.lee@linaro.org
State New
Headers show

Commit Message

Rob Feb. 27, 2012, 4:47 a.m.
Add functionality that is commonly duplicated by various platforms.

Signed-off-by: Robert Lee <rob.lee@linaro.org>
---
 drivers/cpuidle/cpuidle.c |   37 ++++++++++++++++++------------
 include/linux/cpuidle.h   |   55 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 15 deletions(-)

Comments

Jean Pihet Feb. 27, 2012, 4:19 p.m. | #1
Robert,

On Mon, Feb 27, 2012 at 5:47 AM, Robert Lee <rob.lee@linaro.org> wrote:
> Add functionality that is commonly duplicated by various platforms.
>
> Signed-off-by: Robert Lee <rob.lee@linaro.org>
> ---
>  drivers/cpuidle/cpuidle.c |   37 ++++++++++++++++++------------
>  include/linux/cpuidle.h   |   55 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 77 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
...

> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> index 712abcc..6563683 100644
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -15,6 +15,7 @@
>  #include <linux/list.h>
>  #include <linux/kobject.h>
>  #include <linux/completion.h>
> +#include <linux/hrtimer.h>
>
>  #define CPUIDLE_STATE_MAX      8
>  #define CPUIDLE_NAME_LEN       16
> @@ -56,6 +57,16 @@ struct cpuidle_state {
>
>  #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
>
> +/* Common ARM WFI state */
> +#define CPUIDLE_ARM_WFI_STATE {\
> +       .enter                  = cpuidle_simple_enter,\
> +       .exit_latency           = 1,\
> +       .target_residency       = 1,\
> +       .flags                  = CPUIDLE_FLAG_TIME_VALID,\
> +       .name                   = "WFI",\
> +       .desc                   = "ARM core clock gating (WFI)",\
> +}
This macro should belong to an ARM only header.

> +
>  /**
>  * cpuidle_get_statedata - retrieves private driver state data
>  * @st_usage: the state usage statistics
> @@ -122,6 +133,14 @@ struct cpuidle_driver {
>        struct module           *owner;
>
>        unsigned int            power_specified:1;
> +       /*
> +        * use the core cpuidle time keeping. This has been implemented for the
> +        * entire driver instead of per state as currently the device enter
> +        * fuction allows the entered state to change which requires handling
> +        * that requires a (subjectively) unnecessary decrease to efficiency
> +        * in this commonly called code.
> +        */
> +       unsigned int            en_core_tk_irqen:1;
Does the description reads as 'the time accounting is only performed
if en_core_tk_irqen is set'?
Also it suggests that changing (i.e. demoting) the state is kind of a
problem, which is unclear. IIUC the state change is handled correctly
in cpuidle_idle_call, is that correct?

>        struct cpuidle_state    states[CPUIDLE_STATE_MAX];
>        int                     state_count;
>        int                     safe_state_index;
> @@ -140,6 +159,39 @@ extern void cpuidle_pause_and_lock(void);
>  extern void cpuidle_resume_and_unlock(void);
>  extern int cpuidle_enable_device(struct cpuidle_device *dev);
>  extern void cpuidle_disable_device(struct cpuidle_device *dev);
> +extern int cpuidle_simple_enter(struct cpuidle_device *dev,
> +               struct cpuidle_driver *drv, int index);
> +
> +/**
> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
> + * @dev: pointer to a valid cpuidle_device object
> + * @drv: pointer to a valid cpuidle_driver object
> + * @index: index of the target cpuidle state.
> + */
> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
> +                               struct cpuidle_driver *drv, int index,
> +                               int (*enter)(struct cpuidle_device *dev,
> +                                       struct cpuidle_driver *drv, int index))
The function name does not match the description (cpuidle_enter_wrap
vs cpuidle_wrap_enter).

> +{
> +       ktime_t time_start, time_end;
> +       s64 diff;
> +
> +       time_start = ktime_get();
> +
> +       index = enter(dev, drv, index);
> +
> +       time_end = ktime_get();
> +
> +       local_irq_enable();
> +
> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
> +       if (diff > INT_MAX)
> +               diff = INT_MAX;
> +
> +       dev->last_residency = (int) diff;
> +
> +       return index;
> +}
>
>  #else
>  static inline void disable_cpuidle(void) { }
...

Regards,
Jean
Rob Feb. 27, 2012, 7:23 p.m. | #2
On Mon, Feb 27, 2012 at 10:19 AM, Jean Pihet <jean.pihet@newoldbits.com> wrote:
> Robert,
>
> On Mon, Feb 27, 2012 at 5:47 AM, Robert Lee <rob.lee@linaro.org> wrote:
>> Add functionality that is commonly duplicated by various platforms.
>>
>> Signed-off-by: Robert Lee <rob.lee@linaro.org>
>> ---
>>  drivers/cpuidle/cpuidle.c |   37 ++++++++++++++++++------------
>>  include/linux/cpuidle.h   |   55 +++++++++++++++++++++++++++++++++++++++++++++
>>  2 files changed, 77 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> ...
>
>> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
>> index 712abcc..6563683 100644
>> --- a/include/linux/cpuidle.h
>> +++ b/include/linux/cpuidle.h
>> @@ -15,6 +15,7 @@
>>  #include <linux/list.h>
>>  #include <linux/kobject.h>
>>  #include <linux/completion.h>
>> +#include <linux/hrtimer.h>
>>
>>  #define CPUIDLE_STATE_MAX      8
>>  #define CPUIDLE_NAME_LEN       16
>> @@ -56,6 +57,16 @@ struct cpuidle_state {
>>
>>  #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
>>
>> +/* Common ARM WFI state */
>> +#define CPUIDLE_ARM_WFI_STATE {\
>> +       .enter                  = cpuidle_simple_enter,\
>> +       .exit_latency           = 1,\
>> +       .target_residency       = 1,\
>> +       .flags                  = CPUIDLE_FLAG_TIME_VALID,\
>> +       .name                   = "WFI",\
>> +       .desc                   = "ARM core clock gating (WFI)",\
>> +}
> This macro should belong to an ARM only header.
>

Thanks, I was wondering about that but wasn't which location was better.

>> +
>>  /**
>>  * cpuidle_get_statedata - retrieves private driver state data
>>  * @st_usage: the state usage statistics
>> @@ -122,6 +133,14 @@ struct cpuidle_driver {
>>        struct module           *owner;
>>
>>        unsigned int            power_specified:1;
>> +       /*
>> +        * use the core cpuidle time keeping. This has been implemented for the
>> +        * entire driver instead of per state as currently the device enter
>> +        * fuction allows the entered state to change which requires handling
>> +        * that requires a (subjectively) unnecessary decrease to efficiency
>> +        * in this commonly called code.
>> +        */
>> +       unsigned int            en_core_tk_irqen:1;
> Does the description reads as 'the time accounting is only performed
> if en_core_tk_irqen is set'?

Correct.  I can make this clearer in the next version's comments.

> Also it suggests that changing (i.e. demoting) the state is kind of a
> problem, which is unclear. IIUC the state change is handled correctly
> in cpuidle_idle_call, is that correct?
>

If en_core_tk_irqen was a cpuidle state option instead of a cpuidle
driver option, then if the platform enter function changed the state
to one that used a different en_core_tk_irqen value, a time keeping
problem could occur.  Extra handling could be added, but for many SMP
systems, this case will be the most common case, and so I didn't think
it best to force this extra handling.  Anyways, with the current
implementation, if a platform cpuidle driver chooses not to use
en_core_tk_irqen, they can still use the consolidated time keeping
functionality by using the exported inline function (e.g., the OMAP
34XX case).

>>        struct cpuidle_state    states[CPUIDLE_STATE_MAX];
>>        int                     state_count;
>>        int                     safe_state_index;
>> @@ -140,6 +159,39 @@ extern void cpuidle_pause_and_lock(void);
>>  extern void cpuidle_resume_and_unlock(void);
>>  extern int cpuidle_enable_device(struct cpuidle_device *dev);
>>  extern void cpuidle_disable_device(struct cpuidle_device *dev);
>> +extern int cpuidle_simple_enter(struct cpuidle_device *dev,
>> +               struct cpuidle_driver *drv, int index);
>> +
>> +/**
>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
>> + * @dev: pointer to a valid cpuidle_device object
>> + * @drv: pointer to a valid cpuidle_driver object
>> + * @index: index of the target cpuidle state.
>> + */
>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>> +                               struct cpuidle_driver *drv, int index,
>> +                               int (*enter)(struct cpuidle_device *dev,
>> +                                       struct cpuidle_driver *drv, int index))
> The function name does not match the description (cpuidle_enter_wrap
> vs cpuidle_wrap_enter).
>

Oops, thanks.

>> +{
>> +       ktime_t time_start, time_end;
>> +       s64 diff;
>> +
>> +       time_start = ktime_get();
>> +
>> +       index = enter(dev, drv, index);
>> +
>> +       time_end = ktime_get();
>> +
>> +       local_irq_enable();
>> +
>> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
>> +       if (diff > INT_MAX)
>> +               diff = INT_MAX;
>> +
>> +       dev->last_residency = (int) diff;
>> +
>> +       return index;
>> +}
>>
>>  #else
>>  static inline void disable_cpuidle(void) { }
> ...
>
> Regards,
> Jean
Mike Turquette Feb. 28, 2012, 12:06 a.m. | #3
On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee <rob.lee@linaro.org> wrote:
> +/**
> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
> + * @dev: pointer to a valid cpuidle_device object
> + * @drv: pointer to a valid cpuidle_driver object
> + * @index: index of the target cpuidle state.
> + */
> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
> +                               struct cpuidle_driver *drv, int index,
> +                               int (*enter)(struct cpuidle_device *dev,
> +                                       struct cpuidle_driver *drv, int index))
> +{
> +       ktime_t time_start, time_end;
> +       s64 diff;
> +
> +       time_start = ktime_get();
> +
> +       index = enter(dev, drv, index);
> +
> +       time_end = ktime_get();
> +
> +       local_irq_enable();
> +
> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
> +       if (diff > INT_MAX)
> +               diff = INT_MAX;
> +
> +       dev->last_residency = (int) diff;
> +
> +       return index;
> +}

Any reason that this code is in the header?  Why not in cpuidle.c?

Regards,
Mike
Mike Turquette Feb. 28, 2012, 12:49 a.m. | #4
On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee <rob.lee@linaro.org> wrote:
> +/**
> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
> + * @dev: pointer to a valid cpuidle_device object
> + * @drv: pointer to a valid cpuidle_driver object
> + * @index: index of the target cpuidle state.
> + */
> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
> +                               struct cpuidle_driver *drv, int index,
> +                               int (*enter)(struct cpuidle_device *dev,
> +                                       struct cpuidle_driver *drv, int index))
> +{
> +       ktime_t time_start, time_end;
> +       s64 diff;
> +
> +       time_start = ktime_get();
> +
> +       index = enter(dev, drv, index);
> +
> +       time_end = ktime_get();
> +
> +       local_irq_enable();
> +
> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
> +       if (diff > INT_MAX)
> +               diff = INT_MAX;
> +
> +       dev->last_residency = (int) diff;
> +
> +       return index;
> +}

Hi Rob,

In a previous series I brought up the idea of not accounting for time
if a C-state transition fails.  My post on that thread can be found
here:
http://article.gmane.org/gmane.linux.ports.arm.kernel/149293/

How do you feel about adding something like the following?

if (IS_ERR(index))
        dev->last_residency = 0;
        return index;

Obviously it will up to the platforms to figure out how to propagate
that error up from their respective low power code.

Regards,
Mike
Rob Feb. 28, 2012, 3:45 p.m. | #5
Hey Mike,

On Mon, Feb 27, 2012 at 6:06 PM, Turquette, Mike <mturquette@ti.com> wrote:
> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee <rob.lee@linaro.org> wrote:
>> +/**
>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
>> + * @dev: pointer to a valid cpuidle_device object
>> + * @drv: pointer to a valid cpuidle_driver object
>> + * @index: index of the target cpuidle state.
>> + */
>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>> +                               struct cpuidle_driver *drv, int index,
>> +                               int (*enter)(struct cpuidle_device *dev,
>> +                                       struct cpuidle_driver *drv, int index))
>> +{
>> +       ktime_t time_start, time_end;
>> +       s64 diff;
>> +
>> +       time_start = ktime_get();
>> +
>> +       index = enter(dev, drv, index);
>> +
>> +       time_end = ktime_get();
>> +
>> +       local_irq_enable();
>> +
>> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
>> +       if (diff > INT_MAX)
>> +               diff = INT_MAX;
>> +
>> +       dev->last_residency = (int) diff;
>> +
>> +       return index;
>> +}
>
> Any reason that this code is in the header?  Why not in cpuidle.c?
>

Not a strong reason.  I thought making it an inline would introduce
slightly less new execution when adding this code (realizing that
there are function calls immediately after, so the only benefit is the
reduce popping and pushing).  But it does require an extra copy of
this code for any platform driver that does not enable
en_core_tk_irqen and instead makes calls to it directly (like omap3).
For this case, I don't think the inline implementation should add
extra code from what exists today as it should simply replace the
existing platform time keeping calls to a standard one defined by the
core cpuidle.

I don't have a strong preference with using the inline so if you or
others can give your opinion on which method to use and why, I'd be
glad to read it.

> Regards,
> Mike
Rob Feb. 28, 2012, 3:50 p.m. | #6
On Mon, Feb 27, 2012 at 6:49 PM, Turquette, Mike <mturquette@ti.com> wrote:
> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee <rob.lee@linaro.org> wrote:
>> +/**
>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
>> + * @dev: pointer to a valid cpuidle_device object
>> + * @drv: pointer to a valid cpuidle_driver object
>> + * @index: index of the target cpuidle state.
>> + */
>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>> +                               struct cpuidle_driver *drv, int index,
>> +                               int (*enter)(struct cpuidle_device *dev,
>> +                                       struct cpuidle_driver *drv, int index))
>> +{
>> +       ktime_t time_start, time_end;
>> +       s64 diff;
>> +
>> +       time_start = ktime_get();
>> +
>> +       index = enter(dev, drv, index);
>> +
>> +       time_end = ktime_get();
>> +
>> +       local_irq_enable();
>> +
>> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
>> +       if (diff > INT_MAX)
>> +               diff = INT_MAX;
>> +
>> +       dev->last_residency = (int) diff;
>> +
>> +       return index;
>> +}
>
> Hi Rob,
>
> In a previous series I brought up the idea of not accounting for time
> if a C-state transition fails.  My post on that thread can be found
> here:
> http://article.gmane.org/gmane.linux.ports.arm.kernel/149293/
>
> How do you feel about adding something like the following?
>
> if (IS_ERR(index))
>        dev->last_residency = 0;
>        return index;
>
> Obviously it will up to the platforms to figure out how to propagate
> that error up from their respective low power code.

To be completely clear on what you're asking for, from
cpuidle_idle_call in drivers/cpuidle/cpuidle.c:

...
	target_state = &drv->states[next_state];

	trace_power_start(POWER_CSTATE, next_state, dev->cpu);
	trace_cpu_idle(next_state, dev->cpu);

	entered_state = target_state->enter(dev, drv, next_state);

	trace_power_end(dev->cpu);
	trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);

	if (entered_state >= 0) {
		/* Update cpuidle counters */
		/* This can be moved to within driver enter routine
		 * but that results in multiple copies of same code.
		 */
		dev->states_usage[entered_state].time +=
				(unsigned long long)dev->last_residency;
		dev->states_usage[entered_state].usage++;
	}
...

Note the "if (entered_state >= 0)".  This ultimately prevents the
cpuidle device time accounting upon an negative value being returned.
So are you asking for the if(IS_ERR(index)) check to prevent the
unnecessary last_residency time calculation in the wrapper, or to make
sure a last_residency is zero upon failure?  (or both?)

It seems like a bug (or lack or documentation at best) in the code
that exists today to not zero out dev->last_residency upon a negative
return value as this value is used by the governors upon the next
idle.  So to ensure last_residency is 0 upon failure, I think it'd be
best to add that to an new else statement immediately following the
"if (entered_state >=))" so that any platform cpuidle driver that
returns a negative will have the last_residency zeroed out, not just
those that use en_core_tk_irqen.

>
> Regards,
> Mike
Rob Herring Feb. 28, 2012, 3:58 p.m. | #7
On 02/28/2012 09:45 AM, Rob Lee wrote:
> Hey Mike,
> 
> On Mon, Feb 27, 2012 at 6:06 PM, Turquette, Mike <mturquette@ti.com> wrote:
>> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee <rob.lee@linaro.org> wrote:
>>> +/**
>>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
>>> + * @dev: pointer to a valid cpuidle_device object
>>> + * @drv: pointer to a valid cpuidle_driver object
>>> + * @index: index of the target cpuidle state.
>>> + */
>>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>>> +                               struct cpuidle_driver *drv, int index,
>>> +                               int (*enter)(struct cpuidle_device *dev,
>>> +                                       struct cpuidle_driver *drv, int index))
>>> +{
>>> +       ktime_t time_start, time_end;
>>> +       s64 diff;
>>> +
>>> +       time_start = ktime_get();
>>> +
>>> +       index = enter(dev, drv, index);
>>> +
>>> +       time_end = ktime_get();
>>> +
>>> +       local_irq_enable();
>>> +
>>> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
>>> +       if (diff > INT_MAX)
>>> +               diff = INT_MAX;
>>> +
>>> +       dev->last_residency = (int) diff;
>>> +
>>> +       return index;
>>> +}
>>
>> Any reason that this code is in the header?  Why not in cpuidle.c?
>>
> 
> Not a strong reason.  I thought making it an inline would introduce
> slightly less new execution when adding this code (realizing that
> there are function calls immediately after, so the only benefit is the
> reduce popping and pushing).  But it does require an extra copy of
> this code for any platform driver that does not enable
> en_core_tk_irqen and instead makes calls to it directly (like omap3).
> For this case, I don't think the inline implementation should add
> extra code from what exists today as it should simply replace the
> existing platform time keeping calls to a standard one defined by the
> core cpuidle.
> 
But you will have multiple copies of the inlined code if platforms do
use it. Or is it used only by the core cpuidle code? In that case, gcc
can automatically inline static functions.

It seems a bit long to inline and this isn't performance critical (at
least for the enter side).

Rob

> I don't have a strong preference with using the inline so if you or
> others can give your opinion on which method to use and why, I'd be
> glad to read it.
> 
>> Regards,
>> Mike
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Rob Feb. 28, 2012, 8:49 p.m. | #8
>>> Any reason that this code is in the header?  Why not in cpuidle.c?
>>>
>>
>> Not a strong reason.  I thought making it an inline would introduce
>> slightly less new execution when adding this code (realizing that
>> there are function calls immediately after, so the only benefit is the
>> reduce popping and pushing).  But it does require an extra copy of
>> this code for any platform driver that does not enable
>> en_core_tk_irqen and instead makes calls to it directly (like omap3).
>> For this case, I don't think the inline implementation should add
>> extra code from what exists today as it should simply replace the
>> existing platform time keeping calls to a standard one defined by the
>> core cpuidle.
>>
> But you will have multiple copies of the inlined code if platforms do
> use it. Or is it used only by the core cpuidle code? In that case, gcc
> can automatically inline static functions.

Used by some platforms as well.

>
> It seems a bit long to inline and this isn't performance critical (at
> least for the enter side).

Ok.  Unless there are further comments supporting the inline method,
I'll switch to non-inline for next version.  Thanks Mike and Rob for
the feedback.

>
> Rob
>
>> I don't have a strong preference with using the inline so if you or
>> others can give your opinion on which method to use and why, I'd be
>> glad to read it.
>>
>>> Regards,
>>> Mike
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> linux-arm-kernel@lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
Mike Turquette Feb. 28, 2012, 9:42 p.m. | #9
On Tue, Feb 28, 2012 at 7:50 AM, Rob Lee <rob.lee@linaro.org> wrote:
> On Mon, Feb 27, 2012 at 6:49 PM, Turquette, Mike <mturquette@ti.com> wrote:
>> On Sun, Feb 26, 2012 at 8:47 PM, Robert Lee <rob.lee@linaro.org> wrote:
>>> +/**
>>> + * cpuidle_enter_wrap - performing timekeeping and irq around enter function
>>> + * @dev: pointer to a valid cpuidle_device object
>>> + * @drv: pointer to a valid cpuidle_driver object
>>> + * @index: index of the target cpuidle state.
>>> + */
>>> +static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
>>> +                               struct cpuidle_driver *drv, int index,
>>> +                               int (*enter)(struct cpuidle_device *dev,
>>> +                                       struct cpuidle_driver *drv, int index))
>>> +{
>>> +       ktime_t time_start, time_end;
>>> +       s64 diff;
>>> +
>>> +       time_start = ktime_get();
>>> +
>>> +       index = enter(dev, drv, index);
>>> +
>>> +       time_end = ktime_get();
>>> +
>>> +       local_irq_enable();
>>> +
>>> +       diff = ktime_to_us(ktime_sub(time_end, time_start));
>>> +       if (diff > INT_MAX)
>>> +               diff = INT_MAX;
>>> +
>>> +       dev->last_residency = (int) diff;
>>> +
>>> +       return index;
>>> +}
>>
>> Hi Rob,
>>
>> In a previous series I brought up the idea of not accounting for time
>> if a C-state transition fails.  My post on that thread can be found
>> here:
>> http://article.gmane.org/gmane.linux.ports.arm.kernel/149293/
>>
>> How do you feel about adding something like the following?
>>
>> if (IS_ERR(index))
>>        dev->last_residency = 0;
>>        return index;
>>
>> Obviously it will up to the platforms to figure out how to propagate
>> that error up from their respective low power code.
>
> To be completely clear on what you're asking for, from
> cpuidle_idle_call in drivers/cpuidle/cpuidle.c:
>
> ...
>        target_state = &drv->states[next_state];
>
>        trace_power_start(POWER_CSTATE, next_state, dev->cpu);
>        trace_cpu_idle(next_state, dev->cpu);
>
>        entered_state = target_state->enter(dev, drv, next_state);
>
>        trace_power_end(dev->cpu);
>        trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
>
>        if (entered_state >= 0) {
>                /* Update cpuidle counters */
>                /* This can be moved to within driver enter routine
>                 * but that results in multiple copies of same code.
>                 */
>                dev->states_usage[entered_state].time +=
>                                (unsigned long long)dev->last_residency;
>                dev->states_usage[entered_state].usage++;
>        }
> ...
>
> Note the "if (entered_state >= 0)".  This ultimately prevents the
> cpuidle device time accounting upon an negative value being returned.
> So are you asking for the if(IS_ERR(index)) check to prevent the
> unnecessary last_residency time calculation in the wrapper, or to make
> sure a last_residency is zero upon failure?  (or both?)
>
> It seems like a bug (or lack or documentation at best) in the code
> that exists today to not zero out dev->last_residency upon a negative
> return value as this value is used by the governors upon the next
> idle.  So to ensure last_residency is 0 upon failure, I think it'd be
> best to add that to an new else statement immediately following the
> "if (entered_state >=))" so that any platform cpuidle driver that
> returns a negative will have the last_residency zeroed out, not just
> those that use en_core_tk_irqen.

+ Cc: Jon Hunter

Hi Rob,

I didn't review the code carefully enough to catch the 'if
(entered_state >= 0)' part, but that seems like a graceful way to
solve this problem by appending the 'else' statement on there and
seeting last_residency to zero.

I brought this topic up internally and Jon suggested that the 'usage'
statistics that are reported in sysfs should also reflect failed
versus successful C-state transitions, which is a great idea.  This
could simply be achieved by renaming the current 'usage' count to
something like 'transitions_attempted' and then conditionally
increment a new counter within the 'if (entered_state >= 0)' block,
perhaps named, 'transition_succeeded'.

This way the old 'usage' count paradigm is as accurate as the new
time-keeping code.  Being able to easily observe which C-state tend to
fail the most would be invaluable in tuning idle policy for maximum
effectiveness.

Thoughts?

Regards,
Mike

>
>>
>> Regards,
>> Mike
Rob Feb. 28, 2012, 11:33 p.m. | #10
>
> I brought this topic up internally and Jon suggested that the 'usage'
> statistics that are reported in sysfs should also reflect failed
> versus successful C-state transitions, which is a great idea.  This
> could simply be achieved by renaming the current 'usage' count to
> something like 'transitions_attempted' and then conditionally
> increment a new counter within the 'if (entered_state >= 0)' block,
> perhaps named, 'transition_succeeded'.
>
> This way the old 'usage' count paradigm is as accurate as the new
> time-keeping code.  Being able to easily observe which C-state tend to
> fail the most would be invaluable in tuning idle policy for maximum
> effectiveness.
>
> Thoughts?

Sounds reasonable.  In some cases it may be helpful to track state
demotion as well.  Since I'm still a noob and wearing my submission
training wheels, I'm trying to minimize things that fall outside of
this basic consolidation effort for this patch series.  But I added
Jon's suggestion to this cpuidle page which contains future cpuidle
items to consider adding:
https://wiki.linaro.org/WorkingGroups/PowerManagement/Doc/CPUIdle#Track_both_attempted_and_successful_enter_attempts

>
> Regards,
> Mike
>
>>
>>>
>>> Regards,
>>> Mike
Mike Turquette Feb. 28, 2012, 11:47 p.m. | #11
On Tue, Feb 28, 2012 at 3:33 PM, Rob Lee <rob.lee@linaro.org> wrote:
>>
>> I brought this topic up internally and Jon suggested that the 'usage'
>> statistics that are reported in sysfs should also reflect failed
>> versus successful C-state transitions, which is a great idea.  This
>> could simply be achieved by renaming the current 'usage' count to
>> something like 'transitions_attempted' and then conditionally
>> increment a new counter within the 'if (entered_state >= 0)' block,
>> perhaps named, 'transition_succeeded'.
>>
>> This way the old 'usage' count paradigm is as accurate as the new
>> time-keeping code.  Being able to easily observe which C-state tend to
>> fail the most would be invaluable in tuning idle policy for maximum
>> effectiveness.
>>
>> Thoughts?
>
> Sounds reasonable.  In some cases it may be helpful to track state
> demotion as well.  Since I'm still a noob and wearing my submission
> training wheels, I'm trying to minimize things that fall outside of
> this basic consolidation effort for this patch series.  But I added
> Jon's suggestion to this cpuidle page which contains future cpuidle
> items to consider adding:
> https://wiki.linaro.org/WorkingGroups/PowerManagement/Doc/CPUIdle#Track_both_attempted_and_successful_enter_attempts

Yeah, I don't want to feature-bloat your submission more than
necessary.  I'm happy for the usage counter stuff to get tackled at a
later date, but you're still on board for setting last_residency to
zero in this series, right?

Regards,
Mike

>
>>
>> Regards,
>> Mike
>>
>>>
>>>>
>>>> Regards,
>>>> Mike
Rob Feb. 29, 2012, 12:04 a.m. | #12
>> Sounds reasonable.  In some cases it may be helpful to track state
>> demotion as well.  Since I'm still a noob and wearing my submission
>> training wheels, I'm trying to minimize things that fall outside of
>> this basic consolidation effort for this patch series.  But I added
>> Jon's suggestion to this cpuidle page which contains future cpuidle
>> items to consider adding:
>> https://wiki.linaro.org/WorkingGroups/PowerManagement/Doc/CPUIdle#Track_both_attempted_and_successful_enter_attempts
>
> Yeah, I don't want to feature-bloat your submission more than
> necessary.  I'm happy for the usage counter stuff to get tackled at a
> later date, but you're still on board for setting last_residency to
> zero in this series, right?

Yes.

>
> Regards,
> Mike
>

Patch

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 59f4261..f21d58e 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -18,6 +18,7 @@ 
 #include <linux/ktime.h>
 #include <linux/hrtimer.h>
 #include <linux/module.h>
+#include <asm/proc-fns.h>
 #include <trace/events/power.h>
 
 #include "cpuidle.h"
@@ -97,7 +98,11 @@  int cpuidle_idle_call(void)
 	trace_power_start(POWER_CSTATE, next_state, dev->cpu);
 	trace_cpu_idle(next_state, dev->cpu);
 
-	entered_state = target_state->enter(dev, drv, next_state);
+	if (drv->en_core_tk_irqen)
+		entered_state = cpuidle_wrap_enter(dev,	drv, next_state,
+						target_state->enter);
+	else
+		entered_state = target_state->enter(dev, drv, next_state);
 
 	trace_power_end(dev->cpu);
 	trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu);
@@ -164,28 +169,31 @@  void cpuidle_resume_and_unlock(void)
 
 EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
 
-#ifdef CONFIG_ARCH_HAS_CPU_RELAX
-static int poll_idle(struct cpuidle_device *dev,
+int cpuidle_simple_enter(struct cpuidle_device *dev,
 		struct cpuidle_driver *drv, int index)
 {
-	ktime_t	t1, t2;
-	s64 diff;
+	cpu_do_idle();
+
+	return index;
+}
 
-	t1 = ktime_get();
-	local_irq_enable();
+#ifdef CONFIG_ARCH_HAS_CPU_RELAX
+static inline int __poll_idle(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
+{
 	while (!need_resched())
 		cpu_relax();
 
-	t2 = ktime_get();
-	diff = ktime_to_us(ktime_sub(t2, t1));
-	if (diff > INT_MAX)
-		diff = INT_MAX;
-
-	dev->last_residency = (int) diff;
-
 	return index;
 }
 
+static int poll_idle(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
+{
+	return cpuidle_wrap_enter(dev,	drv, next_state,
+				__poll_idle);
+}
+
 static void poll_idle_init(struct cpuidle_driver *drv)
 {
 	struct cpuidle_state *state = &drv->states[0];
@@ -195,7 +203,6 @@  static void poll_idle_init(struct cpuidle_driver *drv)
 	state->exit_latency = 0;
 	state->target_residency = 0;
 	state->power_usage = -1;
-	state->flags = 0;
 	state->enter = poll_idle;
 }
 #else
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 712abcc..6563683 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -15,6 +15,7 @@ 
 #include <linux/list.h>
 #include <linux/kobject.h>
 #include <linux/completion.h>
+#include <linux/hrtimer.h>
 
 #define CPUIDLE_STATE_MAX	8
 #define CPUIDLE_NAME_LEN	16
@@ -56,6 +57,16 @@  struct cpuidle_state {
 
 #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000)
 
+/* Common ARM WFI state */
+#define CPUIDLE_ARM_WFI_STATE {\
+	.enter                  = cpuidle_simple_enter,\
+	.exit_latency           = 1,\
+	.target_residency       = 1,\
+	.flags                  = CPUIDLE_FLAG_TIME_VALID,\
+	.name                   = "WFI",\
+	.desc                   = "ARM core clock gating (WFI)",\
+}
+
 /**
  * cpuidle_get_statedata - retrieves private driver state data
  * @st_usage: the state usage statistics
@@ -122,6 +133,14 @@  struct cpuidle_driver {
 	struct module 		*owner;
 
 	unsigned int		power_specified:1;
+	/*
+	 * use the core cpuidle time keeping. This has been implemented for the
+	 * entire driver instead of per state as currently the device enter
+	 * fuction allows the entered state to change which requires handling
+	 * that requires a (subjectively) unnecessary decrease to efficiency
+	 * in this commonly called code.
+	 */
+	unsigned int		en_core_tk_irqen:1;
 	struct cpuidle_state	states[CPUIDLE_STATE_MAX];
 	int			state_count;
 	int			safe_state_index;
@@ -140,6 +159,39 @@  extern void cpuidle_pause_and_lock(void);
 extern void cpuidle_resume_and_unlock(void);
 extern int cpuidle_enable_device(struct cpuidle_device *dev);
 extern void cpuidle_disable_device(struct cpuidle_device *dev);
+extern int cpuidle_simple_enter(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index);
+
+/**
+ * cpuidle_enter_wrap - performing timekeeping and irq around enter function
+ * @dev: pointer to a valid cpuidle_device object
+ * @drv: pointer to a valid cpuidle_driver object
+ * @index: index of the target cpuidle state.
+ */
+static inline int cpuidle_wrap_enter(struct cpuidle_device *dev,
+				struct cpuidle_driver *drv, int index,
+				int (*enter)(struct cpuidle_device *dev,
+					struct cpuidle_driver *drv, int index))
+{
+	ktime_t time_start, time_end;
+	s64 diff;
+
+	time_start = ktime_get();
+
+	index = enter(dev, drv, index);
+
+	time_end = ktime_get();
+
+	local_irq_enable();
+
+	diff = ktime_to_us(ktime_sub(time_end, time_start));
+	if (diff > INT_MAX)
+		diff = INT_MAX;
+
+	dev->last_residency = (int) diff;
+
+	return index;
+}
 
 #else
 static inline void disable_cpuidle(void) { }
@@ -157,6 +209,9 @@  static inline void cpuidle_resume_and_unlock(void) { }
 static inline int cpuidle_enable_device(struct cpuidle_device *dev)
 {return -ENODEV; }
 static inline void cpuidle_disable_device(struct cpuidle_device *dev) { }
+static inline int cpuidle_simple_enter(struct cpuidle_device *dev,
+		struct cpuidle_driver *drv, int index)
+{return -ENODEV; }
 
 #endif