cpufreq: arm_big_little: set lowest frequcency for cluster when no cpus in it

Message ID 1403795313-6903-1-git-send-email-thomas.ab@samsung.com
State New
Headers show

Commit Message

Thomas Abraham June 26, 2014, 3:08 p.m.
From: Thomas Abraham <thomas.ab@samsung.com>

If there are no active cpus in a cluster, the clock frequency of the cluster
can be lowered to the lowest possible frequency for that cluster. This can
help reduce the output clock speed of the PLL clocking the cluster and
save power.

The get_table_min() function is also moved with this change to avoid forward
declaration. The function get_table_max() is also moved along with
get_table_min() so that these two functions are adjacent the code.

Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
---
 drivers/cpufreq/arm_big_little.c |   51 +++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 23 deletions(-)

Comments

Viresh Kumar June 26, 2014, 3:38 p.m. | #1
On 26 June 2014 20:38, Thomas Abraham <thomas.ab@samsung.com> wrote:
> From: Thomas Abraham <thomas.ab@samsung.com>
>
> If there are no active cpus in a cluster, the clock frequency of the cluster
> can be lowered to the lowest possible frequency for that cluster. This can
> help reduce the output clock speed of the PLL clocking the cluster and
> save power.
>
> The get_table_min() function is also moved with this change to avoid forward
> declaration. The function get_table_max() is also moved along with
> get_table_min() so that these two functions are adjacent the code.

Makes sense as my poor mind also thought this over an year and
half back.

> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
> ---
>  drivers/cpufreq/arm_big_little.c |   51 +++++++++++++++++++++-----------------
>  1 file changed, 28 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
> index 1f4d4e3..4b1431f 100644
> --- a/drivers/cpufreq/arm_big_little.c
> +++ b/drivers/cpufreq/arm_big_little.c
> @@ -75,6 +75,28 @@ static inline int cpu_to_cluster(int cpu)
>                 MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
>  }
>
> +/* get the minimum frequency in the cpufreq_frequency_table */
> +static inline u32 get_table_min(struct cpufreq_frequency_table *table)
> +{
> +       struct cpufreq_frequency_table *pos;
> +       uint32_t min_freq = ~0;
> +       cpufreq_for_each_entry(pos, table)
> +               if (pos->frequency < min_freq)
> +                       min_freq = pos->frequency;
> +       return min_freq;
> +}
> +
> +/* get the maximum frequency in the cpufreq_frequency_table */
> +static inline u32 get_table_max(struct cpufreq_frequency_table *table)
> +{
> +       struct cpufreq_frequency_table *pos;
> +       uint32_t max_freq = 0;
> +       cpufreq_for_each_entry(pos, table)
> +               if (pos->frequency > max_freq)
> +                       max_freq = pos->frequency;
> +       return max_freq;
> +}
> +
>  static unsigned int find_cluster_maxfreq(int cluster)
>  {
>         int j;
> @@ -170,9 +192,14 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
>
>                 mutex_lock(&cluster_lock[old_cluster]);
>
> -               /* Set freq of old cluster if there are cpus left on it */
> +               /*
> +                * Set freq of old cluster. If there are no cpus left on it,
> +                * set the lowest possible frequency for that cluster.
> +                */
>                 new_rate = find_cluster_maxfreq(old_cluster);
>                 new_rate = ACTUAL_FREQ(old_cluster, new_rate);
> +               if (!new_rate)
> +                       new_rate = get_table_min(freq_table[old_cluster]);

Have you hit this code? How did you test it?

In case you have hit this code, you must be using IKS as in MP it
doesn't make sense.

Two things:
- First in case this patch gets in, you can check !new_rate just after
  find_cluster_maxfreq() as it will be zero there as well..

- The counter argument given to me from Sudeep at that time was,
  "it is just a waste of time". Because if we aren't using the cluster
  anymore, it will be switched off very soon and there is no point
  wasting time changing freq to lowest..

  And I somewhat agreed to it then..

Cc'ing Nico as he can throw some light from MCPM's perspective
here.

--
viresh
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sudeep Holla June 26, 2014, 3:57 p.m. | #2
On 26/06/14 16:38, Viresh Kumar wrote:
> On 26 June 2014 20:38, Thomas Abraham <thomas.ab@samsung.com> wrote:
>> From: Thomas Abraham <thomas.ab@samsung.com>
>>
>> If there are no active cpus in a cluster, the clock frequency of the cluster
>> can be lowered to the lowest possible frequency for that cluster. This can
>> help reduce the output clock speed of the PLL clocking the cluster and
>> save power.
>>
>> The get_table_min() function is also moved with this change to avoid forward
>> declaration. The function get_table_max() is also moved along with
>> get_table_min() so that these two functions are adjacent the code.
>
> Makes sense as my poor mind also thought this over an year and
> half back.
>
>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>> ---
>>   drivers/cpufreq/arm_big_little.c |   51 +++++++++++++++++++++-----------------
>>   1 file changed, 28 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
>> index 1f4d4e3..4b1431f 100644
>> --- a/drivers/cpufreq/arm_big_little.c
>> +++ b/drivers/cpufreq/arm_big_little.c
>> @@ -75,6 +75,28 @@ static inline int cpu_to_cluster(int cpu)
>>                  MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
>>   }
>>
>> +/* get the minimum frequency in the cpufreq_frequency_table */
>> +static inline u32 get_table_min(struct cpufreq_frequency_table *table)
>> +{
>> +       struct cpufreq_frequency_table *pos;
>> +       uint32_t min_freq = ~0;
>> +       cpufreq_for_each_entry(pos, table)
>> +               if (pos->frequency < min_freq)
>> +                       min_freq = pos->frequency;
>> +       return min_freq;
>> +}
>> +
>> +/* get the maximum frequency in the cpufreq_frequency_table */
>> +static inline u32 get_table_max(struct cpufreq_frequency_table *table)
>> +{
>> +       struct cpufreq_frequency_table *pos;
>> +       uint32_t max_freq = 0;
>> +       cpufreq_for_each_entry(pos, table)
>> +               if (pos->frequency > max_freq)
>> +                       max_freq = pos->frequency;
>> +       return max_freq;
>> +}
>> +
>>   static unsigned int find_cluster_maxfreq(int cluster)
>>   {
>>          int j;
>> @@ -170,9 +192,14 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
>>
>>                  mutex_lock(&cluster_lock[old_cluster]);
>>
>> -               /* Set freq of old cluster if there are cpus left on it */
>> +               /*
>> +                * Set freq of old cluster. If there are no cpus left on it,
>> +                * set the lowest possible frequency for that cluster.
>> +                */
>>                  new_rate = find_cluster_maxfreq(old_cluster);
>>                  new_rate = ACTUAL_FREQ(old_cluster, new_rate);
>> +               if (!new_rate)
>> +                       new_rate = get_table_min(freq_table[old_cluster]);
>
> Have you hit this code? How did you test it?
>
> In case you have hit this code, you must be using IKS as in MP it
> doesn't make sense.
>
> Two things:
> - First in case this patch gets in, you can check !new_rate just after
>    find_cluster_maxfreq() as it will be zero there as well..
>
> - The counter argument given to me from Sudeep at that time was,
>    "it is just a waste of time". Because if we aren't using the cluster
>    anymore, it will be switched off very soon and there is no point
>    wasting time changing freq to lowest..
>

I was just writing the same thing again, you saved my time :)
Since in IKS the cluster will be powered down if no CPU is active, I see no
point in doing this. bL_switch_request would initiate cluster powerdown and
trying to reset the clk to lowest might unnecessarily wake it up(though
the exact behaviour depends on the firmware/code managing it)

Regards,
Sudeep

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Abraham June 26, 2014, 6:25 p.m. | #3
On Thu, Jun 26, 2014 at 9:27 PM, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
>
> On 26/06/14 16:38, Viresh Kumar wrote:
>>
>> On 26 June 2014 20:38, Thomas Abraham <thomas.ab@samsung.com> wrote:
>>>
>>> From: Thomas Abraham <thomas.ab@samsung.com>
>>>
>>> If there are no active cpus in a cluster, the clock frequency of the
>>> cluster
>>> can be lowered to the lowest possible frequency for that cluster. This
>>> can
>>> help reduce the output clock speed of the PLL clocking the cluster and
>>> save power.
>>>
>>> The get_table_min() function is also moved with this change to avoid
>>> forward
>>> declaration. The function get_table_max() is also moved along with
>>> get_table_min() so that these two functions are adjacent the code.
>>
>>
>> Makes sense as my poor mind also thought this over an year and
>> half back.
>>
>>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>>> ---
>>>   drivers/cpufreq/arm_big_little.c |   51
>>> +++++++++++++++++++++-----------------
>>>   1 file changed, 28 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/drivers/cpufreq/arm_big_little.c
>>> b/drivers/cpufreq/arm_big_little.c
>>> index 1f4d4e3..4b1431f 100644
>>> --- a/drivers/cpufreq/arm_big_little.c
>>> +++ b/drivers/cpufreq/arm_big_little.c
>>> @@ -75,6 +75,28 @@ static inline int cpu_to_cluster(int cpu)
>>>                  MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
>>>   }
>>>
>>> +/* get the minimum frequency in the cpufreq_frequency_table */
>>> +static inline u32 get_table_min(struct cpufreq_frequency_table *table)
>>> +{
>>> +       struct cpufreq_frequency_table *pos;
>>> +       uint32_t min_freq = ~0;
>>> +       cpufreq_for_each_entry(pos, table)
>>> +               if (pos->frequency < min_freq)
>>> +                       min_freq = pos->frequency;
>>> +       return min_freq;
>>> +}
>>> +
>>> +/* get the maximum frequency in the cpufreq_frequency_table */
>>> +static inline u32 get_table_max(struct cpufreq_frequency_table *table)
>>> +{
>>> +       struct cpufreq_frequency_table *pos;
>>> +       uint32_t max_freq = 0;
>>> +       cpufreq_for_each_entry(pos, table)
>>> +               if (pos->frequency > max_freq)
>>> +                       max_freq = pos->frequency;
>>> +       return max_freq;
>>> +}
>>> +
>>>   static unsigned int find_cluster_maxfreq(int cluster)
>>>   {
>>>          int j;
>>> @@ -170,9 +192,14 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32
>>> new_cluster, u32 rate)
>>>
>>>                  mutex_lock(&cluster_lock[old_cluster]);
>>>
>>> -               /* Set freq of old cluster if there are cpus left on it
>>> */
>>> +               /*
>>> +                * Set freq of old cluster. If there are no cpus left on
>>> it,
>>> +                * set the lowest possible frequency for that cluster.
>>> +                */
>>>                  new_rate = find_cluster_maxfreq(old_cluster);
>>>                  new_rate = ACTUAL_FREQ(old_cluster, new_rate);
>>> +               if (!new_rate)
>>> +                       new_rate =
>>> get_table_min(freq_table[old_cluster]);
>>
>>
>> Have you hit this code? How did you test it?
>>
>> In case you have hit this code, you must be using IKS as in MP it
>> doesn't make sense.

Yes, I have hit this code with IKS.

>>
>> Two things:
>> - First in case this patch gets in, you can check !new_rate just after
>>    find_cluster_maxfreq() as it will be zero there as well..

Ok.

>>
>> - The counter argument given to me from Sudeep at that time was,
>>    "it is just a waste of time". Because if we aren't using the cluster
>>    anymore, it will be switched off very soon and there is no point
>>    wasting time changing freq to lowest..
>>
>
> I was just writing the same thing again, you saved my time :)
> Since in IKS the cluster will be powered down if no CPU is active, I see no
> point in doing this. bL_switch_request would initiate cluster powerdown and

The CPU clock domain can contain additional components than just the
mutli-core cluster. Those continue to be alive and clocked even when
the cluster is power gated. This results in additional power
consumption, though may not be high enough, but can be reduced by
lowering the clock speed.

> trying to reset the clk to lowest might unnecessarily wake it up(though
> the exact behaviour depends on the firmware/code managing it)

Any particular example of a platform that has this problem, just to
understand what could cause such a behavior and if is mostly a
software or a hardware induced problem.

Thanks,
Thomas.

>
> Regards,
> Sudeep
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-pm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Sudeep Holla June 26, 2014, 6:57 p.m. | #4
On 26/06/14 19:25, Thomas Abraham wrote:
> On Thu, Jun 26, 2014 at 9:27 PM, Sudeep Holla <sudeep.holla@arm.com> wrote:
>>
>>
>> On 26/06/14 16:38, Viresh Kumar wrote:
>>>
>>> On 26 June 2014 20:38, Thomas Abraham <thomas.ab@samsung.com> wrote:
>>>>
>>>> From: Thomas Abraham <thomas.ab@samsung.com>
>>>>
>>>> If there are no active cpus in a cluster, the clock frequency of the
>>>> cluster
>>>> can be lowered to the lowest possible frequency for that cluster. This
>>>> can
>>>> help reduce the output clock speed of the PLL clocking the cluster and
>>>> save power.
>>>>
>>>> The get_table_min() function is also moved with this change to avoid
>>>> forward
>>>> declaration. The function get_table_max() is also moved along with
>>>> get_table_min() so that these two functions are adjacent the code.
>>>
>>>
>>> Makes sense as my poor mind also thought this over an year and
>>> half back.
>>>
>>>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>>>> ---
>>>>    drivers/cpufreq/arm_big_little.c |   51
>>>> +++++++++++++++++++++-----------------
>>>>    1 file changed, 28 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/drivers/cpufreq/arm_big_little.c
>>>> b/drivers/cpufreq/arm_big_little.c
>>>> index 1f4d4e3..4b1431f 100644
>>>> --- a/drivers/cpufreq/arm_big_little.c
>>>> +++ b/drivers/cpufreq/arm_big_little.c
>>>> @@ -75,6 +75,28 @@ static inline int cpu_to_cluster(int cpu)
>>>>                   MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
>>>>    }
>>>>
>>>> +/* get the minimum frequency in the cpufreq_frequency_table */
>>>> +static inline u32 get_table_min(struct cpufreq_frequency_table *table)
>>>> +{
>>>> +       struct cpufreq_frequency_table *pos;
>>>> +       uint32_t min_freq = ~0;
>>>> +       cpufreq_for_each_entry(pos, table)
>>>> +               if (pos->frequency < min_freq)
>>>> +                       min_freq = pos->frequency;
>>>> +       return min_freq;
>>>> +}
>>>> +
>>>> +/* get the maximum frequency in the cpufreq_frequency_table */
>>>> +static inline u32 get_table_max(struct cpufreq_frequency_table *table)
>>>> +{
>>>> +       struct cpufreq_frequency_table *pos;
>>>> +       uint32_t max_freq = 0;
>>>> +       cpufreq_for_each_entry(pos, table)
>>>> +               if (pos->frequency > max_freq)
>>>> +                       max_freq = pos->frequency;
>>>> +       return max_freq;
>>>> +}
>>>> +
>>>>    static unsigned int find_cluster_maxfreq(int cluster)
>>>>    {
>>>>           int j;
>>>> @@ -170,9 +192,14 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32
>>>> new_cluster, u32 rate)
>>>>
>>>>                   mutex_lock(&cluster_lock[old_cluster]);
>>>>
>>>> -               /* Set freq of old cluster if there are cpus left on it
>>>> */
>>>> +               /*
>>>> +                * Set freq of old cluster. If there are no cpus left on
>>>> it,
>>>> +                * set the lowest possible frequency for that cluster.
>>>> +                */
>>>>                   new_rate = find_cluster_maxfreq(old_cluster);
>>>>                   new_rate = ACTUAL_FREQ(old_cluster, new_rate);
>>>> +               if (!new_rate)
>>>> +                       new_rate =
>>>> get_table_min(freq_table[old_cluster]);
>>>
>>>
>>> Have you hit this code? How did you test it?
>>>
>>> In case you have hit this code, you must be using IKS as in MP it
>>> doesn't make sense.
>
> Yes, I have hit this code with IKS.
>
>>>
>>> Two things:
>>> - First in case this patch gets in, you can check !new_rate just after
>>>     find_cluster_maxfreq() as it will be zero there as well..
>
> Ok.
>
>>>
>>> - The counter argument given to me from Sudeep at that time was,
>>>     "it is just a waste of time". Because if we aren't using the cluster
>>>     anymore, it will be switched off very soon and there is no point
>>>     wasting time changing freq to lowest..
>>>
>>
>> I was just writing the same thing again, you saved my time :)
>> Since in IKS the cluster will be powered down if no CPU is active, I see no
>> point in doing this. bL_switch_request would initiate cluster powerdown and
>
> The CPU clock domain can contain additional components than just the
> mutli-core cluster. Those continue to be alive and clocked even when
> the cluster is power gated. This results in additional power
> consumption, though may not be high enough, but can be reduced by
> lowering the clock speed.
>

I am not really sure if that's the case. Consider a single cluster system,
how would you handle that ? Do you set the clock to lowest every time you
enter deepest C-state.

Alternatively in a multi-cluster platform if I hotplug out all the cpus in one
cluster, do you mean we still have to explicitly set the frequency to lowest
before the last cpu goes down. This needs to be handled implicitly IMO, but
they may be some restrictions on some platforms which I am not aware of.

It would be good to know why exactly you need that.

>> trying to reset the clk to lowest might unnecessarily wake it up(though
>> the exact behaviour depends on the firmware/code managing it)
>
> Any particular example of a platform that has this problem, just to
> understand what could cause such a behavior and if is mostly a
> software or a hardware induced problem.
>

No I don't know any particular platform.

--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Abraham June 27, 2014, 6:11 a.m. | #5
On Fri, Jun 27, 2014 at 12:27 AM, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
>
> On 26/06/14 19:25, Thomas Abraham wrote:
>>
>> On Thu, Jun 26, 2014 at 9:27 PM, Sudeep Holla <sudeep.holla@arm.com>
>> wrote:
>>>
>>>
>>>
>>> On 26/06/14 16:38, Viresh Kumar wrote:
>>>>
>>>>
>>>> On 26 June 2014 20:38, Thomas Abraham <thomas.ab@samsung.com> wrote:
>>>>>
>>>>>
>>>>> From: Thomas Abraham <thomas.ab@samsung.com>
>>>>>
>>>>> If there are no active cpus in a cluster, the clock frequency of the
>>>>> cluster
>>>>> can be lowered to the lowest possible frequency for that cluster. This
>>>>> can
>>>>> help reduce the output clock speed of the PLL clocking the cluster and
>>>>> save power.
>>>>>
>>>>> The get_table_min() function is also moved with this change to avoid
>>>>> forward
>>>>> declaration. The function get_table_max() is also moved along with
>>>>> get_table_min() so that these two functions are adjacent the code.
>>>>
>>>>
>>>>
>>>> Makes sense as my poor mind also thought this over an year and
>>>> half back.
>>>>
>>>>> Signed-off-by: Thomas Abraham <thomas.ab@samsung.com>
>>>>> ---
>>>>>    drivers/cpufreq/arm_big_little.c |   51
>>>>> +++++++++++++++++++++-----------------
>>>>>    1 file changed, 28 insertions(+), 23 deletions(-)
>>>>>
>>>>> diff --git a/drivers/cpufreq/arm_big_little.c
>>>>> b/drivers/cpufreq/arm_big_little.c
>>>>> index 1f4d4e3..4b1431f 100644
>>>>> --- a/drivers/cpufreq/arm_big_little.c
>>>>> +++ b/drivers/cpufreq/arm_big_little.c
>>>>> @@ -75,6 +75,28 @@ static inline int cpu_to_cluster(int cpu)
>>>>>                   MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
>>>>>    }
>>>>>
>>>>> +/* get the minimum frequency in the cpufreq_frequency_table */
>>>>> +static inline u32 get_table_min(struct cpufreq_frequency_table *table)
>>>>> +{
>>>>> +       struct cpufreq_frequency_table *pos;
>>>>> +       uint32_t min_freq = ~0;
>>>>> +       cpufreq_for_each_entry(pos, table)
>>>>> +               if (pos->frequency < min_freq)
>>>>> +                       min_freq = pos->frequency;
>>>>> +       return min_freq;
>>>>> +}
>>>>> +
>>>>> +/* get the maximum frequency in the cpufreq_frequency_table */
>>>>> +static inline u32 get_table_max(struct cpufreq_frequency_table *table)
>>>>> +{
>>>>> +       struct cpufreq_frequency_table *pos;
>>>>> +       uint32_t max_freq = 0;
>>>>> +       cpufreq_for_each_entry(pos, table)
>>>>> +               if (pos->frequency > max_freq)
>>>>> +                       max_freq = pos->frequency;
>>>>> +       return max_freq;
>>>>> +}
>>>>> +
>>>>>    static unsigned int find_cluster_maxfreq(int cluster)
>>>>>    {
>>>>>           int j;
>>>>> @@ -170,9 +192,14 @@ bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32
>>>>> new_cluster, u32 rate)
>>>>>
>>>>>                   mutex_lock(&cluster_lock[old_cluster]);
>>>>>
>>>>> -               /* Set freq of old cluster if there are cpus left on it
>>>>> */
>>>>> +               /*
>>>>> +                * Set freq of old cluster. If there are no cpus left
>>>>> on
>>>>> it,
>>>>> +                * set the lowest possible frequency for that cluster.
>>>>> +                */
>>>>>                   new_rate = find_cluster_maxfreq(old_cluster);
>>>>>                   new_rate = ACTUAL_FREQ(old_cluster, new_rate);
>>>>> +               if (!new_rate)
>>>>> +                       new_rate =
>>>>> get_table_min(freq_table[old_cluster]);
>>>>
>>>>
>>>>
>>>> Have you hit this code? How did you test it?
>>>>
>>>> In case you have hit this code, you must be using IKS as in MP it
>>>> doesn't make sense.
>>
>>
>> Yes, I have hit this code with IKS.
>>
>>>>
>>>> Two things:
>>>> - First in case this patch gets in, you can check !new_rate just after
>>>>     find_cluster_maxfreq() as it will be zero there as well..
>>
>>
>> Ok.
>>
>>>>
>>>> - The counter argument given to me from Sudeep at that time was,
>>>>     "it is just a waste of time". Because if we aren't using the cluster
>>>>     anymore, it will be switched off very soon and there is no point
>>>>     wasting time changing freq to lowest..
>>>>
>>>
>>> I was just writing the same thing again, you saved my time :)
>>> Since in IKS the cluster will be powered down if no CPU is active, I see
>>> no
>>> point in doing this. bL_switch_request would initiate cluster powerdown
>>> and
>>
>>
>> The CPU clock domain can contain additional components than just the
>> mutli-core cluster. Those continue to be alive and clocked even when
>> the cluster is power gated. This results in additional power
>> consumption, though may not be high enough, but can be reduced by
>> lowering the clock speed.
>>
>
> I am not really sure if that's the case. Consider a single cluster system,
> how would you handle that ? Do you set the clock to lowest every time you
> enter deepest C-state.

In that case we need not do this. And this patch is not meant for such a case.

>
> Alternatively in a multi-cluster platform if I hotplug out all the cpus in
> one
> cluster, do you mean we still have to explicitly set the frequency to lowest
> before the last cpu goes down.

Yes, but it would "after" the last cpu goes down. As mentioned in my
previous reply, there is an opportunity to save power by doing this.

> This needs to be handled implicitly IMO, but
> they may be some restrictions on some platforms which I am not aware of.

This change is useful on Exynos based platforms and not intrusive
either. So if we can get this on linux-next and gets test coverage on
other platforms until maybe -rc7, we can be reasonably sure that other
platforms are not affected by this. I see no reason in holding this
back just based on assumptions about other platforms.

>
> It would be good to know why exactly you need that.

It is to avoid power wastage by clocking the components in the CPU
clock domain at a higher speed when the cluster in that CPU clock
domain is switched off. I did mention this reason in my previous
reply.

>
>
>>> trying to reset the clk to lowest might unnecessarily wake it up(though
>>> the exact behaviour depends on the firmware/code managing it)
>>
>>
>> Any particular example of a platform that has this problem, just to
>> understand what could cause such a behavior and if is mostly a
>> software or a hardware induced problem.
>>
>
> No I don't know any particular platform.

Ok.

>

Thanks,
Thomas.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar June 27, 2014, 6:48 a.m. | #6
On 27 June 2014 11:41, Thomas Abraham <ta.omasab@gmail.com> wrote:
> On Fri, Jun 27, 2014 at 12:27 AM, Sudeep Holla <sudeep.holla@arm.com> wrote:

>> Alternatively in a multi-cluster platform if I hotplug out all the cpus in
>> one
>> cluster, do you mean we still have to explicitly set the frequency to lowest
>> before the last cpu goes down.
>
> Yes, but it would "after" the last cpu goes down. As mentioned in my

How will you change freq *after* last cpu goes down? This code
always initiate freq-switch from last-cpu.

Also, we must keep in mind that switching to lowest freq would make
it take more time to execute the cluster shutdown code..

> previous reply, there is an opportunity to save power by doing this.
>
>> This needs to be handled implicitly IMO, but
>> they may be some restrictions on some platforms which I am not aware of.
>
> This change is useful on Exynos based platforms and not intrusive
> either. So if we can get this on linux-next and gets test coverage on
> other platforms until maybe -rc7, we can be reasonably sure that other
> platforms are not affected by this. I see no reason in holding this
> back just based on assumptions about other platforms.

I don't think getting this to linux-next would make a difference. There
aren't many big LITTLE platforms, even lesser would be using linux-next
and very very few would be using IKS :)

So, we have to decide whatever we want here only.

@Nicolas: Please comment here, we need your suggestions :)

As I told you before, I wrote something similar long back and had the
same mindset as you have now. Yes, it *might* be useful but we
wanna be sure about it. That's why all these discussions. So, just
hold on :)

I am not asking you to reveal internals of Exynos octa, but what exactly
is using this clock when the cluster goes down? L2 caches might stay on
for a while if they have something useful, don't know exactly how the
CPU clock would be affecting that..

This is what my understanding is and it holds true for the cpuidle case
sudeep mentioned:
- If cluster is going to shutdown soon, there is no point changing clk rate.
- If cluster isn't going down for a complete shutdown, yes we can save
some power by playing with clk rate. And the L2 cache example above
fits here.

@Daniel: Are there some idle states for clusters where we aren't switching
it off completely, but all CPUs are idle? And will switching to lowest freq
sensible there ?

--
viresh
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Abraham June 27, 2014, 8:44 a.m. | #7
On Fri, Jun 27, 2014 at 12:18 PM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 27 June 2014 11:41, Thomas Abraham <ta.omasab@gmail.com> wrote:
>> On Fri, Jun 27, 2014 at 12:27 AM, Sudeep Holla <sudeep.holla@arm.com> wrote:
>
>>> Alternatively in a multi-cluster platform if I hotplug out all the cpus in
>>> one
>>> cluster, do you mean we still have to explicitly set the frequency to lowest
>>> before the last cpu goes down.
>>
>> Yes, but it would "after" the last cpu goes down. As mentioned in my
>
> How will you change freq *after* last cpu goes down? This code
> always initiate freq-switch from last-cpu.

meaning, after the 'bL_switch_request' call for the last cpu in the cluster.

>
> Also, we must keep in mind that switching to lowest freq would make
> it take more time to execute the cluster shutdown code..

The inbound CPU is probably on this way to execute useful code while
the outbound CPU does the cluster down and it is the frequency of the
outbound cluster that is being changed here.

>
>> previous reply, there is an opportunity to save power by doing this.
>>
>>> This needs to be handled implicitly IMO, but
>>> they may be some restrictions on some platforms which I am not aware of.
>>
>> This change is useful on Exynos based platforms and not intrusive
>> either. So if we can get this on linux-next and gets test coverage on
>> other platforms until maybe -rc7, we can be reasonably sure that other
>> platforms are not affected by this. I see no reason in holding this
>> back just based on assumptions about other platforms.
>
> I don't think getting this to linux-next would make a difference. There
> aren't many big LITTLE platforms, even lesser would be using linux-next
> and very very few would be using IKS :)
>
> So, we have to decide whatever we want here only.

Ok.

>
> @Nicolas: Please comment here, we need your suggestions :)
>
> As I told you before, I wrote something similar long back and had the
> same mindset as you have now. Yes, it *might* be useful but we
> wanna be sure about it. That's why all these discussions. So, just
> hold on :)

The reason for sending this patch was because of the power saving
observed on Chromebook platform. So this patch is not based on an
assumption.

>
> I am not asking you to reveal internals of Exynos octa, but what exactly
> is using this clock when the cluster goes down? L2 caches might stay on
> for a while if they have something useful, don't know exactly how the
> CPU clock would be affecting that..

There are few blocks running of the CPU clock (internal buses,
Coresight, JTAG, AXI interface clock). Also, it has been observed that
a lower ARM clock reduces the power consumed on the voltage rail for
the peripherals block.

>
> This is what my understanding is and it holds true for the cpuidle case
> sudeep mentioned:
> - If cluster is going to shutdown soon, there is no point changing clk rate.
> - If cluster isn't going down for a complete shutdown, yes we can save
> some power by playing with clk rate. And the L2 cache example above
> fits here.
>
> @Daniel: Are there some idle states for clusters where we aren't switching
> it off completely, but all CPUs are idle? And will switching to lowest freq
> sensible there ?

Maybe I misunderstand here, but why is this patch being linked with CPU idle?

Thanks,
Thomas.

>
> --
> viresh
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar June 27, 2014, 8:57 a.m. | #8
On 27 June 2014 14:14, Thomas Abraham <ta.omasab@gmail.com> wrote:
> On Fri, Jun 27, 2014 at 12:18 PM, Viresh Kumar <viresh.kumar@linaro.org> wrote:

>> Also, we must keep in mind that switching to lowest freq would make
>> it take more time to execute the cluster shutdown code..
>
> The inbound CPU is probably on this way to execute useful code while
> the outbound CPU does the cluster down and it is the frequency of the
> outbound cluster that is being changed here.

I didn't get you here. For the cluster going down, there is no Inbound
CPU. And the last outbound CPU of dying cluster will shut it down and
I was talking about the code it has to execute to get it down.. It will
take more time to get it down now..

> The reason for sending this patch was because of the power saving
> observed on Chromebook platform. So this patch is not based on an
> assumption.

Always ALWAYS mention such details in logs. It helps people understand
you really need it. Giving values would be the best though.. Or atleast
mention % improvement with a commit.

So, how much you saved here?

> There are few blocks running of the CPU clock (internal buses,
> Coresight, JTAG, AXI interface clock).

Don't they go down with the cluster? Or we aren't performing a full
cluster down?

> Also, it has been observed that
> a lower ARM clock reduces the power consumed on the voltage rail for
> the peripherals block.

Peripherals internal to cluster? Like arch-timer?

> Maybe I misunderstand here, but why is this patch being linked with CPU idle?

If this patch saves us some energy for IKS, then we can use something
similar for MP as well by reducing frequency on cluster cpu-idle ..
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Abraham June 27, 2014, 10:07 a.m. | #9
On Fri, Jun 27, 2014 at 2:27 PM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 27 June 2014 14:14, Thomas Abraham <ta.omasab@gmail.com> wrote:
>> On Fri, Jun 27, 2014 at 12:18 PM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
>>> Also, we must keep in mind that switching to lowest freq would make
>>> it take more time to execute the cluster shutdown code..
>>
>> The inbound CPU is probably on this way to execute useful code while
>> the outbound CPU does the cluster down and it is the frequency of the
>> outbound cluster that is being changed here.
>
> I didn't get you here. For the cluster going down, therclock e is no Inbound
> CPU. And the last outbound CPU of dying cluster will shut it down and
> I was talking about the code it has to execute to get it down.. It will
> take more time to get it down now..

I was trying to emphasis on the point that we are reducing the
frequency of the outbound cluster. The inbound CPU (pair of the last
man CPU in the outbound cluster) would be executing useful code while
the outbound last man CPU is executing the cluster down sequence. Does
it hurt if the cluster down sequence is executed a bit slower (and if
yes, then why not set the max speed of the cluster for cluster down).

I haven't checked in detail but won't the cluster down be completed
most of the times before return from bL_switch_request()?

>
>> The reason for sending this patch was because of the power saving
>> observed on Chromebook platform. So this patch is not based on an
>> assumption.
>
> Always ALWAYS mention such details in logs. It helps people understand
> you really need it. Giving values would be the best though.. Or atleast
> mention % improvement with a commit.
>
> So, how much you saved here?

Depending on the frequency of the outbound cluster, the savings was
about 5~10mA. It is close to 25mA in case the PLL driving the CPU is
also turned off.

>
>> There are few blocks running of the CPU clock (internal buses,
>> Coresight, JTAG, AXI interface clock).
>
> Don't they go down with the cluster? Or we aren't performing a full
> cluster down?

We are doing a full cluster down. I am not sure which of these
continue to consume power in case of full cluster down.

>
>> Also, it has been observed that
>> a lower ARM clock reduces the power consumed on the voltage rail for
>> the peripherals block.
>
> Peripherals internal to cluster? Like arch-timer?

No, the voltage rail on which the SoC peripherals such as
connectivity, media, storage IP's are connected.

>
>> Maybe I misunderstand here, but why is this patch being linked with CPU idle?
>
> If this patch saves us some energy for IKS, then we can use something
> similar for MP as well by reducing frequency on cluster cpu-idle ..

Ok. The reason I asked was because this is being done in
bL_cpufreq_set_rate() function.

Thanks,
Thomas.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar June 27, 2014, 10:16 a.m. | #10
On 27 June 2014 15:37, Thomas Abraham <ta.omasab@gmail.com> wrote:
> I was trying to emphasis on the point that we are reducing the
> frequency of the outbound cluster. The inbound CPU (pair of the last
> man CPU in the outbound cluster) would be executing useful code while
> the outbound last man CPU is executing the cluster down sequence. Does
> it hurt if the cluster down sequence is executed a bit slower (and if
> yes, then why not set the max speed of the cluster for cluster down).

Okay, I wasn't worried about performance but power here.

What I meant was that, because it *might* take more time to
shutdown the cluster it will be on for longer. And so might end up
consuming more power with this.

> I haven't checked in detail but won't the cluster down be completed
> most of the times before return from bL_switch_request()?

No idea :)

> Depending on the frequency of the outbound cluster, the savings was
> about 5~10mA. It is close to 25mA in case the PLL driving the CPU is
> also turned off.

Shouldn't we talk about Energy here instead of current?

>>> There are few blocks running of the CPU clock (internal buses,
>>> Coresight, JTAG, AXI interface clock).
>>
>> Don't they go down with the cluster? Or we aren't performing a full
>> cluster down?
>
> We are doing a full cluster down. I am not sure which of these
> continue to consume power in case of full cluster down.

None should.

>>> Also, it has been observed that
>>> a lower ARM clock reduces the power consumed on the voltage rail for
>>> the peripherals block.
>>
>> Peripherals internal to cluster? Like arch-timer?
>
> No, the voltage rail on which the SoC peripherals such as
> connectivity, media, storage IP's are connected.

What do they have to do with cluster frequency ?
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Abraham June 27, 2014, 10:53 a.m. | #11
On Fri, Jun 27, 2014 at 3:46 PM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 27 June 2014 15:37, Thomas Abraham <ta.omasab@gmail.com> wrote:
>> I was trying to emphasis on the point that we are reducing the
>> frequency of the outbound cluster. The inbound CPU (pair of the last
>> man CPU in the outbound cluster) would be executing useful code while
>> the outbound last man CPU is executing the cluster down sequence. Does
>> it hurt if the cluster down sequence is executed a bit slower (and if
>> yes, then why not set the max speed of the cluster for cluster down).
>
> Okay, I wasn't worried about performance but power here.
>
> What I meant was that, because it *might* take more time to
> shutdown the cluster it will be on for longer. And so might end up
> consuming more power with this.

Ok. It is a comparison with the amount of power saved during the
duration in which cluster remains inactive and clocked at a lower
speed.

>
>> I haven't checked in detail but won't the cluster down be completed
>> most of the times before return from bL_switch_request()?
>
> No idea :)
>
>> Depending on the frequency of the outbound cluster, the savings was
>> about 5~10mA. It is close to 25mA in case the PLL driving the CPU is
>> also turned off.
>
> Shouldn't we talk about Energy here instead of current?

Sorry, it is mW units.

>
>>>> There are few blocks running of the CPU clock (internal buses,
>>>> Coresight, JTAG, AXI interface clock).
>>>
>>> Don't they go down with the cluster? Or we aren't performing a full
>>> cluster down?
>>
>> We are doing a full cluster down. I am not sure which of these
>> continue to consume power in case of full cluster down.
>
> None should.
>
>>>> Also, it has been observed that
>>>> a lower ARM clock reduces the power consumed on the voltage rail for
>>>> the peripherals block.
>>>
>>> Peripherals internal to cluster? Like arch-timer?
>>
>> No, the voltage rail on which the SoC peripherals such as
>> connectivity, media, storage IP's are connected.
>
> What do they have to do with cluster frequency ?

I am not sure but maybe maintaining sync between two buses with a
large clock frequency difference between them. Just a guess, but there
does seem to be a relation with the cluster frequency.

Thanks,
Thomas.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas Pitre June 28, 2014, 2:12 a.m. | #12
On Fri, 27 Jun 2014, Viresh Kumar wrote:

> On 27 June 2014 11:41, Thomas Abraham <ta.omasab@gmail.com> wrote:
> > On Fri, Jun 27, 2014 at 12:27 AM, Sudeep Holla <sudeep.holla@arm.com> wrote:
> 
> >> Alternatively in a multi-cluster platform if I hotplug out all the cpus in
> >> one
> >> cluster, do you mean we still have to explicitly set the frequency to lowest
> >> before the last cpu goes down.
> >
> > Yes, but it would "after" the last cpu goes down. As mentioned in my
> 
> How will you change freq *after* last cpu goes down? This code
> always initiate freq-switch from last-cpu.
> 
> Also, we must keep in mind that switching to lowest freq would make
> it take more time to execute the cluster shutdown code..
> 
> > previous reply, there is an opportunity to save power by doing this.
> >
> >> This needs to be handled implicitly IMO, but
> >> they may be some restrictions on some platforms which I am not aware of.
> >
> > This change is useful on Exynos based platforms and not intrusive
> > either. So if we can get this on linux-next and gets test coverage on
> > other platforms until maybe -rc7, we can be reasonably sure that other
> > platforms are not affected by this. I see no reason in holding this
> > back just based on assumptions about other platforms.
> 
> I don't think getting this to linux-next would make a difference. There
> aren't many big LITTLE platforms, even lesser would be using linux-next
> and very very few would be using IKS :)
> 
> So, we have to decide whatever we want here only.
> 
> @Nicolas: Please comment here, we need your suggestions :)

OK...

If reducing the frequency of the cluster going down saves power, then it 
should be done _all_ the time i.e. when using CPU hotplug, when cpuidle 
sends all CPUs to sleep, etc.  So this has nothing to do with the 
switcher even if the switcher would also benefit from this.

In other words, this should be performed by the MCPM backend as part of 
the last man operations. This way it is generic and completely 
transparent to the rest of the system.

To restore the CPU frequency that was effective before going down, you 
may use the MCPM powered_up method.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar June 30, 2014, 4:52 a.m. | #13
On 28 June 2014 07:42, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> If reducing the frequency of the cluster going down saves power, then it
> should be done _all_ the time i.e. when using CPU hotplug, when cpuidle
> sends all CPUs to sleep, etc.  So this has nothing to do with the
> switcher even if the switcher would also benefit from this.
>
> In other words, this should be performed by the MCPM backend as part of
> the last man operations. This way it is generic and completely
> transparent to the rest of the system.
>
> To restore the CPU frequency that was effective before going down, you
> may use the MCPM powered_up method.

Oh yes, this is exactly what we want then..

But do we want this forced for every platform? Or can we have some flag
set by multi-cluster platforms, so that it stays optional ?
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Abraham June 30, 2014, 8:32 a.m. | #14
On Mon, Jun 30, 2014 at 10:22 AM, Viresh Kumar <viresh.kumar@linaro.org> wrote:
> On 28 June 2014 07:42, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
>> If reducing the frequency of the cluster going down saves power, then it
>> should be done _all_ the time i.e. when using CPU hotplug, when cpuidle
>> sends all CPUs to sleep, etc.  So this has nothing to do with the
>> switcher even if the switcher would also benefit from this.
>>
>> In other words, this should be performed by the MCPM backend as part of
>> the last man operations. This way it is generic and completely
>> transparent to the rest of the system.
>>
>> To restore the CPU frequency that was effective before going down, you
>> may use the MCPM powered_up method.
>
> Oh yes, this is exactly what we want then..
>
> But do we want this forced for every platform? Or can we have some flag
> set by multi-cluster platforms, so that it stays optional ?

Thanks Nicolas and Viresh for your feedback. Probably, this can be
handled in platform specific mcpm backend driver.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Viresh Kumar June 30, 2014, 8:33 a.m. | #15
On 30 June 2014 14:02, Thomas Abraham <ta.omasab@gmail.com> wrote:
> Thanks Nicolas and Viresh for your feedback. Probably, this can be
> handled in platform specific mcpm backend driver.

This can be a generic requirement and so doing it inside MCPM core
would be better IMO.

But the control should be there with backend driver.
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas Pitre July 1, 2014, 2:14 a.m. | #16
On Mon, 30 Jun 2014, Viresh Kumar wrote:

> On 28 June 2014 07:42, Nicolas Pitre <nicolas.pitre@linaro.org> wrote:
> > If reducing the frequency of the cluster going down saves power, then it
> > should be done _all_ the time i.e. when using CPU hotplug, when cpuidle
> > sends all CPUs to sleep, etc.  So this has nothing to do with the
> > switcher even if the switcher would also benefit from this.
> >
> > In other words, this should be performed by the MCPM backend as part of
> > the last man operations. This way it is generic and completely
> > transparent to the rest of the system.
> >
> > To restore the CPU frequency that was effective before going down, you
> > may use the MCPM powered_up method.
> 
> Oh yes, this is exactly what we want then..
> 
> But do we want this forced for every platform? Or can we have some flag
> set by multi-cluster platforms, so that it stays optional ?

No flags.  That's a "power saving measure" specific to that platform 
being invoked from a platform specific backend.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas Pitre July 1, 2014, 3:01 a.m. | #17
On Mon, 30 Jun 2014, Viresh Kumar wrote:

> On 30 June 2014 14:02, Thomas Abraham <ta.omasab@gmail.com> wrote:
> > Thanks Nicolas and Viresh for your feedback. Probably, this can be
> > handled in platform specific mcpm backend driver.
> 
> This can be a generic requirement and so doing it inside MCPM core
> would be better IMO.

Let me rephrase the above.

This could become a generic requirement at which point doing it inside 
MCPM core would be better.

Let's resist overengineering. things prematurely.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-pm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/cpufreq/arm_big_little.c b/drivers/cpufreq/arm_big_little.c
index 1f4d4e3..4b1431f 100644
--- a/drivers/cpufreq/arm_big_little.c
+++ b/drivers/cpufreq/arm_big_little.c
@@ -75,6 +75,28 @@  static inline int cpu_to_cluster(int cpu)
 		MAX_CLUSTERS : raw_cpu_to_cluster(cpu);
 }
 
+/* get the minimum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_min(struct cpufreq_frequency_table *table)
+{
+	struct cpufreq_frequency_table *pos;
+	uint32_t min_freq = ~0;
+	cpufreq_for_each_entry(pos, table)
+		if (pos->frequency < min_freq)
+			min_freq = pos->frequency;
+	return min_freq;
+}
+
+/* get the maximum frequency in the cpufreq_frequency_table */
+static inline u32 get_table_max(struct cpufreq_frequency_table *table)
+{
+	struct cpufreq_frequency_table *pos;
+	uint32_t max_freq = 0;
+	cpufreq_for_each_entry(pos, table)
+		if (pos->frequency > max_freq)
+			max_freq = pos->frequency;
+	return max_freq;
+}
+
 static unsigned int find_cluster_maxfreq(int cluster)
 {
 	int j;
@@ -170,9 +192,14 @@  bL_cpufreq_set_rate(u32 cpu, u32 old_cluster, u32 new_cluster, u32 rate)
 
 		mutex_lock(&cluster_lock[old_cluster]);
 
-		/* Set freq of old cluster if there are cpus left on it */
+		/*
+		 * Set freq of old cluster. If there are no cpus left on it,
+		 * set the lowest possible frequency for that cluster.
+		 */
 		new_rate = find_cluster_maxfreq(old_cluster);
 		new_rate = ACTUAL_FREQ(old_cluster, new_rate);
+		if (!new_rate)
+			new_rate = get_table_min(freq_table[old_cluster]);
 
 		if (new_rate) {
 			pr_debug("%s: Updating rate of old cluster: %d, to freq: %d\n",
@@ -223,28 +250,6 @@  static inline u32 get_table_count(struct cpufreq_frequency_table *table)
 	return count;
 }
 
-/* get the minimum frequency in the cpufreq_frequency_table */
-static inline u32 get_table_min(struct cpufreq_frequency_table *table)
-{
-	struct cpufreq_frequency_table *pos;
-	uint32_t min_freq = ~0;
-	cpufreq_for_each_entry(pos, table)
-		if (pos->frequency < min_freq)
-			min_freq = pos->frequency;
-	return min_freq;
-}
-
-/* get the maximum frequency in the cpufreq_frequency_table */
-static inline u32 get_table_max(struct cpufreq_frequency_table *table)
-{
-	struct cpufreq_frequency_table *pos;
-	uint32_t max_freq = 0;
-	cpufreq_for_each_entry(pos, table)
-		if (pos->frequency > max_freq)
-			max_freq = pos->frequency;
-	return max_freq;
-}
-
 static int merge_cluster_tables(void)
 {
 	int i, j, k = 0, count = 1;