diff mbox series

[v6,6/7] cpufreq: Skip inefficient frequencies

Message ID 1630405453-275784-7-git-send-email-vincent.donnefort@arm.com
State New
Headers show
Series Inefficient OPPs | expand

Commit Message

Vincent Donnefort Aug. 31, 2021, 10:24 a.m. UTC
CPUFreq governors that do DVFS (i.e. CPUFREQ_GOV_DYNAMIC_SWITCHING flag)
can skip frequencies marked as inefficient, as long as the efficient
frequency found meet the policy maximum requirement.

Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>

Comments

Rafael J. Wysocki Sept. 1, 2021, 6:13 p.m. UTC | #1
On Tue, Aug 31, 2021 at 12:24 PM Vincent Donnefort
<vincent.donnefort@arm.com> wrote:
>
> CPUFreq governors that do DVFS (i.e. CPUFREQ_GOV_DYNAMIC_SWITCHING flag)
> can skip frequencies marked as inefficient, as long as the efficient
> frequency found meet the policy maximum requirement.
>
> Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 7d5f170ecad1..b46fe2d7baf1 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -2295,6 +2295,44 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void)
>         return NULL;
>  }
>
> +static inline bool
> +cpufreq_can_skip_inefficiencies(struct cpufreq_policy *policy)

This is not just about the ability to skp the inefficient frequencies,
but about whether or not they should be skipped.  Moreover, the
inefficient frequencies are not really skipped, but mapped to specific
efficient ones.

I would call this function cpufreq_use_efficient_frequencies() or similar.

> +{
> +       struct cpufreq_frequency_table *pos;
> +       bool valid = false;
> +       int idx;
> +
> +       if (!(policy->governor->flags & CPUFREQ_GOV_DYNAMIC_SWITCHING))
> +               return false;
> +
> +       if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)
> +               return false;
> +
> +       /* Is there at least one inefficiency ? */
> +       cpufreq_for_each_valid_entry(pos, policy->freq_table) {
> +               if (pos->flags & CPUFREQ_INEFFICIENT_FREQ) {
> +                       valid = true;
> +                       break;
> +               }
> +       }
> +
> +       if (!valid)
> +               return false;
> +
> +       /*
> +        * Has cpufreq_table_update_efficiencies been called? i.e. is the
> +        * highest frequency efficient.
> +        */
> +       cpufreq_for_each_valid_entry_idx(pos, policy->freq_table, idx) {
> +               valid = !!(idx == pos->efficient);
> +               if (policy->freq_table_sorted ==
> +                                       CPUFREQ_TABLE_SORTED_DESCENDING)
> +                       break;
> +       }
> +
> +       return valid;
> +}
> +
>  static int cpufreq_init_governor(struct cpufreq_policy *policy)
>  {
>         int ret;
> @@ -2337,6 +2375,7 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy)
>         }
>
>         policy->strict_target = !!(policy->governor->flags & CPUFREQ_GOV_STRICT_TARGET);
> +       policy->skip_inefficiencies = cpufreq_can_skip_inefficiencies(policy);
>
>         return 0;
>  }
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 4e901ebd104d..cb09afbf01e2 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -117,6 +117,13 @@ struct cpufreq_policy {
>         bool                    strict_target;
>
>         /*
> +        * Set if the CPUFREQ_GOV_DYNAMIC_SWITCHING flag is set for the current
> +        * governor and if inefficient frequencies were found in the frequency
> +        * table.
> +        */
> +       bool                    skip_inefficiencies;
> +
> +       /*
>          * Preferred average time interval between consecutive invocations of
>          * the driver to set the frequency for this policy.  To be set by the
>          * scaling driver (0, which is the default, means no preference).
> @@ -972,25 +979,46 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
>                 return cpufreq_table_find_index_dc(policy, target_freq);
>  }
>
> +static inline unsigned int
> +cpufreq_frequency_find_efficient(struct cpufreq_policy *policy,
> +                                unsigned int idx)
> +{
> +       struct cpufreq_frequency_table *table = policy->freq_table;
> +       unsigned int efficient_idx = table[idx].efficient;
> +
> +       return table[efficient_idx].frequency <= policy->max ? efficient_idx :
> +               idx;

I'm not sure about this.

In the _RELATION_L case table[idx].frequency can be above the policy
max, so you may end up running at an inefficient frequency above the
policy max, but you won't use an efficient one above it.  Isn't this
slightly confusing?

> +}
> +
>  static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
>                                                  unsigned int target_freq,
>                                                  unsigned int relation)
>  {
> +       int idx;
> +
>         if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED))
>                 return cpufreq_table_index_unsorted(policy, target_freq,
>                                                     relation);

Don't you want to take this case into account?

>
>         switch (relation) {
>         case CPUFREQ_RELATION_L:
> -               return cpufreq_table_find_index_l(policy, target_freq);
> +               idx = cpufreq_table_find_index_l(policy, target_freq);
> +               break;
>         case CPUFREQ_RELATION_H:
> -               return cpufreq_table_find_index_h(policy, target_freq);
> +               idx = cpufreq_table_find_index_h(policy, target_freq);
> +               break;
>         case CPUFREQ_RELATION_C:
> -               return cpufreq_table_find_index_c(policy, target_freq);
> +               idx = cpufreq_table_find_index_c(policy, target_freq);
> +               break;
>         default:
>                 WARN_ON_ONCE(1);
>                 return 0;
>         }
> +
> +       if (policy->skip_inefficiencies)
> +               idx = cpufreq_frequency_find_efficient(policy, idx);

Here, it matters which _RELATION_ is used.  For instance, in the
RELATION_H case, the frequency used cannot be above the target, which
is not guaranteed now AFAICS.

I would just really skip the inefficient frequencies as though they
were not there in the table, and then refine the search to take the
inefficient ones into account when the policy max limit is in effect
(which also depends on the relation type AFAICS).

In addition to the above, please note that __cpufreq_driver_target()
is not only called by governors and at least in the suspend frequency
case I don't see why it should be an efficient one even if
skip_inefficiencies is set.

> +
> +       return idx;
>  }
>
>  static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)
> --
> 2.7.4
>
Vincent Donnefort Sept. 2, 2021, 10:50 a.m. UTC | #2
On Wed, Sep 01, 2021 at 08:13:24PM +0200, Rafael J. Wysocki wrote:
> On Tue, Aug 31, 2021 at 12:24 PM Vincent Donnefort

> <vincent.donnefort@arm.com> wrote:

> >

> > CPUFreq governors that do DVFS (i.e. CPUFREQ_GOV_DYNAMIC_SWITCHING flag)

> > can skip frequencies marked as inefficient, as long as the efficient

> > frequency found meet the policy maximum requirement.

> >

> > Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>

> >

> > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c

> > index 7d5f170ecad1..b46fe2d7baf1 100644

> > --- a/drivers/cpufreq/cpufreq.c

> > +++ b/drivers/cpufreq/cpufreq.c

> > @@ -2295,6 +2295,44 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void)

> >         return NULL;

> >  }

> >

> > +static inline bool

> > +cpufreq_can_skip_inefficiencies(struct cpufreq_policy *policy)

> 

> This is not just about the ability to skp the inefficient frequencies,

> but about whether or not they should be skipped.  Moreover, the

> inefficient frequencies are not really skipped, but mapped to specific

> efficient ones.

> 

> I would call this function cpufreq_use_efficient_frequencies() or similar.


Ack.

> 

> > +{

> > +       struct cpufreq_frequency_table *pos;

> > +       bool valid = false;

> > +       int idx;

> > +

> > +       if (!(policy->governor->flags & CPUFREQ_GOV_DYNAMIC_SWITCHING))

> > +               return false;

> > +

> > +       if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)

> > +               return false;

> > +

> > +       /* Is there at least one inefficiency ? */

> > +       cpufreq_for_each_valid_entry(pos, policy->freq_table) {

> > +               if (pos->flags & CPUFREQ_INEFFICIENT_FREQ) {

> > +                       valid = true;

> > +                       break;

> > +               }

> > +       }

> > +

> > +       if (!valid)

> > +               return false;

> > +

> > +       /*

> > +        * Has cpufreq_table_update_efficiencies been called? i.e. is the

> > +        * highest frequency efficient.

> > +        */

> > +       cpufreq_for_each_valid_entry_idx(pos, policy->freq_table, idx) {

> > +               valid = !!(idx == pos->efficient);

> > +               if (policy->freq_table_sorted ==

> > +                                       CPUFREQ_TABLE_SORTED_DESCENDING)

> > +                       break;

> > +       }

> > +

> > +       return valid;

> > +}

> > +

> >  static int cpufreq_init_governor(struct cpufreq_policy *policy)

> >  {

> >         int ret;

> > @@ -2337,6 +2375,7 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy)

> >         }

> >

> >         policy->strict_target = !!(policy->governor->flags & CPUFREQ_GOV_STRICT_TARGET);

> > +       policy->skip_inefficiencies = cpufreq_can_skip_inefficiencies(policy);

> >

> >         return 0;

> >  }

> > diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h

> > index 4e901ebd104d..cb09afbf01e2 100644

> > --- a/include/linux/cpufreq.h

> > +++ b/include/linux/cpufreq.h

> > @@ -117,6 +117,13 @@ struct cpufreq_policy {

> >         bool                    strict_target;

> >

> >         /*

> > +        * Set if the CPUFREQ_GOV_DYNAMIC_SWITCHING flag is set for the current

> > +        * governor and if inefficient frequencies were found in the frequency

> > +        * table.

> > +        */

> > +       bool                    skip_inefficiencies;

> > +

> > +       /*

> >          * Preferred average time interval between consecutive invocations of

> >          * the driver to set the frequency for this policy.  To be set by the

> >          * scaling driver (0, which is the default, means no preference).

> > @@ -972,25 +979,46 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,

> >                 return cpufreq_table_find_index_dc(policy, target_freq);

> >  }

> >

> > +static inline unsigned int

> > +cpufreq_frequency_find_efficient(struct cpufreq_policy *policy,

> > +                                unsigned int idx)

> > +{

> > +       struct cpufreq_frequency_table *table = policy->freq_table;

> > +       unsigned int efficient_idx = table[idx].efficient;

> > +

> > +       return table[efficient_idx].frequency <= policy->max ? efficient_idx :

> > +               idx;

> 

> I'm not sure about this.

> 

> In the _RELATION_L case table[idx].frequency can be above the policy

> max, so you may end up running at an inefficient frequency above the

> policy max, but you won't use an efficient one above it.  Isn't this

> slightly confusing?


This can indeed happen when policy->max isn't equal to an available frequency.
But nontheless, we can't let the efficient resolution violate the policy->max,
which is used for thermal capping. The fact that we can overshoot a max
limit is confusing as well.

So I could add a policy->max sanity, to make sure this value is an actual
frequency and that RELATION_L will never overshoot that value. Or we can have a
flag somewhere to indicate thermal capping is happening and we shouldn't skip
inefficient frequencies.

> 

> > +}

> > +

> >  static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,

> >                                                  unsigned int target_freq,

> >                                                  unsigned int relation)

> >  {

> > +       int idx;

> > +

> >         if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED))

> >                 return cpufreq_table_index_unsorted(policy, target_freq,

> >                                                     relation);

> 

> Don't you want to take this case into account?


As the inefficient frequencies are only configured by the EM so far, and this
latter needs sorted frequencies to work, I didn't add support for unsorted
table.

> 

> >

> >         switch (relation) {

> >         case CPUFREQ_RELATION_L:

> > -               return cpufreq_table_find_index_l(policy, target_freq);

> > +               idx = cpufreq_table_find_index_l(policy, target_freq);

> > +               break;

> >         case CPUFREQ_RELATION_H:

> > -               return cpufreq_table_find_index_h(policy, target_freq);

> > +               idx = cpufreq_table_find_index_h(policy, target_freq);

> > +               break;

> >         case CPUFREQ_RELATION_C:

> > -               return cpufreq_table_find_index_c(policy, target_freq);

> > +               idx = cpufreq_table_find_index_c(policy, target_freq);

> > +               break;

> >         default:

> >                 WARN_ON_ONCE(1);

> >                 return 0;

> >         }

> > +

> > +       if (policy->skip_inefficiencies)

> > +               idx = cpufreq_frequency_find_efficient(policy, idx);

> 

> Here, it matters which _RELATION_ is used.  For instance, in the

> RELATION_H case, the frequency used cannot be above the target, which

> is not guaranteed now AFAICS.


RELATION_H is used in ondemand, when powersave_bias_target is set. In that
case, it doesn't seem to be an issue to overshoot the target freq in that case.

> 

> I would just really skip the inefficient frequencies as though they

> were not there in the table, and then refine the search to take the

> inefficient ones into account when the policy max limit is in effect

> (which also depends on the relation type AFAICS).

> 

> In addition to the above, please note that __cpufreq_driver_target()

> is not only called by governors and at least in the suspend frequency

> case I don't see why it should be an efficient one even if

> skip_inefficiencies is set.


Then we need to separate the __cpufreq_driver_target() issued from DVFS
governors and the others. I can bring back the previous RELATION_E
and even have RELATION_LE and RELATION_HE to not leave behind
ondemands's powerbias?

> 

> > +

> > +       return idx;

> >  }

> >

> >  static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)

> > --

> > 2.7.4

> >
Rafael J. Wysocki Sept. 2, 2021, 1:09 p.m. UTC | #3
On Thu, Sep 2, 2021 at 12:50 PM Vincent Donnefort
<vincent.donnefort@arm.com> wrote:
>

> On Wed, Sep 01, 2021 at 08:13:24PM +0200, Rafael J. Wysocki wrote:

> > On Tue, Aug 31, 2021 at 12:24 PM Vincent Donnefort

> > <vincent.donnefort@arm.com> wrote:

> > >

> > > CPUFreq governors that do DVFS (i.e. CPUFREQ_GOV_DYNAMIC_SWITCHING flag)

> > > can skip frequencies marked as inefficient, as long as the efficient

> > > frequency found meet the policy maximum requirement.

> > >

> > > Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>

> > >

> > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c

> > > index 7d5f170ecad1..b46fe2d7baf1 100644

> > > --- a/drivers/cpufreq/cpufreq.c

> > > +++ b/drivers/cpufreq/cpufreq.c

> > > @@ -2295,6 +2295,44 @@ __weak struct cpufreq_governor *cpufreq_fallback_governor(void)

> > >         return NULL;

> > >  }

> > >

> > > +static inline bool

> > > +cpufreq_can_skip_inefficiencies(struct cpufreq_policy *policy)

> >

> > This is not just about the ability to skp the inefficient frequencies,

> > but about whether or not they should be skipped.  Moreover, the

> > inefficient frequencies are not really skipped, but mapped to specific

> > efficient ones.

> >

> > I would call this function cpufreq_use_efficient_frequencies() or similar.

>

> Ack.

>

> >

> > > +{

> > > +       struct cpufreq_frequency_table *pos;

> > > +       bool valid = false;

> > > +       int idx;

> > > +

> > > +       if (!(policy->governor->flags & CPUFREQ_GOV_DYNAMIC_SWITCHING))

> > > +               return false;

> > > +

> > > +       if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)

> > > +               return false;

> > > +

> > > +       /* Is there at least one inefficiency ? */

> > > +       cpufreq_for_each_valid_entry(pos, policy->freq_table) {

> > > +               if (pos->flags & CPUFREQ_INEFFICIENT_FREQ) {

> > > +                       valid = true;

> > > +                       break;

> > > +               }

> > > +       }

> > > +

> > > +       if (!valid)

> > > +               return false;

> > > +

> > > +       /*

> > > +        * Has cpufreq_table_update_efficiencies been called? i.e. is the

> > > +        * highest frequency efficient.

> > > +        */

> > > +       cpufreq_for_each_valid_entry_idx(pos, policy->freq_table, idx) {

> > > +               valid = !!(idx == pos->efficient);

> > > +               if (policy->freq_table_sorted ==

> > > +                                       CPUFREQ_TABLE_SORTED_DESCENDING)

> > > +                       break;

> > > +       }

> > > +

> > > +       return valid;

> > > +}

> > > +

> > >  static int cpufreq_init_governor(struct cpufreq_policy *policy)

> > >  {

> > >         int ret;

> > > @@ -2337,6 +2375,7 @@ static int cpufreq_init_governor(struct cpufreq_policy *policy)

> > >         }

> > >

> > >         policy->strict_target = !!(policy->governor->flags & CPUFREQ_GOV_STRICT_TARGET);

> > > +       policy->skip_inefficiencies = cpufreq_can_skip_inefficiencies(policy);

> > >

> > >         return 0;

> > >  }

> > > diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h

> > > index 4e901ebd104d..cb09afbf01e2 100644

> > > --- a/include/linux/cpufreq.h

> > > +++ b/include/linux/cpufreq.h

> > > @@ -117,6 +117,13 @@ struct cpufreq_policy {

> > >         bool                    strict_target;

> > >

> > >         /*

> > > +        * Set if the CPUFREQ_GOV_DYNAMIC_SWITCHING flag is set for the current

> > > +        * governor and if inefficient frequencies were found in the frequency

> > > +        * table.

> > > +        */

> > > +       bool                    skip_inefficiencies;

> > > +

> > > +       /*

> > >          * Preferred average time interval between consecutive invocations of

> > >          * the driver to set the frequency for this policy.  To be set by the

> > >          * scaling driver (0, which is the default, means no preference).

> > > @@ -972,25 +979,46 @@ static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,

> > >                 return cpufreq_table_find_index_dc(policy, target_freq);

> > >  }

> > >

> > > +static inline unsigned int

> > > +cpufreq_frequency_find_efficient(struct cpufreq_policy *policy,

> > > +                                unsigned int idx)

> > > +{

> > > +       struct cpufreq_frequency_table *table = policy->freq_table;

> > > +       unsigned int efficient_idx = table[idx].efficient;

> > > +

> > > +       return table[efficient_idx].frequency <= policy->max ? efficient_idx :

> > > +               idx;

> >

> > I'm not sure about this.

> >

> > In the _RELATION_L case table[idx].frequency can be above the policy

> > max, so you may end up running at an inefficient frequency above the

> > policy max, but you won't use an efficient one above it.  Isn't this

> > slightly confusing?

>

> This can indeed happen when policy->max isn't equal to an available frequency.

> But nontheless, we can't let the efficient resolution violate the policy->max,

> which is used for thermal capping. The fact that we can overshoot a max

> limit is confusing as well.

>

> So I could add a policy->max sanity, to make sure this value is an actual

> frequency and that RELATION_L will never overshoot that value. Or we can have a

> flag somewhere to indicate thermal capping is happening and we shouldn't skip

> inefficient frequencies.


I would prefer the first option, because user space may be applying
the limit for power capping or thermal reasons too and the kernel
can't really tell why it is doing that.

This needs to be added to cpufreq_set_policy(), I think after calling
the driver's ->verify().

And if this is done to the policy max, IMO it needs to be done to the
policy min too, for consistency.  So if a frequency table is used, the
policy max and the policy min could be only the frequencies present in
the table.

Moreover, if only efficient frequencies are to be used, RELATION_L
needs to return min(policy->max, the closest efficient frequency equal
to or above the target).

> >

> > > +}

> > > +

> > >  static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,

> > >                                                  unsigned int target_freq,

> > >                                                  unsigned int relation)

> > >  {

> > > +       int idx;

> > > +

> > >         if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED))

> > >                 return cpufreq_table_index_unsorted(policy, target_freq,

> > >                                                     relation);

> >

> > Don't you want to take this case into account?

>

> As the inefficient frequencies are only configured by the EM so far, and this

> latter needs sorted frequencies to work, I didn't add support for unsorted

> table.


So the assumption is that if inefficient OPPs are present in the
frequency table, it must be sorted.

It would be prudent to add a comment documenting this assumption, then.

> >

> > >

> > >         switch (relation) {

> > >         case CPUFREQ_RELATION_L:

> > > -               return cpufreq_table_find_index_l(policy, target_freq);

> > > +               idx = cpufreq_table_find_index_l(policy, target_freq);

> > > +               break;

> > >         case CPUFREQ_RELATION_H:

> > > -               return cpufreq_table_find_index_h(policy, target_freq);

> > > +               idx = cpufreq_table_find_index_h(policy, target_freq);

> > > +               break;

> > >         case CPUFREQ_RELATION_C:

> > > -               return cpufreq_table_find_index_c(policy, target_freq);

> > > +               idx = cpufreq_table_find_index_c(policy, target_freq);

> > > +               break;

> > >         default:

> > >                 WARN_ON_ONCE(1);

> > >                 return 0;

> > >         }

> > > +

> > > +       if (policy->skip_inefficiencies)

> > > +               idx = cpufreq_frequency_find_efficient(policy, idx);

> >

> > Here, it matters which _RELATION_ is used.  For instance, in the

> > RELATION_H case, the frequency used cannot be above the target, which

> > is not guaranteed now AFAICS.

>

> RELATION_H is used in ondemand, when powersave_bias_target is set. In that

> case, it doesn't seem to be an issue to overshoot the target freq in that case.


It actually is used in multiple places, including the performance
governor, so changing the behavior of it is generally risky.

> >

> > I would just really skip the inefficient frequencies as though they

> > were not there in the table, and then refine the search to take the

> > inefficient ones into account when the policy max limit is in effect

> > (which also depends on the relation type AFAICS).

> >

> > In addition to the above, please note that __cpufreq_driver_target()

> > is not only called by governors and at least in the suspend frequency

> > case I don't see why it should be an efficient one even if

> > skip_inefficiencies is set.

>

> Then we need to separate the __cpufreq_driver_target() issued from DVFS

> governors and the others.


I think so.

> I can bring back the previous RELATION_E

> and even have RELATION_LE and RELATION_HE to not leave behind

> ondemands's powerbias?


Or a flag to be ORed with the relation to indicate that the request
doesn't come from a DVFS governor (which basically boils down to the
same thing).

> >

> > > +

> > > +       return idx;

> > >  }

> > >

> > >  static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)

> > > --

> > > 2.7.4

> > >
Vincent Donnefort Sept. 2, 2021, 1:49 p.m. UTC | #4
[...]

> > > >

> > > > +static inline unsigned int

> > > > +cpufreq_frequency_find_efficient(struct cpufreq_policy *policy,

> > > > +                                unsigned int idx)

> > > > +{

> > > > +       struct cpufreq_frequency_table *table = policy->freq_table;

> > > > +       unsigned int efficient_idx = table[idx].efficient;

> > > > +

> > > > +       return table[efficient_idx].frequency <= policy->max ? efficient_idx :

> > > > +               idx;

> > >

> > > I'm not sure about this.

> > >

> > > In the _RELATION_L case table[idx].frequency can be above the policy

> > > max, so you may end up running at an inefficient frequency above the

> > > policy max, but you won't use an efficient one above it.  Isn't this

> > > slightly confusing?

> >

> > This can indeed happen when policy->max isn't equal to an available frequency.

> > But nontheless, we can't let the efficient resolution violate the policy->max,

> > which is used for thermal capping. The fact that we can overshoot a max

> > limit is confusing as well.

> >

> > So I could add a policy->max sanity, to make sure this value is an actual

> > frequency and that RELATION_L will never overshoot that value. Or we can have a

> > flag somewhere to indicate thermal capping is happening and we shouldn't skip

> > inefficient frequencies.

> 

> I would prefer the first option, because user space may be applying

> the limit for power capping or thermal reasons too and the kernel

> can't really tell why it is doing that.

> 

> This needs to be added to cpufreq_set_policy(), I think after calling

> the driver's ->verify().

> 

> And if this is done to the policy max, IMO it needs to be done to the

> policy min too, for consistency.  So if a frequency table is used, the

> policy max and the policy min could be only the frequencies present in

> the table.


Ack. I'll consolidate policy->max and ->min set behavior in v7 so we won't have
any problem having to know who set policy->max and if yes or no we can break it.

What relation do we want for min/max setting? I guess RELATION_H for policy->max
and RELATION_L for policy->min (i.e. The highest frequency existing right below
the maximum and the lowest frequency existing just above the minimum)

> 

> Moreover, if only efficient frequencies are to be used, RELATION_L

> needs to return min(policy->max, the closest efficient frequency equal

> to or above the target).


You mean, never returning an inefficient frequency, unless there are no
efficient ones in the range policy->min policy->max ? 

The problem is a frequency is inefficient compared to a higher one. If we have
F, inefficient, while F+1 and F-1 are efficient and policy->max restricting to F.
If we return F-1, because it is the highest _efficient_ frequency allowed, we'll
give a frequency that can do less than originally requested. It would downgrade
the performance, compared to what the platform is really allowed to.

> 

> > >

> > > > +}

> > > > +

> > > >  static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,

> > > >                                                  unsigned int target_freq,

> > > >                                                  unsigned int relation)

> > > >  {

> > > > +       int idx;

> > > > +

> > > >         if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED))

> > > >                 return cpufreq_table_index_unsorted(policy, target_freq,

> > > >                                                     relation);

> > >

> > > Don't you want to take this case into account?

> >

> > As the inefficient frequencies are only configured by the EM so far, and this

> > latter needs sorted frequencies to work, I didn't add support for unsorted

> > table.

> 

> So the assumption is that if inefficient OPPs are present in the

> frequency table, it must be sorted.

> 

> It would be prudent to add a comment documenting this assumption, then.


Ack. I'll add that to the documentation.

> 

> > >

> > > >

> > > >         switch (relation) {

> > > >         case CPUFREQ_RELATION_L:

> > > > -               return cpufreq_table_find_index_l(policy, target_freq);

> > > > +               idx = cpufreq_table_find_index_l(policy, target_freq);

> > > > +               break;

> > > >         case CPUFREQ_RELATION_H:

> > > > -               return cpufreq_table_find_index_h(policy, target_freq);

> > > > +               idx = cpufreq_table_find_index_h(policy, target_freq);

> > > > +               break;

> > > >         case CPUFREQ_RELATION_C:

> > > > -               return cpufreq_table_find_index_c(policy, target_freq);

> > > > +               idx = cpufreq_table_find_index_c(policy, target_freq);

> > > > +               break;

> > > >         default:

> > > >                 WARN_ON_ONCE(1);

> > > >                 return 0;

> > > >         }

> > > > +

> > > > +       if (policy->skip_inefficiencies)

> > > > +               idx = cpufreq_frequency_find_efficient(policy, idx);

> > >

> > > Here, it matters which _RELATION_ is used.  For instance, in the

> > > RELATION_H case, the frequency used cannot be above the target, which

> > > is not guaranteed now AFAICS.

> >

> > RELATION_H is used in ondemand, when powersave_bias_target is set. In that

> > case, it doesn't seem to be an issue to overshoot the target freq in that case.

> 

> It actually is used in multiple places, including the performance

> governor, so changing the behavior of it is generally risky.

> 

> > >

> > > I would just really skip the inefficient frequencies as though they

> > > were not there in the table, and then refine the search to take the

> > > inefficient ones into account when the policy max limit is in effect

> > > (which also depends on the relation type AFAICS).

> > >

> > > In addition to the above, please note that __cpufreq_driver_target()

> > > is not only called by governors and at least in the suspend frequency

> > > case I don't see why it should be an efficient one even if

> > > skip_inefficiencies is set.

> >

> > Then we need to separate the __cpufreq_driver_target() issued from DVFS

> > governors and the others.

> 

> I think so.

> 

> > I can bring back the previous RELATION_E

> > and even have RELATION_LE and RELATION_HE to not leave behind

> > ondemands's powerbias?

> 

> Or a flag to be ORed with the relation to indicate that the request

> doesn't come from a DVFS governor (which basically boils down to the

> same thing).


Ok. A flag is probably better, so we don't need to redefine each relation with
an "efficient" version.
Rafael J. Wysocki Sept. 3, 2021, 6:14 p.m. UTC | #5
On Thu, Sep 2, 2021 at 3:49 PM Vincent Donnefort
<vincent.donnefort@arm.com> wrote:
>

> [...]

>

> > > > >

> > > > > +static inline unsigned int

> > > > > +cpufreq_frequency_find_efficient(struct cpufreq_policy *policy,

> > > > > +                                unsigned int idx)

> > > > > +{

> > > > > +       struct cpufreq_frequency_table *table = policy->freq_table;

> > > > > +       unsigned int efficient_idx = table[idx].efficient;

> > > > > +

> > > > > +       return table[efficient_idx].frequency <= policy->max ? efficient_idx :

> > > > > +               idx;

> > > >

> > > > I'm not sure about this.

> > > >

> > > > In the _RELATION_L case table[idx].frequency can be above the policy

> > > > max, so you may end up running at an inefficient frequency above the

> > > > policy max, but you won't use an efficient one above it.  Isn't this

> > > > slightly confusing?

> > >

> > > This can indeed happen when policy->max isn't equal to an available frequency.

> > > But nontheless, we can't let the efficient resolution violate the policy->max,

> > > which is used for thermal capping. The fact that we can overshoot a max

> > > limit is confusing as well.

> > >

> > > So I could add a policy->max sanity, to make sure this value is an actual

> > > frequency and that RELATION_L will never overshoot that value. Or we can have a

> > > flag somewhere to indicate thermal capping is happening and we shouldn't skip

> > > inefficient frequencies.

> >

> > I would prefer the first option, because user space may be applying

> > the limit for power capping or thermal reasons too and the kernel

> > can't really tell why it is doing that.

> >

> > This needs to be added to cpufreq_set_policy(), I think after calling

> > the driver's ->verify().

> >

> > And if this is done to the policy max, IMO it needs to be done to the

> > policy min too, for consistency.  So if a frequency table is used, the

> > policy max and the policy min could be only the frequencies present in

> > the table.

>

> Ack. I'll consolidate policy->max and ->min set behavior in v7 so we won't have

> any problem having to know who set policy->max and if yes or no we can break it.

>

> What relation do we want for min/max setting? I guess RELATION_H for policy->max

> and RELATION_L for policy->min (i.e. The highest frequency existing right below

> the maximum and the lowest frequency existing just above the minimum)


Yes.

> >

> > Moreover, if only efficient frequencies are to be used, RELATION_L

> > needs to return min(policy->max, the closest efficient frequency equal

> > to or above the target).

>

> You mean, never returning an inefficient frequency, unless there are no

> efficient ones in the range policy->min policy->max ?


No, that's not what I mean.

First note that the target here is clamped between the policy min and
max.  Also bear in mind that each of them is a frequency from the
table, either efficient or inefficient.

In the first step, search through the efficient frequencies only.
That will return you something at or above the target.  If it is at
the target, you're done.  If it is above the target, it may be either
within or above the policy max.  If it is within the policy max,
you're done.  If it is above the policy max, you need to search
through the inefficient frequencies between the target and the policy
max (and you know that there is at least one - the policy max itself).

So what I said previously wasn't particularly precise, sorry about that.
Vincent Donnefort Sept. 6, 2021, 8:17 a.m. UTC | #6
[...]

> > >

> > > Moreover, if only efficient frequencies are to be used, RELATION_L

> > > needs to return min(policy->max, the closest efficient frequency equal

> > > to or above the target).

> >

> > You mean, never returning an inefficient frequency, unless there are no

> > efficient ones in the range policy->min policy->max ?

> 

> No, that's not what I mean.

> 

> First note that the target here is clamped between the policy min and

> max.  Also bear in mind that each of them is a frequency from the

> table, either efficient or inefficient.

> 

> In the first step, search through the efficient frequencies only.

> That will return you something at or above the target.  If it is at

> the target, you're done.  If it is above the target, it may be either

> within or above the policy max.  If it is within the policy max,

> you're done.  If it is above the policy max, you need to search

> through the inefficient frequencies between the target and the policy

> max (and you know that there is at least one - the policy max itself).

> 

> So what I said previously wasn't particularly precise, sorry about that.


I might have missed something but it seems equivalent to what's currently done:

Find the appropriate frequency, if inefficient go to the efficient one, if
above policy->max return the original inefficient frequency.
Rafael J. Wysocki Sept. 6, 2021, 12:08 p.m. UTC | #7
On Mon, Sep 6, 2021 at 10:17 AM Vincent Donnefort
<vincent.donnefort@arm.com> wrote:
>

> [...]

>

> > > >

> > > > Moreover, if only efficient frequencies are to be used, RELATION_L

> > > > needs to return min(policy->max, the closest efficient frequency equal

> > > > to or above the target).

> > >

> > > You mean, never returning an inefficient frequency, unless there are no

> > > efficient ones in the range policy->min policy->max ?

> >

> > No, that's not what I mean.

> >

> > First note that the target here is clamped between the policy min and

> > max.  Also bear in mind that each of them is a frequency from the

> > table, either efficient or inefficient.

> >

> > In the first step, search through the efficient frequencies only.

> > That will return you something at or above the target.  If it is at

> > the target, you're done.  If it is above the target, it may be either

> > within or above the policy max.  If it is within the policy max,

> > you're done.  If it is above the policy max, you need to search

> > through the inefficient frequencies between the target and the policy

> > max (and you know that there is at least one - the policy max itself).

> >

> > So what I said previously wasn't particularly precise, sorry about that.

>

> I might have missed something but it seems equivalent to what's currently done:

>

> Find the appropriate frequency, if inefficient go to the efficient one, if

> above policy->max return the original inefficient frequency.


It may or may not be equivalent depending on what the efficient one is.

And what is there now doesn't work for RELATION_H if I'm not mistaken.
Vincent Donnefort Sept. 6, 2021, 12:53 p.m. UTC | #8
On Mon, Sep 06, 2021 at 02:08:36PM +0200, Rafael J. Wysocki wrote:
> On Mon, Sep 6, 2021 at 10:17 AM Vincent Donnefort

> <vincent.donnefort@arm.com> wrote:

> >

> > [...]

> >

> > > > >

> > > > > Moreover, if only efficient frequencies are to be used, RELATION_L

> > > > > needs to return min(policy->max, the closest efficient frequency equal

> > > > > to or above the target).

> > > >

> > > > You mean, never returning an inefficient frequency, unless there are no

> > > > efficient ones in the range policy->min policy->max ?

> > >

> > > No, that's not what I mean.

> > >

> > > First note that the target here is clamped between the policy min and

> > > max.  Also bear in mind that each of them is a frequency from the

> > > table, either efficient or inefficient.

> > >

> > > In the first step, search through the efficient frequencies only.

> > > That will return you something at or above the target.  If it is at

> > > the target, you're done.  If it is above the target, it may be either

> > > within or above the policy max.  If it is within the policy max,

> > > you're done.  If it is above the policy max, you need to search

> > > through the inefficient frequencies between the target and the policy

> > > max (and you know that there is at least one - the policy max itself).

> > >

> > > So what I said previously wasn't particularly precise, sorry about that.

> >

> > I might have missed something but it seems equivalent to what's currently done:

> >

> > Find the appropriate frequency, if inefficient go to the efficient one, if

> > above policy->max return the original inefficient frequency.

> 

> It may or may not be equivalent depending on what the efficient one is.

> 

> And what is there now doesn't work for RELATION_H if I'm not mistaken.


With the current approach, RELATION_H would find the best frequency and then
resolve it to an efficient one if possible. The efficient one might overshoot
the target_freq, but isn't it a good thing ?
 
 - If we consider only the efficient freqs in a first place, there's a risk we
   would return a frequency way smaller than the actual request.

 - What are the gain in capping RELATION_H to the target_freq if we can find a
   frequency higher than the request that wouldn't use more energy than the
   target?

RELATION_H is used in two DVFS governors: conservative and ondemand:

  - Ondemand is using RELATION_H in the context of powerbias. It seems a usecase
    where it is not a problem to overshoot RELATION_H.

  - Conservative is using RELATION_H when the load is increasing. Same as
    ondemand, I don't think we would have any gain by not overshooting the
    RELATION_H target.
Rafael J. Wysocki Sept. 6, 2021, 3:16 p.m. UTC | #9
On Mon, Sep 6, 2021 at 2:53 PM Vincent Donnefort
<vincent.donnefort@arm.com> wrote:
>

> On Mon, Sep 06, 2021 at 02:08:36PM +0200, Rafael J. Wysocki wrote:

> > On Mon, Sep 6, 2021 at 10:17 AM Vincent Donnefort

> > <vincent.donnefort@arm.com> wrote:

> > >

> > > [...]

> > >

> > > > > >

> > > > > > Moreover, if only efficient frequencies are to be used, RELATION_L

> > > > > > needs to return min(policy->max, the closest efficient frequency equal

> > > > > > to or above the target).

> > > > >

> > > > > You mean, never returning an inefficient frequency, unless there are no

> > > > > efficient ones in the range policy->min policy->max ?

> > > >

> > > > No, that's not what I mean.

> > > >

> > > > First note that the target here is clamped between the policy min and

> > > > max.  Also bear in mind that each of them is a frequency from the

> > > > table, either efficient or inefficient.

> > > >

> > > > In the first step, search through the efficient frequencies only.

> > > > That will return you something at or above the target.  If it is at

> > > > the target, you're done.  If it is above the target, it may be either

> > > > within or above the policy max.  If it is within the policy max,

> > > > you're done.  If it is above the policy max, you need to search

> > > > through the inefficient frequencies between the target and the policy

> > > > max (and you know that there is at least one - the policy max itself).

> > > >

> > > > So what I said previously wasn't particularly precise, sorry about that.

> > >

> > > I might have missed something but it seems equivalent to what's currently done:

> > >

> > > Find the appropriate frequency, if inefficient go to the efficient one, if

> > > above policy->max return the original inefficient frequency.

> >

> > It may or may not be equivalent depending on what the efficient one is.

> >

> > And what is there now doesn't work for RELATION_H if I'm not mistaken.

>

> With the current approach, RELATION_H would find the best frequency and then

> resolve it to an efficient one if possible. The efficient one might overshoot

> the target_freq, but isn't it a good thing ?


No, it cannot.

>  - If we consider only the efficient freqs in a first place, there's a risk we

>    would return a frequency way smaller than the actual request.

>

>  - What are the gain in capping RELATION_H to the target_freq if we can find a

>    frequency higher than the request that wouldn't use more energy than the

>    target?


There may be a power constraint, so RELATION_H should never go above the target.

> RELATION_H is used in two DVFS governors: conservative and ondemand:

>

>   - Ondemand is using RELATION_H in the context of powerbias. It seems a usecase

>     where it is not a problem to overshoot RELATION_H.

>

>   - Conservative is using RELATION_H when the load is increasing. Same as

>     ondemand, I don't think we would have any gain by not overshooting the

>     RELATION_H target.


Well, let me repeat: the performance governor uses RELATION_H, so if
you allow it to go above the policy limit, it will stay there forever.
Rafael J. Wysocki Sept. 6, 2021, 3:32 p.m. UTC | #10
On Mon, Sep 6, 2021 at 5:16 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>

> On Mon, Sep 6, 2021 at 2:53 PM Vincent Donnefort

> <vincent.donnefort@arm.com> wrote:

> >

> > On Mon, Sep 06, 2021 at 02:08:36PM +0200, Rafael J. Wysocki wrote:

> > > On Mon, Sep 6, 2021 at 10:17 AM Vincent Donnefort

> > > <vincent.donnefort@arm.com> wrote:

> > > >

> > > > [...]

> > > >

> > > > > > >

> > > > > > > Moreover, if only efficient frequencies are to be used, RELATION_L

> > > > > > > needs to return min(policy->max, the closest efficient frequency equal

> > > > > > > to or above the target).

> > > > > >

> > > > > > You mean, never returning an inefficient frequency, unless there are no

> > > > > > efficient ones in the range policy->min policy->max ?

> > > > >

> > > > > No, that's not what I mean.

> > > > >

> > > > > First note that the target here is clamped between the policy min and

> > > > > max.  Also bear in mind that each of them is a frequency from the

> > > > > table, either efficient or inefficient.

> > > > >

> > > > > In the first step, search through the efficient frequencies only.

> > > > > That will return you something at or above the target.  If it is at

> > > > > the target, you're done.  If it is above the target, it may be either

> > > > > within or above the policy max.  If it is within the policy max,

> > > > > you're done.  If it is above the policy max, you need to search

> > > > > through the inefficient frequencies between the target and the policy

> > > > > max (and you know that there is at least one - the policy max itself).

> > > > >

> > > > > So what I said previously wasn't particularly precise, sorry about that.

> > > >

> > > > I might have missed something but it seems equivalent to what's currently done:

> > > >

> > > > Find the appropriate frequency, if inefficient go to the efficient one, if

> > > > above policy->max return the original inefficient frequency.

> > >

> > > It may or may not be equivalent depending on what the efficient one is.

> > >

> > > And what is there now doesn't work for RELATION_H if I'm not mistaken.

> >

> > With the current approach, RELATION_H would find the best frequency and then

> > resolve it to an efficient one if possible. The efficient one might overshoot

> > the target_freq, but isn't it a good thing ?

>

> No, it cannot.

>

> >  - If we consider only the efficient freqs in a first place, there's a risk we

> >    would return a frequency way smaller than the actual request.

> >

> >  - What are the gain in capping RELATION_H to the target_freq if we can find a

> >    frequency higher than the request that wouldn't use more energy than the

> >    target?

>

> There may be a power constraint, so RELATION_H should never go above the target.

>

> > RELATION_H is used in two DVFS governors: conservative and ondemand:

> >

> >   - Ondemand is using RELATION_H in the context of powerbias. It seems a usecase

> >     where it is not a problem to overshoot RELATION_H.

> >

> >   - Conservative is using RELATION_H when the load is increasing. Same as

> >     ondemand, I don't think we would have any gain by not overshooting the

> >     RELATION_H target.

>

> Well, let me repeat: the performance governor uses RELATION_H, so if

> you allow it to go above the policy limit, it will stay there forever.


You'll probably say that the we'll always use the inefficient
frequencies with the performance governor, so this doesn't matter,
which is fair enough, but let me say that I'm not particularly keen on
storing the efficient frequency intex in every row of the table for
every CPU in the system especially on systems where all frequencies
are efficient (and these are the majority AFAICS), because this is
just plain confusing and it causes you to wonder why it may be a good
idea to let the target to be overshot in the RELATION_H case which is
even more confusing.

Can we just perhaps avoid this possibly premature optimization and get
back to it when it has been demonstrated to be necessary?
Vincent Donnefort Sept. 6, 2021, 3:38 p.m. UTC | #11
On Mon, Sep 06, 2021 at 05:16:32PM +0200, Rafael J. Wysocki wrote:
> On Mon, Sep 6, 2021 at 2:53 PM Vincent Donnefort

> <vincent.donnefort@arm.com> wrote:

> >

> > On Mon, Sep 06, 2021 at 02:08:36PM +0200, Rafael J. Wysocki wrote:

> > > On Mon, Sep 6, 2021 at 10:17 AM Vincent Donnefort

> > > <vincent.donnefort@arm.com> wrote:

> > > >

> > > > [...]

> > > >

> > > > > > >

> > > > > > > Moreover, if only efficient frequencies are to be used, RELATION_L

> > > > > > > needs to return min(policy->max, the closest efficient frequency equal

> > > > > > > to or above the target).

> > > > > >

> > > > > > You mean, never returning an inefficient frequency, unless there are no

> > > > > > efficient ones in the range policy->min policy->max ?

> > > > >

> > > > > No, that's not what I mean.

> > > > >

> > > > > First note that the target here is clamped between the policy min and

> > > > > max.  Also bear in mind that each of them is a frequency from the

> > > > > table, either efficient or inefficient.

> > > > >

> > > > > In the first step, search through the efficient frequencies only.

> > > > > That will return you something at or above the target.  If it is at

> > > > > the target, you're done.  If it is above the target, it may be either

> > > > > within or above the policy max.  If it is within the policy max,

> > > > > you're done.  If it is above the policy max, you need to search

> > > > > through the inefficient frequencies between the target and the policy

> > > > > max (and you know that there is at least one - the policy max itself).

> > > > >

> > > > > So what I said previously wasn't particularly precise, sorry about that.

> > > >

> > > > I might have missed something but it seems equivalent to what's currently done:

> > > >

> > > > Find the appropriate frequency, if inefficient go to the efficient one, if

> > > > above policy->max return the original inefficient frequency.

> > >

> > > It may or may not be equivalent depending on what the efficient one is.

> > >

> > > And what is there now doesn't work for RELATION_H if I'm not mistaken.

> >

> > With the current approach, RELATION_H would find the best frequency and then

> > resolve it to an efficient one if possible. The efficient one might overshoot

> > the target_freq, but isn't it a good thing ?

> 

> No, it cannot.

> 

> >  - If we consider only the efficient freqs in a first place, there's a risk we

> >    would return a frequency way smaller than the actual request.

> >

> >  - What are the gain in capping RELATION_H to the target_freq if we can find a

> >    frequency higher than the request that wouldn't use more energy than the

> >    target?

> 

> There may be a power constraint, so RELATION_H should never go above the target.


Fair enough, let's not overshoot RELATION_H's target and only return (if
possible) an efficient freq <= target_freq

> 

> > RELATION_H is used in two DVFS governors: conservative and ondemand:

> >

> >   - Ondemand is using RELATION_H in the context of powerbias. It seems a usecase

> >     where it is not a problem to overshoot RELATION_H.

> >

> >   - Conservative is using RELATION_H when the load is increasing. Same as

> >     ondemand, I don't think we would have any gain by not overshooting the

> >     RELATION_H target.

> 

> Well, let me repeat: the performance governor uses RELATION_H, so if

> you allow it to go above the policy limit, it will stay there forever.


The performance governor isn't concerned by this change, I was only talking
about conservative and ondemand. Also overshooting policy->max wouldn't have
ever happened.
Rafael J. Wysocki Sept. 6, 2021, 4:04 p.m. UTC | #12
On Mon, Sep 6, 2021 at 5:38 PM Vincent Donnefort
<vincent.donnefort@arm.com> wrote:
>

> On Mon, Sep 06, 2021 at 05:16:32PM +0200, Rafael J. Wysocki wrote:

> > On Mon, Sep 6, 2021 at 2:53 PM Vincent Donnefort

> > <vincent.donnefort@arm.com> wrote:

> > >

> > > On Mon, Sep 06, 2021 at 02:08:36PM +0200, Rafael J. Wysocki wrote:

> > > > On Mon, Sep 6, 2021 at 10:17 AM Vincent Donnefort

> > > > <vincent.donnefort@arm.com> wrote:

> > > > >

> > > > > [...]

> > > > >

> > > > > > > >

> > > > > > > > Moreover, if only efficient frequencies are to be used, RELATION_L

> > > > > > > > needs to return min(policy->max, the closest efficient frequency equal

> > > > > > > > to or above the target).

> > > > > > >

> > > > > > > You mean, never returning an inefficient frequency, unless there are no

> > > > > > > efficient ones in the range policy->min policy->max ?

> > > > > >

> > > > > > No, that's not what I mean.

> > > > > >

> > > > > > First note that the target here is clamped between the policy min and

> > > > > > max.  Also bear in mind that each of them is a frequency from the

> > > > > > table, either efficient or inefficient.

> > > > > >

> > > > > > In the first step, search through the efficient frequencies only.

> > > > > > That will return you something at or above the target.  If it is at

> > > > > > the target, you're done.  If it is above the target, it may be either

> > > > > > within or above the policy max.  If it is within the policy max,

> > > > > > you're done.  If it is above the policy max, you need to search

> > > > > > through the inefficient frequencies between the target and the policy

> > > > > > max (and you know that there is at least one - the policy max itself).

> > > > > >

> > > > > > So what I said previously wasn't particularly precise, sorry about that.

> > > > >

> > > > > I might have missed something but it seems equivalent to what's currently done:

> > > > >

> > > > > Find the appropriate frequency, if inefficient go to the efficient one, if

> > > > > above policy->max return the original inefficient frequency.

> > > >

> > > > It may or may not be equivalent depending on what the efficient one is.

> > > >

> > > > And what is there now doesn't work for RELATION_H if I'm not mistaken.

> > >

> > > With the current approach, RELATION_H would find the best frequency and then

> > > resolve it to an efficient one if possible. The efficient one might overshoot

> > > the target_freq, but isn't it a good thing ?

> >

> > No, it cannot.

> >

> > >  - If we consider only the efficient freqs in a first place, there's a risk we

> > >    would return a frequency way smaller than the actual request.

> > >

> > >  - What are the gain in capping RELATION_H to the target_freq if we can find a

> > >    frequency higher than the request that wouldn't use more energy than the

> > >    target?

> >

> > There may be a power constraint, so RELATION_H should never go above the target.

>

> Fair enough, let's not overshoot RELATION_H's target and only return (if

> possible) an efficient freq <= target_freq


Thanks!

Please also note that there is a similar problem in the current patch
set for RELATION_C.

Namely, say that there are four frequencies, f_a < f_b < f_c < f_d,
such that f_b - f_a == f_d - f_c == (f_c - f_b) / 2 and f_a, f_d are
efficient.  If the efficient frequencies are preferred and the target
falls between f_b and f_c, the current series will always return f_d
(if I understand it correctly), whereas if the search is limited to
the efficient frequencies, f_a will often be closer to the target than
f_d (50% of the time under random distribution of target values).

> >

> > > RELATION_H is used in two DVFS governors: conservative and ondemand:

> > >

> > >   - Ondemand is using RELATION_H in the context of powerbias. It seems a usecase

> > >     where it is not a problem to overshoot RELATION_H.

> > >

> > >   - Conservative is using RELATION_H when the load is increasing. Same as

> > >     ondemand, I don't think we would have any gain by not overshooting the

> > >     RELATION_H target.

> >

> > Well, let me repeat: the performance governor uses RELATION_H, so if

> > you allow it to go above the policy limit, it will stay there forever.

>

> The performance governor isn't concerned by this change, I was only talking

> about conservative and ondemand. Also overshooting policy->max wouldn't have

> ever happened.


Right.
diff mbox series

Patch

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 7d5f170ecad1..b46fe2d7baf1 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -2295,6 +2295,44 @@  __weak struct cpufreq_governor *cpufreq_fallback_governor(void)
 	return NULL;
 }
 
+static inline bool
+cpufreq_can_skip_inefficiencies(struct cpufreq_policy *policy)
+{
+	struct cpufreq_frequency_table *pos;
+	bool valid = false;
+	int idx;
+
+	if (!(policy->governor->flags & CPUFREQ_GOV_DYNAMIC_SWITCHING))
+		return false;
+
+	if (policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED)
+		return false;
+
+	/* Is there at least one inefficiency ? */
+	cpufreq_for_each_valid_entry(pos, policy->freq_table) {
+		if (pos->flags & CPUFREQ_INEFFICIENT_FREQ) {
+			valid = true;
+			break;
+		}
+	}
+
+	if (!valid)
+		return false;
+
+	/*
+	 * Has cpufreq_table_update_efficiencies been called? i.e. is the
+	 * highest frequency efficient.
+	 */
+	cpufreq_for_each_valid_entry_idx(pos, policy->freq_table, idx) {
+		valid = !!(idx == pos->efficient);
+		if (policy->freq_table_sorted ==
+					CPUFREQ_TABLE_SORTED_DESCENDING)
+			break;
+	}
+
+	return valid;
+}
+
 static int cpufreq_init_governor(struct cpufreq_policy *policy)
 {
 	int ret;
@@ -2337,6 +2375,7 @@  static int cpufreq_init_governor(struct cpufreq_policy *policy)
 	}
 
 	policy->strict_target = !!(policy->governor->flags & CPUFREQ_GOV_STRICT_TARGET);
+	policy->skip_inefficiencies = cpufreq_can_skip_inefficiencies(policy);
 
 	return 0;
 }
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 4e901ebd104d..cb09afbf01e2 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -117,6 +117,13 @@  struct cpufreq_policy {
 	bool			strict_target;
 
 	/*
+	 * Set if the CPUFREQ_GOV_DYNAMIC_SWITCHING flag is set for the current
+	 * governor and if inefficient frequencies were found in the frequency
+	 * table.
+	 */
+	bool			skip_inefficiencies;
+
+	/*
 	 * Preferred average time interval between consecutive invocations of
 	 * the driver to set the frequency for this policy.  To be set by the
 	 * scaling driver (0, which is the default, means no preference).
@@ -972,25 +979,46 @@  static inline int cpufreq_table_find_index_c(struct cpufreq_policy *policy,
 		return cpufreq_table_find_index_dc(policy, target_freq);
 }
 
+static inline unsigned int
+cpufreq_frequency_find_efficient(struct cpufreq_policy *policy,
+				 unsigned int idx)
+{
+	struct cpufreq_frequency_table *table = policy->freq_table;
+	unsigned int efficient_idx = table[idx].efficient;
+
+	return table[efficient_idx].frequency <= policy->max ? efficient_idx :
+		idx;
+}
+
 static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy,
 						 unsigned int target_freq,
 						 unsigned int relation)
 {
+	int idx;
+
 	if (unlikely(policy->freq_table_sorted == CPUFREQ_TABLE_UNSORTED))
 		return cpufreq_table_index_unsorted(policy, target_freq,
 						    relation);
 
 	switch (relation) {
 	case CPUFREQ_RELATION_L:
-		return cpufreq_table_find_index_l(policy, target_freq);
+		idx = cpufreq_table_find_index_l(policy, target_freq);
+		break;
 	case CPUFREQ_RELATION_H:
-		return cpufreq_table_find_index_h(policy, target_freq);
+		idx = cpufreq_table_find_index_h(policy, target_freq);
+		break;
 	case CPUFREQ_RELATION_C:
-		return cpufreq_table_find_index_c(policy, target_freq);
+		idx = cpufreq_table_find_index_c(policy, target_freq);
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
 	}
+
+	if (policy->skip_inefficiencies)
+		idx = cpufreq_frequency_find_efficient(policy, idx);
+
+	return idx;
 }
 
 static inline int cpufreq_table_count_valid_entries(const struct cpufreq_policy *policy)