diff mbox series

[v3,3/4] thermal/core: Build ascending ordered indexes for the trip points

Message ID 20220715210911.714479-3-daniel.lezcano@linaro.org
State New
Headers show
Series [v3,1/4] thermal/core: Encapsulate the trip point crossed function | expand

Commit Message

Daniel Lezcano July 15, 2022, 9:09 p.m. UTC
By convention the trips points are declared in the ascending
temperature order. However, no specification for the device tree, ACPI
or documentation tells the trip points must be ordered this way.

In the other hand, we need those to be ordered to browse them at the
thermal events. But if we assume they are ordered and change the code
based on this assumption, any platform with shuffled trip points
description will be broken (if they exist).

Instead of taking the risk of breaking the existing platforms, use an
array of temperature ordered trip identifiers and make it available
for the code needing to browse the trip points in an ordered way.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/thermal_core.c | 62 +++++++++++++++++++++++++++-------
 include/linux/thermal.h        |  2 ++
 2 files changed, 52 insertions(+), 12 deletions(-)

Comments

Zhang, Rui July 18, 2022, 5:28 a.m. UTC | #1
On Fri, 2022-07-15 at 23:09 +0200, Daniel Lezcano wrote:
> By convention the trips points are declared in the ascending
> temperature order. However, no specification for the device tree,
> ACPI
> or documentation tells the trip points must be ordered this way.
> 
> In the other hand, we need those to be ordered to browse them at the
> thermal events. But if we assume they are ordered and change the code
> based on this assumption, any platform with shuffled trip points
> description will be broken (if they exist).
> 
> Instead of taking the risk of breaking the existing platforms, use an
> array of temperature ordered trip identifiers and make it available
> for the code needing to browse the trip points in an ordered way.
> 
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---
>  drivers/thermal/thermal_core.c | 62 +++++++++++++++++++++++++++-----
> --
>  include/linux/thermal.h        |  2 ++
>  2 files changed, 52 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/thermal/thermal_core.c
> b/drivers/thermal/thermal_core.c
> index f66036b3daae..f02f38b66445 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -355,7 +355,8 @@ static void handle_critical_trips(struct
> thermal_zone_device *tz,
>  }
>  
>  static void handle_thermal_trip_crossed(struct thermal_zone_device
> *tz, int trip,
> -                                       int trip_temp, int trip_hyst,
> enum thermal_trip_type trip_type)
> +                                       int trip_temp, int trip_hyst,
> +                                       enum thermal_trip_type
> trip_type)
>  {
>         if (tz->last_temperature == THERMAL_TEMP_INVALID)
>                 return;
> @@ -1165,6 +1166,46 @@ static void bind_tz(struct thermal_zone_device
> *tz)
>         mutex_unlock(&thermal_list_lock);
>  }
>  
> +static void sort_trips_indexes(struct thermal_zone_device *tz)
> +{
> +       int i, j;
> +
> +       for (i = 0; i < tz->trips; i++)
> +               tz->trips_indexes[i] = i;
> + 
> +       for (i = 0; i < tz->trips; i++) {
> +               for (j = i + 1; j < tz->trips; j++) {
> +                       int t1, t2;
> +
> +                       tz->ops->get_trip_temp(tz, tz-
> >trips_indexes[i], &t1);

This line can be moved to the upper loop.

> +                       tz->ops->get_trip_temp(tz, tz-
> >trips_indexes[j], &t2);
> +

what about the disabled trip points?

we should ignore those trip points and check the return value to make
sure we're comparing the valid trip_temp values.

thanks,
rui

> +                       if (t1 > t2)
> +                               swap(tz->trips_indexes[i], tz-
> >trips_indexes[j]);
> +               }
> +       }
> +}
> +
> +static int thermal_zone_device_trip_init(struct thermal_zone_device
> *tz)
> +{
> +       enum thermal_trip_type trip_type;
> +       int trip_temp, i;
> +       
> +       tz->trips_indexes = kzalloc(tz->trips * sizeof(int),
> GFP_KERNEL);
> +       if (!tz->trips_indexes)
> +               return -ENOMEM;
> +
> +       for (i = 0; i < tz->trips; i++) {
> +               if (tz->ops->get_trip_type(tz, i, &trip_type) ||
> +                   tz->ops->get_trip_temp(tz, i, &trip_temp) ||
> !trip_temp)
> +                       set_bit(i, &tz->trips_disabled);
> +       }
> +
> +       sort_trips_indexes(tz);
> +
> +       return 0;
> +}
> +
>  /**
>   * thermal_zone_device_register() - register a new thermal zone
> device
>   * @type:      the thermal zone device type
> @@ -1196,11 +1237,8 @@ thermal_zone_device_register(const char *type,
> int trips, int mask,
>                              int polling_delay)
>  {
>         struct thermal_zone_device *tz;
> -       enum thermal_trip_type trip_type;
> -       int trip_temp;
>         int id;
>         int result;
> -       int count;
>         struct thermal_governor *governor;
>  
>         if (!type || strlen(type) == 0) {
> @@ -1272,12 +1310,9 @@ thermal_zone_device_register(const char *type,
> int trips, int mask,
>         if (result)
>                 goto release_device;
>  
> -       for (count = 0; count < trips; count++) {
> -               if (tz->ops->get_trip_type(tz, count, &trip_type) ||
> -                   tz->ops->get_trip_temp(tz, count, &trip_temp) ||
> -                   !trip_temp)
> -                       set_bit(count, &tz->trips_disabled);
> -       }
> +       result = thermal_zone_device_trip_init(tz);
> +       if (result)
> +               goto unregister;
>  
>         /* Update 'this' zone's governor information */
>         mutex_lock(&thermal_governor_lock);
> @@ -1290,7 +1325,7 @@ thermal_zone_device_register(const char *type,
> int trips, int mask,
>         result = thermal_set_governor(tz, governor);
>         if (result) {
>                 mutex_unlock(&thermal_governor_lock);
> -               goto unregister;
> +               goto kfree_indexes;
>         }
>  
>         mutex_unlock(&thermal_governor_lock);
> @@ -1298,7 +1333,7 @@ thermal_zone_device_register(const char *type,
> int trips, int mask,
>         if (!tz->tzp || !tz->tzp->no_hwmon) {
>                 result = thermal_add_hwmon_sysfs(tz);
>                 if (result)
> -                       goto unregister;
> +                       goto kfree_indexes;
>         }
>  
>         mutex_lock(&thermal_list_lock);
> @@ -1319,6 +1354,8 @@ thermal_zone_device_register(const char *type,
> int trips, int mask,
>  
>         return tz;
>  
> +kfree_indexes:
> +       kfree(tz->trips_indexes);
>  unregister:
>         device_del(&tz->device);
>  release_device:
> @@ -1387,6 +1424,7 @@ void thermal_zone_device_unregister(struct
> thermal_zone_device *tz)
>         thermal_remove_hwmon_sysfs(tz);
>         ida_simple_remove(&thermal_tz_ida, tz->id);
>         ida_destroy(&tz->ida);
> +       kfree(tz->trips_indexes);
>         mutex_destroy(&tz->lock);
>         device_unregister(&tz->device);
>  
> diff --git a/include/linux/thermal.h b/include/linux/thermal.h
> index 231bac2768fb..4c3b72536772 100644
> --- a/include/linux/thermal.h
> +++ b/include/linux/thermal.h
> @@ -112,6 +112,7 @@ struct thermal_cooling_device {
>   * @mode:              current mode of this thermal zone
>   * @devdata:   private pointer for device private data
>   * @trips:     number of trip points the thermal zone supports
> + * @trips_indexes:     an array of sorted trip points indexes
>   * @trips_disabled;    bitmap for disabled trips
>   * @passive_delay_jiffies: number of jiffies to wait between polls
> when
>   *                     performing passive cooling.
> @@ -152,6 +153,7 @@ struct thermal_zone_device {
>         enum thermal_device_mode mode;
>         void *devdata;
>         int trips;
> +       int *trips_indexes;
>         unsigned long trips_disabled;   /* bitmap for disabled trips
> */
>         unsigned long passive_delay_jiffies;
>         unsigned long polling_delay_jiffies;
Daniel Lezcano July 18, 2022, 1:21 p.m. UTC | #2
Hi Zhang,

thanks for the review

On 18/07/2022 07:28, Zhang Rui wrote:
> On Fri, 2022-07-15 at 23:09 +0200, Daniel Lezcano wrote:

[ ... ]

>> Instead of taking the risk of breaking the existing platforms, use an
>> array of temperature ordered trip identifiers and make it available
>> for the code needing to browse the trip points in an ordered way.
>>
>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>> ---

[ ... ]

>> +static void sort_trips_indexes(struct thermal_zone_device *tz)
>> +{
>> +       int i, j;
>> +
>> +       for (i = 0; i < tz->trips; i++)
>> +               tz->trips_indexes[i] = i;
>> +
>> +       for (i = 0; i < tz->trips; i++) {
>> +               for (j = i + 1; j < tz->trips; j++) {
>> +                       int t1, t2;
>> +
>> +                       tz->ops->get_trip_temp(tz, tz-
>>> trips_indexes[i], &t1);
> 
> This line can be moved to the upper loop.

Right, thanks!

>> +                       tz->ops->get_trip_temp(tz, tz-
>>> trips_indexes[j], &t2);
>> +
> 
> what about the disabled trip points?
> 
> we should ignore those trip points and check the return value to make
> sure we're comparing the valid trip_temp values.

We don't have to care about, whatever the position, the corresponding 
trip id will be disabled by the trip init function before calling this 
one and ignored in the handle_thermal_trip() function
Daniel Lezcano July 18, 2022, 2:32 p.m. UTC | #3
On 18/07/2022 07:28, Zhang Rui wrote:
> On Fri, 2022-07-15 at 23:09 +0200, Daniel Lezcano wrote:
>> By convention the trips points are declared in the ascending
>> temperature order. However, no specification for the device tree,
>> ACPI
>> or documentation tells the trip points must be ordered this way.
>>
>> In the other hand, we need those to be ordered to browse them at the
>> thermal events. But if we assume they are ordered and change the code
>> based on this assumption, any platform with shuffled trip points
>> description will be broken (if they exist).
>>
>> Instead of taking the risk of breaking the existing platforms, use an
>> array of temperature ordered trip identifiers and make it available
>> for the code needing to browse the trip points in an ordered way.
>>
>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

[ ... ]

>> +static void sort_trips_indexes(struct thermal_zone_device *tz)
>> +{
>> +       int i, j;
>> +
>> +       for (i = 0; i < tz->trips; i++)
>> +               tz->trips_indexes[i] = i;
>> +
>> +       for (i = 0; i < tz->trips; i++) {
>> +               for (j = i + 1; j < tz->trips; j++) {
>> +                       int t1, t2;
>> +
>> +                       tz->ops->get_trip_temp(tz, tz-
>>> trips_indexes[i], &t1);
> 
> This line can be moved to the upper loop.
> 
>> +                       tz->ops->get_trip_temp(tz, tz-
>>> trips_indexes[j], &t2);


Actually, we can not move the line up because of the swap below

>> +                       if (t1 > t2)
>> +                               swap(tz->trips_indexes[i], tz-
>>> trips_indexes[j]);
>> +               }
>> +       }
>> +}
Zhang, Rui July 19, 2022, 1:35 a.m. UTC | #4
On Tue, 2022-07-19 at 09:14 +0800, Zhang Rui wrote:
> On Mon, 2022-07-18 at 15:21 +0200, Daniel Lezcano wrote:
> > 
> > Hi Zhang,
> > 
> > thanks for the review
> > 
> > On 18/07/2022 07:28, Zhang Rui wrote:
> > > On Fri, 2022-07-15 at 23:09 +0200, Daniel Lezcano wrote:
> > 
> > [ ... ]
> > 
> > > > Instead of taking the risk of breaking the existing platforms,
> > > > use an
> > > > array of temperature ordered trip identifiers and make it
> > > > available
> > > > for the code needing to browse the trip points in an ordered
> > > > way.
> > > > 
> > > > Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> > > > ---
> > 
> > [ ... ]
> > 
> > > > +static void sort_trips_indexes(struct thermal_zone_device *tz)
> > > > +{
> > > > +       int i, j;
> > > > +
> > > > +       for (i = 0; i < tz->trips; i++)
> > > > +               tz->trips_indexes[i] = i;
> > > > +
> > > > +       for (i = 0; i < tz->trips; i++) {
> > > > +               for (j = i + 1; j < tz->trips; j++) {
> > > > +                       int t1, t2;
> > > > +
> > > > +                       tz->ops->get_trip_temp(tz, tz-
> > > > > trips_indexes[i], &t1);
> > > 
> > > This line can be moved to the upper loop.
> > 
> > Right, thanks!
> > 
> > > > +                       tz->ops->get_trip_temp(tz, tz-
> > > > > trips_indexes[j], &t2);
> > > > +
> > > 
> > > what about the disabled trip points?
> > > 
> > > we should ignore those trip points and check the return value to
> > > make
> > > sure we're comparing the valid trip_temp values.
> > 
> > We don't have to care about, whatever the position, the
> > corresponding
> > trip id will be disabled by the trip init function before calling
> > this 
> > one and ignored in the handle_thermal_trip() function
> 
> hah, I missed this one and replied to your latest reply directly.
> 
> The thing I'm concerning is that if we don't check the return value,
> for a disabled trip point, the trip_temp (t1/t2) returned is some
> random value, it all depends on the previous value set by last
> successful .get_trip_temp(),

or uninitialized value from the stack, but still random value every
time we invoke .get_trip_temp() for the same trip point.

-rui

>  and this may screw up the sorting.
> 
> thanks,
> rui
> > 
> > 
>
Daniel Lezcano July 19, 2022, 7:22 a.m. UTC | #5
On 19/07/2022 03:14, Zhang Rui wrote:
> On Mon, 2022-07-18 at 15:21 +0200, Daniel Lezcano wrote:
>>
>> Hi Zhang,
>>
>> thanks for the review
>>
>> On 18/07/2022 07:28, Zhang Rui wrote:
>>> On Fri, 2022-07-15 at 23:09 +0200, Daniel Lezcano wrote:
>>
>> [ ... ]
>>
>>>> Instead of taking the risk of breaking the existing platforms,
>>>> use an
>>>> array of temperature ordered trip identifiers and make it
>>>> available
>>>> for the code needing to browse the trip points in an ordered way.
>>>>
>>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>>>> ---
>>
>> [ ... ]
>>
>>>> +static void sort_trips_indexes(struct thermal_zone_device *tz)
>>>> +{
>>>> +       int i, j;
>>>> +
>>>> +       for (i = 0; i < tz->trips; i++)
>>>> +               tz->trips_indexes[i] = i;
>>>> +
>>>> +       for (i = 0; i < tz->trips; i++) {
>>>> +               for (j = i + 1; j < tz->trips; j++) {
>>>> +                       int t1, t2;
>>>> +
>>>> +                       tz->ops->get_trip_temp(tz, tz-
>>>>> trips_indexes[i], &t1);
>>>
>>> This line can be moved to the upper loop.
>>
>> Right, thanks!
>>
>>>> +                       tz->ops->get_trip_temp(tz, tz-
>>>>> trips_indexes[j], &t2);
>>>> +
>>>
>>> what about the disabled trip points?
>>>
>>> we should ignore those trip points and check the return value to
>>> make
>>> sure we're comparing the valid trip_temp values.
>>
>> We don't have to care about, whatever the position, the corresponding
>> trip id will be disabled by the trip init function before calling
>> this
>> one and ignored in the handle_thermal_trip() function
> 
> hah, I missed this one and replied to your latest reply directly.
> 
> The thing I'm concerning is that if we don't check the return value,
> for a disabled trip point, the trip_temp (t1/t2) returned is some
> random value, it all depends on the previous value set by last
> successful .get_trip_temp(), and this may screw up the sorting.

The indexes array is the same size as the trip array, that makes the 
code much less prone to errors.

To have the same number of trip points, the index of the disabled trip 
must be inserted also in the array. We don't care about its position in 
the indexes array because it is discarded in the handle_trip_point() 
function anyway. For this reason, the random temperature of the disabled 
trip point and the resulting position in the sorting is harmless.

It is made on purpose to ignore the return value, so we have a simpler code.
Zhang, Rui July 19, 2022, 2:17 p.m. UTC | #6
On Tue, 2022-07-19 at 09:22 +0200, Daniel Lezcano wrote:
> On 19/07/2022 03:14, Zhang Rui wrote:
> > On Mon, 2022-07-18 at 15:21 +0200, Daniel Lezcano wrote:
> > > 
> > > Hi Zhang,
> > > 
> > > thanks for the review
> > > 
> > > On 18/07/2022 07:28, Zhang Rui wrote:
> > > > On Fri, 2022-07-15 at 23:09 +0200, Daniel Lezcano wrote:
> > > 
> > > [ ... ]
> > > 
> > > > > Instead of taking the risk of breaking the existing
> > > > > platforms,
> > > > > use an
> > > > > array of temperature ordered trip identifiers and make it
> > > > > available
> > > > > for the code needing to browse the trip points in an ordered
> > > > > way.
> > > > > 
> > > > > Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> > > > > ---
> > > 
> > > [ ... ]
> > > 
> > > > > +static void sort_trips_indexes(struct thermal_zone_device
> > > > > *tz)
> > > > > +{
> > > > > +       int i, j;
> > > > > +
> > > > > +       for (i = 0; i < tz->trips; i++)
> > > > > +               tz->trips_indexes[i] = i;
> > > > > +
> > > > > +       for (i = 0; i < tz->trips; i++) {
> > > > > +               for (j = i + 1; j < tz->trips; j++) {
> > > > > +                       int t1, t2;
> > > > > +
> > > > > +                       tz->ops->get_trip_temp(tz, tz-
> > > > > > trips_indexes[i], &t1);
> > > > 
> > > > This line can be moved to the upper loop.
> > > 
> > > Right, thanks!
> > > 
> > > > > +                       tz->ops->get_trip_temp(tz, tz-
> > > > > > trips_indexes[j], &t2);
> > > > > +
> > > > 
> > > > what about the disabled trip points?
> > > > 
> > > > we should ignore those trip points and check the return value
> > > > to
> > > > make
> > > > sure we're comparing the valid trip_temp values.
> > > 
> > > We don't have to care about, whatever the position, the
> > > corresponding
> > > trip id will be disabled by the trip init function before calling
> > > this
> > > one and ignored in the handle_thermal_trip() function
> > 
> > hah, I missed this one and replied to your latest reply directly.
> > 
> > The thing I'm concerning is that if we don't check the return
> > value,
> > for a disabled trip point, the trip_temp (t1/t2) returned is some
> > random value, it all depends on the previous value set by last
> > successful .get_trip_temp(), and this may screw up the sorting.
> 
> The indexes array is the same size as the trip array, that makes the 
> code much less prone to errors.
> 
> To have the same number of trip points, the index of the disabled
> trip 
> must be inserted also in the array. We don't care about its position
> in 
> the indexes array because it is discarded in the handle_trip_point() 
> function anyway. For this reason, the random temperature of the
> disabled 
> trip point and the resulting position in the sorting is harmless.
> 
> It is made on purpose to ignore the return value, so we have a
> simpler code.
> 
Let's take below case for example,
say, we have three trip points 0, 1, 2, and trip point 1 is broken and
disabled.

trip temp for trip point 0 is 10 and for trip point 2 is 20.
.get_trip_temp(tz, 1, &t) fails, and t is an uninitialized random value


Initial:
   trip_indexes[0]=0,trip_indexes[1]=1,trip_indexes[2]=2
step1:
   i=0,j=1
   get trip temp for trip point trip_indexes[0]=0 and trip_indexes[1]=1
   trip point 1 returns trip temp 5, and it swaps with trip point 0
   so
   trip_indexes[0]=1,trip_indexes[1]=0,trip_indexes[2]=2
step2:
   i=0,j=2
   get trip temp for trip point trip_indexes[0]=1 and trip_indexes[2]=2
   trip point 1 returns trip temp 25, and it swaps with trip point 2
   so
   trip_indexes[0]=2,trip_indexes[1]=0,trip_indexes[2]=1

And the sorting is broken now.

please correct me if I'm missing anything.

thanks,
rui
Daniel Lezcano July 21, 2022, 9:34 a.m. UTC | #7
On 19/07/2022 16:17, Zhang Rui wrote:
> On Tue, 2022-07-19 at 09:22 +0200, Daniel Lezcano wrote:
>> On 19/07/2022 03:14, Zhang Rui wrote:
>>> On Mon, 2022-07-18 at 15:21 +0200, Daniel Lezcano wrote:
>>>>
>>>> Hi Zhang,
>>>>
>>>> thanks for the review
>>>>
>>>> On 18/07/2022 07:28, Zhang Rui wrote:
>>>>> On Fri, 2022-07-15 at 23:09 +0200, Daniel Lezcano wrote:
>>>>
>>>> [ ... ]
>>>>
>>>>>> Instead of taking the risk of breaking the existing
>>>>>> platforms,
>>>>>> use an
>>>>>> array of temperature ordered trip identifiers and make it
>>>>>> available
>>>>>> for the code needing to browse the trip points in an ordered
>>>>>> way.
>>>>>>
>>>>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
>>>>>> ---
>>>>
>>>> [ ... ]
>>>>
>>>>>> +static void sort_trips_indexes(struct thermal_zone_device
>>>>>> *tz)
>>>>>> +{
>>>>>> +       int i, j;
>>>>>> +
>>>>>> +       for (i = 0; i < tz->trips; i++)
>>>>>> +               tz->trips_indexes[i] = i;
>>>>>> +
>>>>>> +       for (i = 0; i < tz->trips; i++) {
>>>>>> +               for (j = i + 1; j < tz->trips; j++) {
>>>>>> +                       int t1, t2;
>>>>>> +
>>>>>> +                       tz->ops->get_trip_temp(tz, tz-
>>>>>>> trips_indexes[i], &t1);
>>>>>
>>>>> This line can be moved to the upper loop.
>>>>
>>>> Right, thanks!
>>>>
>>>>>> +                       tz->ops->get_trip_temp(tz, tz-
>>>>>>> trips_indexes[j], &t2);
>>>>>> +
>>>>>
>>>>> what about the disabled trip points?
>>>>>
>>>>> we should ignore those trip points and check the return value
>>>>> to
>>>>> make
>>>>> sure we're comparing the valid trip_temp values.
>>>>
>>>> We don't have to care about, whatever the position, the
>>>> corresponding
>>>> trip id will be disabled by the trip init function before calling
>>>> this
>>>> one and ignored in the handle_thermal_trip() function
>>>
>>> hah, I missed this one and replied to your latest reply directly.
>>>
>>> The thing I'm concerning is that if we don't check the return
>>> value,
>>> for a disabled trip point, the trip_temp (t1/t2) returned is some
>>> random value, it all depends on the previous value set by last
>>> successful .get_trip_temp(), and this may screw up the sorting.
>>
>> The indexes array is the same size as the trip array, that makes the
>> code much less prone to errors.
>>
>> To have the same number of trip points, the index of the disabled
>> trip
>> must be inserted also in the array. We don't care about its position
>> in
>> the indexes array because it is discarded in the handle_trip_point()
>> function anyway. For this reason, the random temperature of the
>> disabled
>> trip point and the resulting position in the sorting is harmless.
>>
>> It is made on purpose to ignore the return value, so we have a
>> simpler code.
>>
> Let's take below case for example,
> say, we have three trip points 0, 1, 2, and trip point 1 is broken and
> disabled.
> 
> trip temp for trip point 0 is 10 and for trip point 2 is 20.
> .get_trip_temp(tz, 1, &t) fails, and t is an uninitialized random value
> 
> 
> Initial:
>     trip_indexes[0]=0,trip_indexes[1]=1,trip_indexes[2]=2
> step1:
>     i=0,j=1
>     get trip temp for trip point trip_indexes[0]=0 and trip_indexes[1]=1
>     trip point 1 returns trip temp 5, and it swaps with trip point 0
>     so
>     trip_indexes[0]=1,trip_indexes[1]=0,trip_indexes[2]=2
> step2:
>     i=0,j=2
>     get trip temp for trip point trip_indexes[0]=1 and trip_indexes[2]=2
>     trip point 1 returns trip temp 25, and it swaps with trip point 2
>     so
>     trip_indexes[0]=2,trip_indexes[1]=0,trip_indexes[2]=1
> 
> And the sorting is broken now.
> 
> please correct me if I'm missing anything.

Oh, nice! Thanks for the detailed explanation.

We can initialize t1 and t2 to INT_MAX, so if the get_trip_temp() fails, 
they will be set to the maximum temperature and it will be at the end of 
the array.

Alternatively, we check the disabled bit and set the temperature to INT_MAX.
Rafael J. Wysocki July 22, 2022, 4:49 p.m. UTC | #8
On Fri, Jul 22, 2022 at 9:16 AM Zhang, Rui <rui.zhang@intel.com> wrote:
>
>
>
> > -----Original Message-----
> > From: Daniel Lezcano <daniel.lezcano@linaro.org>
> > Sent: Thursday, July 21, 2022 5:35 PM
> > To: Zhang, Rui <rui.zhang@intel.com>; rafael@kernel.org
> > Cc: quic_manafm@quicinc.com; amitk@kernel.org; lukasz.luba@arm.com;
> > linux-pm@vger.kernel.org; linux-kernel@vger.kernel.org
> > Subject: Re: [PATCH v3 3/4] thermal/core: Build ascending ordered indexes
> > for the trip points
> > Importance: High
> >
> > On 19/07/2022 16:17, Zhang Rui wrote:
> > > On Tue, 2022-07-19 at 09:22 +0200, Daniel Lezcano wrote:
> > >> On 19/07/2022 03:14, Zhang Rui wrote:
> > >>> On Mon, 2022-07-18 at 15:21 +0200, Daniel Lezcano wrote:
> > >>>>
> > >>>> Hi Zhang,
> > >>>>
> > >>>> thanks for the review
> > >>>>
> > >>>> On 18/07/2022 07:28, Zhang Rui wrote:
> > >>>>> On Fri, 2022-07-15 at 23:09 +0200, Daniel Lezcano wrote:
> > >>>>
> > >>>> [ ... ]
> > >>>>
> > >>>>>> Instead of taking the risk of breaking the existing platforms,
> > >>>>>> use an array of temperature ordered trip identifiers and make it
> > >>>>>> available for the code needing to browse the trip points in an
> > >>>>>> ordered way.
> > >>>>>>
> > >>>>>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> > >>>>>> ---
> > >>>>
> > >>>> [ ... ]
> > >>>>
> > >>>>>> +static void sort_trips_indexes(struct thermal_zone_device
> > >>>>>> *tz)
> > >>>>>> +{
> > >>>>>> +       int i, j;
> > >>>>>> +
> > >>>>>> +       for (i = 0; i < tz->trips; i++)
> > >>>>>> +               tz->trips_indexes[i] = i;
> > >>>>>> +
> > >>>>>> +       for (i = 0; i < tz->trips; i++) {
> > >>>>>> +               for (j = i + 1; j < tz->trips; j++) {
> > >>>>>> +                       int t1, t2;
> > >>>>>> +
> > >>>>>> +                       tz->ops->get_trip_temp(tz, tz-
> > >>>>>>> trips_indexes[i], &t1);
> > >>>>>
> > >>>>> This line can be moved to the upper loop.
> > >>>>
> > >>>> Right, thanks!
> > >>>>
> > >>>>>> +                       tz->ops->get_trip_temp(tz, tz-
> > >>>>>>> trips_indexes[j], &t2);
> > >>>>>> +
> > >>>>>
> > >>>>> what about the disabled trip points?
> > >>>>>
> > >>>>> we should ignore those trip points and check the return value to
> > >>>>> make sure we're comparing the valid trip_temp values.
> > >>>>
> > >>>> We don't have to care about, whatever the position, the
> > >>>> corresponding trip id will be disabled by the trip init function
> > >>>> before calling this one and ignored in the handle_thermal_trip()
> > >>>> function
> > >>>
> > >>> hah, I missed this one and replied to your latest reply directly.
> > >>>
> > >>> The thing I'm concerning is that if we don't check the return value,
> > >>> for a disabled trip point, the trip_temp (t1/t2) returned is some
> > >>> random value, it all depends on the previous value set by last
> > >>> successful .get_trip_temp(), and this may screw up the sorting.
> > >>
> > >> The indexes array is the same size as the trip array, that makes the
> > >> code much less prone to errors.
> > >>
> > >> To have the same number of trip points, the index of the disabled
> > >> trip must be inserted also in the array. We don't care about its
> > >> position in the indexes array because it is discarded in the
> > >> handle_trip_point() function anyway. For this reason, the random
> > >> temperature of the disabled trip point and the resulting position in
> > >> the sorting is harmless.
> > >>
> > >> It is made on purpose to ignore the return value, so we have a
> > >> simpler code.
> > >>
> > > Let's take below case for example,
> > > say, we have three trip points 0, 1, 2, and trip point 1 is broken and
> > > disabled.
> > >
> > > trip temp for trip point 0 is 10 and for trip point 2 is 20.
> > > .get_trip_temp(tz, 1, &t) fails, and t is an uninitialized random
> > > value
> > >
> > >
> > > Initial:
> > >     trip_indexes[0]=0,trip_indexes[1]=1,trip_indexes[2]=2
> > > step1:
> > >     i=0,j=1
> > >     get trip temp for trip point trip_indexes[0]=0 and trip_indexes[1]=1
> > >     trip point 1 returns trip temp 5, and it swaps with trip point 0
> > >     so
> > >     trip_indexes[0]=1,trip_indexes[1]=0,trip_indexes[2]=2
> > > step2:
> > >     i=0,j=2
> > >     get trip temp for trip point trip_indexes[0]=1 and trip_indexes[2]=2
> > >     trip point 1 returns trip temp 25, and it swaps with trip point 2
> > >     so
> > >     trip_indexes[0]=2,trip_indexes[1]=0,trip_indexes[2]=1
> > >
> > > And the sorting is broken now.
> > >
> > > please correct me if I'm missing anything.
> >
> > Oh, nice! Thanks for the detailed explanation.
> >
> > We can initialize t1 and t2 to INT_MAX, so if the get_trip_temp() fails, they
> > will be set to the maximum temperature and it will be at the end of the array.
> >
> > Alternatively, we check the disabled bit and set the temperature to INT_MAX.
>
> IMO, we can
> 1. get the trip temp for each trip point and cache them
> 2. set the trips_disabled bit
> 3. do the sorting using the cached trip temp values
> in thermal_zone_device_trip_init() altogether.

What about the case when the trip temperature can be set from user
space?  I guess we replace the cached value then, but it may be
necessary to sort them again in theory?
diff mbox series

Patch

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index f66036b3daae..f02f38b66445 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -355,7 +355,8 @@  static void handle_critical_trips(struct thermal_zone_device *tz,
 }
 
 static void handle_thermal_trip_crossed(struct thermal_zone_device *tz, int trip,
-					int trip_temp, int trip_hyst, enum thermal_trip_type trip_type)
+					int trip_temp, int trip_hyst,
+					enum thermal_trip_type trip_type)
 {
 	if (tz->last_temperature == THERMAL_TEMP_INVALID)
 		return;
@@ -1165,6 +1166,46 @@  static void bind_tz(struct thermal_zone_device *tz)
 	mutex_unlock(&thermal_list_lock);
 }
 
+static void sort_trips_indexes(struct thermal_zone_device *tz)
+{
+	int i, j;
+
+	for (i = 0; i < tz->trips; i++)
+		tz->trips_indexes[i] = i;
+ 
+	for (i = 0; i < tz->trips; i++) {
+		for (j = i + 1; j < tz->trips; j++) {
+			int t1, t2;
+
+			tz->ops->get_trip_temp(tz, tz->trips_indexes[i], &t1);
+			tz->ops->get_trip_temp(tz, tz->trips_indexes[j], &t2);
+
+			if (t1 > t2)
+				swap(tz->trips_indexes[i], tz->trips_indexes[j]);
+		}
+ 	}
+}
+
+static int thermal_zone_device_trip_init(struct thermal_zone_device *tz)
+{
+	enum thermal_trip_type trip_type;
+	int trip_temp, i;
+	
+	tz->trips_indexes = kzalloc(tz->trips * sizeof(int), GFP_KERNEL);
+	if (!tz->trips_indexes)
+		return -ENOMEM;
+
+	for (i = 0; i < tz->trips; i++) {
+		if (tz->ops->get_trip_type(tz, i, &trip_type) ||
+		    tz->ops->get_trip_temp(tz, i, &trip_temp) || !trip_temp)
+			set_bit(i, &tz->trips_disabled);
+	}
+
+	sort_trips_indexes(tz);
+
+	return 0;
+}
+
 /**
  * thermal_zone_device_register() - register a new thermal zone device
  * @type:	the thermal zone device type
@@ -1196,11 +1237,8 @@  thermal_zone_device_register(const char *type, int trips, int mask,
 			     int polling_delay)
 {
 	struct thermal_zone_device *tz;
-	enum thermal_trip_type trip_type;
-	int trip_temp;
 	int id;
 	int result;
-	int count;
 	struct thermal_governor *governor;
 
 	if (!type || strlen(type) == 0) {
@@ -1272,12 +1310,9 @@  thermal_zone_device_register(const char *type, int trips, int mask,
 	if (result)
 		goto release_device;
 
-	for (count = 0; count < trips; count++) {
-		if (tz->ops->get_trip_type(tz, count, &trip_type) ||
-		    tz->ops->get_trip_temp(tz, count, &trip_temp) ||
-		    !trip_temp)
-			set_bit(count, &tz->trips_disabled);
-	}
+	result = thermal_zone_device_trip_init(tz);
+	if (result)
+		goto unregister;
 
 	/* Update 'this' zone's governor information */
 	mutex_lock(&thermal_governor_lock);
@@ -1290,7 +1325,7 @@  thermal_zone_device_register(const char *type, int trips, int mask,
 	result = thermal_set_governor(tz, governor);
 	if (result) {
 		mutex_unlock(&thermal_governor_lock);
-		goto unregister;
+		goto kfree_indexes;
 	}
 
 	mutex_unlock(&thermal_governor_lock);
@@ -1298,7 +1333,7 @@  thermal_zone_device_register(const char *type, int trips, int mask,
 	if (!tz->tzp || !tz->tzp->no_hwmon) {
 		result = thermal_add_hwmon_sysfs(tz);
 		if (result)
-			goto unregister;
+			goto kfree_indexes;
 	}
 
 	mutex_lock(&thermal_list_lock);
@@ -1319,6 +1354,8 @@  thermal_zone_device_register(const char *type, int trips, int mask,
 
 	return tz;
 
+kfree_indexes:
+	kfree(tz->trips_indexes);
 unregister:
 	device_del(&tz->device);
 release_device:
@@ -1387,6 +1424,7 @@  void thermal_zone_device_unregister(struct thermal_zone_device *tz)
 	thermal_remove_hwmon_sysfs(tz);
 	ida_simple_remove(&thermal_tz_ida, tz->id);
 	ida_destroy(&tz->ida);
+	kfree(tz->trips_indexes);
 	mutex_destroy(&tz->lock);
 	device_unregister(&tz->device);
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 231bac2768fb..4c3b72536772 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -112,6 +112,7 @@  struct thermal_cooling_device {
  * @mode:		current mode of this thermal zone
  * @devdata:	private pointer for device private data
  * @trips:	number of trip points the thermal zone supports
+ * @trips_indexes:	an array of sorted trip points indexes
  * @trips_disabled;	bitmap for disabled trips
  * @passive_delay_jiffies: number of jiffies to wait between polls when
  *			performing passive cooling.
@@ -152,6 +153,7 @@  struct thermal_zone_device {
 	enum thermal_device_mode mode;
 	void *devdata;
 	int trips;
+	int *trips_indexes;
 	unsigned long trips_disabled;	/* bitmap for disabled trips */
 	unsigned long passive_delay_jiffies;
 	unsigned long polling_delay_jiffies;