diff mbox series

[v1,1/3] thermal: core: Make thermal_zone_device_unregister() return after freeing the zone

Message ID 13414639.uLZWGnKmhe@kreacher
State New
Headers show
Series thermal: core: Remove thermal zones during unregistration | expand

Commit Message

Rafael J. Wysocki Dec. 8, 2023, 7:13 p.m. UTC
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Make thermal_zone_device_unregister() wait until all of the references
to the given thermal zone object have been dropped and free it before
returning.

This guarantees that when thermal_zone_device_unregister() returns,
there is no leftover activity regarding the thermal zone in question
which is required by some of its callers (for instance, modular driver
code that wants to know when it is safe to let the module go away).

Subsequently, this will allow some confusing device_is_registered()
checks to be dropped from the thermal sysfs and core code.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/thermal_core.c |    6 +++++-
 include/linux/thermal.h        |    2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

Comments

Daniel Lezcano Dec. 11, 2023, 4:28 p.m. UTC | #1
On 08/12/2023 20:13, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> Make thermal_zone_device_unregister() wait until all of the references
> to the given thermal zone object have been dropped and free it before
> returning.
> 
> This guarantees that when thermal_zone_device_unregister() returns,
> there is no leftover activity regarding the thermal zone in question
> which is required by some of its callers (for instance, modular driver
> code that wants to know when it is safe to let the module go away).
> 
> Subsequently, this will allow some confusing device_is_registered()
> checks to be dropped from the thermal sysfs and core code.
> 
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---

Definitively agree on the change

Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>

Would it make sense to use kref_get/put ?


>   drivers/thermal/thermal_core.c |    6 +++++-
>   include/linux/thermal.h        |    2 ++
>   2 files changed, 7 insertions(+), 1 deletion(-)
> 
> Index: linux-pm/drivers/thermal/thermal_core.c
> ===================================================================
> --- linux-pm.orig/drivers/thermal/thermal_core.c
> +++ linux-pm/drivers/thermal/thermal_core.c
> @@ -822,7 +822,7 @@ static void thermal_release(struct devic
>   		tz = to_thermal_zone(dev);
>   		thermal_zone_destroy_device_groups(tz);
>   		mutex_destroy(&tz->lock);
> -		kfree(tz);
> +		complete(&tz->removal);
>   	} else if (!strncmp(dev_name(dev), "cooling_device",
>   			    sizeof("cooling_device") - 1)) {
>   		cdev = to_cooling_device(dev);
> @@ -1315,6 +1315,7 @@ thermal_zone_device_register_with_trips(
>   	INIT_LIST_HEAD(&tz->thermal_instances);
>   	ida_init(&tz->ida);
>   	mutex_init(&tz->lock);
> +	init_completion(&tz->removal);
>   	id = ida_alloc(&thermal_tz_ida, GFP_KERNEL);
>   	if (id < 0) {
>   		result = id;
> @@ -1494,6 +1495,9 @@ void thermal_zone_device_unregister(stru
>   	put_device(&tz->device);
>   
>   	thermal_notify_tz_delete(tz_id);
> +
> +	wait_for_completion(&tz->removal);
> +	kfree(tz);
>   }
>   EXPORT_SYMBOL_GPL(thermal_zone_device_unregister);
>   
> Index: linux-pm/include/linux/thermal.h
> ===================================================================
> --- linux-pm.orig/include/linux/thermal.h
> +++ linux-pm/include/linux/thermal.h
> @@ -117,6 +117,7 @@ struct thermal_cooling_device {
>    * @id:		unique id number for each thermal zone
>    * @type:	the thermal zone device type
>    * @device:	&struct device for this thermal zone
> + * @removal:	removal completion
>    * @trip_temp_attrs:	attributes for trip points for sysfs: trip temperature
>    * @trip_type_attrs:	attributes for trip points for sysfs: trip type
>    * @trip_hyst_attrs:	attributes for trip points for sysfs: trip hysteresis
> @@ -156,6 +157,7 @@ struct thermal_zone_device {
>   	int id;
>   	char type[THERMAL_NAME_LENGTH];
>   	struct device device;
> +	struct completion removal;
>   	struct attribute_group trips_attribute_group;
>   	struct thermal_attr *trip_temp_attrs;
>   	struct thermal_attr *trip_type_attrs;
> 
> 
>
Rafael J. Wysocki Dec. 11, 2023, 4:42 p.m. UTC | #2
On Mon, Dec 11, 2023 at 5:28 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> On 08/12/2023 20:13, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > Make thermal_zone_device_unregister() wait until all of the references
> > to the given thermal zone object have been dropped and free it before
> > returning.
> >
> > This guarantees that when thermal_zone_device_unregister() returns,
> > there is no leftover activity regarding the thermal zone in question
> > which is required by some of its callers (for instance, modular driver
> > code that wants to know when it is safe to let the module go away).
> >
> > Subsequently, this will allow some confusing device_is_registered()
> > checks to be dropped from the thermal sysfs and core code.
> >
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
>
> Definitively agree on the change
>
> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>

Thanks!

> Would it make sense to use kref_get/put ?

Why and where?
Daniel Lezcano Dec. 11, 2023, 5:34 p.m. UTC | #3
On 11/12/2023 17:42, Rafael J. Wysocki wrote:
> On Mon, Dec 11, 2023 at 5:28 PM Daniel Lezcano
> <daniel.lezcano@linaro.org> wrote:
>>
>> On 08/12/2023 20:13, Rafael J. Wysocki wrote:
>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>
>>> Make thermal_zone_device_unregister() wait until all of the references
>>> to the given thermal zone object have been dropped and free it before
>>> returning.
>>>
>>> This guarantees that when thermal_zone_device_unregister() returns,
>>> there is no leftover activity regarding the thermal zone in question
>>> which is required by some of its callers (for instance, modular driver
>>> code that wants to know when it is safe to let the module go away).
>>>
>>> Subsequently, this will allow some confusing device_is_registered()
>>> checks to be dropped from the thermal sysfs and core code.
>>>
>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>> ---
>>
>> Definitively agree on the change
>>
>> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> 
> Thanks!
> 
>> Would it make sense to use kref_get/put ?
> 
> Why and where?

Well it is a general question. Usually this kind of removal is tied with 
a refcount
Rafael J. Wysocki Dec. 11, 2023, 5:54 p.m. UTC | #4
On Mon, Dec 11, 2023 at 6:35 PM Daniel Lezcano
<daniel.lezcano@linaro.org> wrote:
>
> On 11/12/2023 17:42, Rafael J. Wysocki wrote:
> > On Mon, Dec 11, 2023 at 5:28 PM Daniel Lezcano
> > <daniel.lezcano@linaro.org> wrote:
> >>
> >> On 08/12/2023 20:13, Rafael J. Wysocki wrote:
> >>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>
> >>> Make thermal_zone_device_unregister() wait until all of the references
> >>> to the given thermal zone object have been dropped and free it before
> >>> returning.
> >>>
> >>> This guarantees that when thermal_zone_device_unregister() returns,
> >>> there is no leftover activity regarding the thermal zone in question
> >>> which is required by some of its callers (for instance, modular driver
> >>> code that wants to know when it is safe to let the module go away).
> >>>
> >>> Subsequently, this will allow some confusing device_is_registered()
> >>> checks to be dropped from the thermal sysfs and core code.
> >>>
> >>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>> ---
> >>
> >> Definitively agree on the change
> >>
> >> Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> >
> > Thanks!
> >
> >> Would it make sense to use kref_get/put ?
> >
> > Why and where?
>
> Well it is a general question. Usually this kind of removal is tied with
> a refcount

It is tied to a refcount already, but the problem is that the last
reference can be dropped from a thread concurrent to the removal one.

The completion effectively causes the removal thread to wait for the
refcont to become 0.
diff mbox series

Patch

Index: linux-pm/drivers/thermal/thermal_core.c
===================================================================
--- linux-pm.orig/drivers/thermal/thermal_core.c
+++ linux-pm/drivers/thermal/thermal_core.c
@@ -822,7 +822,7 @@  static void thermal_release(struct devic
 		tz = to_thermal_zone(dev);
 		thermal_zone_destroy_device_groups(tz);
 		mutex_destroy(&tz->lock);
-		kfree(tz);
+		complete(&tz->removal);
 	} else if (!strncmp(dev_name(dev), "cooling_device",
 			    sizeof("cooling_device") - 1)) {
 		cdev = to_cooling_device(dev);
@@ -1315,6 +1315,7 @@  thermal_zone_device_register_with_trips(
 	INIT_LIST_HEAD(&tz->thermal_instances);
 	ida_init(&tz->ida);
 	mutex_init(&tz->lock);
+	init_completion(&tz->removal);
 	id = ida_alloc(&thermal_tz_ida, GFP_KERNEL);
 	if (id < 0) {
 		result = id;
@@ -1494,6 +1495,9 @@  void thermal_zone_device_unregister(stru
 	put_device(&tz->device);
 
 	thermal_notify_tz_delete(tz_id);
+
+	wait_for_completion(&tz->removal);
+	kfree(tz);
 }
 EXPORT_SYMBOL_GPL(thermal_zone_device_unregister);
 
Index: linux-pm/include/linux/thermal.h
===================================================================
--- linux-pm.orig/include/linux/thermal.h
+++ linux-pm/include/linux/thermal.h
@@ -117,6 +117,7 @@  struct thermal_cooling_device {
  * @id:		unique id number for each thermal zone
  * @type:	the thermal zone device type
  * @device:	&struct device for this thermal zone
+ * @removal:	removal completion
  * @trip_temp_attrs:	attributes for trip points for sysfs: trip temperature
  * @trip_type_attrs:	attributes for trip points for sysfs: trip type
  * @trip_hyst_attrs:	attributes for trip points for sysfs: trip hysteresis
@@ -156,6 +157,7 @@  struct thermal_zone_device {
 	int id;
 	char type[THERMAL_NAME_LENGTH];
 	struct device device;
+	struct completion removal;
 	struct attribute_group trips_attribute_group;
 	struct thermal_attr *trip_temp_attrs;
 	struct thermal_attr *trip_type_attrs;