diff mbox series

[v4,10/30] thermal/drivers/exynos: Replace of_thermal_is_trip_valid() by thermal_zone_get_trip()

Message ID 20220921094244.606948-11-daniel.lezcano@linaro.org
State Superseded
Headers show
Series Rework the trip points creation | expand

Commit Message

Daniel Lezcano Sept. 21, 2022, 9:42 a.m. UTC
The thermal_zone_get_trip() does the same check as
of_thermal_is_trip_valid(). Replace the call to
of_thermal_is_trip_valid() by thermal_zone_get_trip().

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/samsung/exynos_tmu.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Comments

Marek Szyprowski Sept. 23, 2022, 2:09 p.m. UTC | #1
Hi Daniel,

On 21.09.2022 11:42, Daniel Lezcano wrote:
> The thermal_zone_get_trip() does the same check as
> of_thermal_is_trip_valid(). Replace the call to
> of_thermal_is_trip_valid() by thermal_zone_get_trip().
>
> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
> ---

This patch landed in linux next-20220923 as commit 4a71bb8005ba 
("thermal/drivers/exynos: Replace of_thermal_is_trip_valid() by 
thermal_zone_get_trip()"). Unfortunately it causes deadlock on all 
Exynos based board:

============================================
WARNING: possible recursive locking detected
6.0.0-rc1-00062-g4a71bb8005ba #12855 Not tainted
--------------------------------------------
swapper/0/1 is trying to acquire lock:
c263c394 (&tz->lock){+.+.}-{3:3}, at: 
thermal_zone_device_update.part.0+0x114/0x538

but task is already holding lock:
c263c394 (&tz->lock){+.+.}-{3:3}, at: 
thermal_zone_device_update.part.0+0x3c/0x538

other info that might help us debug this:
  Possible unsafe locking scenario:

        CPU0
        ----
   lock(&tz->lock);
   lock(&tz->lock);

  *** DEADLOCK ***

  May be due to missing lock nesting notation

2 locks held by swapper/0/1:
  #0: c1d5248c (&dev->mutex){....}-{3:3}, at: __driver_attach+0xe4/0x1f0
  #1: c263c394 (&tz->lock){+.+.}-{3:3}, at: 
thermal_zone_device_update.part.0+0x3c/0x538

stack backtrace:
CPU: 1 PID: 1 Comm: swapper/0 Not tainted 6.0.0-rc1-00062-g4a71bb8005ba 
#12855
Hardware name: Samsung Exynos (Flattened Device Tree)
  unwind_backtrace from show_stack+0x10/0x14
  show_stack from dump_stack_lvl+0x58/0x70
  dump_stack_lvl from __lock_acquire+0x146c/0x2a7c
  __lock_acquire from lock_acquire+0x124/0x3e4
  lock_acquire from __mutex_lock+0x90/0x948
  __mutex_lock from mutex_lock_nested+0x1c/0x24
  mutex_lock_nested from thermal_zone_device_update.part.0+0x114/0x538
  thermal_zone_device_update.part.0 from 
thermal_zone_device_set_mode+0x70/0x98
  thermal_zone_device_set_mode from thermal_of_zone_register+0x424/0x69c
  thermal_of_zone_register from devm_thermal_of_zone_register+0x58/0x94
  devm_thermal_of_zone_register from exynos_tmu_probe+0x29c/0x728
  exynos_tmu_probe from platform_probe+0x5c/0xb8
  platform_probe from really_probe+0xe0/0x414
  really_probe from __driver_probe_device+0xa0/0x208
  __driver_probe_device from driver_probe_device+0x30/0xc0
  driver_probe_device from __driver_attach+0xf0/0x1f0
  __driver_attach from bus_for_each_dev+0x70/0xb0
  bus_for_each_dev from bus_add_driver+0x174/0x218
  bus_add_driver from driver_register+0x88/0x11c
  driver_register from do_one_initcall+0x64/0x380
  do_one_initcall from kernel_init_freeable+0x1c0/0x224
  kernel_init_freeable from kernel_init+0x18/0x12c
  kernel_init from ret_from_fork+0x14/0x2c
Exception stack(0xf082dfb0 to 0xf082dff8)

[deadlock]

Something is wrong in locking in the functions from the above stacktrace.


>   drivers/thermal/samsung/exynos_tmu.c | 9 ++++++---
>   1 file changed, 6 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
> index 91e6860b5ec4..34b460092308 100644
> --- a/drivers/thermal/samsung/exynos_tmu.c
> +++ b/drivers/thermal/samsung/exynos_tmu.c
> @@ -554,13 +554,14 @@ static void exynos4210_tmu_control(struct platform_device *pdev, bool on)
>   {
>   	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
>   	struct thermal_zone_device *tz = data->tzd;
> +	struct thermal_trip trip;
>   	unsigned int con, interrupt_en = 0, i;
>   
>   	con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL));
>   
>   	if (on) {
>   		for (i = 0; i < data->ntrip; i++) {
> -			if (!of_thermal_is_trip_valid(tz, i))
> +			if (thermal_zone_get_trip(tz, i, &trip))
>   				continue;
>   
>   			interrupt_en |=
> @@ -584,13 +585,14 @@ static void exynos5433_tmu_control(struct platform_device *pdev, bool on)
>   {
>   	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
>   	struct thermal_zone_device *tz = data->tzd;
> +	struct thermal_trip trip;
>   	unsigned int con, interrupt_en = 0, pd_det_en, i;
>   
>   	con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL));
>   
>   	if (on) {
>   		for (i = 0; i < data->ntrip; i++) {
> -			if (!of_thermal_is_trip_valid(tz, i))
> +			if (thermal_zone_get_trip(tz, i, &trip))
>   				continue;
>   
>   			interrupt_en |=
> @@ -615,13 +617,14 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on)
>   {
>   	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
>   	struct thermal_zone_device *tz = data->tzd;
> +	struct thermal_trip trip;
>   	unsigned int con, interrupt_en = 0, i;
>   
>   	con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL));
>   
>   	if (on) {
>   		for (i = 0; i < data->ntrip; i++) {
> -			if (!of_thermal_is_trip_valid(tz, i))
> +			if (thermal_zone_get_trip(tz, i, &trip))
>   				continue;
>   
>   			interrupt_en |=

Best regards
Daniel Lezcano Sept. 23, 2022, 5:40 p.m. UTC | #2
Hi Marek,

thanks for reporting

On 23/09/2022 16:09, Marek Szyprowski wrote:

[ ... ]

> Exception stack(0xf082dfb0 to 0xf082dff8)
> 
> [deadlock]
> 
> Something is wrong in locking in the functions from the above stacktrace.

Are you sure this deadlock is coming from this patch? Does a revert of 
this patch solve the issue ?
Marek Szyprowski Sept. 23, 2022, 10:02 p.m. UTC | #3
Hi Daniel,

On 23.09.2022 19:40, Daniel Lezcano wrote:
>
> Hi Marek,
>
> thanks for reporting
>
> On 23/09/2022 16:09, Marek Szyprowski wrote:
>
> [ ... ]
>
>> Exception stack(0xf082dfb0 to 0xf082dff8)
>>
>> [deadlock]
>>
>> Something is wrong in locking in the functions from the above 
>> stacktrace.
>
> Are you sure this deadlock is coming from this patch? Does a revert of 
> this patch solve the issue ?
>
Ups, my fault. It looks that I've copied SHA from the wrong window while 
preparing the report. The bisection pointed to the 78ffa3e58d93 
("thermal/core: Add a generic thermal_zone_get_trip() function") commit 
and I've already found how to fix the deadlock. I will report it again 
under the proper patch.


Best regards
diff mbox series

Patch

diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index 91e6860b5ec4..34b460092308 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c
@@ -554,13 +554,14 @@  static void exynos4210_tmu_control(struct platform_device *pdev, bool on)
 {
 	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	struct thermal_zone_device *tz = data->tzd;
+	struct thermal_trip trip;
 	unsigned int con, interrupt_en = 0, i;
 
 	con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL));
 
 	if (on) {
 		for (i = 0; i < data->ntrip; i++) {
-			if (!of_thermal_is_trip_valid(tz, i))
+			if (thermal_zone_get_trip(tz, i, &trip))
 				continue;
 
 			interrupt_en |=
@@ -584,13 +585,14 @@  static void exynos5433_tmu_control(struct platform_device *pdev, bool on)
 {
 	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	struct thermal_zone_device *tz = data->tzd;
+	struct thermal_trip trip;
 	unsigned int con, interrupt_en = 0, pd_det_en, i;
 
 	con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL));
 
 	if (on) {
 		for (i = 0; i < data->ntrip; i++) {
-			if (!of_thermal_is_trip_valid(tz, i))
+			if (thermal_zone_get_trip(tz, i, &trip))
 				continue;
 
 			interrupt_en |=
@@ -615,13 +617,14 @@  static void exynos7_tmu_control(struct platform_device *pdev, bool on)
 {
 	struct exynos_tmu_data *data = platform_get_drvdata(pdev);
 	struct thermal_zone_device *tz = data->tzd;
+	struct thermal_trip trip;
 	unsigned int con, interrupt_en = 0, i;
 
 	con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL));
 
 	if (on) {
 		for (i = 0; i < data->ntrip; i++) {
-			if (!of_thermal_is_trip_valid(tz, i))
+			if (thermal_zone_get_trip(tz, i, &trip))
 				continue;
 
 			interrupt_en |=