diff mbox series

[4/4] thermal: thermal_core: Allow rebooting after critical temp

Message ID 20230825102453.836627-4-festevam@gmail.com
State New
Headers show
Series [1/4] dt-bindings: thermal-zones: Document critical-action | expand

Commit Message

Fabio Estevam Aug. 25, 2023, 10:24 a.m. UTC
From: Fabio Estevam <festevam@denx.de>

Currently, the default mechanism is to trigger a shutdown after the
critical temperature is reached.

In some embedded use cases, this behavior is not suitable, as the board may
be unattended in the field and rebooting may be a better approach.

The bootloader may also check the temperature and only allow the boot to
proceed when the temperature is below a certain threshold.

Introduce support for allowing a reboot to be triggered after the
critical temperature is reached.

If the "critical-action" devicetree property is not found, fall back to
the shutdown action to preserve the existing default behavior.

Tested on an i.MX8MM board with the following devicetree changes:

	thermal-zones {
		cpu-thermal {
			critical-action = <THERMAL_CRITICAL_ACTION_REBOOT>;
		};
	};
	
Signed-off-by: Fabio Estevam <festevam@denx.de>
---
 drivers/thermal/thermal_core.c |  8 +++++++-
 drivers/thermal/thermal_of.c   | 17 ++++++++++++++---
 include/linux/thermal.h        |  6 ++++++
 3 files changed, 27 insertions(+), 4 deletions(-)
diff mbox series

Patch

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index cc2b5e81c620..3f4ea27560f8 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -320,11 +320,17 @@  void thermal_zone_device_critical(struct thermal_zone_device *tz)
 	 * Its a must for forced_emergency_poweroff_work to be scheduled.
 	 */
 	int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
+	void (*hw_protection_action)(const char *reason, int ms_until_forced);
 
 	dev_emerg(&tz->device, "%s: critical temperature reached, "
 		  "shutting down\n", tz->type);
 
-	hw_protection_shutdown("Temperature too high", poweroff_delay_ms);
+	hw_protection_action = hw_protection_shutdown;
+
+	if (tz->action == THERMAL_CRITICAL_ACTION_REBOOT)
+		hw_protection_action = hw_protection_reboot;
+
+	hw_protection_action("Temperature too high", poweroff_delay_ms);
 }
 EXPORT_SYMBOL(thermal_zone_device_critical);
 
diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index 330690a3a208..36a2c82d3405 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -218,7 +218,8 @@  static struct device_node *of_thermal_zone_find(struct device_node *sensor, int
 	return tz;
 }
 
-static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdelay)
+static int thermal_of_monitor_init(struct device_node *np, int *delay,
+				   int *pdelay, int *critical_action)
 {
 	int ret;
 
@@ -234,6 +235,14 @@  static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdel
 		return ret;
 	}
 
+	/*
+	 * If the "critical-action" property is not found, fall back to
+	 * the shutdown action to keep the existing behavior.
+	 */
+	ret = of_property_read_u32(np, "critical-action", critical_action);
+	if (ret < 0)
+		*critical_action = THERMAL_CRITICAL_ACTION_SHUTDOWN;
+
 	return 0;
 }
 
@@ -471,7 +480,7 @@  static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
 	struct thermal_zone_params tzp = {};
 	struct thermal_zone_device_ops *of_ops;
 	struct device_node *np;
-	int delay, pdelay;
+	int delay, pdelay, critical_action;
 	int ntrips, mask;
 	int ret;
 
@@ -494,7 +503,7 @@  static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
 		goto out_kfree_of_ops;
 	}
 
-	ret = thermal_of_monitor_init(np, &delay, &pdelay);
+	ret = thermal_of_monitor_init(np, &delay, &pdelay, &critical_action);
 	if (ret) {
 		pr_err("Failed to initialize monitoring delays from %pOFn\n", np);
 		goto out_kfree_trips;
@@ -516,6 +525,8 @@  static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
 		goto out_kfree_trips;
 	}
 
+	tz->action = critical_action;
+
 	ret = thermal_zone_device_enable(tz);
 	if (ret) {
 		pr_err("Failed to enabled thermal zone '%s', id=%d: %d\n",
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index dee66ade89a0..48f29ab16218 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -34,6 +34,11 @@  struct thermal_cooling_device;
 struct thermal_instance;
 struct thermal_attr;
 
+enum thermal_action {
+	THERMAL_CRITICAL_ACTION_SHUTDOWN, /* shutdown when crit temperature is reached */
+	THERMAL_CRITICAL_ACTION_REBOOT, /* reboot when crit temperature is reached */
+};
+
 enum thermal_trend {
 	THERMAL_TREND_STABLE, /* temperature is stable */
 	THERMAL_TREND_RAISING, /* temperature is raising */
@@ -185,6 +190,7 @@  struct thermal_zone_device {
 	struct list_head node;
 	struct delayed_work poll_queue;
 	enum thermal_notify_event notify_event;
+	enum thermal_action action;
 };
 
 /**