===================================================================
@@ -288,6 +288,28 @@ static int __thermal_zone_device_set_mod
return 0;
}
+static void thermal_zone_broken_disable(struct thermal_zone_device *tz)
+{
+ struct thermal_trip_desc *td;
+
+ dev_err(&tz->device, "Unable to get temperature, disabling!\n");
+ /*
+ * This function only runs for enabled thermal zones, so no need to
+ * check for the current mode.
+ */
+ __thermal_zone_device_set_mode(tz, THERMAL_DEVICE_DISABLED);
+ thermal_notify_tz_disable(tz);
+
+ for_each_trip_desc(tz, td) {
+ if (td->trip.type == THERMAL_TRIP_CRITICAL &&
+ td->trip.temperature > THERMAL_TEMP_INVALID) {
+ dev_crit(&tz->device,
+ "Disabled thermal zone with critical trip point\n");
+ return;
+ }
+ }
+}
+
/*
* Zone update section: main control loop applied to each zone while monitoring
* in polling mode. The monitoring is done using a workqueue.
@@ -308,6 +330,34 @@ static void thermal_zone_device_set_poll
cancel_delayed_work(&tz->poll_queue);
}
+static void thermal_zone_recheck(struct thermal_zone_device *tz, int error)
+{
+ if (error == -EAGAIN) {
+ thermal_zone_device_set_polling(tz, THERMAL_RECHECK_DELAY);
+ return;
+ }
+
+ /*
+ * Print the message once to reduce log noise. It will be followed by
+ * another one if the temperature cannot be determined after multiple
+ * attempts.
+ */
+ if (tz->recheck_delay_jiffies == THERMAL_RECHECK_DELAY)
+ dev_info(&tz->device, "Temperature check failed (%d)\n", error);
+
+ thermal_zone_device_set_polling(tz, tz->recheck_delay_jiffies);
+
+ tz->recheck_delay_jiffies += max(tz->recheck_delay_jiffies >> 1, 1ULL);
+ if (tz->recheck_delay_jiffies > THERMAL_MAX_RECHECK_DELAY) {
+ thermal_zone_broken_disable(tz);
+ /*
+ * Restore the original recheck delay value to allow the thermal
+ * zone to try to recover when it is reenabled by user space.
+ */
+ tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY;
+ }
+}
+
static void monitor_thermal_zone(struct thermal_zone_device *tz)
{
if (tz->mode != THERMAL_DEVICE_ENABLED)
@@ -507,10 +557,7 @@ void __thermal_zone_device_update(struct
ret = __thermal_zone_get_temp(tz, &temp);
if (ret) {
- if (ret != -EAGAIN)
- dev_info(&tz->device, "Temperature check failed (%d)\n", ret);
-
- thermal_zone_device_set_polling(tz, msecs_to_jiffies(THERMAL_RECHECK_DELAY_MS));
+ thermal_zone_recheck(tz, ret);
return;
} else if (temp <= THERMAL_TEMP_INVALID) {
/*
@@ -522,6 +569,8 @@ void __thermal_zone_device_update(struct
goto monitor;
}
+ tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY;
+
tz->last_temperature = tz->temperature;
tz->temperature = temp;
@@ -1462,6 +1511,7 @@ thermal_zone_device_register_with_trips(
thermal_set_delay_jiffies(&tz->passive_delay_jiffies, passive_delay);
thermal_set_delay_jiffies(&tz->polling_delay_jiffies, polling_delay);
+ tz->recheck_delay_jiffies = THERMAL_RECHECK_DELAY;
/* sys I/F */
/* Add nodes that are always present via .groups */
===================================================================
@@ -67,6 +67,8 @@ struct thermal_governor {
* @polling_delay_jiffies: number of jiffies to wait between polls when
* checking whether trip points have been crossed (0 for
* interrupt driven systems)
+ * @recheck_delay_jiffies: delay after a failed attempt to determine the zone
+ * temperature before trying again
* @temperature: current temperature. This is only for core code,
* drivers should use thermal_zone_get_temp() to get the
* current temperature
@@ -108,6 +110,7 @@ struct thermal_zone_device {
int num_trips;
unsigned long passive_delay_jiffies;
unsigned long polling_delay_jiffies;
+ unsigned long recheck_delay_jiffies;
int temperature;
int last_temperature;
int emul_temperature;
@@ -137,10 +140,11 @@ struct thermal_zone_device {
#define THERMAL_TEMP_INIT INT_MIN
/*
- * Default delay after a failing thermal zone temperature check before
- * attempting to check it again.
+ * Default and maximum delay after a failed thermal zone temperature check
+ * before attempting to check it again (in jiffies).
*/
-#define THERMAL_RECHECK_DELAY_MS 250
+#define THERMAL_RECHECK_DELAY msecs_to_jiffies(250)
+#define THERMAL_MAX_RECHECK_DELAY (120 * HZ)
/* Default Thermal Governor */
#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE)