diff mbox series

[2/2] thermal: tegra-bpmp: Always (re)program trip temperatures

Message ID 20230207135610.3100865-2-cyndis@kapsi.fi
State New
Headers show
Series [1/2] thermal: tegra-bpmp: Handle offline zones | expand

Commit Message

Mikko Perttunen Feb. 7, 2023, 1:56 p.m. UTC
From: Mikko Perttunen <mperttunen@nvidia.com>

In the rare case that calculation of trip temperatures would result
in the same trip temperatures that were previously programmed, the
thermal core skips calling .set_trips. However, presently, if it is
not called, we may end up with no trip temperatures programmed at all.

To avoid this, make .set_trips a no-op and, in the places where it would
have been called, unconditionally program the trip temperatures to the
most recently specified values.

This also fixes the situation where a trip is triggered between
registering a thermal zone and registering the trip MRQ handler, in
which case we would also get stuck.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
---
 drivers/thermal/tegra/tegra-bpmp-thermal.c | 33 +++++++++++++++++++---
 1 file changed, 29 insertions(+), 4 deletions(-)

Comments

Thierry Reding Feb. 8, 2023, 10:43 a.m. UTC | #1
On Tue, Feb 07, 2023 at 03:56:09PM +0200, Mikko Perttunen wrote:
> From: Mikko Perttunen <mperttunen@nvidia.com>
> 
> In the rare case that calculation of trip temperatures would result
> in the same trip temperatures that were previously programmed, the
> thermal core skips calling .set_trips.

That seems like an appropriate optimization.

> However, presently, if it is not called, we may end up with no trip
> temperatures programmed at all.

I have a hard time understanding when this would happen. prev_low_trip
and prev_high_trip are -INT_MAX and INT_MAX, respectively, so these are
unlikely to be the result of anything we compute at runtime, based on
temperatures specified in DT, for example.

So I would expect ->set_trips() to get called at least once when the
thermal zones are first registered. Are you saying there are cases where
->set_trips() doesn't get called at all?

> To avoid this, make set_trips a no-op and in places where it would be
> called, instead unconditionally program trip temperatures to the last
> specified temperatures.

Again, this seems more like a workaround for an issue that exists
elsewhere. If ->set_trips() doesn't always get called when it should be,
then that's what we should fix.

> This also fixes the situation where a trip is triggered between
> registering a thermal zone and registering the trip MRQ handler, in
> which case we would also get stuck.

Could this be fixed by requesting the MRQ prior to registering the
zones? That seems like the more appropriate fix for this issue. It's
similar to how we typically register IRQ handlers before enabling a
device to make sure we don't miss any interrupts.

Thierry
diff mbox series

Patch

diff --git a/drivers/thermal/tegra/tegra-bpmp-thermal.c b/drivers/thermal/tegra/tegra-bpmp-thermal.c
index 628b18818ae9..9f69dbe1c7d4 100644
--- a/drivers/thermal/tegra/tegra-bpmp-thermal.c
+++ b/drivers/thermal/tegra/tegra-bpmp-thermal.c
@@ -67,9 +67,8 @@  static int tegra_bpmp_thermal_get_temp(struct thermal_zone_device *tz, int *out_
 	return __tegra_bpmp_thermal_get_temp(tz->devdata, out_temp);
 }
 
-static int tegra_bpmp_thermal_set_trips(struct thermal_zone_device *tz, int low, int high)
+static int tegra_bpmp_thermal_program_trips(struct tegra_bpmp_thermal_zone *zone)
 {
-	struct tegra_bpmp_thermal_zone *zone = tz->devdata;
 	struct mrq_thermal_host_to_bpmp_request req;
 	struct tegra_bpmp_message msg;
 	int err;
@@ -78,8 +77,10 @@  static int tegra_bpmp_thermal_set_trips(struct thermal_zone_device *tz, int low,
 	req.type = CMD_THERMAL_SET_TRIP;
 	req.set_trip.zone = zone->idx;
 	req.set_trip.enabled = true;
-	req.set_trip.low = low;
-	req.set_trip.high = high;
+	mutex_lock(&zone->tzd->lock);
+	req.set_trip.low = zone->tzd->prev_low_trip;
+	req.set_trip.high = zone->tzd->prev_high_trip;
+	mutex_unlock(&zone->tzd->lock);
 
 	memset(&msg, 0, sizeof(msg));
 	msg.mrq = MRQ_THERMAL;
@@ -95,14 +96,31 @@  static int tegra_bpmp_thermal_set_trips(struct thermal_zone_device *tz, int low,
 	return 0;
 }
 
+static int tegra_bpmp_thermal_set_trips(struct thermal_zone_device *tz, int low, int high)
+{
+	return 0;
+}
+
 static void tz_device_update_work_fn(struct work_struct *work)
 {
 	struct tegra_bpmp_thermal_zone *zone;
+	int err;
 
 	zone = container_of(work, struct tegra_bpmp_thermal_zone,
 			    tz_device_update_work);
 
+	/* Recalculates trip temperatures. */
 	thermal_zone_device_update(zone->tzd, THERMAL_TRIP_VIOLATED);
+
+	/*
+	 * Program trip temperatures. We must do this outside `set_trips`
+	 * since thermal core may skip calling it if the trip temperatures
+	 * are unchanged.
+	 */
+	err = tegra_bpmp_thermal_program_trips(zone);
+	if (err)
+		dev_err(zone->tegra->dev, "failed to update trip temperatures for zone '%s': %d\n",
+			zone->tzd->type, err);
 }
 
 static void bpmp_mrq_thermal(unsigned int mrq, struct tegra_bpmp_channel *ch,
@@ -293,6 +311,13 @@  static int tegra_bpmp_thermal_probe(struct platform_device *pdev)
 		return err;
 	}
 
+	for (i = 0; i < tegra->num_zones; i++) {
+		err = tegra_bpmp_thermal_program_trips(tegra->zones[i]);
+		if (err)
+			dev_err(&pdev->dev, "failed to set trip temperatures for zone '%s': %d\n",
+				tzd->type, err);
+	}
+
 	platform_set_drvdata(pdev, tegra);
 
 	return 0;