diff mbox series

[v2,2/2] arm64: dts: qcom: sm8650: setup gpu thermal with higher temperatures

Message ID 20250110-topic-sm8650-thermal-cpu-idle-v2-2-5787ad79abbb@linaro.org
State New
Headers show
Series arm64: dts: qcom: sm8650: rework CPU & GPU thermal zones | expand

Commit Message

Neil Armstrong Jan. 10, 2025, 10:36 a.m. UTC
On the SM8650, the dynamic clock and voltage scaling (DCVS) for the GPU
is done in a hardware-controlled loop by the GPU Management Unit (GMU).

Since the GMU does a better job at maintaining the GPU's temperature in an
acceptable range by taking into account more parameters like the die
characteristics or other internal sensors, it makes no sense to try
and reproduce a similar set of constraints with the Linux devfreq thermal
core.

Instead, set higher temperatures in the GPU trip points corresponding to
the temperatures provided by Qualcomm in the downstream source, which will
trigger the devfreq thermal core if the GMU cannot handle the temperature
surge, and try our best to avoid reaching the critical temperature trip
point which should trigger an inevitable thermal shutdown.

Fixes: 497624ed5506 ("arm64: dts: qcom: sm8650: Throttle the GPU when overheating")
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
---
 arch/arm64/boot/dts/qcom/sm8650.dtsi | 48 ++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

Comments

Akhil P Oommen Jan. 13, 2025, 10:28 a.m. UTC | #1
On 1/10/2025 4:06 PM, Neil Armstrong wrote:
> On the SM8650, the dynamic clock and voltage scaling (DCVS) for the GPU
> is done in an hardware controlled loop by the GPU Management Unit (GMU).
> 
> Since the GMU does a better job at maintaining the GPUs temperature in an
> acceptable range by taking in account more parameters like the die
> characteristics or other internal sensors, it makes no sense to try
> and reproduce a similar set of constraints with the Linux devfreq thermal
> core.

Just FYI, this description is incorrect. SM8650's GMU doesn't do any
sort of thermal management.

-Akhil.

> 
> Instead, set higher temperatures in the GPU trip points corresponding to
> the temperatures provided by Qualcomm in the dowstream source, which will
> trigger the devfreq thermal core if the GMU cannot handle the temperature
> surge, and try our best to avoid reaching the critical temperature trip
> point which should trigger an inevitable thermal shutdown.
> 
> Fixes: 497624ed5506 ("arm64: dts: qcom: sm8650: Throttle the GPU when overheating")
> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
> ---
>  arch/arm64/boot/dts/qcom/sm8650.dtsi | 48 ++++++++++++++++++------------------
>  1 file changed, 24 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi
> index 95509ce2713d4fcc3dbe0c5cd5827312d5681af4..e9fcf05cb084b7979ecf0f4712fed332e9f4b07a 100644
> --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi
> +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi
> @@ -6173,19 +6173,19 @@ map0 {
>  
>  			trips {
>  				gpu0_alert0: trip-point0 {
> -					temperature = <85000>;
> +					temperature = <95000>;
>  					hysteresis = <1000>;
>  					type = "passive";
>  				};
>  
>  				trip-point1 {
> -					temperature = <90000>;
> +					temperature = <115000>;
>  					hysteresis = <1000>;
>  					type = "hot";
>  				};
>  
>  				trip-point2 {
> -					temperature = <110000>;
> +					temperature = <125000>;
>  					hysteresis = <1000>;
>  					type = "critical";
>  				};
> @@ -6206,19 +6206,19 @@ map0 {
>  
>  			trips {
>  				gpu1_alert0: trip-point0 {
> -					temperature = <85000>;
> +					temperature = <95000>;
>  					hysteresis = <1000>;
>  					type = "passive";
>  				};
>  
>  				trip-point1 {
> -					temperature = <90000>;
> +					temperature = <115000>;
>  					hysteresis = <1000>;
>  					type = "hot";
>  				};
>  
>  				trip-point2 {
> -					temperature = <110000>;
> +					temperature = <125000>;
>  					hysteresis = <1000>;
>  					type = "critical";
>  				};
> @@ -6239,19 +6239,19 @@ map0 {
>  
>  			trips {
>  				gpu2_alert0: trip-point0 {
> -					temperature = <85000>;
> +					temperature = <95000>;
>  					hysteresis = <1000>;
>  					type = "passive";
>  				};
>  
>  				trip-point1 {
> -					temperature = <90000>;
> +					temperature = <115000>;
>  					hysteresis = <1000>;
>  					type = "hot";
>  				};
>  
>  				trip-point2 {
> -					temperature = <110000>;
> +					temperature = <125000>;
>  					hysteresis = <1000>;
>  					type = "critical";
>  				};
> @@ -6272,19 +6272,19 @@ map0 {
>  
>  			trips {
>  				gpu3_alert0: trip-point0 {
> -					temperature = <85000>;
> +					temperature = <95000>;
>  					hysteresis = <1000>;
>  					type = "passive";
>  				};
>  
>  				trip-point1 {
> -					temperature = <90000>;
> +					temperature = <115000>;
>  					hysteresis = <1000>;
>  					type = "hot";
>  				};
>  
>  				trip-point2 {
> -					temperature = <110000>;
> +					temperature = <125000>;
>  					hysteresis = <1000>;
>  					type = "critical";
>  				};
> @@ -6305,19 +6305,19 @@ map0 {
>  
>  			trips {
>  				gpu4_alert0: trip-point0 {
> -					temperature = <85000>;
> +					temperature = <95000>;
>  					hysteresis = <1000>;
>  					type = "passive";
>  				};
>  
>  				trip-point1 {
> -					temperature = <90000>;
> +					temperature = <115000>;
>  					hysteresis = <1000>;
>  					type = "hot";
>  				};
>  
>  				trip-point2 {
> -					temperature = <110000>;
> +					temperature = <125000>;
>  					hysteresis = <1000>;
>  					type = "critical";
>  				};
> @@ -6338,19 +6338,19 @@ map0 {
>  
>  			trips {
>  				gpu5_alert0: trip-point0 {
> -					temperature = <85000>;
> +					temperature = <95000>;
>  					hysteresis = <1000>;
>  					type = "passive";
>  				};
>  
>  				trip-point1 {
> -					temperature = <90000>;
> +					temperature = <115000>;
>  					hysteresis = <1000>;
>  					type = "hot";
>  				};
>  
>  				trip-point2 {
> -					temperature = <110000>;
> +					temperature = <125000>;
>  					hysteresis = <1000>;
>  					type = "critical";
>  				};
> @@ -6371,19 +6371,19 @@ map0 {
>  
>  			trips {
>  				gpu6_alert0: trip-point0 {
> -					temperature = <85000>;
> +					temperature = <95000>;
>  					hysteresis = <1000>;
>  					type = "passive";
>  				};
>  
>  				trip-point1 {
> -					temperature = <90000>;
> +					temperature = <115000>;
>  					hysteresis = <1000>;
>  					type = "hot";
>  				};
>  
>  				trip-point2 {
> -					temperature = <110000>;
> +					temperature = <125000>;
>  					hysteresis = <1000>;
>  					type = "critical";
>  				};
> @@ -6404,19 +6404,19 @@ map0 {
>  
>  			trips {
>  				gpu7_alert0: trip-point0 {
> -					temperature = <85000>;
> +					temperature = <95000>;
>  					hysteresis = <1000>;
>  					type = "passive";
>  				};
>  
>  				trip-point1 {
> -					temperature = <90000>;
> +					temperature = <115000>;
>  					hysteresis = <1000>;
>  					type = "hot";
>  				};
>  
>  				trip-point2 {
> -					temperature = <110000>;
> +					temperature = <125000>;
>  					hysteresis = <1000>;
>  					type = "critical";
>  				};
>
Neil Armstrong Jan. 13, 2025, 10:45 a.m. UTC | #2
Hi,

On 13/01/2025 11:28, Akhil P Oommen wrote:
> On 1/10/2025 4:06 PM, Neil Armstrong wrote:
>> On the SM8650, the dynamic clock and voltage scaling (DCVS) for the GPU
>> is done in an hardware controlled loop by the GPU Management Unit (GMU).
>>
>> Since the GMU does a better job at maintaining the GPUs temperature in an
>> acceptable range by taking in account more parameters like the die
>> characteristics or other internal sensors, it makes no sense to try
>> and reproduce a similar set of constraints with the Linux devfreq thermal
>> core.
> 
> Just FYI, this description is incorrect. SM8650's GMU doesn't do any
> sort of thermal management.

Ok, thx for confirming this, in our tests the temperature steadily stayed
at a max trip point when setting them higher. But perhaps it's a side effect
of other mitigations.

Are the new trip points still OK? They are derived from the downstream DT.

Thanks,
Neil

> 
> -Akhil.
> 
>>
>> Instead, set higher temperatures in the GPU trip points corresponding to
>> the temperatures provided by Qualcomm in the dowstream source, which will
>> trigger the devfreq thermal core if the GMU cannot handle the temperature
>> surge, and try our best to avoid reaching the critical temperature trip
>> point which should trigger an inevitable thermal shutdown.
>>
>> Fixes: 497624ed5506 ("arm64: dts: qcom: sm8650: Throttle the GPU when overheating")
>> Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
>> ---
>>   arch/arm64/boot/dts/qcom/sm8650.dtsi | 48 ++++++++++++++++++------------------
>>   1 file changed, 24 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi
>> index 95509ce2713d4fcc3dbe0c5cd5827312d5681af4..e9fcf05cb084b7979ecf0f4712fed332e9f4b07a 100644
>> --- a/arch/arm64/boot/dts/qcom/sm8650.dtsi
>> +++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi
>> @@ -6173,19 +6173,19 @@ map0 {
>>   
>>   			trips {
>>   				gpu0_alert0: trip-point0 {
>> -					temperature = <85000>;
>> +					temperature = <95000>;
>>   					hysteresis = <1000>;
>>   					type = "passive";
>>   				};
>>   
>>   				trip-point1 {
>> -					temperature = <90000>;
>> +					temperature = <115000>;
>>   					hysteresis = <1000>;
>>   					type = "hot";
>>   				};
>>   
>>   				trip-point2 {
>> -					temperature = <110000>;
>> +					temperature = <125000>;
>>   					hysteresis = <1000>;
>>   					type = "critical";
>>   				};
>> @@ -6206,19 +6206,19 @@ map0 {
>>   
>>   			trips {
>>   				gpu1_alert0: trip-point0 {
>> -					temperature = <85000>;
>> +					temperature = <95000>;
>>   					hysteresis = <1000>;
>>   					type = "passive";
>>   				};
>>   
>>   				trip-point1 {
>> -					temperature = <90000>;
>> +					temperature = <115000>;
>>   					hysteresis = <1000>;
>>   					type = "hot";
>>   				};
>>   
>>   				trip-point2 {
>> -					temperature = <110000>;
>> +					temperature = <125000>;
>>   					hysteresis = <1000>;
>>   					type = "critical";
>>   				};
>> @@ -6239,19 +6239,19 @@ map0 {
>>   
>>   			trips {
>>   				gpu2_alert0: trip-point0 {
>> -					temperature = <85000>;
>> +					temperature = <95000>;
>>   					hysteresis = <1000>;
>>   					type = "passive";
>>   				};
>>   
>>   				trip-point1 {
>> -					temperature = <90000>;
>> +					temperature = <115000>;
>>   					hysteresis = <1000>;
>>   					type = "hot";
>>   				};
>>   
>>   				trip-point2 {
>> -					temperature = <110000>;
>> +					temperature = <125000>;
>>   					hysteresis = <1000>;
>>   					type = "critical";
>>   				};
>> @@ -6272,19 +6272,19 @@ map0 {
>>   
>>   			trips {
>>   				gpu3_alert0: trip-point0 {
>> -					temperature = <85000>;
>> +					temperature = <95000>;
>>   					hysteresis = <1000>;
>>   					type = "passive";
>>   				};
>>   
>>   				trip-point1 {
>> -					temperature = <90000>;
>> +					temperature = <115000>;
>>   					hysteresis = <1000>;
>>   					type = "hot";
>>   				};
>>   
>>   				trip-point2 {
>> -					temperature = <110000>;
>> +					temperature = <125000>;
>>   					hysteresis = <1000>;
>>   					type = "critical";
>>   				};
>> @@ -6305,19 +6305,19 @@ map0 {
>>   
>>   			trips {
>>   				gpu4_alert0: trip-point0 {
>> -					temperature = <85000>;
>> +					temperature = <95000>;
>>   					hysteresis = <1000>;
>>   					type = "passive";
>>   				};
>>   
>>   				trip-point1 {
>> -					temperature = <90000>;
>> +					temperature = <115000>;
>>   					hysteresis = <1000>;
>>   					type = "hot";
>>   				};
>>   
>>   				trip-point2 {
>> -					temperature = <110000>;
>> +					temperature = <125000>;
>>   					hysteresis = <1000>;
>>   					type = "critical";
>>   				};
>> @@ -6338,19 +6338,19 @@ map0 {
>>   
>>   			trips {
>>   				gpu5_alert0: trip-point0 {
>> -					temperature = <85000>;
>> +					temperature = <95000>;
>>   					hysteresis = <1000>;
>>   					type = "passive";
>>   				};
>>   
>>   				trip-point1 {
>> -					temperature = <90000>;
>> +					temperature = <115000>;
>>   					hysteresis = <1000>;
>>   					type = "hot";
>>   				};
>>   
>>   				trip-point2 {
>> -					temperature = <110000>;
>> +					temperature = <125000>;
>>   					hysteresis = <1000>;
>>   					type = "critical";
>>   				};
>> @@ -6371,19 +6371,19 @@ map0 {
>>   
>>   			trips {
>>   				gpu6_alert0: trip-point0 {
>> -					temperature = <85000>;
>> +					temperature = <95000>;
>>   					hysteresis = <1000>;
>>   					type = "passive";
>>   				};
>>   
>>   				trip-point1 {
>> -					temperature = <90000>;
>> +					temperature = <115000>;
>>   					hysteresis = <1000>;
>>   					type = "hot";
>>   				};
>>   
>>   				trip-point2 {
>> -					temperature = <110000>;
>> +					temperature = <125000>;
>>   					hysteresis = <1000>;
>>   					type = "critical";
>>   				};
>> @@ -6404,19 +6404,19 @@ map0 {
>>   
>>   			trips {
>>   				gpu7_alert0: trip-point0 {
>> -					temperature = <85000>;
>> +					temperature = <95000>;
>>   					hysteresis = <1000>;
>>   					type = "passive";
>>   				};
>>   
>>   				trip-point1 {
>> -					temperature = <90000>;
>> +					temperature = <115000>;
>>   					hysteresis = <1000>;
>>   					type = "hot";
>>   				};
>>   
>>   				trip-point2 {
>> -					temperature = <110000>;
>> +					temperature = <125000>;
>>   					hysteresis = <1000>;
>>   					type = "critical";
>>   				};
>>
>
diff mbox series

Patch

diff --git a/arch/arm64/boot/dts/qcom/sm8650.dtsi b/arch/arm64/boot/dts/qcom/sm8650.dtsi
index 95509ce2713d4fcc3dbe0c5cd5827312d5681af4..e9fcf05cb084b7979ecf0f4712fed332e9f4b07a 100644
--- a/arch/arm64/boot/dts/qcom/sm8650.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8650.dtsi
@@ -6173,19 +6173,19 @@  map0 {
 
 			trips {
 				gpu0_alert0: trip-point0 {
-					temperature = <85000>;
+					temperature = <95000>;
 					hysteresis = <1000>;
 					type = "passive";
 				};
 
 				trip-point1 {
-					temperature = <90000>;
+					temperature = <115000>;
 					hysteresis = <1000>;
 					type = "hot";
 				};
 
 				trip-point2 {
-					temperature = <110000>;
+					temperature = <125000>;
 					hysteresis = <1000>;
 					type = "critical";
 				};
@@ -6206,19 +6206,19 @@  map0 {
 
 			trips {
 				gpu1_alert0: trip-point0 {
-					temperature = <85000>;
+					temperature = <95000>;
 					hysteresis = <1000>;
 					type = "passive";
 				};
 
 				trip-point1 {
-					temperature = <90000>;
+					temperature = <115000>;
 					hysteresis = <1000>;
 					type = "hot";
 				};
 
 				trip-point2 {
-					temperature = <110000>;
+					temperature = <125000>;
 					hysteresis = <1000>;
 					type = "critical";
 				};
@@ -6239,19 +6239,19 @@  map0 {
 
 			trips {
 				gpu2_alert0: trip-point0 {
-					temperature = <85000>;
+					temperature = <95000>;
 					hysteresis = <1000>;
 					type = "passive";
 				};
 
 				trip-point1 {
-					temperature = <90000>;
+					temperature = <115000>;
 					hysteresis = <1000>;
 					type = "hot";
 				};
 
 				trip-point2 {
-					temperature = <110000>;
+					temperature = <125000>;
 					hysteresis = <1000>;
 					type = "critical";
 				};
@@ -6272,19 +6272,19 @@  map0 {
 
 			trips {
 				gpu3_alert0: trip-point0 {
-					temperature = <85000>;
+					temperature = <95000>;
 					hysteresis = <1000>;
 					type = "passive";
 				};
 
 				trip-point1 {
-					temperature = <90000>;
+					temperature = <115000>;
 					hysteresis = <1000>;
 					type = "hot";
 				};
 
 				trip-point2 {
-					temperature = <110000>;
+					temperature = <125000>;
 					hysteresis = <1000>;
 					type = "critical";
 				};
@@ -6305,19 +6305,19 @@  map0 {
 
 			trips {
 				gpu4_alert0: trip-point0 {
-					temperature = <85000>;
+					temperature = <95000>;
 					hysteresis = <1000>;
 					type = "passive";
 				};
 
 				trip-point1 {
-					temperature = <90000>;
+					temperature = <115000>;
 					hysteresis = <1000>;
 					type = "hot";
 				};
 
 				trip-point2 {
-					temperature = <110000>;
+					temperature = <125000>;
 					hysteresis = <1000>;
 					type = "critical";
 				};
@@ -6338,19 +6338,19 @@  map0 {
 
 			trips {
 				gpu5_alert0: trip-point0 {
-					temperature = <85000>;
+					temperature = <95000>;
 					hysteresis = <1000>;
 					type = "passive";
 				};
 
 				trip-point1 {
-					temperature = <90000>;
+					temperature = <115000>;
 					hysteresis = <1000>;
 					type = "hot";
 				};
 
 				trip-point2 {
-					temperature = <110000>;
+					temperature = <125000>;
 					hysteresis = <1000>;
 					type = "critical";
 				};
@@ -6371,19 +6371,19 @@  map0 {
 
 			trips {
 				gpu6_alert0: trip-point0 {
-					temperature = <85000>;
+					temperature = <95000>;
 					hysteresis = <1000>;
 					type = "passive";
 				};
 
 				trip-point1 {
-					temperature = <90000>;
+					temperature = <115000>;
 					hysteresis = <1000>;
 					type = "hot";
 				};
 
 				trip-point2 {
-					temperature = <110000>;
+					temperature = <125000>;
 					hysteresis = <1000>;
 					type = "critical";
 				};
@@ -6404,19 +6404,19 @@  map0 {
 
 			trips {
 				gpu7_alert0: trip-point0 {
-					temperature = <85000>;
+					temperature = <95000>;
 					hysteresis = <1000>;
 					type = "passive";
 				};
 
 				trip-point1 {
-					temperature = <90000>;
+					temperature = <115000>;
 					hysteresis = <1000>;
 					type = "hot";
 				};
 
 				trip-point2 {
-					temperature = <110000>;
+					temperature = <125000>;
 					hysteresis = <1000>;
 					type = "critical";
 				};