diff mbox series

[v2,1/2] thermal: intel: int340x: Add throttling control interface to PTC

Message ID 20250613214923.2910397-1-srinivas.pandruvada@linux.intel.com
State New
Headers show
Series [v2,1/2] thermal: intel: int340x: Add throttling control interface to PTC | expand

Commit Message

Srinivas Pandruvada June 13, 2025, 9:49 p.m. UTC
Firmware-based thermal temperature control loops may aggressively
throttle performance to prevent temperature overshoots relative to the
defined target temperature. This can negatively impact performance. User
space may prefer to prioritize performance, even if it results in
temperature overshoots within an acceptable range.

For example, user space might tolerate temperature overshoots when the
device is placed on a desk, as opposed to when it's on a lap. To
accommodate such scenarios, an optional attribute is provided to specify
a tolerance level for temperature overshoots while maintaining acceptable
performance.

Attribute:
thermal_tolerance: This attribute ranges from 0 to 7, where 0 represents
the most aggressive control to avoid any temperature overshoots, and 7
represents a more graceful approach, favoring performance even at the
expense of temperature overshoots.
Note: This level may not scale linearly. For example, a value of 3 does not
necessarily imply a 50% improvement in performance compared to a value of
0.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
---
v2:
- Changed commit description
- Change "gain" to "thermal_tolerance" analogous to latency_tolerance.
- Dropped "min_performance" attribute for next patch set

 Documentation/driver-api/thermal/intel_dptf.rst          | 9 +++++++++
 .../intel/int340x_thermal/platform_temperature_control.c | 8 +++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

Comments

Zhang, Rui June 16, 2025, 12:46 a.m. UTC | #1
On Fri, 2025-06-13 at 14:49 -0700, Srinivas Pandruvada wrote:
> Firmware-based thermal temperature control loops may aggressively
> throttle performance to prevent temperature overshoots relative to the
> defined target temperature. This can negatively impact performance.
> User
> space may prefer to prioritize performance, even if it results in
> temperature overshoots with in acceptable range.
> 
> For example, user space might tolerate temperature overshoots when the
> device is placed on a desk, as opposed to when it's on a lap. To
> accommodate such scenarios, an optional attribute is provided to
> specify
> a tolerance level for temperature overshoots while maintaining
> acceptable
> performance.
> 
> Attribute:
> thermal_tolerance:

yeah, this is much better to me.

>  This attribute ranges from 0 to 7, where 0 represents
> the most aggressive control to avoid any temperature overshoots, and 7
> represents a more graceful approach, favoring performance even at the
> expense of temperature overshoots.
> Note: This level may not scale linearly. For example, a value of 3 does
> not
> necessarily imply a 50% improvement in performance compared to a value
> of
> 0.
> 
> Signed-off-by: Srinivas Pandruvada
> <srinivas.pandruvada@linux.intel.com>

Reviewed-by: Zhang Rui <rui.zhang@intel.com>

-rui
> ---
> v2:
> - Changed commit description
> - Change "gain" to "thermal_tolerance" analogous to latency_tolerance.
> - Dropped "min_performance" attribute for next patch set
> 
>  Documentation/driver-api/thermal/intel_dptf.rst          | 9 +++++++++
>  .../intel/int340x_thermal/platform_temperature_control.c | 8 +++++++-
>  2 files changed, 16 insertions(+), 1 deletion(-)
> 
> diff --git a/Documentation/driver-api/thermal/intel_dptf.rst
> b/Documentation/driver-api/thermal/intel_dptf.rst
> index ec5769accae0..c51ac793dc06 100644
> --- a/Documentation/driver-api/thermal/intel_dptf.rst
> +++ b/Documentation/driver-api/thermal/intel_dptf.rst
> @@ -206,6 +206,15 @@ All these controls needs admin privilege to
> update.
>  	Update a new temperature target in milli degree celsius for
> hardware to
>  	use for the temperature control.
>  
> +``thermal_tolerance`` (RW)
> +	This attribute ranges from 0 to 7, where 0 represents
> +	the most aggressive control to avoid any temperature
> overshoots, and
> +	7 represents a more graceful approach, favoring performance
> even at
> +	the expense of temperature overshoots.
> +	Note: This level may not scale linearly. For example, a value
> of 3 does
> +	not necessarily imply a 50% improvement in performance
> compared to a
> +	value of 0.
> +
>  Given that this is platform temperature control, it is expected that a
>  single user-level manager owns and manages the controls. If multiple
>  user-level software applications attempt to write different targets,
> it
> diff --git
> a/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
> b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
> index 2d6504514893..7850e91a6e2c 100644
> ---
> a/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
> +++
> b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
> @@ -49,7 +49,7 @@ struct mmio_reg {
>  };
>  
>  #define MAX_ATTR_GROUP_NAME_LEN	32
> -#define PTC_MAX_ATTRS		3
> +#define PTC_MAX_ATTRS		4
>  
>  struct ptc_data {
>  	u32 offset;
> @@ -57,6 +57,7 @@ struct ptc_data {
>  	struct attribute *ptc_attrs[PTC_MAX_ATTRS];
>  	struct device_attribute temperature_target_attr;
>  	struct device_attribute enable_attr;
> +	struct device_attribute thermal_tolerance_attr;
>  	char group_name[MAX_ATTR_GROUP_NAME_LEN];
>  };
>  
> @@ -78,6 +79,7 @@ static u32 ptc_offsets[PTC_MAX_INSTANCES] = {0x5B20,
> 0x5B28, 0x5B30};
>  static const char * const ptc_strings[] = {
>  	"temperature_target",
>  	"enable",
> +	"thermal_tolerance",
>  	NULL
>  };
>  
> @@ -177,6 +179,8 @@ PTC_SHOW(temperature_target);
>  PTC_STORE(temperature_target);
>  PTC_SHOW(enable);
>  PTC_STORE(enable);
> +PTC_SHOW(thermal_tolerance);
> +PTC_STORE(thermal_tolerance);
>  
>  #define ptc_init_attribute(_name)\
>  	do {\
> @@ -193,9 +197,11 @@ static int ptc_create_groups(struct pci_dev *pdev,
> int instance, struct ptc_data
>  
>  	ptc_init_attribute(temperature_target);
>  	ptc_init_attribute(enable);
> +	ptc_init_attribute(thermal_tolerance);
>  
>  	data->ptc_attrs[index++] = &data-
> >temperature_target_attr.attr;
>  	data->ptc_attrs[index++] = &data->enable_attr.attr;
> +	data->ptc_attrs[index++] = &data->thermal_tolerance_attr.attr;
>  	data->ptc_attrs[index] = NULL;
>  
>  	snprintf(data->group_name, MAX_ATTR_GROUP_NAME_LEN,
Rafael J. Wysocki June 16, 2025, 12:05 p.m. UTC | #2
On Mon, Jun 16, 2025 at 2:47 AM Zhang, Rui <rui.zhang@intel.com> wrote:
>
> On Fri, 2025-06-13 at 14:49 -0700, Srinivas Pandruvada wrote:
> > Firmware-based thermal temperature control loops may aggressively
> > throttle performance to prevent temperature overshoots relative to the
> > defined target temperature. This can negatively impact performance.
> > User
> > space may prefer to prioritize performance, even if it results in
> > temperature overshoots with in acceptable range.
> >
> > For example, user space might tolerate temperature overshoots when the
> > device is placed on a desk, as opposed to when it's on a lap. To
> > accommodate such scenarios, an optional attribute is provided to
> > specify
> > a tolerance level for temperature overshoots while maintaining
> > acceptable
> > performance.
> >
> > Attribute:
> > thermal_tolerance:
>
> yeah, this is much better to me.
>
> >  This attribute ranges from 0 to 7, where 0 represents
> > the most aggressive control to avoid any temperature overshoots, and 7
> > represents a more graceful approach, favoring performance even at the
> > expense of temperature overshoots.
> > Note: This level may not scale linearly. For example, a value of 3 does
> > not
> > necessarily imply a 50% improvement in performance compared to a value
> > of
> > 0.
> >
> > Signed-off-by: Srinivas Pandruvada
> > <srinivas.pandruvada@linux.intel.com>
>
> Reviewed-by: Zhang Rui <rui.zhang@intel.com>

Applied along with the [2/2] as 6.17 material, thanks!
diff mbox series

Patch

diff --git a/Documentation/driver-api/thermal/intel_dptf.rst b/Documentation/driver-api/thermal/intel_dptf.rst
index ec5769accae0..c51ac793dc06 100644
--- a/Documentation/driver-api/thermal/intel_dptf.rst
+++ b/Documentation/driver-api/thermal/intel_dptf.rst
@@ -206,6 +206,15 @@  All these controls needs admin privilege to update.
 	Update a new temperature target in milli degree celsius for hardware to
 	use for the temperature control.
 
+``thermal_tolerance`` (RW)
+	This attribute ranges from 0 to 7, where 0 represents
+	the most aggressive control to avoid any temperature overshoots, and
+	7 represents a more graceful approach, favoring performance even at
+	the expense of temperature overshoots.
+	Note: This level may not scale linearly. For example, a value of 3 does
+	not necessarily imply a 50% improvement in performance compared to a
+	value of 0.
+
 Given that this is platform temperature control, it is expected that a
 single user-level manager owns and manages the controls. If multiple
 user-level software applications attempt to write different targets, it
diff --git a/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
index 2d6504514893..7850e91a6e2c 100644
--- a/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
+++ b/drivers/thermal/intel/int340x_thermal/platform_temperature_control.c
@@ -49,7 +49,7 @@  struct mmio_reg {
 };
 
 #define MAX_ATTR_GROUP_NAME_LEN	32
-#define PTC_MAX_ATTRS		3
+#define PTC_MAX_ATTRS		4
 
 struct ptc_data {
 	u32 offset;
@@ -57,6 +57,7 @@  struct ptc_data {
 	struct attribute *ptc_attrs[PTC_MAX_ATTRS];
 	struct device_attribute temperature_target_attr;
 	struct device_attribute enable_attr;
+	struct device_attribute thermal_tolerance_attr;
 	char group_name[MAX_ATTR_GROUP_NAME_LEN];
 };
 
@@ -78,6 +79,7 @@  static u32 ptc_offsets[PTC_MAX_INSTANCES] = {0x5B20, 0x5B28, 0x5B30};
 static const char * const ptc_strings[] = {
 	"temperature_target",
 	"enable",
+	"thermal_tolerance",
 	NULL
 };
 
@@ -177,6 +179,8 @@  PTC_SHOW(temperature_target);
 PTC_STORE(temperature_target);
 PTC_SHOW(enable);
 PTC_STORE(enable);
+PTC_SHOW(thermal_tolerance);
+PTC_STORE(thermal_tolerance);
 
 #define ptc_init_attribute(_name)\
 	do {\
@@ -193,9 +197,11 @@  static int ptc_create_groups(struct pci_dev *pdev, int instance, struct ptc_data
 
 	ptc_init_attribute(temperature_target);
 	ptc_init_attribute(enable);
+	ptc_init_attribute(thermal_tolerance);
 
 	data->ptc_attrs[index++] = &data->temperature_target_attr.attr;
 	data->ptc_attrs[index++] = &data->enable_attr.attr;
+	data->ptc_attrs[index++] = &data->thermal_tolerance_attr.attr;
 	data->ptc_attrs[index] = NULL;
 
 	snprintf(data->group_name, MAX_ATTR_GROUP_NAME_LEN,