diff mbox series

[v5,1/6] sched/pelt.c: Add support to track thermal pressure

Message ID 1572979786-20361-2-git-send-email-thara.gopinath@linaro.org
State New
Headers show
Series Introduce Thermal Pressure | expand

Commit Message

Thara Gopinath Nov. 5, 2019, 6:49 p.m. UTC
Extrapolating on the existing framework to track rt/dl utilization using
pelt signals, add a similar mechanism to track thermal pressure. The
difference here from rt/dl utilization tracking is that, instead of
tracking time spent by a cpu running a rt/dl task through util_avg,
the average thermal pressure is tracked through load_avg. This is
because the thermal pressure signal is a weighted "delta" capacity
and is not binary (util_avg is binary). "delta capacity" here
means the delta between the actual capacity of a cpu and the decreased
capacity of a cpu due to a thermal event.
In order to track average thermal pressure, a new sched_avg variable
avg_thermal is introduced. Function update_thermal_load_avg can be called
to do the periodic bookkeeping (accumulate, decay and average)
of the thermal pressure.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>

---
 kernel/sched/pelt.c  | 13 +++++++++++++
 kernel/sched/pelt.h  |  7 +++++++
 kernel/sched/sched.h |  1 +
 3 files changed, 21 insertions(+)

-- 
2.1.4

Comments

Vincent Guittot Nov. 6, 2019, 8:24 a.m. UTC | #1
On Tue, 5 Nov 2019 at 19:49, Thara Gopinath <thara.gopinath@linaro.org> wrote:
>

> Extrapolating on the exisiting framework to track rt/dl utilization using

> pelt signals, add a similar mechanism to track thermal pressure. The

> difference here from rt/dl utilization tracking is that, instead of

> tracking time spent by a cpu running a rt/dl task through util_avg,

> the average thermal pressure is tracked through load_avg. This is

> because thermal pressure signal is weighted "delta" capacity

> and is not binary(util_avg is binary). "delta capacity" here

> means delta between the actual capacity of a cpu and the decreased

> capacity a cpu due to a thermal event.

> In order to track average thermal pressure, a new sched_avg variable

> avg_thermal is introduced. Function update_thermal_load_avg can be called

> to do the periodic bookeeping (accumulate, decay and average)

> of the thermal pressure.

>

> Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>


Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>


> ---

>  kernel/sched/pelt.c  | 13 +++++++++++++

>  kernel/sched/pelt.h  |  7 +++++++

>  kernel/sched/sched.h |  1 +

>  3 files changed, 21 insertions(+)

>

> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c

> index a96db50..3821069 100644

> --- a/kernel/sched/pelt.c

> +++ b/kernel/sched/pelt.c

> @@ -353,6 +353,19 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)

>         return 0;

>  }

>

> +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)

> +{

> +       if (___update_load_sum(now, &rq->avg_thermal,

> +                              capacity,

> +                              capacity,

> +                              capacity)) {

> +               ___update_load_avg(&rq->avg_thermal, 1, 1);

> +               return 1;

> +       }

> +

> +       return 0;

> +}

> +

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>  /*

>   * irq:

> diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h

> index afff644..c74226d 100644

> --- a/kernel/sched/pelt.h

> +++ b/kernel/sched/pelt.h

> @@ -6,6 +6,7 @@ int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se

>  int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq);

>  int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);

>  int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);

> +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity);

>

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>  int update_irq_load_avg(struct rq *rq, u64 running);

> @@ -159,6 +160,12 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running)

>  }

>

>  static inline int

> +update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)

> +{

> +       return 0;

> +}

> +

> +static inline int

>  update_irq_load_avg(struct rq *rq, u64 running)

>  {

>         return 0;

> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

> index 0db2c1b..d5d82c8 100644

> --- a/kernel/sched/sched.h

> +++ b/kernel/sched/sched.h

> @@ -944,6 +944,7 @@ struct rq {

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>         struct sched_avg        avg_irq;

>  #endif

> +       struct sched_avg        avg_thermal;

>         u64                     idle_stamp;

>         u64                     avg_idle;

>

> --

> 2.1.4

>
Dietmar Eggemann Nov. 6, 2019, 12:50 p.m. UTC | #2
On 05/11/2019 19:49, Thara Gopinath wrote:
> Extrapolating on the exisiting framework to track rt/dl utilization using


s/exisiting/existing

> pelt signals, add a similar mechanism to track thermal pressure. The

> difference here from rt/dl utilization tracking is that, instead of

> tracking time spent by a cpu running a rt/dl task through util_avg,

> the average thermal pressure is tracked through load_avg. This is

> because thermal pressure signal is weighted "delta" capacity

> and is not binary(util_avg is binary). "delta capacity" here

> means delta between the actual capacity of a cpu and the decreased

> capacity a cpu due to a thermal event.

> In order to track average thermal pressure, a new sched_avg variable

> avg_thermal is introduced. Function update_thermal_load_avg can be called

> to do the periodic bookeeping (accumulate, decay and average)


s/bookeeping/bookkeeping

> of the thermal pressure.

> 

> Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>

> ---

>  kernel/sched/pelt.c  | 13 +++++++++++++

>  kernel/sched/pelt.h  |  7 +++++++

>  kernel/sched/sched.h |  1 +

>  3 files changed, 21 insertions(+)

> 

> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c

> index a96db50..3821069 100644

> --- a/kernel/sched/pelt.c

> +++ b/kernel/sched/pelt.c

> @@ -353,6 +353,19 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)

>  	return 0;

>  }


Minor thing: There are function headers for rt_rq, dl_rq and irq. rt_rq
even explains that 'load_avg and runnable_load_avg are not supported and
meaningless.' Could you do something similar for thermal here? It's not
self-explanatory why we track load_avg, runnable_load_avg and util_avg
for thermal but only use load_avg.

> +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)

> +{

> +	if (___update_load_sum(now, &rq->avg_thermal,

> +			       capacity,

> +			       capacity,

> +			       capacity)) {

> +		___update_load_avg(&rq->avg_thermal, 1, 1);

> +		return 1;

> +	}

> +

> +	return 0;

> +}

> +


[...]
Thara Gopinath Nov. 6, 2019, 5 p.m. UTC | #3
Hi Dietmar,
Thanks for the review.
On 11/06/2019 07:50 AM, Dietmar Eggemann wrote:
> On 05/11/2019 19:49, Thara Gopinath wrote:

>> Extrapolating on the exisiting framework to track rt/dl utilization using

> 

> s/exisiting/existing

> 

>> pelt signals, add a similar mechanism to track thermal pressure. The

>> difference here from rt/dl utilization tracking is that, instead of

>> tracking time spent by a cpu running a rt/dl task through util_avg,

>> the average thermal pressure is tracked through load_avg. This is

>> because thermal pressure signal is weighted "delta" capacity

>> and is not binary(util_avg is binary). "delta capacity" here

>> means delta between the actual capacity of a cpu and the decreased

>> capacity a cpu due to a thermal event.

>> In order to track average thermal pressure, a new sched_avg variable

>> avg_thermal is introduced. Function update_thermal_load_avg can be called

>> to do the periodic bookeeping (accumulate, decay and average)

> 

> s/bookeeping/bookkeeping

> 

>> of the thermal pressure.

>>

>> Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>

>> ---

>>  kernel/sched/pelt.c  | 13 +++++++++++++

>>  kernel/sched/pelt.h  |  7 +++++++

>>  kernel/sched/sched.h |  1 +

>>  3 files changed, 21 insertions(+)

>>

>> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c

>> index a96db50..3821069 100644

>> --- a/kernel/sched/pelt.c

>> +++ b/kernel/sched/pelt.c

>> @@ -353,6 +353,19 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)

>>  	return 0;

>>  }

> 

> Minor thing: There are function headers for rt_rq, dl_rq and irq. rt_rq

> even explains that 'load_avg and runnable_load_avg are not supported and

> meaningless.' Could you do something similar for thermal here? It's not

> self-explanatory why we track load_avg, runnable_load_avg and util_avg

> for thermal but only use load_avg.


Will put a function header and update the nits above.
> 

>> +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)

>> +{

>> +	if (___update_load_sum(now, &rq->avg_thermal,

>> +			       capacity,

>> +			       capacity,

>> +			       capacity)) {

>> +		___update_load_avg(&rq->avg_thermal, 1, 1);

>> +		return 1;

>> +	}

>> +

>> +	return 0;

>> +}

>> +

> 

> [...]

> 



-- 
Warm Regards
Thara
Qais Yousef Nov. 7, 2019, 4:39 p.m. UTC | #4
Hi Thara

On 11/05/19 13:49, Thara Gopinath wrote:
> Extrapolating on the exisiting framework to track rt/dl utilization using

> pelt signals, add a similar mechanism to track thermal pressure. The

> difference here from rt/dl utilization tracking is that, instead of

> tracking time spent by a cpu running a rt/dl task through util_avg,

> the average thermal pressure is tracked through load_avg. This is

> because thermal pressure signal is weighted "delta" capacity

> and is not binary(util_avg is binary). "delta capacity" here

> means delta between the actual capacity of a cpu and the decreased

> capacity a cpu due to a thermal event.

> In order to track average thermal pressure, a new sched_avg variable

> avg_thermal is introduced. Function update_thermal_load_avg can be called

> to do the periodic bookeeping (accumulate, decay and average)

> of the thermal pressure.

> 

> Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>

> ---

>  kernel/sched/pelt.c  | 13 +++++++++++++

>  kernel/sched/pelt.h  |  7 +++++++

>  kernel/sched/sched.h |  1 +

>  3 files changed, 21 insertions(+)

> 

> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c

> index a96db50..3821069 100644

> --- a/kernel/sched/pelt.c

> +++ b/kernel/sched/pelt.c

> @@ -353,6 +353,19 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)

>  	return 0;

>  }

>  

> +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)

> +{

> +	if (___update_load_sum(now, &rq->avg_thermal,

> +			       capacity,

> +			       capacity,

> +			       capacity)) {

> +		___update_load_avg(&rq->avg_thermal, 1, 1);

> +		return 1;

> +	}

> +

> +	return 0;

> +}


Care to add a tracepoint to this new signal like we now have for the other
ones?

Thanks

--
Qais Yousef

> +

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>  /*

>   * irq:

> diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h

> index afff644..c74226d 100644

> --- a/kernel/sched/pelt.h

> +++ b/kernel/sched/pelt.h

> @@ -6,6 +6,7 @@ int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se

>  int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq);

>  int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);

>  int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);

> +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity);

>  

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>  int update_irq_load_avg(struct rq *rq, u64 running);

> @@ -159,6 +160,12 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running)

>  }

>  

>  static inline int

> +update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)

> +{

> +	return 0;

> +}

> +

> +static inline int

>  update_irq_load_avg(struct rq *rq, u64 running)

>  {

>  	return 0;

> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

> index 0db2c1b..d5d82c8 100644

> --- a/kernel/sched/sched.h

> +++ b/kernel/sched/sched.h

> @@ -944,6 +944,7 @@ struct rq {

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>  	struct sched_avg	avg_irq;

>  #endif

> +	struct sched_avg	avg_thermal;

>  	u64			idle_stamp;

>  	u64			avg_idle;

>  

> -- 

> 2.1.4

>
Amit Kucheria Nov. 19, 2019, 10:50 a.m. UTC | #5
On Wed, Nov 6, 2019 at 12:20 AM Thara Gopinath
<thara.gopinath@linaro.org> wrote:
>

> Extrapolating on the exisiting framework to track rt/dl utilization using

> pelt signals, add a similar mechanism to track thermal pressure. The

> difference here from rt/dl utilization tracking is that, instead of

> tracking time spent by a cpu running a rt/dl task through util_avg,

> the average thermal pressure is tracked through load_avg. This is

> because thermal pressure signal is weighted "delta" capacity

> and is not binary(util_avg is binary). "delta capacity" here

> means delta between the actual capacity of a cpu and the decreased

> capacity a cpu due to a thermal event.


Use a blank line here. And reflow the paragraph text.

> In order to track average thermal pressure, a new sched_avg variable

> avg_thermal is introduced. Function update_thermal_load_avg can be called

> to do the periodic bookeeping (accumulate, decay and average)

> of the thermal pressure.

>

> Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>

> ---

>  kernel/sched/pelt.c  | 13 +++++++++++++

>  kernel/sched/pelt.h  |  7 +++++++

>  kernel/sched/sched.h |  1 +

>  3 files changed, 21 insertions(+)

>

> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c

> index a96db50..3821069 100644

> --- a/kernel/sched/pelt.c

> +++ b/kernel/sched/pelt.c

> @@ -353,6 +353,19 @@ int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)

>         return 0;

>  }

>

> +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)

> +{

> +       if (___update_load_sum(now, &rq->avg_thermal,

> +                              capacity,

> +                              capacity,

> +                              capacity)) {

> +               ___update_load_avg(&rq->avg_thermal, 1, 1);

> +               return 1;

> +       }

> +

> +       return 0;

> +}

> +

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>  /*

>   * irq:

> diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h

> index afff644..c74226d 100644

> --- a/kernel/sched/pelt.h

> +++ b/kernel/sched/pelt.h

> @@ -6,6 +6,7 @@ int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se

>  int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq);

>  int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);

>  int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);

> +int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity);

>

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>  int update_irq_load_avg(struct rq *rq, u64 running);

> @@ -159,6 +160,12 @@ update_dl_rq_load_avg(u64 now, struct rq *rq, int running)

>  }

>

>  static inline int

> +update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)

> +{

> +       return 0;

> +}

> +

> +static inline int

>  update_irq_load_avg(struct rq *rq, u64 running)

>  {

>         return 0;

> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

> index 0db2c1b..d5d82c8 100644

> --- a/kernel/sched/sched.h

> +++ b/kernel/sched/sched.h

> @@ -944,6 +944,7 @@ struct rq {

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>         struct sched_avg        avg_irq;

>  #endif

> +       struct sched_avg        avg_thermal;


Have you considered putting this inside an #ifdef
CONFIG_HAVE_SCHED_THERMAL_PRESSURE?


>         u64                     idle_stamp;

>         u64                     avg_idle;

>

> --

> 2.1.4

>
diff mbox series

Patch

diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index a96db50..3821069 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -353,6 +353,19 @@  int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
 	return 0;
 }
 
+int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
+{
+	if (___update_load_sum(now, &rq->avg_thermal,
+			       capacity,
+			       capacity,
+			       capacity)) {
+		___update_load_avg(&rq->avg_thermal, 1, 1);
+		return 1;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 /*
  * irq:
diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
index afff644..c74226d 100644
--- a/kernel/sched/pelt.h
+++ b/kernel/sched/pelt.h
@@ -6,6 +6,7 @@  int __update_load_avg_se(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se
 int __update_load_avg_cfs_rq(u64 now, struct cfs_rq *cfs_rq);
 int update_rt_rq_load_avg(u64 now, struct rq *rq, int running);
 int update_dl_rq_load_avg(u64 now, struct rq *rq, int running);
+int update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity);
 
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 int update_irq_load_avg(struct rq *rq, u64 running);
@@ -159,6 +160,12 @@  update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
 }
 
 static inline int
+update_thermal_load_avg(u64 now, struct rq *rq, u64 capacity)
+{
+	return 0;
+}
+
+static inline int
 update_irq_load_avg(struct rq *rq, u64 running)
 {
 	return 0;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0db2c1b..d5d82c8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -944,6 +944,7 @@  struct rq {
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 	struct sched_avg	avg_irq;
 #endif
+	struct sched_avg	avg_thermal;
 	u64			idle_stamp;
 	u64			avg_idle;