[v6,4/7] sched/fair: Enable periodic update of average thermal pressure

Message ID 1576123908-12105-5-git-send-email-thara.gopinath@linaro.org
State New
Headers show
Series
  • Introduce Thermal Pressure
Related show

Commit Message

Thara Gopinath Dec. 12, 2019, 4:11 a.m.
Introduce support in CFS periodic tick and other bookkeeping apis
to trigger the process of computing average thermal pressure for a
cpu. Also consider avg_thermal.load_avg in others_have_blocked
which allows for decay of pelt signals.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>

---
 kernel/sched/fair.c | 8 ++++++++
 1 file changed, 8 insertions(+)

v4->v5:
	- Updated both versions of update_blocked_averages to trigger the
	  process of computing average thermal pressure.
	- Updated others_have_blocked to consider avg_thermal.load_avg.

-- 
2.1.4

Comments

Peter Zijlstra Dec. 16, 2019, 2:39 p.m. | #1
On Wed, Dec 11, 2019 at 11:11:45PM -0500, Thara Gopinath wrote:
> Introduce support in CFS periodic tick and other bookkeeping apis

> to trigger the process of computing average thermal pressure for a

> cpu. Also consider avg_thermal.load_avg in others_have_blocked

> which allows for decay of pelt signals.

> 

> Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>

> ---

>  kernel/sched/fair.c | 8 ++++++++

>  1 file changed, 8 insertions(+)

> 

> v4->v5:

> 	- Updated both versions of update_blocked_averages to trigger the

> 	  process of computing average thermal pressure.

> 	- Updated others_have_blocked to considerd avg_thermal.load_avg.

> 

> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

> index 08a233e..e12a375 100644

> --- a/kernel/sched/fair.c

> +++ b/kernel/sched/fair.c

> @@ -7462,6 +7462,9 @@ static inline bool others_have_blocked(struct rq *rq)

>  	if (READ_ONCE(rq->avg_dl.util_avg))

>  		return true;

>  

> +	if (READ_ONCE(rq->avg_thermal.load_avg))

> +		return true;

> +

>  #ifdef CONFIG_HAVE_SCHED_AVG_IRQ

>  	if (READ_ONCE(rq->avg_irq.util_avg))

>  		return true;

> @@ -7487,6 +7490,7 @@ static bool __update_blocked_others(struct rq *rq, bool *done)

>  {

>  	const struct sched_class *curr_class;

>  	u64 now = rq_clock_pelt(rq);

> +	unsigned long thermal_pressure = arch_scale_thermal_capacity(cpu_of(rq));

>  	bool decayed;

>  

>  	/*

> @@ -7497,6 +7501,8 @@ static bool __update_blocked_others(struct rq *rq, bool *done)

>  

>  	decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |

>  		  update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |

> +		  update_thermal_load_avg(rq_clock_task(rq), rq,

> +					  thermal_pressure) 			|

>  		  update_irq_load_avg(rq, 0);

>  

>  	if (others_have_blocked(rq))

> @@ -10263,6 +10269,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)

>  {

>  	struct cfs_rq *cfs_rq;

>  	struct sched_entity *se = &curr->se;

> +	unsigned long thermal_pressure = arch_scale_thermal_capacity(cpu_of(rq));

>  

>  	for_each_sched_entity(se) {

>  		cfs_rq = cfs_rq_of(se);

> @@ -10274,6 +10281,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)

>  

>  	update_misfit_status(curr, rq);

>  	update_overutilized_status(task_rq(curr));

> +	update_thermal_load_avg(rq_clock_task(rq), rq, thermal_pressure);

>  }


My objection here is that when the arch does not have support for it,
there is still code generated and runtime overhead associated with it.
Quentin Perret Dec. 16, 2019, 5:59 p.m. | #2
On Monday 16 Dec 2019 at 15:39:32 (+0100), Peter Zijlstra wrote:
> > @@ -10274,6 +10281,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)

> >  

> >  	update_misfit_status(curr, rq);

> >  	update_overutilized_status(task_rq(curr));

> > +	update_thermal_load_avg(rq_clock_task(rq), rq, thermal_pressure);

> >  }

> 

> My objection here is that when the arch does not have support for it,

> there is still code generated and runtime overhead associated with it.


I guess this function could be stubbed for CONFIG_CPU_THERMAL=n ?
That is, reflecting the thermal pressure in the scheduler only makes
sense when the thermal infrastructure is enabled to begin with (which is
indeed not the case for most archs).

Thanks,
Quentin
Dietmar Eggemann Dec. 17, 2019, 12:57 p.m. | #3
On 16/12/2019 18:59, Quentin Perret wrote:
> On Monday 16 Dec 2019 at 15:39:32 (+0100), Peter Zijlstra wrote:

>>> @@ -10274,6 +10281,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)

>>>  

>>>  	update_misfit_status(curr, rq);

>>>  	update_overutilized_status(task_rq(curr));

>>> +	update_thermal_load_avg(rq_clock_task(rq), rq, thermal_pressure);

>>>  }

>>

>> My objection here is that when the arch does not have support for it,

>> there is still code generated and runtime overhead associated with it.

> 

> I guess this function could be stubbed for CONFIG_CPU_THERMAL=n ?

> That is, reflecting the thermal pressure in the scheduler only makes

> sense when the thermal infrastructure is enabled to begin with (which is

> indeed not the case for most archs).


Makes sense to me. If we can agree that 'CPU cooling' is the only actor
for thermal (CPU capacity) capping.

thermal_sys-$(CONFIG_CPU_THERMAL)       += cpu_cooling.o
Thara Gopinath Dec. 27, 2019, 3:22 p.m. | #4
On 12/17/2019 07:57 AM, Dietmar Eggemann wrote:
> On 16/12/2019 18:59, Quentin Perret wrote:

>> On Monday 16 Dec 2019 at 15:39:32 (+0100), Peter Zijlstra wrote:

>>>> @@ -10274,6 +10281,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)

>>>>  

>>>>  	update_misfit_status(curr, rq);

>>>>  	update_overutilized_status(task_rq(curr));

>>>> +	update_thermal_load_avg(rq_clock_task(rq), rq, thermal_pressure);

>>>>  }

>>>

>>> My objection here is that when the arch does not have support for it,

>>> there is still code generated and runtime overhead associated with it.

>>

>> I guess this function could be stubbed for CONFIG_CPU_THERMAL=n ?

>> That is, reflecting the thermal pressure in the scheduler only makes

>> sense when the thermal infrastructure is enabled to begin with (which is

>> indeed not the case for most archs).

> 

> Makes sense to me. If we can agree that 'CPU cooling' is the only actor

> for thermal (CPU capacity) capping.

> 

> thermal_sys-$(CONFIG_CPU_THERMAL)       += cpu_cooling.o

> 


Hi All,
Thanks for all the reviews!

The other option will be to have a separate
CONFIG_HAVE_SCHED_THERMAL_PRESSURE. This will ensure that we are not
tied to cpu cooling thermal infrastructure. What say?
 There is a CONFIG_HAVE_SCHED_AVG_IRQ for irq load average in pelt.c.


-- 
Warm Regards
Thara

Patch

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 08a233e..e12a375 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7462,6 +7462,9 @@  static inline bool others_have_blocked(struct rq *rq)
 	if (READ_ONCE(rq->avg_dl.util_avg))
 		return true;
 
+	if (READ_ONCE(rq->avg_thermal.load_avg))
+		return true;
+
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 	if (READ_ONCE(rq->avg_irq.util_avg))
 		return true;
@@ -7487,6 +7490,7 @@  static bool __update_blocked_others(struct rq *rq, bool *done)
 {
 	const struct sched_class *curr_class;
 	u64 now = rq_clock_pelt(rq);
+	unsigned long thermal_pressure = arch_scale_thermal_capacity(cpu_of(rq));
 	bool decayed;
 
 	/*
@@ -7497,6 +7501,8 @@  static bool __update_blocked_others(struct rq *rq, bool *done)
 
 	decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
 		  update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
+		  update_thermal_load_avg(rq_clock_task(rq), rq,
+					  thermal_pressure) 			|
 		  update_irq_load_avg(rq, 0);
 
 	if (others_have_blocked(rq))
@@ -10263,6 +10269,7 @@  static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &curr->se;
+	unsigned long thermal_pressure = arch_scale_thermal_capacity(cpu_of(rq));
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
@@ -10274,6 +10281,7 @@  static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 
 	update_misfit_status(curr, rq);
 	update_overutilized_status(task_rq(curr));
+	update_thermal_load_avg(rq_clock_task(rq), rq, thermal_pressure);
 }
 
 /*