diff mbox series

[v3,7/7] sched: thermal: Enable tuning of decay period

Message ID 1571014705-19646-8-git-send-email-thara.gopinath@linaro.org
State New
Headers show
Series Introduce Thermal Pressure | expand

Commit Message

Thara Gopinath Oct. 14, 2019, 12:58 a.m. UTC
Thermal pressure follows pelt signas which means the
decay period for thermal pressure is the default pelt
decay period. Depending on soc charecteristics and thermal
activity, it might be beneficial to decay thermal pressure
slower, but still in-tune with the pelt signals.
One way to achieve this slow decay is to right shift the now
run time.

Signed-off-by: Thara Gopinath <thara.gopinath@linaro.org>

---
 include/linux/sched/sysctl.h |  1 +
 kernel/sched/thermal.c       | 16 +++++++++++++++-
 kernel/sysctl.c              |  7 +++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

-- 
2.1.4

Comments

Quentin Perret Oct. 15, 2019, 10:14 a.m. UTC | #1
Hi Thara,

On Sunday 13 Oct 2019 at 20:58:25 (-0400), Thara Gopinath wrote:
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c

> index 00fcea2..5056c08 100644

> --- a/kernel/sysctl.c

> +++ b/kernel/sysctl.c

> @@ -376,6 +376,13 @@ static struct ctl_table kern_table[] = {

>  		.mode		= 0644,

>  		.proc_handler	= proc_dointvec,

>  	},

> +	{

> +		.procname	= "sched_thermal_decay_coeff",

> +		.data		= &sysctl_sched_thermal_decay_coeff,

> +		.maxlen		= sizeof(unsigned int),

> +		.mode		= 0644,

> +		.proc_handler	= proc_dointvec,


Perhaps change this for 'sched_proc_update_handler' with min and max
values ? Otherwise userspace is allowed to write nonsensical values
here. And since sysctl_sched_thermal_decay_coeff is used to shift, this
can lead to an undefined behaviour.

Also, could we take this sysctl out of SCHED_DEBUG ? I expect this to be
used/tuned on production devices where SCHED_DEBUG should theoretically
be off.

Thanks,
Quentin
Thara Gopinath Oct. 21, 2019, 9:03 p.m. UTC | #2
Hi Quentin,

Thanks for the review.
On 10/15/2019 06:14 AM, Quentin Perret wrote:
> Hi Thara,

> 

> On Sunday 13 Oct 2019 at 20:58:25 (-0400), Thara Gopinath wrote:

>> diff --git a/kernel/sysctl.c b/kernel/sysctl.c

>> index 00fcea2..5056c08 100644

>> --- a/kernel/sysctl.c

>> +++ b/kernel/sysctl.c

>> @@ -376,6 +376,13 @@ static struct ctl_table kern_table[] = {

>>  		.mode		= 0644,

>>  		.proc_handler	= proc_dointvec,

>>  	},

>> +	{

>> +		.procname	= "sched_thermal_decay_coeff",

>> +		.data		= &sysctl_sched_thermal_decay_coeff,

>> +		.maxlen		= sizeof(unsigned int),

>> +		.mode		= 0644,

>> +		.proc_handler	= proc_dointvec,

> 

> Perhaps change this for 'sched_proc_update_handler' with min and max

> values ? Otherwise userspace is allowed to write nonsensical values

> here. And since sysctl_sched_thermal_decay_coeff is used to shift, this

> can lead to an undefined behaviour.

Will do
> 

> Also, could we take this sysctl out of SCHED_DEBUG ? I expect this to be

> used/tuned on production devices where SCHED_DEBUG should theoretically

> be off.


I will take it out of SCHED_DEBUG. I am wondering if this should be
a runtime control at all. Because this is a shift this changes the
accumulating window for the thermal pressure signal. A runtime change
will not guarantee a clean start of the window. May be I should make
this a config option.

> 

> Thanks,

> Quentin

> 



-- 
Warm Regards
Thara
Quentin Perret Oct. 22, 2019, 9:03 a.m. UTC | #3
Hi Thara,

On Monday 21 Oct 2019 at 17:03:56 (-0400), Thara Gopinath wrote:
> On 10/15/2019 06:14 AM, Quentin Perret wrote:

> > Hi Thara,

> > 

> > On Sunday 13 Oct 2019 at 20:58:25 (-0400), Thara Gopinath wrote:

> >> diff --git a/kernel/sysctl.c b/kernel/sysctl.c

> >> index 00fcea2..5056c08 100644

> >> --- a/kernel/sysctl.c

> >> +++ b/kernel/sysctl.c

> >> @@ -376,6 +376,13 @@ static struct ctl_table kern_table[] = {

> >>  		.mode		= 0644,

> >>  		.proc_handler	= proc_dointvec,

> >>  	},

> >> +	{

> >> +		.procname	= "sched_thermal_decay_coeff",

> >> +		.data		= &sysctl_sched_thermal_decay_coeff,

> >> +		.maxlen		= sizeof(unsigned int),

> >> +		.mode		= 0644,

> >> +		.proc_handler	= proc_dointvec,

> > 

> > Perhaps change this for 'sched_proc_update_handler' with min and max

> > values ? Otherwise userspace is allowed to write nonsensical values

> > here. And since sysctl_sched_thermal_decay_coeff is used to shift, this

> > can lead to an undefined behaviour.

> Will do

> > 

> > Also, could we take this sysctl out of SCHED_DEBUG ? I expect this to be

> > used/tuned on production devices where SCHED_DEBUG should theoretically

> > be off.

> 

> I will take it out of SCHED_DEBUG. I am wondering if this should be

> a runtime control at all. Because this is a shift this changes the

> accumulating window for the thermal pressure signal. A runtime change

> will not guarantee a clean start of the window. May be I should make

> this a config option.


I'd personally prefer if it wan't a Kconfig option. We'd like to make
Android devices (which are going to use this) work with a Generic Kernel
Image, which means there will be a single config for everyone. But I
expect this knob to be tuned to different values depending on the SoC.

If you really don't want a sysctl, perhaps a cmdline option could work ?

Thanks,
Quentin
Thara Gopinath Oct. 22, 2019, 8:36 p.m. UTC | #4
On 10/22/2019 05:03 AM, Quentin Perret wrote:
> Hi Thara,

> 

> On Monday 21 Oct 2019 at 17:03:56 (-0400), Thara Gopinath wrote:

>> On 10/15/2019 06:14 AM, Quentin Perret wrote:

>>> Hi Thara,

>>>

>>> On Sunday 13 Oct 2019 at 20:58:25 (-0400), Thara Gopinath wrote:

>>>> diff --git a/kernel/sysctl.c b/kernel/sysctl.c

>>>> index 00fcea2..5056c08 100644

>>>> --- a/kernel/sysctl.c

>>>> +++ b/kernel/sysctl.c

>>>> @@ -376,6 +376,13 @@ static struct ctl_table kern_table[] = {

>>>>  		.mode		= 0644,

>>>>  		.proc_handler	= proc_dointvec,

>>>>  	},

>>>> +	{

>>>> +		.procname	= "sched_thermal_decay_coeff",

>>>> +		.data		= &sysctl_sched_thermal_decay_coeff,

>>>> +		.maxlen		= sizeof(unsigned int),

>>>> +		.mode		= 0644,

>>>> +		.proc_handler	= proc_dointvec,

>>>

>>> Perhaps change this for 'sched_proc_update_handler' with min and max

>>> values ? Otherwise userspace is allowed to write nonsensical values

>>> here. And since sysctl_sched_thermal_decay_coeff is used to shift, this

>>> can lead to an undefined behaviour.

>> Will do

>>>

>>> Also, could we take this sysctl out of SCHED_DEBUG ? I expect this to be

>>> used/tuned on production devices where SCHED_DEBUG should theoretically

>>> be off.

>>

>> I will take it out of SCHED_DEBUG. I am wondering if this should be

>> a runtime control at all. Because this is a shift this changes the

>> accumulating window for the thermal pressure signal. A runtime change

>> will not guarantee a clean start of the window. May be I should make

>> this a config option.

> 

> I'd personally prefer if it wan't a Kconfig option. We'd like to make

> Android devices (which are going to use this) work with a Generic Kernel

> Image, which means there will be a single config for everyone. But I

> expect this knob to be tuned to different values depending on the SoC.

> 

> If you really don't want a sysctl, perhaps a cmdline option could work ?

yes . I will. I have sent across v4 with these and other review comments
fixed.
> 

> Thanks,

> Quentin

> 



-- 
Warm Regards
Thara
diff mbox series

Patch

diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d4f6215..f4c4afd 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -41,6 +41,7 @@  extern unsigned int sysctl_numa_balancing_scan_size;
 #ifdef CONFIG_SCHED_DEBUG
 extern __read_mostly unsigned int sysctl_sched_migration_cost;
 extern __read_mostly unsigned int sysctl_sched_nr_migrate;
+extern __read_mostly unsigned int sysctl_sched_thermal_decay_coeff;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *length,
diff --git a/kernel/sched/thermal.c b/kernel/sched/thermal.c
index 5f0b2d4..2a00488 100644
--- a/kernel/sched/thermal.c
+++ b/kernel/sched/thermal.c
@@ -10,6 +10,19 @@ 
 #include "pelt.h"
 #include "thermal.h"
 
+/**
+ * By default the decay is the default pelt decay period.
+ * The decay coefficient can change is decay period in
+ * multiples of 32.
+ *   Decay coefficient    Decay period(ms)
+ *	0			32
+ *	1			64
+ *	2			128
+ *	3			256
+ *	4			512
+ */
+unsigned int sysctl_sched_thermal_decay_coeff __read_mostly;
+
 struct max_capacity_info {
 	unsigned long max_capacity;
 	unsigned long cap_capacity;
@@ -62,5 +75,6 @@  void update_periodic_maxcap(struct rq *rq)
 		return;
 
 	delta = __max_cap->max_capacity - __max_cap->cap_capacity;
-	update_thermal_avg(rq_clock_task(rq), rq, delta);
+	update_thermal_avg((rq_clock_task(rq) >>
+			   sysctl_sched_thermal_decay_coeff), rq, delta);
 }
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 00fcea2..5056c08 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -376,6 +376,13 @@  static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "sched_thermal_decay_coeff",
+		.data		= &sysctl_sched_thermal_decay_coeff,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_SCHEDSTATS
 	{
 		.procname	= "sched_schedstats",