[V3,1/3] sched: cpufreq: Allow remote cpufreq callbacks

Message ID 0f950529a63fb95e87944644c4854be4fcfaea38.1499927699.git.viresh.kumar@linaro.org
State New
Series
  • [V3,1/3] sched: cpufreq: Allow remote cpufreq callbacks

Commit Message

Viresh Kumar July 13, 2017, 6:44 a.m.
We do not currently call cpufreq callbacks from the scheduler core for
remote (non-local) CPUs. But there are cases where such remote
callbacks are useful, especially in the case of shared cpufreq policies.

This patch updates the scheduler core to call the cpufreq callbacks for
remote CPUs as well.

For now, all the registered utilization update callbacks are updated to
return early if a remote callback is detected. That is, this patch just
moves the decision-making down the hierarchy.

Later patches will enable remote callbacks for shared policies.
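
Concretely, each registered handler gains a guard of the following
shape (a minimal sketch, not the literal driver code; the exact hunks
are in the diff below):

	static void example_update_util(struct update_util_data *data,
					u64 time, unsigned int flags)
	{
		/* Bail out when invoked on behalf of a remote CPU */
		if (smp_processor_id() != data->cpu)
			return;

		/* ... existing per-CPU frequency update work ... */
	}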

Based on initial work from Steve Muckle.

Signed-off-by: Steve Muckle <smuckle.linux@gmail.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>

---
 drivers/cpufreq/cpufreq_governor.c |  4 ++++
 drivers/cpufreq/intel_pstate.c     |  8 ++++++++
 include/linux/sched/cpufreq.h      |  1 +
 kernel/sched/cpufreq.c             |  1 +
 kernel/sched/cpufreq_schedutil.c   |  8 ++++++++
 kernel/sched/deadline.c            |  2 +-
 kernel/sched/fair.c                |  8 +++++---
 kernel/sched/rt.c                  |  2 +-
 kernel/sched/sched.h               | 10 ++--------
 9 files changed, 31 insertions(+), 13 deletions(-)

-- 
2.13.0.71.gd7076ec9c9cb

Comments

Peter Zijlstra July 21, 2017, 1:03 p.m. | #1
On Thu, Jul 13, 2017 at 12:14:37PM +0530, Viresh Kumar wrote:
> diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
> index 47e24b5384b3..606b1a37a1af 100644
> --- a/drivers/cpufreq/cpufreq_governor.c
> +++ b/drivers/cpufreq/cpufreq_governor.c
> @@ -275,6 +275,10 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time,
>  	struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
>  	u64 delta_ns, lst;
>  
> +	/* Don't allow remote callbacks */
> +	if (smp_processor_id() != data->cpu)
> +		return;
> +

The alternative is using some of that policy_dbs->policy->*cpus crud I
suppose, because:

>  	/*
>  	 * The work may not be allowed to be queued up right now.
>  	 * Possible reasons:
> diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
> index b7fb8b7c980d..4bee2f4cbc28 100644
> --- a/drivers/cpufreq/intel_pstate.c
> +++ b/drivers/cpufreq/intel_pstate.c
> @@ -1732,6 +1732,10 @@ static void intel_pstate_update_util_pid(struct update_util_data *data,
>  	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
>  	u64 delta_ns = time - cpu->sample.time;
>  
> +	/* Don't allow remote callbacks */
> +	if (smp_processor_id() != data->cpu)
> +		return;
> +
>  	if ((s64)delta_ns < pid_params.sample_rate_ns)
>  		return;
>  
> @@ -1749,6 +1753,10 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
>  	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
>  	u64 delta_ns;
>  
> +	/* Don't allow remote callbacks */
> +	if (smp_processor_id() != data->cpu)
> +		return;
> +
>  	if (flags & SCHED_CPUFREQ_IOWAIT) {
>  		cpu->iowait_boost = int_tofp(1);
>  	} else if (cpu->iowait_boost) {

For these we can already use cpu->cpu, which would make:

> diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
> index d2be2ccbb372..8256a8f35f22 100644
> --- a/include/linux/sched/cpufreq.h
> +++ b/include/linux/sched/cpufreq.h
> @@ -16,6 +16,7 @@
>  #ifdef CONFIG_CPU_FREQ
>  struct update_util_data {
>         void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
> +       unsigned int cpu;
>  };
>  
>  void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
> diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
> index dbc51442ecbc..ee4c596b71b4 100644
> --- a/kernel/sched/cpufreq.c
> +++ b/kernel/sched/cpufreq.c
> @@ -42,6 +42,7 @@ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
>  		return;
>  
>  	data->func = func;
> +	data->cpu = cpu;
>  	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
>  }
>  EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);

redundant.
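
(For illustration, the two alternatives suggested here would look
roughly like this, relying on the existing policy->cpus mask and the
cpu field that struct cpudata already carries:

	/* cpufreq_governor.c: test against the policy's cpumask */
	if (!cpumask_test_cpu(smp_processor_id(),
			      policy_dbs->policy->cpus))
		return;

	/* intel_pstate.c: the driver already knows its own CPU */
	if (smp_processor_id() != cpu->cpu)
		return;

both of which avoid adding a cpu field to struct update_util_data.)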

> diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> index 29a397067ffa..ed9c589e5386 100644
> --- a/kernel/sched/cpufreq_schedutil.c
> +++ b/kernel/sched/cpufreq_schedutil.c
> @@ -218,6 +218,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
>  	unsigned int next_f;
>  	bool busy;
>  
> +	/* Remote callbacks aren't allowed for policies which aren't shared */
> +	if (smp_processor_id() != hook->cpu)
> +		return;
> +
>  	sugov_set_iowait_boost(sg_cpu, time, flags);
>  	sg_cpu->last_update = time;
>  
> @@ -290,6 +294,10 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
>  	unsigned long util, max;
>  	unsigned int next_f;
>  
> +	/* Don't allow remote callbacks */
> +	if (smp_processor_id() != hook->cpu)
> +		return;
> +
>  	sugov_get_util(&util, &max);
>  
>  	raw_spin_lock(&sg_policy->update_lock);

Given the whole rq->lock thing, I suspect we could actually not do these
two. That would then continue to process the iowait and other accounting
stuff, but stall the moment we call into the actual driver, which will
then drop the request on the floor as per the first few hunks.

> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index a84299f44b5d..7fcfaee39d19 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -1136,7 +1136,7 @@ static void update_curr_dl(struct rq *rq)
>  	}
>  
>  	/* kick cpufreq (see the comment in kernel/sched/sched.h). */
> -	cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
> +	cpufreq_update_util(rq, SCHED_CPUFREQ_DL);
>  
>  	schedstat_set(curr->se.statistics.exec_max,
>  		      max(curr->se.statistics.exec_max, delta_exec));
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index c95880e216f6..d378d02fdfcb 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -3278,7 +3278,9 @@ static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
>  
>  static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
>  {
> -	if (&this_rq()->cfs == cfs_rq) {
> +	struct rq *rq = rq_of(cfs_rq);
> +
> +	if (&rq->cfs == cfs_rq) {
>  		/*
>  		 * There are a few boundary cases this might miss but it should
>  		 * get called often enough that that should (hopefully) not be
> @@ -3295,7 +3297,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
>  		 *
>  		 * See cpu_util().
>  		 */
> -		cpufreq_update_util(rq_of(cfs_rq), 0);
> +		cpufreq_update_util(rq, 0);
>  	}
>  }
>  
> @@ -4875,7 +4877,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>  	 * passed.
>  	 */
>  	if (p->in_iowait)
> -		cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
> +		cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
>  
>  	for_each_sched_entity(se) {
>  		if (se->on_rq)
> diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
> index 45caf937ef90..0af5ca9e3e3f 100644
> --- a/kernel/sched/rt.c
> +++ b/kernel/sched/rt.c
> @@ -970,7 +970,7 @@ static void update_curr_rt(struct rq *rq)
>  		return;
>  
>  	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
> -	cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
> +	cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
>  
>  	schedstat_set(curr->se.statistics.exec_max,
>  		      max(curr->se.statistics.exec_max, delta_exec));
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index eeef1a3086d1..aa9d5b87b4f8 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -2070,19 +2070,13 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
>  {
>  	struct update_util_data *data;
>  
> -	data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
> +	data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
> +						  cpu_of(rq)));
>  	if (data)
>  		data->func(data, rq_clock(rq), flags);
>  }
> -
> -static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
> -{
> -	if (cpu_of(rq) == smp_processor_id())
> -		cpufreq_update_util(rq, flags);
> -}
>  #else
>  static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
> -static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
>  #endif /* CONFIG_CPU_FREQ */

This seems ok. Except of course you'll have conflicts with Juri's patch
set, but that should be trivial to sort out.
Viresh Kumar July 24, 2017, 11:01 a.m. | #2
On 21-07-17, 15:03, Peter Zijlstra wrote:
> On Thu, Jul 13, 2017 at 12:14:37PM +0530, Viresh Kumar wrote:
> > @@ -42,6 +42,7 @@ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
> >  		return;
> >  
> >  	data->func = func;
> > +	data->cpu = cpu;
> >  	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
> >  }
> >  EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);
> 
> redundant.

Actually we will still need it. We pass hook->cpu to sugov_get_util()
in the 2nd patch of this series, and there is no workaround for that.
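
(Patch 2/3 is not shown in this mail; purely as a hypothetical sketch,
the change being referred to would give sugov_get_util() a CPU
parameter along these lines:

	static void sugov_get_util(unsigned long *util, unsigned long *max,
				   int cpu)
	{
		struct rq *rq = cpu_rq(cpu);	/* was this_rq() */
		unsigned long cfs_max = arch_scale_cpu_capacity(NULL, cpu);

		*util = min(rq->cfs.avg.util_avg, cfs_max);
		*max = cfs_max;
	}

hence the hook needs to record which CPU it was registered for.)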

> > diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> > index 29a397067ffa..ed9c589e5386 100644
> > --- a/kernel/sched/cpufreq_schedutil.c
> > +++ b/kernel/sched/cpufreq_schedutil.c
> > @@ -218,6 +218,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
> >  	unsigned int next_f;
> >  	bool busy;
> >  
> > +	/* Remote callbacks aren't allowed for policies which aren't shared */
> > +	if (smp_processor_id() != hook->cpu)
> > +		return;
> > +
> >  	sugov_set_iowait_boost(sg_cpu, time, flags);
> >  	sg_cpu->last_update = time;
> >  
> > @@ -290,6 +294,10 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
> >  	unsigned long util, max;
> >  	unsigned int next_f;
> >  
> > +	/* Don't allow remote callbacks */
> > +	if (smp_processor_id() != hook->cpu)
> > +		return;
> > +
> >  	sugov_get_util(&util, &max);
> >  
> >  	raw_spin_lock(&sg_policy->update_lock);
> 
> Given the whole rq->lock thing, I suspect we could actually not do these
> two.

You meant sugov_get_util() and raw_spin_lock()? Why?

The locking is required here in the shared-policy case to make sure
only one CPU is updating the frequency for the entire policy. And we
can't really avoid that even with the rq->lock guarantees from the
scheduler for the target CPU. 
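
(Sketch of the existing flow in sugov_update_shared(), as quoted above:
each CPU of the policy can fire its hook concurrently under its own
rq->lock, so the policy-wide state is serialized separately:

	raw_spin_lock(&sg_policy->update_lock);

	sg_cpu->util = util;		/* this CPU's contribution */
	sg_cpu->max = max;
	sg_cpu->flags = flags;

	if (sugov_should_update_freq(sg_policy, time)) {
		next_f = sugov_next_freq_shared(sg_cpu);
		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);

so only one CPU at a time commits a frequency for the whole policy.)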

> That would then continue to process the iowait and other accounting
> stuff, but stall the moment we call into the actual driver, which will
> then drop the request on the floor as per the first few hunks.

I am not sure I understood your comment completely though.

> This seems ok. Except of course you'll have conflicts with Juri's patch
> set, but that should be trivial to sort out.

Yeah, I wouldn't mind rebasing if his series gets in first.

-- 
viresh
Peter Zijlstra July 24, 2017, 1:47 p.m. | #3
On Mon, Jul 24, 2017 at 04:31:22PM +0530, Viresh Kumar wrote:
> On 21-07-17, 15:03, Peter Zijlstra wrote:
> > On Thu, Jul 13, 2017 at 12:14:37PM +0530, Viresh Kumar wrote:
> > > diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
> > > index 29a397067ffa..ed9c589e5386 100644
> > > --- a/kernel/sched/cpufreq_schedutil.c
> > > +++ b/kernel/sched/cpufreq_schedutil.c
> > > @@ -218,6 +218,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
> > >  	unsigned int next_f;
> > >  	bool busy;
> > >  
> > > +	/* Remote callbacks aren't allowed for policies which aren't shared */
> > > +	if (smp_processor_id() != hook->cpu)
> > > +		return;
> > > +
> > >  	sugov_set_iowait_boost(sg_cpu, time, flags);
> > >  	sg_cpu->last_update = time;
> > >  
> > > @@ -290,6 +294,10 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
> > >  	unsigned long util, max;
> > >  	unsigned int next_f;
> > >  
> > > +	/* Don't allow remote callbacks */
> > > +	if (smp_processor_id() != hook->cpu)
> > > +		return;
> > > +
> > >  	sugov_get_util(&util, &max);
> > >  
> > >  	raw_spin_lock(&sg_policy->update_lock);
> > 
> > Given the whole rq->lock thing, I suspect we could actually not do these
> > two.
> 
> You meant sugov_get_util() and raw_spin_lock()? Why?
> 
> The locking is required here in the shared-policy case to make sure
> only one CPU is updating the frequency for the entire policy. And we
> can't really avoid that even with the rq->lock guarantees from the
> scheduler for the target CPU.

I said nothing about the shared locking. That is indeed required. All I
said is that those two tests you add could be left out.

> > That would then continue to process the iowait and other accounting
> > stuff, but stall the moment we call into the actual driver, which will
> > then drop the request on the floor as per the first few hunks.
> 
> I am not sure I understood your comment completely though.

Since we call cpufreq_update_util(@rq, ...) with @rq->lock held, all
such calls are in fact serialized for that CPU. Therefore the cpu !=
current_cpu tests you add are pointless.

Only once we get to the actual cpufreq driver (intel_pstate and others)
do we run into the fact that we might not be able to service the request
remotely. But since you also add a test there, that is sufficient.
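
(In other words, the serialization relied on here could even be
asserted; a sketch, not part of the posted patch:

	static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
	{
		struct update_util_data *data;

		/* All callers hold the target runqueue's lock, so calls
		 * into data->func() are serialized per cpu_of(rq). */
		lockdep_assert_held(&rq->lock);

		data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
							  cpu_of(rq)));
		if (data)
			data->func(data, rq_clock(rq), flags);
	}

the remote-vs-local decision then lives only in the drivers that cannot
service a request from another CPU.)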

Patch

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 47e24b5384b3..606b1a37a1af 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -275,6 +275,10 @@ static void dbs_update_util_handler(struct update_util_data *data, u64 time,
 	struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
 	u64 delta_ns, lst;
 
+	/* Don't allow remote callbacks */
+	if (smp_processor_id() != data->cpu)
+		return;
+
 	/*
 	 * The work may not be allowed to be queued up right now.
 	 * Possible reasons:
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b7fb8b7c980d..4bee2f4cbc28 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1732,6 +1732,10 @@ static void intel_pstate_update_util_pid(struct update_util_data *data,
 	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
 	u64 delta_ns = time - cpu->sample.time;
 
+	/* Don't allow remote callbacks */
+	if (smp_processor_id() != data->cpu)
+		return;
+
 	if ((s64)delta_ns < pid_params.sample_rate_ns)
 		return;
 
@@ -1749,6 +1753,10 @@ static void intel_pstate_update_util(struct update_util_data *data, u64 time,
 	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
 	u64 delta_ns;
 
+	/* Don't allow remote callbacks */
+	if (smp_processor_id() != data->cpu)
+		return;
+
 	if (flags & SCHED_CPUFREQ_IOWAIT) {
 		cpu->iowait_boost = int_tofp(1);
 	} else if (cpu->iowait_boost) {
diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index d2be2ccbb372..8256a8f35f22 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -16,6 +16,7 @@ 
 #ifdef CONFIG_CPU_FREQ
 struct update_util_data {
        void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
+       unsigned int cpu;
 };
 
 void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index dbc51442ecbc..ee4c596b71b4 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -42,6 +42,7 @@ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
 		return;
 
 	data->func = func;
+	data->cpu = cpu;
 	rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), data);
 }
 EXPORT_SYMBOL_GPL(cpufreq_add_update_util_hook);
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 29a397067ffa..ed9c589e5386 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -218,6 +218,10 @@ static void sugov_update_single(struct update_util_data *hook, u64 time,
 	unsigned int next_f;
 	bool busy;
 
+	/* Remote callbacks aren't allowed for policies which aren't shared */
+	if (smp_processor_id() != hook->cpu)
+		return;
+
 	sugov_set_iowait_boost(sg_cpu, time, flags);
 	sg_cpu->last_update = time;
 
@@ -290,6 +294,10 @@ static void sugov_update_shared(struct update_util_data *hook, u64 time,
 	unsigned long util, max;
 	unsigned int next_f;
 
+	/* Don't allow remote callbacks */
+	if (smp_processor_id() != hook->cpu)
+		return;
+
 	sugov_get_util(&util, &max);
 
 	raw_spin_lock(&sg_policy->update_lock);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index a84299f44b5d..7fcfaee39d19 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1136,7 +1136,7 @@ static void update_curr_dl(struct rq *rq)
 	}
 
 	/* kick cpufreq (see the comment in kernel/sched/sched.h). */
-	cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
+	cpufreq_update_util(rq, SCHED_CPUFREQ_DL);
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c95880e216f6..d378d02fdfcb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3278,7 +3278,9 @@ static inline void set_tg_cfs_propagate(struct cfs_rq *cfs_rq) {}
 
 static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 {
-	if (&this_rq()->cfs == cfs_rq) {
+	struct rq *rq = rq_of(cfs_rq);
+
+	if (&rq->cfs == cfs_rq) {
 		/*
 		 * There are a few boundary cases this might miss but it should
 		 * get called often enough that that should (hopefully) not be
@@ -3295,7 +3297,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
 		 *
 		 * See cpu_util().
 		 */
-		cpufreq_update_util(rq_of(cfs_rq), 0);
+		cpufreq_update_util(rq, 0);
 	}
 }
 
@@ -4875,7 +4877,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	 * passed.
 	 */
 	if (p->in_iowait)
-		cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
+		cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
 
 	for_each_sched_entity(se) {
 		if (se->on_rq)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 45caf937ef90..0af5ca9e3e3f 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -970,7 +970,7 @@ static void update_curr_rt(struct rq *rq)
 		return;
 
 	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
-	cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
+	cpufreq_update_util(rq, SCHED_CPUFREQ_RT);
 
 	schedstat_set(curr->se.statistics.exec_max,
 		      max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index eeef1a3086d1..aa9d5b87b4f8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2070,19 +2070,13 @@ static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
 {
 	struct update_util_data *data;
 
-	data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
+	data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
+						  cpu_of(rq)));
 	if (data)
 		data->func(data, rq_clock(rq), flags);
 }
-
-static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
-{
-	if (cpu_of(rq) == smp_processor_id())
-		cpufreq_update_util(rq, flags);
-}
 #else
 static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
-static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
 #endif /* CONFIG_CPU_FREQ */
 
 #ifdef arch_scale_freq_capacity