diff mbox series

[1/3] cpuidle: play_idle: Make play_idle more flexible

Message ID 20190909145015.26317-1-daniel.lezcano@linaro.org
State Superseded
Headers show
Series [1/3] cpuidle: play_idle: Make play_idle more flexible | expand

Commit Message

Daniel Lezcano Sept. 9, 2019, 2:50 p.m. UTC
The play_idle function has two users, the intel powerclamp and the
idle_injection.

The idle injection cooling device uses the function via the
idle_injection powercap's APIs. Unfortunately, play_idle is currently
limited by the idle state depth: by default the deepest idle state is
selected. On the ARM[64] platforms, most of the time it is the cluster
idle state, whose exit latency and residency can be very high. That
reduces the scope of the idle injection usage because the impact on
performance can be very significant.

If the idle injection cycles can be done with a shallow state like a
retention state, the cooling effect would eventually give similar
results to the cpufreq cooling device.

In order to prepare the function to receive an idle state parameter,
let's replace the 'use_deepest_state' boolean field with 'use_state'
and use this value to enter the specific idle state.

The current code keeps the default behavior, which is to go to the
deepest idle state.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

---
 drivers/cpuidle/cpuidle.c | 21 +++++++++++----------
 include/linux/cpuidle.h   | 14 +++++++-------
 kernel/sched/idle.c       | 10 +++++-----
 3 files changed, 23 insertions(+), 22 deletions(-)

-- 
2.17.1

Comments

Mathieu Poirier Sept. 18, 2019, 7:30 p.m. UTC | #1
On Mon, Sep 09, 2019 at 04:50:13PM +0200, Daniel Lezcano wrote:
> The play_idle function has two users, the intel powerclamp and the

> idle_injection.

> 

> The idle injection cooling device uses the function via the

> idle_injection powercap's APIs. Unfortunately, play_idle is currently

> limited by the idle state depth: by default the deepest idle state is

> selected. On the ARM[64] platforms, most of the time it is the cluster

> idle state, the exit latency and the residency can be very high. That

> reduces the scope of the idle injection usage because the impact on

> the performances can be very significant.

> 

> If the idle injection cycles can be done with a shallow state like a

> retention state, the cooling effect would eventually give similar

> results than the cpufreq cooling device.

> 

> In order to prepare the function to receive an idle state parameter,

> let's replace the 'use_deepest_state' boolean field with 'use_state'

> and use this value to enter the specific idle state.

> 

> The current code keeps the default behavior which is go to the deepest

> idle state.

> 

> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

> ---

>  drivers/cpuidle/cpuidle.c | 21 +++++++++++----------

>  include/linux/cpuidle.h   | 14 +++++++-------

>  kernel/sched/idle.c       | 10 +++++-----

>  3 files changed, 23 insertions(+), 22 deletions(-)

> 

> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c

> index 0895b988fa92..f8b54f277589 100644

> --- a/drivers/cpuidle/cpuidle.c

> +++ b/drivers/cpuidle/cpuidle.c

> @@ -99,31 +99,31 @@ static int find_deepest_state(struct cpuidle_driver *drv,

>  }

>  

>  /**

> - * cpuidle_use_deepest_state - Set/clear governor override flag.

> - * @enable: New value of the flag.

> + * cpuidle_use_state - Force the cpuidle framework to enter an idle state.

> + * @state: An integer for an idle state

>   *

> - * Set/unset the current CPU to use the deepest idle state (override governors

> - * going forward if set).

> + * Specify an idle state the cpuidle framework must step in and bypass

> + * the idle state selection process.

>   */

> -void cpuidle_use_deepest_state(bool enable)

> +void cpuidle_use_state(int state)

>  {

>  	struct cpuidle_device *dev;

>  

>  	preempt_disable();

>  	dev = cpuidle_get_device();

>  	if (dev)

> -		dev->use_deepest_state = enable;

> +		dev->use_state = state;

>  	preempt_enable();

>  }

>  

>  /**

>   * cpuidle_find_deepest_state - Find the deepest available idle state.

> - * @drv: cpuidle driver for the given CPU.

> - * @dev: cpuidle device for the given CPU.

>   */

> -int cpuidle_find_deepest_state(struct cpuidle_driver *drv,

> -			       struct cpuidle_device *dev)

> +int cpuidle_find_deepest_state(void)

>  {

> +	struct cpuidle_device *dev = cpuidle_get_device();

> +	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);

> +

>  	return find_deepest_state(drv, dev, UINT_MAX, 0, false);

>  }

>  

> @@ -554,6 +554,7 @@ static void __cpuidle_unregister_device(struct cpuidle_device *dev)

>  static void __cpuidle_device_init(struct cpuidle_device *dev)

>  {

>  	memset(dev->states_usage, 0, sizeof(dev->states_usage));

> +	dev->use_state = CPUIDLE_STATE_NOUSE;

>  	dev->last_residency = 0;

>  	dev->next_hrtimer = 0;

>  }

> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h

> index 1a9f54eb3aa1..a1839122e219 100644

> --- a/include/linux/cpuidle.h

> +++ b/include/linux/cpuidle.h

> @@ -15,6 +15,7 @@

>  #include <linux/list.h>

>  #include <linux/hrtimer.h>

>  

> +#define CPUIDLE_STATE_NOUSE	-1

>  #define CPUIDLE_STATE_MAX	10

>  #define CPUIDLE_NAME_LEN	16

>  #define CPUIDLE_DESC_LEN	32

> @@ -80,11 +81,12 @@ struct cpuidle_driver_kobj;

>  struct cpuidle_device {

>  	unsigned int		registered:1;

>  	unsigned int		enabled:1;

> -	unsigned int		use_deepest_state:1;

>  	unsigned int		poll_time_limit:1;

>  	unsigned int		cpu;

> +


Spurious newline

>  	ktime_t			next_hrtimer;

>  

> +	int			use_state;

>  	int			last_state_idx;

>  	int			last_residency;

>  	u64			poll_limit_ns;

> @@ -200,19 +202,17 @@ static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }

>  #endif

>  

>  #ifdef CONFIG_CPU_IDLE

> -extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,

> -				      struct cpuidle_device *dev);

> +extern int cpuidle_find_deepest_state(void);

>  extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,

>  				struct cpuidle_device *dev);

> -extern void cpuidle_use_deepest_state(bool enable);

> +extern void cpuidle_use_state(int state);

>  #else

> -static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,

> -					     struct cpuidle_device *dev)

> +static inline int cpuidle_find_deepest_state(void)

>  {return -ENODEV; }

>  static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,

>  				       struct cpuidle_device *dev)

>  {return -ENODEV; }

> -static inline void cpuidle_use_deepest_state(bool enable)

> +static inline void cpuidle_use_state(int state)

>  {

>  }

>  #endif

> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c

> index b98283fc6914..17da9cb309e1 100644

> --- a/kernel/sched/idle.c

> +++ b/kernel/sched/idle.c

> @@ -165,7 +165,8 @@ static void cpuidle_idle_call(void)

>  	 * until a proper wakeup interrupt happens.

>  	 */

>  

> -	if (idle_should_enter_s2idle() || dev->use_deepest_state) {

> +	if (idle_should_enter_s2idle() ||

> +	    dev->use_state != CPUIDLE_STATE_NOUSE) {

>  		if (idle_should_enter_s2idle()) {

>  			rcu_idle_enter();

>  

> @@ -181,8 +182,7 @@ static void cpuidle_idle_call(void)

>  		tick_nohz_idle_stop_tick();

>  		rcu_idle_enter();

>  

> -		next_state = cpuidle_find_deepest_state(drv, dev);

> -		call_cpuidle(drv, dev, next_state);

> +		call_cpuidle(drv, dev, dev->use_state);

>  	} else {

>  		bool stop_tick = true;

>  

> @@ -328,7 +328,7 @@ void play_idle(unsigned long duration_us)

>  	rcu_sleep_check();

>  	preempt_disable();

>  	current->flags |= PF_IDLE;

> -	cpuidle_use_deepest_state(true);

> +	cpuidle_use_state(cpuidle_find_deepest_state());

>  

>  	it.done = 0;

>  	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);

> @@ -339,7 +339,7 @@ void play_idle(unsigned long duration_us)

>  	while (!READ_ONCE(it.done))

>  		do_idle();

>  

> -	cpuidle_use_deepest_state(false);

> +	cpuidle_use_state(CPUIDLE_STATE_NOUSE);

>  	current->flags &= ~PF_IDLE;

>  

>  	preempt_fold_need_resched();


With the above:

Acked-by: Mathieu Poirier <mathieu.poirier@linaro.org>


> -- 

> 2.17.1

>
Daniel Lezcano Sept. 24, 2019, 1:02 p.m. UTC | #2
Hi Mathieu,

On 18/09/2019 21:35, Mathieu Poirier wrote:
> On Mon, Sep 09, 2019 at 04:50:15PM +0200, Daniel Lezcano wrote:

>> Currently the idle injection framework only allows to inject the

>> deepest idle state available on the system.

>>

>> Give the opportunity to specify which idle state we want to inject by

>> adding a new function helper to set the state and use it when calling

>> play_idle().

>>

>> There is no functional changes, the cpuidle state is the deepest one.

>>

>> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>

>> ---


[ ... ]

>> +/**

>> + * idle_inject_set_state - set the idle state to inject

>> + * @state: an integer for the idle state to inject

>> + */

>> +void idle_inject_set_state(struct idle_inject_device *ii_dev, int state)

>> +{

>> +	if (state >= CPUIDLE_STATE_NOUSE && state < CPUIDLE_STATE_MAX)

>> +		WRITE_ONCE(ii_dev->state, state);

>> +}

>> +

>>  /**

>>   * idle_inject_start - start idle injections

>>   * @ii_dev: idle injection control device structure

>> @@ -298,6 +310,7 @@ struct idle_inject_device *idle_inject_register(struct cpumask *cpumask)

>>  	cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask);

>>  	hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);

>>  	ii_dev->timer.function = idle_inject_timer_fn;

>> +	ii_dev->state = cpuidle_find_deepest_state();

>>  

>>  	for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) {

>>  

>> diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h

>> index a445cd1a36c5..e2b26b9ccd34 100644

>> --- a/include/linux/idle_inject.h

>> +++ b/include/linux/idle_inject.h

>> @@ -26,4 +26,7 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev,

>>  void idle_inject_get_duration(struct idle_inject_device *ii_dev,

>>  				 unsigned int *run_duration_us,

>>  				 unsigned int *idle_duration_us);

>> +

>> +void idle_inject_set_state(struct idle_inject_device *ii_dev, int state);

>> +

> 

> The above function is not used in this patch and as such should be introduce as

> part of future work.  Otherwise I agree that this patch does not carry any

> functional changes.

> 

> Without function idle_inject_set_state():


I was about to remove the function, but actually removing it may not make
sense: idle_inject is a framework providing the different APIs to do the
idle injection, and the function is a helper to set the state value. It
comes with the addition of the state number in the structure.

The next patch is the idle cooling device, which makes use of it.

Can I still consider your acked-by valid?

-- 
 <http://www.linaro.org/> Linaro.org │ Open source software for ARM SoCs

Follow Linaro:  <http://www.facebook.com/pages/Linaro> Facebook |
<http://twitter.com/#!/linaroorg> Twitter |
<http://www.linaro.org/linaro-blog/> Blog
diff mbox series

Patch

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 0895b988fa92..f8b54f277589 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -99,31 +99,31 @@  static int find_deepest_state(struct cpuidle_driver *drv,
 }
 
 /**
- * cpuidle_use_deepest_state - Set/clear governor override flag.
- * @enable: New value of the flag.
+ * cpuidle_use_state - Force the cpuidle framework to enter an idle state.
+ * @state: An integer for an idle state
  *
- * Set/unset the current CPU to use the deepest idle state (override governors
- * going forward if set).
+ * Specify an idle state the cpuidle framework must step in and bypass
+ * the idle state selection process.
  */
-void cpuidle_use_deepest_state(bool enable)
+void cpuidle_use_state(int state)
 {
 	struct cpuidle_device *dev;
 
 	preempt_disable();
 	dev = cpuidle_get_device();
 	if (dev)
-		dev->use_deepest_state = enable;
+		dev->use_state = state;
 	preempt_enable();
 }
 
 /**
  * cpuidle_find_deepest_state - Find the deepest available idle state.
- * @drv: cpuidle driver for the given CPU.
- * @dev: cpuidle device for the given CPU.
  */
-int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
-			       struct cpuidle_device *dev)
+int cpuidle_find_deepest_state(void)
 {
+	struct cpuidle_device *dev = cpuidle_get_device();
+	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+
 	return find_deepest_state(drv, dev, UINT_MAX, 0, false);
 }
 
@@ -554,6 +554,7 @@  static void __cpuidle_unregister_device(struct cpuidle_device *dev)
 static void __cpuidle_device_init(struct cpuidle_device *dev)
 {
 	memset(dev->states_usage, 0, sizeof(dev->states_usage));
+	dev->use_state = CPUIDLE_STATE_NOUSE;
 	dev->last_residency = 0;
 	dev->next_hrtimer = 0;
 }
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 1a9f54eb3aa1..a1839122e219 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -15,6 +15,7 @@ 
 #include <linux/list.h>
 #include <linux/hrtimer.h>
 
+#define CPUIDLE_STATE_NOUSE	-1
 #define CPUIDLE_STATE_MAX	10
 #define CPUIDLE_NAME_LEN	16
 #define CPUIDLE_DESC_LEN	32
@@ -80,11 +81,12 @@  struct cpuidle_driver_kobj;
 struct cpuidle_device {
 	unsigned int		registered:1;
 	unsigned int		enabled:1;
-	unsigned int		use_deepest_state:1;
 	unsigned int		poll_time_limit:1;
 	unsigned int		cpu;
+
 	ktime_t			next_hrtimer;
 
+	int			use_state;
 	int			last_state_idx;
 	int			last_residency;
 	u64			poll_limit_ns;
@@ -200,19 +202,17 @@  static inline struct cpuidle_device *cpuidle_get_device(void) {return NULL; }
 #endif
 
 #ifdef CONFIG_CPU_IDLE
-extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
-				      struct cpuidle_device *dev);
+extern int cpuidle_find_deepest_state(void);
 extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
 				struct cpuidle_device *dev);
-extern void cpuidle_use_deepest_state(bool enable);
+extern void cpuidle_use_state(int state);
 #else
-static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
-					     struct cpuidle_device *dev)
+static inline int cpuidle_find_deepest_state(void)
 {return -ENODEV; }
 static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
 				       struct cpuidle_device *dev)
 {return -ENODEV; }
-static inline void cpuidle_use_deepest_state(bool enable)
+static inline void cpuidle_use_state(int state)
 {
 }
 #endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index b98283fc6914..17da9cb309e1 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -165,7 +165,8 @@  static void cpuidle_idle_call(void)
 	 * until a proper wakeup interrupt happens.
 	 */
 
-	if (idle_should_enter_s2idle() || dev->use_deepest_state) {
+	if (idle_should_enter_s2idle() ||
+	    dev->use_state != CPUIDLE_STATE_NOUSE) {
 		if (idle_should_enter_s2idle()) {
 			rcu_idle_enter();
 
@@ -181,8 +182,7 @@  static void cpuidle_idle_call(void)
 		tick_nohz_idle_stop_tick();
 		rcu_idle_enter();
 
-		next_state = cpuidle_find_deepest_state(drv, dev);
-		call_cpuidle(drv, dev, next_state);
+		call_cpuidle(drv, dev, dev->use_state);
 	} else {
 		bool stop_tick = true;
 
@@ -328,7 +328,7 @@  void play_idle(unsigned long duration_us)
 	rcu_sleep_check();
 	preempt_disable();
 	current->flags |= PF_IDLE;
-	cpuidle_use_deepest_state(true);
+	cpuidle_use_state(cpuidle_find_deepest_state());
 
 	it.done = 0;
 	hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
@@ -339,7 +339,7 @@  void play_idle(unsigned long duration_us)
 	while (!READ_ONCE(it.done))
 		do_idle();
 
-	cpuidle_use_deepest_state(false);
+	cpuidle_use_state(CPUIDLE_STATE_NOUSE);
 	current->flags &= ~PF_IDLE;
 
 	preempt_fold_need_resched();