diff mbox

[V2,4/5] idle: Move idle conditions in cpuidle_idle main function

Message ID 1393250151-6982-4-git-send-email-daniel.lezcano@linaro.org
State New
Headers show

Commit Message

Daniel Lezcano Feb. 24, 2014, 1:55 p.m. UTC
This patch moves the condition before entering idle into the cpuidle main
function located in idle.c. That simplifies the idle mainloop functions and
increases the readability of the conditions to truly enter idle.

This patch is code reorganization and does not change the behavior of the
function.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 kernel/sched/idle.c |   40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Comments

Daniel Lezcano Feb. 24, 2014, 3:39 p.m. UTC | #1
On 02/24/2014 03:59 PM, Peter Zijlstra wrote:
> On Mon, Feb 24, 2014 at 02:55:50PM +0100, Daniel Lezcano wrote:
>> @@ -136,25 +155,8 @@ static void cpu_idle_loop(void)
>>   			local_irq_disable();
>>   			arch_cpu_idle_enter();
>>
>> -			/*
>> -			 * In poll mode we reenable interrupts and spin.
>> -			 *
>> -			 * Also if we detected in the wakeup from idle
>> -			 * path that the tick broadcast device expired
>> -			 * for us, we don't want to go deep idle as we
>> -			 * know that the IPI is going to arrive right
>> -			 * away
>> -			 */
>> -			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
>> -				cpu_idle_poll();
>> -			} else {
>> -				if (!current_clr_polling_and_test()) {
>> -					cpuidle_idle_call();
>> -				} else {
>> -					local_irq_enable();
>> -				}
>> -				__current_set_polling();
>> -			}
>> +			cpuidle_idle_call();
>> +
>
> Yeah, not liking that much; you can make it look like:
>
> 	if (cpu_idle_force_poll || tick_check_broadcast_expired())
> 		cpu_idle_poll();
> 	else
> 		cpu_idle_call();
>
> Though. That keeps the polling case separate from the actual idle
> function.

Yes, you are right, it looks better.

> And when you do that; you can also push down the
> current_clr_polling_and_test() muck so it doesn't cover the actual
> cpuidle policy code.

I am not getting it. Where do you suggest to move it ?
Daniel Lezcano Feb. 24, 2014, 5:03 p.m. UTC | #2
On 02/24/2014 05:05 PM, Peter Zijlstra wrote:
> On Mon, Feb 24, 2014 at 04:39:08PM +0100, Daniel Lezcano wrote:
>
>>> And when you do that; you can also push down the
>>> current_clr_polling_and_test() muck so it doesn't cover the actual
>>> cpuidle policy code.
>>
>> I am not getting it. Where do you suggest to move it ?
>
> A little something like so I suppose;

Thanks for taking the time to send me this patch.

> only call
> current_clr_polling_and_test() right before calling the actual idle
> function. Although I suppose cpuidle_enter() can still do all sorts of
> weird.

Well there is the polling idle state for the x86 and ppc cpuidle 
drivers. Except that, I think we have something more or less clean.

> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -78,37 +78,35 @@ static int cpuidle_idle_call(void)
>   	stop_critical_timings();
>   	rcu_idle_enter();
>
> -	next_state = cpuidle_enabled(drv, dev);
> -	if (next_state < 0) {
> -		arch_cpu_idle();
> -		goto out;
> -	}
> -
> -	/*
> -	 * Ask the governor for the next state, this call can fail for
> -	 * different reasons: cpuidle is not enabled or an idle state
> -	 * fulfilling the constraints was not found. In this case, we
> -	 * fall back to the default idle function
> -	 */
> -	next_state = cpuidle_select(drv, dev);
> +	if (cpuidle_enabled(drv, dev) == 0) {
> +		/*
> +		 * Ask the governor for the next state, this call can fail for
> +		 * different reasons: cpuidle is not enabled or an idle state
> +		 * fulfilling the constraints was not found. In this case, we
> +		 * fall back to the default idle function
> +		 */
> +		next_state = cpuidle_select(drv, dev);
> +
> +		if (current_clr_polling_and_test()) {
> +			dev->last_residency = 0;
> +			entered_state = next_state;
> +			local_irq_enable();
> +		} else {
> +			trace_cpu_idle_rcuidle(next_state, dev->cpu);
> +			entered_state = cpuidle_enter(drv, dev, next_state);
> +			trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
> +		}
>
> -	if (need_resched()) {

Ok. The need_resched is now replaced by 'current_clr_polling_and_test', 
with a call to '__current_set_polling()' to set the flag back, right ?

For my personal information, what is the subtlety with:

if (tif_need_resched())
	set_preempt_need_resched();

?

> -		dev->last_residency = 0;
>   		/* give the governor an opportunity to reflect on the outcome */
> -		cpuidle_reflect(dev, next_state);
> -		local_irq_enable();
> -		goto out;
> +		cpuidle_reflect(dev, entered_state);
> +	} else {
> +		if (current_clr_polling_and_test())
> +			local_irq_enable();
> +		else
> +			arch_cpu_idle();
>   	}
> +	__current_set_polling();
>
> -	trace_cpu_idle_rcuidle(next_state, dev->cpu);
> -
> -	entered_state = cpuidle_enter(drv, dev, next_state);
> -
> -	trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
> -
> -	/* give the governor an opportunity to reflect on the outcome */
> -	cpuidle_reflect(dev, entered_state);
> -out:
>   	if (WARN_ON_ONCE(irqs_disabled()))
>   		local_irq_enable();
>
> @@ -145,16 +143,11 @@ static void cpu_idle_loop(void)
>   			 * know that the IPI is going to arrive right
>   			 * away
>   			 */
> -			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
> +			if (cpu_idle_force_poll || tick_check_broadcast_expired())
>   				cpu_idle_poll();
> -			} else {
> -				if (!current_clr_polling_and_test()) {
> -					cpuidle_idle_call();
> -				} else {
> -					local_irq_enable();
> -				}
> -				__current_set_polling();
> -			}
> +			else
> +				cpu_idle_call();
> +
>   			arch_cpu_idle_exit();
>   			/*
>   			 * We need to test and propagate the TIF_NEED_RESCHED
>
Nicolas Pitre Feb. 24, 2014, 5:54 p.m. UTC | #3
On Mon, 24 Feb 2014, Peter Zijlstra wrote:

> Urgh, looks like something went wrong with: cf37b6b48428d 
> 
> That commit doesn't actually remove kernel/cpu/idle.c nor is the new
> code an exact replica of the old one.
> 
> Ingo, any chance we can get that fixed?

The patch I posted was based on v3.13.  It doesn't apply on later 
kernels without a conflict.  I suspect the conflict resolution was 
goofed somehow.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Daniel Lezcano Feb. 24, 2014, 7:04 p.m. UTC | #4
On 02/24/2014 06:22 PM, Peter Zijlstra wrote:
> On Mon, Feb 24, 2014 at 06:03:10PM +0100, Daniel Lezcano wrote:
>> Well there is the polling idle state for the x86 and ppc cpuidle drivers.
>> Except that, I think we have something more or less clean.
>
> Yeah, they have to set it back to polling again :/
>
> Ideally we'd sweep the entire tree and switch the default polling state
> to polling and add a current_clr_polling_and_test() to all WFI/HLT like
> ones that need the interrupt.
>
> But lots of work that.
>
>>> -	if (need_resched()) {
>>
>> Ok. The need_resched is now replaced by 'current_clr_polling_and_test', with
>> a call to '__current_set_polling()' to set the flag back, right ?
>
> Yah.
>
>> For my personal information, what is the subtlety with:
>>
>> if (tif_need_resched())
>> 	set_preempt_need_resched();
>>
>> ?
>
> Urgh, looks like something went wrong with: cf37b6b48428d
>
> That commit doesn't actually remove kernel/cpu/idle.c nor is the new
> code an exact replica of the old one.
>
> Ingo, any chance we can get that fixed?
>
> Daniel; does the below change/comment clarify?

Yes, I think so.

[ ... ]

> +
> +		/*
> +		 * Since we fell out of the loop above, we know
> +		 * TIF_NEED_RESCHED must be set, propagate it into
> +		 * PREEMPT_NEED_RESCHED.
> +		 *
> +		 * This is required because for polling idle loops we will
> +		 * not have had an IPI to fold the state for us.
> +		 */
> +		preempt_set_need_resched();
>   		tick_nohz_idle_exit();
>   		schedule_preempt_disabled();

So IIUC, the mainloop has two states: one where it is blocked on a 
HLT/WFI instruction (or about to enter/exit this state) and another one 
outside of this blocking section.

When the idle task is blocked on HLT/WFI, it needs the IPI-reschedule in 
order to be woken up and rescheduled. But if it is outside this section, 
the idle task is not waiting for an interrupt and an expensive IPI can 
be saved by just setting the TS_POLLING flag, the scheduler will check 
this flag and won't send the IPI.

But 'set_preempt_need_resched' is called from the IPI handler. So if no 
IPI is sent because the idle task is in polling state, we have to set it 
ourselves.

Now, the difference from the old code is that we no longer need the 
'tif_need_resched()' check, because at this point it is always true.

Am I right ?
diff mbox

Patch

Index: cpuidle-next/kernel/sched/idle.c
===================================================================
--- cpuidle-next.orig/kernel/sched/idle.c
+++ cpuidle-next/kernel/sched/idle.c
@@ -75,6 +75,23 @@  static int cpuidle_idle_call(void)
 	struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
 	int next_state, entered_state;
 
+	/*
+	 * In poll mode we reenable interrupts and spin.
+	 *
+	 * Also if we detected in the wakeup from idle path that the
+	 * tick broadcast device expired for us, we don't want to go
+	 * deep idle as we know that the IPI is going to arrive right
+	 * away
+	 */
+	if (cpu_idle_force_poll || tick_check_broadcast_expired())
+		return cpu_idle_poll();
+
+	if (current_clr_polling_and_test()) {
+		local_irq_enable();
+		__current_set_polling();
+		return 0;
+	}
+
 	stop_critical_timings();
 	rcu_idle_enter();
 
@@ -115,6 +132,8 @@  out:
 	rcu_idle_exit();
 	start_critical_timings();
 
+	__current_set_polling();
+
 	return 0;
 }
 
@@ -136,25 +155,8 @@  static void cpu_idle_loop(void)
 			local_irq_disable();
 			arch_cpu_idle_enter();
 
-			/*
-			 * In poll mode we reenable interrupts and spin.
-			 *
-			 * Also if we detected in the wakeup from idle
-			 * path that the tick broadcast device expired
-			 * for us, we don't want to go deep idle as we
-			 * know that the IPI is going to arrive right
-			 * away
-			 */
-			if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
-				cpu_idle_poll();
-			} else {
-				if (!current_clr_polling_and_test()) {
-					cpuidle_idle_call();
-				} else {
-					local_irq_enable();
-				}
-				__current_set_polling();
-			}
+			cpuidle_idle_call();
+
 			arch_cpu_idle_exit();
 			/*
 			 * We need to test and propagate the TIF_NEED_RESCHED