[v6,2/2] arm64: enable context tracking

Message ID 1401399904-24471-3-git-send-email-larry.bassel@linaro.org
State New
Headers show

Commit Message

Larry Bassel May 29, 2014, 9:45 p.m.
Make calls to ct_user_enter when the kernel is exited
and ct_user_exit when the kernel is entered (in el0_da,
el0_ia, el0_svc, el0_irq and all of the "error" paths).

These macros expand to function calls which will only work
properly if el0_sync and related code has been rearranged
(in a previous patch of this series).

The calls to ct_user_exit are made after hw debugging has been
enabled (enable_dbg_and_irq).

The call to ct_user_enter is made at the beginning of the
kernel_exit macro.

This patch is based on earlier work by Kevin Hilman.
Save/restore optimizations were also done by Kevin.

Signed-off-by: Kevin Hilman <khilman@linaro.org>
Signed-off-by: Larry Bassel <larry.bassel@linaro.org>
---
 arch/arm64/Kconfig                   |  1 +
 arch/arm64/include/asm/thread_info.h |  4 ++++
 arch/arm64/kernel/entry.S            | 39 +++++++++++++++++++++++++++++++++++-
 3 files changed, 43 insertions(+), 1 deletion(-)

Comments

Will Deacon May 30, 2014, 6:23 p.m. | #1
On Thu, May 29, 2014 at 10:45:04PM +0100, Larry Bassel wrote:
> Make calls to ct_user_enter when the kernel is exited
> and ct_user_exit when the kernel is entered (in el0_da,
> el0_ia, el0_svc, el0_irq and all of the "error" paths).
> 
> These macros expand to function calls which will only work
> properly if el0_sync and related code has been rearranged
> (in a previous patch of this series).
> 
> The calls to ct_user_exit are made after hw debugging has been
> enabled (enable_dbg_and_irq).
> 
> The call to ct_user_enter is made at the beginning of the
> kernel_exit macro.
> 
> This patch is based on earlier work by Kevin Hilman.
> Save/restore optimizations were also done by Kevin.

Apparently I'm feeling pedantic today, one minor comment below...

> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index b0101b9..3c484e2 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -30,6 +30,32 @@
>  #include <asm/unistd32.h>
>  
>  /*
> + * Context tracking subsystem.  Used to instrument transitions
> + * between user and kernel mode.
> + */
> +	.macro ct_user_exit, restore = 0
> +#ifdef CONFIG_CONTEXT_TRACKING
> +	bl	context_tracking_user_exit
> +	.if \restore == 1

... I know we already changed this argument from \save, but maybe just
naming it \syscall makes most sense now?

If you make that change:

  Acked-by: Will Deacon <will.deacon@arm.com>

I'd like to give these some stress testing before it gets merged, so I'm
not sure if it'll make it for 3.16 given where we are at the moment.

Will
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Will Deacon June 3, 2014, 10:26 a.m. | #2
Hi guys,

On Fri, May 30, 2014 at 08:08:38PM +0100, Kevin Hilman wrote:
> Will Deacon <will.deacon@arm.com> writes:
> > I'd like to give these some stress testing before it gets merged, so I'm
> > not sure if it'll make it for 3.16 given where we are at the moment.
> 
> FWIW, this feature is disabled by default.  I use the following kconfig
> fragment to enable the various parts I use for testing:
> 
> CONFIG_NO_HZ=y
> CONFIG_NO_HZ_FULL=y
> CONFIG_NO_HZ_FULL_ALL=y
> CONFIG_NO_HZ_FULL_SYSIDLE=y
> 
> # default to power-efficient workqueues (which are then set to unbound)
> CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
> 
> # lockup detector sets a 4s timer on every CPU, which wakes CPUs
> # from idle. (alternately, can be controlled via procfs,
> # e.g: echo 0 > /proc/sys/kernel/watchdog)
> #CONFIG_LOCKUP_DETECTOR=n

I had a go with this, but I couldn't seem to trigger any context tracking
without forcing CONFIG_CONTEXT_TRACKING_FORCE=y. Does that mean we're
missing something else?

Anyway, with that forced on, I see the following during boot:


------------[ cut here ]------------
WARNING: CPU: 0 PID: 0 at kernel/rcu/tree.c:418 rcu_eqs_enter+0x84/0xa4()
Modules linked in:
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.15.0-rc8+ #5
Call trace:
[<ffffffc000088048>] dump_backtrace+0x0/0x130
[<ffffffc000088188>] show_stack+0x10/0x1c
[<ffffffc0004891a0>] dump_stack+0x74/0xbc
[<ffffffc0000a45e0>] warn_slowpath_common+0x8c/0xb4
[<ffffffc0000a46cc>] warn_slowpath_null+0x14/0x20
[<ffffffc0000efc14>] rcu_eqs_enter+0x80/0xa4
[<ffffffc0000efc58>] rcu_idle_enter+0x20/0x50
[<ffffffc0000dd314>] cpu_startup_entry+0x118/0x184
[<ffffffc0004865ec>] rest_init+0x7c/0x88
[<ffffffc000609800>] start_kernel+0x368/0x37c
---[ end trace c17313e162496e65 ]---
------------[ cut here ]------------
WARNING: CPU: 0 PID: 0 at kernel/rcu/tree.c:541 rcu_eqs_exit+0xb0/0xbc()
Modules linked in:
CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W     3.15.0-rc8+ #5
Call trace:
[<ffffffc000088048>] dump_backtrace+0x0/0x130
[<ffffffc000088188>] show_stack+0x10/0x1c
[<ffffffc0004891a0>] dump_stack+0x74/0xbc
[<ffffffc0000a45e0>] warn_slowpath_common+0x8c/0xb4
[<ffffffc0000a46cc>] warn_slowpath_null+0x14/0x20
[<ffffffc0000ed384>] rcu_eqs_exit+0xac/0xbc
[<ffffffc0000efdac>] rcu_user_exit+0xc/0x18
[<ffffffc00011d48c>] context_tracking_user_exit+0xc4/0xd4
[<ffffffc000083d98>] el1_irq+0x58/0xd4
[<ffffffc0000dd318>] cpu_startup_entry+0x11c/0x184
[<ffffffc0004865ec>] rest_init+0x7c/0x88
[<ffffffc000609800>] start_kernel+0x368/0x37c
---[ end trace c17313e162496e66 ]---


Can you take a look please? I had to fix up some conflicts to apply your
patches against our for-next branch, so I've put a branch here for you to
look at:

  git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git aarch64/context-tracking

Cheers,

Will
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Kevin Hilman June 3, 2014, 5:34 p.m. | #3
Will Deacon <will.deacon@arm.com> writes:

> Hi guys,
>
> On Fri, May 30, 2014 at 08:08:38PM +0100, Kevin Hilman wrote:
>> Will Deacon <will.deacon@arm.com> writes:
>> > I'd like to give these some stress testing before it gets merged, so I'm
>> > not sure if it'll make it for 3.16 given where we are at the moment.
>> 
>> FWIW, this feature is disabled by default.  I use the following kconfig
>> fragment to enable the various parts I use for testing:
>> 
>> CONFIG_NO_HZ=y
>> CONFIG_NO_HZ_FULL=y
>> CONFIG_NO_HZ_FULL_ALL=y
>> CONFIG_NO_HZ_FULL_SYSIDLE=y
>> 
>> # default to power-efficient workqueues (which are then set to unbound)
>> CONFIG_WQ_POWER_EFFICIENT_DEFAULT=y
>> 
>> # lockup detector sets a 4s timer on every CPU, which wakes CPUs
>> # from idle. (alternately, can be controlled via procfs,
>> # e.g: echo 0 > /proc/sys/kernel/watchdog)
>> #CONFIG_LOCKUP_DETECTOR=n
>
> I had a go with this, but I couldn't seem to trigger any context tracking
> without forcing CONFIG_CONTEXT_TRACKING_FORCE=y. Does that mean we're
> missing something else?

No, it just means that you never hit the conditions to trigger full
NOHZ.  Using _FORCE is a good way to do that since it forces the context
tracking paths whether or not it's actually needed by full NOHZ.

> Anyway, with that forced on, I see the following during boot:
>
> ------------[ cut here ]------------
> WARNING: CPU: 0 PID: 0 at kernel/rcu/tree.c:418 rcu_eqs_enter+0x84/0xa4()
> Modules linked in:
> CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.15.0-rc8+ #5
> Call trace:
> [<ffffffc000088048>] dump_backtrace+0x0/0x130
> [<ffffffc000088188>] show_stack+0x10/0x1c
> [<ffffffc0004891a0>] dump_stack+0x74/0xbc
> [<ffffffc0000a45e0>] warn_slowpath_common+0x8c/0xb4
> [<ffffffc0000a46cc>] warn_slowpath_null+0x14/0x20
> [<ffffffc0000efc14>] rcu_eqs_enter+0x80/0xa4
> [<ffffffc0000efc58>] rcu_idle_enter+0x20/0x50
> [<ffffffc0000dd314>] cpu_startup_entry+0x118/0x184
> [<ffffffc0004865ec>] rest_init+0x7c/0x88
> [<ffffffc000609800>] start_kernel+0x368/0x37c
> ---[ end trace c17313e162496e65 ]---

So this suggests that we've told RCU that we've entered userspace twice,
without having left (the context tracker is an extention of the RCU
extended quiscent state machinery.)

So after I was able to reproduce this (after some IRC discussion with
Will, and using full ubuntu rootfs and CONFIG_CONTEXT_TRACKING_FORCE=y)
I think I found the bug.

Basically, the problem is that we have a ct_user_exit in el1_irq
(interrupt in kernel space) when it should be in el0_irq (interrupt in
user space.)  

Moving the ct_user_exit into el0_irq, I'm not able to see the problem.  

Larry, could you sanity check that and respin a v8 with that change if
it works for you?

Thanks,

Kevin
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e759af5..ef18ae5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -55,6 +55,7 @@  config ARM64
 	select RTC_LIB
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
+	select HAVE_CONTEXT_TRACKING
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 720e70b..8363f34 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -100,6 +100,7 @@  static inline struct thread_info *current_thread_info(void)
 #define TIF_SIGPENDING		0
 #define TIF_NEED_RESCHED	1
 #define TIF_NOTIFY_RESUME	2	/* callback before returning to user */
+#define TIF_NOHZ                7
 #define TIF_SYSCALL_TRACE	8
 #define TIF_POLLING_NRFLAG	16
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
@@ -113,9 +114,12 @@  static inline struct thread_info *current_thread_info(void)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_32BIT		(1 << TIF_32BIT)
+#define _TIF_SYSCALL_TRACE      (1 << TIF_SYSCALL_TRACE)
+#define _TIF_NOHZ               (1 << TIF_NOHZ)
 
 #define _TIF_WORK_MASK		(_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
 				 _TIF_NOTIFY_RESUME)
+#define _TIF_SYSCALL_WORK       (_TIF_SYSCALL_TRACE | _TIF_NOHZ)
 
 #endif /* __KERNEL__ */
 #endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index b0101b9..3c484e2 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -30,6 +30,32 @@ 
 #include <asm/unistd32.h>
 
 /*
+ * Context tracking subsystem.  Used to instrument transitions
+ * between user and kernel mode.
+ */
+	.macro ct_user_exit, restore = 0
+#ifdef CONFIG_CONTEXT_TRACKING
+	bl	context_tracking_user_exit
+	.if \restore == 1
+	/*
+	 * Save/restore needed during syscalls.  Restore syscall arguments from
+	 * the values already saved on stack during kernel_entry.
+	 */
+	ldp	x0, x1, [sp]
+	ldp	x2, x3, [sp, #S_X2]
+	ldp	x4, x5, [sp, #S_X4]
+	ldp	x6, x7, [sp, #S_X6]
+	.endif
+#endif
+	.endm
+
+	.macro ct_user_enter
+#ifdef CONFIG_CONTEXT_TRACKING
+	bl	context_tracking_user_enter
+#endif
+	.endm
+
+/*
  * Bad Abort numbers
  *-----------------
  */
@@ -91,6 +117,7 @@ 
 	.macro	kernel_exit, el, ret = 0
 	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
 	.if	\el == 0
+	ct_user_enter
 	ldr	x23, [sp, #S_SP]		// load return stack pointer
 	.endif
 	.if	\ret
@@ -318,6 +345,7 @@  el1_irq:
 	bl	trace_hardirqs_off
 #endif
 
+	ct_user_exit
 	irq_handler
 
 #ifdef CONFIG_PREEMPT
@@ -427,6 +455,7 @@  el0_da:
 	mrs	x26, far_el1
 	// enable interrupts before calling the main handler
 	enable_dbg_and_irq
+	ct_user_exit
 	bic	x0, x26, #(0xff << 56)
 	mov	x1, x25
 	mov	x2, sp
@@ -439,6 +468,7 @@  el0_ia:
 	mrs	x26, far_el1
 	// enable interrupts before calling the main handler
 	enable_dbg_and_irq
+	ct_user_exit
 	mov	x0, x26
 	orr	x1, x25, #1 << 24		// use reserved ISS bit for instruction aborts
 	mov	x2, sp
@@ -449,6 +479,7 @@  el0_fpsimd_acc:
 	 * Floating Point or Advanced SIMD access
 	 */
 	enable_dbg
+	ct_user_exit
 	mov	x0, x25
 	mov	x1, sp
 	adr	lr, ret_to_user
@@ -458,6 +489,7 @@  el0_fpsimd_exc:
 	 * Floating Point or Advanced SIMD exception
 	 */
 	enable_dbg
+	ct_user_exit
 	mov	x0, x25
 	mov	x1, sp
 	adr	lr, ret_to_user
@@ -480,6 +512,7 @@  el0_undef:
 	 */
 	// enable interrupts before calling the main handler
 	enable_dbg_and_irq
+	ct_user_exit
 	mov	x0, sp
 	adr	lr, ret_to_user
 	b	do_undefinstr
@@ -493,9 +526,11 @@  el0_dbg:
 	mov	x2, sp
 	bl	do_debug_exception
 	enable_dbg
+	ct_user_exit
 	b	ret_to_user
 el0_inv:
 	enable_dbg
+	ct_user_exit
 	mov	x0, sp
 	mov	x1, #BAD_SYNC
 	mrs	x2, esr_el1
@@ -616,9 +651,11 @@  el0_svc:
 el0_svc_naked:					// compat entry point
 	stp	x0, scno, [sp, #S_ORIG_X0]	// save the original x0 and syscall number
 	enable_dbg_and_irq
+	ct_user_exit 1
 
 	ldr	x16, [tsk, #TI_FLAGS]		// check for syscall tracing
-	tbnz	x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls?
+	and	x16, x16, #_TIF_SYSCALL_WORK    // are we tracing syscalls?
+	cbnz	x16, __sys_trace
 	adr	lr, ret_fast_syscall		// return address
 	cmp     scno, sc_nr                     // check upper syscall limit
 	b.hs	ni_sys