diff mbox

[RFC,v2,1/4] ftrace: allow arch-specific check_stack()

Message ID 1438674249-3447-2-git-send-email-takahiro.akashi@linaro.org
State New
Headers show

Commit Message

AKASHI Takahiro Aug. 4, 2015, 7:44 a.m. UTC
A stack frame pointer may be used in a different way depending on
cpu architecture. Thus it is not always appropriate to slurp the stack
contents, as currently done in check_stack(), in order to calculate
a stack index (height) at a given function call. At least not on arm64.

This patch extracts potentially arch-specific code from check_stack()
and puts it into a new arch_check_stack(), which is declared as weak.
So we will be able to add an arch-specific, most efficient way of
stack traversal later.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 include/linux/stacktrace.h |    4 ++
 kernel/trace/trace_stack.c |   88 ++++++++++++++++++++++++++------------------
 2 files changed, 56 insertions(+), 36 deletions(-)

Comments

AKASHI Takahiro Aug. 17, 2015, 6:07 a.m. UTC | #1
Will,

On 08/12/2015 02:03 AM, Will Deacon wrote:
> On Tue, Aug 04, 2015 at 08:44:06AM +0100, AKASHI Takahiro wrote:
>> A stack frame pointer may be used in a different way depending on
>> cpu architecture. Thus it is not always appropriate to slurp the stack
>> contents, as currently done in check_stack(), in order to calcurate
>> a stack index (height) at a given function call. At least not on arm64.
>>
>> This patch extract potentially arch-specific code from check_stack()
>> and puts it into a new arch_check_stack(), which is declared as weak.
>> So we will be able to add arch-specific and most efficient way of
>> stack traversing Later.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>
> If arm64 is the only architecture behaving differently, then I'm happy
> to reconsider the fix to unwind_frame that we merged in e306dfd06fcb
> ("ARM64: unwind: Fix PC calculation"). I'd have thought any architecture
> with a branch-and-link instruction would potentially have the same issue,
> so we could just be fixing things in the wrong place if ftrace works
> everywhere else.

I'm not the right person to answer for other architectures (and ftrace
behavior on them.) The only thing I know is that the current ftrace stack tracer
works correctly only if the addresses stored and found on the stack match
the ones returned by save_stack_trace().

Anyway, the fix above is not the only reason that I want to introduce arch-specific
arch_check_stack(). Other issues to fix include
   - combined case of stack tracer and function graph tracer (common across architectures)
   - exception entries (as I'm trying to address in RFC 4/4)
   - inaccurate stack size (for each function; my current fix is not perfect though.)

Thanks,
-Takahiro AKASHI

> Will
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
diff mbox

Patch

diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h
index 0a34489..bfae605 100644
--- a/include/linux/stacktrace.h
+++ b/include/linux/stacktrace.h
@@ -10,6 +10,10 @@  struct pt_regs;
 struct stack_trace {
 	unsigned int nr_entries, max_entries;
 	unsigned long *entries;
+#ifdef CONFIG_STACK_TRACER
+	unsigned *index;
+	unsigned long *sp;
+#endif
 	int skip;	/* input argument: How many entries to skip */
 };
 
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 3d9356b..021b8c3 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -27,9 +27,10 @@  static unsigned stack_dump_index[STACK_TRACE_ENTRIES];
  * us to remove most or all of the stack size overhead
  * added by the stack tracer itself.
  */
-static struct stack_trace max_stack_trace = {
+	struct stack_trace max_stack_trace = {
 	.max_entries		= STACK_TRACE_ENTRIES - 1,
 	.entries		= &stack_dump_trace[0],
+	.index			= &stack_dump_index[0],
 };
 
 static unsigned long max_stack_size;
@@ -65,42 +66,15 @@  static inline void print_max_stack(void)
 	}
 }
 
-static inline void
-check_stack(unsigned long ip, unsigned long *stack)
+void __weak
+arch_check_stack(unsigned long ip, unsigned long *stack,
+			unsigned long *max_size, unsigned int *tracer_size)
 {
-	unsigned long this_size, flags; unsigned long *p, *top, *start;
-	static int tracer_frame;
-	int frame_size = ACCESS_ONCE(tracer_frame);
+	unsigned long *p, *top, *start;
+	unsigned long this_size;
 	int i, x;
 
-	this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
-	this_size = THREAD_SIZE - this_size;
-	/* Remove the frame of the tracer */
-	this_size -= frame_size;
-
-	if (this_size <= max_stack_size)
-		return;
-
-	/* we do not handle interrupt stacks yet */
-	if (!object_is_on_stack(stack))
-		return;
-
-	local_irq_save(flags);
-	arch_spin_lock(&max_stack_lock);
-
-	/* In case another CPU set the tracer_frame on us */
-	if (unlikely(!frame_size))
-		this_size -= tracer_frame;
-
-	/* a race could have already updated it */
-	if (this_size <= max_stack_size)
-		goto out;
-
-	max_stack_size = this_size;
-
-	max_stack_trace.nr_entries = 0;
 	max_stack_trace.skip = 3;
-
 	save_stack_trace(&max_stack_trace);
 
 	/* Skip over the overhead of the stack tracer itself */
@@ -116,6 +90,7 @@  check_stack(unsigned long ip, unsigned long *stack)
 	start = stack;
 	top = (unsigned long *)
 		(((unsigned long)start & ~(THREAD_SIZE-1)) + THREAD_SIZE);
+	this_size = *max_size;
 
 	/*
 	 * Loop through all the entries. One of the entries may
@@ -146,10 +121,10 @@  check_stack(unsigned long ip, unsigned long *stack)
 				 * out what that is, then figure it out
 				 * now.
 				 */
-				if (unlikely(!tracer_frame)) {
-					tracer_frame = (p - stack) *
+				if (unlikely(!*tracer_size)) {
+					*tracer_size = (p - stack) *
 						sizeof(unsigned long);
-					max_stack_size -= tracer_frame;
+					*max_size -= *tracer_size;
 				}
 			}
 		}
@@ -161,6 +136,47 @@  check_stack(unsigned long ip, unsigned long *stack)
 	max_stack_trace.nr_entries = x;
 	for (; x < i; x++)
 		stack_dump_trace[x] = ULONG_MAX;
+}
+
+static inline void
+check_stack(unsigned long ip, unsigned long *stack)
+{
+	unsigned long this_size, flags;
+	static int tracer_frame;
+	int frame_size = ACCESS_ONCE(tracer_frame);
+
+	this_size = ((unsigned long)stack) & (THREAD_SIZE-1);
+	this_size = THREAD_SIZE - this_size;
+	/* for safety, depending on arch_check_stack() */
+	if (this_size < frame_size)
+		return;
+
+	/* Remove the frame of the tracer */
+	this_size -= frame_size;
+
+	if (this_size <= max_stack_size)
+		return;
+
+	/* we do not handle interrupt stacks yet */
+	if (!object_is_on_stack(stack))
+		return;
+
+	local_irq_save(flags);
+	arch_spin_lock(&max_stack_lock);
+
+	/* In case another CPU set the tracer_frame on us */
+	if (unlikely(!frame_size))
+		this_size -= tracer_frame;
+
+	/* a race could have already updated it */
+	if (this_size <= max_stack_size)
+		goto out;
+
+	max_stack_size = this_size;
+
+	max_stack_trace.nr_entries = 0;
+
+	arch_check_stack(ip, stack, &max_stack_size, &tracer_frame);
 
 	if (task_stack_end_corrupted(current)) {
 		print_max_stack();