diff mbox

[3/8] pstore: Add persistent function tracing

Message ID 1341879046-5197-3-git-send-email-anton.vorontsov@linaro.org
State Accepted
Commit 060287b8c467bf49a594d8d669e1986c6d8d76b0
Headers show

Commit Message

Anton Vorontsov July 10, 2012, 12:10 a.m. UTC
With this support kernel can save function call chain log into a
persistent ram buffer that can be decoded and dumped after reboot
through pstore filesystem. It can be used to determine what function
was last called before a reset or panic.

We store the log in a binary format and then decode it at read time.

p.s.
Mostly the code comes from trace_persistent.c driver found in the
Android git tree, written by Colin Cross <ccross@android.com>
(according to sign-off history). I reworked the driver a little bit,
and ported it to pstore.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 fs/pstore/Kconfig      |   12 ++++++
 fs/pstore/Makefile     |    1 +
 fs/pstore/ftrace.c     |   35 +++++++++++++++
 fs/pstore/inode.c      |  111 ++++++++++++++++++++++++++++++++++++++++++++++--
 fs/pstore/internal.h   |   43 +++++++++++++++++++
 fs/pstore/platform.c   |    2 +-
 include/linux/pstore.h |    9 ++++
 7 files changed, 208 insertions(+), 5 deletions(-)
 create mode 100644 fs/pstore/ftrace.c

Comments

Steven Rostedt July 17, 2012, 7:38 p.m. UTC | #1
On Mon, 2012-07-09 at 17:10 -0700, Anton Vorontsov wrote:

> --- /dev/null
> +++ b/fs/pstore/ftrace.c
> @@ -0,0 +1,35 @@
> +/*
> + * Copyright 2012  Google, Inc.
> + *
> + * This software is licensed under the terms of the GNU General Public
> + * License version 2, as published by the Free Software Foundation, and
> + * may be copied, distributed, and modified under those terms.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/compiler.h>
> +#include <linux/irqflags.h>
> +#include <linux/percpu.h>
> +#include <linux/smp.h>
> +#include <linux/atomic.h>
> +#include <asm/barrier.h>
> +#include "internal.h"
> +
> +void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip)

BTW, you can make the entire file 'notrace' without adding annotations
by including in the Makefile:

CFLAGS_REMOVE_ftrace.o = -pg


> +{
> +	struct pstore_ftrace_record rec = {};
> +
> +	if (unlikely(oops_in_progress))
> +		return;
> +
> +	rec.ip = ip;
> +	rec.parent_ip = parent_ip;
> +	pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id());
> +	psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec,
> +			  sizeof(rec), psinfo);
> +}

BTW, can any of the called functions go into module code that can be
removed? If so, then this is not safe at all. Normal function tracing
can not be synced in a preemptible kernel.

Also, I'm starting to wonder if this should be in its own utility
(separate debugfs?) than hooking directly into ftrace. Then you don't
need to modify ftrace at all and you can do the following:

static struct ftrace_ops trace_ops {
	.func = pstore_ftrace_call;
};

then in your write to debugfs file:

	register_ftrace_function(&trace_ops);

To turn off tracing:

	unregister_ftrace_function(&trace_ops);

Note, it's safe to use if the trace_ops (or anything the callback calls)
is a module. That's because it detects the trace_ops is not kernel core
code and will place a wrapper around it that allows the function tracing
to by synced with module unload. You still need to unregister the
trace_ops before unloading the module, or you can have a crash that way.

-- Steve
Anton Vorontsov July 17, 2012, 8:01 p.m. UTC | #2
On Tue, Jul 17, 2012 at 03:38:18PM -0400, Steven Rostedt wrote:
[...]
> > +void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip)
> 
> BTW, you can make the entire file 'notrace' without adding annotations
> by including in the Makefile:
> 
> CFLAGS_REMOVE_ftrace.o = -pg

Actually it was in the first version in the patch, but then I changed
it 'notrace' for just this func. This is for the case if the file would
contain some more code which we actually may trace. Doing things fine-
grained seemed to be better than making the whole file as notrace. Plus
it is one line less. :-)

But I have no preference, so I can change it.

> > +{
> > +	struct pstore_ftrace_record rec = {};
> > +
> > +	if (unlikely(oops_in_progress))
> > +		return;
> > +
> > +	rec.ip = ip;
> > +	rec.parent_ip = parent_ip;
> > +	pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id());
> > +	psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec,
> > +			  sizeof(rec), psinfo);
> > +}
> 
> BTW, can any of the called functions go into module code that can be
> removed? If so, then this is not safe at all. Normal function tracing
> can not be synced in a preemptible kernel.

Um. Yes, psinfo->write_buf() might be in the module. Nice catch.

> Also, I'm starting to wonder if this should be in its own utility
> (separate debugfs?) than hooking directly into ftrace. Then you don't
> need to modify ftrace at all and you can do the following:
> 
> static struct ftrace_ops trace_ops {
> 	.func = pstore_ftrace_call;
> };
> 
> then in your write to debugfs file:
> 
> 	register_ftrace_function(&trace_ops);
> 
> To turn off tracing:
> 
> 	unregister_ftrace_function(&trace_ops);
> 
> Note, it's safe to use if the trace_ops (or anything the callback calls)
> is a module. That's because it detects the trace_ops is not kernel core
> code and will place a wrapper around it that allows the function tracing
> to by synced with module unload. You still need to unregister the
> trace_ops before unloading the module, or you can have a crash that way.

Hehe. Like this? http://lkml.org/lkml/2012/5/26/80 :-D

So, do you want something like this, but combinded: we don't register
another tracer, but register our own ftrace_ops? This sounds doable.

Thanks!
Steven Rostedt July 17, 2012, 9:38 p.m. UTC | #3
On Tue, 2012-07-17 at 13:01 -0700, Anton Vorontsov wrote:
> On Tue, Jul 17, 2012 at 03:38:18PM -0400, Steven Rostedt wrote:
> [...]
> > > +void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip)
> > 
> > BTW, you can make the entire file 'notrace' without adding annotations
> > by including in the Makefile:
> > 
> > CFLAGS_REMOVE_ftrace.o = -pg
> 
> Actually it was in the first version in the patch, but then I changed
> it 'notrace' for just this func. This is for the case if the file would
> contain some more code which we actually may trace. Doing things fine-
> grained seemed to be better than making the whole file as notrace. Plus
> it is one line less. :-)
> 
> But I have no preference, so I can change it.

I have no preference, it was just an FYI. For just a single function in
a file, it really makes no difference which you use. The Makefile trick
is better for needing to make sure an entire file is not traced.


> 
> > > +{
> > > +	struct pstore_ftrace_record rec = {};
> > > +
> > > +	if (unlikely(oops_in_progress))
> > > +		return;
> > > +
> > > +	rec.ip = ip;
> > > +	rec.parent_ip = parent_ip;
> > > +	pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id());
> > > +	psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec,
> > > +			  sizeof(rec), psinfo);
> > > +}
> > 
> > BTW, can any of the called functions go into module code that can be
> > removed? If so, then this is not safe at all. Normal function tracing
> > can not be synced in a preemptible kernel.
> 
> Um. Yes, psinfo->write_buf() might be in the module. Nice catch.
> 
> > Also, I'm starting to wonder if this should be in its own utility
> > (separate debugfs?) than hooking directly into ftrace. Then you don't
> > need to modify ftrace at all and you can do the following:
> > 
> > static struct ftrace_ops trace_ops {
> > 	.func = pstore_ftrace_call;
> > };
> > 
> > then in your write to debugfs file:
> > 
> > 	register_ftrace_function(&trace_ops);
> > 
> > To turn off tracing:
> > 
> > 	unregister_ftrace_function(&trace_ops);
> > 
> > Note, it's safe to use if the trace_ops (or anything the callback calls)
> > is a module. That's because it detects the trace_ops is not kernel core
> > code and will place a wrapper around it that allows the function tracing
> > to by synced with module unload. You still need to unregister the
> > trace_ops before unloading the module, or you can have a crash that way.
> 
> Hehe. Like this? http://lkml.org/lkml/2012/5/26/80 :-D
> 
> So, do you want something like this, but combinded: we don't register
> another tracer, but register our own ftrace_ops? This sounds doable.

Yeah, the combined. That is, don't make a tracer out of it. Use another
mechanism to enable the tracing and not as a tracer
in /debug/tracing/available_tracers. We can probably set up events for
you too, but at a later time.

-- Steve
Anton Vorontsov July 17, 2012, 10:06 p.m. UTC | #4
On Tue, Jul 17, 2012 at 05:38:22PM -0400, Steven Rostedt wrote:
[...]
> > > BTW, can any of the called functions go into module code that can be
> > > removed? If so, then this is not safe at all. Normal function tracing
> > > can not be synced in a preemptible kernel.
> > 
> > Um. Yes, psinfo->write_buf() might be in the module. Nice catch.

Oh, actually, while write_buf() can technically be in a removable module,
the RAM backend can't be removed once loaded. So it is not a real issue.
But I'll implement your idea anyway, it seems a bit cleaner and safer,
plus if we ever want to make persistent_ram backend removable, we won't
need to bother w/ this particular issue.

Thanks!
diff mbox

Patch

diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index d044de6..d39bb5c 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -19,6 +19,18 @@  config PSTORE_CONSOLE
 	  When the option is enabled, pstore will log all kernel
 	  messages, even if no oops or panic happened.
 
+config PSTORE_FTRACE
+	bool "Persistent function tracer"
+	depends on PSTORE
+	depends on FUNCTION_TRACER
+	help
+	  With this option kernel traces function calls into a persistent
+	  ram buffer that can be decoded and dumped after reboot through
+	  pstore filesystem. It can be used to determine what function
+	  was last called before a reset or panic.
+
+	  If unsure, say N.
+
 config PSTORE_RAM
 	tristate "Log panic/oops to a RAM buffer"
 	depends on PSTORE
diff --git a/fs/pstore/Makefile b/fs/pstore/Makefile
index 278a44e..4c9095c 100644
--- a/fs/pstore/Makefile
+++ b/fs/pstore/Makefile
@@ -5,6 +5,7 @@ 
 obj-y += pstore.o
 
 pstore-objs += inode.o platform.o
+obj-$(CONFIG_PSTORE_FTRACE)	+= ftrace.o
 
 ramoops-objs += ram.o ram_core.o
 obj-$(CONFIG_PSTORE_RAM)	+= ramoops.o
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
new file mode 100644
index 0000000..a130d48
--- /dev/null
+++ b/fs/pstore/ftrace.c
@@ -0,0 +1,35 @@ 
+/*
+ * Copyright 2012  Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <linux/irqflags.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/atomic.h>
+#include <asm/barrier.h>
+#include "internal.h"
+
+void notrace pstore_ftrace_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct pstore_ftrace_record rec = {};
+
+	if (unlikely(oops_in_progress))
+		return;
+
+	rec.ip = ip;
+	rec.parent_ip = parent_ip;
+	pstore_ftrace_encode_cpu(&rec, raw_smp_processor_id());
+	psinfo->write_buf(PSTORE_TYPE_FTRACE, 0, NULL, 0, (void *)&rec,
+			  sizeof(rec), psinfo);
+}
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 45bff54..4ab572e 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -27,6 +27,7 @@ 
 #include <linux/list.h>
 #include <linux/string.h>
 #include <linux/mount.h>
+#include <linux/seq_file.h>
 #include <linux/ramfs.h>
 #include <linux/parser.h>
 #include <linux/sched.h>
@@ -52,18 +53,117 @@  struct pstore_private {
 	char	data[];
 };
 
+struct pstore_ftrace_seq_data {
+	const void *ptr;
+	size_t off;
+	size_t size;
+};
+
+#define REC_SIZE sizeof(struct pstore_ftrace_record)
+
+static void *pstore_ftrace_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct pstore_private *ps = s->private;
+	struct pstore_ftrace_seq_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return NULL;
+
+	data->off = ps->size % REC_SIZE;
+	data->off += *pos * REC_SIZE;
+	if (data->off + REC_SIZE > ps->size) {
+		kfree(data);
+		return NULL;
+	}
+
+	return data;
+
+}
+
+static void pstore_ftrace_seq_stop(struct seq_file *s, void *v)
+{
+	kfree(v);
+}
+
+static void *pstore_ftrace_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct pstore_private *ps = s->private;
+	struct pstore_ftrace_seq_data *data = v;
+
+	data->off += REC_SIZE;
+	if (data->off + REC_SIZE > ps->size)
+		return NULL;
+
+	(*pos)++;
+	return data;
+}
+
+static int pstore_ftrace_seq_show(struct seq_file *s, void *v)
+{
+	struct pstore_private *ps = s->private;
+	struct pstore_ftrace_seq_data *data = v;
+	struct pstore_ftrace_record *rec = (void *)(ps->data + data->off);
+
+	seq_printf(s, "%d %08lx  %08lx  %pf <- %pF\n",
+		pstore_ftrace_decode_cpu(rec), rec->ip, rec->parent_ip,
+		(void *)rec->ip, (void *)rec->parent_ip);
+
+	return 0;
+}
+
+static const struct seq_operations pstore_ftrace_seq_ops = {
+	.start	= pstore_ftrace_seq_start,
+	.next	= pstore_ftrace_seq_next,
+	.stop	= pstore_ftrace_seq_stop,
+	.show	= pstore_ftrace_seq_show,
+};
+
 static ssize_t pstore_file_read(struct file *file, char __user *userbuf,
 						size_t count, loff_t *ppos)
 {
-	struct pstore_private *ps = file->private_data;
+	struct seq_file *sf = file->private_data;
+	struct pstore_private *ps = sf->private;
 
+	if (ps->type == PSTORE_TYPE_FTRACE)
+		return seq_read(file, userbuf, count, ppos);
 	return simple_read_from_buffer(userbuf, count, ppos, ps->data, ps->size);
 }
 
+static int pstore_file_open(struct inode *inode, struct file *file)
+{
+	struct pstore_private *ps = inode->i_private;
+	struct seq_file *sf;
+	int err;
+	const struct seq_operations *sops = NULL;
+
+	if (ps->type == PSTORE_TYPE_FTRACE)
+		sops = &pstore_ftrace_seq_ops;
+
+	err = seq_open(file, sops);
+	if (err < 0)
+		return err;
+
+	sf = file->private_data;
+	sf->private = ps;
+
+	return 0;
+}
+
+static loff_t pstore_file_llseek(struct file *file, loff_t off, int origin)
+{
+	struct seq_file *sf = file->private_data;
+
+	if (sf->op)
+		return seq_lseek(file, off, origin);
+	return default_llseek(file, off, origin);
+}
+
 static const struct file_operations pstore_file_operations = {
-	.open	= simple_open,
-	.read	= pstore_file_read,
-	.llseek	= default_llseek,
+	.open		= pstore_file_open,
+	.read		= pstore_file_read,
+	.llseek		= pstore_file_llseek,
+	.release	= seq_release,
 };
 
 /*
@@ -215,6 +315,9 @@  int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
 	case PSTORE_TYPE_CONSOLE:
 		sprintf(name, "console-%s", psname);
 		break;
+	case PSTORE_TYPE_FTRACE:
+		sprintf(name, "ftrace-%s", psname);
+		break;
 	case PSTORE_TYPE_MCE:
 		sprintf(name, "mce-%s-%lld", psname, id);
 		break;
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 3bde461..958c48d 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -1,6 +1,49 @@ 
+#ifndef __PSTORE_INTERNAL_H__
+#define __PSTORE_INTERNAL_H__
+
+#include <linux/pstore.h>
+
+#if NR_CPUS <= 2 && defined(CONFIG_ARM_THUMB)
+#define PSTORE_CPU_IN_IP 0x1
+#elif NR_CPUS <= 4 && defined(CONFIG_ARM)
+#define PSTORE_CPU_IN_IP 0x3
+#endif
+
+struct pstore_ftrace_record {
+	unsigned long ip;
+	unsigned long parent_ip;
+#ifndef PSTORE_CPU_IN_IP
+	unsigned int cpu;
+#endif
+};
+
+static inline void
+pstore_ftrace_encode_cpu(struct pstore_ftrace_record *rec, unsigned int cpu)
+{
+#ifndef PSTORE_CPU_IN_IP
+	rec->cpu = cpu;
+#else
+	rec->ip |= cpu;
+#endif
+}
+
+static inline unsigned int
+pstore_ftrace_decode_cpu(struct pstore_ftrace_record *rec)
+{
+#ifndef PSTORE_CPU_IN_IP
+	return rec->cpu;
+#else
+	return rec->ip & PSTORE_CPU_IN_IP;
+#endif
+}
+
+extern struct pstore_info *psinfo;
+
 extern void	pstore_set_kmsg_bytes(int);
 extern void	pstore_get_records(int);
 extern int	pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
 			      char *data, size_t size,
 			      struct timespec time, struct pstore_info *psi);
 extern int	pstore_is_mounted(void);
+
+#endif
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index ef5ca8a..29996e8 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -61,7 +61,7 @@  static DECLARE_WORK(pstore_work, pstore_dowork);
  * calls to pstore_register()
  */
 static DEFINE_SPINLOCK(pstore_lock);
-static struct pstore_info *psinfo;
+struct pstore_info *psinfo;
 
 static char *backend;
 
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index b107484..120443b 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -30,6 +30,7 @@  enum pstore_type_id {
 	PSTORE_TYPE_DMESG	= 0,
 	PSTORE_TYPE_MCE		= 1,
 	PSTORE_TYPE_CONSOLE	= 2,
+	PSTORE_TYPE_FTRACE	= 3,
 	PSTORE_TYPE_UNKNOWN	= 255
 };
 
@@ -57,6 +58,14 @@  struct pstore_info {
 	void		*data;
 };
 
+
+#ifdef CONFIG_PSTORE_FTRACE
+extern void pstore_ftrace_call(unsigned long ip, unsigned long parent_ip);
+#else
+static inline void pstore_ftrace_call(unsigned long ip, unsigned long parent_ip)
+{ }
+#endif
+
 #ifdef CONFIG_PSTORE
 extern int pstore_register(struct pstore_info *);
 #else