diff mbox

[3.19-rc2,v15,5/8] printk: Simple implementation for NMI backtracing

Message ID 1422022952-31552-6-git-send-email-daniel.thompson@linaro.org
State New
Headers show

Commit Message

Daniel Thompson Jan. 23, 2015, 2:22 p.m. UTC
Currently there is quite a pile of code sitting in
arch/x86/kernel/apic/hw_nmi.c to support safe all-cpu backtracing from NMI.
The code is inaccessible to backtrace implementations for other
architectures, which is a shame because they would probably like to be
safe too.

Copy this code into printk. We'll port the x86 NMI backtrace to it in a
later patch.

Incidentally, technically I think it might be safe to call
prepare_nmi_printk() from NMI, provided care is taken to honour the
return code. complete_nmi_printk() cannot be called from NMI but could
be scheduled using irq_work_queue(). However honouring the return code
means sometimes it is impossible to get the message out so I'd say using
this code in such a way should probably attract sympathy and/or derision
rather than admiration.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 arch/Kconfig           |   3 ++
 include/linux/printk.h |  22 +++++++++
 kernel/printk/printk.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 147 insertions(+)

Comments

Daniel Thompson Jan. 26, 2015, 5:21 p.m. UTC | #1
On 24/01/15 21:44, Thomas Gleixner wrote:
> On Fri, 23 Jan 2015, Daniel Thompson wrote:
>> +#ifdef CONFIG_ARCH_WANT_NMI_PRINTK
>> +extern __printf(1, 0) int nmi_vprintk(const char *fmt, va_list args);
>> +
>> +struct cpumask;
>> +extern int prepare_nmi_printk(struct cpumask *cpus);
>> +extern void complete_nmi_printk(struct cpumask *cpus);
>> +
>> +/*
>> + * Replace printk to write into the NMI seq.
>> + *
>> + * To avoid include hell this is a macro rather than an inline function
>> + * (printk_func is not declared in this header file).
>> + */
>> +#define this_cpu_begin_nmi_printk() ({ \
>> +	printk_func_t __orig = this_cpu_read(printk_func); \
>> +	this_cpu_write(printk_func, nmi_vprintk); \
>> +	__orig; \
>> +})
>> +#define this_cpu_end_nmi_printk(fn) this_cpu_write(printk_func, fn)
> 
> Why can't we just make it a proper function in printk.c and make
> DEFINE_PER_CPU(printk_func_t, printk_func) static once x86 is
> converted over, thereby getting rid of the misplaced declaration in
> percpu.h?
> 
> It's really not performance critical at all. If you do system wide
> backtraces a function call is the least of your worries.

Yes. I'll make this a proper function.

Not sure about tidying up printk_func though. I had hoped to use that to
get rid of CONFIG_KGDB_KDB ifdefs that are currently found in printk.c.


>> +#ifdef CONFIG_ARCH_WANT_NMI_PRINTK
> 
> Why can't this simply be CONFIG_PRINTK_NMI and live at the same place
> as the other printk related options?

Will do.


>> +int nmi_vprintk(const char *fmt, va_list args)
>> +{
>> +	struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
>> +	unsigned int len = seq_buf_used(&s->seq);
>> +
>> +	seq_buf_vprintf(&s->seq, fmt, args);
>> +	return seq_buf_used(&s->seq) - len;
>> +}
>> +EXPORT_SYMBOL_GPL(nmi_vprintk);
> 
> What's the point of these exports? This stuff is really not supposed
> to be used inside random modules.

Will do.


>> +/*
>> + * Check for concurrent usage and set up per_cpu seq_buf buffers that the NMIs
>> + * running on the other CPUs will write to. Provides the mask of CPUs it is
>> + * safe to write from (i.e. a copy of the online mask).
>> + */
>> +int prepare_nmi_printk(struct cpumask *cpus)
> 
> Can we please make all this proper prefixed? , i.e. printk_nmi_*

Will do.


>> +{
>> +	struct nmi_seq_buf *s;
>> +	int cpu;
>> +
>> +	if (test_and_set_bit(0, &nmi_print_flag)) {
>> +		/*
>> +		 * If something is already using the NMI print facility we
>> +		 * can't allow a second one...
>> +		 */
>> +		return -EBUSY;
> 
> So what's the point of saving and restoring the printk_func pointer at
> the call site?
> 
> void printk_nmi_begin()
> {
> 	if (__this_cpu_inc_return(nmi_printk_nest_level) == 1)
> 	      this_cpu_write(printk_func, nmi_vprintk);
> }
> 
> void printk_nmi_end()
> {
> 	if (__this_cpu_dec_return(nmi_printk_nest_level) > 0)
> 	        return;
>         this_cpu_write(printk_func, default_vprintk);

Looks good to here.


> 	if (in_nmi())
> 		irq_work_schedule();
>         else
> 		printk_nmi_complete();
> }

Not sure about using irq_work here. arch_trigger_all_cpu_backtrace is
generally called when something's gone bad meaning there's a good chance
the interrupts are masked.


> 
>> +	}
>> +
>> +	cpumask_copy(cpus, cpu_online_mask);
> 
> Why do you need external storage for this if nesting is not allowed?
> What's wrong with having a printk_nmi_mask? It's protected by the
> nmi_print_flag, so the call sites do not have to take care about
> protecting it until printk_nmi_complete() has been invoked.

It was used to tell the caller which CPUs are initialized and allowed to
trace...

On reflection though that's a rather pointless optimization. Given the
quantity of data we're about to throw on the console I can't really see
any reason not to use for_each_possible_cpu() for initialization and
leave the caller to figure out which cores to send IPIs to.


>> +	for_each_cpu(cpu, cpus) {
>> +		s = &per_cpu(nmi_print_seq, cpu);
>> +		seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE);
> 
> Why do you want to do this here? The buffers should be initialized
> before the first NMI can hit and the complete code should reinit them
> before the next printk_nmi_prepare() sees the nmi_print_flag cleared.

To be honest I inherited the just-in-time initialization from Steven's code.

Assuming Steven didn't have a special reason to do it like that then I'm
happy to change this.


>> +static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
>> +{
>> +	const char *buf = s->buffer + start;
>> +
>> +	printk("%.*s", (end - start) + 1, buf);
>> +}
>> +
>> +void complete_nmi_printk(struct cpumask *cpus)
>> +{
>> +	struct nmi_seq_buf *s;
>> +	int len;
>> +	int cpu;
>> +	int i;
> 
> Please condense all ints to a single line, but what's worse is the
> completely inconsistency versus scopes.
> 
> len and i are only used in the for_each loop. Either we put all of
> them at the top of the function or we do it right.

Will do.
diff mbox

Patch

diff --git a/arch/Kconfig b/arch/Kconfig
index 05d7a8a458d5..50c9412a77d0 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -309,6 +309,9 @@  config ARCH_WANT_OLD_COMPAT_IPC
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION
 	bool
 
+config ARCH_WANT_NMI_PRINTK
+	bool
+
 config HAVE_ARCH_SECCOMP_FILTER
 	bool
 	help
diff --git a/include/linux/printk.h b/include/linux/printk.h
index c8f170324e64..188fdc2c1efd 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -219,6 +219,28 @@  static inline void show_regs_print_info(const char *log_lvl)
 }
 #endif
 
+#ifdef CONFIG_ARCH_WANT_NMI_PRINTK
+extern __printf(1, 0) int nmi_vprintk(const char *fmt, va_list args);
+
+struct cpumask;
+extern int prepare_nmi_printk(struct cpumask *cpus);
+extern void complete_nmi_printk(struct cpumask *cpus);
+
+/*
+ * Replace printk to write into the NMI seq.
+ *
+ * To avoid include hell this is a macro rather than an inline function
+ * (printk_func is not declared in this header file).
+ */
+#define this_cpu_begin_nmi_printk() ({ \
+	printk_func_t __orig = this_cpu_read(printk_func); \
+	this_cpu_write(printk_func, nmi_vprintk); \
+	__orig; \
+})
+#define this_cpu_end_nmi_printk(fn) this_cpu_write(printk_func, fn)
+
+#endif
+
 extern asmlinkage void dump_stack(void) __cold;
 
 #ifndef pr_fmt
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 02d6b6d28796..774119e27e0b 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -1805,6 +1805,127 @@  asmlinkage int printk_emit(int facility, int level,
 }
 EXPORT_SYMBOL(printk_emit);
 
+#ifdef CONFIG_ARCH_WANT_NMI_PRINTK
+
+#define NMI_BUF_SIZE		4096
+
+struct nmi_seq_buf {
+	unsigned char		buffer[NMI_BUF_SIZE];
+	struct seq_buf		seq;
+};
+
+/* Safe printing in NMI context */
+static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
+
+/* "in progress" flag of NMI printing */
+static unsigned long nmi_print_flag;
+
+/*
+ * It is not safe to call printk() directly from NMI handlers.
+ * It may be fine if the NMI detected a lock up and we have no choice
+ * but to do so, but doing a NMI on all other CPUs to get a back trace
+ * can be done with a sysrq-l. We don't want that to lock up, which
+ * can happen if the NMI interrupts a printk in progress.
+ *
+ * Instead, we redirect the vprintk() to this nmi_vprintk() that writes
+ * the content into a per cpu seq_buf buffer. Then when the NMIs are
+ * all done, we can safely dump the contents of the seq_buf to a printk()
+ * from a non NMI context.
+ *
+ * This is not a generic printk() implementation and must be used with
+ * great care. In particular there is a static limit on the quantity of
+ * data that may be emitted during NMI, only one client can be active at
+ * one time (arbitrated by the return value of begin_nmi_printk() and
+ * it is required that something at task or interrupt context be scheduled
+ * to issue the output.
+ */
+int nmi_vprintk(const char *fmt, va_list args)
+{
+	struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
+	unsigned int len = seq_buf_used(&s->seq);
+
+	seq_buf_vprintf(&s->seq, fmt, args);
+	return seq_buf_used(&s->seq) - len;
+}
+EXPORT_SYMBOL_GPL(nmi_vprintk);
+
+/*
+ * Check for concurrent usage and set up per_cpu seq_buf buffers that the NMIs
+ * running on the other CPUs will write to. Provides the mask of CPUs it is
+ * safe to write from (i.e. a copy of the online mask).
+ */
+int prepare_nmi_printk(struct cpumask *cpus)
+{
+	struct nmi_seq_buf *s;
+	int cpu;
+
+	if (test_and_set_bit(0, &nmi_print_flag)) {
+		/*
+		 * If something is already using the NMI print facility we
+		 * can't allow a second one...
+		 */
+		return -EBUSY;
+	}
+
+	cpumask_copy(cpus, cpu_online_mask);
+
+	for_each_cpu(cpu, cpus) {
+		s = &per_cpu(nmi_print_seq, cpu);
+		seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(prepare_nmi_printk);
+
+static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
+{
+	const char *buf = s->buffer + start;
+
+	printk("%.*s", (end - start) + 1, buf);
+}
+
+void complete_nmi_printk(struct cpumask *cpus)
+{
+	struct nmi_seq_buf *s;
+	int len;
+	int cpu;
+	int i;
+
+	/*
+	 * Now that all the NMIs have triggered, we can dump out their
+	 * back traces safely to the console.
+	 */
+	for_each_cpu(cpu, cpus) {
+		int last_i = 0;
+
+		s = &per_cpu(nmi_print_seq, cpu);
+
+		len = seq_buf_used(&s->seq);
+		if (!len)
+			continue;
+
+		/* Print line by line. */
+		for (i = 0; i < len; i++) {
+			if (s->buffer[i] == '\n') {
+				print_seq_line(s, last_i, i);
+				last_i = i + 1;
+			}
+		}
+		/* Check if there was a partial line. */
+		if (last_i < len) {
+			print_seq_line(s, last_i, len - 1);
+			pr_cont("\n");
+		}
+	}
+
+	clear_bit(0, &nmi_print_flag);
+	smp_mb__after_atomic();
+}
+EXPORT_SYMBOL_GPL(complete_nmi_printk);
+
+#endif /* CONFIG_ARCH_WANT_NMI_PRINTK */
+
 int vprintk_default(const char *fmt, va_list args)
 {
 	int r;
@@ -1829,6 +1950,7 @@  EXPORT_SYMBOL_GPL(vprintk_default);
  */
 DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default;
 
+
 /**
  * printk - print a kernel message
  * @fmt: format string