diff mbox series

[RFC,INTERNAL,v2,1/4] tty/sysrq: Make sysrq handler NMI aware

Message ID 1594386725-10346-2-git-send-email-sumit.garg@linaro.org
State Superseded
Headers show
Series Introduce NMI aware serial drivers | expand

Commit Message

Sumit Garg July 10, 2020, 1:12 p.m. UTC
With the advent of pseudo NMIs on arm64 its been possible to raise
serial device interrupt as an NMI in polling mode. With that its now
possible to do a magic sysrq in NMI context rather than in normal IRQ
context. So make sysrq handler NMI aware and allow system requests to
be marked as NMI safe which can run directly in NMI context while
categorized as not being NMI safe will be queued as irq_work for later
processing in normal interrupt context.

This feature is especially helpful in case the primary CPU is stuck in
deadlock with interrupts disabled won't honour serial device interrupt.
So with sysrq running in NMI context is helpful to debug such scenarios.

Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

---
 drivers/tty/sysrq.c       | 36 +++++++++++++++++++++++++++++++++++-
 include/linux/sysrq.h     |  1 +
 kernel/debug/debug_core.c |  1 +
 3 files changed, 37 insertions(+), 1 deletion(-)

-- 
2.7.4

Comments

Daniel Thompson July 13, 2020, 2:04 p.m. UTC | #1
On Fri, Jul 10, 2020 at 06:42:02PM +0530, Sumit Garg wrote:
> With the advent of pseudo NMIs on arm64 its been possible to raise

> serial device interrupt as an NMI in polling mode. With that its now

> possible to do a magic sysrq in NMI context rather than in normal IRQ

> context.


This is patch 1/4... it is not yet possible to do this. Use future
tense.

> So make sysrq handler NMI aware and allow system requests to

> be marked as NMI safe which can run directly in NMI context while

> categorized as not being NMI safe will be queued as irq_work for later

> processing in normal interrupt context.

> 

> This feature is especially helpful in case the primary CPU is stuck in

> deadlock with interrupts disabled won't honour serial device interrupt.

> So with sysrq running in NMI context is helpful to debug such scenarios.


Would be useful to explain the due diligence checks done to decide which
sysrq to make nmi_safe.


> Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> ---

>  drivers/tty/sysrq.c       | 36 +++++++++++++++++++++++++++++++++++-

>  include/linux/sysrq.h     |  1 +

>  kernel/debug/debug_core.c |  1 +

>  3 files changed, 37 insertions(+), 1 deletion(-)

> 

> diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c

> index 7c95afa9..97393c7 100644

> --- a/drivers/tty/sysrq.c

> +++ b/drivers/tty/sysrq.c

> @@ -50,6 +50,8 @@

>  #include <linux/syscalls.h>

>  #include <linux/of.h>

>  #include <linux/rcupdate.h>

> +#include <linux/irq_work.h>

> +#include <linux/kfifo.h>

>  

>  #include <asm/ptrace.h>

>  #include <asm/irq_regs.h>

> @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = {

>  	.help_msg	= "loglevel(0-9)",

>  	.action_msg	= "Changing Loglevel",

>  	.enable_mask	= SYSRQ_ENABLE_LOG,

> +	.nmi_safe	= true,

>  };

>  

>  #ifdef CONFIG_VT

> @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = {

>  	.help_msg	= "crash(c)",

>  	.action_msg	= "Trigger a crash",

>  	.enable_mask	= SYSRQ_ENABLE_DUMP,

> +	.nmi_safe	= true,

>  };

>  

>  static void sysrq_handle_reboot(int key)

> @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = {

>  	.help_msg	= "reboot(b)",

>  	.action_msg	= "Resetting",

>  	.enable_mask	= SYSRQ_ENABLE_BOOT,

> +	.nmi_safe	= true,

>  };

>  

>  const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;

> @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = {

>  	.handler	= sysrq_handle_showlocks,

>  	.help_msg	= "show-all-locks(d)",

>  	.action_msg	= "Show Locks Held",

> +	.nmi_safe	= true,

>  };

>  #else

>  #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)

> @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {

>  	.help_msg	= "show-registers(p)",

>  	.action_msg	= "Show Regs",

>  	.enable_mask	= SYSRQ_ENABLE_DUMP,

> +	.nmi_safe	= true,

>  };

>  

>  static void sysrq_handle_showstate(int key)

> @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = {

>  	.help_msg	= "dump-ftrace-buffer(z)",

>  	.action_msg	= "Dump ftrace buffer",

>  	.enable_mask	= SYSRQ_ENABLE_DUMP,

> +	.nmi_safe	= true,

>  };

>  #else

>  #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)

> @@ -538,6 +546,26 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)

>                  sysrq_key_table[i] = op_p;

>  }

>  

> +#define SYSRQ_NMI_FIFO_SIZE	64

> +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);

> +

> +static void sysrq_do_nmi_work(struct irq_work *work)

> +{

> +	struct sysrq_key_op *op_p;

> +	int key;

> +

> +	if (!kfifo_len(&sysrq_nmi_fifo))

> +		return;


What's the point of this check?


Daniel.


> +

> +	while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {

> +		op_p = __sysrq_get_key_op(key);

> +		if (op_p)

> +			op_p->handler(key);

> +	}

> +}

> +

> +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);

> +

>  void __handle_sysrq(int key, bool check_mask)

>  {

>  	const struct sysrq_key_op *op_p;

> @@ -568,7 +596,13 @@ void __handle_sysrq(int key, bool check_mask)

>  		if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {

>  			pr_info("%s\n", op_p->action_msg);

>  			console_loglevel = orig_log_level;

> -			op_p->handler(key);

> +

> +			if (in_nmi() && !op_p->nmi_safe) {

> +				kfifo_in(&sysrq_nmi_fifo, &key, 1);

> +				irq_work_queue(&sysrq_nmi_work);

> +			} else {

> +				op_p->handler(key);

> +			}

>  		} else {

>  			pr_info("This sysrq operation is disabled.\n");

>  			console_loglevel = orig_log_level;

> diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h

> index 3a582ec..630b5b9 100644

> --- a/include/linux/sysrq.h

> +++ b/include/linux/sysrq.h

> @@ -34,6 +34,7 @@ struct sysrq_key_op {

>  	const char * const help_msg;

>  	const char * const action_msg;

>  	const int enable_mask;

> +	const bool nmi_safe;

>  };

>  

>  #ifdef CONFIG_MAGIC_SYSRQ

> diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c

> index 9e59347..2b51173 100644

> --- a/kernel/debug/debug_core.c

> +++ b/kernel/debug/debug_core.c

> @@ -943,6 +943,7 @@ static const struct sysrq_key_op sysrq_dbg_op = {

>  	.handler	= sysrq_handle_dbg,

>  	.help_msg	= "debug(g)",

>  	.action_msg	= "DEBUG",

> +	.nmi_safe	= true,

>  };

>  #endif

>  

> -- 

> 2.7.4

>
Sumit Garg July 14, 2020, 7:20 a.m. UTC | #2
On Mon, 13 Jul 2020 at 19:34, Daniel Thompson
<daniel.thompson@linaro.org> wrote:
>

> On Fri, Jul 10, 2020 at 06:42:02PM +0530, Sumit Garg wrote:

> > With the advent of pseudo NMIs on arm64 its been possible to raise

> > serial device interrupt as an NMI in polling mode. With that its now

> > possible to do a magic sysrq in NMI context rather than in normal IRQ

> > context.

>

> This is patch 1/4... it is not yet possible to do this. Use future

> tense.


Okay.

>

> > So make sysrq handler NMI aware and allow system requests to

> > be marked as NMI safe which can run directly in NMI context while

> > categorized as not being NMI safe will be queued as irq_work for later

> > processing in normal interrupt context.

> >

> > This feature is especially helpful in case the primary CPU is stuck in

> > deadlock with interrupts disabled won't honour serial device interrupt.

> > So with sysrq running in NMI context is helpful to debug such scenarios.

>

> Would be useful to explain the due diligence checks done to decide which

> sysrq to make nmi_safe.

>


Sure, will add that info as well.

>

> > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> > ---

> >  drivers/tty/sysrq.c       | 36 +++++++++++++++++++++++++++++++++++-

> >  include/linux/sysrq.h     |  1 +

> >  kernel/debug/debug_core.c |  1 +

> >  3 files changed, 37 insertions(+), 1 deletion(-)

> >

> > diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c

> > index 7c95afa9..97393c7 100644

> > --- a/drivers/tty/sysrq.c

> > +++ b/drivers/tty/sysrq.c

> > @@ -50,6 +50,8 @@

> >  #include <linux/syscalls.h>

> >  #include <linux/of.h>

> >  #include <linux/rcupdate.h>

> > +#include <linux/irq_work.h>

> > +#include <linux/kfifo.h>

> >

> >  #include <asm/ptrace.h>

> >  #include <asm/irq_regs.h>

> > @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = {

> >       .help_msg       = "loglevel(0-9)",

> >       .action_msg     = "Changing Loglevel",

> >       .enable_mask    = SYSRQ_ENABLE_LOG,

> > +     .nmi_safe       = true,

> >  };

> >

> >  #ifdef CONFIG_VT

> > @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = {

> >       .help_msg       = "crash(c)",

> >       .action_msg     = "Trigger a crash",

> >       .enable_mask    = SYSRQ_ENABLE_DUMP,

> > +     .nmi_safe       = true,

> >  };

> >

> >  static void sysrq_handle_reboot(int key)

> > @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = {

> >       .help_msg       = "reboot(b)",

> >       .action_msg     = "Resetting",

> >       .enable_mask    = SYSRQ_ENABLE_BOOT,

> > +     .nmi_safe       = true,

> >  };

> >

> >  const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;

> > @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = {

> >       .handler        = sysrq_handle_showlocks,

> >       .help_msg       = "show-all-locks(d)",

> >       .action_msg     = "Show Locks Held",

> > +     .nmi_safe       = true,

> >  };

> >  #else

> >  #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)

> > @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = {

> >       .help_msg       = "show-registers(p)",

> >       .action_msg     = "Show Regs",

> >       .enable_mask    = SYSRQ_ENABLE_DUMP,

> > +     .nmi_safe       = true,

> >  };

> >

> >  static void sysrq_handle_showstate(int key)

> > @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = {

> >       .help_msg       = "dump-ftrace-buffer(z)",

> >       .action_msg     = "Dump ftrace buffer",

> >       .enable_mask    = SYSRQ_ENABLE_DUMP,

> > +     .nmi_safe       = true,

> >  };

> >  #else

> >  #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)

> > @@ -538,6 +546,26 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)

> >                  sysrq_key_table[i] = op_p;

> >  }

> >

> > +#define SYSRQ_NMI_FIFO_SIZE  64

> > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);

> > +

> > +static void sysrq_do_nmi_work(struct irq_work *work)

> > +{

> > +     struct sysrq_key_op *op_p;

> > +     int key;

> > +

> > +     if (!kfifo_len(&sysrq_nmi_fifo))

> > +             return;

>

> What's the point of this check?

>


You are correct, it's just a redundant check as the loop below will
automatically take care of empty fifo. So, I will remove it instead.

-Sumit

>

> Daniel.

>

>

> > +

> > +     while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {

> > +             op_p = __sysrq_get_key_op(key);

> > +             if (op_p)

> > +                     op_p->handler(key);

> > +     }

> > +}

> > +

> > +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);

> > +

> >  void __handle_sysrq(int key, bool check_mask)

> >  {

> >       const struct sysrq_key_op *op_p;

> > @@ -568,7 +596,13 @@ void __handle_sysrq(int key, bool check_mask)

> >               if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {

> >                       pr_info("%s\n", op_p->action_msg);

> >                       console_loglevel = orig_log_level;

> > -                     op_p->handler(key);

> > +

> > +                     if (in_nmi() && !op_p->nmi_safe) {

> > +                             kfifo_in(&sysrq_nmi_fifo, &key, 1);

> > +                             irq_work_queue(&sysrq_nmi_work);

> > +                     } else {

> > +                             op_p->handler(key);

> > +                     }

> >               } else {

> >                       pr_info("This sysrq operation is disabled.\n");

> >                       console_loglevel = orig_log_level;

> > diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h

> > index 3a582ec..630b5b9 100644

> > --- a/include/linux/sysrq.h

> > +++ b/include/linux/sysrq.h

> > @@ -34,6 +34,7 @@ struct sysrq_key_op {

> >       const char * const help_msg;

> >       const char * const action_msg;

> >       const int enable_mask;

> > +     const bool nmi_safe;

> >  };

> >

> >  #ifdef CONFIG_MAGIC_SYSRQ

> > diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c

> > index 9e59347..2b51173 100644

> > --- a/kernel/debug/debug_core.c

> > +++ b/kernel/debug/debug_core.c

> > @@ -943,6 +943,7 @@ static const struct sysrq_key_op sysrq_dbg_op = {

> >       .handler        = sysrq_handle_dbg,

> >       .help_msg       = "debug(g)",

> >       .action_msg     = "DEBUG",

> > +     .nmi_safe       = true,

> >  };

> >  #endif

> >

> > --

> > 2.7.4

> >
diff mbox series

Patch

diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 7c95afa9..97393c7 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -50,6 +50,8 @@ 
 #include <linux/syscalls.h>
 #include <linux/of.h>
 #include <linux/rcupdate.h>
+#include <linux/irq_work.h>
+#include <linux/kfifo.h>
 
 #include <asm/ptrace.h>
 #include <asm/irq_regs.h>
@@ -111,6 +113,7 @@  static const struct sysrq_key_op sysrq_loglevel_op = {
 	.help_msg	= "loglevel(0-9)",
 	.action_msg	= "Changing Loglevel",
 	.enable_mask	= SYSRQ_ENABLE_LOG,
+	.nmi_safe	= true,
 };
 
 #ifdef CONFIG_VT
@@ -157,6 +160,7 @@  static const struct sysrq_key_op sysrq_crash_op = {
 	.help_msg	= "crash(c)",
 	.action_msg	= "Trigger a crash",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
+	.nmi_safe	= true,
 };
 
 static void sysrq_handle_reboot(int key)
@@ -170,6 +174,7 @@  static const struct sysrq_key_op sysrq_reboot_op = {
 	.help_msg	= "reboot(b)",
 	.action_msg	= "Resetting",
 	.enable_mask	= SYSRQ_ENABLE_BOOT,
+	.nmi_safe	= true,
 };
 
 const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op;
@@ -217,6 +222,7 @@  static const struct sysrq_key_op sysrq_showlocks_op = {
 	.handler	= sysrq_handle_showlocks,
 	.help_msg	= "show-all-locks(d)",
 	.action_msg	= "Show Locks Held",
+	.nmi_safe	= true,
 };
 #else
 #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL)
@@ -289,6 +295,7 @@  static const struct sysrq_key_op sysrq_showregs_op = {
 	.help_msg	= "show-registers(p)",
 	.action_msg	= "Show Regs",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
+	.nmi_safe	= true,
 };
 
 static void sysrq_handle_showstate(int key)
@@ -326,6 +333,7 @@  static const struct sysrq_key_op sysrq_ftrace_dump_op = {
 	.help_msg	= "dump-ftrace-buffer(z)",
 	.action_msg	= "Dump ftrace buffer",
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
+	.nmi_safe	= true,
 };
 #else
 #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL)
@@ -538,6 +546,26 @@  static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p)
                 sysrq_key_table[i] = op_p;
 }
 
+#define SYSRQ_NMI_FIFO_SIZE	64
+static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE);
+
+static void sysrq_do_nmi_work(struct irq_work *work)
+{
+	struct sysrq_key_op *op_p;
+	int key;
+
+	if (!kfifo_len(&sysrq_nmi_fifo))
+		return;
+
+	while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) {
+		op_p = __sysrq_get_key_op(key);
+		if (op_p)
+			op_p->handler(key);
+	}
+}
+
+static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work);
+
 void __handle_sysrq(int key, bool check_mask)
 {
 	const struct sysrq_key_op *op_p;
@@ -568,7 +596,13 @@  void __handle_sysrq(int key, bool check_mask)
 		if (!check_mask || sysrq_on_mask(op_p->enable_mask)) {
 			pr_info("%s\n", op_p->action_msg);
 			console_loglevel = orig_log_level;
-			op_p->handler(key);
+
+			if (in_nmi() && !op_p->nmi_safe) {
+				kfifo_in(&sysrq_nmi_fifo, &key, 1);
+				irq_work_queue(&sysrq_nmi_work);
+			} else {
+				op_p->handler(key);
+			}
 		} else {
 			pr_info("This sysrq operation is disabled.\n");
 			console_loglevel = orig_log_level;
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 3a582ec..630b5b9 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -34,6 +34,7 @@  struct sysrq_key_op {
 	const char * const help_msg;
 	const char * const action_msg;
 	const int enable_mask;
+	const bool nmi_safe;
 };
 
 #ifdef CONFIG_MAGIC_SYSRQ
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 9e59347..2b51173 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -943,6 +943,7 @@  static const struct sysrq_key_op sysrq_dbg_op = {
 	.handler	= sysrq_handle_dbg,
 	.help_msg	= "debug(g)",
 	.action_msg	= "DEBUG",
+	.nmi_safe	= true,
 };
 #endif