Message ID | 1594386725-10346-2-git-send-email-sumit.garg@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | Introduce NMI aware serial drivers | expand |
On Fri, Jul 10, 2020 at 06:42:02PM +0530, Sumit Garg wrote: > With the advent of pseudo NMIs on arm64 its been possible to raise > serial device interrupt as an NMI in polling mode. With that its now > possible to do a magic sysrq in NMI context rather than in normal IRQ > context. This is patch 1/4... it is not yet possible to do this. Use future tense. > So make sysrq handler NMI aware and allow system requests to > be marked as NMI safe which can run directly in NMI context while > categorized as not being NMI safe will be queued as irq_work for later > processing in normal interrupt context. > > This feature is especially helpful in case the primary CPU is stuck in > deadlock with interrupts disabled won't honour serial device interrupt. > So with sysrq running in NMI context is helpful to debug such scenarios. Would be useful to explain the due diligence checks done to decide which sysrq to make nmi_safe. > Signed-off-by: Sumit Garg <sumit.garg@linaro.org> > --- > drivers/tty/sysrq.c | 36 +++++++++++++++++++++++++++++++++++- > include/linux/sysrq.h | 1 + > kernel/debug/debug_core.c | 1 + > 3 files changed, 37 insertions(+), 1 deletion(-) > > diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c > index 7c95afa9..97393c7 100644 > --- a/drivers/tty/sysrq.c > +++ b/drivers/tty/sysrq.c > @@ -50,6 +50,8 @@ > #include <linux/syscalls.h> > #include <linux/of.h> > #include <linux/rcupdate.h> > +#include <linux/irq_work.h> > +#include <linux/kfifo.h> > > #include <asm/ptrace.h> > #include <asm/irq_regs.h> > @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = { > .help_msg = "loglevel(0-9)", > .action_msg = "Changing Loglevel", > .enable_mask = SYSRQ_ENABLE_LOG, > + .nmi_safe = true, > }; > > #ifdef CONFIG_VT > @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = { > .help_msg = "crash(c)", > .action_msg = "Trigger a crash", > .enable_mask = SYSRQ_ENABLE_DUMP, > + .nmi_safe = true, > }; > > static void sysrq_handle_reboot(int key) > @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = { > .help_msg = "reboot(b)", > .action_msg = "Resetting", > .enable_mask = SYSRQ_ENABLE_BOOT, > + .nmi_safe = true, > }; > > const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op; > @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = { > .handler = sysrq_handle_showlocks, > .help_msg = "show-all-locks(d)", > .action_msg = "Show Locks Held", > + .nmi_safe = true, > }; > #else > #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL) > @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = { > .help_msg = "show-registers(p)", > .action_msg = "Show Regs", > .enable_mask = SYSRQ_ENABLE_DUMP, > + .nmi_safe = true, > }; > > static void sysrq_handle_showstate(int key) > @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = { > .help_msg = "dump-ftrace-buffer(z)", > .action_msg = "Dump ftrace buffer", > .enable_mask = SYSRQ_ENABLE_DUMP, > + .nmi_safe = true, > }; > #else > #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL) > @@ -538,6 +546,26 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p) > sysrq_key_table[i] = op_p; > } > > +#define SYSRQ_NMI_FIFO_SIZE 64 > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE); > + > +static void sysrq_do_nmi_work(struct irq_work *work) > +{ > + struct sysrq_key_op *op_p; > + int key; > + > + if (!kfifo_len(&sysrq_nmi_fifo)) > + return; What's the point of this check? Daniel. > + > + while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) { > + op_p = __sysrq_get_key_op(key); > + if (op_p) > + op_p->handler(key); > + } > +} > + > +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work); > + > void __handle_sysrq(int key, bool check_mask) > { > const struct sysrq_key_op *op_p; > @@ -568,7 +596,13 @@ void __handle_sysrq(int key, bool check_mask) > if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { > pr_info("%s\n", op_p->action_msg); > console_loglevel = orig_log_level; > - op_p->handler(key); > + > + if (in_nmi() && !op_p->nmi_safe) { > + kfifo_in(&sysrq_nmi_fifo, &key, 1); > + irq_work_queue(&sysrq_nmi_work); > + } else { > + op_p->handler(key); > + } > } else { > pr_info("This sysrq operation is disabled.\n"); > console_loglevel = orig_log_level; > diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h > index 3a582ec..630b5b9 100644 > --- a/include/linux/sysrq.h > +++ b/include/linux/sysrq.h > @@ -34,6 +34,7 @@ struct sysrq_key_op { > const char * const help_msg; > const char * const action_msg; > const int enable_mask; > + const bool nmi_safe; > }; > > #ifdef CONFIG_MAGIC_SYSRQ > diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c > index 9e59347..2b51173 100644 > --- a/kernel/debug/debug_core.c > +++ b/kernel/debug/debug_core.c > @@ -943,6 +943,7 @@ static const struct sysrq_key_op sysrq_dbg_op = { > .handler = sysrq_handle_dbg, > .help_msg = "debug(g)", > .action_msg = "DEBUG", > + .nmi_safe = true, > }; > #endif > > -- > 2.7.4 >
On Mon, 13 Jul 2020 at 19:34, Daniel Thompson <daniel.thompson@linaro.org> wrote: > > On Fri, Jul 10, 2020 at 06:42:02PM +0530, Sumit Garg wrote: > > With the advent of pseudo NMIs on arm64 its been possible to raise > > serial device interrupt as an NMI in polling mode. With that its now > > possible to do a magic sysrq in NMI context rather than in normal IRQ > > context. > > This is patch 1/4... it is not yet possible to do this. Use future > tense. Okay. > > > So make sysrq handler NMI aware and allow system requests to > > be marked as NMI safe which can run directly in NMI context while > > categorized as not being NMI safe will be queued as irq_work for later > > processing in normal interrupt context. > > > > This feature is especially helpful in case the primary CPU is stuck in > > deadlock with interrupts disabled won't honour serial device interrupt. > > So with sysrq running in NMI context is helpful to debug such scenarios. > > Would be useful to explain the due diligence checks done to decide which > sysrq to make nmi_safe. > Sure, will add that info as well. > > > Signed-off-by: Sumit Garg <sumit.garg@linaro.org> > > --- > > drivers/tty/sysrq.c | 36 +++++++++++++++++++++++++++++++++++- > > include/linux/sysrq.h | 1 + > > kernel/debug/debug_core.c | 1 + > > 3 files changed, 37 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c > > index 7c95afa9..97393c7 100644 > > --- a/drivers/tty/sysrq.c > > +++ b/drivers/tty/sysrq.c > > @@ -50,6 +50,8 @@ > > #include <linux/syscalls.h> > > #include <linux/of.h> > > #include <linux/rcupdate.h> > > +#include <linux/irq_work.h> > > +#include <linux/kfifo.h> > > > > #include <asm/ptrace.h> > > #include <asm/irq_regs.h> > > @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = { > > .help_msg = "loglevel(0-9)", > > .action_msg = "Changing Loglevel", > > .enable_mask = SYSRQ_ENABLE_LOG, > > + .nmi_safe = true, > > }; > > > > #ifdef CONFIG_VT > > @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = { > > .help_msg = "crash(c)", > > .action_msg = "Trigger a crash", > > .enable_mask = SYSRQ_ENABLE_DUMP, > > + .nmi_safe = true, > > }; > > > > static void sysrq_handle_reboot(int key) > > @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = { > > .help_msg = "reboot(b)", > > .action_msg = "Resetting", > > .enable_mask = SYSRQ_ENABLE_BOOT, > > + .nmi_safe = true, > > }; > > > > const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op; > > @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = { > > .handler = sysrq_handle_showlocks, > > .help_msg = "show-all-locks(d)", > > .action_msg = "Show Locks Held", > > + .nmi_safe = true, > > }; > > #else > > #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL) > > @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = { > > .help_msg = "show-registers(p)", > > .action_msg = "Show Regs", > > .enable_mask = SYSRQ_ENABLE_DUMP, > > + .nmi_safe = true, > > }; > > > > static void sysrq_handle_showstate(int key) > > @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = { > > .help_msg = "dump-ftrace-buffer(z)", > > .action_msg = "Dump ftrace buffer", > > .enable_mask = SYSRQ_ENABLE_DUMP, > > + .nmi_safe = true, > > }; > > #else > > #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL) > > @@ -538,6 +546,26 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p) > > sysrq_key_table[i] = op_p; > > } > > > > +#define SYSRQ_NMI_FIFO_SIZE 64 > > +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE); > > + > > +static void sysrq_do_nmi_work(struct irq_work *work) > > +{ > > + struct sysrq_key_op *op_p; > > + int key; > > + > > + if (!kfifo_len(&sysrq_nmi_fifo)) > > + return; > > What's the point of this check? > You are correct, it's just a redundant check as the loop below will automatically take care of empty fifo. So, I will remove it instead. -Sumit > > Daniel. > > > > + > > + while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) { > > + op_p = __sysrq_get_key_op(key); > > + if (op_p) > > + op_p->handler(key); > > + } > > +} > > + > > +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work); > > + > > void __handle_sysrq(int key, bool check_mask) > > { > > const struct sysrq_key_op *op_p; > > @@ -568,7 +596,13 @@ void __handle_sysrq(int key, bool check_mask) > > if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { > > pr_info("%s\n", op_p->action_msg); > > console_loglevel = orig_log_level; > > - op_p->handler(key); > > + > > + if (in_nmi() && !op_p->nmi_safe) { > > + kfifo_in(&sysrq_nmi_fifo, &key, 1); > > + irq_work_queue(&sysrq_nmi_work); > > + } else { > > + op_p->handler(key); > > + } > > } else { > > pr_info("This sysrq operation is disabled.\n"); > > console_loglevel = orig_log_level; > > diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h > > index 3a582ec..630b5b9 100644 > > --- a/include/linux/sysrq.h > > +++ b/include/linux/sysrq.h > > @@ -34,6 +34,7 @@ struct sysrq_key_op { > > const char * const help_msg; > > const char * const action_msg; > > const int enable_mask; > > + const bool nmi_safe; > > }; > > > > #ifdef CONFIG_MAGIC_SYSRQ > > diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c > > index 9e59347..2b51173 100644 > > --- a/kernel/debug/debug_core.c > > +++ b/kernel/debug/debug_core.c > > @@ -943,6 +943,7 @@ static const struct sysrq_key_op sysrq_dbg_op = { > > .handler = sysrq_handle_dbg, > > .help_msg = "debug(g)", > > .action_msg = "DEBUG", > > + .nmi_safe = true, > > }; > > #endif > > > > -- > > 2.7.4 > >
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 7c95afa9..97393c7 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -50,6 +50,8 @@ #include <linux/syscalls.h> #include <linux/of.h> #include <linux/rcupdate.h> +#include <linux/irq_work.h> +#include <linux/kfifo.h> #include <asm/ptrace.h> #include <asm/irq_regs.h> @@ -111,6 +113,7 @@ static const struct sysrq_key_op sysrq_loglevel_op = { .help_msg = "loglevel(0-9)", .action_msg = "Changing Loglevel", .enable_mask = SYSRQ_ENABLE_LOG, + .nmi_safe = true, }; #ifdef CONFIG_VT @@ -157,6 +160,7 @@ static const struct sysrq_key_op sysrq_crash_op = { .help_msg = "crash(c)", .action_msg = "Trigger a crash", .enable_mask = SYSRQ_ENABLE_DUMP, + .nmi_safe = true, }; static void sysrq_handle_reboot(int key) @@ -170,6 +174,7 @@ static const struct sysrq_key_op sysrq_reboot_op = { .help_msg = "reboot(b)", .action_msg = "Resetting", .enable_mask = SYSRQ_ENABLE_BOOT, + .nmi_safe = true, }; const struct sysrq_key_op *__sysrq_reboot_op = &sysrq_reboot_op; @@ -217,6 +222,7 @@ static const struct sysrq_key_op sysrq_showlocks_op = { .handler = sysrq_handle_showlocks, .help_msg = "show-all-locks(d)", .action_msg = "Show Locks Held", + .nmi_safe = true, }; #else #define sysrq_showlocks_op (*(const struct sysrq_key_op *)NULL) @@ -289,6 +295,7 @@ static const struct sysrq_key_op sysrq_showregs_op = { .help_msg = "show-registers(p)", .action_msg = "Show Regs", .enable_mask = SYSRQ_ENABLE_DUMP, + .nmi_safe = true, }; static void sysrq_handle_showstate(int key) @@ -326,6 +333,7 @@ static const struct sysrq_key_op sysrq_ftrace_dump_op = { .help_msg = "dump-ftrace-buffer(z)", .action_msg = "Dump ftrace buffer", .enable_mask = SYSRQ_ENABLE_DUMP, + .nmi_safe = true, }; #else #define sysrq_ftrace_dump_op (*(const struct sysrq_key_op *)NULL) @@ -538,6 +546,26 @@ static void __sysrq_put_key_op(int key, const struct sysrq_key_op *op_p) sysrq_key_table[i] = op_p; } +#define SYSRQ_NMI_FIFO_SIZE 64 +static DEFINE_KFIFO(sysrq_nmi_fifo, int, SYSRQ_NMI_FIFO_SIZE); + +static void sysrq_do_nmi_work(struct irq_work *work) +{ + struct sysrq_key_op *op_p; + int key; + + if (!kfifo_len(&sysrq_nmi_fifo)) + return; + + while (kfifo_out(&sysrq_nmi_fifo, &key, 1)) { + op_p = __sysrq_get_key_op(key); + if (op_p) + op_p->handler(key); + } +} + +static DEFINE_IRQ_WORK(sysrq_nmi_work, sysrq_do_nmi_work); + void __handle_sysrq(int key, bool check_mask) { const struct sysrq_key_op *op_p; @@ -568,7 +596,13 @@ void __handle_sysrq(int key, bool check_mask) if (!check_mask || sysrq_on_mask(op_p->enable_mask)) { pr_info("%s\n", op_p->action_msg); console_loglevel = orig_log_level; - op_p->handler(key); + + if (in_nmi() && !op_p->nmi_safe) { + kfifo_in(&sysrq_nmi_fifo, &key, 1); + irq_work_queue(&sysrq_nmi_work); + } else { + op_p->handler(key); + } } else { pr_info("This sysrq operation is disabled.\n"); console_loglevel = orig_log_level; diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 3a582ec..630b5b9 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -34,6 +34,7 @@ struct sysrq_key_op { const char * const help_msg; const char * const action_msg; const int enable_mask; + const bool nmi_safe; }; #ifdef CONFIG_MAGIC_SYSRQ diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 9e59347..2b51173 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -943,6 +943,7 @@ static const struct sysrq_key_op sysrq_dbg_op = { .handler = sysrq_handle_dbg, .help_msg = "debug(g)", .action_msg = "DEBUG", + .nmi_safe = true, }; #endif
With the advent of pseudo NMIs on arm64 its been possible to raise serial device interrupt as an NMI in polling mode. With that its now possible to do a magic sysrq in NMI context rather than in normal IRQ context. So make sysrq handler NMI aware and allow system requests to be marked as NMI safe which can run directly in NMI context while categorized as not being NMI safe will be queued as irq_work for later processing in normal interrupt context. This feature is especially helpful in case the primary CPU is stuck in deadlock with interrupts disabled won't honour serial device interrupt. So with sysrq running in NMI context is helpful to debug such scenarios. Signed-off-by: Sumit Garg <sumit.garg@linaro.org> --- drivers/tty/sysrq.c | 36 +++++++++++++++++++++++++++++++++++- include/linux/sysrq.h | 1 + kernel/debug/debug_core.c | 1 + 3 files changed, 37 insertions(+), 1 deletion(-) -- 2.7.4