
[RFC,2/5] serial: core: Add framework to allow NMI aware serial drivers

Message ID 1595333413-30052-3-git-send-email-sumit.garg@linaro.org
State New
Series Introduce NMI aware serial drivers

Commit Message

Sumit Garg July 21, 2020, 12:10 p.m. UTC
Add NMI framework APIs in the serial core which can be leveraged by serial
drivers to provide NMI-driven serial transfers. These APIs are kept under
CONFIG_CONSOLE_POLL since kgdb, which initializes the UART in polling mode,
is currently the only known user of an NMI-driven serial port.

The general idea is to intercept RX characters in NMI context; if they
are specific to magic sysrq, the corresponding handler is allowed to run
in NMI context. All other RX and TX operations are deferred to an IRQ
work queue so that they run in normal interrupt context.

Also, since the magic sysrq entry APIs will need to be invoked from NMI
context, make those APIs NMI safe by deferring NMI-unsafe work to the
IRQ work queue.

Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

---
 drivers/tty/serial/serial_core.c | 120 ++++++++++++++++++++++++++++++++++++++-
 include/linux/serial_core.h      |  67 ++++++++++++++++++++++
 2 files changed, 185 insertions(+), 2 deletions(-)
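
For context, a rough sketch (not part of this series) of how a driver's RX
handler might use the new helpers; the foo_* register accessors and
FOO_OVERRUN mask are made-up placeholders, while the uart_* and tty_* calls
are from this patch and the existing serial core:

/* Illustrative sketch only; assumes CONFIG_CONSOLE_POLL=y.
 * foo_read_status(), foo_read_char() and FOO_OVERRUN are hypothetical
 * driver-specific details. */
static void foo_uart_handle_rx(struct uart_port *port)
{
        unsigned int status = foo_read_status(port);
        unsigned int ch = foo_read_char(port);

        /* Magic sysrq characters are handled directly, even from NMI. */
        if (uart_handle_sysrq_char(port, ch))
                return;

        /* In NMI context, stash the character and defer the rest. */
        if (uart_nmi_handle_char(port, status, FOO_OVERRUN, ch, TTY_NORMAL)) {
                irq_work_queue(&port->nmi_state.rx_work);
                return;
        }

        /* Normal (non-NMI) interrupt path. */
        uart_insert_char(port, status, FOO_OVERRUN, ch, TTY_NORMAL);
        tty_flip_buffer_push(&port->state->port);
}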

-- 
2.7.4

Comments

Doug Anderson Aug. 12, 2020, 11:59 p.m. UTC | #1
Hi,

On Tue, Jul 21, 2020 at 5:11 AM Sumit Garg <sumit.garg@linaro.org> wrote:
>
> Add NMI framework APIs in serial core which can be leveraged by serial
> drivers to have NMI driven serial transfers. These APIs are kept under
> CONFIG_CONSOLE_POLL as currently kgdb initializing uart in polling mode
> is the only known user to enable NMI driven serial port.
>
> The general idea is to intercept RX characters in NMI context, if those
> are specific to magic sysrq then allow corresponding handler to run in
> NMI context. Otherwise defer all other RX and TX operations to IRQ work
> queue in order to run those in normal interrupt context.
>
> Also, since magic sysrq entry APIs will need to be invoked from NMI
> context, so make those APIs NMI safe via deferring NMI unsafe work to
> IRQ work queue.
>
> Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
> ---
>  drivers/tty/serial/serial_core.c | 120 ++++++++++++++++++++++++++++++++++++++-
>  include/linux/serial_core.h      |  67 ++++++++++++++++++++++
>  2 files changed, 185 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
> index 57840cf..6342e90 100644
> --- a/drivers/tty/serial/serial_core.c
> +++ b/drivers/tty/serial/serial_core.c
> @@ -3181,8 +3181,14 @@ static bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch)
>                 return true;
>         }
>
> +#ifdef CONFIG_CONSOLE_POLL
> +       if (in_nmi())
> +               irq_work_queue(&port->nmi_state.sysrq_toggle_work);
> +       else
> +               schedule_work(&sysrq_enable_work);
> +#else
>         schedule_work(&sysrq_enable_work);
> -
> +#endif


It should be a very high bar to have #ifdefs inside functions.  I
don't think this meets it.  Instead maybe something like this
(untested and maybe slightly wrong syntax, but hopefully makes
sense?):

Outside the function:

#ifdef CONFIG_CONSOLE_POLL
#define queue_port_nmi_work(port, work_type) \
	irq_work_queue(&port->nmi_state.work_type)
#else
#define queue_port_nmi_work(port, work_type)
#endif

...and then:

if (IS_ENABLED(CONFIG_CONSOLE_POLL) && in_nmi())
  queue_port_nmi_work(port, sysrq_toggle_work);
else
  schedule_work(&sysrq_enable_work);

---

The whole double-hopping is really quite annoying.  I guess
schedule_work() can't be called from NMI context but can be called
from IRQ context?  So you need to first transition from NMI context to
IRQ context and then go and schedule the work?  Almost feels like we
should just fix schedule_work() to do this double-hop for you if
called from NMI context.  Seems like you could even re-use the list
pointers in the work_struct to keep the queue of people who need to be
scheduled from the next irq_work?  Worst case it seems like you could
add a schedule_work_nmi() that would do all the hoops for you.  ...but
I also know very little about NMI so maybe I'm being naive.


>         port->sysrq = 0;
>         return true;
>  }
> @@ -3273,12 +3279,122 @@ int uart_handle_break(struct uart_port *port)
>                 port->sysrq = 0;
>         }
>
> -       if (port->flags & UPF_SAK)
> +       if (port->flags & UPF_SAK) {
> +#ifdef CONFIG_CONSOLE_POLL
> +               if (in_nmi())
> +                       irq_work_queue(&port->nmi_state.sysrq_sak_work);
> +               else
> +                       do_SAK(state->port.tty);
> +#else
>                 do_SAK(state->port.tty);
> +#endif
> +       }


Similar comment as above about avoiding #ifdef in functions.  NOTE: if
you have something like schedule_work_nmi() I think you could just
modify the do_SAK() function to call it and consider do_SAK() to be
NMI safe.


>         return 0;
>  }
>  EXPORT_SYMBOL_GPL(uart_handle_break);
>
> +#ifdef CONFIG_CONSOLE_POLL
> +int uart_nmi_handle_char(struct uart_port *port, unsigned int status,
> +                        unsigned int overrun, unsigned int ch,
> +                        unsigned int flag)
> +{
> +       struct uart_nmi_rx_data rx_data;
> +
> +       if (!in_nmi())
> +               return 0;
> +
> +       rx_data.status = status;
> +       rx_data.overrun = overrun;
> +       rx_data.ch = ch;
> +       rx_data.flag = flag;
> +
> +       if (!kfifo_in(&port->nmi_state.rx_fifo, &rx_data, 1))
> +               ++port->icount.buf_overrun;
> +
> +       return 1;
> +}
> +EXPORT_SYMBOL_GPL(uart_nmi_handle_char);
> +
> +static void uart_nmi_rx_work(struct irq_work *rx_work)
> +{
> +       struct uart_nmi_state *nmi_state =
> +           container_of(rx_work, struct uart_nmi_state, rx_work);
> +       struct uart_port *port =
> +           container_of(nmi_state, struct uart_port, nmi_state);
> +       struct uart_nmi_rx_data rx_data;
> +
> +       /*
> +        * In polling mode, serial device is initialized much prior to
> +        * TTY port becoming active. This scenario is especially useful
> +        * from debugging perspective such that magic sysrq or debugger
> +        * entry would still be possible even when TTY port isn't
> +        * active (consider a boot hang case or if a user hasn't opened
> +        * the serial port). So we discard any other RX data apart from
> +        * magic sysrq commands in case TTY port isn't active.
> +        */
> +       if (!port->state || !tty_port_active(&port->state->port)) {
> +               kfifo_reset(&nmi_state->rx_fifo);
> +               return;
> +       }
> +
> +       spin_lock(&port->lock);
> +       while (kfifo_out(&nmi_state->rx_fifo, &rx_data, 1))
> +               uart_insert_char(port, rx_data.status, rx_data.overrun,
> +                                rx_data.ch, rx_data.flag);
> +       spin_unlock(&port->lock);
> +
> +       tty_flip_buffer_push(&port->state->port);
> +}
> +
> +static void uart_nmi_tx_work(struct irq_work *tx_work)
> +{
> +       struct uart_nmi_state *nmi_state =
> +           container_of(tx_work, struct uart_nmi_state, tx_work);
> +       struct uart_port *port =
> +           container_of(nmi_state, struct uart_port, nmi_state);
> +
> +       spin_lock(&port->lock);
> +       if (nmi_state->tx_irq_callback)
> +               nmi_state->tx_irq_callback(port);
> +       spin_unlock(&port->lock);
> +}
> +
> +static void uart_nmi_sak_work(struct irq_work *work)
> +{
> +       struct uart_nmi_state *nmi_state =
> +           container_of(work, struct uart_nmi_state, sysrq_sak_work);
> +       struct uart_port *port =
> +           container_of(nmi_state, struct uart_port, nmi_state);
> +
> +       do_SAK(port->state->port.tty);
> +}
> +
> +#ifdef CONFIG_MAGIC_SYSRQ_SERIAL
> +static void uart_nmi_toggle_work(struct irq_work *work)
> +{
> +       schedule_work(&sysrq_enable_work);
> +}


Nit: weird that it's called "toggle" work but just wraps "enable" work.



> +#endif
> +
> +int uart_nmi_state_init(struct uart_port *port)
> +{
> +       int ret;
> +
> +       ret = kfifo_alloc(&port->nmi_state.rx_fifo, 256, GFP_KERNEL);
> +       if (ret)
> +               return ret;
> +
> +       init_irq_work(&port->nmi_state.rx_work, uart_nmi_rx_work);
> +       init_irq_work(&port->nmi_state.tx_work, uart_nmi_tx_work);
> +       init_irq_work(&port->nmi_state.sysrq_sak_work, uart_nmi_sak_work);
> +#ifdef CONFIG_MAGIC_SYSRQ_SERIAL
> +       init_irq_work(&port->nmi_state.sysrq_toggle_work, uart_nmi_toggle_work);
> +#endif
> +       return ret;
> +}
> +EXPORT_SYMBOL_GPL(uart_nmi_state_init);
> +#endif
> +
>  EXPORT_SYMBOL(uart_write_wakeup);
>  EXPORT_SYMBOL(uart_register_driver);
>  EXPORT_SYMBOL(uart_unregister_driver);
> diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
> index 9fd550e..84487a9 100644
> --- a/include/linux/serial_core.h
> +++ b/include/linux/serial_core.h
> @@ -18,6 +18,8 @@
>  #include <linux/tty.h>
>  #include <linux/mutex.h>
>  #include <linux/sysrq.h>
> +#include <linux/irq_work.h>
> +#include <linux/kfifo.h>
>  #include <uapi/linux/serial_core.h>
>
>  #ifdef CONFIG_SERIAL_CORE_CONSOLE
> @@ -103,6 +105,28 @@ struct uart_icount {
>  typedef unsigned int __bitwise upf_t;
>  typedef unsigned int __bitwise upstat_t;
>
> +#ifdef CONFIG_CONSOLE_POLL
> +struct uart_nmi_rx_data {
> +       unsigned int            status;
> +       unsigned int            overrun;
> +       unsigned int            ch;
> +       unsigned int            flag;
> +};
> +
> +struct uart_nmi_state {
> +       bool                    active;
> +
> +       struct irq_work         tx_work;
> +       void                    (*tx_irq_callback)(struct uart_port *port);
> +
> +       struct irq_work         rx_work;
> +       DECLARE_KFIFO_PTR(rx_fifo, struct uart_nmi_rx_data);
> +
> +       struct irq_work         sysrq_sak_work;
> +       struct irq_work         sysrq_toggle_work;
> +};
> +#endif
> +
>  struct uart_port {
>         spinlock_t              lock;                   /* port lock */
>         unsigned long           iobase;                 /* in/out[bwl] */
> @@ -255,6 +279,9 @@ struct uart_port {
>         struct gpio_desc        *rs485_term_gpio;       /* enable RS485 bus termination */
>         struct serial_iso7816   iso7816;
>         void                    *private_data;          /* generic platform data pointer */
> +#ifdef CONFIG_CONSOLE_POLL
> +       struct uart_nmi_state   nmi_state;
> +#endif
>  };
>
>  static inline int serial_port_in(struct uart_port *up, int offset)
> @@ -475,4 +502,44 @@ extern int uart_handle_break(struct uart_port *port);
>                                          !((cflag) & CLOCAL))
>
>  int uart_get_rs485_mode(struct uart_port *port);
> +
> +/*
> + * The following are helper functions for the NMI aware serial drivers.
> + * Currently NMI support is only enabled under polling mode.
> + */
> +
> +#ifdef CONFIG_CONSOLE_POLL
> +int uart_nmi_state_init(struct uart_port *port);
> +int uart_nmi_handle_char(struct uart_port *port, unsigned int status,
> +                        unsigned int overrun, unsigned int ch,
> +                        unsigned int flag);
> +
> +static inline bool uart_nmi_active(struct uart_port *port)
> +{
> +       return port->nmi_state.active;
> +}
> +
> +static inline void uart_set_nmi_active(struct uart_port *port, bool val)
> +{
> +       port->nmi_state.active = val;
> +}
> +#else
> +static inline int uart_nmi_handle_char(struct uart_port *port,
> +                                      unsigned int status,
> +                                      unsigned int overrun,
> +                                      unsigned int ch, unsigned int flag)
> +{
> +       return 0;
> +}
> +
> +static inline bool uart_nmi_active(struct uart_port *port)
> +{
> +       return false;
> +}
> +
> +static inline void uart_set_nmi_active(struct uart_port *port, bool val)
> +{
> +}
> +#endif
> +
>  #endif /* LINUX_SERIAL_CORE_H */
> --
> 2.7.4
>
Sumit Garg Aug. 13, 2020, 2:19 p.m. UTC | #2
On Thu, 13 Aug 2020 at 05:29, Doug Anderson <dianders@chromium.org> wrote:
>
> Hi,
>
> On Tue, Jul 21, 2020 at 5:11 AM Sumit Garg <sumit.garg@linaro.org> wrote:
> >
> > [... commit message and patch trimmed; see the full quote above ...]
> >
> > +#ifdef CONFIG_CONSOLE_POLL
> > +       if (in_nmi())
> > +               irq_work_queue(&port->nmi_state.sysrq_toggle_work);
> > +       else
> > +               schedule_work(&sysrq_enable_work);
> > +#else
> >         schedule_work(&sysrq_enable_work);
> > -
> > +#endif
>
> It should be a very high bar to have #ifdefs inside functions.  I
> don't think this meets it.  Instead maybe something like this
> (untested and maybe slightly wrong syntax, but hopefully makes
> sense?):
>
> [... macro suggestion trimmed ...]
>
> The whole double-hopping is really quite annoying.  I guess
> schedule_work() can't be called from NMI context but can be called
> from IRQ context?  So you need to first transition from NMI context to
> IRQ context and then go and schedule the work?  Almost feels like we
> should just fix schedule_work() to do this double-hop for you if
> called from NMI context.  Seems like you could even re-use the list
> pointers in the work_struct to keep the queue of people who need to be
> scheduled from the next irq_work?  Worst case it seems like you could
> add a schedule_work_nmi() that would do all the hoops for you.  ...but
> I also know very little about NMI so maybe I'm being naive.
>


Thanks for this suggestion and yes indeed we could make
schedule_work() NMI safe and in turn get rid of all these #ifdefs. Have
a look at the changes below:

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 26de0ca..1daf1b4 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -14,6 +14,7 @@
 #include <linux/atomic.h>
 #include <linux/cpumask.h>
 #include <linux/rcupdate.h>
+#include <linux/irq_work.h>

 struct workqueue_struct;

@@ -106,6 +107,7 @@ struct work_struct {
 #ifdef CONFIG_LOCKDEP
        struct lockdep_map lockdep_map;
 #endif
+       struct irq_work iw;
 };

 #define WORK_DATA_INIT()       ATOMIC_LONG_INIT((unsigned long)WORK_STRUCT_NO_POOL)
@@ -478,6 +480,8 @@ extern void print_worker_info(const char *log_lvl, struct task_struct *task);
 extern void show_workqueue_state(void);
 extern void wq_worker_comm(char *buf, size_t size, struct task_struct *task);

+extern void queue_work_nmi(struct irq_work *iw);
+
 /**
  * queue_work - queue work on a workqueue
  * @wq: workqueue to use
@@ -565,6 +569,11 @@ static inline bool schedule_work_on(int cpu, struct work_struct *work)
  */
 static inline bool schedule_work(struct work_struct *work)
 {
+       if (in_nmi()) {
+               init_irq_work(&work->iw, queue_work_nmi);
+               return irq_work_queue(&work->iw);
+       }
+
        return queue_work(system_wq, work);
 }

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c41c3c1..aa22883 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1524,6 +1524,14 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL(queue_work_on);

+void queue_work_nmi(struct irq_work *iw)
+{
+       struct work_struct *work = container_of(iw, struct work_struct, iw);
+
+       queue_work(system_wq, work);
+}
+EXPORT_SYMBOL(queue_work_nmi);
+
 /**
  * workqueue_select_cpu_near - Select a CPU based on NUMA node
  * @node: NUMA node ID that we want to select a CPU from

>
> > [... do_SAK()/UPF_SAK hunk trimmed ...]
>
> Similar comment as above about avoiding #ifdef in functions.  NOTE: if
> you have something like schedule_work_nmi() I think you could just
> modify the do_SAK() function to call it and consider do_SAK() to be
> NMI safe.
>


See above.

>
> > [... patch trimmed ...]
> > +#ifdef CONFIG_MAGIC_SYSRQ_SERIAL
> > +static void uart_nmi_toggle_work(struct irq_work *work)
> > +{
> > +       schedule_work(&sysrq_enable_work);
> > +}
>
> Nit: weird that it's called "toggle" work but just wraps "enable" work.
>


Okay, but this API will no longer be needed if we make schedule_work()
NMI safe (see above).

-Sumit

Doug Anderson Aug. 13, 2020, 2:37 p.m. UTC | #3
Hi,

On Thu, Aug 13, 2020 at 7:19 AM Sumit Garg <sumit.garg@linaro.org> wrote:
>
> On Thu, 13 Aug 2020 at 05:29, Doug Anderson <dianders@chromium.org> wrote:
> >
> > [... trimmed ...]
>
> Thanks for this suggestion and yes indeed we could make
> schedule_work() NMI safe and in turn get rid of all these #ifdefs. Have
> a look at the changes below:
>
> diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
> index 26de0ca..1daf1b4 100644
> --- a/include/linux/workqueue.h
> +++ b/include/linux/workqueue.h
> [...]
> @@ -106,6 +107,7 @@ struct work_struct {
>  #ifdef CONFIG_LOCKDEP
>         struct lockdep_map lockdep_map;
>  #endif
> +       struct irq_work iw;


Hrm, I was thinking you could just have a single queue per CPU then
you don't need to add all this extra data to every single "struct
work_struct".  I was thinking you could use the existing list node in
the "struct work_struct" to keep track of the list of things.  ...but
maybe my idea isn't actually valid because the linked list might
be in use if we're scheduling work that's already pending / running?

In any case, I worry that people won't be happy with the extra
overhead per "struct work_struct".  Can we reduce it at all?  It still
does feel like you could get by with a single global queue and thus
you wouldn't need to store the function pointer and flags with every
"struct work_struct", right?  So all you'd need is a single pointer
for the linked list?  I haven't actually tried implementing this,
though, so I could certainly be wrong.
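
As a rough, untested illustration of that single-queue idea (not something
posted in the thread): each work_struct would only need one extra llist_node
('nmi_entry' below is a hypothetical new field), and a single shared irq_work
drains the list and calls schedule_work() from IRQ context:

/* Untested sketch; 'nmi_entry' is a hypothetical llist_node that would
 * have to be added to struct work_struct.  Everything else is existing
 * kernel API. */
static LLIST_HEAD(nmi_pending_works);

static void nmi_work_drain(struct irq_work *iw)
{
        struct llist_node *head = llist_del_all(&nmi_pending_works);
        struct work_struct *work, *tmp;

        llist_for_each_entry_safe(work, tmp, head, nmi_entry)
                schedule_work(work);
}

static DEFINE_IRQ_WORK(nmi_work_relay, nmi_work_drain);

bool schedule_work_nmi(struct work_struct *work)
{
        if (!in_nmi())
                return schedule_work(work);

        /* Re-queueing a work already on this list would still need the
         * kind of "pending" check that irq_work/workqueue do today. */
        llist_add(&work->nmi_entry, &nmi_pending_works);
        return irq_work_queue(&nmi_work_relay);
}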


-Doug
Sumit Garg Aug. 14, 2020, 11:17 a.m. UTC | #4
On Thu, 13 Aug 2020 at 20:08, Doug Anderson <dianders@chromium.org> wrote:
>
> Hi,
>
> On Thu, Aug 13, 2020 at 7:19 AM Sumit Garg <sumit.garg@linaro.org> wrote:
> >
> > [... trimmed ...]
> > @@ -106,6 +107,7 @@ struct work_struct {
> >  #ifdef CONFIG_LOCKDEP
> >         struct lockdep_map lockdep_map;
> >  #endif
> > +       struct irq_work iw;
>
> Hrm, I was thinking you could just have a single queue per CPU then
> you don't need to add all this extra data to every single "struct
> work_struct".  I was thinking you could use the existing list node in
> the "struct work_struct" to keep track of the list of things.  ...but
> maybe my idea isn't actually valid because the linked list might
> be in use if we're scheduling work that's already pending / running?
>
> In any case, I worry that people won't be happy with the extra
> overhead per "struct work_struct".  Can we reduce it at all?  It still
> does feel like you could get by with a single global queue and thus
> you wouldn't need to store the function pointer and flags with every
> "struct work_struct", right?  So all you'd need is a single pointer
> for the linked list?  I haven't actually tried implementing this,
> though, so I could certainly be wrong.


Let me try to elaborate here:

Here we are dealing with 2 different layers of deferring work: one is
irq_work (NMI safe) using "struct irq_work" and the other is the normal
workqueue (NMI unsafe) using "struct work_struct".

So when we are in NMI context, the only option is to use irq_work to
defer work, and we need to pass a reference to "struct irq_work". Now in
the following irq_work function:

+void queue_work_nmi(struct irq_work *iw)
+{
+       struct work_struct *work = container_of(iw, struct work_struct, iw);
+
+       queue_work(system_wq, work);
+}
+EXPORT_SYMBOL(queue_work_nmi);

we can't find a reference to "struct work_struct" unless there is a 1:1
mapping with "struct irq_work". So we require a way to establish this
mapping, and having "struct irq_work" as part of "struct work_struct"
tries to achieve that. If you have any better way to achieve this, I
can use that instead.

-Sumit

Daniel Thompson Aug. 14, 2020, 2:13 p.m. UTC | #5
On Fri, Aug 14, 2020 at 04:47:11PM +0530, Sumit Garg wrote:
> On Thu, 13 Aug 2020 at 20:08, Doug Anderson <dianders@chromium.org> wrote:
> >
> > [... trimmed ...]
> >
> > In any case, I worry that people won't be happy with the extra
> > overhead per "struct work_struct".  Can we reduce it at all?  It still
> > does feel like you could get by with a single global queue and thus
> > you wouldn't need to store the function pointer and flags with every
> > "struct work_struct", right?  So all you'd need is a single pointer
> > for the linked list?  I haven't actually tried implementing this,
> > though, so I could certainly be wrong.
>
> Let me try to elaborate here:
>
> Here we are dealing with 2 different layers of deferring work: one is
> irq_work (NMI safe) using "struct irq_work" and the other is the normal
> workqueue (NMI unsafe) using "struct work_struct".
>
> So when we are in NMI context, the only option is to use irq_work to
> defer work, and we need to pass a reference to "struct irq_work". Now in
> the following irq_work function:
>
> +void queue_work_nmi(struct irq_work *iw)
> +{
> +       struct work_struct *work = container_of(iw, struct work_struct, iw);
> +
> +       queue_work(system_wq, work);
> +}
> +EXPORT_SYMBOL(queue_work_nmi);
>
> we can't find a reference to "struct work_struct" unless there is a 1:1
> mapping with "struct irq_work". So we require a way to establish this
> mapping, and having "struct irq_work" as part of "struct work_struct"
> tries to achieve that. If you have any better way to achieve this, I
> can use that instead.


Perhaps don't consider this to be "fixing schedule_work()" but providing
an NMI-safe alternative to schedule_work().

Does it look better if you create a new type to map the two structures
together? Alternatively, are there enough existing use-cases to want to
extend irq_work_queue() with irq_work_schedule() or something similar?


Daniel.
Doug Anderson Aug. 14, 2020, 2:43 p.m. UTC | #6
Hi,

On Fri, Aug 14, 2020 at 4:17 AM Sumit Garg <sumit.garg@linaro.org> wrote:
>
> On Thu, 13 Aug 2020 at 20:08, Doug Anderson <dianders@chromium.org> wrote:
> >
> > [... trimmed ...]
>
> Let me try to elaborate here:
>
> Here we are dealing with 2 different layers of deferring work: one is
> irq_work (NMI safe) using "struct irq_work" and the other is the normal
> workqueue (NMI unsafe) using "struct work_struct".
>
> So when we are in NMI context, the only option is to use irq_work to
> defer work, and we need to pass a reference to "struct irq_work". Now in
> the following irq_work function:
>
> +void queue_work_nmi(struct irq_work *iw)
> +{
> +       struct work_struct *work = container_of(iw, struct work_struct, iw);
> +
> +       queue_work(system_wq, work);
> +}
> +EXPORT_SYMBOL(queue_work_nmi);
>
> we can't find a reference to "struct work_struct" unless there is a 1:1
> mapping with "struct irq_work". So we require a way to establish this
> mapping, and having "struct irq_work" as part of "struct work_struct"
> tries to achieve that. If you have any better way to achieve this, I
> can use that instead.


So I guess the two options to avoid the overhead are:

1. Create a new struct:

struct nmi_queuable_work_struct {
  struct work_struct work;
  struct irq_work iw;
};

Then the overhead is only needed for those that want this
functionality.  Those people would need to use a variant
nmi_schedule_work() which, depending on in_nmi(), would either
schedule it directly or use the extra work.

Looks like Daniel already responded and suggested this.
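
A rough, untested sketch of how that nmi_schedule_work() variant might look
(the relay function and init step are illustrative, not from the thread):

/* Untested sketch of option 1: only users that need NMI safety embed
 * the bigger struct and pay for the extra irq_work. */
static void nmi_work_relay(struct irq_work *iw)
{
        struct nmi_queuable_work_struct *nw =
                container_of(iw, struct nmi_queuable_work_struct, iw);

        schedule_work(&nw->work);       /* now in IRQ context, this is safe */
}

/* Callers initialise once, e.g.: init_irq_work(&nw->iw, nmi_work_relay); */
static bool nmi_schedule_work(struct nmi_queuable_work_struct *nw)
{
        if (in_nmi())
                return irq_work_queue(&nw->iw);

        return schedule_work(&nw->work);
}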


2. Something that duplicates the code of at least part of irq_work and
therefore saves the need to store the function pointer.  Think of it
this way: if you made a whole copy of irq_work that was hardcoded to
just call the function you wanted then you wouldn't need to store a
function pointer.  This is, of course, excessive.  I was trying to
figure out if you could do less by only copying the NMI-safe
linked-list manipulation, but this is probably impossible and not
worth it anyway.

-Doug
Sumit Garg Aug. 17, 2020, 12:27 p.m. UTC | #7
On Fri, 14 Aug 2020 at 19:43, Daniel Thompson
<daniel.thompson@linaro.org> wrote:
>
> On Fri, Aug 14, 2020 at 04:47:11PM +0530, Sumit Garg wrote:
> > [... trimmed ...]
> >
> > mapping, and having "struct irq_work" as part of "struct work_struct"
> > tries to achieve that. If you have any better way to achieve this, I
> > can use that instead.
>
> Perhaps don't consider this to be "fixing schedule_work()" but providing
> an NMI-safe alternative to schedule_work().

Okay.

>

> Does it look better if you create a new type to map the two structures

> together. Alternatively are there enough existing use-cases to want to

> extend irq_work_queue() with irq_work_schedule() or something similar?

>


Thanks for your suggestion, irq_work_schedule() looked even better
without any overhead, see below:

diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 3082378..1eade89 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -3,6 +3,7 @@
 #define _LINUX_IRQ_WORK_H

 #include <linux/smp_types.h>
+#include <linux/workqueue.h>

 /*
  * An entry can be in one of four states:
@@ -24,6 +25,11 @@ struct irq_work {
        void (*func)(struct irq_work *);
 };

+struct irq_work_schedule {
+       struct irq_work work;
+       struct work_struct *sched_work;
+};
+
 static inline
 void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 {
@@ -39,6 +45,7 @@ void init_irq_work(struct irq_work *work, void
(*func)(struct irq_work *))

 bool irq_work_queue(struct irq_work *work);
 bool irq_work_queue_on(struct irq_work *work, int cpu);
+bool irq_work_schedule(struct work_struct *sched_work);

 void irq_work_tick(void);
 void irq_work_sync(struct irq_work *work);
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index eca8396..3880316 100644
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -24,6 +24,8 @@
 static DEFINE_PER_CPU(struct llist_head, raised_list);
 static DEFINE_PER_CPU(struct llist_head, lazy_list);

+static struct irq_work_schedule irq_work_sched;
+
 /*
  * Claim the entry so that no one else will poke at it.
  */
@@ -79,6 +81,25 @@ bool irq_work_queue(struct irq_work *work)
 }
 EXPORT_SYMBOL_GPL(irq_work_queue);

+static void irq_work_schedule_fn(struct irq_work *work)
+{
+       struct irq_work_schedule *irq_work_sched =
+               container_of(work, struct irq_work_schedule, work);
+
+       if (irq_work_sched->sched_work)
+               schedule_work(irq_work_sched->sched_work);
+}
+
+/* Schedule work via irq work queue */
+bool irq_work_schedule(struct work_struct *sched_work)
+{
+       init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);
+       irq_work_sched.sched_work = sched_work;
+
+       return irq_work_queue(&irq_work_sched.work);
+}
+EXPORT_SYMBOL_GPL(irq_work_schedule);
+
 /*
  * Enqueue the irq_work @work on @cpu unless it's already pending
  * somewhere.

-Sumit

>

> Daniel.
Sumit Garg Aug. 17, 2020, 12:29 p.m. UTC | #8
On Fri, 14 Aug 2020 at 20:14, Doug Anderson <dianders@chromium.org> wrote:
>

> Hi,

>

> On Fri, Aug 14, 2020 at 4:17 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> >

> > On Thu, 13 Aug 2020 at 20:08, Doug Anderson <dianders@chromium.org> wrote:

> > >

> > > Hi,

> > >

> > > On Thu, Aug 13, 2020 at 7:19 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > > >

> > > > On Thu, 13 Aug 2020 at 05:29, Doug Anderson <dianders@chromium.org> wrote:

> > > > >

> > > > > Hi,

> > > > >

> > > > > On Tue, Jul 21, 2020 at 5:11 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > > > > >

> > > > > > Add NMI framework APIs in serial core which can be leveraged by serial

> > > > > > drivers to have NMI driven serial transfers. These APIs are kept under

> > > > > > CONFIG_CONSOLE_POLL as currently kgdb initializing uart in polling mode

> > > > > > is the only known user to enable NMI driven serial port.

> > > > > >

> > > > > > The general idea is to intercept RX characters in NMI context, if those

> > > > > > are specific to magic sysrq then allow corresponding handler to run in

> > > > > > NMI context. Otherwise defer all other RX and TX operations to IRQ work

> > > > > > queue in order to run those in normal interrupt context.

> > > > > >

> > > > > > Also, since magic sysrq entry APIs will need to be invoked from NMI

> > > > > > context, so make those APIs NMI safe via deferring NMI unsafe work to

> > > > > > IRQ work queue.

> > > > > >

> > > > > > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> > > > > > ---

> > > > > >  drivers/tty/serial/serial_core.c | 120 ++++++++++++++++++++++++++++++++++++++-

> > > > > >  include/linux/serial_core.h      |  67 ++++++++++++++++++++++

> > > > > >  2 files changed, 185 insertions(+), 2 deletions(-)

> > > > > >

> > > > > > diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c

> > > > > > index 57840cf..6342e90 100644

> > > > > > --- a/drivers/tty/serial/serial_core.c

> > > > > > +++ b/drivers/tty/serial/serial_core.c

> > > > > > @@ -3181,8 +3181,14 @@ static bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch)

> > > > > >                 return true;

> > > > > >         }

> > > > > >

> > > > > > +#ifdef CONFIG_CONSOLE_POLL

> > > > > > +       if (in_nmi())

> > > > > > +               irq_work_queue(&port->nmi_state.sysrq_toggle_work);

> > > > > > +       else

> > > > > > +               schedule_work(&sysrq_enable_work);

> > > > > > +#else

> > > > > >         schedule_work(&sysrq_enable_work);

> > > > > > -

> > > > > > +#endif

> > > > >

> > > > > It should be a very high bar to have #ifdefs inside functions.  I

> > > > > don't think this meets it.  Instead maybe something like this

> > > > > (untested and maybe slightly wrong syntax, but hopefully makes

> > > > > sense?):

> > > > >

> > > > > Outside the function:

> > > > >

> > > > > #ifdef CONFIG_CONSOLE_POLL

> > > > > #define queue_port_nmi_work(port, work_type)

> > > > > irq_work_queue(&port->nmi_state.work_type)

> > > > > #else

> > > > > #define queue_port_nmi_work(port, work_type)

> > > > > #endif

> > > > >

> > > > > ...and then:

> > > > >

> > > > > if (IS_ENABLED(CONFIG_CONSOLE_POLL) && in_nmi())

> > > > >   queue_port_nmi_work(port, sysrq_toggle_work);

> > > > > else

> > > > >   schedule_work(&sysrq_enable_work);

> > > > >

> > > > > ---

> > > > >

> > > > > The whole double-hopping is really quite annoying.  I guess

> > > > > schedule_work() can't be called from NMI context but can be called

> > > > > from IRQ context?  So you need to first transition from NMI context to

> > > > > IRQ context and then go and schedule the work?  Almost feels like we

> > > > > should just fix schedule_work() to do this double-hop for you if

> > > > > called from NMI context.  Seems like you could even re-use the list

> > > > > pointers in the work_struct to keep the queue of people who need to be

> > > > > scheduled from the next irq_work?  Worst case it seems like you could

> > > > > add a schedule_work_nmi() that would do all the hoops for you.  ...but

> > > > > I also know very little about NMI so maybe I'm being naive.

> > > > >

> > > >

> > > > Thanks for this suggestion and yes indeed we could make

> > > > schedule_work() NMI safe and in turn get rid of all this #ifdefs. Have

> > > > a look at below changes:

> > > >

> > > > diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h

> > > > index 26de0ca..1daf1b4 100644

> > > > --- a/include/linux/workqueue.h

> > > > +++ b/include/linux/workqueue.h

> > > > @@ -14,6 +14,7 @@

> > > >  #include <linux/atomic.h>

> > > >  #include <linux/cpumask.h>

> > > >  #include <linux/rcupdate.h>

> > > > +#include <linux/irq_work.h>

> > > >

> > > >  struct workqueue_struct;

> > > >

> > > > @@ -106,6 +107,7 @@ struct work_struct {

> > > >  #ifdef CONFIG_LOCKDEP

> > > >         struct lockdep_map lockdep_map;

> > > >  #endif

> > > > +       struct irq_work iw;

> > >

> > > Hrm, I was thinking you could just have a single queue per CPU then

> > > you don't need to add all this extra data to every single "struct

> > > work_struct".  I was thinking you could use the existing list node in

> > > the "struct work_struct" to keep track of the list of things.  ...but

> > > maybe my idea this isn't actually valid because the linked list might

> > > be in use if we're scheduling work that's already pending / running?

> > >

> > > In any case, I worry that people won't be happy with the extra

> > > overhead per "struct work_struct".  Can we reduce it at all?  It still

> > > does feel like you could get by with a single global queue and thus

> > > you wouldn't need to store the function pointer and flags with every

> > > "struct work_struct", right?  So all you'd need is a single pointer

> > > for the linked list?  I haven't actually tried implementing this,

> > > though, so I could certainly be wrong.

> >

> > Let me try to elaborate here:

> >

> > Here we are dealing with 2 different layers of deferring work, one is

> > irq_work (NMI safe) using "struct irq_work" and other is normal

> > workqueue (NMI unsafe) using "struct work_struct".

> >

> > So when we are in NMI context, the only option is to use irq_work to

> > defer work and need to pass reference to "struct irq_work". Now in

> > following irq_work function:

> >

> > +void queue_work_nmi(struct irq_work *iw)

> > +{

> > +       struct work_struct *work = container_of(iw, struct work_struct, iw);

> > +

> > +       queue_work(system_wq, work);

> > +}

> > +EXPORT_SYMBOL(queue_work_nmi);

> >

> > we can't find a reference to "struct work_struct" until there is 1:1

> > mapping with "struct irq_work". So we require a way to establish this

> > mapping and having "struct irq_work" as part of "struct work_struct"

> > tries to achieve that. If you have any better way to achieve this, I

> > can use that instead.

>

> So I guess the two options to avoid the overhead are:

>

> 1. Create a new struct:

>

> struct nmi_queuable_work_struct {

>   struct work_struct work;

>   struct irq_work iw;

> };

>

> Then the overhead is only needed for those that want this

> functionality.  Those people would need to use a variant

> nmi_schedule_work() which, depending on in_nmi(), would either

> schedule it directly or use the extra work.

>

> Looks like Daniel already responded and suggested this.

>

>

> 2. Something that duplicates the code of at least part of irq_work and

> therefore saves the need to store the function pointer.  Think of it

> this way: if you made a whole copy of irq_work that was hardcoded to

> just call the function you wanted then you wouldn't need to store a

> function pointer.  This is, of course, excessive.  I was trying to

> figure out if you could do less by only copying the NMI-safe

> linked-list manipulation, but this is probably impossible and not

> worth it anyway.

>


Thanks for your suggestions. I came up with an approach without any
overhead (see my reply to Daniel).

-Sumit

> -Doug
Doug Anderson Aug. 17, 2020, 1:57 p.m. UTC | #9
Hi,

On Mon, Aug 17, 2020 at 5:27 AM Sumit Garg <sumit.garg@linaro.org> wrote:
>

> Thanks for your suggestion, irq_work_schedule() looked even better

> without any overhead, see below:

>

> diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h

> index 3082378..1eade89 100644

> --- a/include/linux/irq_work.h

> +++ b/include/linux/irq_work.h

> @@ -3,6 +3,7 @@

>  #define _LINUX_IRQ_WORK_H

>

>  #include <linux/smp_types.h>

> +#include <linux/workqueue.h>

>

>  /*

>   * An entry can be in one of four states:

> @@ -24,6 +25,11 @@ struct irq_work {

>         void (*func)(struct irq_work *);

>  };

>

> +struct irq_work_schedule {

> +       struct irq_work work;

> +       struct work_struct *sched_work;

> +};

> +

>  static inline

>  void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))

>  {

>  {

> @@ -39,6 +45,7 @@ void init_irq_work(struct irq_work *work, void

> (*func)(struct irq_work *))

>

>  bool irq_work_queue(struct irq_work *work);

>  bool irq_work_queue_on(struct irq_work *work, int cpu);

> +bool irq_work_schedule(struct work_struct *sched_work);

>

>  void irq_work_tick(void);

>  void irq_work_sync(struct irq_work *work);

> diff --git a/kernel/irq_work.c b/kernel/irq_work.c

> index eca8396..3880316 100644

> --- a/kernel/irq_work.c

> +++ b/kernel/irq_work.c

> @@ -24,6 +24,8 @@

>  static DEFINE_PER_CPU(struct llist_head, raised_list);

>  static DEFINE_PER_CPU(struct llist_head, lazy_list);

>

> +static struct irq_work_schedule irq_work_sched;

> +

>  /*

>   * Claim the entry so that no one else will poke at it.

>   */

> @@ -79,6 +81,25 @@ bool irq_work_queue(struct irq_work *work)

>  }

>  EXPORT_SYMBOL_GPL(irq_work_queue);

>

> +static void irq_work_schedule_fn(struct irq_work *work)

> +{

> +       struct irq_work_schedule *irq_work_sched =

> +               container_of(work, struct irq_work_schedule, work);

> +

> +       if (irq_work_sched->sched_work)

> +               schedule_work(irq_work_sched->sched_work);

> +}

> +

> +/* Schedule work via irq work queue */

> +bool irq_work_schedule(struct work_struct *sched_work)

> +{

> +       init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);

> +       irq_work_sched.sched_work = sched_work;

> +

> +       return irq_work_queue(&irq_work_sched.work);

> +}

> +EXPORT_SYMBOL_GPL(irq_work_schedule);


Wait, howzat work?  There's a single global variable that you stash
the "sched_work" into with no locking?  What if two people schedule
work at the same time?

-Doug
Sumit Garg Aug. 17, 2020, 2:23 p.m. UTC | #10
On Mon, 17 Aug 2020 at 19:27, Doug Anderson <dianders@chromium.org> wrote:
>

> Hi,

>

> On Mon, Aug 17, 2020 at 5:27 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> >

> > Thanks for your suggestion, irq_work_schedule() looked even better

> > without any overhead, see below:

> >

> > diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h

> > index 3082378..1eade89 100644

> > --- a/include/linux/irq_work.h

> > +++ b/include/linux/irq_work.h

> > @@ -3,6 +3,7 @@

> >  #define _LINUX_IRQ_WORK_H

> >

> >  #include <linux/smp_types.h>

> > +#include <linux/workqueue.h>

> >

> >  /*

> >   * An entry can be in one of four states:

> > @@ -24,6 +25,11 @@ struct irq_work {

> >         void (*func)(struct irq_work *);

> >  };

> >

> > +struct irq_work_schedule {

> > +       struct irq_work work;

> > +       struct work_struct *sched_work;

> > +};

> > +

> >  static inline

> >  void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))

> >  {

> >  {

> > @@ -39,6 +45,7 @@ void init_irq_work(struct irq_work *work, void

> > (*func)(struct irq_work *))

> >

> >  bool irq_work_queue(struct irq_work *work);

> >  bool irq_work_queue_on(struct irq_work *work, int cpu);

> > +bool irq_work_schedule(struct work_struct *sched_work);

> >

> >  void irq_work_tick(void);

> >  void irq_work_sync(struct irq_work *work);

> > diff --git a/kernel/irq_work.c b/kernel/irq_work.c

> > index eca8396..3880316 100644

> > --- a/kernel/irq_work.c

> > +++ b/kernel/irq_work.c

> > @@ -24,6 +24,8 @@

> >  static DEFINE_PER_CPU(struct llist_head, raised_list);

> >  static DEFINE_PER_CPU(struct llist_head, lazy_list);

> >

> > +static struct irq_work_schedule irq_work_sched;

> > +

> >  /*

> >   * Claim the entry so that no one else will poke at it.

> >   */

> > @@ -79,6 +81,25 @@ bool irq_work_queue(struct irq_work *work)

> >  }

> >  EXPORT_SYMBOL_GPL(irq_work_queue);

> >

> > +static void irq_work_schedule_fn(struct irq_work *work)

> > +{

> > +       struct irq_work_schedule *irq_work_sched =

> > +               container_of(work, struct irq_work_schedule, work);

> > +

> > +       if (irq_work_sched->sched_work)

> > +               schedule_work(irq_work_sched->sched_work);

> > +}

> > +

> > +/* Schedule work via irq work queue */

> > +bool irq_work_schedule(struct work_struct *sched_work)

> > +{

> > +       init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);

> > +       irq_work_sched.sched_work = sched_work;

> > +

> > +       return irq_work_queue(&irq_work_sched.work);

> > +}

> > +EXPORT_SYMBOL_GPL(irq_work_schedule);

>

> Wait, howzat work?  There's a single global variable that you stash

> the "sched_work" into with no locking?  What if two people schedule

> work at the same time?


This API is intended to be invoked from NMI context only, so I think
there will be a single user at a time. And we can make that explicit
as well:

+/* Schedule work via irq work queue */
+bool irq_work_schedule(struct work_struct *sched_work)
+{
+       if (in_nmi()) {
+               init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);
+               irq_work_sched.sched_work = sched_work;
+
+               return irq_work_queue(&irq_work_sched.work);
+       }
+
+       return false;
+}
+EXPORT_SYMBOL_GPL(irq_work_schedule);

-Sumit

>

> -Doug
Daniel Thompson Aug. 17, 2020, 2:28 p.m. UTC | #11
On Mon, Aug 17, 2020 at 05:57:03PM +0530, Sumit Garg wrote:
> On Fri, 14 Aug 2020 at 19:43, Daniel Thompson

> <daniel.thompson@linaro.org> wrote:

> > On Fri, Aug 14, 2020 at 04:47:11PM +0530, Sumit Garg wrote:

> > Does it look better if you create a new type to map the two structures

> > together. Alternatively are there enough existing use-cases to want to

> > extend irq_work_queue() with irq_work_schedule() or something similar?

> >

> 

> Thanks for your suggestion, irq_work_schedule() looked even better

> without any overhead, see below:

> 

> diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h

> index 3082378..1eade89 100644

> --- a/include/linux/irq_work.h

> +++ b/include/linux/irq_work.h

> @@ -3,6 +3,7 @@

>  #define _LINUX_IRQ_WORK_H

> 

>  #include <linux/smp_types.h>

> +#include <linux/workqueue.h>

> 

>  /*

>   * An entry can be in one of four states:

> @@ -24,6 +25,11 @@ struct irq_work {

>         void (*func)(struct irq_work *);

>  };

> 

> +struct irq_work_schedule {

> +       struct irq_work work;

> +       struct work_struct *sched_work;

> +};

> +

>  static inline

>  void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))

>  {

>  {

> @@ -39,6 +45,7 @@ void init_irq_work(struct irq_work *work, void

> (*func)(struct irq_work *))

> 

>  bool irq_work_queue(struct irq_work *work);

>  bool irq_work_queue_on(struct irq_work *work, int cpu);

> +bool irq_work_schedule(struct work_struct *sched_work);

> 

>  void irq_work_tick(void);

>  void irq_work_sync(struct irq_work *work);

> diff --git a/kernel/irq_work.c b/kernel/irq_work.c

> index eca8396..3880316 100644

> --- a/kernel/irq_work.c

> +++ b/kernel/irq_work.c

> @@ -24,6 +24,8 @@

>  static DEFINE_PER_CPU(struct llist_head, raised_list);

>  static DEFINE_PER_CPU(struct llist_head, lazy_list);

> 

> +static struct irq_work_schedule irq_work_sched;

> +

>  /*

>   * Claim the entry so that no one else will poke at it.

>   */

> @@ -79,6 +81,25 @@ bool irq_work_queue(struct irq_work *work)

>  }

>  EXPORT_SYMBOL_GPL(irq_work_queue);

> 

> +static void irq_work_schedule_fn(struct irq_work *work)

> +{

> +       struct irq_work_schedule *irq_work_sched =

> +               container_of(work, struct irq_work_schedule, work);

> +

> +       if (irq_work_sched->sched_work)

> +               schedule_work(irq_work_sched->sched_work);

> +}

> +

> +/* Schedule work via irq work queue */

> +bool irq_work_schedule(struct work_struct *sched_work)

> +{

> +       init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);

> +       irq_work_sched.sched_work = sched_work;

> +

> +       return irq_work_queue(&irq_work_sched.work);

> +}

> +EXPORT_SYMBOL_GPL(irq_work_schedule);

> +


This is irredeemably broken.

Even if we didn't care about dropping events (which we do) then when you
overwrite irq_work_sched with a copy of another work_struct, either of
which could currently be enqueued somewhere, then you will cause some
very nasty corruption.


Daniel.
Daniel Thompson Aug. 17, 2020, 2:32 p.m. UTC | #12
On Mon, Aug 17, 2020 at 07:53:55PM +0530, Sumit Garg wrote:
> On Mon, 17 Aug 2020 at 19:27, Doug Anderson <dianders@chromium.org> wrote:

> >

> > Hi,

> >

> > On Mon, Aug 17, 2020 at 5:27 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > >

> > > Thanks for your suggestion, irq_work_schedule() looked even better

> > > without any overhead, see below:

> > >

> > > diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h

> > > index 3082378..1eade89 100644

> > > --- a/include/linux/irq_work.h

> > > +++ b/include/linux/irq_work.h

> > > @@ -3,6 +3,7 @@

> > >  #define _LINUX_IRQ_WORK_H

> > >

> > >  #include <linux/smp_types.h>

> > > +#include <linux/workqueue.h>

> > >

> > >  /*

> > >   * An entry can be in one of four states:

> > > @@ -24,6 +25,11 @@ struct irq_work {

> > >         void (*func)(struct irq_work *);

> > >  };

> > >

> > > +struct irq_work_schedule {

> > > +       struct irq_work work;

> > > +       struct work_struct *sched_work;

> > > +};

> > > +

> > >  static inline

> > >  void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))

> > >  {

> > >  {

> > > @@ -39,6 +45,7 @@ void init_irq_work(struct irq_work *work, void

> > > (*func)(struct irq_work *))

> > >

> > >  bool irq_work_queue(struct irq_work *work);

> > >  bool irq_work_queue_on(struct irq_work *work, int cpu);

> > > +bool irq_work_schedule(struct work_struct *sched_work);

> > >

> > >  void irq_work_tick(void);

> > >  void irq_work_sync(struct irq_work *work);

> > > diff --git a/kernel/irq_work.c b/kernel/irq_work.c

> > > index eca8396..3880316 100644

> > > --- a/kernel/irq_work.c

> > > +++ b/kernel/irq_work.c

> > > @@ -24,6 +24,8 @@

> > >  static DEFINE_PER_CPU(struct llist_head, raised_list);

> > >  static DEFINE_PER_CPU(struct llist_head, lazy_list);

> > >

> > > +static struct irq_work_schedule irq_work_sched;

> > > +

> > >  /*

> > >   * Claim the entry so that no one else will poke at it.

> > >   */

> > > @@ -79,6 +81,25 @@ bool irq_work_queue(struct irq_work *work)

> > >  }

> > >  EXPORT_SYMBOL_GPL(irq_work_queue);

> > >

> > > +static void irq_work_schedule_fn(struct irq_work *work)

> > > +{

> > > +       struct irq_work_schedule *irq_work_sched =

> > > +               container_of(work, struct irq_work_schedule, work);

> > > +

> > > +       if (irq_work_sched->sched_work)

> > > +               schedule_work(irq_work_sched->sched_work);

> > > +}

> > > +

> > > +/* Schedule work via irq work queue */

> > > +bool irq_work_schedule(struct work_struct *sched_work)

> > > +{

> > > +       init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);

> > > +       irq_work_sched.sched_work = sched_work;

> > > +

> > > +       return irq_work_queue(&irq_work_sched.work);

> > > +}

> > > +EXPORT_SYMBOL_GPL(irq_work_schedule);

> >

> > Wait, howzat work?  There's a single global variable that you stash

> > the "sched_work" into with no locking?  What if two people schedule

> > work at the same time?

> 

> This API is intended to be invoked from NMI context only, so I think

> there will be a single user at a time.


How can you possibly know that?

This is library code, not a helper in a driver.


Daniel.


> And we can make that explicit

> as well:

> 

> +/* Schedule work via irq work queue */

> +bool irq_work_schedule(struct work_struct *sched_work)

> +{

> +       if (in_nmi()) {

> +               init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);

> +               irq_work_sched.sched_work = sched_work;

> +

> +               return irq_work_queue(&irq_work_sched.work);

> +       }

> +

> +       return false;

> +}

> +EXPORT_SYMBOL_GPL(irq_work_schedule);

> 

> -Sumit

> 

> >

> > -Doug
Sumit Garg Aug. 18, 2020, 1:06 p.m. UTC | #13
On Mon, 17 Aug 2020 at 19:58, Daniel Thompson
<daniel.thompson@linaro.org> wrote:
>

> On Mon, Aug 17, 2020 at 05:57:03PM +0530, Sumit Garg wrote:

> > On Fri, 14 Aug 2020 at 19:43, Daniel Thompson

> > <daniel.thompson@linaro.org> wrote:

> > > On Fri, Aug 14, 2020 at 04:47:11PM +0530, Sumit Garg wrote:

> > > Does it look better if you create a new type to map the two structures

> > > together. Alternatively are there enough existing use-cases to want to

> > > extend irq_work_queue() with irq_work_schedule() or something similar?

> > >

> >

> > Thanks for your suggestion, irq_work_schedule() looked even better

> > without any overhead, see below:

> >

> > diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h

> > index 3082378..1eade89 100644

> > --- a/include/linux/irq_work.h

> > +++ b/include/linux/irq_work.h

> > @@ -3,6 +3,7 @@

> >  #define _LINUX_IRQ_WORK_H

> >

> >  #include <linux/smp_types.h>

> > +#include <linux/workqueue.h>

> >

> >  /*

> >   * An entry can be in one of four states:

> > @@ -24,6 +25,11 @@ struct irq_work {

> >         void (*func)(struct irq_work *);

> >  };

> >

> > +struct irq_work_schedule {

> > +       struct irq_work work;

> > +       struct work_struct *sched_work;

> > +};

> > +

> >  static inline

> >  void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))

> >  {

> >  {

> > @@ -39,6 +45,7 @@ void init_irq_work(struct irq_work *work, void

> > (*func)(struct irq_work *))

> >

> >  bool irq_work_queue(struct irq_work *work);

> >  bool irq_work_queue_on(struct irq_work *work, int cpu);

> > +bool irq_work_schedule(struct work_struct *sched_work);

> >

> >  void irq_work_tick(void);

> >  void irq_work_sync(struct irq_work *work);

> > diff --git a/kernel/irq_work.c b/kernel/irq_work.c

> > index eca8396..3880316 100644

> > --- a/kernel/irq_work.c

> > +++ b/kernel/irq_work.c

> > @@ -24,6 +24,8 @@

> >  static DEFINE_PER_CPU(struct llist_head, raised_list);

> >  static DEFINE_PER_CPU(struct llist_head, lazy_list);

> >

> > +static struct irq_work_schedule irq_work_sched;

> > +

> >  /*

> >   * Claim the entry so that no one else will poke at it.

> >   */

> > @@ -79,6 +81,25 @@ bool irq_work_queue(struct irq_work *work)

> >  }

> >  EXPORT_SYMBOL_GPL(irq_work_queue);

> >

> > +static void irq_work_schedule_fn(struct irq_work *work)

> > +{

> > +       struct irq_work_schedule *irq_work_sched =

> > +               container_of(work, struct irq_work_schedule, work);

> > +

> > +       if (irq_work_sched->sched_work)

> > +               schedule_work(irq_work_sched->sched_work);

> > +}

> > +

> > +/* Schedule work via irq work queue */

> > +bool irq_work_schedule(struct work_struct *sched_work)

> > +{

> > +       init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);

> > +       irq_work_sched.sched_work = sched_work;

> > +

> > +       return irq_work_queue(&irq_work_sched.work);

> > +}

> > +EXPORT_SYMBOL_GPL(irq_work_schedule);

> > +

>

> This is irredeemably broken.

>

> Even if we didn't care about dropping events (which we do) then when you

> overwrite irq_work_sched with a copy of another work_struct, either of

> which could currently be enqueued somewhere, then you will cause some

> very nasty corruption.

>


Okay, I see your point. I think there isn't a way to avoid a caller-specific
struct such as:

struct nmi_queuable_work_struct {
  struct work_struct work;
  struct irq_work iw;
};

So in that case I will shift to the approach suggested by Doug and instead
add a new nmi_schedule_work() API.
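
Something roughly like this on top of that struct (just a quick, untested
sketch; nmi_work_irq_fn() and init_nmi_work() are placeholder names):

static void nmi_work_irq_fn(struct irq_work *iw)
{
	struct nmi_queuable_work_struct *nwork =
		container_of(iw, struct nmi_queuable_work_struct, iw);

	schedule_work(&nwork->work);
}

static inline void init_nmi_work(struct nmi_queuable_work_struct *nwork,
				 work_func_t func)
{
	INIT_WORK(&nwork->work, func);
	init_irq_work(&nwork->iw, nmi_work_irq_fn);
}

/* Bounce via irq_work when called from NMI, otherwise schedule directly. */
static inline bool nmi_schedule_work(struct nmi_queuable_work_struct *nwork)
{
	if (in_nmi())
		return irq_work_queue(&nwork->iw);

	return schedule_work(&nwork->work);
}

That way only callers which actually opt in to NMI scheduling pay for the
extra irq_work, and there is no shared global state left to race on.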

-Sumit

>

> Daniel.
Sumit Garg Aug. 18, 2020, 1:18 p.m. UTC | #14
On Mon, 17 Aug 2020 at 20:02, Daniel Thompson
<daniel.thompson@linaro.org> wrote:
>

> On Mon, Aug 17, 2020 at 07:53:55PM +0530, Sumit Garg wrote:

> > On Mon, 17 Aug 2020 at 19:27, Doug Anderson <dianders@chromium.org> wrote:

> > >

> > > Hi,

> > >

> > > On Mon, Aug 17, 2020 at 5:27 AM Sumit Garg <sumit.garg@linaro.org> wrote:

> > > >

> > > > Thanks for your suggestion, irq_work_schedule() looked even better

> > > > without any overhead, see below:

> > > >

> > > > diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h

> > > > index 3082378..1eade89 100644

> > > > --- a/include/linux/irq_work.h

> > > > +++ b/include/linux/irq_work.h

> > > > @@ -3,6 +3,7 @@

> > > >  #define _LINUX_IRQ_WORK_H

> > > >

> > > >  #include <linux/smp_types.h>

> > > > +#include <linux/workqueue.h>

> > > >

> > > >  /*

> > > >   * An entry can be in one of four states:

> > > > @@ -24,6 +25,11 @@ struct irq_work {

> > > >         void (*func)(struct irq_work *);

> > > >  };

> > > >

> > > > +struct irq_work_schedule {

> > > > +       struct irq_work work;

> > > > +       struct work_struct *sched_work;

> > > > +};

> > > > +

> > > >  static inline

> > > >  void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))

> > > >  {

> > > >  {

> > > > @@ -39,6 +45,7 @@ void init_irq_work(struct irq_work *work, void

> > > > (*func)(struct irq_work *))

> > > >

> > > >  bool irq_work_queue(struct irq_work *work);

> > > >  bool irq_work_queue_on(struct irq_work *work, int cpu);

> > > > +bool irq_work_schedule(struct work_struct *sched_work);

> > > >

> > > >  void irq_work_tick(void);

> > > >  void irq_work_sync(struct irq_work *work);

> > > > diff --git a/kernel/irq_work.c b/kernel/irq_work.c

> > > > index eca8396..3880316 100644

> > > > --- a/kernel/irq_work.c

> > > > +++ b/kernel/irq_work.c

> > > > @@ -24,6 +24,8 @@

> > > >  static DEFINE_PER_CPU(struct llist_head, raised_list);

> > > >  static DEFINE_PER_CPU(struct llist_head, lazy_list);

> > > >

> > > > +static struct irq_work_schedule irq_work_sched;

> > > > +

> > > >  /*

> > > >   * Claim the entry so that no one else will poke at it.

> > > >   */

> > > > @@ -79,6 +81,25 @@ bool irq_work_queue(struct irq_work *work)

> > > >  }

> > > >  EXPORT_SYMBOL_GPL(irq_work_queue);

> > > >

> > > > +static void irq_work_schedule_fn(struct irq_work *work)

> > > > +{

> > > > +       struct irq_work_schedule *irq_work_sched =

> > > > +               container_of(work, struct irq_work_schedule, work);

> > > > +

> > > > +       if (irq_work_sched->sched_work)

> > > > +               schedule_work(irq_work_sched->sched_work);

> > > > +}

> > > > +

> > > > +/* Schedule work via irq work queue */

> > > > +bool irq_work_schedule(struct work_struct *sched_work)

> > > > +{

> > > > +       init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);

> > > > +       irq_work_sched.sched_work = sched_work;

> > > > +

> > > > +       return irq_work_queue(&irq_work_sched.work);

> > > > +}

> > > > +EXPORT_SYMBOL_GPL(irq_work_schedule);

> > >

> > > Wait, howzat work?  There's a single global variable that you stash

> > > the "sched_work" into with no locking?  What if two people schedule

> > > work at the same time?

> >

> > This API is intended to be invoked from NMI context only, so I think

> > there will be a single user at a time.

>

> How can you possibly know that?


I guess here you are referring to NMI nesting, correct?

Anyway, I am going to shift to another implementation as mentioned in
the other thread.

-Sumit

>

> This is library code, not a helper in a driver.

>

>

> Daniel.

>

>

> > And we can make that explicit

> > as well:

> >

> > +/* Schedule work via irq work queue */

> > +bool irq_work_schedule(struct work_struct *sched_work)

> > +{

> > +       if (in_nmi()) {

> > +               init_irq_work(&irq_work_sched.work, irq_work_schedule_fn);

> > +               irq_work_sched.sched_work = sched_work;

> > +

> > +               return irq_work_queue(&irq_work_sched.work);

> > +       }

> > +

> > +       return false;

> > +}

> > +EXPORT_SYMBOL_GPL(irq_work_schedule);

> >

> > -Sumit

> >

> > >

> > > -Doug
diff mbox series

Patch

diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 57840cf..6342e90 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -3181,8 +3181,14 @@  static bool uart_try_toggle_sysrq(struct uart_port *port, unsigned int ch)
 		return true;
 	}
 
+#ifdef CONFIG_CONSOLE_POLL
+	if (in_nmi())
+		irq_work_queue(&port->nmi_state.sysrq_toggle_work);
+	else
+		schedule_work(&sysrq_enable_work);
+#else
 	schedule_work(&sysrq_enable_work);
-
+#endif
 	port->sysrq = 0;
 	return true;
 }
@@ -3273,12 +3279,122 @@  int uart_handle_break(struct uart_port *port)
 		port->sysrq = 0;
 	}
 
-	if (port->flags & UPF_SAK)
+	if (port->flags & UPF_SAK) {
+#ifdef CONFIG_CONSOLE_POLL
+		if (in_nmi())
+			irq_work_queue(&port->nmi_state.sysrq_sak_work);
+		else
+			do_SAK(state->port.tty);
+#else
 		do_SAK(state->port.tty);
+#endif
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(uart_handle_break);
 
+#ifdef CONFIG_CONSOLE_POLL
+int uart_nmi_handle_char(struct uart_port *port, unsigned int status,
+			 unsigned int overrun, unsigned int ch,
+			 unsigned int flag)
+{
+	struct uart_nmi_rx_data rx_data;
+
+	if (!in_nmi())
+		return 0;
+
+	rx_data.status = status;
+	rx_data.overrun = overrun;
+	rx_data.ch = ch;
+	rx_data.flag = flag;
+
+	if (!kfifo_in(&port->nmi_state.rx_fifo, &rx_data, 1))
+		++port->icount.buf_overrun;
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(uart_nmi_handle_char);
+
+static void uart_nmi_rx_work(struct irq_work *rx_work)
+{
+	struct uart_nmi_state *nmi_state =
+	    container_of(rx_work, struct uart_nmi_state, rx_work);
+	struct uart_port *port =
+	    container_of(nmi_state, struct uart_port, nmi_state);
+	struct uart_nmi_rx_data rx_data;
+
+	/*
+	 * In polling mode, serial device is initialized much prior to
+	 * TTY port becoming active. This scenario is especially useful
+	 * from debugging perspective such that magic sysrq or debugger
+	 * entry would still be possible even when TTY port isn't
+	 * active (consider a boot hang case or if a user hasn't opened
+	 * the serial port). So we discard any other RX data apart from
+	 * magic sysrq commands in case TTY port isn't active.
+	 */
+	if (!port->state || !tty_port_active(&port->state->port)) {
+		kfifo_reset(&nmi_state->rx_fifo);
+		return;
+	}
+
+	spin_lock(&port->lock);
+	while (kfifo_out(&nmi_state->rx_fifo, &rx_data, 1))
+		uart_insert_char(port, rx_data.status, rx_data.overrun,
+				 rx_data.ch, rx_data.flag);
+	spin_unlock(&port->lock);
+
+	tty_flip_buffer_push(&port->state->port);
+}
+
+static void uart_nmi_tx_work(struct irq_work *tx_work)
+{
+	struct uart_nmi_state *nmi_state =
+	    container_of(tx_work, struct uart_nmi_state, tx_work);
+	struct uart_port *port =
+	    container_of(nmi_state, struct uart_port, nmi_state);
+
+	spin_lock(&port->lock);
+	if (nmi_state->tx_irq_callback)
+		nmi_state->tx_irq_callback(port);
+	spin_unlock(&port->lock);
+}
+
+static void uart_nmi_sak_work(struct irq_work *work)
+{
+	struct uart_nmi_state *nmi_state =
+	    container_of(work, struct uart_nmi_state, sysrq_sak_work);
+	struct uart_port *port =
+	    container_of(nmi_state, struct uart_port, nmi_state);
+
+	do_SAK(port->state->port.tty);
+}
+
+#ifdef CONFIG_MAGIC_SYSRQ_SERIAL
+static void uart_nmi_toggle_work(struct irq_work *work)
+{
+	schedule_work(&sysrq_enable_work);
+}
+#endif
+
+int uart_nmi_state_init(struct uart_port *port)
+{
+	int ret;
+
+	ret = kfifo_alloc(&port->nmi_state.rx_fifo, 256, GFP_KERNEL);
+	if (ret)
+		return ret;
+
+	init_irq_work(&port->nmi_state.rx_work, uart_nmi_rx_work);
+	init_irq_work(&port->nmi_state.tx_work, uart_nmi_tx_work);
+	init_irq_work(&port->nmi_state.sysrq_sak_work, uart_nmi_sak_work);
+#ifdef CONFIG_MAGIC_SYSRQ_SERIAL
+	init_irq_work(&port->nmi_state.sysrq_toggle_work, uart_nmi_toggle_work);
+#endif
+	return ret;
+}
+EXPORT_SYMBOL_GPL(uart_nmi_state_init);
+#endif
+
 EXPORT_SYMBOL(uart_write_wakeup);
 EXPORT_SYMBOL(uart_register_driver);
 EXPORT_SYMBOL(uart_unregister_driver);
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 9fd550e..84487a9 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -18,6 +18,8 @@ 
 #include <linux/tty.h>
 #include <linux/mutex.h>
 #include <linux/sysrq.h>
+#include <linux/irq_work.h>
+#include <linux/kfifo.h>
 #include <uapi/linux/serial_core.h>
 
 #ifdef CONFIG_SERIAL_CORE_CONSOLE
@@ -103,6 +105,28 @@  struct uart_icount {
 typedef unsigned int __bitwise upf_t;
 typedef unsigned int __bitwise upstat_t;
 
+#ifdef CONFIG_CONSOLE_POLL
+struct uart_nmi_rx_data {
+	unsigned int		status;
+	unsigned int		overrun;
+	unsigned int		ch;
+	unsigned int		flag;
+};
+
+struct uart_nmi_state {
+	bool			active;
+
+	struct irq_work		tx_work;
+	void			(*tx_irq_callback)(struct uart_port *port);
+
+	struct irq_work		rx_work;
+	DECLARE_KFIFO_PTR(rx_fifo, struct uart_nmi_rx_data);
+
+	struct irq_work		sysrq_sak_work;
+	struct irq_work		sysrq_toggle_work;
+};
+#endif
+
 struct uart_port {
 	spinlock_t		lock;			/* port lock */
 	unsigned long		iobase;			/* in/out[bwl] */
@@ -255,6 +279,9 @@  struct uart_port {
 	struct gpio_desc	*rs485_term_gpio;	/* enable RS485 bus termination */
 	struct serial_iso7816   iso7816;
 	void			*private_data;		/* generic platform data pointer */
+#ifdef CONFIG_CONSOLE_POLL
+	struct uart_nmi_state	nmi_state;
+#endif
 };
 
 static inline int serial_port_in(struct uart_port *up, int offset)
@@ -475,4 +502,44 @@  extern int uart_handle_break(struct uart_port *port);
 					 !((cflag) & CLOCAL))
 
 int uart_get_rs485_mode(struct uart_port *port);
+
+/*
+ * The following are helper functions for the NMI aware serial drivers.
+ * Currently NMI support is only enabled under polling mode.
+ */
+
+#ifdef CONFIG_CONSOLE_POLL
+int uart_nmi_state_init(struct uart_port *port);
+int uart_nmi_handle_char(struct uart_port *port, unsigned int status,
+			 unsigned int overrun, unsigned int ch,
+			 unsigned int flag);
+
+static inline bool uart_nmi_active(struct uart_port *port)
+{
+	return port->nmi_state.active;
+}
+
+static inline void uart_set_nmi_active(struct uart_port *port, bool val)
+{
+	port->nmi_state.active = val;
+}
+#else
+static inline int uart_nmi_handle_char(struct uart_port *port,
+				       unsigned int status,
+				       unsigned int overrun,
+				       unsigned int ch, unsigned int flag)
+{
+	return 0;
+}
+
+static inline bool uart_nmi_active(struct uart_port *port)
+{
+	return false;
+}
+
+static inline void uart_set_nmi_active(struct uart_port *port, bool val)
+{
+}
+#endif
+
 #endif /* LINUX_SERIAL_CORE_H */
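
As a rough usage illustration (not part of the patch itself): a serial
driver's RX handler running in NMI context would be expected to use the
helpers above along these lines. The foo_uart_* accessors are made up
here; the real driver wiring belongs to the driver patches in this series.

static void foo_uart_rx_chars_nmi(struct uart_port *port)
{
	unsigned int ch;

	while (foo_uart_rx_ready(port)) {	/* hypothetical accessor */
		ch = foo_uart_read_char(port);	/* hypothetical accessor */

		/* Magic sysrq characters are handled right here, in NMI context. */
		if (uart_handle_sysrq_char(port, ch))
			continue;

		/* Everything else is stashed into the kfifo ... */
		uart_nmi_handle_char(port, 0, 0, ch, TTY_NORMAL);
	}

	/* ... and pushed to the tty later, from normal interrupt context. */
	irq_work_queue(&port->nmi_state.rx_work);
}

The TX side would similarly defer to port->nmi_state.tx_work, with the
driver's usual TX handling supplied through tx_irq_callback, and
uart_nmi_active() lets the driver choose between this path and its regular
interrupt handler.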