diff mbox series

[3/7] kgdb: Add request_nmi() to the io ops table for kgdboc

Message ID 1592835984-28613-4-git-send-email-sumit.garg@linaro.org
State New
Headers show
Series Enable support for kgdb NMI console feature | expand

Commit Message

Sumit Garg June 22, 2020, 2:26 p.m. UTC
From: Daniel Thompson <daniel.thompson@linaro.org>


Add request_nmi() callback to install a non-maskable interrupt handler
corresponding to IRQ retrieved from polling interface. If NMI handler
installation fails due to missing support from underlying irqchip driver
then fallback to install it as normal interrupt handler.

Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>

Co-developed-by: Sumit Garg <sumit.garg@linaro.org>
Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

---
 drivers/tty/serial/kgdboc.c | 35 +++++++++++++++++++++++++++++++++++
 include/linux/kgdb.h        |  7 +++++++
 2 files changed, 42 insertions(+)

-- 
2.7.4

Comments

Daniel Thompson June 22, 2020, 4:03 p.m. UTC | #1
On Mon, Jun 22, 2020 at 07:56:20PM +0530, Sumit Garg wrote:
> From: Daniel Thompson <daniel.thompson@linaro.org>

> 

> Add request_nmi() callback to install a non-maskable interrupt handler

> corresponding to IRQ retrieved from polling interface. If NMI handler

> installation fails due to missing support from underlying irqchip driver

> then fallback to install it as normal interrupt handler.

> 

> Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>

> Co-developed-by: Sumit Garg <sumit.garg@linaro.org>

> Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> ---

>  drivers/tty/serial/kgdboc.c | 35 +++++++++++++++++++++++++++++++++++

>  include/linux/kgdb.h        |  7 +++++++

>  2 files changed, 42 insertions(+)

> 

> diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c

> index 84ffede..263afae 100644

> --- a/drivers/tty/serial/kgdboc.c

> +++ b/drivers/tty/serial/kgdboc.c

> @@ -19,6 +19,9 @@

>  #include <linux/console.h>

>  #include <linux/vt_kern.h>

>  #include <linux/input.h>

> +#include <linux/interrupt.h>

> +#include <linux/irq.h>

> +#include <linux/irqdesc.h>

>  #include <linux/module.h>

>  #include <linux/platform_device.h>

>  #include <linux/serial_core.h>

> @@ -390,12 +393,44 @@ static void kgdboc_post_exp_handler(void)

>  	kgdboc_restore_input();

>  }

>  

> +static int kgdb_tty_irq;

> +

> +static int kgdboc_request_nmi(irq_handler_t fn, void *dev_id)

> +{

> +	int irq, res;

> +

> +	/* Better to avoid double allocation in the tty driver! */

> +	if (kgdb_tty_irq)

> +		return 0;

> +

> +	if (!kgdb_tty_driver->ops->poll_get_irq)

> +		return -ENODEV;

> +

> +	irq =

> +	    kgdb_tty_driver->ops->poll_get_irq(kgdb_tty_driver, kgdb_tty_line);

> +	if (irq <= 0)

> +		return irq ? irq : -ENODEV;

> +

> +	irq_set_status_flags(irq, IRQ_NOAUTOEN);

> +	res = request_nmi(irq, fn, IRQF_PERCPU, "kgdboc", dev_id);


Why do we need IRQF_PERCPU here. A UART interrupt is not normally
per-cpu?


> +	if (res) {

> +		res = request_irq(irq, fn, IRQF_SHARED, "kgdboc", dev_id);


IRQF_SHARED?

Currrently there is nothing that prevents concurrent activation of
ttyNMI0 and the underlying serial driver. Using IRQF_SHARED means it
becomes possible for both drivers to try to service the same interrupt.
That risks some rather "interesting" problems.


Daniel.


> +		WARN_ON(res);

> +	}

> +

> +	enable_irq(irq);

> +

> +	kgdb_tty_irq = irq;

> +	return 0;

> +}

> +

>  static struct kgdb_io kgdboc_io_ops = {

>  	.name			= "kgdboc",

>  	.read_char		= kgdboc_get_char,

>  	.write_char		= kgdboc_put_char,

>  	.pre_exception		= kgdboc_pre_exp_handler,

>  	.post_exception		= kgdboc_post_exp_handler,

> +	.request_nmi		= kgdboc_request_nmi,

>  };

>  

>  #if IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE)

> diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h

> index 529116b..b32b044 100644

> --- a/include/linux/kgdb.h

> +++ b/include/linux/kgdb.h

> @@ -16,6 +16,7 @@

>  #include <linux/linkage.h>

>  #include <linux/init.h>

>  #include <linux/atomic.h>

> +#include <linux/interrupt.h>

>  #ifdef CONFIG_HAVE_ARCH_KGDB

>  #include <asm/kgdb.h>

>  #endif

> @@ -276,6 +277,10 @@ struct kgdb_arch {

>   * the I/O driver.

>   * @post_exception: Pointer to a function that will do any cleanup work

>   * for the I/O driver.

> + * @request_nmi: Pointer to a function that can install an non-maskable

> + * interrupt handler that will be called when a character is pending and that

> + * can be cleared by calling @read_char until it returns NO_POLL_CHAR. If NMI

> + * installation fails then fallback to install normal interrupt handler.

>   * @cons: valid if the I/O device is a console; else NULL.

>   */

>  struct kgdb_io {

> @@ -287,6 +292,8 @@ struct kgdb_io {

>  	void			(*deinit) (void);

>  	void			(*pre_exception) (void);

>  	void			(*post_exception) (void);

> +	int			(*request_nmi)(irq_handler_t nmi_handler,

> +					       void *dev_id);

>  	struct console		*cons;

>  };

>  

> -- 

> 2.7.4

>
Sumit Garg June 23, 2020, 8:37 a.m. UTC | #2
On Mon, 22 Jun 2020 at 21:33, Daniel Thompson
<daniel.thompson@linaro.org> wrote:
>

> On Mon, Jun 22, 2020 at 07:56:20PM +0530, Sumit Garg wrote:

> > From: Daniel Thompson <daniel.thompson@linaro.org>

> >

> > Add request_nmi() callback to install a non-maskable interrupt handler

> > corresponding to IRQ retrieved from polling interface. If NMI handler

> > installation fails due to missing support from underlying irqchip driver

> > then fallback to install it as normal interrupt handler.

> >

> > Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>

> > Co-developed-by: Sumit Garg <sumit.garg@linaro.org>

> > Signed-off-by: Sumit Garg <sumit.garg@linaro.org>

> > ---

> >  drivers/tty/serial/kgdboc.c | 35 +++++++++++++++++++++++++++++++++++

> >  include/linux/kgdb.h        |  7 +++++++

> >  2 files changed, 42 insertions(+)

> >

> > diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c

> > index 84ffede..263afae 100644

> > --- a/drivers/tty/serial/kgdboc.c

> > +++ b/drivers/tty/serial/kgdboc.c

> > @@ -19,6 +19,9 @@

> >  #include <linux/console.h>

> >  #include <linux/vt_kern.h>

> >  #include <linux/input.h>

> > +#include <linux/interrupt.h>

> > +#include <linux/irq.h>

> > +#include <linux/irqdesc.h>

> >  #include <linux/module.h>

> >  #include <linux/platform_device.h>

> >  #include <linux/serial_core.h>

> > @@ -390,12 +393,44 @@ static void kgdboc_post_exp_handler(void)

> >       kgdboc_restore_input();

> >  }

> >

> > +static int kgdb_tty_irq;

> > +

> > +static int kgdboc_request_nmi(irq_handler_t fn, void *dev_id)

> > +{

> > +     int irq, res;

> > +

> > +     /* Better to avoid double allocation in the tty driver! */

> > +     if (kgdb_tty_irq)

> > +             return 0;

> > +

> > +     if (!kgdb_tty_driver->ops->poll_get_irq)

> > +             return -ENODEV;

> > +

> > +     irq =

> > +         kgdb_tty_driver->ops->poll_get_irq(kgdb_tty_driver, kgdb_tty_line);

> > +     if (irq <= 0)

> > +             return irq ? irq : -ENODEV;

> > +

> > +     irq_set_status_flags(irq, IRQ_NOAUTOEN);

> > +     res = request_nmi(irq, fn, IRQF_PERCPU, "kgdboc", dev_id);

>

> Why do we need IRQF_PERCPU here. A UART interrupt is not normally

> per-cpu?

>


Have a look at this comment [1] and corresponding check in
request_nmi(). So essentially yes UART interrupt is not normally
per-cpu but in order to make it an NMI, we need to request it in
per-cpu mode.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/irq/manage.c#n2112

>

> > +     if (res) {

> > +             res = request_irq(irq, fn, IRQF_SHARED, "kgdboc", dev_id);

>

> IRQF_SHARED?

>

> Currrently there is nothing that prevents concurrent activation of

> ttyNMI0 and the underlying serial driver. Using IRQF_SHARED means it

> becomes possible for both drivers to try to service the same interrupt.

> That risks some rather "interesting" problems.

>


Could you elaborate more on "interesting" problems?

BTW, I noticed one more problem with this patch that is IRQF_SHARED
doesn't go well with IRQ_NOAUTOEN status flag. Earlier I tested it
with auto enable set.

But if we agree that both shouldn't be active at the same time due to
some real problems(?) then I can rid of IRQF_SHARED as well. Also, I
think we should unregister underlying tty driver (eg. /dev/ttyAMA0) as
well as otherwise it would provide a broken interface to user-space.

-Sumit

>

> Daniel.

>

>

> > +             WARN_ON(res);

> > +     }

> > +

> > +     enable_irq(irq);

> > +

> > +     kgdb_tty_irq = irq;

> > +     return 0;

> > +}

> > +

> >  static struct kgdb_io kgdboc_io_ops = {

> >       .name                   = "kgdboc",

> >       .read_char              = kgdboc_get_char,

> >       .write_char             = kgdboc_put_char,

> >       .pre_exception          = kgdboc_pre_exp_handler,

> >       .post_exception         = kgdboc_post_exp_handler,

> > +     .request_nmi            = kgdboc_request_nmi,

> >  };

> >

> >  #if IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE)

> > diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h

> > index 529116b..b32b044 100644

> > --- a/include/linux/kgdb.h

> > +++ b/include/linux/kgdb.h

> > @@ -16,6 +16,7 @@

> >  #include <linux/linkage.h>

> >  #include <linux/init.h>

> >  #include <linux/atomic.h>

> > +#include <linux/interrupt.h>

> >  #ifdef CONFIG_HAVE_ARCH_KGDB

> >  #include <asm/kgdb.h>

> >  #endif

> > @@ -276,6 +277,10 @@ struct kgdb_arch {

> >   * the I/O driver.

> >   * @post_exception: Pointer to a function that will do any cleanup work

> >   * for the I/O driver.

> > + * @request_nmi: Pointer to a function that can install an non-maskable

> > + * interrupt handler that will be called when a character is pending and that

> > + * can be cleared by calling @read_char until it returns NO_POLL_CHAR. If NMI

> > + * installation fails then fallback to install normal interrupt handler.

> >   * @cons: valid if the I/O device is a console; else NULL.

> >   */

> >  struct kgdb_io {

> > @@ -287,6 +292,8 @@ struct kgdb_io {

> >       void                    (*deinit) (void);

> >       void                    (*pre_exception) (void);

> >       void                    (*post_exception) (void);

> > +     int                     (*request_nmi)(irq_handler_t nmi_handler,

> > +                                            void *dev_id);

> >       struct console          *cons;

> >  };

> >

> > --

> > 2.7.4

> >
Daniel Thompson June 23, 2020, 10:59 a.m. UTC | #3
On Tue, Jun 23, 2020 at 02:07:47PM +0530, Sumit Garg wrote:
> On Mon, 22 Jun 2020 at 21:33, Daniel Thompson

> <daniel.thompson@linaro.org> wrote:

> > > +     irq_set_status_flags(irq, IRQ_NOAUTOEN);

> > > +     res = request_nmi(irq, fn, IRQF_PERCPU, "kgdboc", dev_id);

> >

> > Why do we need IRQF_PERCPU here. A UART interrupt is not normally

> > per-cpu?

> >

> 

> Have a look at this comment [1] and corresponding check in

> request_nmi(). So essentially yes UART interrupt is not normally

> per-cpu but in order to make it an NMI, we need to request it in

> per-cpu mode.

> 

> [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/irq/manage.c#n2112


Thanks! This is clear.

> > > +     if (res) {

> > > +             res = request_irq(irq, fn, IRQF_SHARED, "kgdboc", dev_id);

> >

> > IRQF_SHARED?

> >

> > Currrently there is nothing that prevents concurrent activation of

> > ttyNMI0 and the underlying serial driver. Using IRQF_SHARED means it

> > becomes possible for both drivers to try to service the same interrupt.

> > That risks some rather "interesting" problems.

> >

> 

> Could you elaborate more on "interesting" problems?


Er... one of the serial drivers we have allowed the userspace to open
will, at best, be stone dead and not passing any characters.


> BTW, I noticed one more problem with this patch that is IRQF_SHARED

> doesn't go well with IRQ_NOAUTOEN status flag. Earlier I tested it

> with auto enable set.

> 

> But if we agree that both shouldn't be active at the same time due to

> some real problems(?) then I can rid of IRQF_SHARED as well. Also, I

> think we should unregister underlying tty driver (eg. /dev/ttyAMA0) as

> well as otherwise it would provide a broken interface to user-space.


I don't have a particular strong opinion on whether IRQF_SHARED is
correct or not correct since I think that misses the point.

Firstly, using IRQF_SHARED shows us that there is no interlocking
between kgdb_nmi and the underlying serial driver. That probably tells
us about the importance of the interlock than about IRQF_SHARED.

To some extent I'm also unsure that kgdb_nmi could ever actually know
the correct flags to use in all cases (that was another reason for the
TODO comment about poll_get_irq() being a bogus API).


Daniel.
Doug Anderson June 26, 2020, 7:44 p.m. UTC | #4
Hi,

On Tue, Jun 23, 2020 at 3:59 AM Daniel Thompson
<daniel.thompson@linaro.org> wrote:
>

> On Tue, Jun 23, 2020 at 02:07:47PM +0530, Sumit Garg wrote:

> > On Mon, 22 Jun 2020 at 21:33, Daniel Thompson

> > <daniel.thompson@linaro.org> wrote:

> > > > +     irq_set_status_flags(irq, IRQ_NOAUTOEN);

> > > > +     res = request_nmi(irq, fn, IRQF_PERCPU, "kgdboc", dev_id);

> > >

> > > Why do we need IRQF_PERCPU here. A UART interrupt is not normally

> > > per-cpu?

> > >

> >

> > Have a look at this comment [1] and corresponding check in

> > request_nmi(). So essentially yes UART interrupt is not normally

> > per-cpu but in order to make it an NMI, we need to request it in

> > per-cpu mode.

> >

> > [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/irq/manage.c#n2112

>

> Thanks! This is clear.

>

> > > > +     if (res) {

> > > > +             res = request_irq(irq, fn, IRQF_SHARED, "kgdboc", dev_id);

> > >

> > > IRQF_SHARED?

> > >

> > > Currrently there is nothing that prevents concurrent activation of

> > > ttyNMI0 and the underlying serial driver. Using IRQF_SHARED means it

> > > becomes possible for both drivers to try to service the same interrupt.

> > > That risks some rather "interesting" problems.

> > >

> >

> > Could you elaborate more on "interesting" problems?

>

> Er... one of the serial drivers we have allowed the userspace to open

> will, at best, be stone dead and not passing any characters.

>

>

> > BTW, I noticed one more problem with this patch that is IRQF_SHARED

> > doesn't go well with IRQ_NOAUTOEN status flag. Earlier I tested it

> > with auto enable set.

> >

> > But if we agree that both shouldn't be active at the same time due to

> > some real problems(?) then I can rid of IRQF_SHARED as well. Also, I

> > think we should unregister underlying tty driver (eg. /dev/ttyAMA0) as

> > well as otherwise it would provide a broken interface to user-space.

>

> I don't have a particular strong opinion on whether IRQF_SHARED is

> correct or not correct since I think that misses the point.

>

> Firstly, using IRQF_SHARED shows us that there is no interlocking

> between kgdb_nmi and the underlying serial driver. That probably tells

> us about the importance of the interlock than about IRQF_SHARED.

>

> To some extent I'm also unsure that kgdb_nmi could ever actually know

> the correct flags to use in all cases (that was another reason for the

> TODO comment about poll_get_irq() being a bogus API).


I do wonder a little bit if the architecture of the "kgdb_nmi_console"
should change.  I remember looking at it in the past and thinking it a
little weird that if I wanted to get it to work I'd need to change my
"console=" command line to go through this new driver and (I guess)
change the agetty I have running on my serial port to point to
ttyNMI0.  Is that how it's supposed to work?  Then if I want to do a
build without kgdb then I need to go in and change my agetty to point
back at my normal serial port?

It kinda feels like a better way to much of what the driver does would be to:

1. Allow kgdb to sniff incoming serial bytes on a port and look for
its characters.  We already have this feature in the kernel to a small
extent for sniffing a break / sysrq character.

2. If userspace doesn't happen to have the serial port open then
ideally we could open the port (using all the standard APIs that
already exist) from in the kernel and just throw away all the bytes
(since we already sniffed them).  As soon as userspace tried to open
the port when it would get ownership and if userspace ever closed the
port then we'd start reading / throwing away bytes again.

If we had a solution like that:

a) No serial drivers would need to change.

b) No kernel command line parameters would need to change.

Obviously that solution wouldn't magically get you an NMI, though.
For that I'd presume the right answer would be to add a parameter for
each serial driver that can support it to run its rx interrupt in NMI
mode.

Of course, perhaps I'm just confused and crazy and the above is a
really bad idea.


Speaking of confused: is there actually any way to use the existing
kgdb NMI driver (CONFIG_SERIAL_KGDB_NMI) in mainline without out of
tree patches?  When I looked before I assumed it was just me that was
outta luck because I didn't have NMI at the time, but I just did some
grepping and I can't find anyplace in mainline where
"arch_kgdb_ops.enable_nmi" would not be NULL.  Did I miss it, or do we
need out-of-tree patches to enable this?


-Doug
Daniel Thompson June 29, 2020, 11:45 a.m. UTC | #5
On Fri, Jun 26, 2020 at 12:44:15PM -0700, Doug Anderson wrote:
> Hi,

> 

> On Tue, Jun 23, 2020 at 3:59 AM Daniel Thompson

> <daniel.thompson@linaro.org> wrote:

> >

> > On Tue, Jun 23, 2020 at 02:07:47PM +0530, Sumit Garg wrote:

> > > On Mon, 22 Jun 2020 at 21:33, Daniel Thompson

> > > <daniel.thompson@linaro.org> wrote:

> > > > > +     irq_set_status_flags(irq, IRQ_NOAUTOEN);

> > > > > +     res = request_nmi(irq, fn, IRQF_PERCPU, "kgdboc", dev_id);

> > > >

> > > > Why do we need IRQF_PERCPU here. A UART interrupt is not normally

> > > > per-cpu?

> > > >

> > >

> > > Have a look at this comment [1] and corresponding check in

> > > request_nmi(). So essentially yes UART interrupt is not normally

> > > per-cpu but in order to make it an NMI, we need to request it in

> > > per-cpu mode.

> > >

> > > [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/irq/manage.c#n2112

> >

> > Thanks! This is clear.

> >

> > > > > +     if (res) {

> > > > > +             res = request_irq(irq, fn, IRQF_SHARED, "kgdboc", dev_id);

> > > >

> > > > IRQF_SHARED?

> > > >

> > > > Currrently there is nothing that prevents concurrent activation of

> > > > ttyNMI0 and the underlying serial driver. Using IRQF_SHARED means it

> > > > becomes possible for both drivers to try to service the same interrupt.

> > > > That risks some rather "interesting" problems.

> > > >

> > >

> > > Could you elaborate more on "interesting" problems?

> >

> > Er... one of the serial drivers we have allowed the userspace to open

> > will, at best, be stone dead and not passing any characters.

> >

> >

> > > BTW, I noticed one more problem with this patch that is IRQF_SHARED

> > > doesn't go well with IRQ_NOAUTOEN status flag. Earlier I tested it

> > > with auto enable set.

> > >

> > > But if we agree that both shouldn't be active at the same time due to

> > > some real problems(?) then I can rid of IRQF_SHARED as well. Also, I

> > > think we should unregister underlying tty driver (eg. /dev/ttyAMA0) as

> > > well as otherwise it would provide a broken interface to user-space.

> >

> > I don't have a particular strong opinion on whether IRQF_SHARED is

> > correct or not correct since I think that misses the point.

> >

> > Firstly, using IRQF_SHARED shows us that there is no interlocking

> > between kgdb_nmi and the underlying serial driver. That probably tells

> > us about the importance of the interlock than about IRQF_SHARED.

> >

> > To some extent I'm also unsure that kgdb_nmi could ever actually know

> > the correct flags to use in all cases (that was another reason for the

> > TODO comment about poll_get_irq() being a bogus API).

> 

> I do wonder a little bit if the architecture of the "kgdb_nmi_console"

> should change.  I remember looking at it in the past and thinking it a

> little weird that if I wanted to get it to work I'd need to change my

> "console=" command line to go through this new driver and (I guess)

> change the agetty I have running on my serial port to point to

> ttyNMI0.  Is that how it's supposed to work?  Then if I want to do a

> build without kgdb then I need to go in and change my agetty to point

> back at my normal serial port?

> 

> It kinda feels like a better way to much of what the driver does would be to:

> 

> 1. Allow kgdb to sniff incoming serial bytes on a port and look for

> its characters.  We already have this feature in the kernel to a small

> extent for sniffing a break / sysrq character.

> 

> 2. If userspace doesn't happen to have the serial port open then

> ideally we could open the port (using all the standard APIs that

> already exist) from in the kernel and just throw away all the bytes

> (since we already sniffed them).  As soon as userspace tried to open

> the port when it would get ownership and if userspace ever closed the

> port then we'd start reading / throwing away bytes again.

> 

> If we had a solution like that:

> 

> a) No serial drivers would need to change.

> 

> b) No kernel command line parameters would need to change.

> 

> Obviously that solution wouldn't magically get you an NMI, though.

> For that I'd presume the right answer would be to add a parameter for

> each serial driver that can support it to run its rx interrupt in NMI

> mode.


... or allow modal changes to the uart driver when kgdboc comes up?

We already allow UART drivers to de-optimize themselves and use
different code paths when polling is enabled so its not totally crazy
;-).


> Of course, perhaps I'm just confused and crazy and the above is a

> really bad idea.


Thanks for bringing this up.

Sumit and I were chatting last week and our discussion went in a similar
direction (I think not exactly the same which is why it is good to
see your thoughts too).

Personally I think it comes down to how intrusive adding NMI support is
to serial drivers. kgdb_nmi is rather hacky and feels a bit odd to
enable. It is clearly intended to avoid almost all changes to the UART
driver. On our side we have been wondering whether the serial core can
add helpers to make it easy for a serial driver to implement an simple,
safe but not optimal NMI implementation. Making it easy to have
safety-first might make NMI more palatable.


> Speaking of confused: is there actually any way to use the existing

> kgdb NMI driver (CONFIG_SERIAL_KGDB_NMI) in mainline without out of

> tree patches?  When I looked before I assumed it was just me that was

> outta luck because I didn't have NMI at the time, but I just did some

> grepping and I can't find anyplace in mainline where

> "arch_kgdb_ops.enable_nmi" would not be NULL.  Did I miss it, or do we

> need out-of-tree patches to enable this?


Out-of-tree...

If, after looking at other approaches, we do all agree to nuke kgdb_nmi
then there shouldn't be much impediment (nor that many tears).


Daniel.
Sumit Garg June 30, 2020, 6:09 a.m. UTC | #6
On Mon, 29 Jun 2020 at 17:15, Daniel Thompson
<daniel.thompson@linaro.org> wrote:
>

> On Fri, Jun 26, 2020 at 12:44:15PM -0700, Doug Anderson wrote:

> > Hi,

> >

> > On Tue, Jun 23, 2020 at 3:59 AM Daniel Thompson

> > <daniel.thompson@linaro.org> wrote:

> > >

> > > On Tue, Jun 23, 2020 at 02:07:47PM +0530, Sumit Garg wrote:

> > > > On Mon, 22 Jun 2020 at 21:33, Daniel Thompson

> > > > <daniel.thompson@linaro.org> wrote:

> > > > > > +     irq_set_status_flags(irq, IRQ_NOAUTOEN);

> > > > > > +     res = request_nmi(irq, fn, IRQF_PERCPU, "kgdboc", dev_id);

> > > > >

> > > > > Why do we need IRQF_PERCPU here. A UART interrupt is not normally

> > > > > per-cpu?

> > > > >

> > > >

> > > > Have a look at this comment [1] and corresponding check in

> > > > request_nmi(). So essentially yes UART interrupt is not normally

> > > > per-cpu but in order to make it an NMI, we need to request it in

> > > > per-cpu mode.

> > > >

> > > > [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/irq/manage.c#n2112

> > >

> > > Thanks! This is clear.

> > >

> > > > > > +     if (res) {

> > > > > > +             res = request_irq(irq, fn, IRQF_SHARED, "kgdboc", dev_id);

> > > > >

> > > > > IRQF_SHARED?

> > > > >

> > > > > Currrently there is nothing that prevents concurrent activation of

> > > > > ttyNMI0 and the underlying serial driver. Using IRQF_SHARED means it

> > > > > becomes possible for both drivers to try to service the same interrupt.

> > > > > That risks some rather "interesting" problems.

> > > > >

> > > >

> > > > Could you elaborate more on "interesting" problems?

> > >

> > > Er... one of the serial drivers we have allowed the userspace to open

> > > will, at best, be stone dead and not passing any characters.

> > >

> > >

> > > > BTW, I noticed one more problem with this patch that is IRQF_SHARED

> > > > doesn't go well with IRQ_NOAUTOEN status flag. Earlier I tested it

> > > > with auto enable set.

> > > >

> > > > But if we agree that both shouldn't be active at the same time due to

> > > > some real problems(?) then I can rid of IRQF_SHARED as well. Also, I

> > > > think we should unregister underlying tty driver (eg. /dev/ttyAMA0) as

> > > > well as otherwise it would provide a broken interface to user-space.

> > >

> > > I don't have a particular strong opinion on whether IRQF_SHARED is

> > > correct or not correct since I think that misses the point.

> > >

> > > Firstly, using IRQF_SHARED shows us that there is no interlocking

> > > between kgdb_nmi and the underlying serial driver. That probably tells

> > > us about the importance of the interlock than about IRQF_SHARED.

> > >

> > > To some extent I'm also unsure that kgdb_nmi could ever actually know

> > > the correct flags to use in all cases (that was another reason for the

> > > TODO comment about poll_get_irq() being a bogus API).

> >

> > I do wonder a little bit if the architecture of the "kgdb_nmi_console"

> > should change.  I remember looking at it in the past and thinking it a

> > little weird that if I wanted to get it to work I'd need to change my

> > "console=" command line to go through this new driver and (I guess)

> > change the agetty I have running on my serial port to point to

> > ttyNMI0.  Is that how it's supposed to work?  Then if I want to do a

> > build without kgdb then I need to go in and change my agetty to point

> > back at my normal serial port?

> >

> > It kinda feels like a better way to much of what the driver does would be to:

> >

> > 1. Allow kgdb to sniff incoming serial bytes on a port and look for

> > its characters.  We already have this feature in the kernel to a small

> > extent for sniffing a break / sysrq character.

> >

> > 2. If userspace doesn't happen to have the serial port open then

> > ideally we could open the port (using all the standard APIs that

> > already exist) from in the kernel and just throw away all the bytes

> > (since we already sniffed them).  As soon as userspace tried to open

> > the port when it would get ownership and if userspace ever closed the

> > port then we'd start reading / throwing away bytes again.

> >

> > If we had a solution like that:

> >

> > a) No serial drivers would need to change.

> >

> > b) No kernel command line parameters would need to change.

> >

> > Obviously that solution wouldn't magically get you an NMI, though.

> > For that I'd presume the right answer would be to add a parameter for

> > each serial driver that can support it to run its rx interrupt in NMI

> > mode.

>


Thanks Doug for the suggestions.

> ... or allow modal changes to the uart driver when kgdboc comes up?

>

> We already allow UART drivers to de-optimize themselves and use

> different code paths when polling is enabled so its not totally crazy

> ;-).

>

>

> > Of course, perhaps I'm just confused and crazy and the above is a

> > really bad idea.

>

> Thanks for bringing this up.

>

> Sumit and I were chatting last week and our discussion went in a similar

> direction (I think not exactly the same which is why it is good to

> see your thoughts too).

>

> Personally I think it comes down to how intrusive adding NMI support is

> to serial drivers. kgdb_nmi is rather hacky and feels a bit odd to

> enable. It is clearly intended to avoid almost all changes to the UART

> driver. On our side we have been wondering whether the serial core can

> add helpers to make it easy for a serial driver to implement an simple,

> safe but not optimal NMI implementation. Making it easy to have

> safety-first might make NMI more palatable.

>


I am currently working on a PoC in this direction and hopeful to come
up with least intrusive NMI support to serial drivers.

>

> > Speaking of confused: is there actually any way to use the existing

> > kgdb NMI driver (CONFIG_SERIAL_KGDB_NMI) in mainline without out of

> > tree patches?  When I looked before I assumed it was just me that was

> > outta luck because I didn't have NMI at the time, but I just did some

> > grepping and I can't find anyplace in mainline where

> > "arch_kgdb_ops.enable_nmi" would not be NULL.  Did I miss it, or do we

> > need out-of-tree patches to enable this?

>

> Out-of-tree...


Yeah and this patch-set derived from Daniel's work was one of them.

>

> If, after looking at other approaches, we do all agree to nuke kgdb_nmi

> then there shouldn't be much impediment (nor that many tears).

>


Makes sense.

-Sumit

>

> Daniel.
diff mbox series

Patch

diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c
index 84ffede..263afae 100644
--- a/drivers/tty/serial/kgdboc.c
+++ b/drivers/tty/serial/kgdboc.c
@@ -19,6 +19,9 @@ 
 #include <linux/console.h>
 #include <linux/vt_kern.h>
 #include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdesc.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/serial_core.h>
@@ -390,12 +393,44 @@  static void kgdboc_post_exp_handler(void)
 	kgdboc_restore_input();
 }
 
+static int kgdb_tty_irq;
+
+static int kgdboc_request_nmi(irq_handler_t fn, void *dev_id)
+{
+	int irq, res;
+
+	/* Better to avoid double allocation in the tty driver! */
+	if (kgdb_tty_irq)
+		return 0;
+
+	if (!kgdb_tty_driver->ops->poll_get_irq)
+		return -ENODEV;
+
+	irq =
+	    kgdb_tty_driver->ops->poll_get_irq(kgdb_tty_driver, kgdb_tty_line);
+	if (irq <= 0)
+		return irq ? irq : -ENODEV;
+
+	irq_set_status_flags(irq, IRQ_NOAUTOEN);
+	res = request_nmi(irq, fn, IRQF_PERCPU, "kgdboc", dev_id);
+	if (res) {
+		res = request_irq(irq, fn, IRQF_SHARED, "kgdboc", dev_id);
+		WARN_ON(res);
+	}
+
+	enable_irq(irq);
+
+	kgdb_tty_irq = irq;
+	return 0;
+}
+
 static struct kgdb_io kgdboc_io_ops = {
 	.name			= "kgdboc",
 	.read_char		= kgdboc_get_char,
 	.write_char		= kgdboc_put_char,
 	.pre_exception		= kgdboc_pre_exp_handler,
 	.post_exception		= kgdboc_post_exp_handler,
+	.request_nmi		= kgdboc_request_nmi,
 };
 
 #if IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE)
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 529116b..b32b044 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -16,6 +16,7 @@ 
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/atomic.h>
+#include <linux/interrupt.h>
 #ifdef CONFIG_HAVE_ARCH_KGDB
 #include <asm/kgdb.h>
 #endif
@@ -276,6 +277,10 @@  struct kgdb_arch {
  * the I/O driver.
  * @post_exception: Pointer to a function that will do any cleanup work
  * for the I/O driver.
+ * @request_nmi: Pointer to a function that can install an non-maskable
+ * interrupt handler that will be called when a character is pending and that
+ * can be cleared by calling @read_char until it returns NO_POLL_CHAR. If NMI
+ * installation fails then fallback to install normal interrupt handler.
  * @cons: valid if the I/O device is a console; else NULL.
  */
 struct kgdb_io {
@@ -287,6 +292,8 @@  struct kgdb_io {
 	void			(*deinit) (void);
 	void			(*pre_exception) (void);
 	void			(*post_exception) (void);
+	int			(*request_nmi)(irq_handler_t nmi_handler,
+					       void *dev_id);
 	struct console		*cons;
 };