diff mbox

[v10,03/19] arm: fiq: Replace default FIQ handler

Message ID 5405AEBC.9020904@linaro.org
State New
Headers show

Commit Message

Daniel Thompson Sept. 2, 2014, 11:49 a.m. UTC
On 28/08/14 16:01, Russell King - ARM Linux wrote:
> On Tue, Aug 19, 2014 at 07:12:07PM +0100, Daniel Thompson wrote:
>> On 19/08/14 18:37, Russell King - ARM Linux wrote:
>>> On Tue, Aug 19, 2014 at 05:45:53PM +0100, Daniel Thompson wrote:
>>>> +int register_fiq_nmi_notifier(struct notifier_block *nb)
>>>> +{
>>>> +	return atomic_notifier_chain_register(&fiq_nmi_chain, nb);
>>>> +}
>>>> +
>>>> +asmlinkage void __exception_irq_entry fiq_nmi_handler(struct pt_regs *regs)
>>>> +{
>>>> +	struct pt_regs *old_regs = set_irq_regs(regs);
>>>> +
>>>> +	nmi_enter();
>>>> +	atomic_notifier_call_chain(&fiq_nmi_chain, (unsigned long)regs, NULL);
>>>> +	nmi_exit();
>>>> +	set_irq_regs(old_regs);
>>>> +}
>>>
>>> Really not happy with this.  What happens if a FIQ occurs while we're
>>> inside register_fiq_nmi_notifier() - more specifically inside
>>> atomic_notifier_chain_register() ?
>>
>> Should depend on which side of the rcu update we're on.
> 
> I just asked Paul McKenney, our RCU expert... essentially, yes, RCU
> stuff itself is safe in this context.  However, RCU stuff can call into
> lockdep if lockdep is configured, and there are questions over lockdep.
> 
> There's some things which can be done to reduce the lockdep exposure
> to it, such as ensuring that rcu_read_lock() is first called outside
> of FIQ context.
> 
> There's concerns with whether either printk() in check_flags() could
> be reached too (flags there should always indicate that IRQs were
> disabled, so that reduces down to a question about just the first
> printk() there.)
> 
> There's also the very_verbose() stuff for RCU lockdep classes which
> Paul says must not be enabled.
> 
> Lastly, Paul isn't a lockdep expert, but he sees nothing that prevents
> lockdep doing the deadlock checking as a result of the above call.
> 
> So... this coupled with my feeling that notifiers make it too easy for
> unreviewed code to be hooked into this path, I'm fairly sure that we
> don't want to be calling atomic notifier chains from FIQ context.

Having esablished (elsewhere in the thread) that RCU usage is safe
from FIQ I have been working on the assumption that your feeling
regarding unreviewed code is sufficient on its own to avoid using
notifiers (and also to avoid a list of function pointers like on x86).

Therefore I have made the changes requested and produced a
before/after patch to show the impact of this. I will merge this
into the FIQ patchset shortly (I need to run a few more build tests
first).

Personally I still favour using notifiers and think the coupling below is
excessive. Nevertheless I've run a couple of basic tests on the code
below and it works fine.

Comments

Paul E. McKenney Sept. 2, 2014, 2:23 p.m. UTC | #1
On Tue, Sep 02, 2014 at 12:49:16PM +0100, Daniel Thompson wrote:
> On 28/08/14 16:01, Russell King - ARM Linux wrote:
> > On Tue, Aug 19, 2014 at 07:12:07PM +0100, Daniel Thompson wrote:
> >> On 19/08/14 18:37, Russell King - ARM Linux wrote:
> >>> On Tue, Aug 19, 2014 at 05:45:53PM +0100, Daniel Thompson wrote:
> >>>> +int register_fiq_nmi_notifier(struct notifier_block *nb)
> >>>> +{
> >>>> +	return atomic_notifier_chain_register(&fiq_nmi_chain, nb);
> >>>> +}
> >>>> +
> >>>> +asmlinkage void __exception_irq_entry fiq_nmi_handler(struct pt_regs *regs)
> >>>> +{
> >>>> +	struct pt_regs *old_regs = set_irq_regs(regs);
> >>>> +
> >>>> +	nmi_enter();
> >>>> +	atomic_notifier_call_chain(&fiq_nmi_chain, (unsigned long)regs, NULL);
> >>>> +	nmi_exit();
> >>>> +	set_irq_regs(old_regs);
> >>>> +}
> >>>
> >>> Really not happy with this.  What happens if a FIQ occurs while we're
> >>> inside register_fiq_nmi_notifier() - more specifically inside
> >>> atomic_notifier_chain_register() ?
> >>
> >> Should depend on which side of the rcu update we're on.
> > 
> > I just asked Paul McKenney, our RCU expert... essentially, yes, RCU
> > stuff itself is safe in this context.  However, RCU stuff can call into
> > lockdep if lockdep is configured, and there are questions over lockdep.
> > 
> > There's some things which can be done to reduce the lockdep exposure
> > to it, such as ensuring that rcu_read_lock() is first called outside
> > of FIQ context.
> > 
> > There's concerns with whether either printk() in check_flags() could
> > be reached too (flags there should always indicate that IRQs were
> > disabled, so that reduces down to a question about just the first
> > printk() there.)
> > 
> > There's also the very_verbose() stuff for RCU lockdep classes which
> > Paul says must not be enabled.
> > 
> > Lastly, Paul isn't a lockdep expert, but he sees nothing that prevents
> > lockdep doing the deadlock checking as a result of the above call.
> > 
> > So... this coupled with my feeling that notifiers make it too easy for
> > unreviewed code to be hooked into this path, I'm fairly sure that we
> > don't want to be calling atomic notifier chains from FIQ context.
> 
> Having esablished (elsewhere in the thread) that RCU usage is safe
> from FIQ I have been working on the assumption that your feeling
> regarding unreviewed code is sufficient on its own to avoid using
> notifiers (and also to avoid a list of function pointers like on x86).

There was a later clarification from a lockdep expert showing that the
code was in fact safe, so the notifier approach should be just fine.

							Thanx, Paul

> Therefore I have made the changes requested and produced a
> before/after patch to show the impact of this. I will merge this
> into the FIQ patchset shortly (I need to run a few more build tests
> first).
> 
> Personally I still favour using notifiers and think the coupling below is
> excessive. Nevertheless I've run a couple of basic tests on the code
> below and it works fine.
> 
> 
> diff --git a/arch/arm/include/asm/fiq.h b/arch/arm/include/asm/fiq.h
> index 175bfed..a25c952 100644
> --- a/arch/arm/include/asm/fiq.h
> +++ b/arch/arm/include/asm/fiq.h
> @@ -54,7 +54,6 @@ extern void disable_fiq(int fiq);
>  extern int ack_fiq(int fiq);
>  extern void eoi_fiq(int fiq);
>  extern bool has_fiq(int fiq);
> -extern int register_fiq_nmi_notifier(struct notifier_block *nb);
>  extern void fiq_register_mapping(int irq, struct fiq_chip *chip);
> 
>  /* helpers defined in fiqasm.S: */
> diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h
> index 6563da0..cb5ccd6 100644
> --- a/arch/arm/include/asm/kgdb.h
> +++ b/arch/arm/include/asm/kgdb.h
> @@ -51,6 +51,7 @@ extern void kgdb_handle_bus_error(void);
>  extern int kgdb_fault_expected;
> 
>  extern int kgdb_register_fiq(unsigned int fiq);
> +extern void kgdb_handle_fiq(struct pt_regs *regs);
> 
>  #endif /* !__ASSEMBLY__ */
> 
> diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
> index b2bd1c7..7422b58 100644
> --- a/arch/arm/kernel/fiq.c
> +++ b/arch/arm/kernel/fiq.c
> @@ -43,12 +43,14 @@
>  #include <linux/irq.h>
>  #include <linux/radix-tree.h>
>  #include <linux/slab.h>
> +#include <linux/irqchip/arm-gic.h>
> 
>  #include <asm/cacheflush.h>
>  #include <asm/cp15.h>
>  #include <asm/exception.h>
>  #include <asm/fiq.h>
>  #include <asm/irq.h>
> +#include <asm/kgdb.h>
>  #include <asm/traps.h>
> 
>  #define FIQ_OFFSET ({					\
> @@ -65,7 +67,6 @@ static unsigned long no_fiq_insn;
>  static int fiq_start = -1;
>  static RADIX_TREE(fiq_data_tree, GFP_KERNEL);
>  static DEFINE_MUTEX(fiq_data_mutex);
> -static ATOMIC_NOTIFIER_HEAD(fiq_nmi_chain);
> 
>  /* Default reacquire function
>   * - we always relinquish FIQ control
> @@ -218,17 +219,23 @@ bool has_fiq(int fiq)
>  }
>  EXPORT_SYMBOL(has_fiq);
> 
> -int register_fiq_nmi_notifier(struct notifier_block *nb)
> -{
> -	return atomic_notifier_chain_register(&fiq_nmi_chain, nb);
> -}
> -
>  asmlinkage void __exception_irq_entry fiq_nmi_handler(struct pt_regs *regs)
>  {
>  	struct pt_regs *old_regs = set_irq_regs(regs);
> 
>  	nmi_enter();
> -	atomic_notifier_call_chain(&fiq_nmi_chain, (unsigned long)regs, NULL);
> +
> +	/* these callbacks deliberately avoid using a notifier chain in
> +	 * order to ensure code review happens (drivers cannot "secretly"
> +	 * employ FIQ without modifying this chain of calls).
> +	 */
> +#ifdef CONFIG_KGDB_FIQ
> +	kgdb_handle_fiq(regs);
> +#endif
> +#ifdef CONFIG_ARM_GIC
> +	gic_handle_fiq_ipi();
> +#endif
> +
>  	nmi_exit();
>  	set_irq_regs(old_regs);
>  }
> diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
> index b77b885..630a3ef 100644
> --- a/arch/arm/kernel/kgdb.c
> +++ b/arch/arm/kernel/kgdb.c
> @@ -312,12 +312,13 @@ struct kgdb_arch arch_kgdb_ops = {
>  };
> 
>  #ifdef CONFIG_KGDB_FIQ
> -static int kgdb_handle_fiq(struct notifier_block *nb, unsigned long arg,
> -			   void *data)
> +void kgdb_handle_fiq(struct pt_regs *regs)
>  {
> -	struct pt_regs *regs = (void *) arg;
>  	int actual;
> 
> +	if (!kgdb_fiq)
> +		return;
> +
>  	if (!kgdb_nmicallback(raw_smp_processor_id(), regs))
>  		return NOTIFY_OK;
> 
> @@ -333,11 +334,6 @@ static int kgdb_handle_fiq(struct notifier_block *nb, unsigned long arg,
>  	return NOTIFY_OK;
>  }
> 
> -static struct notifier_block kgdb_fiq_notifier = {
> -	.notifier_call = kgdb_handle_fiq,
> -	.priority = 100,
> -};
> -
>  int kgdb_register_fiq(unsigned int fiq)
>  {
>  	static struct fiq_handler kgdb_fiq_desc = { .name = "kgdb", };
> @@ -357,7 +353,6 @@ int kgdb_register_fiq(unsigned int fiq)
>  	}
> 
>  	kgdb_fiq = fiq;
> -	register_fiq_nmi_notifier(&kgdb_fiq_notifier);
> 
>  	return 0;
>  }
> diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
> index bda5a91..8821160 100644
> --- a/drivers/irqchip/irq-gic.c
> +++ b/drivers/irqchip/irq-gic.c
> @@ -502,13 +502,17 @@ static void __init gic_init_fiq(struct gic_chip_data *gic,
>  /*
>   * Fully acknowledge (both ack and eoi) a FIQ-based IPI
>   */
> -static int gic_handle_fiq_ipi(struct notifier_block *nb, unsigned long regs,
> -			   void *data)
> +void gic_handle_fiq_ipi(void)
>  {
>  	struct gic_chip_data *gic = &gic_data[0];
> -	void __iomem *cpu_base = gic_data_cpu_base(gic);
> +	void __iomem *cpu_base;
>  	unsigned long irqstat, irqnr;
> 
> +	if (!gic || !gic->fiq_enable)
> +		return;
> +
> +	cpu_base = gic_data_cpu_base(gic);
> +
>  	if (WARN_ON(!in_nmi()))
>  		return NOTIFY_BAD;
> 
> @@ -525,13 +529,6 @@ static int gic_handle_fiq_ipi(struct notifier_block *nb, unsigned long regs,
> 
>  	return NOTIFY_OK;
>  }
> -
> -/*
> - * Notifier to ensure IPI FIQ is acknowledged correctly.
> - */
> -static struct notifier_block gic_fiq_ipi_notifier = {
> -	.notifier_call = gic_handle_fiq_ipi,
> -};
>  #else /* CONFIG_FIQ */
>  static inline void gic_set_group_irq(void __iomem *base, unsigned int hwirq,
>  				     int group) {}
> @@ -1250,10 +1247,6 @@ void __init gic_init_bases(unsigned int gic_nr, int irq_start,
>  #ifdef CONFIG_SMP
>  		set_smp_cross_call(gic_raise_softirq);
>  		register_cpu_notifier(&gic_cpu_notifier);
> -#ifdef CONFIG_FIQ
> -		if (gic_data_fiq_enable(gic))
> -			register_fiq_nmi_notifier(&gic_fiq_ipi_notifier);
> -#endif
>  #endif
>  		set_handle_irq(gic_handle_irq);
>  	}
> diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
> index 45e2d8c..52a5676 100644
> --- a/include/linux/irqchip/arm-gic.h
> +++ b/include/linux/irqchip/arm-gic.h
> @@ -101,5 +101,8 @@ static inline void __init register_routable_domain_ops
>  {
>  	gic_routable_irq_domain_ops = ops;
>  }
> +
> +void gic_handle_fiq_ipi(void);
> +
>  #endif /* __ASSEMBLY */
>  #endif
> 
>
Russell King - ARM Linux Sept. 2, 2014, 4:42 p.m. UTC | #2
On Tue, Sep 02, 2014 at 12:49:16PM +0100, Daniel Thompson wrote:
> On 28/08/14 16:01, Russell King - ARM Linux wrote:
> > I just asked Paul McKenney, our RCU expert... essentially, yes, RCU
> > stuff itself is safe in this context.  However, RCU stuff can call into
> > lockdep if lockdep is configured, and there are questions over lockdep.
> > 
> > There's some things which can be done to reduce the lockdep exposure
> > to it, such as ensuring that rcu_read_lock() is first called outside
> > of FIQ context.
> > 
> > There's concerns with whether either printk() in check_flags() could
> > be reached too (flags there should always indicate that IRQs were
> > disabled, so that reduces down to a question about just the first
> > printk() there.)
> > 
> > There's also the very_verbose() stuff for RCU lockdep classes which
> > Paul says must not be enabled.
> > 
> > Lastly, Paul isn't a lockdep expert, but he sees nothing that prevents
> > lockdep doing the deadlock checking as a result of the above call.
> > 
> > So... this coupled with my feeling that notifiers make it too easy for
> > unreviewed code to be hooked into this path, I'm fairly sure that we
> > don't want to be calling atomic notifier chains from FIQ context.
> 
> Having esablished (elsewhere in the thread) that RCU usage is safe
> from FIQ I have been working on the assumption that your feeling
> regarding unreviewed code is sufficient on its own to avoid using
> notifiers (and also to avoid a list of function pointers like on x86).

Yes, it does, because unlike the x86 community, we have a wide range
of platforms, and platform code does not go through the same path or
get the same review as core ARM code.

As I already pointed out, with a notifier, it's very easy to sneak
something into the FIQ path by submitting a patch for platform code
which calls the registration function.  That's going to be pretty
difficult to spot amongst the 3000+ messages on the linux-arm-kernel
list each month in order to give it the review that it would need.
That's especially true as I now ignore almost all most platform
code patches as we have Arnd and Olof to look at that.

So, unless you can come up with a proposal which ensures that there
is sufficient review triggered when someone decides to call the
notifier registration function...

How about something like:

static const char *allowable_callers[] = {
	...
};

	snprintf(caller, sizeof(caller), "%pf", __builtin_return_address(0));

	for (i = 0; i < ARRAY_SIZE(allowable_callers); i++)
		if (!strcmp(caller, allowable_callers[i]))
			break;

	if (i == ARRAY_SIZE(allowable_callers)) {
		printk(KERN_ERR "%s is not permitted to register a FIQ notifer\n",
			caller);
		return;
	}

This gives us the advantage of using the notifier, but also gives us the
requirement that the file has to be modified to permit new registrations,
thereby triggering the closer review.

The other question I have is that if we permit kgdb and nmi tracing with
this mechanism, how do the hooked callers distinguish between these
different purposes?  I don't see how that works with your notifier
setup.
Daniel Thompson Sept. 3, 2014, 10:21 a.m. UTC | #3
On 02/09/14 17:42, Russell King - ARM Linux wrote:
> On Tue, Sep 02, 2014 at 12:49:16PM +0100, Daniel Thompson wrote:
>> On 28/08/14 16:01, Russell King - ARM Linux wrote:
>>> I just asked Paul McKenney, our RCU expert... essentially, yes, RCU
>>> stuff itself is safe in this context.  However, RCU stuff can call into
>>> lockdep if lockdep is configured, and there are questions over lockdep.
>>>
>>> There's some things which can be done to reduce the lockdep exposure
>>> to it, such as ensuring that rcu_read_lock() is first called outside
>>> of FIQ context.
>>>
>>> There's concerns with whether either printk() in check_flags() could
>>> be reached too (flags there should always indicate that IRQs were
>>> disabled, so that reduces down to a question about just the first
>>> printk() there.)
>>>
>>> There's also the very_verbose() stuff for RCU lockdep classes which
>>> Paul says must not be enabled.
>>>
>>> Lastly, Paul isn't a lockdep expert, but he sees nothing that prevents
>>> lockdep doing the deadlock checking as a result of the above call.
>>>
>>> So... this coupled with my feeling that notifiers make it too easy for
>>> unreviewed code to be hooked into this path, I'm fairly sure that we
>>> don't want to be calling atomic notifier chains from FIQ context.
>>
>> Having esablished (elsewhere in the thread) that RCU usage is safe
>> from FIQ I have been working on the assumption that your feeling
>> regarding unreviewed code is sufficient on its own to avoid using
>> notifiers (and also to avoid a list of function pointers like on x86).
> 
> Yes, it does, because unlike the x86 community, we have a wide range
> of platforms, and platform code does not go through the same path or
> get the same review as core ARM code.
> 
> As I already pointed out, with a notifier, it's very easy to sneak
> something into the FIQ path by submitting a patch for platform code
> which calls the registration function.  That's going to be pretty
> difficult to spot amongst the 3000+ messages on the linux-arm-kernel
> list each month in order to give it the review that it would need.
> That's especially true as I now ignore almost all most platform
> code patches as we have Arnd and Olof to look at that.
> 
> So, unless you can come up with a proposal which ensures that there
> is sufficient review triggered when someone decides to call the
> notifier registration function...

Reflecting upon this and upon Thomas' comments about only using FIQ for
watchdog, backtrace and performance monitoring...

The short version is, "I was wrong and should have done what you said in
the first place".

The long version adds, "because the coupling concerns were spurious; the
only proposed users of the default FIQ handler outside of core ARM code,
are ARM-centric irqchip drivers."


> How about something like:
> 
> static const char *allowable_callers[] = {
> 	...
> };
> 
> 	snprintf(caller, sizeof(caller), "%pf", __builtin_return_address(0));
> 
> 	for (i = 0; i < ARRAY_SIZE(allowable_callers); i++)
> 		if (!strcmp(caller, allowable_callers[i]))
> 			break;
> 
> 	if (i == ARRAY_SIZE(allowable_callers)) {
> 		printk(KERN_ERR "%s is not permitted to register a FIQ notifer\n",
> 			caller);
> 		return;
> 	}
> 
> This gives us the advantage of using the notifier, but also gives us the
> requirement that the file has to be modified to permit new registrations,
> thereby triggering the closer review.

Cool trick. However since I'm wrong...


> The other question I have is that if we permit kgdb and nmi tracing with
> this mechanism, how do the hooked callers distinguish between these
> different purposes?  I don't see how that works with your notifier
> setup.

The notifier as found in the existing patchs allowed the sharing only of
IPI FIQ (for example between stack dump code and kgdb).

This worked due to the way IPIs can be automatically EOIed and, because
they are software generated, software can use flags. In fact the kgdb
handler already uses this to discriminate between IPI FIQ and UART
interrupts (although the flag is set kgdb core logic rather than in the
ARM code).

For SPIs it's safety depended upon drivers using claim_fiq() to
arbitrate. There was no enforcement mechanism to prevent abuse.
Russell King - ARM Linux Sept. 3, 2014, 7:34 p.m. UTC | #4
On Wed, Sep 03, 2014 at 11:21:30AM +0100, Daniel Thompson wrote:
> On 02/09/14 17:42, Russell King - ARM Linux wrote:
> > Yes, it does, because unlike the x86 community, we have a wide range
> > of platforms, and platform code does not go through the same path or
> > get the same review as core ARM code.
> > 
> > As I already pointed out, with a notifier, it's very easy to sneak
> > something into the FIQ path by submitting a patch for platform code
> > which calls the registration function.  That's going to be pretty
> > difficult to spot amongst the 3000+ messages on the linux-arm-kernel
> > list each month in order to give it the review that it would need.
> > That's especially true as I now ignore almost all most platform
> > code patches as we have Arnd and Olof to look at that.
> > 
> > So, unless you can come up with a proposal which ensures that there
> > is sufficient review triggered when someone decides to call the
> > notifier registration function...
> 
> Reflecting upon this and upon Thomas' comments about only using FIQ for
> watchdog, backtrace and performance monitoring...
> 
> The short version is, "I was wrong and should have done what you said in
> the first place".
> 
> The long version adds, "because the coupling concerns were spurious; the
> only proposed users of the default FIQ handler outside of core ARM code,
> are ARM-centric irqchip drivers."

I would say that the ARM specific changes to entry-armv.S and setup.c
are correct.  All that you're doing there is to replace the existing
default no-op FIQ handler with some additional code which gets us into
SVC mode and back out, but itself is also a no-op.  In other words, no
real change.

That's a good first patch, and one which I would actually like to have
in my tree sooner rather than later, so that I can split that out from
my prototype code.

I can also split out from it the ARM generic changes for implementing
the FIQ dumping too, which gives us a second patch.  With a bit of
additional work, much of that should actually be generic code, not
ARM or x86 specific code.  That's going to annoy x86 people a little
because some of that is being reworked...

That will leave the problem of how to deal with the IRQ controller
specifics, and how to properly wire it together with the IRQ controller
in the loop - that is where Thomas' concerns are focused.
Daniel Thompson Sept. 4, 2014, 9:09 a.m. UTC | #5
On 03/09/14 20:34, Russell King - ARM Linux wrote:
> On Wed, Sep 03, 2014 at 11:21:30AM +0100, Daniel Thompson wrote:
>> On 02/09/14 17:42, Russell King - ARM Linux wrote:
>>> Yes, it does, because unlike the x86 community, we have a wide range
>>> of platforms, and platform code does not go through the same path or
>>> get the same review as core ARM code.
>>>
>>> As I already pointed out, with a notifier, it's very easy to sneak
>>> something into the FIQ path by submitting a patch for platform code
>>> which calls the registration function.  That's going to be pretty
>>> difficult to spot amongst the 3000+ messages on the linux-arm-kernel
>>> list each month in order to give it the review that it would need.
>>> That's especially true as I now ignore almost all most platform
>>> code patches as we have Arnd and Olof to look at that.
>>>
>>> So, unless you can come up with a proposal which ensures that there
>>> is sufficient review triggered when someone decides to call the
>>> notifier registration function...
>>
>> Reflecting upon this and upon Thomas' comments about only using FIQ for
>> watchdog, backtrace and performance monitoring...
>>
>> The short version is, "I was wrong and should have done what you said in
>> the first place".
>>
>> The long version adds, "because the coupling concerns were spurious; the
>> only proposed users of the default FIQ handler outside of core ARM code,
>> are ARM-centric irqchip drivers."
> 
> I would say that the ARM specific changes to entry-armv.S and setup.c
> are correct.  All that you're doing there is to replace the existing
> default no-op FIQ handler with some additional code which gets us into
> SVC mode and back out, but itself is also a no-op.  In other words, no
> real change.
> 
> That's a good first patch, and one which I would actually like to have
> in my tree sooner rather than later, so that I can split that out from
> my prototype code.

So would I!

I did some rebasing yesterday to put anything to do with kgdb right at
the back of the queue. This "good first patch" is now actually the first
patch; where the nofifier used to be it currently calls do_unexp_fiq()
making it very close to "no real change".

BTW do_unexp_fiq() calls printk() but

> I can also split out from it the ARM generic changes for implementing
> the FIQ dumping too, which gives us a second patch.  With a bit of
> additional work, much of that should actually be generic code, not
> ARM or x86 specific code.  That's going to annoy x86 people a little
> because some of that is being reworked...

So far I have been testing the action after patch review using the big
kgdb patches on top of it.

Today I plan to remove the kgdb stuff, drop in your version of
arch_trigger_all_cpu_backtrace() and test the results. I was going to
use the code you shared on 13 August meaning all the cpu mask bit
manipulation is in the arm-only code.

If you want me to work with something more recent then feel free to
point me at it...


> That will leave the problem of how to deal with the IRQ controller
> specifics, and how to properly wire it together with the IRQ controller
> in the loop - that is where Thomas' concerns are focused.

I'm working on those and its looking pretty good so far. This is mostly
because SGIs don't need to allocate virqs so the controversial bits of
my last patchset disappear completely.
Russell King - ARM Linux Sept. 4, 2014, 9:45 a.m. UTC | #6
On Thu, Sep 04, 2014 at 10:09:20AM +0100, Daniel Thompson wrote:
> On 03/09/14 20:34, Russell King - ARM Linux wrote:
> > I would say that the ARM specific changes to entry-armv.S and setup.c
> > are correct.  All that you're doing there is to replace the existing
> > default no-op FIQ handler with some additional code which gets us into
> > SVC mode and back out, but itself is also a no-op.  In other words, no
> > real change.
> > 
> > That's a good first patch, and one which I would actually like to have
> > in my tree sooner rather than later, so that I can split that out from
> > my prototype code.
> 
> So would I!
> 
> I did some rebasing yesterday to put anything to do with kgdb right at
> the back of the queue. This "good first patch" is now actually the first
> patch; where the nofifier used to be it currently calls do_unexp_fiq()
> making it very close to "no real change".
> 
> BTW do_unexp_fiq() calls printk() but

You're making the assumption that something called do_unexp_fiq() before
your patches.  It seems that that function is dead code, and now that
you've pointed that out, I will kill this function.

The current situation is that if the CPU receives a FIQ, it enters the
FIQ vector, which contains an immediate return instruction.

> If you want me to work with something more recent then feel free to
> point me at it...

I'll post some of that stuff later today, probably this evening.
Daniel Thompson Sept. 4, 2014, 10:04 a.m. UTC | #7
On 04/09/14 10:45, Russell King - ARM Linux wrote:
> On Thu, Sep 04, 2014 at 10:09:20AM +0100, Daniel Thompson wrote:
>> On 03/09/14 20:34, Russell King - ARM Linux wrote:
>>> I would say that the ARM specific changes to entry-armv.S and setup.c
>>> are correct.  All that you're doing there is to replace the existing
>>> default no-op FIQ handler with some additional code which gets us into
>>> SVC mode and back out, but itself is also a no-op.  In other words, no
>>> real change.
>>>
>>> That's a good first patch, and one which I would actually like to have
>>> in my tree sooner rather than later, so that I can split that out from
>>> my prototype code.
>>
>> So would I!
>>
>> I did some rebasing yesterday to put anything to do with kgdb right at
>> the back of the queue. This "good first patch" is now actually the first
>> patch; where the nofifier used to be it currently calls do_unexp_fiq()
>> making it very close to "no real change".
>>
>> BTW do_unexp_fiq() calls printk() but
> 
> You're making the assumption that something called do_unexp_fiq() before
> your patches.  It seems that that function is dead code, and now that
> you've pointed that out, I will kill this function.
>
> The current situation is that if the CPU receives a FIQ, it enters the
> FIQ vector, which contains an immediate return instruction.

Actually it was the comment above the return instruction in the original
code about getting a message out that made me try to reconnect
do_unexp_fiq().

printk() has reasonably strong defences against being called twice by
the same CPU in that we ought to avoid deadlock if the current CPU owns
the printk locks (although the message would be dropped). I therefore
thought do_unexp_fiq() was a best-effort attempt to inform the user.

That said, assuming the FIQ re-enters, then without a rate limiter the
message will be rather insistent.

For now I will remove calls to do_unexp_fiq().


>> If you want me to work with something more recent then feel free to
>> point me at it...
> 
> I'll post some of that stuff later today, probably this evening.

Hopefully so shall I.
diff mbox

Patch

diff --git a/arch/arm/include/asm/fiq.h b/arch/arm/include/asm/fiq.h
index 175bfed..a25c952 100644
--- a/arch/arm/include/asm/fiq.h
+++ b/arch/arm/include/asm/fiq.h
@@ -54,7 +54,6 @@  extern void disable_fiq(int fiq);
 extern int ack_fiq(int fiq);
 extern void eoi_fiq(int fiq);
 extern bool has_fiq(int fiq);
-extern int register_fiq_nmi_notifier(struct notifier_block *nb);
 extern void fiq_register_mapping(int irq, struct fiq_chip *chip);
 
 /* helpers defined in fiqasm.S: */
diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h
index 6563da0..cb5ccd6 100644
--- a/arch/arm/include/asm/kgdb.h
+++ b/arch/arm/include/asm/kgdb.h
@@ -51,6 +51,7 @@  extern void kgdb_handle_bus_error(void);
 extern int kgdb_fault_expected;
 
 extern int kgdb_register_fiq(unsigned int fiq);
+extern void kgdb_handle_fiq(struct pt_regs *regs);
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c
index b2bd1c7..7422b58 100644
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -43,12 +43,14 @@ 
 #include <linux/irq.h>
 #include <linux/radix-tree.h>
 #include <linux/slab.h>
+#include <linux/irqchip/arm-gic.h>
 
 #include <asm/cacheflush.h>
 #include <asm/cp15.h>
 #include <asm/exception.h>
 #include <asm/fiq.h>
 #include <asm/irq.h>
+#include <asm/kgdb.h>
 #include <asm/traps.h>
 
 #define FIQ_OFFSET ({					\
@@ -65,7 +67,6 @@  static unsigned long no_fiq_insn;
 static int fiq_start = -1;
 static RADIX_TREE(fiq_data_tree, GFP_KERNEL);
 static DEFINE_MUTEX(fiq_data_mutex);
-static ATOMIC_NOTIFIER_HEAD(fiq_nmi_chain);
 
 /* Default reacquire function
  * - we always relinquish FIQ control
@@ -218,17 +219,23 @@  bool has_fiq(int fiq)
 }
 EXPORT_SYMBOL(has_fiq);
 
-int register_fiq_nmi_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_register(&fiq_nmi_chain, nb);
-}
-
 asmlinkage void __exception_irq_entry fiq_nmi_handler(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
 	nmi_enter();
-	atomic_notifier_call_chain(&fiq_nmi_chain, (unsigned long)regs, NULL);
+
+	/* these callbacks deliberately avoid using a notifier chain in
+	 * order to ensure code review happens (drivers cannot "secretly"
+	 * employ FIQ without modifying this chain of calls).
+	 */
+#ifdef CONFIG_KGDB_FIQ
+	kgdb_handle_fiq(regs);
+#endif
+#ifdef CONFIG_ARM_GIC
+	gic_handle_fiq_ipi();
+#endif
+
 	nmi_exit();
 	set_irq_regs(old_regs);
 }
diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index b77b885..630a3ef 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -312,12 +312,13 @@  struct kgdb_arch arch_kgdb_ops = {
 };
 
 #ifdef CONFIG_KGDB_FIQ
-static int kgdb_handle_fiq(struct notifier_block *nb, unsigned long arg,
-			   void *data)
+void kgdb_handle_fiq(struct pt_regs *regs)
 {
-	struct pt_regs *regs = (void *) arg;
 	int actual;
 
+	if (!kgdb_fiq)
+		return;
+
 	if (!kgdb_nmicallback(raw_smp_processor_id(), regs))
 		return NOTIFY_OK;
 
@@ -333,11 +334,6 @@  static int kgdb_handle_fiq(struct notifier_block *nb, unsigned long arg,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block kgdb_fiq_notifier = {
-	.notifier_call = kgdb_handle_fiq,
-	.priority = 100,
-};
-
 int kgdb_register_fiq(unsigned int fiq)
 {
 	static struct fiq_handler kgdb_fiq_desc = { .name = "kgdb", };
@@ -357,7 +353,6 @@  int kgdb_register_fiq(unsigned int fiq)
 	}
 
 	kgdb_fiq = fiq;
-	register_fiq_nmi_notifier(&kgdb_fiq_notifier);
 
 	return 0;
 }
diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c
index bda5a91..8821160 100644
--- a/drivers/irqchip/irq-gic.c
+++ b/drivers/irqchip/irq-gic.c
@@ -502,13 +502,17 @@  static void __init gic_init_fiq(struct gic_chip_data *gic,
 /*
  * Fully acknowledge (both ack and eoi) a FIQ-based IPI
  */
-static int gic_handle_fiq_ipi(struct notifier_block *nb, unsigned long regs,
-			   void *data)
+void gic_handle_fiq_ipi(void)
 {
 	struct gic_chip_data *gic = &gic_data[0];
-	void __iomem *cpu_base = gic_data_cpu_base(gic);
+	void __iomem *cpu_base;
 	unsigned long irqstat, irqnr;
 
+	if (!gic || !gic->fiq_enable)
+		return;
+
+	cpu_base = gic_data_cpu_base(gic);
+
 	if (WARN_ON(!in_nmi()))
 		return NOTIFY_BAD;
 
@@ -525,13 +529,6 @@  static int gic_handle_fiq_ipi(struct notifier_block *nb, unsigned long regs,
 
 	return NOTIFY_OK;
 }
-
-/*
- * Notifier to ensure IPI FIQ is acknowledged correctly.
- */
-static struct notifier_block gic_fiq_ipi_notifier = {
-	.notifier_call = gic_handle_fiq_ipi,
-};
 #else /* CONFIG_FIQ */
 static inline void gic_set_group_irq(void __iomem *base, unsigned int hwirq,
 				     int group) {}
@@ -1250,10 +1247,6 @@  void __init gic_init_bases(unsigned int gic_nr, int irq_start,
 #ifdef CONFIG_SMP
 		set_smp_cross_call(gic_raise_softirq);
 		register_cpu_notifier(&gic_cpu_notifier);
-#ifdef CONFIG_FIQ
-		if (gic_data_fiq_enable(gic))
-			register_fiq_nmi_notifier(&gic_fiq_ipi_notifier);
-#endif
 #endif
 		set_handle_irq(gic_handle_irq);
 	}
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index 45e2d8c..52a5676 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -101,5 +101,8 @@  static inline void __init register_routable_domain_ops
 {
 	gic_routable_irq_domain_ops = ops;
 }
+
+void gic_handle_fiq_ipi(void);
+
 #endif /* __ASSEMBLY */
 #endif