diff mbox

[v8,4/4] ARM: Add KGDB/KDB FIQ debugger generic code

Message ID 1404979427-12943-5-git-send-email-daniel.thompson@linaro.org
State New
Headers show

Commit Message

Daniel Thompson July 10, 2014, 8:03 a.m. UTC
From: Anton Vorontsov <anton.vorontsov@linaro.org>

The FIQ debugger may be used to debug situations when the kernel stuck
in uninterruptable sections, e.g. the kernel infinitely loops or
deadlocked in an interrupt or with interrupts disabled.

By default KGDB FIQ is disabled in runtime, but can be enabled with
kgdb_fiq.enable=1 kernel command line option.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
[daniel.thompson@linaro.org: Added __fiq_abt, rewrote stack init]
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
Acked-by: Nicolas Pitre <nico@linaro.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Ben Dooks <ben.dooks@codethink.co.uk>
Cc: Dave Martin <Dave.Martin@arm.com>
---
 arch/arm/Kconfig                 |   2 +
 arch/arm/Kconfig.debug           |  18 ++++++
 arch/arm/include/asm/kgdb.h      |   7 +++
 arch/arm/kernel/Makefile         |   1 +
 arch/arm/kernel/kgdb_fiq.c       | 132 +++++++++++++++++++++++++++++++++++++++
 arch/arm/kernel/kgdb_fiq_entry.S | 130 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 290 insertions(+)
 create mode 100644 arch/arm/kernel/kgdb_fiq.c
 create mode 100644 arch/arm/kernel/kgdb_fiq_entry.S

Comments

Russell King - ARM Linux Aug. 13, 2014, 9:45 p.m. UTC | #1
On Thu, Jul 10, 2014 at 09:03:47AM +0100, Daniel Thompson wrote:
> From: Anton Vorontsov <anton.vorontsov@linaro.org>
> 
> The FIQ debugger may be used to debug situations when the kernel stuck
> in uninterruptable sections, e.g. the kernel infinitely loops or
> deadlocked in an interrupt or with interrupts disabled.
> 
> By default KGDB FIQ is disabled in runtime, but can be enabled with
> kgdb_fiq.enable=1 kernel command line option.

I know you've been around the loop on this patch set quite a number of
times.  However, there are two issues.  The first is a simple concern,
the second is more a design decision...

I've recently been hitting a problem on iMX6Q with a irqs-off deadlock
on CPU0 (somehow, it always hit CPU0 every time I tested.)  This wasn't
particularly good as it prevented much in the way of diagnosis.

Of course, things like the spinlock lockup fired... but nothing could
give me a trace from CPU0.

On x86, they have this fixed by using the NMI to trigger a backtrace
on all CPUs when a RCU lockup or spinlock lockup occurs.  There's a
generic hook for this called arch_trigger_all_cpu_backtrace().

So, I set about using the contents of some of your patches to implement
this for ARM, and I came out with something which works.  In doing this,
I started wondering whether the default FIQ handler should not be just
"subs pc, lr, #4" but mostly your FIQ assembly code you have below.
This, along with your GIC patches to move all IRQs to group 1, then
gives us a way to send a FIQ IPI to CPUs in the system - and the FIQ
IPI could be caught and used to dump a backtrace.

Here's the changes I did for that, which are a tad hacky:

irq-gic.c - SGI 8 gets used to trigger a backtrace.  Note it must be
high priority too.

gic_cpu_init()
+       /*
+        * Set all PPI and SGI interrupts to be group 1.
+        *
+        * If grouping is not available (not implemented or prohibited by
+        * security mode) these registers are read-as-zero/write-ignored.
+        */
+       writel_relaxed(0xfffffeff, dist_base + GIC_DIST_IGROUP + 0);
+       writel_relaxed(0xa0a0a000, dist_base + GIC_DIST_PRI + 8);

gic_raise_softirq()
+       softirq = map << 16 | irq;
+       if (irq != 8)
+               softirq |= 0x8000;
+

arch/arm/kernel/smp.c:
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
...
+static void ipi_cpu_backtrace(struct pt_regs *regs)
+{
+       int cpu = smp_processor_id();
+
+       if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
+               static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+
+               arch_spin_lock(&lock);
+               printk(KERN_WARNING "FIQ backtrace for cpu %d\n", cpu);
+               show_regs(regs);
+               arch_spin_unlock(&lock);
+               cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+       }
+}
+
...
+void arch_trigger_all_cpu_backtrace(bool include_self)
+{
+       static unsigned long backtrace_flag;
+       int i, cpu = get_cpu();
+
+       if (test_and_set_bit(0, &backtrace_flag)) {
+               /*
+                * If there is already a trigger_all_cpu_backtrace() in progress
+                * (backtrace_flag == 1), don't output double cpu dump infos.
+                */
+               put_cpu();
+               return;
+       }
+
+       cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+       if (!include_self)
+               cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+
+       if (!cpumask_empty(to_cpumask(backtrace_mask))) {
+               pr_info("Sending FIQ to %s CPUs:\n",
+                       (include_self ? "all" : "other"));
+               smp_cross_call(to_cpumask(backtrace_mask), IPI_CPU_BACKTRACE);
+       }
+
+       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
+       for (i = 0; i < 10 * 1000; i++) {
+               if (cpumask_empty(to_cpumask(backtrace_mask)))
+                       break;
+
+               mdelay(1);
+       }
+
+       clear_bit(0, &backtrace_flag);
+       smp_mb__after_atomic();
+       put_cpu();
+}
+
+void __fiq_handle(struct pt_regs *regs)
+{
+       ipi_cpu_backtrace(regs);
+}

arch/arm/kernel/setup.c:
+static unsigned int fiq_stack[4][1024];
+
...
cpu_init()
-       "msr    cpsr_c, %7"
+       "msr    cpsr_c, %7\n\t"
+       "mov    sp, %8\n\t"
+       "msr    cpsr_c, %9"
...
+             PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
+             "r" (&fiq_stack[cpu][1024]),

The FIQ assembly code is basically the same as yours, but with:

+       .macro  fiq_handler
+       bl      __fiq_handle
+       .endm

and the code in svc_exit_via_fiq testing the PSR I flag and calling
trace_hardirqs_on removed.

This does have one deficiency, and that is it doesn't EOI the FIQ
interrupt - that's something which should be fixed, but for my
purposes to track down where the locked CPU was, it wasn't strictly
necessary that the system continued to work after this point.

This brings me to my second concern, and is the reason I decided not
to ask for it for this merge window.

Calling the trace_* functions is a no-no from FIQ code.
trace_hardirqs_on() can itself take locks, which can result in a
deadlock.

I thought I'd made it clear that FIQ code can't take locks because
there's no way of knowing what state they're in at the point that the
FIQ fires - _irq() variants won't save you - and that's kind of the
point of FIQ.  It's almost never masked by the kernel.

Now, You'll be forgiven if you now point out that in the code above,
I'm taking a spinlock.  That's absolutely true.  Analyse the code
a little closer and you'll notice it's done in a safe way.  There's
only one time where that spinlock is taken and that's from FIQ code,
never from any other code, and only once per CPU - notice how
arch_trigger_all_cpu_backtrace() protects itself against multiple
callers, and how ipi_cpu_backtrace() is careful to check that its
CPU bit is set.  This is exactly the same method which x86 code uses
(in fact, much of the above code was stolen from x86!)

So, how about moving the FIQ assembly code to entry-armv.S and making
it less kgdb specific?  (Though... we do want to keep a /very/ close
eye on users to ensure that they don't do silly stuff with locking.)
Daniel Thompson Aug. 14, 2014, 10:48 a.m. UTC | #2
On 13/08/14 22:45, Russell King - ARM Linux wrote:
> On Thu, Jul 10, 2014 at 09:03:47AM +0100, Daniel Thompson wrote:
>> From: Anton Vorontsov <anton.vorontsov@linaro.org>
>>
>> The FIQ debugger may be used to debug situations when the kernel stuck
>> in uninterruptable sections, e.g. the kernel infinitely loops or
>> deadlocked in an interrupt or with interrupts disabled.
>>
>> By default KGDB FIQ is disabled in runtime, but can be enabled with
>> kgdb_fiq.enable=1 kernel command line option.
> 
> I know you've been around the loop on this patch set quite a number of
> times.  However, there are two issues.  The first is a simple concern,
> the second is more a design decision...
> 
> I've recently been hitting a problem on iMX6Q with a irqs-off deadlock
> on CPU0 (somehow, it always hit CPU0 every time I tested.)  This wasn't
> particularly good as it prevented much in the way of diagnosis.
> 
> Of course, things like the spinlock lockup fired... but nothing could
> give me a trace from CPU0.
> 
> On x86, they have this fixed by using the NMI to trigger a backtrace
> on all CPUs when a RCU lockup or spinlock lockup occurs.  There's a
> generic hook for this called arch_trigger_all_cpu_backtrace().
> 
> So, I set about using the contents of some of your patches to implement
> this for ARM, and I came out with something which works.  In doing this,
> I started wondering whether the default FIQ handler should not be just
> "subs pc, lr, #4" but mostly your FIQ assembly code you have below.
> This, along with your GIC patches to move all IRQs to group 1, then
> gives us a way to send a FIQ IPI to CPUs in the system - and the FIQ
> IPI could be caught and used to dump a backtrace.
> 
> Here's the changes I did for that, which are a tad hacky:
> 
> irq-gic.c - SGI 8 gets used to trigger a backtrace.  Note it must be
> high priority too.
> 
> gic_cpu_init()
> +       /*
> +        * Set all PPI and SGI interrupts to be group 1.
> +        *
> +        * If grouping is not available (not implemented or prohibited by
> +        * security mode) these registers are read-as-zero/write-ignored.
> +        */
> +       writel_relaxed(0xfffffeff, dist_base + GIC_DIST_IGROUP + 0);
> +       writel_relaxed(0xa0a0a000, dist_base + GIC_DIST_PRI + 8);
> 
> gic_raise_softirq()
> +       softirq = map << 16 | irq;
> +       if (irq != 8)
> +               softirq |= 0x8000;
> +

Let me dig out some of my patches in this area.

I've added an IPI to be used to quiesce the CPUs for KGDB. My code is in
essence the same as yours but uses a bitmask for the IPIs that should be
delivered using FIQ (in fact I wrote that deliberately to leave space to
easily implement arch_trigger_all_cpu_backtrace() )

> arch/arm/kernel/smp.c:
> +/* For reliability, we're prepared to waste bits here. */
> +static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
> +
> ...
> +static void ipi_cpu_backtrace(struct pt_regs *regs)
> +{
> +       int cpu = smp_processor_id();
> +
> +       if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
> +               static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
> +
> +               arch_spin_lock(&lock);
> +               printk(KERN_WARNING "FIQ backtrace for cpu %d\n", cpu);
> +               show_regs(regs);
> +               arch_spin_unlock(&lock);
> +               cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
> +       }
> +}
> +
> ...
> +void arch_trigger_all_cpu_backtrace(bool include_self)
> +{
> +       static unsigned long backtrace_flag;
> +       int i, cpu = get_cpu();
> +
> +       if (test_and_set_bit(0, &backtrace_flag)) {
> +               /*
> +                * If there is already a trigger_all_cpu_backtrace() in progress
> +                * (backtrace_flag == 1), don't output double cpu dump infos.
> +                */
> +               put_cpu();
> +               return;
> +       }
> +
> +       cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
> +       if (!include_self)
> +               cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
> +
> +       if (!cpumask_empty(to_cpumask(backtrace_mask))) {
> +               pr_info("Sending FIQ to %s CPUs:\n",
> +                       (include_self ? "all" : "other"));
> +               smp_cross_call(to_cpumask(backtrace_mask), IPI_CPU_BACKTRACE);
> +       }
> +
> +       /* Wait for up to 10 seconds for all CPUs to do the backtrace */
> +       for (i = 0; i < 10 * 1000; i++) {
> +               if (cpumask_empty(to_cpumask(backtrace_mask)))
> +                       break;
> +
> +               mdelay(1);
> +       }
> +
> +       clear_bit(0, &backtrace_flag);
> +       smp_mb__after_atomic();
> +       put_cpu();
> +}
> +
> +void __fiq_handle(struct pt_regs *regs)
> +{
> +       ipi_cpu_backtrace(regs);
> +}
> 
> arch/arm/kernel/setup.c:
> +static unsigned int fiq_stack[4][1024];
> +
> ...
> cpu_init()
> -       "msr    cpsr_c, %7"
> +       "msr    cpsr_c, %7\n\t"
> +       "mov    sp, %8\n\t"
> +       "msr    cpsr_c, %9"
> ...
> +             PLC (PSR_F_BIT | PSR_I_BIT | FIQ_MODE),
> +             "r" (&fiq_stack[cpu][1024]),
> 
> The FIQ assembly code is basically the same as yours, but with:
> 
> +       .macro  fiq_handler
> +       bl      __fiq_handle
> +       .endm
> 
> and the code in svc_exit_via_fiq testing the PSR I flag and calling
> trace_hardirqs_on removed.
> 
> This does have one deficiency, and that is it doesn't EOI the FIQ
> interrupt - that's something which should be fixed, but for my
> purposes to track down where the locked CPU was, it wasn't strictly
> necessary that the system continued to work after this point.

EOIing IPIs should be pretty easy (it is safe to EOI them immediately
after the ack). This is also included in my existing IPI patches.


> This brings me to my second concern, and is the reason I decided not
> to ask for it for this merge window.
> 
> Calling the trace_* functions is a no-no from FIQ code.
> trace_hardirqs_on() can itself take locks, which can result in a
> deadlock.
> 
> I thought I'd made it clear that FIQ code can't take locks because
> there's no way of knowing what state they're in at the point that the
> FIQ fires - _irq() variants won't save you - and that's kind of the
> point of FIQ.  It's almost never masked by the kernel.

Don't worry, you certainly did make it clear!

I actually went as far as removing the trace_* calls before having a
change of heart. As a result I spent some time looking at the
trace_hardirqs_on() code and, as far as I remember, I couldn't find a
lock in the code that wasn't bypassed when in_nmi() returned true.

However the code is pretty complex and its certainly possible that I
missed something. Are you in a position to point any particular lock to
show I messed this up or are you working from memory?


> Now, You'll be forgiven if you now point out that in the code above,
> I'm taking a spinlock.  That's absolutely true.  Analyse the code
> a little closer and you'll notice it's done in a safe way.  There's
> only one time where that spinlock is taken and that's from FIQ code,
> never from any other code, and only once per CPU - notice how
> arch_trigger_all_cpu_backtrace() protects itself against multiple
> callers, and how ipi_cpu_backtrace() is careful to check that its
> CPU bit is set.  This is exactly the same method which x86 code uses
> (in fact, much of the above code was stolen from x86!)

Agreed. I also plan to use FIQ-safe locks in the GIC code to raise an
IPI. The b.L switcher code explicitly calls local_fiq_disable() so
providing the locks are contested only between the b.L switcher logic
and FIQ handlers it will be safe.

That said my original approach (again, I'll post it in a moment) was
pretty ugly which is why I'm so pleased with Stephen Boyd's recently
proposed change.


> So, how about moving the FIQ assembly code to entry-armv.S and making
> it less kgdb specific?  (Though... we do want to keep a /very/ close
> eye on users to ensure that they don't do silly stuff with locking.)

I think I can do that.

Something like this?

1. Install the current trap handler code by default (either with or
   without trace_* calls).

2. Have default trap handler call an RCU notifier chain to allow it to
   hook up with "normal" code without any hard coding (kgdb, IPI
   handling, etc)

3. Retain existing install_fiq() behaviour for people who want FIQ
   to be a fast-interrupt rather than an NMI (for example, the
   Raspberry pi USB optimizations).

4. Ensure default handler is an exported symbol to allow the meths
   drinkers from #3 to chain to logic for NMI/debug features
   if they want to.


Daniel.
Daniel Thompson Aug. 14, 2014, 11:15 a.m. UTC | #3
This is a work-in-progress set of patches to add support for
implementing IPIs using FIQ. Currently KGDB is the only user
of this feature although others could easily be added due to
the use of notifier chains.

Patches depend on the KGDB NMI/FIQ patch series. I'll send
a complete patchset for the KGDB NMI/FIQ shortly after 3.17rc1 comes
out.

--
1.9.3
Russell King - ARM Linux Aug. 14, 2014, 12:36 p.m. UTC | #4
On Thu, Aug 14, 2014 at 11:48:36AM +0100, Daniel Thompson wrote:
> Don't worry, you certainly did make it clear!
> 
> I actually went as far as removing the trace_* calls before having a
> change of heart. As a result I spent some time looking at the
> trace_hardirqs_on() code and, as far as I remember, I couldn't find a
> lock in the code that wasn't bypassed when in_nmi() returned true.
> 
> However the code is pretty complex and its certainly possible that I
> missed something. Are you in a position to point any particular lock to
> show I messed this up or are you working from memory?

The complexity alone is an argument against calling it from FIQ context,
because there's no guarantee that what's there will stay that way.

trace_hardirqs_on() is made more complex because there's not one function
with that name, but two.  One is in the trace code, the other is in the
lockdep code.  Here's the trace code, which has at least two problems:

trace_hardirqs_on
 -> stop_critical_timing
  -> check_critical_timing
   -> raw_spin_lock_irqsave(&max_trace_lock, flags);
   -> __trace_stack
    -> __ftrace_trace_stack
     -> save_stack_trace
      -> __save_stack_trace
       -> walk_stackframe
        -> unwind_frame
         -> unwind_find_idx
          -> spin_lock_irqsave(&unwind_lock, flags);

> > So, how about moving the FIQ assembly code to entry-armv.S and making
> > it less kgdb specific?  (Though... we do want to keep a /very/ close
> > eye on users to ensure that they don't do silly stuff with locking.)
> 
> I think I can do that.
> 
> Something like this?
> 
> 1. Install the current trap handler code by default (either with or
>    without trace_* calls).

Without trace_* calls.  The FIQ should be transparent to tracing -
tracing and lockdep is too much of an unknown (due to size and
complexity) to ensure that it always follows the rules.

> 2. Have default trap handler call an RCU notifier chain to allow it to
>    hook up with "normal" code without any hard coding (kgdb, IPI
>    handling, etc)

Maybe... that sounds like it opens up FIQ for general purpose use which
is something I want to avoid - I've little motivation to ensure that
everyone plays by the rules.  Given the choice, I'd rather maintain our
present stance that using FIQs is hard and requires a lot of thought.

> 3. Retain existing install_fiq() behaviour for people who want FIQ
>    to be a fast-interrupt rather than an NMI (for example, the
>    Raspberry pi USB optimizations).

Yes, arch/arm/kernel/fiq.c should be relatively untouched by the core
mods I suggested - we're just replacing the default "subs" instruction
(which just ignores the FIQ) with something which can do something
with it.

It should be noted that using arch/arm/kernel/fiq.c would override this
default FIQ handler - and that's a feature of it - fiq.c is based on
the premise that there is only one single owner of the FIQ at any one
time and there is no sharing of it, and that's something we want to
retain.

> 4. Ensure default handler is an exported symbol to allow the meths
>    drinkers from #3 to chain to logic for NMI/debug features
>    if they want to.

That's not something I particularly want to permit - especially as the
requirements for each "handler" are different - this new default code
relies upon r13 pointing at some storage to save some registers, which
may not be true of other FIQ handlers.  Chaining it means that we
force that use of r13 on others, and we then have to also come up with
some way to export the correct r13 value.

Given that fiq.c doesn't work with SMP, this isn't something I want to
encourage.

Further more, I can't get excited about Raspberry Pi "optimisations"
using FIQ until I see the code, and I see no reason to think about
catering for such stuff until the patches become visible here.
Daniel Thompson Aug. 14, 2014, 3:02 p.m. UTC | #5
On 14/08/14 13:36, Russell King - ARM Linux wrote:
> On Thu, Aug 14, 2014 at 11:48:36AM +0100, Daniel Thompson wrote:
>> Don't worry, you certainly did make it clear!
>>
>> I actually went as far as removing the trace_* calls before having a
>> change of heart. As a result I spent some time looking at the
>> trace_hardirqs_on() code and, as far as I remember, I couldn't find a
>> lock in the code that wasn't bypassed when in_nmi() returned true.
>>
>> However the code is pretty complex and its certainly possible that I
>> missed something. Are you in a position to point any particular lock to
>> show I messed this up or are you working from memory?
> 
> The complexity alone is an argument against calling it from FIQ context,
> because there's no guarantee that what's there will stay that way.
> 
> trace_hardirqs_on() is made more complex because there's not one function
> with that name, but two.  One is in the trace code, the other is in the
> lockdep code.  Here's the trace code, which has at least two problems:
> 
> trace_hardirqs_on
>  -> stop_critical_timing
>   -> check_critical_timing
>    -> raw_spin_lock_irqsave(&max_trace_lock, flags);
>    -> __trace_stack
>     -> __ftrace_trace_stack
>      -> save_stack_trace
>       -> __save_stack_trace
>        -> walk_stackframe
>         -> unwind_frame
>          -> unwind_find_idx
>           -> spin_lock_irqsave(&unwind_lock, flags);

Looks like I had an out-by-one error when reviewing this... sadly the
value was boolean.

Thanks this is very clear.


>>> So, how about moving the FIQ assembly code to entry-armv.S and making
>>> it less kgdb specific?  (Though... we do want to keep a /very/ close
>>> eye on users to ensure that they don't do silly stuff with locking.)
>>
>> I think I can do that.
>>
>> Something like this?
>>
>> 1. Install the current trap handler code by default (either with or
>>    without trace_* calls).
> 
> Without trace_* calls.  The FIQ should be transparent to tracing -
> tracing and lockdep is too much of an unknown (due to size and
> complexity) to ensure that it always follows the rules.

Will do.


>> 2. Have default trap handler call an RCU notifier chain to allow it to
>>    hook up with "normal" code without any hard coding (kgdb, IPI
>>    handling, etc)
> 
> Maybe... that sounds like it opens up FIQ for general purpose use which
> is something I want to avoid - I've little motivation to ensure that
> everyone plays by the rules.  Given the choice, I'd rather maintain our
> present stance that using FIQs is hard and requires a lot of thought.

I'll see what feels natural. Not exporting the symbol to register for
notification would go some way to preventing abuse whilst still allowing
decoupling from kgdb (which cannot be a module).


>> 3. Retain existing install_fiq() behaviour for people who want FIQ
>>    to be a fast-interrupt rather than an NMI (for example, the
>>    Raspberry pi USB optimizations).
> 
> Yes, arch/arm/kernel/fiq.c should be relatively untouched by the core
> mods I suggested - we're just replacing the default "subs" instruction
> (which just ignores the FIQ) with something which can do something
> with it.
> 
> It should be noted that using arch/arm/kernel/fiq.c would override this
> default FIQ handler - and that's a feature of it - fiq.c is based on
> the premise that there is only one single owner of the FIQ at any one
> time and there is no sharing of it, and that's something we want to
> retain.

However, particularly for IPIs, if we've build useful debug features
using them it would be nice to provide hooks that the owner can use to
keep them working if they wish.

However I agree with your response to #4; interfacing at a level lower
than C ABI isn't right. To use any hooks the owner must first make it
safe to call C.


>> 4. Ensure default handler is an exported symbol to allow the meths
>>    drinkers from #3 to chain to logic for NMI/debug features
>>    if they want to.
> 
> That's not something I particularly want to permit - especially as the
> requirements for each "handler" are different - this new default code
> relies upon r13 pointing at some storage to save some registers, which
> may not be true of other FIQ handlers.  Chaining it means that we
> force that use of r13 on others, and we then have to also come up with
> some way to export the correct r13 value.
> 
> Given that fiq.c doesn't work with SMP, this isn't something I want to
> encourage.

Makes sense.


> Further more, I can't get excited about Raspberry Pi "optimisations"
> using FIQ until I see the code, and I see no reason to think about
> catering for such stuff until the patches become visible here.

IIRC they don't add any use-cases beyond the existing in-kernel FIQ
users (such as R-PC floppy disc). It's just that they do it on more
recent hardware (and as you say, out-of-tree).
diff mbox

Patch

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 245058b..f385b27 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -297,6 +297,7 @@  choice
 config ARCH_MULTIPLATFORM
 	bool "Allow multiple platforms to be selected"
 	depends on MMU
+	select ARCH_MIGHT_HAVE_KGDB_FIQ
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARM_HAS_SG_CHAIN
 	select ARM_PATCH_PHYS_VIRT
@@ -346,6 +347,7 @@  config ARCH_REALVIEW
 
 config ARCH_VERSATILE
 	bool "ARM Ltd. Versatile family"
+	select ARCH_MIGHT_HAVE_KGDB_FIQ
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select ARM_AMBA
 	select ARM_TIMER_SP804
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 26536f7..c7342b6 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -2,6 +2,24 @@  menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
+config ARCH_MIGHT_HAVE_KGDB_FIQ
+	bool
+
+config KGDB_FIQ
+	bool "KGDB FIQ support"
+	depends on KGDB_KDB && ARCH_MIGHT_HAVE_KGDB_FIQ && !THUMB2_KERNEL
+	select FIQ
+	help
+	  The FIQ debugger may be used to debug situations when the
+	  kernel stuck in uninterruptable sections, e.g. the kernel
+	  infinitely loops or deadlocked in an interrupt or with
+	  interrupts disabled.
+
+	  By default KGDB FIQ is disabled at runtime, but can be
+	  enabled with kgdb_fiq.enable=1 kernel command line option.
+
+	  If unsure, say N.
+
 config ARM_PTDUMP
 	bool "Export kernel pagetable layout to userspace via debugfs"
 	depends on DEBUG_KERNEL
diff --git a/arch/arm/include/asm/kgdb.h b/arch/arm/include/asm/kgdb.h
index 0a9d5dd..5de21f01 100644
--- a/arch/arm/include/asm/kgdb.h
+++ b/arch/arm/include/asm/kgdb.h
@@ -11,7 +11,9 @@ 
 #define __ARM_KGDB_H__
 
 #include <linux/ptrace.h>
+#include <linux/linkage.h>
 #include <asm/opcodes.h>
+#include <asm/exception.h>
 
 /*
  * GDB assumes that we're a user process being debugged, so
@@ -48,6 +50,11 @@  static inline void arch_kgdb_breakpoint(void)
 extern void kgdb_handle_bus_error(void);
 extern int kgdb_fault_expected;
 
+extern char kgdb_fiq_handler;
+extern char kgdb_fiq_handler_end;
+asmlinkage void __exception_irq_entry kgdb_fiq_do_handle(struct pt_regs *regs);
+extern int kgdb_register_fiq(unsigned int fiq);
+
 #endif /* !__ASSEMBLY__ */
 
 /*
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 38ddd9f..30ee8f3 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -68,6 +68,7 @@  endif
 obj-$(CONFIG_OABI_COMPAT)	+= sys_oabi-compat.o
 obj-$(CONFIG_ARM_THUMBEE)	+= thumbee.o
 obj-$(CONFIG_KGDB)		+= kgdb.o
+obj-$(CONFIG_KGDB_FIQ)		+= kgdb_fiq_entry.o kgdb_fiq.o
 obj-$(CONFIG_ARM_UNWIND)	+= unwind.o
 obj-$(CONFIG_HAVE_TCM)		+= tcm.o
 obj-$(CONFIG_OF)		+= devtree.o
diff --git a/arch/arm/kernel/kgdb_fiq.c b/arch/arm/kernel/kgdb_fiq.c
new file mode 100644
index 0000000..a894dde
--- /dev/null
+++ b/arch/arm/kernel/kgdb_fiq.c
@@ -0,0 +1,132 @@ 
+/*
+ * KGDB FIQ
+ *
+ * Copyright 2010 Google, Inc.
+ *		  Arve Hjønnevåg <arve@android.com>
+ *		  Colin Cross <ccross@android.com>
+ * Copyright 2012 Linaro Ltd.
+ *		  Anton Vorontsov <anton.vorontsov@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/hardirq.h>
+#include <linux/atomic.h>
+#include <linux/kdb.h>
+#include <linux/kgdb.h>
+#include <asm/fiq.h>
+#include <asm/exception.h>
+
+static int kgdb_fiq_enabled;
+module_param_named(enable, kgdb_fiq_enabled, int, 0600);
+MODULE_PARM_DESC(enable, "set to 1 to enable FIQ KGDB");
+
+static unsigned int kgdb_fiq;
+
+struct fiq_stack {
+	u32 fiq[3];
+} ____cacheline_aligned;
+
+static struct fiq_stack stacks[NR_CPUS];
+
+asmlinkage void __exception_irq_entry kgdb_fiq_do_handle(struct pt_regs *regs)
+{
+	int actual;
+
+	nmi_enter();
+	actual = ack_fiq(kgdb_fiq);
+	WARN_ON(actual != kgdb_fiq);
+
+	/* there's no harm in doing this regardless of the above WARN_ON() */
+	if (kgdb_nmi_poll_knock())
+		kgdb_handle_exception(1, 0, 0, regs);
+
+	eoi_fiq(actual);
+	nmi_exit();
+}
+
+static struct fiq_handler kgdb_fiq_desc = {
+	.name = "kgdb",
+};
+
+static long kgdb_fiq_setup_stack(void *info)
+{
+	struct pt_regs regs;
+
+	regs.ARM_sp = (unsigned long) stacks[smp_processor_id()].fiq;
+	set_fiq_regs(&regs);
+
+	return 0;
+}
+
+/**
+ * kgdb_fiq_enable_nmi - Manage NMI-triggered entry to KGDB
+ * @on: Flag to either enable or disable an NMI
+ *
+ * This function manages NMIs that usually cause KGDB to enter. That is, not
+ * all NMIs should be enabled or disabled, but only those that issue
+ * kgdb_handle_exception().
+ *
+ * The call counts disable requests, and thus allows to nest disables. But
+ * trying to enable already enabled NMI is an error.
+ */
+static void kgdb_fiq_enable_nmi(bool on)
+{
+	static atomic_t cnt;
+	int ret;
+
+	ret = atomic_add_return(on ? 1 : -1, &cnt);
+
+	/*
+	 * There should be only one instance that calls this function
+	 * in "enable, disable" order. All other users must call
+	 * disable first, then enable. If not, something is wrong.
+	 */
+	if (WARN_ON(ret > 1 && on))
+		return;
+
+	if (ret > 0)
+		enable_fiq(kgdb_fiq);
+	else
+		disable_fiq(kgdb_fiq);
+}
+
+int kgdb_register_fiq(unsigned int fiq)
+{
+	int err;
+	int cpu;
+
+	if (!kgdb_fiq_enabled)
+		return -ENODEV;
+
+	if (!has_fiq(fiq)) {
+		pr_warn(
+		    "%s: Cannot register %u (no FIQ with this number)\n",
+		    __func__, fiq);
+		return -ENODEV;
+	}
+
+	kgdb_fiq = fiq;
+
+	err = claim_fiq(&kgdb_fiq_desc);
+	if (err) {
+		pr_warn("%s: unable to claim fiq", __func__);
+		return err;
+	}
+
+	for_each_possible_cpu(cpu)
+		work_on_cpu(cpu, kgdb_fiq_setup_stack, NULL);
+
+	set_fiq_handler(&kgdb_fiq_handler,
+			&kgdb_fiq_handler_end - &kgdb_fiq_handler);
+
+	arch_kgdb_ops.enable_nmi = kgdb_fiq_enable_nmi;
+	return 0;
+}
diff --git a/arch/arm/kernel/kgdb_fiq_entry.S b/arch/arm/kernel/kgdb_fiq_entry.S
new file mode 100644
index 0000000..50499c0
--- /dev/null
+++ b/arch/arm/kernel/kgdb_fiq_entry.S
@@ -0,0 +1,130 @@ 
+/*
+ * KGDB FIQ entry
+ *
+ * Copyright 1996,1997,1998 Russell King.
+ * Copyright 2012 Linaro Ltd.
+ *		  Anton Vorontsov <anton.vorontsov@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/memory.h>
+#include <asm/unwind.h>
+#include "entry-header.S"
+
+	.text
+
+@ This is needed for usr_entry/alignment_trap
+.LCcralign:
+	.long	cr_alignment
+.LCdohandle:
+	.long	kgdb_fiq_do_handle
+
+	.macro	fiq_handler
+	ldr	r1, =.LCdohandle
+	mov	r0, sp
+	adr	lr, BSYM(9997f)
+	ldr	pc, [r1]
+9997:
+	.endm
+
+/*
+ * svc_exit_via_fiq
+ *
+ * This macro acts in a similar manner to svc_exit but switches to FIQ
+ * mode to restore the final part of the register state.
+ *
+ * We cannot use the normal svc_exit procedure because that would
+ * clobber spsr_svc (FIQ could be delivered during the first few instructions
+ * of vector_swi meaning its contents have not been saved anywhere).
+ */
+	.macro  svc_exit_via_fiq, rpsr
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+	tst	\rpsr, #PSR_I_BIT
+	bleq	trace_hardirqs_on
+#endif
+
+	mov	r0, sp
+	ldmib	r0, {r1 - r14}	@ abort is deadly from here onward (it will
+				@ clobber state restored below)
+	msr	cpsr_c, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT
+	add	r8, r0, #S_PC
+	ldr	r9, [r0, #S_PSR]
+	msr	spsr_cxsf, r9
+	ldr	r0, [r0, #S_R0]
+	ldmia	r8, {pc}^
+	.endm
+
+	.align	5
+__fiq_svc:
+	svc_entry
+	fiq_handler
+	svc_exit_via_fiq r5
+ UNWIND(.fnend		)
+ENDPROC(__fiq_svc)
+	.ltorg
+
+	.align 5
+__fiq_abt:
+	svc_entry
+
+	msr	cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT
+	mov	r0, lr		@ Save lr_abt
+	mrs	r1, spsr	@ Save spsr_abt, abort is now safe
+	msr	cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT
+	push	{r0 - r1}
+
+	fiq_handler
+
+	pop	{r0 - r1}
+	msr	cpsr_c, #ABT_MODE | PSR_I_BIT | PSR_F_BIT
+	mov	lr, r0		@ Restore lr_abt, abort is unsafe
+	msr	spsr_cxsf, r1	@ Restore spsr_abt
+	msr	cpsr_c, #SVC_MODE | PSR_I_BIT | PSR_F_BIT
+
+	svc_exit_via_fiq r5
+ UNWIND(.fnend		)
+ENDPROC(__fiq_svc)
+	.ltorg
+
+	.align	5
+__fiq_usr:
+	usr_entry
+	kuser_cmpxchg_check
+	fiq_handler
+	get_thread_info tsk
+	mov	why, #0
+	b	ret_to_user_from_irq
+ UNWIND(.fnend		)
+ENDPROC(__fiq_usr)
+	.ltorg
+
+	.global kgdb_fiq_handler
+kgdb_fiq_handler:
+
+	vector_stub	fiq, FIQ_MODE, 4
+
+	.long	__fiq_usr			@  0  (USR_26 / USR_32)
+	.long	__fiq_svc			@  1  (FIQ_26 / FIQ_32)
+	.long	__fiq_svc			@  2  (IRQ_26 / IRQ_32)
+	.long	__fiq_svc			@  3  (SVC_26 / SVC_32)
+	.long	__fiq_svc			@  4
+	.long	__fiq_svc			@  5
+	.long	__fiq_svc			@  6
+	.long	__fiq_abt			@  7
+	.long	__fiq_svc			@  8
+	.long	__fiq_svc			@  9
+	.long	__fiq_svc			@  a
+	.long	__fiq_svc			@  b
+	.long	__fiq_svc			@  c
+	.long	__fiq_svc			@  d
+	.long	__fiq_svc			@  e
+	.long	__fiq_svc			@  f
+
+	.global kgdb_fiq_handler_end
+kgdb_fiq_handler_end: