
[Xen-devel,v3,14/39] ARM: new VGIC: Add GICv2 world switch backend

Message ID 20180321163235.12529-15-andre.przywara@linaro.org
State Superseded
Series New VGIC(-v2) implementation

Commit Message

Andre Przywara March 21, 2018, 4:32 p.m. UTC
Processing maintenance interrupts and accessing the list registers
depend on the host's GIC version.
Introduce vgic-v2.c to contain the GICv2-specific functions.
Implement the GICv2-specific code for syncing the emulation state
into the VGIC registers.
This also adds the hook to let Xen set up the host GIC addresses.
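
As a sketch of how that hook is meant to be used (the actual wiring of
the host driver happens elsewhere in this series; this call is
illustrative only), the host GICv2 driver hands its MMIO addresses
over at boot:

    /* In the host GICv2 driver's init code; arguments match the hook. */
    vgic_v2_setup_hw(dbase, cbase, csize, vbase, aliased_offset);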

This is based on Linux commit 140b086dd197, written by Marc Zyngier.

Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
---
Changelog v2 ... v3:
- remove no longer needed asm/io.h header
- replace 0/1 with false/true for bools
- clear _IRQ_INPROGRESS bit when retiring hardware mapped IRQ
- fix indentation and w/s issues

Changelog v1 ... v2:
- remove v2 specific underflow function (now generic)
- re-add Linux code to properly handle acked level IRQs

 xen/arch/arm/vgic/vgic-v2.c | 239 ++++++++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/vgic/vgic.c    |   6 ++
 xen/arch/arm/vgic/vgic.h    |   9 ++
 3 files changed, 254 insertions(+)
 create mode 100644 xen/arch/arm/vgic/vgic-v2.c

Comments

Julien Grall March 22, 2018, 3:48 a.m. UTC | #1
Hi Andre,

On 03/21/2018 04:32 PM, Andre Przywara wrote:
> +        /*
> +         * If a hardware mapped IRQ has been handled for good, we need to
> +         * clear the _IRQ_INPROGRESS bit to allow handling of new IRQs.
> +         */
> +        if ( irq->hw && !lr_val.active && !lr_val.pending )
> +        {
> +            struct irq_desc *irqd = irq_to_desc(irq->hwintid);
> +
> +            clear_bit(_IRQ_INPROGRESS, &irqd->status);

I realize the current vGIC is doing exactly the same thing. But this is 
racy.

Imagine the interrupt fires on another pCPU (I wasn't able to rule
this out even when the interrupt follows the vCPU); that pCPU may set
_IRQ_INPROGRESS before it is cleared here.

So you would end up clearing _IRQ_INPROGRESS here, making the desc
state inconsistent, and that would affect the removal of the IRQ from
a guest (see gic_remove_irq_from_guest).

I am not entirely sure how to fix this, because taking desc->lock
would not be enough: you may still end up clearing the bit right
after the flag was set in do_IRQ.

Because the race is already there in the current vGIC and only
affects releasing an IRQ, I guess I would be OK to keep it like that
for now. Can you add a TODO?
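
To illustrate the window (a sketch of the interleaving, not taken
from the code):

    /*
     * pCPU A (vgic_v2_fold_lr_state)     pCPU B (do_IRQ, same hwintid)
     *
     * reads LR: !active && !pending
     *                                    set_bit(_IRQ_INPROGRESS,
     *                                            &desc->status);
     * clear_bit(_IRQ_INPROGRESS,
     *           &desc->status);
     *
     * The bit ends up clear although pCPU B is still handling the
     * interrupt, leaving the desc state inconsistent
     * (see gic_remove_irq_from_guest).
     */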

> +        }
> +
> +        /* Always preserve the active bit */
> +        irq->active = lr_val.active;
> +
> +        /* Edge is the only case where we preserve the pending bit */
> +        if ( irq->config == VGIC_CONFIG_EDGE && lr_val.pending )
> +        {
> +            irq->pending_latch = true;
> +
> +            if ( vgic_irq_is_sgi(intid) )
> +                irq->source |= (1U << lr_val.virt.source);
> +        }
> +
> +        /* Clear soft pending state when level irqs have been acked. */
> +        if ( irq->config == VGIC_CONFIG_LEVEL && !lr_val.pending )
> +            irq->pending_latch = false;
> +
> +        /*
> +         * Level-triggered mapped IRQs are special because we only
> +         * observe rising edges as input to the VGIC.
> +         *
> +         * If the guest never acked the interrupt we have to sample
> +         * the physical line and set the line level, because the
> +         * device state could have changed or we simply need to
> +         * process the still pending interrupt later.
> +         *
> +         * If this causes us to lower the level, we have to also clear
> +         * the physical active state, since we will otherwise never be
> +         * told when the interrupt becomes asserted again.
> +         */
> +        if ( vgic_irq_is_mapped_level(irq) && lr_val.pending )
> +        {
> +            struct irq_desc *irqd;
> +
> +            ASSERT(irq->hwintid >= VGIC_NR_PRIVATE_IRQS);
> +
> +            irqd = irq_to_desc(irq->hwintid);
> +            irq->line_level = gic_read_pending_state(irqd);
> +
> +            if ( !irq->line_level )
> +                gic_set_active_state(irqd, false);

Sorry, I didn't notice it before: gic_set_active_state expects
desc->lock to be taken, but I can't see the code here doing that.
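
For reference, the helper looks roughly like this (paraphrased; the
ASSERT is what makes the locking expectation explicit):

    void gic_set_active_state(struct irq_desc *irqd, bool active)
    {
        /* Caller is expected to hold the desc lock. */
        ASSERT(spin_is_locked(&irqd->lock));
        gic_hw_ops->set_active_state(irqd, active);
    }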

Cheers,
Andre Przywara March 22, 2018, 11:04 a.m. UTC | #2
This is a "patch to the patch" mentioned above, to make it clear what
changed:
We now take the desc lock in vgic_v2_fold_lr_state() when we are dealing
with a hardware IRQ. This is a bit complicated, because we have to obey
the existing locking order, so do our infamous "drop-take-retake" dance.
Also I print a message about using the new VGIC and fix that last
remaining "u32" usage.

Please note that I had to initialise "desc" to NULL because my
compiler (GCC 5.3) is not smart enough to see that we only use it
when irq->hw is set, so it is safe. Please let me know if it's me not
being smart enough here instead ;-)
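
Reduced to the pattern in question (illustrative only):

    struct irq_desc *desc = NULL;  /* only to silence the compiler */

    if ( irq->hw )                 /* conditionally initialised here ... */
        desc = irq_to_desc(irq->hwintid);

    /* ... */

    if ( irq->hw && !lr_val.active && !lr_val.pending )
        clear_bit(_IRQ_INPROGRESS, &desc->status);
        /* ... and only dereferenced under the same irq->hw condition,
         * which the compiler cannot correlate across statements. */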

Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
---
Hi,

will send a proper, merged v3a version of the patch separately.

Cheers,
Andre

 xen/arch/arm/vgic/vgic-v2.c | 43 ++++++++++++++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/xen/arch/arm/vgic/vgic-v2.c b/xen/arch/arm/vgic/vgic-v2.c
index 5516a8534f..3424a4a66f 100644
--- a/xen/arch/arm/vgic/vgic-v2.c
+++ b/xen/arch/arm/vgic/vgic-v2.c
@@ -43,6 +43,8 @@ void vgic_v2_setup_hw(paddr_t dbase, paddr_t cbase, paddr_t csize,
     gic_v2_hw_data.csize = csize;
     gic_v2_hw_data.vbase = vbase;
     gic_v2_hw_data.aliased_offset = aliased_offset;
+
+    printk("Using the new VGIC implementation.\n");
 }
 
 /*
@@ -69,6 +71,8 @@ void vgic_v2_fold_lr_state(struct vcpu *vcpu)
         struct gic_lr lr_val;
         uint32_t intid;
         struct vgic_irq *irq;
+        struct irq_desc *desc = NULL;
+        bool have_desc_lock = false;
 
         gic_hw_ops->read_lr(lr, &lr_val);
 
@@ -88,18 +92,30 @@ void vgic_v2_fold_lr_state(struct vcpu *vcpu)
         intid = lr_val.virq;
         irq = vgic_get_irq(vcpu->domain, vcpu, intid);
 
-        spin_lock_irqsave(&irq->irq_lock, flags);
+        local_irq_save(flags);
+        spin_lock(&irq->irq_lock);
+
+        /* The locking order forces us to drop and re-take the locks here. */
+        if ( irq->hw )
+        {
+            spin_unlock(&irq->irq_lock);
+
+            desc = irq_to_desc(irq->hwintid);
+            spin_lock(&desc->lock);
+            spin_lock(&irq->irq_lock);
+
+            /* This h/w IRQ should still be assigned to the virtual IRQ. */
+            ASSERT(irq->hw && desc->irq == irq->hwintid);
+
+            have_desc_lock = true;
+        }
 
         /*
          * If a hardware mapped IRQ has been handled for good, we need to
          * clear the _IRQ_INPROGRESS bit to allow handling of new IRQs.
          */
         if ( irq->hw && !lr_val.active && !lr_val.pending )
-        {
-            struct irq_desc *irqd = irq_to_desc(irq->hwintid);
-
-            clear_bit(_IRQ_INPROGRESS, &irqd->status);
-        }
+            clear_bit(_IRQ_INPROGRESS, &desc->status);
 
         /* Always preserve the active bit */
         irq->active = lr_val.active;
@@ -132,18 +148,19 @@ void vgic_v2_fold_lr_state(struct vcpu *vcpu)
          */
         if ( vgic_irq_is_mapped_level(irq) && lr_val.pending )
         {
-            struct irq_desc *irqd;
-
             ASSERT(irq->hwintid >= VGIC_NR_PRIVATE_IRQS);
 
-            irqd = irq_to_desc(irq->hwintid);
-            irq->line_level = gic_read_pending_state(irqd);
+            irq->line_level = gic_read_pending_state(desc);
 
             if ( !irq->line_level )
-                gic_set_active_state(irqd, false);
+                gic_set_active_state(desc, false);
         }
 
-        spin_unlock_irqrestore(&irq->irq_lock, flags);
+        spin_unlock(&irq->irq_lock);
+        if ( have_desc_lock )
+            spin_unlock(&desc->lock);
+        local_irq_restore(flags);
+
         vgic_put_irq(vcpu->domain, irq);
     }
 
@@ -184,7 +201,7 @@ void vgic_v2_populate_lr(struct vcpu *vcpu, struct vgic_irq *irq, int lr)
 
         if ( vgic_irq_is_sgi(irq->intid) )
         {
-            u32 src = ffs(irq->source);
+            uint32_t src = ffs(irq->source);
 
             BUG_ON(!src);
             lr_val.virt.source = (src - 1);
Julien Grall March 22, 2018, 1:55 p.m. UTC | #3
Hi Andre,

On 03/22/2018 11:04 AM, Andre Przywara wrote:
> This is a "patch to the patch" mentioned above, to make it clear what
> changed:
> We now take the desc lock in vgic_v2_fold_lr_state() when we are dealing
> with a hardware IRQ. This is a bit complicated, because we have to obey
> the existing locking order, so do our infamous "drop-take-retake" dance.
> Also I print a message about using the new VGIC and fix that last
> remaining "u32" usage.
> 
> Please note that I had to initialise "desc" to NULL because my compiler
> (GCC 5.3) is not smart enough to see that we only use it with irq->hw
> set and it's safe. Please let me know if it's me not being smart enough
> here instead ;-)

I would not be surprised if even recent compilers can't deal with
that. It would require quite some work for the compiler to know that
desc is only used when irq->hw is set.

I will comment on the code in v3a.

Cheers,

Patch

diff --git a/xen/arch/arm/vgic/vgic-v2.c b/xen/arch/arm/vgic/vgic-v2.c
new file mode 100644
index 0000000000..8ab0cfe81d
--- /dev/null
+++ b/xen/arch/arm/vgic/vgic-v2.c
@@ -0,0 +1,239 @@ 
+/*
+ * Copyright (C) 2015, 2016 ARM Ltd.
+ * Imported from Linux ("new" KVM VGIC) and heavily adapted to Xen.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <asm/new_vgic.h>
+#include <asm/bug.h>
+#include <asm/gic.h>
+#include <xen/sched.h>
+#include <xen/sizes.h>
+
+#include "vgic.h"
+
+static struct {
+    bool enabled;
+    paddr_t dbase;          /* Distributor interface address */
+    paddr_t cbase;          /* CPU interface address & size */
+    paddr_t csize;
+    paddr_t vbase;          /* Virtual CPU interface address */
+
+    /* Offset to add to get an 8kB contiguous region if GIC is aliased */
+    uint32_t aliased_offset;
+} gic_v2_hw_data;
+
+void vgic_v2_setup_hw(paddr_t dbase, paddr_t cbase, paddr_t csize,
+                      paddr_t vbase, uint32_t aliased_offset)
+{
+    gic_v2_hw_data.enabled = true;
+    gic_v2_hw_data.dbase = dbase;
+    gic_v2_hw_data.cbase = cbase;
+    gic_v2_hw_data.csize = csize;
+    gic_v2_hw_data.vbase = vbase;
+    gic_v2_hw_data.aliased_offset = aliased_offset;
+}
+
+/*
+ * transfer the content of the LRs back into the corresponding ap_list:
+ * - active bit is transferred as is
+ * - pending bit is
+ *   - transferred as is in case of edge sensitive IRQs
+ *   - set to the line-level (resample time) for level sensitive IRQs
+ */
+void vgic_v2_fold_lr_state(struct vcpu *vcpu)
+{
+    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic;
+    unsigned int used_lrs = vcpu->arch.vgic.used_lrs;
+    unsigned long flags;
+    unsigned int lr;
+
+    if ( !used_lrs )    /* No LRs used, so nothing to sync back here. */
+        return;
+
+    gic_hw_ops->update_hcr_status(GICH_HCR_UIE, false);
+
+    for ( lr = 0; lr < used_lrs; lr++ )
+    {
+        struct gic_lr lr_val;
+        uint32_t intid;
+        struct vgic_irq *irq;
+
+        gic_hw_ops->read_lr(lr, &lr_val);
+
+        /*
+         * TODO: Possible optimization to avoid reading LRs:
+         * Read the ELRSR to find out which of our LRs have been cleared
+         * by the guest. We just need to know the IRQ number for those, which
+         * we could save in an array when populating the LRs.
+         * This trades one MMIO access (ELRSR) for possibly more than one (LRs),
+         * but requires some more code to save the IRQ number and to handle
+         * those finished IRQs according to the algorithm below.
+         * We need some numbers to justify this: chances are that we don't
+         * have many LRs in use most of the time, so we might not save much.
+         */
+        gic_hw_ops->clear_lr(lr);
+
+        intid = lr_val.virq;
+        irq = vgic_get_irq(vcpu->domain, vcpu, intid);
+
+        spin_lock_irqsave(&irq->irq_lock, flags);
+
+        /*
+         * If a hardware mapped IRQ has been handled for good, we need to
+         * clear the _IRQ_INPROGRESS bit to allow handling of new IRQs.
+         */
+        if ( irq->hw && !lr_val.active && !lr_val.pending )
+        {
+            struct irq_desc *irqd = irq_to_desc(irq->hwintid);
+
+            clear_bit(_IRQ_INPROGRESS, &irqd->status);
+        }
+
+        /* Always preserve the active bit */
+        irq->active = lr_val.active;
+
+        /* Edge is the only case where we preserve the pending bit */
+        if ( irq->config == VGIC_CONFIG_EDGE && lr_val.pending )
+        {
+            irq->pending_latch = true;
+
+            if ( vgic_irq_is_sgi(intid) )
+                irq->source |= (1U << lr_val.virt.source);
+        }
+
+        /* Clear soft pending state when level irqs have been acked. */
+        if ( irq->config == VGIC_CONFIG_LEVEL && !lr_val.pending )
+            irq->pending_latch = false;
+
+        /*
+         * Level-triggered mapped IRQs are special because we only
+         * observe rising edges as input to the VGIC.
+         *
+         * If the guest never acked the interrupt we have to sample
+         * the physical line and set the line level, because the
+         * device state could have changed or we simply need to
+         * process the still pending interrupt later.
+         *
+         * If this causes us to lower the level, we have to also clear
+         * the physical active state, since we will otherwise never be
+         * told when the interrupt becomes asserted again.
+         */
+        if ( vgic_irq_is_mapped_level(irq) && lr_val.pending )
+        {
+            struct irq_desc *irqd;
+
+            ASSERT(irq->hwintid >= VGIC_NR_PRIVATE_IRQS);
+
+            irqd = irq_to_desc(irq->hwintid);
+            irq->line_level = gic_read_pending_state(irqd);
+
+            if ( !irq->line_level )
+                gic_set_active_state(irqd, false);
+        }
+
+        spin_unlock_irqrestore(&irq->irq_lock, flags);
+        vgic_put_irq(vcpu->domain, irq);
+    }
+
+    gic_hw_ops->update_hcr_status(GICH_HCR_EN, false);
+    vgic_cpu->used_lrs = 0;
+}
+
+/**
+ * vgic_v2_populate_lr() - Populates an LR with the state of a given IRQ.
+ * @vcpu: The VCPU which the given @irq belongs to.
+ * @irq:  The IRQ to convert into an LR. The irq_lock must be held already.
+ * @lr:   The LR number to transfer the state into.
+ *
+ * This moves a virtual IRQ, represented by its vgic_irq, into a list register.
+ * Apart from translating the logical state into the LR bitfields, it also
+ * changes some state in the vgic_irq.
+ * For an edge sensitive IRQ the pending state is cleared in struct vgic_irq,
+ * for a level sensitive IRQ the pending state value is unchanged, as it is
+ * dictated directly by the input line level.
+ *
+ * If @irq describes an SGI with multiple sources, we choose the
+ * lowest-numbered source VCPU and clear that bit in the source bitmap.
+ *
+ * The irq_lock must be held by the caller.
+ */
+void vgic_v2_populate_lr(struct vcpu *vcpu, struct vgic_irq *irq, int lr)
+{
+    struct gic_lr lr_val = {0};
+
+    lr_val.virq = irq->intid;
+
+    if ( irq_is_pending(irq) )
+    {
+        lr_val.pending = true;
+
+        if ( irq->config == VGIC_CONFIG_EDGE )
+            irq->pending_latch = false;
+
+        if ( vgic_irq_is_sgi(irq->intid) )
+        {
+            u32 src = ffs(irq->source);
+
+            BUG_ON(!src);
+            lr_val.virt.source = (src - 1);
+            irq->source &= ~(1 << (src - 1));
+            if ( irq->source )
+                irq->pending_latch = true;
+        }
+    }
+
+    lr_val.active = irq->active;
+
+    if ( irq->hw )
+    {
+        lr_val.hw_status = true;
+        lr_val.hw.pirq = irq->hwintid;
+        /*
+         * Never set pending+active on a HW interrupt, as the
+         * pending state is kept at the physical distributor
+         * level.
+         */
+        if ( irq->active && irq_is_pending(irq) )
+            lr_val.pending = false;
+    }
+    else
+    {
+        if ( irq->config == VGIC_CONFIG_LEVEL )
+            lr_val.virt.eoi = true;
+    }
+
+    /*
+     * Level-triggered mapped IRQs are special because we only observe
+     * rising edges as input to the VGIC.  We therefore lower the line
+     * level here, so that we can take new virtual IRQs.  See
+     * vgic_v2_fold_lr_state for more info.
+     */
+    if ( vgic_irq_is_mapped_level(irq) && lr_val.pending )
+        irq->line_level = false;
+
+    /* The GICv2 LR only holds five bits of priority. */
+    lr_val.priority = irq->priority >> 3;
+
+    gic_hw_ops->write_lr(lr, &lr_val);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c
index 52e1669888..2fa595f4f7 100644
--- a/xen/arch/arm/vgic/vgic.c
+++ b/xen/arch/arm/vgic/vgic.c
@@ -520,6 +520,7 @@  retry:
 
 static void vgic_fold_lr_state(struct vcpu *vcpu)
 {
+    vgic_v2_fold_lr_state(vcpu);
 }
 
 /* Requires the irq_lock to be held. */
@@ -527,6 +528,8 @@  static void vgic_populate_lr(struct vcpu *vcpu,
                              struct vgic_irq *irq, int lr)
 {
     ASSERT(spin_is_locked(&irq->irq_lock));
+
+    vgic_v2_populate_lr(vcpu, irq, lr);
 }
 
 static void vgic_set_underflow(struct vcpu *vcpu)
@@ -640,7 +643,10 @@  void vgic_sync_to_lrs(void)
     spin_lock(&current->arch.vgic.ap_list_lock);
     vgic_flush_lr_state(current);
     spin_unlock(&current->arch.vgic.ap_list_lock);
+
+    gic_hw_ops->update_hcr_status(GICH_HCR_EN, true);
 }
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/vgic/vgic.h b/xen/arch/arm/vgic/vgic.h
index f530cfa078..41cc0c5b54 100644
--- a/xen/arch/arm/vgic/vgic.h
+++ b/xen/arch/arm/vgic/vgic.h
@@ -27,6 +27,11 @@  static inline bool irq_is_pending(struct vgic_irq *irq)
         return irq->pending_latch || irq->line_level;
 }
 
+static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq)
+{
+    return irq->config == VGIC_CONFIG_LEVEL && irq->hw;
+}
+
 struct vgic_irq *vgic_get_irq(struct domain *d, struct vcpu *vcpu,
                               u32 intid);
 void vgic_put_irq(struct domain *d, struct vgic_irq *irq);
@@ -41,6 +46,10 @@  static inline void vgic_get_irq_kref(struct vgic_irq *irq)
     atomic_inc(&irq->refcount);
 }
 
+void vgic_v2_fold_lr_state(struct vcpu *vcpu);
+void vgic_v2_populate_lr(struct vcpu *vcpu, struct vgic_irq *irq, int lr);
+void vgic_v2_set_underflow(struct vcpu *vcpu);
+
 #endif
 
 /*