diff mbox

[5/5] hw: arm_gic_kvm: Add KVM VGIC save/restore logic

Message ID 1377288624-7418-6-git-send-email-christoffer.dall@linaro.org
State New
Headers show

Commit Message

Christoffer Dall Aug. 23, 2013, 8:10 p.m. UTC
Save and restore the ARM KVM VGIC state from the kernel.  We rely on
QEMU to marshal the GICState data structure and therefore simply
synchronize the kernel state with the QEMU emulated state in both
directions.

We take some care on the restore path to check the VGIC has been
configured with enough IRQs and CPU interfaces that we can properly
restore the state, and for separate set/clear registers we first fully
clear the registers and then set the required bits.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 hw/intc/arm_gic_common.c    |    2 +
 hw/intc/arm_gic_kvm.c       |  418 ++++++++++++++++++++++++++++++++++++++++++-
 hw/intc/gic_internal.h      |    1 +
 include/migration/vmstate.h |    6 +
 4 files changed, 425 insertions(+), 2 deletions(-)

Comments

Alexander Graf Aug. 25, 2013, 3:47 p.m. UTC | #1
On 23.08.2013, at 21:10, Christoffer Dall wrote:

> Save and restore the ARM KVM VGIC state from the kernel.  We rely on
> QEMU to marshal the GICState data structure and therefore simply
> synchronize the kernel state with the QEMU emulated state in both
> directions.
> 
> We take some care on the restore path to check the VGIC has been
> configured with enough IRQs and CPU interfaces that we can properly
> restore the state, and for separate set/clear registers we first fully
> clear the registers and then set the required bits.
> 
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> ---
> hw/intc/arm_gic_common.c    |    2 +
> hw/intc/arm_gic_kvm.c       |  418 ++++++++++++++++++++++++++++++++++++++++++-
> hw/intc/gic_internal.h      |    1 +
> include/migration/vmstate.h |    6 +
> 4 files changed, 425 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
> index a50ded2..f39bdc1 100644
> --- a/hw/intc/arm_gic_common.c
> +++ b/hw/intc/arm_gic_common.c
> @@ -77,6 +77,8 @@ static const VMStateDescription vmstate_gic = {
>         VMSTATE_UINT16_ARRAY(running_priority, GICState, NCPU),
>         VMSTATE_UINT16_ARRAY(current_pending, GICState, NCPU),
>         VMSTATE_UINT8_2DARRAY(binary_point, GICState, 2, NCPU),
> +        VMSTATE_UINT32_2DARRAY(active_prio, GICState, 4, NCPU),
> +        VMSTATE_UINT32(num_irq, GICState),
>         VMSTATE_END_OF_LIST()
>     }
> };
> diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
> index 9f0a852..9785281 100644
> --- a/hw/intc/arm_gic_kvm.c
> +++ b/hw/intc/arm_gic_kvm.c
> @@ -23,6 +23,15 @@
> #include "kvm_arm.h"
> #include "gic_internal.h"
> 
> +//#define DEBUG_GIC_KVM
> +
> +#ifdef DEBUG_GIC_KVM
> +#define DPRINTF(fmt, ...) \
> +do { fprintf(stderr, "arm_gic: " fmt , ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) do {} while(0)

You really want to make DPRINTF still be format checked by the compiler. Check out hw/intc/openpic.c for an example how to get there.

> +#endif
> +
> #define TYPE_KVM_ARM_GIC "kvm-arm-gic"
> #define KVM_ARM_GIC(obj) \
>      OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC)
> @@ -73,14 +82,419 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
>     kvm_set_irq(kvm_state, kvm_irq, !!level);
> }
> 
> +static bool kvm_arm_gic_can_save_restore(GICState *s)
> +{
> +    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
> +    return kgc->dev_fd >= 0;
> +}
> +
> +static void kvm_arm_gic_reg_access(GICState *s, int group, int offset,
> +                                   int cpu, uint32_t *val, bool write)
> +{
> +    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
> +    struct kvm_device_attr attr;
> +    int type;
> +
> +    cpu = cpu & 0xff;
> +
> +    attr.flags = 0;
> +    attr.group = group;
> +    attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
> +                 KVM_DEV_ARM_VGIC_CPUID_MASK) |
> +                (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
> +                 KVM_DEV_ARM_VGIC_OFFSET_MASK);
> +    attr.addr = (uint64_t)(long)val;

uintptr_t?

> +
> +    if (write) {
> +        type = KVM_SET_DEVICE_ATTR;
> +    } else {
> +        type = KVM_GET_DEVICE_ATTR;
> +    }
> +
> +    if (kvm_device_ioctl(kgc->dev_fd, type, &attr) < 0) {
> +            fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
> +                    strerror(errno));
> +            abort();
> +    }
> +}
> +
> +static void kvm_arm_gic_dist_reg_access(GICState *s, int offset, int cpu,
> +                                        uint32_t *val, bool write)
> +{
> +    kvm_arm_gic_reg_access(s, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
> +                           offset, cpu, val, write);
> +}
> +
> +static void kvm_arm_gic_cpu_reg_access(GICState *s, int offset, int cpu,
> +                                       uint32_t *val, bool write)
> +{
> +    kvm_arm_gic_reg_access(s, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
> +                           offset, cpu, val, write);
> +}
> +
> +#define for_each_irq_reg(_ctr, _max_irq, _field_width) \
> +    for (_ctr = 0; _ctr < ((_max_irq) / (32 / (_field_width))); _ctr++)
> +
> +/*
> + * Translate from the in-kernel field for an IRQ value to/from the qemu
> + * representation.
> + */
> +typedef void (*vgic_translate_fn)(GICState *s, int irq, int cpu,
> +                                  uint32_t *field, bool to_kernel);
> +
> +/* synthetic translate function used for clear/set registers to completely
> + * clear a setting using a clear-register before setting the remaing bits
> + * using a set-register */
> +static void translate_clear(GICState *s, int irq, int cpu,
> +                            uint32_t *field, bool to_kernel)
> +{
> +    if (to_kernel) {
> +        *field = ~0;
> +    } else {
> +        /* does not make sense: qemu model doesn't use set/clear regs */
> +        abort();
> +    }

I don't understand the to_kernel bits. I thought we're in a device file that only works with KVM?

> +}
> +
> +static void translate_enabled(GICState *s, int irq, int cpu,
> +                              uint32_t *field, bool to_kernel)
> +{
> +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> +
> +    if (to_kernel) {
> +        *field = GIC_TEST_ENABLED(irq, cm);
> +    } else {
> +        if (*field & 1) {
> +            GIC_SET_ENABLED(irq, cm);
> +        }
> +    }
> +}
> +
> +static void translate_pending(GICState *s, int irq, int cpu,
> +                              uint32_t *field, bool to_kernel)
> +{
> +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> +
> +    if (to_kernel) {
> +        *field = GIC_TEST_PENDING(irq, cm);
> +    } else {
> +        if (*field & 1) {
> +            GIC_SET_PENDING(irq, cm);
> +            /* TODO: Capture is level-line is held high in the kernel */
> +        }
> +    }
> +}
> +
> +static void translate_active(GICState *s, int irq, int cpu,
> +                             uint32_t *field, bool to_kernel)
> +{
> +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> +
> +    if (to_kernel) {
> +        *field = GIC_TEST_ACTIVE(irq, cm);
> +    } else {
> +        if (*field & 1) {
> +            GIC_SET_ACTIVE(irq, cm);
> +        }
> +    }
> +}
> +
> +static void translate_trigger(GICState *s, int irq, int cpu,
> +                            uint32_t *field, bool to_kernel)
> +{
> +    if (to_kernel) {
> +        *field = (GIC_TEST_TRIGGER(irq)) ? 0x2 : 0x0;
> +    } else {
> +        if (*field & 0x2) {
> +            GIC_SET_TRIGGER(irq);
> +        }
> +    }
> +}
> +
> +static void translate_priority(GICState *s, int irq, int cpu,
> +                               uint32_t *field, bool to_kernel)
> +{
> +    if (to_kernel) {
> +        *field = GIC_GET_PRIORITY(irq, cpu) & 0xff;
> +    } else {
> +        GIC_SET_PRIORITY(irq, cpu, *field & 0xff);
> +    }
> +}
> +
> +static void translate_targets(GICState *s, int irq, int cpu,
> +                              uint32_t *field, bool to_kernel)
> +{
> +    if (to_kernel) {
> +        *field = s->irq_target[irq] & 0xff;
> +    } else {
> +        s->irq_target[irq] = *field & 0xff;
> +    }
> +}
> +
> +static void translate_sgisource(GICState *s, int irq, int cpu,
> +                                uint32_t *field, bool to_kernel)
> +{
> +    if (to_kernel) {
> +        *field = s->sgi_source[irq][cpu] & 0xff;
> +    } else {
> +        s->sgi_source[irq][cpu] = *field & 0xff;
> +    }
> +}
> +
> +/* Read a register group from the kernel VGIC */
> +static void kvm_arm_gic_dist_readr(GICState *s, uint32_t offset, int width,
> +                                   int maxirq, vgic_translate_fn translate_fn)
> +{
> +    uint32_t reg;
> +    int i;
> +    int j;
> +    int irq;
> +    int cpu;
> +    int regsz = 32 / width; /* irqs per kernel register */
> +    uint32_t field;
> +
> +    for_each_irq_reg(i, maxirq, width) {
> +        irq = i * regsz;
> +        cpu = 0;
> +        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
> +            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, false);
> +            for (j = 0; j < regsz; j++) {
> +                field = reg >> (j * width) & ((1 << width) - 1);
> +                translate_fn(s, irq + j, cpu, &field, false);
> +            }
> +
> +            cpu++;
> +        }
> +        offset += 4;
> +    }
> +}
> +
> +/* Write a register group to the kernel VGIC */
> +static void kvm_arm_gic_dist_writer(GICState *s, uint32_t offset, int width,
> +                                    int maxirq, vgic_translate_fn translate_fn)
> +{
> +    uint32_t reg;
> +    int i;
> +    int j;
> +    int irq;
> +    int cpu;
> +    int regsz = 32 / width; /* irqs per kernel register */
> +    uint32_t field;
> +
> +    for_each_irq_reg(i, maxirq, width) {
> +        irq = i * regsz;
> +        cpu = 0;
> +        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
> +            reg = 0;
> +            for (j = 0; j < regsz; j++) {
> +                translate_fn(s, irq + j, cpu, &field, true);
> +                reg |= (field & ((1 << width) - 1)) << (j * width);
> +            }
> +            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, true);
> +
> +            cpu++;
> +        }
> +        offset += 4;
> +    }
> +}
> +
> static void kvm_arm_gic_put(GICState *s)
> {
> -    /* TODO: there isn't currently a kernel interface to set the GIC state */
> +    uint32_t reg;
> +    int i;
> +    int cpu;
> +    int num_cpu;
> +    int num_irq;
> +
> +    if (!kvm_arm_gic_can_save_restore(s)) {
> +            DPRINTF("Cannot put kernel gic state, no kernel interface");
> +            return;
> +    }
> +
> +    /* Note: We do the restore in a slightly different order than the save
> +     * (where the order doesn't matter and is simply ordered according to the
> +     * register offset values */
> +
> +    /*****************************************************************
> +     * Distributor State
> +     */
> +
> +    /* s->enabled -> GICD_CTLR */
> +    reg = s->enabled;
> +    kvm_arm_gic_dist_reg_access(s, 0x0, 0, &reg, true);
> +
> +    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
> +    kvm_arm_gic_dist_reg_access(s, 0x4, 0, &reg, false);
> +    num_irq = ((reg & 0x1f) + 1) * 32;
> +    num_cpu = ((reg & 0xe0) >> 5) + 1;
> +
> +    if (num_irq < s->num_irq) {
> +            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
> +                    s->num_irq, num_irq);
> +            abort();
> +    } else if (num_cpu != s->num_cpu ) {
> +            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
> +                    s->num_cpu, num_cpu);
> +            /* Did we not create the VCPUs in the kernel yet? */
> +            abort();
> +    }
> +
> +    /* TODO: Consider checking compatibility with the IIDR ? */
> +
> +    /* irq_state[n].enabled -> GICD_ISENABLERn */
> +    kvm_arm_gic_dist_writer(s, 0x180, 1, s->num_irq, translate_clear);
> +    kvm_arm_gic_dist_writer(s, 0x100, 1, s->num_irq, translate_enabled);

Don't these magic numbers have #define'd equivalents in their GIC implementation header file? Then you don't need the comments above each of these.

I also find the function names quite long, but I guess as long as everything still fits that doesn't really matter.


Alex
Peter Maydell Sept. 6, 2013, 3:13 p.m. UTC | #2
On 23 August 2013 21:10, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> Save and restore the ARM KVM VGIC state from the kernel.  We rely on
> QEMU to marshal the GICState data structure and therefore simply
> synchronize the kernel state with the QEMU emulated state in both
> directions.
>
> We take some care on the restore path to check the VGIC has been
> configured with enough IRQs and CPU interfaces that we can properly
> restore the state, and for separate set/clear registers we first fully
> clear the registers and then set the required bits.
>
> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> ---
>  hw/intc/arm_gic_common.c    |    2 +
>  hw/intc/arm_gic_kvm.c       |  418 ++++++++++++++++++++++++++++++++++++++++++-
>  hw/intc/gic_internal.h      |    1 +
>  include/migration/vmstate.h |    6 +
>  4 files changed, 425 insertions(+), 2 deletions(-)
>
> diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
> index a50ded2..f39bdc1 100644
> --- a/hw/intc/arm_gic_common.c
> +++ b/hw/intc/arm_gic_common.c
> @@ -77,6 +77,8 @@ static const VMStateDescription vmstate_gic = {
>          VMSTATE_UINT16_ARRAY(running_priority, GICState, NCPU),
>          VMSTATE_UINT16_ARRAY(current_pending, GICState, NCPU),
>          VMSTATE_UINT8_2DARRAY(binary_point, GICState, 2, NCPU),
> +        VMSTATE_UINT32_2DARRAY(active_prio, GICState, 4, NCPU),
> +        VMSTATE_UINT32(num_irq, GICState),

You don't need to save and restore num_irq, it is constant
for the lifetime of the device (set by a property on the
device which is fixed by the board model). Migration only
works between two identical command lines; if the command
lines are identical at each end then num_irq should be too.

>          VMSTATE_END_OF_LIST()
>      }
>  };
> diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
> index 9f0a852..9785281 100644
> --- a/hw/intc/arm_gic_kvm.c
> +++ b/hw/intc/arm_gic_kvm.c
> @@ -23,6 +23,15 @@
>  #include "kvm_arm.h"
>  #include "gic_internal.h"
>
> +//#define DEBUG_GIC_KVM
> +
> +#ifdef DEBUG_GIC_KVM
> +#define DPRINTF(fmt, ...) \
> +do { fprintf(stderr, "arm_gic: " fmt , ## __VA_ARGS__); } while (0)
> +#else
> +#define DPRINTF(fmt, ...) do {} while(0)
> +#endif
> +
>  #define TYPE_KVM_ARM_GIC "kvm-arm-gic"
>  #define KVM_ARM_GIC(obj) \
>       OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC)
> @@ -73,14 +82,419 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
>      kvm_set_irq(kvm_state, kvm_irq, !!level);
>  }
>
> +static bool kvm_arm_gic_can_save_restore(GICState *s)
> +{
> +    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
> +    return kgc->dev_fd >= 0;
> +}
> +
> +static void kvm_arm_gic_reg_access(GICState *s, int group, int offset,
> +                                   int cpu, uint32_t *val, bool write)
> +{
> +    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
> +    struct kvm_device_attr attr;
> +    int type;
> +
> +    cpu = cpu & 0xff;
> +
> +    attr.flags = 0;
> +    attr.group = group;
> +    attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
> +                 KVM_DEV_ARM_VGIC_CPUID_MASK) |
> +                (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
> +                 KVM_DEV_ARM_VGIC_OFFSET_MASK);
> +    attr.addr = (uint64_t)(long)val;

(uintptr_t) should suffice.

> +
> +    if (write) {
> +        type = KVM_SET_DEVICE_ATTR;
> +    } else {
> +        type = KVM_GET_DEVICE_ATTR;
> +    }
> +
> +    if (kvm_device_ioctl(kgc->dev_fd, type, &attr) < 0) {
> +            fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
> +                    strerror(errno));

Your kvm_device_ioctl returns -errno rather than setting
errno, doesn't it?

> +            abort();
> +    }
> +}
> +

> +/* Read a register group from the kernel VGIC */
> +static void kvm_arm_gic_dist_readr(GICState *s, uint32_t offset, int width,
> +                                   int maxirq, vgic_translate_fn translate_fn)
> +{
> +    uint32_t reg;
> +    int i;
> +    int j;
> +    int irq;
> +    int cpu;
> +    int regsz = 32 / width; /* irqs per kernel register */
> +    uint32_t field;
> +
> +    for_each_irq_reg(i, maxirq, width) {
> +        irq = i * regsz;
> +        cpu = 0;
> +        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
> +            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, false);
> +            for (j = 0; j < regsz; j++) {
> +                field = reg >> (j * width) & ((1 << width) - 1);

    field = extract32(reg, j * width, width);

> +                translate_fn(s, irq + j, cpu, &field, false);
> +            }
> +
> +            cpu++;
> +        }
> +        offset += 4;
> +    }
> +}
> +
> +/* Write a register group to the kernel VGIC */
> +static void kvm_arm_gic_dist_writer(GICState *s, uint32_t offset, int width,
> +                                    int maxirq, vgic_translate_fn translate_fn)
> +{
> +    uint32_t reg;
> +    int i;
> +    int j;
> +    int irq;
> +    int cpu;
> +    int regsz = 32 / width; /* irqs per kernel register */
> +    uint32_t field;
> +
> +    for_each_irq_reg(i, maxirq, width) {
> +        irq = i * regsz;
> +        cpu = 0;
> +        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
> +            reg = 0;
> +            for (j = 0; j < regsz; j++) {
> +                translate_fn(s, irq + j, cpu, &field, true);
> +                reg |= (field & ((1 << width) - 1)) << (j * width);

   reg = deposit32(reg, j * width, width, field);

> +            }
> +            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, true);
> +
> +            cpu++;
> +        }
> +        offset += 4;
> +    }
> +}


>  static void kvm_arm_gic_reset(DeviceState *dev)
> diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
> index 424a41f..9771163 100644
> --- a/hw/intc/gic_internal.h
> +++ b/hw/intc/gic_internal.h
> @@ -100,6 +100,7 @@ typedef struct GICState {
>
>      /* these registers are mainly used for save/restore of KVM state */
>      uint8_t  binary_point[2][NCPU]; /* [0]: group 0, [1]: group 1 */
> +    uint32_t active_prio[4][NCPU]; /* implementation defined layout */

You can't make this impdef in QEMU's state, that would mean
we couldn't do migration between implementations which
use different layouts. We need to pick a standard ("whatever
the ARM v2 GIC implementation is" seems the obvious choice)
and make the kernel convert if it's on an implementation which
doesn't follow that version.

>      uint32_t num_cpu;
>
> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> index 1c31b5d..e5538c7 100644
> --- a/include/migration/vmstate.h
> +++ b/include/migration/vmstate.h
> @@ -633,9 +633,15 @@ extern const VMStateInfo vmstate_info_bitmap;
>  #define VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v)                        \
>      VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint32, uint32_t)
>
> +#define VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, _v)                \
> +    VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint32, uint32_t)
> +
>  #define VMSTATE_UINT32_ARRAY(_f, _s, _n)                              \
>      VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0)
>
> +#define VMSTATE_UINT32_2DARRAY(_f, _s, _n1, _n2)                      \
> +    VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, 0)
> +
>  #define VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v)                        \
>      VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint64, uint64_t)

Can you put the vmstate improvements in their own patch, please?

thanks
-- PMM
Christoffer Dall Sept. 20, 2013, 7:50 p.m. UTC | #3
On Fri, Sep 06, 2013 at 04:13:32PM +0100, Peter Maydell wrote:
> On 23 August 2013 21:10, Christoffer Dall <christoffer.dall@linaro.org> wrote:
> > Save and restore the ARM KVM VGIC state from the kernel.  We rely on
> > QEMU to marshal the GICState data structure and therefore simply
> > synchronize the kernel state with the QEMU emulated state in both
> > directions.
> >
> > We take some care on the restore path to check the VGIC has been
> > configured with enough IRQs and CPU interfaces that we can properly
> > restore the state, and for separate set/clear registers we first fully
> > clear the registers and then set the required bits.
> >
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > ---
> >  hw/intc/arm_gic_common.c    |    2 +
> >  hw/intc/arm_gic_kvm.c       |  418 ++++++++++++++++++++++++++++++++++++++++++-
> >  hw/intc/gic_internal.h      |    1 +
> >  include/migration/vmstate.h |    6 +
> >  4 files changed, 425 insertions(+), 2 deletions(-)
> >
> > diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
> > index a50ded2..f39bdc1 100644
> > --- a/hw/intc/arm_gic_common.c
> > +++ b/hw/intc/arm_gic_common.c
> > @@ -77,6 +77,8 @@ static const VMStateDescription vmstate_gic = {
> >          VMSTATE_UINT16_ARRAY(running_priority, GICState, NCPU),
> >          VMSTATE_UINT16_ARRAY(current_pending, GICState, NCPU),
> >          VMSTATE_UINT8_2DARRAY(binary_point, GICState, 2, NCPU),
> > +        VMSTATE_UINT32_2DARRAY(active_prio, GICState, 4, NCPU),
> > +        VMSTATE_UINT32(num_irq, GICState),
> 
> You don't need to save and restore num_irq, it is constant
> for the lifetime of the device (set by a property on the
> device which is fixed by the board model). Migration only
> works between two identical command lines; if the command
> lines are identical at each end then num_irq should be too.
> 

ok

> >          VMSTATE_END_OF_LIST()
> >      }
> >  };
> > diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
> > index 9f0a852..9785281 100644
> > --- a/hw/intc/arm_gic_kvm.c
> > +++ b/hw/intc/arm_gic_kvm.c
> > @@ -23,6 +23,15 @@
> >  #include "kvm_arm.h"
> >  #include "gic_internal.h"
> >
> > +//#define DEBUG_GIC_KVM
> > +
> > +#ifdef DEBUG_GIC_KVM
> > +#define DPRINTF(fmt, ...) \
> > +do { fprintf(stderr, "arm_gic: " fmt , ## __VA_ARGS__); } while (0)
> > +#else
> > +#define DPRINTF(fmt, ...) do {} while(0)
> > +#endif
> > +
> >  #define TYPE_KVM_ARM_GIC "kvm-arm-gic"
> >  #define KVM_ARM_GIC(obj) \
> >       OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC)
> > @@ -73,14 +82,419 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
> >      kvm_set_irq(kvm_state, kvm_irq, !!level);
> >  }
> >
> > +static bool kvm_arm_gic_can_save_restore(GICState *s)
> > +{
> > +    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
> > +    return kgc->dev_fd >= 0;
> > +}
> > +
> > +static void kvm_arm_gic_reg_access(GICState *s, int group, int offset,
> > +                                   int cpu, uint32_t *val, bool write)
> > +{
> > +    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
> > +    struct kvm_device_attr attr;
> > +    int type;
> > +
> > +    cpu = cpu & 0xff;
> > +
> > +    attr.flags = 0;
> > +    attr.group = group;
> > +    attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
> > +                 KVM_DEV_ARM_VGIC_CPUID_MASK) |
> > +                (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
> > +                 KVM_DEV_ARM_VGIC_OFFSET_MASK);
> > +    attr.addr = (uint64_t)(long)val;
> 
> (uintptr_t) should suffice.
> 

yes

> > +
> > +    if (write) {
> > +        type = KVM_SET_DEVICE_ATTR;
> > +    } else {
> > +        type = KVM_GET_DEVICE_ATTR;
> > +    }
> > +
> > +    if (kvm_device_ioctl(kgc->dev_fd, type, &attr) < 0) {
> > +            fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
> > +                    strerror(errno));
> 
> Your kvm_device_ioctl returns -errno rather than setting
> errno, doesn't it?
> 

yes, it does

> > +            abort();
> > +    }
> > +}
> > +
> 
> > +/* Read a register group from the kernel VGIC */
> > +static void kvm_arm_gic_dist_readr(GICState *s, uint32_t offset, int width,
> > +                                   int maxirq, vgic_translate_fn translate_fn)
> > +{
> > +    uint32_t reg;
> > +    int i;
> > +    int j;
> > +    int irq;
> > +    int cpu;
> > +    int regsz = 32 / width; /* irqs per kernel register */
> > +    uint32_t field;
> > +
> > +    for_each_irq_reg(i, maxirq, width) {
> > +        irq = i * regsz;
> > +        cpu = 0;
> > +        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
> > +            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, false);
> > +            for (j = 0; j < regsz; j++) {
> > +                field = reg >> (j * width) & ((1 << width) - 1);
> 
>     field = extract32(reg, j * width, width);
> 

ok

> > +                translate_fn(s, irq + j, cpu, &field, false);
> > +            }
> > +
> > +            cpu++;
> > +        }
> > +        offset += 4;
> > +    }
> > +}
> > +
> > +/* Write a register group to the kernel VGIC */
> > +static void kvm_arm_gic_dist_writer(GICState *s, uint32_t offset, int width,
> > +                                    int maxirq, vgic_translate_fn translate_fn)
> > +{
> > +    uint32_t reg;
> > +    int i;
> > +    int j;
> > +    int irq;
> > +    int cpu;
> > +    int regsz = 32 / width; /* irqs per kernel register */
> > +    uint32_t field;
> > +
> > +    for_each_irq_reg(i, maxirq, width) {
> > +        irq = i * regsz;
> > +        cpu = 0;
> > +        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
> > +            reg = 0;
> > +            for (j = 0; j < regsz; j++) {
> > +                translate_fn(s, irq + j, cpu, &field, true);
> > +                reg |= (field & ((1 << width) - 1)) << (j * width);
> 
>    reg = deposit32(reg, j * width, width, field);
> 

ok

> > +            }
> > +            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, true);
> > +
> > +            cpu++;
> > +        }
> > +        offset += 4;
> > +    }
> > +}
> 
> 
> >  static void kvm_arm_gic_reset(DeviceState *dev)
> > diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
> > index 424a41f..9771163 100644
> > --- a/hw/intc/gic_internal.h
> > +++ b/hw/intc/gic_internal.h
> > @@ -100,6 +100,7 @@ typedef struct GICState {
> >
> >      /* these registers are mainly used for save/restore of KVM state */
> >      uint8_t  binary_point[2][NCPU]; /* [0]: group 0, [1]: group 1 */
> > +    uint32_t active_prio[4][NCPU]; /* implementation defined layout */
> 
> You can't make this impdef in QEMU's state, that would mean
> we couldn't do migration between implementations which
> use different layouts. We need to pick a standard ("whatever
> the ARM v2 GIC implementation is" seems the obvious choice)
> and make the kernel convert if it's on an implementation which
> doesn't follow that version.
> 

Implementation defined as in implementation defined in the
architecture.  I didn't think it would make sense to choose a format for
an a15 implementation, for example, and then translate to that format
for other cores using the ARM gic.  Wouldn't migration only be support
for same qemu model to same qemu model, in which case the format of this
register would always be the same, and the kernel must return a format
corresponding to the target cpu.  Am I missing something here?

> >      uint32_t num_cpu;
> >
> > diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> > index 1c31b5d..e5538c7 100644
> > --- a/include/migration/vmstate.h
> > +++ b/include/migration/vmstate.h
> > @@ -633,9 +633,15 @@ extern const VMStateInfo vmstate_info_bitmap;
> >  #define VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v)                        \
> >      VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint32, uint32_t)
> >
> > +#define VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, _v)                \
> > +    VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint32, uint32_t)
> > +
> >  #define VMSTATE_UINT32_ARRAY(_f, _s, _n)                              \
> >      VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0)
> >
> > +#define VMSTATE_UINT32_2DARRAY(_f, _s, _n1, _n2)                      \
> > +    VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, 0)
> > +
> >  #define VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v)                        \
> >      VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint64, uint64_t)
> 
> Can you put the vmstate improvements in their own patch, please?
> 

yes,

-Christoffer
Christoffer Dall Sept. 20, 2013, 8:39 p.m. UTC | #4
On Sun, Aug 25, 2013 at 04:47:59PM +0100, Alexander Graf wrote:
> 
> On 23.08.2013, at 21:10, Christoffer Dall wrote:
> 
> > Save and restore the ARM KVM VGIC state from the kernel.  We rely on
> > QEMU to marshal the GICState data structure and therefore simply
> > synchronize the kernel state with the QEMU emulated state in both
> > directions.
> > 
> > We take some care on the restore path to check the VGIC has been
> > configured with enough IRQs and CPU interfaces that we can properly
> > restore the state, and for separate set/clear registers we first fully
> > clear the registers and then set the required bits.
> > 
> > Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
> > ---
> > hw/intc/arm_gic_common.c    |    2 +
> > hw/intc/arm_gic_kvm.c       |  418 ++++++++++++++++++++++++++++++++++++++++++-
> > hw/intc/gic_internal.h      |    1 +
> > include/migration/vmstate.h |    6 +
> > 4 files changed, 425 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
> > index a50ded2..f39bdc1 100644
> > --- a/hw/intc/arm_gic_common.c
> > +++ b/hw/intc/arm_gic_common.c
> > @@ -77,6 +77,8 @@ static const VMStateDescription vmstate_gic = {
> >         VMSTATE_UINT16_ARRAY(running_priority, GICState, NCPU),
> >         VMSTATE_UINT16_ARRAY(current_pending, GICState, NCPU),
> >         VMSTATE_UINT8_2DARRAY(binary_point, GICState, 2, NCPU),
> > +        VMSTATE_UINT32_2DARRAY(active_prio, GICState, 4, NCPU),
> > +        VMSTATE_UINT32(num_irq, GICState),
> >         VMSTATE_END_OF_LIST()
> >     }
> > };
> > diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
> > index 9f0a852..9785281 100644
> > --- a/hw/intc/arm_gic_kvm.c
> > +++ b/hw/intc/arm_gic_kvm.c
> > @@ -23,6 +23,15 @@
> > #include "kvm_arm.h"
> > #include "gic_internal.h"
> > 
> > +//#define DEBUG_GIC_KVM
> > +
> > +#ifdef DEBUG_GIC_KVM
> > +#define DPRINTF(fmt, ...) \
> > +do { fprintf(stderr, "arm_gic: " fmt , ## __VA_ARGS__); } while (0)
> > +#else
> > +#define DPRINTF(fmt, ...) do {} while(0)
> 
> You really want to make DPRINTF still be format checked by the compiler. Check out hw/intc/openpic.c for an example how to get there.
> 

good point, thanks for the pointer.

> > +#endif
> > +
> > #define TYPE_KVM_ARM_GIC "kvm-arm-gic"
> > #define KVM_ARM_GIC(obj) \
> >      OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC)
> > @@ -73,14 +82,419 @@ static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
> >     kvm_set_irq(kvm_state, kvm_irq, !!level);
> > }
> > 
> > +static bool kvm_arm_gic_can_save_restore(GICState *s)
> > +{
> > +    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
> > +    return kgc->dev_fd >= 0;
> > +}
> > +
> > +static void kvm_arm_gic_reg_access(GICState *s, int group, int offset,
> > +                                   int cpu, uint32_t *val, bool write)
> > +{
> > +    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
> > +    struct kvm_device_attr attr;
> > +    int type;
> > +
> > +    cpu = cpu & 0xff;
> > +
> > +    attr.flags = 0;
> > +    attr.group = group;
> > +    attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
> > +                 KVM_DEV_ARM_VGIC_CPUID_MASK) |
> > +                (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
> > +                 KVM_DEV_ARM_VGIC_OFFSET_MASK);
> > +    attr.addr = (uint64_t)(long)val;
> 
> uintptr_t?
> 

yup

> > +
> > +    if (write) {
> > +        type = KVM_SET_DEVICE_ATTR;
> > +    } else {
> > +        type = KVM_GET_DEVICE_ATTR;
> > +    }
> > +
> > +    if (kvm_device_ioctl(kgc->dev_fd, type, &attr) < 0) {
> > +            fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
> > +                    strerror(errno));
> > +            abort();
> > +    }
> > +}
> > +
> > +static void kvm_arm_gic_dist_reg_access(GICState *s, int offset, int cpu,
> > +                                        uint32_t *val, bool write)
> > +{
> > +    kvm_arm_gic_reg_access(s, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
> > +                           offset, cpu, val, write);
> > +}
> > +
> > +static void kvm_arm_gic_cpu_reg_access(GICState *s, int offset, int cpu,
> > +                                       uint32_t *val, bool write)
> > +{
> > +    kvm_arm_gic_reg_access(s, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
> > +                           offset, cpu, val, write);
> > +}
> > +
> > +#define for_each_irq_reg(_ctr, _max_irq, _field_width) \
> > +    for (_ctr = 0; _ctr < ((_max_irq) / (32 / (_field_width))); _ctr++)
> > +
> > +/*
> > + * Translate from the in-kernel field for an IRQ value to/from the qemu
> > + * representation.
> > + */
> > +typedef void (*vgic_translate_fn)(GICState *s, int irq, int cpu,
> > +                                  uint32_t *field, bool to_kernel);
> > +
> > +/* synthetic translate function used for clear/set registers to completely
> > + * clear a setting using a clear-register before setting the remaing bits
> > + * using a set-register */
> > +static void translate_clear(GICState *s, int irq, int cpu,
> > +                            uint32_t *field, bool to_kernel)
> > +{
> > +    if (to_kernel) {
> > +        *field = ~0;
> > +    } else {
> > +        /* does not make sense: qemu model doesn't use set/clear regs */
> > +        abort();
> > +    }
> 
> I don't understand the to_kernel bits. I thought we're in a device file that only works with KVM?
> 

yeah, but you need to retrieve the state from the kernel on a suspend
"from_kernel" and you need to save the state "to_kernel" on a resume
operation.  These small functions translate between a
qemu-representation and a KVM representation, which allows us to do
saving of the in-kernel state by reusing the qemu model of the gic.

This specific little function is used for MMIO registers that have a
separate register for clearing bits than setting bits, like Paolo
describes.  The performance of these operations are going to be vastly
dominated by saving/restoring your memory and I therefore prefer keeping
the interface coherent with the hardware spec, instead of rewriting
things to save a few writes to the kernel.  Does that answer your
question?

> > +}
> > +
> > +static void translate_enabled(GICState *s, int irq, int cpu,
> > +                              uint32_t *field, bool to_kernel)
> > +{
> > +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> > +
> > +    if (to_kernel) {
> > +        *field = GIC_TEST_ENABLED(irq, cm);
> > +    } else {
> > +        if (*field & 1) {
> > +            GIC_SET_ENABLED(irq, cm);
> > +        }
> > +    }
> > +}
> > +
> > +static void translate_pending(GICState *s, int irq, int cpu,
> > +                              uint32_t *field, bool to_kernel)
> > +{
> > +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> > +
> > +    if (to_kernel) {
> > +        *field = GIC_TEST_PENDING(irq, cm);
> > +    } else {
> > +        if (*field & 1) {
> > +            GIC_SET_PENDING(irq, cm);
> > +            /* TODO: Capture is level-line is held high in the kernel */
> > +        }
> > +    }
> > +}
> > +
> > +static void translate_active(GICState *s, int irq, int cpu,
> > +                             uint32_t *field, bool to_kernel)
> > +{
> > +    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
> > +
> > +    if (to_kernel) {
> > +        *field = GIC_TEST_ACTIVE(irq, cm);
> > +    } else {
> > +        if (*field & 1) {
> > +            GIC_SET_ACTIVE(irq, cm);
> > +        }
> > +    }
> > +}
> > +
> > +static void translate_trigger(GICState *s, int irq, int cpu,
> > +                            uint32_t *field, bool to_kernel)
> > +{
> > +    if (to_kernel) {
> > +        *field = (GIC_TEST_TRIGGER(irq)) ? 0x2 : 0x0;
> > +    } else {
> > +        if (*field & 0x2) {
> > +            GIC_SET_TRIGGER(irq);
> > +        }
> > +    }
> > +}
> > +
> > +static void translate_priority(GICState *s, int irq, int cpu,
> > +                               uint32_t *field, bool to_kernel)
> > +{
> > +    if (to_kernel) {
> > +        *field = GIC_GET_PRIORITY(irq, cpu) & 0xff;
> > +    } else {
> > +        GIC_SET_PRIORITY(irq, cpu, *field & 0xff);
> > +    }
> > +}
> > +
> > +static void translate_targets(GICState *s, int irq, int cpu,
> > +                              uint32_t *field, bool to_kernel)
> > +{
> > +    if (to_kernel) {
> > +        *field = s->irq_target[irq] & 0xff;
> > +    } else {
> > +        s->irq_target[irq] = *field & 0xff;
> > +    }
> > +}
> > +
> > +static void translate_sgisource(GICState *s, int irq, int cpu,
> > +                                uint32_t *field, bool to_kernel)
> > +{
> > +    if (to_kernel) {
> > +        *field = s->sgi_source[irq][cpu] & 0xff;
> > +    } else {
> > +        s->sgi_source[irq][cpu] = *field & 0xff;
> > +    }
> > +}
> > +
> > +/* Read a register group from the kernel VGIC */
> > +static void kvm_arm_gic_dist_readr(GICState *s, uint32_t offset, int width,
> > +                                   int maxirq, vgic_translate_fn translate_fn)
> > +{
> > +    uint32_t reg;
> > +    int i;
> > +    int j;
> > +    int irq;
> > +    int cpu;
> > +    int regsz = 32 / width; /* irqs per kernel register */
> > +    uint32_t field;
> > +
> > +    for_each_irq_reg(i, maxirq, width) {
> > +        irq = i * regsz;
> > +        cpu = 0;
> > +        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
> > +            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, false);
> > +            for (j = 0; j < regsz; j++) {
> > +                field = reg >> (j * width) & ((1 << width) - 1);
> > +                translate_fn(s, irq + j, cpu, &field, false);
> > +            }
> > +
> > +            cpu++;
> > +        }
> > +        offset += 4;
> > +    }
> > +}
> > +
> > +/* Write a register group to the kernel VGIC */
> > +static void kvm_arm_gic_dist_writer(GICState *s, uint32_t offset, int width,
> > +                                    int maxirq, vgic_translate_fn translate_fn)
> > +{
> > +    uint32_t reg;
> > +    int i;
> > +    int j;
> > +    int irq;
> > +    int cpu;
> > +    int regsz = 32 / width; /* irqs per kernel register */
> > +    uint32_t field;
> > +
> > +    for_each_irq_reg(i, maxirq, width) {
> > +        irq = i * regsz;
> > +        cpu = 0;
> > +        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
> > +            reg = 0;
> > +            for (j = 0; j < regsz; j++) {
> > +                translate_fn(s, irq + j, cpu, &field, true);
> > +                reg |= (field & ((1 << width) - 1)) << (j * width);
> > +            }
> > +            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, true);
> > +
> > +            cpu++;
> > +        }
> > +        offset += 4;
> > +    }
> > +}
> > +
> > static void kvm_arm_gic_put(GICState *s)
> > {
> > -    /* TODO: there isn't currently a kernel interface to set the GIC state */
> > +    uint32_t reg;
> > +    int i;
> > +    int cpu;
> > +    int num_cpu;
> > +    int num_irq;
> > +
> > +    if (!kvm_arm_gic_can_save_restore(s)) {
> > +            DPRINTF("Cannot put kernel gic state, no kernel interface");
> > +            return;
> > +    }
> > +
> > +    /* Note: We do the restore in a slightly different order than the save
> > +     * (where the order doesn't matter and is simply ordered according to the
> > +     * register offset values */
> > +
> > +    /*****************************************************************
> > +     * Distributor State
> > +     */
> > +
> > +    /* s->enabled -> GICD_CTLR */
> > +    reg = s->enabled;
> > +    kvm_arm_gic_dist_reg_access(s, 0x0, 0, &reg, true);
> > +
> > +    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
> > +    kvm_arm_gic_dist_reg_access(s, 0x4, 0, &reg, false);
> > +    num_irq = ((reg & 0x1f) + 1) * 32;
> > +    num_cpu = ((reg & 0xe0) >> 5) + 1;
> > +
> > +    if (num_irq < s->num_irq) {
> > +            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
> > +                    s->num_irq, num_irq);
> > +            abort();
> > +    } else if (num_cpu != s->num_cpu ) {
> > +            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
> > +                    s->num_cpu, num_cpu);
> > +            /* Did we not create the VCPUs in the kernel yet? */
> > +            abort();
> > +    }
> > +
> > +    /* TODO: Consider checking compatibility with the IIDR ? */
> > +
> > +    /* irq_state[n].enabled -> GICD_ISENABLERn */
> > +    kvm_arm_gic_dist_writer(s, 0x180, 1, s->num_irq, translate_clear);
> > +    kvm_arm_gic_dist_writer(s, 0x100, 1, s->num_irq, translate_enabled);
> 
> Don't these magic numbers have #define'd equivalents in their GIC implementation header file? Then you don't need the comments above each of these.

in the kernel yes, in qemu no.  Unless I'm mistaken, in which case
please point me in the right direction.  I don't want to taint the git
history with a rewrite of every mmio function in the tcg version or have
defines for one file and not use them in the other file, which is also a
bit weird.  If people feel this is necessary, I can have a rewrite of
the whole thing, but I prefer not to.  Surprisingly.

> 
> I also find the function names quite long, but I guess as long as everything still fits that doesn't really matter.
> 
I think the names are as descriptive as they should be, but we can drop
some of the kvm_arm_gic prefixes if that's what you have in mind. So
far, most calls and definitions fit on 80 chars width so I'm not too
worried...

-Christoffer
Peter Maydell Sept. 20, 2013, 9:22 p.m. UTC | #5
On 21 September 2013 04:50, Christoffer Dall
<christoffer.dall@linaro.org> wrote:
> On Fri, Sep 06, 2013 at 04:13:32PM +0100, Peter Maydell wrote:
>> >      /* these registers are mainly used for save/restore of KVM state */
>> >      uint8_t  binary_point[2][NCPU]; /* [0]: group 0, [1]: group 1 */
>> > +    uint32_t active_prio[4][NCPU]; /* implementation defined layout */
>>
>> You can't make this impdef in QEMU's state, that would mean
>> we couldn't do migration between implementations which
>> use different layouts. We need to pick a standard ("whatever
>> the ARM v2 GIC implementation is" seems the obvious choice)
>> and make the kernel convert if it's on an implementation which
>> doesn't follow that version.

> Implementation defined as in implementation defined in the
> architecture.  I didn't think it would make sense to choose a format for
> an a15 implementation, for example, and then translate to that format
> for other cores using the ARM gic.  Wouldn't migration only be support
> for same qemu model to same qemu model, in which case the format of this
> register would always be the same, and the kernel must return a format
> corresponding to the target cpu.  Am I missing something here?

I know it's architecturally impdef, but there are a couple of issues
here:
 *) moving to the 'create me an irqchip' API is separating out the
"which GIC do I have?" and "which CPU do I have?" questions
somewhat, so it seems a shame to tangle them up again

 *) for getting TCG<->KVM and KVM-with-non-host-CPU cases
right we need to do translation anyway, or at least think about it.
So we need to at minimum specifically document what the format
is for the CPUs we care about. At that point we might as well have
a standard format. IIRC the GIC spec defines a "this is the sensible
format" anyway.

In practice, for the v7 and v8 CPUs we support, what format do
they use?


-- PMM
Christoffer Dall Sept. 20, 2013, 9:46 p.m. UTC | #6
On Sat, Sep 21, 2013 at 06:22:23AM +0900, Peter Maydell wrote:
> On 21 September 2013 04:50, Christoffer Dall
> <christoffer.dall@linaro.org> wrote:
> > On Fri, Sep 06, 2013 at 04:13:32PM +0100, Peter Maydell wrote:
> >> >      /* these registers are mainly used for save/restore of KVM state */
> >> >      uint8_t  binary_point[2][NCPU]; /* [0]: group 0, [1]: group 1 */
> >> > +    uint32_t active_prio[4][NCPU]; /* implementation defined layout */
> >>
> >> You can't make this impdef in QEMU's state, that would mean
> >> we couldn't do migration between implementations which
> >> use different layouts. We need to pick a standard ("whatever
> >> the ARM v2 GIC implementation is" seems the obvious choice)
> >> and make the kernel convert if it's on an implementation which
> >> doesn't follow that version.
> 
> > Implementation defined as in implementation defined in the
> > architecture.  I didn't think it would make sense to choose a format for
> > an a15 implementation, for example, and then translate to that format
> > for other cores using the ARM gic.  Wouldn't migration only be support
> > for same qemu model to same qemu model, in which case the format of this
> > register would always be the same, and the kernel must return a format
> > corresponding to the target cpu.  Am I missing something here?
> 
> I know it's architecturally impdef, but there are a couple of issues
> here:
>  *) moving to the 'create me an irqchip' API is separating out the
> "which GIC do I have?" and "which CPU do I have?" questions
> somewhat, so it seems a shame to tangle them up again

Hmm, they are inherently coupled in hardware at least for v7 though,
right?

> 
>  *) for getting TCG<->KVM and KVM-with-non-host-CPU cases
> right we need to do translation anyway, or at least think about it.

Why? Wouldn't we always only support the case where QEMU emulates the
same model as KVM in the first case, and the kernel should behave the
same and export the same state if you ask for a specific target no
matter what the underlying hardware is, no?

> So we need to at minimum specifically document what the format
> is for the CPUs we care about. At that point we might as well have
> a standard format. IIRC the GIC spec defines a "this is the sensible
> format" anyway.
> 
> In practice, for the v7 and v8 CPUs we support, what format do
> they use?
> 

It's not particularly clear as far as I can read it, but I'll have to
investigate in more details.  Also, I'm not quite clear on how TCG GIC
reads/writes would get translated to the proper format depending on the
core in that case, and we would have to have code in the arm_gic_kvm
file to detect the emulated target and do translation.  I'm failing to
see the benefits?

-Christoffer
Peter Maydell Sept. 21, 2013, 9:38 a.m. UTC | #7
On 21 September 2013 06:46, Christoffer Dall
<christoffer.dall@linaro.org> wrote:
> On Sat, Sep 21, 2013 at 06:22:23AM +0900, Peter Maydell wrote:
>>  *) for getting TCG<->KVM and KVM-with-non-host-CPU cases
>> right we need to do translation anyway, or at least think about it.
>
> Why? Wouldn't we always only support the case where QEMU emulates the
> same model as KVM in the first case, and the kernel should behave the
> same and export the same state if you ask for a specific target no
> matter what the underlying hardware is, no?

If the kernel has to be able to do translation of the state, why
not make its life easier by having it only need to do one thing
(host h/w format -> whatever standard format we pick)
rather than lots and lots of things
(host CPU X h/w format -> format for supported guest CPU A,
 host CPU X h/w format -> format for supported guest CPU B,
 host CPU Y h/w format -> format for guest CPU A,
 host CPU Y h/w format -> format for guest CPU B,
 etc etc etc)

? That's basically a cross product over every CPU we
support.

-- PMM
Christoffer Dall Sept. 23, 2013, 2:14 a.m. UTC | #8
On Sat, Sep 21, 2013 at 06:38:19PM +0900, Peter Maydell wrote:
> On 21 September 2013 06:46, Christoffer Dall
> <christoffer.dall@linaro.org> wrote:
> > On Sat, Sep 21, 2013 at 06:22:23AM +0900, Peter Maydell wrote:
> >>  *) for getting TCG<->KVM and KVM-with-non-host-CPU cases
> >> right we need to do translation anyway, or at least think about it.
> >
> > Why? Wouldn't we always only support the case where QEMU emulates the
> > same model as KVM in the first case, and the kernel should behave the
> > same and export the same state if you ask for a specific target no
> > matter what the underlying hardware is, no?
> 
> If the kernel has to be able to do translation of the state, why
> not make its life easier by having it only need to do one thing
> (host h/w format -> whatever standard format we pick)
> rather than lots and lots of things
> (host CPU X h/w format -> format for supported guest CPU A,
>  host CPU X h/w format -> format for supported guest CPU B,
>  host CPU Y h/w format -> format for guest CPU A,
>  host CPU Y h/w format -> format for guest CPU B,
>  etc etc etc)
> 
> ? That's basically a cross product over every CPU we
> support.
> 
Good point.

So after reading the GIC specs again, the way I understand it is that
the APR regs have a bit set, if that group priority (a.k.a. preemption
level) has an active interrupt.  Further, multiple set bits would would
only happen if software acknowledges an interrupt and before EOIing it,
the GIC gets preempted by an interrupt with a higher group priority
(lower number).  Correct?

Further, and again, I don't think the spec is particularly clear on this
point, but I think it suggests that if bit[0] is set, then there's an
interrupt from interrupt priority group 0 (the group with the highest
priority) in the active state, if bit[1] is set, one from group 1 is
active, and so on.

That would be a perfectly fine format for the APR in the GICstate
structure, and the only remaining questions would be:

 (1) How many preemption levels should be supported, which would be most
     easily solved by just defining GICC_APR0-GICC_APR3 for all cpu
     interfaces.
 (2) How does the arm_gic_kvm.c code detect the underlying host CPU that
     the kernel read the register from when it returned the value of the
     register to do the proper translation?  I don't even want to think
     about how this will work on Big.Little...

-Christoffer
Peter Maydell Sept. 23, 2013, 12:02 p.m. UTC | #9
On 23 September 2013 11:14, Christoffer Dall
<christoffer.dall@linaro.org> wrote:
> On Sat, Sep 21, 2013 at 06:38:19PM +0900, Peter Maydell wrote:
>  (2) How does the arm_gic_kvm.c code detect the underlying host CPU that
>      the kernel read the register from when it returned the value of the
>      register to do the proper translation?  I don't even want to think
>      about how this will work on Big.Little...

That's why the kernel does the translation, not userspace :-)

-- PMM
Christoffer Dall Sept. 23, 2013, 3:30 p.m. UTC | #10
On Mon, Sep 23, 2013 at 09:02:44PM +0900, Peter Maydell wrote:
> On 23 September 2013 11:14, Christoffer Dall
> <christoffer.dall@linaro.org> wrote:
> > On Sat, Sep 21, 2013 at 06:38:19PM +0900, Peter Maydell wrote:
> >  (2) How does the arm_gic_kvm.c code detect the underlying host CPU that
> >      the kernel read the register from when it returned the value of the
> >      register to do the proper translation?  I don't even want to think
> >      about how this will work on Big.Little...
> 
> That's why the kernel does the translation, not userspace :-)
> 
Ah yeah, I misread your previous mail as returning just the hardware
state from the kernel.  OK, I'll add this implementation definition of
the register group to the kernel interface and to qemu's structure.

-Christoffer
diff mbox

Patch

diff --git a/hw/intc/arm_gic_common.c b/hw/intc/arm_gic_common.c
index a50ded2..f39bdc1 100644
--- a/hw/intc/arm_gic_common.c
+++ b/hw/intc/arm_gic_common.c
@@ -77,6 +77,8 @@  static const VMStateDescription vmstate_gic = {
         VMSTATE_UINT16_ARRAY(running_priority, GICState, NCPU),
         VMSTATE_UINT16_ARRAY(current_pending, GICState, NCPU),
         VMSTATE_UINT8_2DARRAY(binary_point, GICState, 2, NCPU),
+        VMSTATE_UINT32_2DARRAY(active_prio, GICState, 4, NCPU),
+        VMSTATE_UINT32(num_irq, GICState),
         VMSTATE_END_OF_LIST()
     }
 };
diff --git a/hw/intc/arm_gic_kvm.c b/hw/intc/arm_gic_kvm.c
index 9f0a852..9785281 100644
--- a/hw/intc/arm_gic_kvm.c
+++ b/hw/intc/arm_gic_kvm.c
@@ -23,6 +23,15 @@ 
 #include "kvm_arm.h"
 #include "gic_internal.h"
 
+//#define DEBUG_GIC_KVM
+
+#ifdef DEBUG_GIC_KVM
+#define DPRINTF(fmt, ...) \
+do { fprintf(stderr, "arm_gic: " fmt , ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) do {} while(0)
+#endif
+
 #define TYPE_KVM_ARM_GIC "kvm-arm-gic"
 #define KVM_ARM_GIC(obj) \
      OBJECT_CHECK(GICState, (obj), TYPE_KVM_ARM_GIC)
@@ -73,14 +82,419 @@  static void kvm_arm_gic_set_irq(void *opaque, int irq, int level)
     kvm_set_irq(kvm_state, kvm_irq, !!level);
 }
 
+static bool kvm_arm_gic_can_save_restore(GICState *s)
+{
+    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
+    return kgc->dev_fd >= 0;
+}
+
+static void kvm_arm_gic_reg_access(GICState *s, int group, int offset,
+                                   int cpu, uint32_t *val, bool write)
+{
+    KVMARMGICClass *kgc = KVM_ARM_GIC_GET_CLASS(s);
+    struct kvm_device_attr attr;
+    int type;
+
+    cpu = cpu & 0xff;
+
+    attr.flags = 0;
+    attr.group = group;
+    attr.attr = (((uint64_t)cpu << KVM_DEV_ARM_VGIC_CPUID_SHIFT) &
+                 KVM_DEV_ARM_VGIC_CPUID_MASK) |
+                (((uint64_t)offset << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) &
+                 KVM_DEV_ARM_VGIC_OFFSET_MASK);
+    attr.addr = (uint64_t)(long)val;
+
+    if (write) {
+        type = KVM_SET_DEVICE_ATTR;
+    } else {
+        type = KVM_GET_DEVICE_ATTR;
+    }
+
+    if (kvm_device_ioctl(kgc->dev_fd, type, &attr) < 0) {
+            fprintf(stderr, "KVM_{SET/GET}_DEVICE_ATTR failed: %s\n",
+                    strerror(errno));
+            abort();
+    }
+}
+
+static void kvm_arm_gic_dist_reg_access(GICState *s, int offset, int cpu,
+                                        uint32_t *val, bool write)
+{
+    kvm_arm_gic_reg_access(s, KVM_DEV_ARM_VGIC_GRP_DIST_REGS,
+                           offset, cpu, val, write);
+}
+
+static void kvm_arm_gic_cpu_reg_access(GICState *s, int offset, int cpu,
+                                       uint32_t *val, bool write)
+{
+    kvm_arm_gic_reg_access(s, KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
+                           offset, cpu, val, write);
+}
+
+#define for_each_irq_reg(_ctr, _max_irq, _field_width) \
+    for (_ctr = 0; _ctr < ((_max_irq) / (32 / (_field_width))); _ctr++)
+
+/*
+ * Translate from the in-kernel field for an IRQ value to/from the qemu
+ * representation.
+ */
+typedef void (*vgic_translate_fn)(GICState *s, int irq, int cpu,
+                                  uint32_t *field, bool to_kernel);
+
+/* synthetic translate function used for clear/set registers to completely
+ * clear a setting using a clear-register before setting the remaing bits
+ * using a set-register */
+static void translate_clear(GICState *s, int irq, int cpu,
+                            uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = ~0;
+    } else {
+        /* does not make sense: qemu model doesn't use set/clear regs */
+        abort();
+    }
+}
+
+static void translate_enabled(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_ENABLED(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_ENABLED(irq, cm);
+        }
+    }
+}
+
+static void translate_pending(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_PENDING(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_PENDING(irq, cm);
+            /* TODO: Capture is level-line is held high in the kernel */
+        }
+    }
+}
+
+static void translate_active(GICState *s, int irq, int cpu,
+                             uint32_t *field, bool to_kernel)
+{
+    int cm = (irq < GIC_INTERNAL) ? (1 << cpu) : ALL_CPU_MASK;
+
+    if (to_kernel) {
+        *field = GIC_TEST_ACTIVE(irq, cm);
+    } else {
+        if (*field & 1) {
+            GIC_SET_ACTIVE(irq, cm);
+        }
+    }
+}
+
+static void translate_trigger(GICState *s, int irq, int cpu,
+                            uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = (GIC_TEST_TRIGGER(irq)) ? 0x2 : 0x0;
+    } else {
+        if (*field & 0x2) {
+            GIC_SET_TRIGGER(irq);
+        }
+    }
+}
+
+static void translate_priority(GICState *s, int irq, int cpu,
+                               uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = GIC_GET_PRIORITY(irq, cpu) & 0xff;
+    } else {
+        GIC_SET_PRIORITY(irq, cpu, *field & 0xff);
+    }
+}
+
+static void translate_targets(GICState *s, int irq, int cpu,
+                              uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = s->irq_target[irq] & 0xff;
+    } else {
+        s->irq_target[irq] = *field & 0xff;
+    }
+}
+
+static void translate_sgisource(GICState *s, int irq, int cpu,
+                                uint32_t *field, bool to_kernel)
+{
+    if (to_kernel) {
+        *field = s->sgi_source[irq][cpu] & 0xff;
+    } else {
+        s->sgi_source[irq][cpu] = *field & 0xff;
+    }
+}
+
+/* Read a register group from the kernel VGIC */
+static void kvm_arm_gic_dist_readr(GICState *s, uint32_t offset, int width,
+                                   int maxirq, vgic_translate_fn translate_fn)
+{
+    uint32_t reg;
+    int i;
+    int j;
+    int irq;
+    int cpu;
+    int regsz = 32 / width; /* irqs per kernel register */
+    uint32_t field;
+
+    for_each_irq_reg(i, maxirq, width) {
+        irq = i * regsz;
+        cpu = 0;
+        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
+            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, false);
+            for (j = 0; j < regsz; j++) {
+                field = reg >> (j * width) & ((1 << width) - 1);
+                translate_fn(s, irq + j, cpu, &field, false);
+            }
+
+            cpu++;
+        }
+        offset += 4;
+    }
+}
+
+/* Write a register group to the kernel VGIC */
+static void kvm_arm_gic_dist_writer(GICState *s, uint32_t offset, int width,
+                                    int maxirq, vgic_translate_fn translate_fn)
+{
+    uint32_t reg;
+    int i;
+    int j;
+    int irq;
+    int cpu;
+    int regsz = 32 / width; /* irqs per kernel register */
+    uint32_t field;
+
+    for_each_irq_reg(i, maxirq, width) {
+        irq = i * regsz;
+        cpu = 0;
+        while ((cpu < s->num_cpu && irq < GIC_INTERNAL) || cpu == 0) {
+            reg = 0;
+            for (j = 0; j < regsz; j++) {
+                translate_fn(s, irq + j, cpu, &field, true);
+                reg |= (field & ((1 << width) - 1)) << (j * width);
+            }
+            kvm_arm_gic_dist_reg_access(s, offset, cpu, &reg, true);
+
+            cpu++;
+        }
+        offset += 4;
+    }
+}
+
 static void kvm_arm_gic_put(GICState *s)
 {
-    /* TODO: there isn't currently a kernel interface to set the GIC state */
+    uint32_t reg;
+    int i;
+    int cpu;
+    int num_cpu;
+    int num_irq;
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+            DPRINTF("Cannot put kernel gic state, no kernel interface");
+            return;
+    }
+
+    /* Note: We do the restore in a slightly different order than the save
+     * (where the order doesn't matter and is simply ordered according to the
+     * register offset values */
+
+    /*****************************************************************
+     * Distributor State
+     */
+
+    /* s->enabled -> GICD_CTLR */
+    reg = s->enabled;
+    kvm_arm_gic_dist_reg_access(s, 0x0, 0, &reg, true);
+
+    /* Sanity checking on GICD_TYPER and s->num_irq, s->num_cpu */
+    kvm_arm_gic_dist_reg_access(s, 0x4, 0, &reg, false);
+    num_irq = ((reg & 0x1f) + 1) * 32;
+    num_cpu = ((reg & 0xe0) >> 5) + 1;
+
+    if (num_irq < s->num_irq) {
+            fprintf(stderr, "Restoring %u IRQs, but kernel supports max %d\n",
+                    s->num_irq, num_irq);
+            abort();
+    } else if (num_cpu != s->num_cpu ) {
+            fprintf(stderr, "Restoring %u CPU interfaces, kernel only has %d\n",
+                    s->num_cpu, num_cpu);
+            /* Did we not create the VCPUs in the kernel yet? */
+            abort();
+    }
+
+    /* TODO: Consider checking compatibility with the IIDR ? */
+
+    /* irq_state[n].enabled -> GICD_ISENABLERn */
+    kvm_arm_gic_dist_writer(s, 0x180, 1, s->num_irq, translate_clear);
+    kvm_arm_gic_dist_writer(s, 0x100, 1, s->num_irq, translate_enabled);
+
+    /* s->irq_target[irq] -> GICD_ITARGETSRn
+     * (restore targets before pending to ensure the pending state is set on
+     * the appropriate CPU interfaces in the kernel) */
+    kvm_arm_gic_dist_writer(s, 0x800, 8, s->num_irq, translate_targets);
+
+    /* irq_state[n].pending + irq_state[n].level -> GICD_ISPENDRn */
+    kvm_arm_gic_dist_writer(s, 0x280, 1, s->num_irq, translate_clear);
+    kvm_arm_gic_dist_writer(s, 0x200, 1, s->num_irq, translate_pending);
+
+    /* irq_state[n].active -> GICD_ISACTIVERn */
+    kvm_arm_gic_dist_writer(s, 0x380, 1, s->num_irq, translate_clear);
+    kvm_arm_gic_dist_writer(s, 0x300, 1, s->num_irq, translate_active);
+
+    /* irq_state[n].trigger -> GICD_ICFRn */
+    kvm_arm_gic_dist_writer(s, 0xc00, 2, s->num_irq, translate_trigger);
+
+    /* s->priorityX[irq] -> ICD_IPRIORITYRn */
+    kvm_arm_gic_dist_writer(s, 0x400, 8, s->num_irq, translate_priority);
+
+    /* s->sgi_source -> ICD_CPENDSGIRn */
+    kvm_arm_gic_dist_writer(s, 0xf10, 8, GIC_NR_SGIS, translate_clear);
+    kvm_arm_gic_dist_writer(s, 0xf20, 8, GIC_NR_SGIS, translate_sgisource);
+
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (cpu = 0; cpu < s->num_cpu; cpu++) {
+        /* s->cpu_enabled[cpu] -> GICC_CTLR */
+        reg = s->cpu_enabled[cpu];
+        kvm_arm_gic_cpu_reg_access(s, 0x00, cpu, &reg, true);
+
+        /* s->priority_mask[cpu] -> GICC_PMR */
+        reg = (s->priority_mask[cpu] & 0xff);
+        kvm_arm_gic_cpu_reg_access(s, 0x04, cpu, &reg, true);
+
+        /* s->binary_point[cpu][0] -> GICC_BPR */
+        reg = (s->binary_point[0][cpu] & 0x7);
+        kvm_arm_gic_cpu_reg_access(s, 0x08, cpu, &reg, true);
+
+        /* s->binary_point[cpu][1] -> GICC_ABPR */
+        reg = (s->binary_point[1][cpu] & 0x7);
+        kvm_arm_gic_cpu_reg_access(s, 0x1c, cpu, &reg, true);
+
+        /* s->active_prio[n][cpu] -> GICC_APRn */
+        for (i = 0; i < 4; i++) {
+            reg = s->active_prio[i][cpu];
+            kvm_arm_gic_cpu_reg_access(s, 0xd0 + i * 4, cpu, &reg, true);
+        }
+    }
 }
 
 static void kvm_arm_gic_get(GICState *s)
 {
-    /* TODO: there isn't currently a kernel interface to get the GIC state */
+    uint32_t reg;
+    int i;
+    int cpu;
+
+    if (!kvm_arm_gic_can_save_restore(s)) {
+            DPRINTF("Cannot get kernel gic state, no kernel interface");
+            return;
+    }
+
+    /*****************************************************************
+     * Distributor State
+     */
+
+    /* GICD_CTLR -> s->enabled */
+    kvm_arm_gic_dist_reg_access(s, 0x0, 0, &reg, false);
+    s->enabled = reg & 1;
+
+    /* Sanity checking on GICD_TYPER -> s->num_irq, s->num_cpu */
+    kvm_arm_gic_dist_reg_access(s, 0x4, 0, &reg, false);
+    s->num_irq = ((reg & 0x1f) + 1) * 32;
+    s->num_cpu = ((reg & 0xe0) >> 5) + 1;
+
+    if (s->num_irq > GIC_MAXIRQ) {
+            fprintf(stderr, "Too many IRQs reported from the kernel: %d\n",
+                    s->num_irq);
+            abort();
+    }
+
+    /* GICD_IIDR -> ? */
+    kvm_arm_gic_dist_reg_access(s, 0x8, 0, &reg, false);
+
+    /* Verify no GROUP 1 interrupts configured in the kernel */
+    for_each_irq_reg(i, s->num_irq, 1) {
+        kvm_arm_gic_dist_reg_access(s, 0x80 + (i * 4), 0, &reg, false);
+        if (reg != 0) {
+            fprintf(stderr, "Unsupported GICD_IGROUPRn value: %08x\n",
+                    reg);
+            abort();
+        }
+    }
+
+    /* Clear all the IRQ settings */
+    for (i = 0; i < s->num_irq; i++) {
+        memset(&s->irq_state[i], 0, sizeof(s->irq_state[0]));
+    }
+
+    /* GICD_ISENABLERn -> irq_state[n].enabled */
+    kvm_arm_gic_dist_readr(s, 0x100, 1, s->num_irq, translate_enabled);
+
+    /* GICD_ISPENDRn -> irq_state[n].pending + irq_state[n].level */
+    kvm_arm_gic_dist_readr(s, 0x200, 1, s->num_irq, translate_pending);
+
+    /* GICD_ISACTIVERn -> irq_state[n].active */
+    kvm_arm_gic_dist_readr(s, 0x300, 1, s->num_irq, translate_active);
+
+    /* GICD_ICFRn -> irq_state[n].trigger */
+    kvm_arm_gic_dist_readr(s, 0xc00, 2, s->num_irq, translate_trigger);
+
+    /* GICD_IPRIORITYRn -> s->priorityX[irq] */
+    kvm_arm_gic_dist_readr(s, 0x400, 8, s->num_irq, translate_priority);
+
+    /* GICD_ITARGETSRn -> s->irq_target[irq] */
+    kvm_arm_gic_dist_readr(s, 0x800, 8, s->num_irq, translate_targets);
+
+    /* GICD_CPENDSGIRn -> s->sgi_source */
+    kvm_arm_gic_dist_readr(s, 0xf10, 8, GIC_NR_SGIS, translate_sgisource);
+
+
+    /*****************************************************************
+     * CPU Interface(s) State
+     */
+
+    for (cpu = 0; cpu < s->num_cpu; cpu++) {
+        /* GICC_CTLR -> s->cpu_enabled[cpu] */
+        kvm_arm_gic_cpu_reg_access(s, 0x00, cpu, &reg, false);
+        s->cpu_enabled[cpu] = (reg & 1);
+
+        /* GICC_PMR -> s->priority_mask[cpu] */
+        kvm_arm_gic_cpu_reg_access(s, 0x04, cpu, &reg, false);
+        s->priority_mask[cpu] = (reg & 0xff);
+
+        /* GICC_BPR -> s->binary_point[cpu][0] */
+        kvm_arm_gic_cpu_reg_access(s, 0x08, cpu, &reg, false);
+        s->binary_point[0][cpu] = (reg & 0x7);
+
+        /* GICC_ABPR -> s->binary_point[cpu][1] */
+        kvm_arm_gic_cpu_reg_access(s, 0x1c, cpu, &reg, false);
+        s->binary_point[1][cpu] = (reg & 0x7);
+
+        /* GICC_APRn -> s->active_prio[n][cpu] */
+        for (i = 0; i < 4; i++) {
+            kvm_arm_gic_cpu_reg_access(s, 0xd0 + i * 4, cpu, &reg, false);
+            s->active_prio[i][cpu] = reg;
+        }
+    }
 }
 
 static void kvm_arm_gic_reset(DeviceState *dev)
diff --git a/hw/intc/gic_internal.h b/hw/intc/gic_internal.h
index 424a41f..9771163 100644
--- a/hw/intc/gic_internal.h
+++ b/hw/intc/gic_internal.h
@@ -100,6 +100,7 @@  typedef struct GICState {
 
     /* these registers are mainly used for save/restore of KVM state */
     uint8_t  binary_point[2][NCPU]; /* [0]: group 0, [1]: group 1 */
+    uint32_t active_prio[4][NCPU]; /* implementation defined layout */
 
     uint32_t num_cpu;
 
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 1c31b5d..e5538c7 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -633,9 +633,15 @@  extern const VMStateInfo vmstate_info_bitmap;
 #define VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v)                        \
     VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint32, uint32_t)
 
+#define VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, _v)                \
+    VMSTATE_2DARRAY(_f, _s, _n1, _n2, _v, vmstate_info_uint32, uint32_t)
+
 #define VMSTATE_UINT32_ARRAY(_f, _s, _n)                              \
     VMSTATE_UINT32_ARRAY_V(_f, _s, _n, 0)
 
+#define VMSTATE_UINT32_2DARRAY(_f, _s, _n1, _n2)                      \
+    VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
 #define VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v)                        \
     VMSTATE_ARRAY(_f, _s, _n, _v, vmstate_info_uint64, uint64_t)