diff mbox

[GIT,PULL] arm/arm64: KVM: Fix unaligned access bug on gicv2 access

Message ID 20140923111412.GG12187@cbox
State New
Headers show

Commit Message

Christoffer Dall Sept. 23, 2014, 11:14 a.m. UTC
On Tue, Sep 23, 2014 at 10:36:30AM +0200, Paolo Bonzini wrote:
> Il 23/09/2014 00:07, Will Deacon ha scritto:
> >> >  {
> >> >  	if (!(lr_desc.state & LR_STATE_MASK))
> >> > -		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
> >> > +		__set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
> >> >  }
> > Does this work for big-endian arm64 machines? Surely the bug is due to
> > casting a u32 * to an unsigned long *, and not specifically related to
> > atomics (which is where it happened to explode)?
> 
It does look like the whole thing is broken on BE systems, but fixing
that becomes non-trivial.  I don't think this fix is incorrect in
itself, but we do have a larger issue with BE.

I took a stab at fixing this (untested for BE), which looks something
like the following, but I'm a bit uneasy about having to test and merge
this as a fix given the rush before 3.17 is released.

Thoughts?

commit e4b2731a8caad09d0b8b469377f4951ce6b70569 (HEAD, vgic_elrsr_fix)
Author: Christoffer Dall <christoffer.dall@linaro.org>
Date:   Tue Sep 23 11:54:58 2014 +0200

    arm/arm64: KVM: Fix BE accesses to GICv2 EISR and ELRSR regs
    
    The EISR and ELRSR registers are 32-bit registers on GICv2, and we
    store these as an array of two such registers on the vgic vcpu struct.
    However, we access them as a single 64-bit value or as a bitmap pointer
    in the generic vgic code, which breaks BE support.
    
    Instead, store them as u64 values on the vgic structure and do the
    word-swapping in the assembly code, which already handles the byte order
    for BE systems.
    
    Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>

Comments

Paolo Bonzini Sept. 23, 2014, 11:50 a.m. UTC | #1
Il 23/09/2014 13:14, Christoffer Dall ha scritto:
> On Tue, Sep 23, 2014 at 10:36:30AM +0200, Paolo Bonzini wrote:
>> Il 23/09/2014 00:07, Will Deacon ha scritto:
>>>>>  {
>>>>>  	if (!(lr_desc.state & LR_STATE_MASK))
>>>>> -		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
>>>>> +		__set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
>>>>>  }
>>> Does this work for big-endian arm64 machines? Surely the bug is due to
>>> casting a u32 * to an unsigned long *, and not specifically related to
>>> atomics (which is where it happened to explode)?
>>
> It does look like the whole thing is broken on BE systems, but fixing
> that becomes non-trivial.  I don't think this fix is incorrect in
> itself, but we do have a larger issue with BE.
> 
> I took a stab at fixing this (untested for BE), which looks something
> like the following, but I'm a bit uneasy about having to test and merge
> this as a fix given the rush before 3.17 is released.
> 
> Thoughts?

If big-endian is broken anyway, let's apply this only:

> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 35b0c12..c66dc9ed 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -168,8 +168,8 @@ struct vgic_v2_cpu_if {
>  	u32		vgic_hcr;
>  	u32		vgic_vmcr;
>  	u32		vgic_misr;	/* Saved only */
> -	u32		vgic_eisr[2];	/* Saved only */
> -	u32		vgic_elrsr[2];	/* Saved only */
> +	u64		vgic_eisr;	/* Saved only */
> +	u64		vgic_elrsr;	/* Saved only */
>  	u32		vgic_apr;
>  	u32		vgic_lr[VGIC_V2_MAX_LRS];
>  };
> diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
> index 416baed..2935405 100644
> --- a/virt/kvm/arm/vgic-v2.c
> +++ b/virt/kvm/arm/vgic-v2.c
> @@ -71,35 +71,17 @@ static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
>  				  struct vgic_lr lr_desc)
>  {
>  	if (!(lr_desc.state & LR_STATE_MASK))
> -		__set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
> +		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
>  }
>  
>  static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
>  {
> -	u64 val;
> -
> -#if BITS_PER_LONG == 64
> -	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
> -	val <<= 32;
> -	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
> -#else
> -	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
> -#endif
> -	return val;
> +	return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
>  }
>  
>  static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
>  {
> -	u64 val;
> -
> -#if BITS_PER_LONG == 64
> -	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
> -	val <<= 32;
> -	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
> -#else
> -	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
> -#endif
> -	return val;
> +	return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
>  }
>  
>  static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)

which matches what vgic-v3 already does.

BE can be fixed in 3.18.

Paolo
Paolo Bonzini Sept. 23, 2014, 12:48 p.m. UTC | #2
Il 23/09/2014 14:44, Andre Przywara ha scritto:
> Hi,
> 
> On 23/09/14 12:50, Paolo Bonzini wrote:
>> Il 23/09/2014 13:14, Christoffer Dall ha scritto:
>>> On Tue, Sep 23, 2014 at 10:36:30AM +0200, Paolo Bonzini wrote:
>>>> Il 23/09/2014 00:07, Will Deacon ha scritto:
>>>>>>>  {
>>>>>>>  	if (!(lr_desc.state & LR_STATE_MASK))
>>>>>>> -		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
>>>>>>> +		__set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
>>>>>>>  }
>>>>> Does this work for big-endian arm64 machines? Surely the bug is due to
>>>>> casting a u32 * to an unsigned long *, and not specifically related to
>>>>> atomics (which is where it happened to explode)?
>>>>
>>> It does look like the whole thing is broken on BE systems, but fixing
>>> that becomes non-trivial.  I don't think this fix is incorrect in
>>> itself, but we do have a larger issue with BE.
>>>
>>> I took a stab at fixing this (untested for BE), which looks something
>>> like the following, but I'm a bit uneasy about having to test and merge
>>> this as a fix given the rush before 3.17 is released.
>>>
>>> Thoughts?
>>
>> If big-endian is broken anyway, let's apply this only:
>>
>>> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
>>> index 35b0c12..c66dc9ed 100644
>>> --- a/include/kvm/arm_vgic.h
>>> +++ b/include/kvm/arm_vgic.h
>>> @@ -168,8 +168,8 @@ struct vgic_v2_cpu_if {
>>>  	u32		vgic_hcr;
>>>  	u32		vgic_vmcr;
>>>  	u32		vgic_misr;	/* Saved only */
>>> -	u32		vgic_eisr[2];	/* Saved only */
>>> -	u32		vgic_elrsr[2];	/* Saved only */
>>> +	u64		vgic_eisr;	/* Saved only */
>>> +	u64		vgic_elrsr;	/* Saved only */
>>>  	u32		vgic_apr;
>>>  	u32		vgic_lr[VGIC_V2_MAX_LRS];
>>>  };
> 
> I think Marc's point on this was not to spoil 32bit code (as this is the
> GIC, which is shared). In the GICv2 spec the registers are declared as a
> number of 32 bit registers, so there is some sense in keeping it u32.
> So I came up with the following this morning:
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 35b0c12..6f884df 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -168,8 +168,14 @@ struct vgic_v2_cpu_if {
>         u32             vgic_hcr;
>         u32             vgic_vmcr;
>         u32             vgic_misr;      /* Saved only */
> -       u32             vgic_eisr[2];   /* Saved only */
> -       u32             vgic_elrsr[2];  /* Saved only */
> +       union {
> +               u32             vgic_eisr[2];   /* Saved only */
> +               unsigned long   vgic_eisr_bm[8 / sizeof(long)];
> +       };
> +       union {
> +               u32             vgic_elrsr[2];  /* Saved only */
> +               unsigned long   vgic_elrsr_bm[8 / sizeof(long)];
> +       };
>         u32             vgic_apr;
>         u32             vgic_lr[VGIC_V2_MAX_LRS];
>  };
> 
> And then use vgic_elrsr_bm in set_bit().
> 
> Admittedly a bit hacky, but fixes the alignment issue while still
> retaining sane code for ARM.
> If anyone knows a good fix for that "8 / sizeof(long)" kludge, I am all
> ears.

	u32		vgic_eisr[2] __aligned(BITS_PER_LONG/8);
	u32		vgic_elrsr[2] __aligned(BITS_PER_LONG/8);

Still wouldn't fix big-endian, however, and it's not necessary if we go
for set_bit as in Christoffer's original patch.

Paolo
Christoffer Dall Sept. 23, 2014, 1:52 p.m. UTC | #3
On Tue, Sep 23, 2014 at 01:44:11PM +0100, Andre Przywara wrote:
> Hi,
> 
> On 23/09/14 12:50, Paolo Bonzini wrote:
> > Il 23/09/2014 13:14, Christoffer Dall ha scritto:
> >> On Tue, Sep 23, 2014 at 10:36:30AM +0200, Paolo Bonzini wrote:
> >>> Il 23/09/2014 00:07, Will Deacon ha scritto:
> >>>>>>  {
> >>>>>>  	if (!(lr_desc.state & LR_STATE_MASK))
> >>>>>> -		set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
> >>>>>> +		__set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
> >>>>>>  }
> >>>> Does this work for big-endian arm64 machines? Surely the bug is due to
> >>>> casting a u32 * to an unsigned long *, and not specifically related to
> >>>> atomics (which is where it happened to explode)?
> >>>
> >> It does look like the whole thing is broken on BE systems, but fixing
> >> that becomes non-trivial.  I don't think this fix is incorrect in
> >> itself, but we do have a larger issue with BE.
> >>
> >> I took a stab at fixing this (untested for BE), which looks something
> >> like the following, but I'm a bit uneasy about having to test and merge
> >> this as a fix given the rush before 3.17 is released.
> >>
> >> Thoughts?
> > 
> > If big-endian is broken anyway, let's apply this only:
> > 
> >> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> >> index 35b0c12..c66dc9ed 100644
> >> --- a/include/kvm/arm_vgic.h
> >> +++ b/include/kvm/arm_vgic.h
> >> @@ -168,8 +168,8 @@ struct vgic_v2_cpu_if {
> >>  	u32		vgic_hcr;
> >>  	u32		vgic_vmcr;
> >>  	u32		vgic_misr;	/* Saved only */
> >> -	u32		vgic_eisr[2];	/* Saved only */
> >> -	u32		vgic_elrsr[2];	/* Saved only */
> >> +	u64		vgic_eisr;	/* Saved only */
> >> +	u64		vgic_elrsr;	/* Saved only */
> >>  	u32		vgic_apr;
> >>  	u32		vgic_lr[VGIC_V2_MAX_LRS];
> >>  };
> 
> I think Marc's point on this was not to spoil 32bit code (as this is the
> GIC, which is shared). In the GICv2 spec the registers are declared as a
> number of 32 bit registers, so there is some sense in keeping it u32.
> So I came up with the following this morning:
> 
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index 35b0c12..6f884df 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -168,8 +168,14 @@ struct vgic_v2_cpu_if {
>         u32             vgic_hcr;
>         u32             vgic_vmcr;
>         u32             vgic_misr;      /* Saved only */
> -       u32             vgic_eisr[2];   /* Saved only */
> -       u32             vgic_elrsr[2];  /* Saved only */
> +       union {
> +               u32             vgic_eisr[2];   /* Saved only */
> +               unsigned long   vgic_eisr_bm[8 / sizeof(long)];
> +       };
> +       union {
> +               u32             vgic_elrsr[2];  /* Saved only */
> +               unsigned long   vgic_elrsr_bm[8 / sizeof(long)];
> +       };
>         u32             vgic_apr;
>         u32             vgic_lr[VGIC_V2_MAX_LRS];
>  };
> 
> And then use vgic_elrsr_bm in set_bit().
> 
> Admittedly a bit hacky, but fixes the alignment issue while still
> retaining sane code for ARM.
> If anyone knows a good fix for that "8 / sizeof(long)" kludge, I am all
> ears.
> 

I honestly think this obfuscates what's going on more than it helps.  I
think in general complicating your data structure because of the way you
consume it is the wrong way to go, unless it significantly simplifies a
complicated set of manipulators.

Another thing is that this fix does not address the fact that you're
still returning a u64 from vgic_get_elrsr() and related functions, which
will break with the use of for_each_set_bit() in the callers when the
host is a 32-bit BE system.  You'd have to change the accessor functions
to return an (unsigned long *) as well with your change above and 64-bit
BE systems would have to switch the order of the words when accessing
your vgic_elrsr_bm field.  I tried this, and it doesn't look nice.

Therefore, I think we should really just merge the one-line fix or the
patch I sent before.  Paolo seems fine with it either way.

If anyone feels like reviewing my patch and giving it a quick test on a
BE system with a version of QEMU with the pl011 level-triggered patch,
real soon, like today'ish, then we can use that, but otherwise let's go
with the one-liner.

-Christoffer
Paolo Bonzini Sept. 23, 2014, 1:52 p.m. UTC | #4
Il 23/09/2014 15:52, Christoffer Dall ha scritto:
> Therefore, I think we should really just merge the one-line fix or the
> patch I sent before.  Paolo seems fine with it either way.

Yes, it's on its way to Linus.

Paolo
Peter Maydell Sept. 23, 2014, 2:01 p.m. UTC | #5
On 23 September 2014 14:52, Christoffer Dall
<christoffer.dall@linaro.org> wrote:
> If anyone feels like reviewing my patch and giving it a quick test on a
> BE system with a version of QEMU with the pl011 level-triggered patch,

FWIW, any old version of QEMU running the vexpress-a15 model
will also use level-triggered interrupts for pl011, because
the upstream DTB which we use for that board has always
correctly marked the pl011 and all the other motherboard
devices as being level-triggered.

I'm still not 100% convinced we shouldn't mark the
virtio-mmio devices as level-triggered, incidentally.
I *think* that (a) the spec pretty heavily implies that
the lines behave as level triggered but (b) the
specific text in the spec about required guest code
to avoid races (s.2.4.2 of the 0.9.5 spec) means that
even if the interrupt controller treats them as edge
triggered it's OK.

-- PMM
Christoffer Dall Sept. 23, 2014, 2:03 p.m. UTC | #6
On Tue, Sep 23, 2014 at 4:01 PM, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 23 September 2014 14:52, Christoffer Dall
> <christoffer.dall@linaro.org> wrote:
>> If anyone feels like reviewing my patch and giving it a quick test on a
>> BE system with a version of QEMU with the pl011 level-triggered patch,
>
> FWIW, any old version of QEMU running the vexpress-a15 model
> will also use level-triggered interrupts for pl011, because
> the upstream DTB which we use for that board has always
> correctly marked the pl011 and all the other motherboard
> devices as being level-triggered.
>
> I'm still not 100% convinced we shouldn't mark the
> virtio-mmio devices as level-triggered, incidentally.
> I *think* that (a) the spec pretty heavily implies that
> the lines behave as level triggered but (b) the
> specific text in the spec about required guest code
> to avoid races (s.2.4.2 of the 0.9.5 spec) means that
> even if the interrupt controller treats them as edge
> triggered it's OK.
>
I think we should really sit down and figure out the right thing to do
during KVM Forum if we can allocate a slot for that.  Marc seems to
also have some input he would like to share on this subject.

For the record, I'm fine with changing the virtio-mmio devices, but
it's probably worth quickly measuring the performance impact first.

Thanks,
-Christoffer
Christoffer Dall Sept. 23, 2014, 2:07 p.m. UTC | #7
On Tue, Sep 23, 2014 at 03:52:49PM +0200, Paolo Bonzini wrote:
> Il 23/09/2014 15:52, Christoffer Dall ha scritto:
> > Therefore, I think we should really just merge the one-line fix or the
> > patch I sent before.  Paolo seems fine with it either way.
> 
> Yes, it's on its way to Linus.
> 
Thanks Paolo!

-Christoffer
diff mbox

Patch

diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 98c8c5b..14d4883 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -433,10 +433,17 @@  ARM_BE8(rev	r10, r10	)
 	str	r3, [r11, #VGIC_V2_CPU_HCR]
 	str	r4, [r11, #VGIC_V2_CPU_VMCR]
 	str	r5, [r11, #VGIC_V2_CPU_MISR]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	str	r6, [r11, #(VGIC_V2_CPU_EISR + 4)]
+	str	r7, [r11, #VGIC_V2_CPU_EISR]
+	str	r8, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+	str	r9, [r11, #VGIC_V2_CPU_ELRSR]
+#else
 	str	r6, [r11, #VGIC_V2_CPU_EISR]
 	str	r7, [r11, #(VGIC_V2_CPU_EISR + 4)]
 	str	r8, [r11, #VGIC_V2_CPU_ELRSR]
 	str	r9, [r11, #(VGIC_V2_CPU_ELRSR + 4)]
+#endif
 	str	r10, [r11, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S
index ae21177..f002fe1 100644
--- a/arch/arm64/kvm/vgic-v2-switch.S
+++ b/arch/arm64/kvm/vgic-v2-switch.S
@@ -67,10 +67,14 @@  CPU_BE(	rev	w11, w11 )
 	str	w4, [x3, #VGIC_V2_CPU_HCR]
 	str	w5, [x3, #VGIC_V2_CPU_VMCR]
 	str	w6, [x3, #VGIC_V2_CPU_MISR]
-	str	w7, [x3, #VGIC_V2_CPU_EISR]
-	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)]
-	str	w9, [x3, #VGIC_V2_CPU_ELRSR]
-	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)]
+CPU_LE(	str	w7, [x3, #VGIC_V2_CPU_EISR] )
+CPU_LE(	str	w8, [x3, #(VGIC_V2_CPU_EISR + 4)] )
+CPU_LE(	str	w9, [x3, #VGIC_V2_CPU_ELRSR] )
+CPU_LE(	str	w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
+CPU_BE(	str	w7, [x3, #(VGIC_V2_CPU_EISR + 4)] )
+CPU_BE(	str	w8, [x3, #VGIC_V2_CPU_EISR] )
+CPU_BE(	str	w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] )
+CPU_BE(	str	w10, [x3, #VGIC_V2_CPU_ELRSR] )
 	str	w11, [x3, #VGIC_V2_CPU_APR]
 
 	/* Clear GICH_HCR */
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 35b0c12..c66dc9ed 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -168,8 +168,8 @@  struct vgic_v2_cpu_if {
 	u32		vgic_hcr;
 	u32		vgic_vmcr;
 	u32		vgic_misr;	/* Saved only */
-	u32		vgic_eisr[2];	/* Saved only */
-	u32		vgic_elrsr[2];	/* Saved only */
+	u64		vgic_eisr;	/* Saved only */
+	u64		vgic_elrsr;	/* Saved only */
 	u32		vgic_apr;
 	u32		vgic_lr[VGIC_V2_MAX_LRS];
 };
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 416baed..2935405 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -71,35 +71,17 @@  static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
 				  struct vgic_lr lr_desc)
 {
 	if (!(lr_desc.state & LR_STATE_MASK))
-		__set_bit(lr, (unsigned long *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr);
+		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
 }
 
 static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu)
 {
-	u64 val;
-
-#if BITS_PER_LONG == 64
-	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[1];
-	val <<= 32;
-	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr[0];
-#else
-	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
-#endif
-	return val;
+	return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr;
 }
 
 static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu)
 {
-	u64 val;
-
-#if BITS_PER_LONG == 64
-	val  = vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[1];
-	val <<= 32;
-	val |= vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr[0];
-#else
-	val = *(u64 *)vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
-#endif
-	return val;
+	return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr;
 }
 
 static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 73eba79..30cf369 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -118,6 +118,20 @@  static const struct vgic_params *vgic;
 #define REG_OFFSET_SWIZZLE	0
 #endif
 
+/*
+ * Call this function to convert a u64 value to an unsigned long * bitmask
+ * in a way that works on both 32-bit and 64-bit LE and BE platforms.
+ *
+ * Warning: Calling this function may modify *val.
+ */
+static unsigned long *u64_to_bitmask(u64 *val)
+{
+#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
+	*val = (*val >> 32) | (*val << 32);
+#endif
+	return (unsigned long *)val;
+}
+
 static u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x,
 				int cpuid, u32 offset)
 {
@@ -1256,7 +1270,7 @@  static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 		 * active bit.
 		 */
 		u64 eisr = vgic_get_eisr(vcpu);
-		unsigned long *eisr_ptr = (unsigned long *)&eisr;
+		unsigned long *eisr_ptr = u64_to_bitmask(&eisr);
 		int lr;
 
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
@@ -1304,7 +1318,7 @@  static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 
 	level_pending = vgic_process_maintenance(vcpu);
 	elrsr = vgic_get_elrsr(vcpu);
-	elrsr_ptr = (unsigned long *)&elrsr;
+	elrsr_ptr = u64_to_bitmask(&elrsr);
 
 	/* Clear mappings for empty LRs */
 	for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {