diff mbox

xen: arm: arm64: Fix memory cloberring issues during VFP save restore.

Message ID 1391769538-9091-1-git-send-email-pranavkumar@linaro.org
State Accepted
Commit 117501c67ac00ad7850eedf25f870fe36579f71c
Headers show

Commit Message

PranavkumarSawargaonkar Feb. 7, 2014, 10:38 a.m. UTC
This patch addresses memory cloberring issue mentioed by Julien Grall
with my earlier patch -
Ref:
http://www.gossamer-threads.com/lists/xen/devel/316247

Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Anup Patel <anup.patel@linaro.org>
---
 xen/arch/arm/arm64/vfp.c |   70 ++++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 34 deletions(-)

Comments

Ian Campbell Feb. 7, 2014, 11:39 a.m. UTC | #1
On Fri, 2014-02-07 at 16:08 +0530, Pranavkumar Sawargaonkar wrote:
> This patch addresses memory cloberring issue mentioed by Julien Grall
> with my earlier patch -
> Ref:
> http://www.gossamer-threads.com/lists/xen/devel/316247
> 
> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> Signed-off-by: Anup Patel <anup.patel@linaro.org>
> ---
>  xen/arch/arm/arm64/vfp.c |   70 ++++++++++++++++++++++++----------------------
>  1 file changed, 36 insertions(+), 34 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
> index c09cf0c..62f56a3 100644
> --- a/xen/arch/arm/arm64/vfp.c
> +++ b/xen/arch/arm/arm64/vfp.c
> @@ -8,23 +8,24 @@ void vfp_save_state(struct vcpu *v)
>      if ( !cpu_has_fp )
>          return;
>  
> -    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
> -                 "stp q2, q3, [%0, #16 * 2]\n\t"
> -                 "stp q4, q5, [%0, #16 * 4]\n\t"
> -                 "stp q6, q7, [%0, #16 * 6]\n\t"
> -                 "stp q8, q9, [%0, #16 * 8]\n\t"
> -                 "stp q10, q11, [%0, #16 * 10]\n\t"
> -                 "stp q12, q13, [%0, #16 * 12]\n\t"
> -                 "stp q14, q15, [%0, #16 * 14]\n\t"
> -                 "stp q16, q17, [%0, #16 * 16]\n\t"
> -                 "stp q18, q19, [%0, #16 * 18]\n\t"
> -                 "stp q20, q21, [%0, #16 * 20]\n\t"
> -                 "stp q22, q23, [%0, #16 * 22]\n\t"
> -                 "stp q24, q25, [%0, #16 * 24]\n\t"
> -                 "stp q26, q27, [%0, #16 * 26]\n\t"
> -                 "stp q28, q29, [%0, #16 * 28]\n\t"
> -                 "stp q30, q31, [%0, #16 * 30]\n\t"
> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> +    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
> +                 "stp q2, q3, [%1, #16 * 2]\n\t"
> +                 "stp q4, q5, [%1, #16 * 4]\n\t"
> +                 "stp q6, q7, [%1, #16 * 6]\n\t"
> +                 "stp q8, q9, [%1, #16 * 8]\n\t"
> +                 "stp q10, q11, [%1, #16 * 10]\n\t"
> +                 "stp q12, q13, [%1, #16 * 12]\n\t"
> +                 "stp q14, q15, [%1, #16 * 14]\n\t"
> +                 "stp q16, q17, [%1, #16 * 16]\n\t"
> +                 "stp q18, q19, [%1, #16 * 18]\n\t"
> +                 "stp q20, q21, [%1, #16 * 20]\n\t"
> +                 "stp q22, q23, [%1, #16 * 22]\n\t"
> +                 "stp q24, q25, [%1, #16 * 24]\n\t"
> +                 "stp q26, q27, [%1, #16 * 26]\n\t"
> +                 "stp q28, q29, [%1, #16 * 28]\n\t"
> +                 "stp q30, q31, [%1, #16 * 30]\n\t"
> +                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
> +                 : "memory");

The point of this change was to be able to drop the memory clobbers.

George, I'd like to take this in 4.4 if possible -- I wanted to get the
baseline functionality fixed for 4.4 ASAP since it was quite a big hole
which is why I committed without waiting for this respin.

The issue is that the patch which was committed yesterday clobbers all
of memory and not just the bits the inline asm touches.

Ian.
Julien Grall Feb. 7, 2014, 11:54 a.m. UTC | #2
Hello,

Thanks for sending the patch quickly.

On 07/02/14 10:38, Pranavkumar Sawargaonkar wrote:
> This patch addresses memory cloberring issue mentioed by Julien Grall

clobbering mentioned.

> with my earlier patch -
> Ref:
> http://www.gossamer-threads.com/lists/xen/devel/316247

Can you add the commit id?

>
> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> Signed-off-by: Anup Patel <anup.patel@linaro.org>
> ---
>   xen/arch/arm/arm64/vfp.c |   70 ++++++++++++++++++++++++----------------------
>   1 file changed, 36 insertions(+), 34 deletions(-)
>
> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
> index c09cf0c..62f56a3 100644
> --- a/xen/arch/arm/arm64/vfp.c
> +++ b/xen/arch/arm/arm64/vfp.c
> @@ -8,23 +8,24 @@ void vfp_save_state(struct vcpu *v)
>       if ( !cpu_has_fp )
>           return;
>
> -    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
> -                 "stp q2, q3, [%0, #16 * 2]\n\t"
> -                 "stp q4, q5, [%0, #16 * 4]\n\t"
> -                 "stp q6, q7, [%0, #16 * 6]\n\t"
> -                 "stp q8, q9, [%0, #16 * 8]\n\t"
> -                 "stp q10, q11, [%0, #16 * 10]\n\t"
> -                 "stp q12, q13, [%0, #16 * 12]\n\t"
> -                 "stp q14, q15, [%0, #16 * 14]\n\t"
> -                 "stp q16, q17, [%0, #16 * 16]\n\t"
> -                 "stp q18, q19, [%0, #16 * 18]\n\t"
> -                 "stp q20, q21, [%0, #16 * 20]\n\t"
> -                 "stp q22, q23, [%0, #16 * 22]\n\t"
> -                 "stp q24, q25, [%0, #16 * 24]\n\t"
> -                 "stp q26, q27, [%0, #16 * 26]\n\t"
> -                 "stp q28, q29, [%0, #16 * 28]\n\t"
> -                 "stp q30, q31, [%0, #16 * 30]\n\t"
> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> +    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
> +                 "stp q2, q3, [%1, #16 * 2]\n\t"
> +                 "stp q4, q5, [%1, #16 * 4]\n\t"
> +                 "stp q6, q7, [%1, #16 * 6]\n\t"
> +                 "stp q8, q9, [%1, #16 * 8]\n\t"
> +                 "stp q10, q11, [%1, #16 * 10]\n\t"
> +                 "stp q12, q13, [%1, #16 * 12]\n\t"
> +                 "stp q14, q15, [%1, #16 * 14]\n\t"
> +                 "stp q16, q17, [%1, #16 * 16]\n\t"
> +                 "stp q18, q19, [%1, #16 * 18]\n\t"
> +                 "stp q20, q21, [%1, #16 * 20]\n\t"
> +                 "stp q22, q23, [%1, #16 * 22]\n\t"
> +                 "stp q24, q25, [%1, #16 * 24]\n\t"
> +                 "stp q26, q27, [%1, #16 * 26]\n\t"
> +                 "stp q28, q29, [%1, #16 * 28]\n\t"
> +                 "stp q30, q31, [%1, #16 * 30]\n\t"
> +                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
> +                 : "memory");

You don't need anymore to clobber the whole memory. "memory" can be removed.

>
>       v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
>       v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
> @@ -36,23 +37,24 @@ void vfp_restore_state(struct vcpu *v)
>       if ( !cpu_has_fp )
>           return;
>
> -    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
> -                 "ldp q2, q3, [%0, #16 * 2]\n\t"
> -                 "ldp q4, q5, [%0, #16 * 4]\n\t"
> -                 "ldp q6, q7, [%0, #16 * 6]\n\t"
> -                 "ldp q8, q9, [%0, #16 * 8]\n\t"
> -                 "ldp q10, q11, [%0, #16 * 10]\n\t"
> -                 "ldp q12, q13, [%0, #16 * 12]\n\t"
> -                 "ldp q14, q15, [%0, #16 * 14]\n\t"
> -                 "ldp q16, q17, [%0, #16 * 16]\n\t"
> -                 "ldp q18, q19, [%0, #16 * 18]\n\t"
> -                 "ldp q20, q21, [%0, #16 * 20]\n\t"
> -                 "ldp q22, q23, [%0, #16 * 22]\n\t"
> -                 "ldp q24, q25, [%0, #16 * 24]\n\t"
> -                 "ldp q26, q27, [%0, #16 * 26]\n\t"
> -                 "ldp q28, q29, [%0, #16 * 28]\n\t"
> -                 "ldp q30, q31, [%0, #16 * 30]\n\t"
> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> +    asm volatile("ldp q0, q1, [%1, #16 * 0]\n\t"
> +                 "ldp q2, q3, [%1, #16 * 2]\n\t"
> +                 "ldp q4, q5, [%1, #16 * 4]\n\t"
> +                 "ldp q6, q7, [%1, #16 * 6]\n\t"
> +                 "ldp q8, q9, [%1, #16 * 8]\n\t"
> +                 "ldp q10, q11, [%1, #16 * 10]\n\t"
> +                 "ldp q12, q13, [%1, #16 * 12]\n\t"
> +                 "ldp q14, q15, [%1, #16 * 14]\n\t"
> +                 "ldp q16, q17, [%1, #16 * 16]\n\t"
> +                 "ldp q18, q19, [%1, #16 * 18]\n\t"
> +                 "ldp q20, q21, [%1, #16 * 20]\n\t"
> +                 "ldp q22, q23, [%1, #16 * 22]\n\t"
> +                 "ldp q24, q25, [%1, #16 * 24]\n\t"
> +                 "ldp q26, q27, [%1, #16 * 26]\n\t"
> +                 "ldp q28, q29, [%1, #16 * 28]\n\t"
> +                 "ldp q30, q31, [%1, #16 * 30]\n\t"
> +                 :: "Q" (*v->arch.vfp.fpregs), "r" (v->arch.vfp.fpregs)
> +                 : "memory");

Same here.

Cheers,
PranavkumarSawargaonkar Feb. 7, 2014, 12:47 p.m. UTC | #3
Hi Julieng,

On 7 February 2014 17:24, Julien Grall <julien.grall@linaro.org> wrote:
> Hello,
>
> Thanks for sending the patch quickly.
>
>
> On 07/02/14 10:38, Pranavkumar Sawargaonkar wrote:
>>
>> This patch addresses memory cloberring issue mentioed by Julien Grall
>
>
> clobbering mentioned.
>
>
>> with my earlier patch -
>> Ref:
>> http://www.gossamer-threads.com/lists/xen/devel/316247
>
>
> Can you add the commit id?
Sure.
>
>
>>
>> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
>> Signed-off-by: Anup Patel <anup.patel@linaro.org>
>> ---
>>   xen/arch/arm/arm64/vfp.c |   70
>> ++++++++++++++++++++++++----------------------
>>   1 file changed, 36 insertions(+), 34 deletions(-)
>>
>> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
>> index c09cf0c..62f56a3 100644
>> --- a/xen/arch/arm/arm64/vfp.c
>> +++ b/xen/arch/arm/arm64/vfp.c
>> @@ -8,23 +8,24 @@ void vfp_save_state(struct vcpu *v)
>>       if ( !cpu_has_fp )
>>           return;
>>
>> -    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
>> -                 "stp q2, q3, [%0, #16 * 2]\n\t"
>> -                 "stp q4, q5, [%0, #16 * 4]\n\t"
>> -                 "stp q6, q7, [%0, #16 * 6]\n\t"
>> -                 "stp q8, q9, [%0, #16 * 8]\n\t"
>> -                 "stp q10, q11, [%0, #16 * 10]\n\t"
>> -                 "stp q12, q13, [%0, #16 * 12]\n\t"
>> -                 "stp q14, q15, [%0, #16 * 14]\n\t"
>> -                 "stp q16, q17, [%0, #16 * 16]\n\t"
>> -                 "stp q18, q19, [%0, #16 * 18]\n\t"
>> -                 "stp q20, q21, [%0, #16 * 20]\n\t"
>> -                 "stp q22, q23, [%0, #16 * 22]\n\t"
>> -                 "stp q24, q25, [%0, #16 * 24]\n\t"
>> -                 "stp q26, q27, [%0, #16 * 26]\n\t"
>> -                 "stp q28, q29, [%0, #16 * 28]\n\t"
>> -                 "stp q30, q31, [%0, #16 * 30]\n\t"
>> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>> +    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
>> +                 "stp q2, q3, [%1, #16 * 2]\n\t"
>> +                 "stp q4, q5, [%1, #16 * 4]\n\t"
>> +                 "stp q6, q7, [%1, #16 * 6]\n\t"
>> +                 "stp q8, q9, [%1, #16 * 8]\n\t"
>> +                 "stp q10, q11, [%1, #16 * 10]\n\t"
>> +                 "stp q12, q13, [%1, #16 * 12]\n\t"
>> +                 "stp q14, q15, [%1, #16 * 14]\n\t"
>> +                 "stp q16, q17, [%1, #16 * 16]\n\t"
>> +                 "stp q18, q19, [%1, #16 * 18]\n\t"
>> +                 "stp q20, q21, [%1, #16 * 20]\n\t"
>> +                 "stp q22, q23, [%1, #16 * 22]\n\t"
>> +                 "stp q24, q25, [%1, #16 * 24]\n\t"
>> +                 "stp q26, q27, [%1, #16 * 26]\n\t"
>> +                 "stp q28, q29, [%1, #16 * 28]\n\t"
>> +                 "stp q30, q31, [%1, #16 * 30]\n\t"
>> +                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
>> +                 : "memory");
>
>
> You don't need anymore to clobber the whole memory. "memory" can be removed.
Ok I will remove it in V2.
>
>
>>
>>       v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
>>       v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
>> @@ -36,23 +37,24 @@ void vfp_restore_state(struct vcpu *v)
>>       if ( !cpu_has_fp )
>>           return;
>>
>> -    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
>> -                 "ldp q2, q3, [%0, #16 * 2]\n\t"
>> -                 "ldp q4, q5, [%0, #16 * 4]\n\t"
>> -                 "ldp q6, q7, [%0, #16 * 6]\n\t"
>> -                 "ldp q8, q9, [%0, #16 * 8]\n\t"
>> -                 "ldp q10, q11, [%0, #16 * 10]\n\t"
>> -                 "ldp q12, q13, [%0, #16 * 12]\n\t"
>> -                 "ldp q14, q15, [%0, #16 * 14]\n\t"
>> -                 "ldp q16, q17, [%0, #16 * 16]\n\t"
>> -                 "ldp q18, q19, [%0, #16 * 18]\n\t"
>> -                 "ldp q20, q21, [%0, #16 * 20]\n\t"
>> -                 "ldp q22, q23, [%0, #16 * 22]\n\t"
>> -                 "ldp q24, q25, [%0, #16 * 24]\n\t"
>> -                 "ldp q26, q27, [%0, #16 * 26]\n\t"
>> -                 "ldp q28, q29, [%0, #16 * 28]\n\t"
>> -                 "ldp q30, q31, [%0, #16 * 30]\n\t"
>> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>> +    asm volatile("ldp q0, q1, [%1, #16 * 0]\n\t"
>> +                 "ldp q2, q3, [%1, #16 * 2]\n\t"
>> +                 "ldp q4, q5, [%1, #16 * 4]\n\t"
>> +                 "ldp q6, q7, [%1, #16 * 6]\n\t"
>> +                 "ldp q8, q9, [%1, #16 * 8]\n\t"
>> +                 "ldp q10, q11, [%1, #16 * 10]\n\t"
>> +                 "ldp q12, q13, [%1, #16 * 12]\n\t"
>> +                 "ldp q14, q15, [%1, #16 * 14]\n\t"
>> +                 "ldp q16, q17, [%1, #16 * 16]\n\t"
>> +                 "ldp q18, q19, [%1, #16 * 18]\n\t"
>> +                 "ldp q20, q21, [%1, #16 * 20]\n\t"
>> +                 "ldp q22, q23, [%1, #16 * 22]\n\t"
>> +                 "ldp q24, q25, [%1, #16 * 24]\n\t"
>> +                 "ldp q26, q27, [%1, #16 * 26]\n\t"
>> +                 "ldp q28, q29, [%1, #16 * 28]\n\t"
>> +                 "ldp q30, q31, [%1, #16 * 30]\n\t"
>> +                 :: "Q" (*v->arch.vfp.fpregs), "r" (v->arch.vfp.fpregs)
>> +                 : "memory");
>
>
> Same here.
>
> Cheers,
>
> --
> Julien Grall

-
Pranav
Ian Campbell Feb. 7, 2014, 2:35 p.m. UTC | #4
On Fri, 2014-02-07 at 14:29 +0000, George Dunlap wrote:
> On 02/07/2014 11:39 AM, Ian Campbell wrote:
> > On Fri, 2014-02-07 at 16:08 +0530, Pranavkumar Sawargaonkar wrote:
> >> This patch addresses memory cloberring issue mentioed by Julien Grall
> >> with my earlier patch -
> >> Ref:
> >> http://www.gossamer-threads.com/lists/xen/devel/316247
> >>
> >> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> >> Signed-off-by: Anup Patel <anup.patel@linaro.org>
> >> ---
> >>   xen/arch/arm/arm64/vfp.c |   70 ++++++++++++++++++++++++----------------------
> >>   1 file changed, 36 insertions(+), 34 deletions(-)
> >>
> >> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
> >> index c09cf0c..62f56a3 100644
> >> --- a/xen/arch/arm/arm64/vfp.c
> >> +++ b/xen/arch/arm/arm64/vfp.c
> >> @@ -8,23 +8,24 @@ void vfp_save_state(struct vcpu *v)
> >>       if ( !cpu_has_fp )
> >>           return;
> >>   
> >> -    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
> >> -                 "stp q2, q3, [%0, #16 * 2]\n\t"
> >> -                 "stp q4, q5, [%0, #16 * 4]\n\t"
> >> -                 "stp q6, q7, [%0, #16 * 6]\n\t"
> >> -                 "stp q8, q9, [%0, #16 * 8]\n\t"
> >> -                 "stp q10, q11, [%0, #16 * 10]\n\t"
> >> -                 "stp q12, q13, [%0, #16 * 12]\n\t"
> >> -                 "stp q14, q15, [%0, #16 * 14]\n\t"
> >> -                 "stp q16, q17, [%0, #16 * 16]\n\t"
> >> -                 "stp q18, q19, [%0, #16 * 18]\n\t"
> >> -                 "stp q20, q21, [%0, #16 * 20]\n\t"
> >> -                 "stp q22, q23, [%0, #16 * 22]\n\t"
> >> -                 "stp q24, q25, [%0, #16 * 24]\n\t"
> >> -                 "stp q26, q27, [%0, #16 * 26]\n\t"
> >> -                 "stp q28, q29, [%0, #16 * 28]\n\t"
> >> -                 "stp q30, q31, [%0, #16 * 30]\n\t"
> >> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> >> +    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
> >> +                 "stp q2, q3, [%1, #16 * 2]\n\t"
> >> +                 "stp q4, q5, [%1, #16 * 4]\n\t"
> >> +                 "stp q6, q7, [%1, #16 * 6]\n\t"
> >> +                 "stp q8, q9, [%1, #16 * 8]\n\t"
> >> +                 "stp q10, q11, [%1, #16 * 10]\n\t"
> >> +                 "stp q12, q13, [%1, #16 * 12]\n\t"
> >> +                 "stp q14, q15, [%1, #16 * 14]\n\t"
> >> +                 "stp q16, q17, [%1, #16 * 16]\n\t"
> >> +                 "stp q18, q19, [%1, #16 * 18]\n\t"
> >> +                 "stp q20, q21, [%1, #16 * 20]\n\t"
> >> +                 "stp q22, q23, [%1, #16 * 22]\n\t"
> >> +                 "stp q24, q25, [%1, #16 * 24]\n\t"
> >> +                 "stp q26, q27, [%1, #16 * 26]\n\t"
> >> +                 "stp q28, q29, [%1, #16 * 28]\n\t"
> >> +                 "stp q30, q31, [%1, #16 * 30]\n\t"
> >> +                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
> >> +                 : "memory");
> > The point of this change was to be able to drop the memory clobbers.
> >
> > George, I'd like to take this in 4.4 if possible -- I wanted to get the
> > baseline functionality fixed for 4.4 ASAP since it was quite a big hole
> > which is why I committed without waiting for this respin.
> >
> > The issue is that the patch which was committed yesterday clobbers all
> > of memory and not just the bits the inline asm touches.
> 
> Obviously there's not much point in releasing a version with a fix that 
> doesn't work. :-)

It does work, just the clobber is too aggressive.

> Release-acked-by: George Dunlap <george.dunlap@eu.citrix.com>
diff mbox

Patch

diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
index c09cf0c..62f56a3 100644
--- a/xen/arch/arm/arm64/vfp.c
+++ b/xen/arch/arm/arm64/vfp.c
@@ -8,23 +8,24 @@  void vfp_save_state(struct vcpu *v)
     if ( !cpu_has_fp )
         return;
 
-    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
-                 "stp q2, q3, [%0, #16 * 2]\n\t"
-                 "stp q4, q5, [%0, #16 * 4]\n\t"
-                 "stp q6, q7, [%0, #16 * 6]\n\t"
-                 "stp q8, q9, [%0, #16 * 8]\n\t"
-                 "stp q10, q11, [%0, #16 * 10]\n\t"
-                 "stp q12, q13, [%0, #16 * 12]\n\t"
-                 "stp q14, q15, [%0, #16 * 14]\n\t"
-                 "stp q16, q17, [%0, #16 * 16]\n\t"
-                 "stp q18, q19, [%0, #16 * 18]\n\t"
-                 "stp q20, q21, [%0, #16 * 20]\n\t"
-                 "stp q22, q23, [%0, #16 * 22]\n\t"
-                 "stp q24, q25, [%0, #16 * 24]\n\t"
-                 "stp q26, q27, [%0, #16 * 26]\n\t"
-                 "stp q28, q29, [%0, #16 * 28]\n\t"
-                 "stp q30, q31, [%0, #16 * 30]\n\t"
-                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
+    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
+                 "stp q2, q3, [%1, #16 * 2]\n\t"
+                 "stp q4, q5, [%1, #16 * 4]\n\t"
+                 "stp q6, q7, [%1, #16 * 6]\n\t"
+                 "stp q8, q9, [%1, #16 * 8]\n\t"
+                 "stp q10, q11, [%1, #16 * 10]\n\t"
+                 "stp q12, q13, [%1, #16 * 12]\n\t"
+                 "stp q14, q15, [%1, #16 * 14]\n\t"
+                 "stp q16, q17, [%1, #16 * 16]\n\t"
+                 "stp q18, q19, [%1, #16 * 18]\n\t"
+                 "stp q20, q21, [%1, #16 * 20]\n\t"
+                 "stp q22, q23, [%1, #16 * 22]\n\t"
+                 "stp q24, q25, [%1, #16 * 24]\n\t"
+                 "stp q26, q27, [%1, #16 * 26]\n\t"
+                 "stp q28, q29, [%1, #16 * 28]\n\t"
+                 "stp q30, q31, [%1, #16 * 30]\n\t"
+                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
+                 : "memory");
 
     v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
     v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
@@ -36,23 +37,24 @@  void vfp_restore_state(struct vcpu *v)
     if ( !cpu_has_fp )
         return;
 
-    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
-                 "ldp q2, q3, [%0, #16 * 2]\n\t"
-                 "ldp q4, q5, [%0, #16 * 4]\n\t"
-                 "ldp q6, q7, [%0, #16 * 6]\n\t"
-                 "ldp q8, q9, [%0, #16 * 8]\n\t"
-                 "ldp q10, q11, [%0, #16 * 10]\n\t"
-                 "ldp q12, q13, [%0, #16 * 12]\n\t"
-                 "ldp q14, q15, [%0, #16 * 14]\n\t"
-                 "ldp q16, q17, [%0, #16 * 16]\n\t"
-                 "ldp q18, q19, [%0, #16 * 18]\n\t"
-                 "ldp q20, q21, [%0, #16 * 20]\n\t"
-                 "ldp q22, q23, [%0, #16 * 22]\n\t"
-                 "ldp q24, q25, [%0, #16 * 24]\n\t"
-                 "ldp q26, q27, [%0, #16 * 26]\n\t"
-                 "ldp q28, q29, [%0, #16 * 28]\n\t"
-                 "ldp q30, q31, [%0, #16 * 30]\n\t"
-                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
+    asm volatile("ldp q0, q1, [%1, #16 * 0]\n\t"
+                 "ldp q2, q3, [%1, #16 * 2]\n\t"
+                 "ldp q4, q5, [%1, #16 * 4]\n\t"
+                 "ldp q6, q7, [%1, #16 * 6]\n\t"
+                 "ldp q8, q9, [%1, #16 * 8]\n\t"
+                 "ldp q10, q11, [%1, #16 * 10]\n\t"
+                 "ldp q12, q13, [%1, #16 * 12]\n\t"
+                 "ldp q14, q15, [%1, #16 * 14]\n\t"
+                 "ldp q16, q17, [%1, #16 * 16]\n\t"
+                 "ldp q18, q19, [%1, #16 * 18]\n\t"
+                 "ldp q20, q21, [%1, #16 * 20]\n\t"
+                 "ldp q22, q23, [%1, #16 * 22]\n\t"
+                 "ldp q24, q25, [%1, #16 * 24]\n\t"
+                 "ldp q26, q27, [%1, #16 * 26]\n\t"
+                 "ldp q28, q29, [%1, #16 * 28]\n\t"
+                 "ldp q30, q31, [%1, #16 * 30]\n\t"
+                 :: "Q" (*v->arch.vfp.fpregs), "r" (v->arch.vfp.fpregs)
+                 : "memory");
 
     WRITE_SYSREG32(v->arch.vfp.fpsr, FPSR);
     WRITE_SYSREG32(v->arch.vfp.fpcr, FPCR);