Message ID: 1391769538-9091-1-git-send-email-pranavkumar@linaro.org
State:      Accepted
Commit:     117501c67ac00ad7850eedf25f870fe36579f71c

On Fri, 2014-02-07 at 16:08 +0530, Pranavkumar Sawargaonkar wrote:
> This patch addresses memory cloberring issue mentioed by Julien Grall
> with my earlier patch -
> Ref:
> http://www.gossamer-threads.com/lists/xen/devel/316247
>
> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> Signed-off-by: Anup Patel <anup.patel@linaro.org>
> ---
>  xen/arch/arm/arm64/vfp.c | 70 ++++++++++++++++++++++++----------------------
>  1 file changed, 36 insertions(+), 34 deletions(-)
>
> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
> index c09cf0c..62f56a3 100644
> --- a/xen/arch/arm/arm64/vfp.c
> +++ b/xen/arch/arm/arm64/vfp.c
> @@ -8,23 +8,24 @@ void vfp_save_state(struct vcpu *v)
>      if ( !cpu_has_fp )
>          return;
>
> -    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
> -                 "stp q2, q3, [%0, #16 * 2]\n\t"
> -                 "stp q4, q5, [%0, #16 * 4]\n\t"
> -                 "stp q6, q7, [%0, #16 * 6]\n\t"
> -                 "stp q8, q9, [%0, #16 * 8]\n\t"
> -                 "stp q10, q11, [%0, #16 * 10]\n\t"
> -                 "stp q12, q13, [%0, #16 * 12]\n\t"
> -                 "stp q14, q15, [%0, #16 * 14]\n\t"
> -                 "stp q16, q17, [%0, #16 * 16]\n\t"
> -                 "stp q18, q19, [%0, #16 * 18]\n\t"
> -                 "stp q20, q21, [%0, #16 * 20]\n\t"
> -                 "stp q22, q23, [%0, #16 * 22]\n\t"
> -                 "stp q24, q25, [%0, #16 * 24]\n\t"
> -                 "stp q26, q27, [%0, #16 * 26]\n\t"
> -                 "stp q28, q29, [%0, #16 * 28]\n\t"
> -                 "stp q30, q31, [%0, #16 * 30]\n\t"
> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> +    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
> +                 "stp q2, q3, [%1, #16 * 2]\n\t"
> +                 "stp q4, q5, [%1, #16 * 4]\n\t"
> +                 "stp q6, q7, [%1, #16 * 6]\n\t"
> +                 "stp q8, q9, [%1, #16 * 8]\n\t"
> +                 "stp q10, q11, [%1, #16 * 10]\n\t"
> +                 "stp q12, q13, [%1, #16 * 12]\n\t"
> +                 "stp q14, q15, [%1, #16 * 14]\n\t"
> +                 "stp q16, q17, [%1, #16 * 16]\n\t"
> +                 "stp q18, q19, [%1, #16 * 18]\n\t"
> +                 "stp q20, q21, [%1, #16 * 20]\n\t"
> +                 "stp q22, q23, [%1, #16 * 22]\n\t"
> +                 "stp q24, q25, [%1, #16 * 24]\n\t"
> +                 "stp q26, q27, [%1, #16 * 26]\n\t"
> +                 "stp q28, q29, [%1, #16 * 28]\n\t"
> +                 "stp q30, q31, [%1, #16 * 30]\n\t"
> +                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
> +                 : "memory");

The point of this change was to be able to drop the memory clobbers.

George, I'd like to take this in 4.4 if possible -- I wanted to get the
baseline functionality fixed for 4.4 ASAP since it was quite a big hole
which is why I committed without waiting for this respin.

The issue is that the patch which was committed yesterday clobbers all
of memory and not just the bits the inline asm touches.

Ian.

Hello,

Thanks for sending the patch quickly.

On 07/02/14 10:38, Pranavkumar Sawargaonkar wrote:
> This patch addresses memory cloberring issue mentioed by Julien Grall

clobbering mentioned.

> with my earlier patch -
> Ref:
> http://www.gossamer-threads.com/lists/xen/devel/316247

Can you add the commit id?

>
> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> Signed-off-by: Anup Patel <anup.patel@linaro.org>
> ---
>  xen/arch/arm/arm64/vfp.c | 70 ++++++++++++++++++++++++----------------------
>  1 file changed, 36 insertions(+), 34 deletions(-)
>
> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
> index c09cf0c..62f56a3 100644
> --- a/xen/arch/arm/arm64/vfp.c
> +++ b/xen/arch/arm/arm64/vfp.c
> @@ -8,23 +8,24 @@ void vfp_save_state(struct vcpu *v)
>      if ( !cpu_has_fp )
>          return;
>
> -    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
> -                 "stp q2, q3, [%0, #16 * 2]\n\t"
> -                 "stp q4, q5, [%0, #16 * 4]\n\t"
> -                 "stp q6, q7, [%0, #16 * 6]\n\t"
> -                 "stp q8, q9, [%0, #16 * 8]\n\t"
> -                 "stp q10, q11, [%0, #16 * 10]\n\t"
> -                 "stp q12, q13, [%0, #16 * 12]\n\t"
> -                 "stp q14, q15, [%0, #16 * 14]\n\t"
> -                 "stp q16, q17, [%0, #16 * 16]\n\t"
> -                 "stp q18, q19, [%0, #16 * 18]\n\t"
> -                 "stp q20, q21, [%0, #16 * 20]\n\t"
> -                 "stp q22, q23, [%0, #16 * 22]\n\t"
> -                 "stp q24, q25, [%0, #16 * 24]\n\t"
> -                 "stp q26, q27, [%0, #16 * 26]\n\t"
> -                 "stp q28, q29, [%0, #16 * 28]\n\t"
> -                 "stp q30, q31, [%0, #16 * 30]\n\t"
> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> +    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
> +                 "stp q2, q3, [%1, #16 * 2]\n\t"
> +                 "stp q4, q5, [%1, #16 * 4]\n\t"
> +                 "stp q6, q7, [%1, #16 * 6]\n\t"
> +                 "stp q8, q9, [%1, #16 * 8]\n\t"
> +                 "stp q10, q11, [%1, #16 * 10]\n\t"
> +                 "stp q12, q13, [%1, #16 * 12]\n\t"
> +                 "stp q14, q15, [%1, #16 * 14]\n\t"
> +                 "stp q16, q17, [%1, #16 * 16]\n\t"
> +                 "stp q18, q19, [%1, #16 * 18]\n\t"
> +                 "stp q20, q21, [%1, #16 * 20]\n\t"
> +                 "stp q22, q23, [%1, #16 * 22]\n\t"
> +                 "stp q24, q25, [%1, #16 * 24]\n\t"
> +                 "stp q26, q27, [%1, #16 * 26]\n\t"
> +                 "stp q28, q29, [%1, #16 * 28]\n\t"
> +                 "stp q30, q31, [%1, #16 * 30]\n\t"
> +                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
> +                 : "memory");

You don't need anymore to clobber the whole memory. "memory" can be removed.

>
>      v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
>      v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
> @@ -36,23 +37,24 @@ void vfp_restore_state(struct vcpu *v)
>      if ( !cpu_has_fp )
>          return;
>
> -    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
> -                 "ldp q2, q3, [%0, #16 * 2]\n\t"
> -                 "ldp q4, q5, [%0, #16 * 4]\n\t"
> -                 "ldp q6, q7, [%0, #16 * 6]\n\t"
> -                 "ldp q8, q9, [%0, #16 * 8]\n\t"
> -                 "ldp q10, q11, [%0, #16 * 10]\n\t"
> -                 "ldp q12, q13, [%0, #16 * 12]\n\t"
> -                 "ldp q14, q15, [%0, #16 * 14]\n\t"
> -                 "ldp q16, q17, [%0, #16 * 16]\n\t"
> -                 "ldp q18, q19, [%0, #16 * 18]\n\t"
> -                 "ldp q20, q21, [%0, #16 * 20]\n\t"
> -                 "ldp q22, q23, [%0, #16 * 22]\n\t"
> -                 "ldp q24, q25, [%0, #16 * 24]\n\t"
> -                 "ldp q26, q27, [%0, #16 * 26]\n\t"
> -                 "ldp q28, q29, [%0, #16 * 28]\n\t"
> -                 "ldp q30, q31, [%0, #16 * 30]\n\t"
> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> +    asm volatile("ldp q0, q1, [%1, #16 * 0]\n\t"
> +                 "ldp q2, q3, [%1, #16 * 2]\n\t"
> +                 "ldp q4, q5, [%1, #16 * 4]\n\t"
> +                 "ldp q6, q7, [%1, #16 * 6]\n\t"
> +                 "ldp q8, q9, [%1, #16 * 8]\n\t"
> +                 "ldp q10, q11, [%1, #16 * 10]\n\t"
> +                 "ldp q12, q13, [%1, #16 * 12]\n\t"
> +                 "ldp q14, q15, [%1, #16 * 14]\n\t"
> +                 "ldp q16, q17, [%1, #16 * 16]\n\t"
> +                 "ldp q18, q19, [%1, #16 * 18]\n\t"
> +                 "ldp q20, q21, [%1, #16 * 20]\n\t"
> +                 "ldp q22, q23, [%1, #16 * 22]\n\t"
> +                 "ldp q24, q25, [%1, #16 * 24]\n\t"
> +                 "ldp q26, q27, [%1, #16 * 26]\n\t"
> +                 "ldp q28, q29, [%1, #16 * 28]\n\t"
> +                 "ldp q30, q31, [%1, #16 * 30]\n\t"
> +                 :: "Q" (*v->arch.vfp.fpregs), "r" (v->arch.vfp.fpregs)
> +                 : "memory");

Same here.

Cheers,

Hi Julien,

On 7 February 2014 17:24, Julien Grall <julien.grall@linaro.org> wrote:
> Hello,
>
> Thanks for sending the patch quickly.
>
>
> On 07/02/14 10:38, Pranavkumar Sawargaonkar wrote:
>>
>> This patch addresses memory cloberring issue mentioed by Julien Grall
>
>
> clobbering mentioned.
>
>
>> with my earlier patch -
>> Ref:
>> http://www.gossamer-threads.com/lists/xen/devel/316247
>
>
> Can you add the commit id?

Sure.

>
>
>>
>> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
>> Signed-off-by: Anup Patel <anup.patel@linaro.org>
>> ---
>>   xen/arch/arm/arm64/vfp.c |   70
>> ++++++++++++++++++++++++----------------------
>>   1 file changed, 36 insertions(+), 34 deletions(-)
>>
>> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
>> index c09cf0c..62f56a3 100644
>> --- a/xen/arch/arm/arm64/vfp.c
>> +++ b/xen/arch/arm/arm64/vfp.c
>> @@ -8,23 +8,24 @@ void vfp_save_state(struct vcpu *v)
>>      if ( !cpu_has_fp )
>>          return;
>>
>> -    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
>> -                 "stp q2, q3, [%0, #16 * 2]\n\t"
>> -                 "stp q4, q5, [%0, #16 * 4]\n\t"
>> -                 "stp q6, q7, [%0, #16 * 6]\n\t"
>> -                 "stp q8, q9, [%0, #16 * 8]\n\t"
>> -                 "stp q10, q11, [%0, #16 * 10]\n\t"
>> -                 "stp q12, q13, [%0, #16 * 12]\n\t"
>> -                 "stp q14, q15, [%0, #16 * 14]\n\t"
>> -                 "stp q16, q17, [%0, #16 * 16]\n\t"
>> -                 "stp q18, q19, [%0, #16 * 18]\n\t"
>> -                 "stp q20, q21, [%0, #16 * 20]\n\t"
>> -                 "stp q22, q23, [%0, #16 * 22]\n\t"
>> -                 "stp q24, q25, [%0, #16 * 24]\n\t"
>> -                 "stp q26, q27, [%0, #16 * 26]\n\t"
>> -                 "stp q28, q29, [%0, #16 * 28]\n\t"
>> -                 "stp q30, q31, [%0, #16 * 30]\n\t"
>> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>> +    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
>> +                 "stp q2, q3, [%1, #16 * 2]\n\t"
>> +                 "stp q4, q5, [%1, #16 * 4]\n\t"
>> +                 "stp q6, q7, [%1, #16 * 6]\n\t"
>> +                 "stp q8, q9, [%1, #16 * 8]\n\t"
>> +                 "stp q10, q11, [%1, #16 * 10]\n\t"
>> +                 "stp q12, q13, [%1, #16 * 12]\n\t"
>> +                 "stp q14, q15, [%1, #16 * 14]\n\t"
>> +                 "stp q16, q17, [%1, #16 * 16]\n\t"
>> +                 "stp q18, q19, [%1, #16 * 18]\n\t"
>> +                 "stp q20, q21, [%1, #16 * 20]\n\t"
>> +                 "stp q22, q23, [%1, #16 * 22]\n\t"
>> +                 "stp q24, q25, [%1, #16 * 24]\n\t"
>> +                 "stp q26, q27, [%1, #16 * 26]\n\t"
>> +                 "stp q28, q29, [%1, #16 * 28]\n\t"
>> +                 "stp q30, q31, [%1, #16 * 30]\n\t"
>> +                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
>> +                 : "memory");
>
>
> You don't need anymore to clobber the whole memory. "memory" can be removed.

Ok I will remove it in V2.

>
>
>>
>>       v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
>>       v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
>> @@ -36,23 +37,24 @@ void vfp_restore_state(struct vcpu *v)
>>      if ( !cpu_has_fp )
>>          return;
>>
>> -    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
>> -                 "ldp q2, q3, [%0, #16 * 2]\n\t"
>> -                 "ldp q4, q5, [%0, #16 * 4]\n\t"
>> -                 "ldp q6, q7, [%0, #16 * 6]\n\t"
>> -                 "ldp q8, q9, [%0, #16 * 8]\n\t"
>> -                 "ldp q10, q11, [%0, #16 * 10]\n\t"
>> -                 "ldp q12, q13, [%0, #16 * 12]\n\t"
>> -                 "ldp q14, q15, [%0, #16 * 14]\n\t"
>> -                 "ldp q16, q17, [%0, #16 * 16]\n\t"
>> -                 "ldp q18, q19, [%0, #16 * 18]\n\t"
>> -                 "ldp q20, q21, [%0, #16 * 20]\n\t"
>> -                 "ldp q22, q23, [%0, #16 * 22]\n\t"
>> -                 "ldp q24, q25, [%0, #16 * 24]\n\t"
>> -                 "ldp q26, q27, [%0, #16 * 26]\n\t"
>> -                 "ldp q28, q29, [%0, #16 * 28]\n\t"
>> -                 "ldp q30, q31, [%0, #16 * 30]\n\t"
>> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>> +    asm volatile("ldp q0, q1, [%1, #16 * 0]\n\t"
>> +                 "ldp q2, q3, [%1, #16 * 2]\n\t"
>> +                 "ldp q4, q5, [%1, #16 * 4]\n\t"
>> +                 "ldp q6, q7, [%1, #16 * 6]\n\t"
>> +                 "ldp q8, q9, [%1, #16 * 8]\n\t"
>> +                 "ldp q10, q11, [%1, #16 * 10]\n\t"
>> +                 "ldp q12, q13, [%1, #16 * 12]\n\t"
>> +                 "ldp q14, q15, [%1, #16 * 14]\n\t"
>> +                 "ldp q16, q17, [%1, #16 * 16]\n\t"
>> +                 "ldp q18, q19, [%1, #16 * 18]\n\t"
>> +                 "ldp q20, q21, [%1, #16 * 20]\n\t"
>> +                 "ldp q22, q23, [%1, #16 * 22]\n\t"
>> +                 "ldp q24, q25, [%1, #16 * 24]\n\t"
>> +                 "ldp q26, q27, [%1, #16 * 26]\n\t"
>> +                 "ldp q28, q29, [%1, #16 * 28]\n\t"
>> +                 "ldp q30, q31, [%1, #16 * 30]\n\t"
>> +                 :: "Q" (*v->arch.vfp.fpregs), "r" (v->arch.vfp.fpregs)
>> +                 : "memory");
>
>
> Same here.
>
> Cheers,
>
> --
> Julien Grall

- Pranav

On Fri, 2014-02-07 at 14:29 +0000, George Dunlap wrote:
> On 02/07/2014 11:39 AM, Ian Campbell wrote:
> > On Fri, 2014-02-07 at 16:08 +0530, Pranavkumar Sawargaonkar wrote:
> >> This patch addresses memory cloberring issue mentioed by Julien Grall
> >> with my earlier patch -
> >> Ref:
> >> http://www.gossamer-threads.com/lists/xen/devel/316247
> >>
> >> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> >> Signed-off-by: Anup Patel <anup.patel@linaro.org>
> >> ---
> >>   xen/arch/arm/arm64/vfp.c |   70 ++++++++++++++++++++++++----------------------
> >>   1 file changed, 36 insertions(+), 34 deletions(-)
> >>
> >> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
> >> index c09cf0c..62f56a3 100644
> >> --- a/xen/arch/arm/arm64/vfp.c
> >> +++ b/xen/arch/arm/arm64/vfp.c
> >> @@ -8,23 +8,24 @@ void vfp_save_state(struct vcpu *v)
> >>      if ( !cpu_has_fp )
> >>          return;
> >>
> >> -    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
> >> -                 "stp q2, q3, [%0, #16 * 2]\n\t"
> >> -                 "stp q4, q5, [%0, #16 * 4]\n\t"
> >> -                 "stp q6, q7, [%0, #16 * 6]\n\t"
> >> -                 "stp q8, q9, [%0, #16 * 8]\n\t"
> >> -                 "stp q10, q11, [%0, #16 * 10]\n\t"
> >> -                 "stp q12, q13, [%0, #16 * 12]\n\t"
> >> -                 "stp q14, q15, [%0, #16 * 14]\n\t"
> >> -                 "stp q16, q17, [%0, #16 * 16]\n\t"
> >> -                 "stp q18, q19, [%0, #16 * 18]\n\t"
> >> -                 "stp q20, q21, [%0, #16 * 20]\n\t"
> >> -                 "stp q22, q23, [%0, #16 * 22]\n\t"
> >> -                 "stp q24, q25, [%0, #16 * 24]\n\t"
> >> -                 "stp q26, q27, [%0, #16 * 26]\n\t"
> >> -                 "stp q28, q29, [%0, #16 * 28]\n\t"
> >> -                 "stp q30, q31, [%0, #16 * 30]\n\t"
> >> -                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> >> +    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
> >> +                 "stp q2, q3, [%1, #16 * 2]\n\t"
> >> +                 "stp q4, q5, [%1, #16 * 4]\n\t"
> >> +                 "stp q6, q7, [%1, #16 * 6]\n\t"
> >> +                 "stp q8, q9, [%1, #16 * 8]\n\t"
> >> +                 "stp q10, q11, [%1, #16 * 10]\n\t"
> >> +                 "stp q12, q13, [%1, #16 * 12]\n\t"
> >> +                 "stp q14, q15, [%1, #16 * 14]\n\t"
> >> +                 "stp q16, q17, [%1, #16 * 16]\n\t"
> >> +                 "stp q18, q19, [%1, #16 * 18]\n\t"
> >> +                 "stp q20, q21, [%1, #16 * 20]\n\t"
> >> +                 "stp q22, q23, [%1, #16 * 22]\n\t"
> >> +                 "stp q24, q25, [%1, #16 * 24]\n\t"
> >> +                 "stp q26, q27, [%1, #16 * 26]\n\t"
> >> +                 "stp q28, q29, [%1, #16 * 28]\n\t"
> >> +                 "stp q30, q31, [%1, #16 * 30]\n\t"
> >> +                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
> >> +                 : "memory");
> > The point of this change was to be able to drop the memory clobbers.
> >
> > George, I'd like to take this in 4.4 if possible -- I wanted to get the
> > baseline functionality fixed for 4.4 ASAP since it was quite a big hole
> > which is why I committed without waiting for this respin.
> >
> > The issue is that the patch which was committed yesterday clobbers all
> > of memory and not just the bits the inline asm touches.
> Obviously there's not much point in releasing a version with a fix that
> doesn't work. :-)

It does work, just the clobber is too aggressive.

> Release-acked-by: George Dunlap <george.dunlap@eu.citrix.com>

diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
index c09cf0c..62f56a3 100644
--- a/xen/arch/arm/arm64/vfp.c
+++ b/xen/arch/arm/arm64/vfp.c
@@ -8,23 +8,24 @@ void vfp_save_state(struct vcpu *v)
     if ( !cpu_has_fp )
         return;
 
-    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
-                 "stp q2, q3, [%0, #16 * 2]\n\t"
-                 "stp q4, q5, [%0, #16 * 4]\n\t"
-                 "stp q6, q7, [%0, #16 * 6]\n\t"
-                 "stp q8, q9, [%0, #16 * 8]\n\t"
-                 "stp q10, q11, [%0, #16 * 10]\n\t"
-                 "stp q12, q13, [%0, #16 * 12]\n\t"
-                 "stp q14, q15, [%0, #16 * 14]\n\t"
-                 "stp q16, q17, [%0, #16 * 16]\n\t"
-                 "stp q18, q19, [%0, #16 * 18]\n\t"
-                 "stp q20, q21, [%0, #16 * 20]\n\t"
-                 "stp q22, q23, [%0, #16 * 22]\n\t"
-                 "stp q24, q25, [%0, #16 * 24]\n\t"
-                 "stp q26, q27, [%0, #16 * 26]\n\t"
-                 "stp q28, q29, [%0, #16 * 28]\n\t"
-                 "stp q30, q31, [%0, #16 * 30]\n\t"
-                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
+    asm volatile("stp q0, q1, [%1, #16 * 0]\n\t"
+                 "stp q2, q3, [%1, #16 * 2]\n\t"
+                 "stp q4, q5, [%1, #16 * 4]\n\t"
+                 "stp q6, q7, [%1, #16 * 6]\n\t"
+                 "stp q8, q9, [%1, #16 * 8]\n\t"
+                 "stp q10, q11, [%1, #16 * 10]\n\t"
+                 "stp q12, q13, [%1, #16 * 12]\n\t"
+                 "stp q14, q15, [%1, #16 * 14]\n\t"
+                 "stp q16, q17, [%1, #16 * 16]\n\t"
+                 "stp q18, q19, [%1, #16 * 18]\n\t"
+                 "stp q20, q21, [%1, #16 * 20]\n\t"
+                 "stp q22, q23, [%1, #16 * 22]\n\t"
+                 "stp q24, q25, [%1, #16 * 24]\n\t"
+                 "stp q26, q27, [%1, #16 * 26]\n\t"
+                 "stp q28, q29, [%1, #16 * 28]\n\t"
+                 "stp q30, q31, [%1, #16 * 30]\n\t"
+                 :"=Q" (*v->arch.vfp.fpregs): "r" (v->arch.vfp.fpregs)
+                 : "memory");
 
     v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
     v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
@@ -36,23 +37,24 @@ void vfp_restore_state(struct vcpu *v)
     if ( !cpu_has_fp )
         return;
 
-    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
-                 "ldp q2, q3, [%0, #16 * 2]\n\t"
-                 "ldp q4, q5, [%0, #16 * 4]\n\t"
-                 "ldp q6, q7, [%0, #16 * 6]\n\t"
-                 "ldp q8, q9, [%0, #16 * 8]\n\t"
-                 "ldp q10, q11, [%0, #16 * 10]\n\t"
-                 "ldp q12, q13, [%0, #16 * 12]\n\t"
-                 "ldp q14, q15, [%0, #16 * 14]\n\t"
-                 "ldp q16, q17, [%0, #16 * 16]\n\t"
-                 "ldp q18, q19, [%0, #16 * 18]\n\t"
-                 "ldp q20, q21, [%0, #16 * 20]\n\t"
-                 "ldp q22, q23, [%0, #16 * 22]\n\t"
-                 "ldp q24, q25, [%0, #16 * 24]\n\t"
-                 "ldp q26, q27, [%0, #16 * 26]\n\t"
-                 "ldp q28, q29, [%0, #16 * 28]\n\t"
-                 "ldp q30, q31, [%0, #16 * 30]\n\t"
-                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
+    asm volatile("ldp q0, q1, [%1, #16 * 0]\n\t"
+                 "ldp q2, q3, [%1, #16 * 2]\n\t"
+                 "ldp q4, q5, [%1, #16 * 4]\n\t"
+                 "ldp q6, q7, [%1, #16 * 6]\n\t"
+                 "ldp q8, q9, [%1, #16 * 8]\n\t"
+                 "ldp q10, q11, [%1, #16 * 10]\n\t"
+                 "ldp q12, q13, [%1, #16 * 12]\n\t"
+                 "ldp q14, q15, [%1, #16 * 14]\n\t"
+                 "ldp q16, q17, [%1, #16 * 16]\n\t"
+                 "ldp q18, q19, [%1, #16 * 18]\n\t"
+                 "ldp q20, q21, [%1, #16 * 20]\n\t"
+                 "ldp q22, q23, [%1, #16 * 22]\n\t"
+                 "ldp q24, q25, [%1, #16 * 24]\n\t"
+                 "ldp q26, q27, [%1, #16 * 26]\n\t"
+                 "ldp q28, q29, [%1, #16 * 28]\n\t"
+                 "ldp q30, q31, [%1, #16 * 30]\n\t"
+                 :: "Q" (*v->arch.vfp.fpregs), "r" (v->arch.vfp.fpregs)
+                 : "memory");
 
     WRITE_SYSREG32(v->arch.vfp.fpsr, FPSR);
     WRITE_SYSREG32(v->arch.vfp.fpcr, FPCR);
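
[Editor's note] For readers who want the constraint change in isolation: the following is a minimal, hypothetical sketch, not the Xen code itself (the struct and function names are invented, and the Xen patch above keeps the "memory" clobber that Julien suggests dropping in v2). It contrasts an input-only pointer plus a blanket "memory" clobber with a typed "Q"/"=Q" memory operand that tells GCC exactly which object the asm touches, which is what lets the clobber go. It assumes an AArch64 GCC toolchain (e.g. aarch64-linux-gnu-gcc).

    #include <stdint.h>

    /* Invented container for this example: room for 32 x 128-bit Q registers. */
    struct fpregs_example {
        uint64_t q[64];
    } __attribute__((__aligned__(16)));

    /*
     * Old style: the pointer is only an input, so the compiler cannot see
     * that the asm writes memory; a blanket "memory" clobber is required,
     * which forces it to spill/reload everything cached around the asm.
     */
    static inline void save_q0_q1_clobber(struct fpregs_example *regs)
    {
        asm volatile("stp q0, q1, [%0]"
                     :
                     : "r" (regs)
                     : "memory");
    }

    /*
     * New style: the written object is named as an output operand ("=Q"),
     * so the compiler knows which memory changes and no clobber is needed.
     */
    static inline void save_q0_q1_operand(struct fpregs_example *regs)
    {
        asm volatile("stp q0, q1, [%1]"
                     : "=Q" (*regs)      /* *regs is written by the asm      */
                     : "r" (regs));      /* address actually used as %1      */
    }

    /*
     * Restore side: the asm only reads *regs, so it becomes an input ("Q").
     * q0/q1 are overwritten here, so they are listed as clobbers in this
     * standalone sketch; the Xen context-switch code does not need to,
     * since no FP state is live at that point.
     */
    static inline void load_q0_q1_operand(const struct fpregs_example *regs)
    {
        asm volatile("ldp q0, q1, [%1]"
                     :
                     : "Q" (*regs), "r" (regs)
                     : "v0", "v1");
    }

The key design point, as discussed in the thread, is that "=Q"/"Q" operands describe the asm's memory accesses to the compiler precisely, whereas the "memory" clobber pessimistically invalidates everything.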