diff mbox

xen: arm: arm64: Adding VFP save/restore support.

Message ID 1391671722-16127-1-git-send-email-pranavkumar@linaro.org
State Accepted
Commit 712eb2e04da2cbcd9908f74ebd47c6df60d6d12f
Headers show

Commit Message

PranavkumarSawargaonkar Feb. 6, 2014, 7:28 a.m. UTC
This patch adds VFP save/restore support form arm64 across context switch.

Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Anup Patel <anup.patel@linaro.org>
---
 xen/arch/arm/arm64/vfp.c        |   49 +++++++++++++++++++++++++++++++++++++++
 xen/include/asm-arm/arm64/vfp.h |    4 ++++
 2 files changed, 53 insertions(+)

Comments

Ian Campbell Feb. 6, 2014, 10:15 a.m. UTC | #1
On Thu, 2014-02-06 at 12:58 +0530, Pranavkumar Sawargaonkar wrote:
> This patch adds VFP save/restore support form arm64 across context switch.
> 
> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> Signed-off-by: Anup Patel <anup.patel@linaro.org>

This should go in for 4.4 -- not context switching floating point
registers is obviously a big problem. (bit embarrassed that I forgot
about this...)

> ---
>  xen/arch/arm/arm64/vfp.c        |   49 +++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-arm/arm64/vfp.h |    4 ++++
>  2 files changed, 53 insertions(+)
> 
> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
> index 74e6a50..8c1479a 100644
> --- a/xen/arch/arm/arm64/vfp.c
> +++ b/xen/arch/arm/arm64/vfp.c
> @@ -1,13 +1,62 @@
>  #include <xen/sched.h>
>  #include <asm/processor.h>
> +#include <asm/cpufeature.h>
>  #include <asm/vfp.h>
>  
>  void vfp_save_state(struct vcpu *v)
>  {
>      /* TODO: implement it */

You can probably remove this comment, and the one in restore, unless
there is something still left to do?

Actually, I'll do it on commit, so:
Acked-by: Ian Campbell <ian.campbell@citrix.com>

> +    if ( !cpu_has_fp )
> +        return;
> +
> +    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
> +                 "stp q2, q3, [%0, #16 * 2]\n\t"
> +                 "stp q4, q5, [%0, #16 * 4]\n\t"
> +                 "stp q6, q7, [%0, #16 * 6]\n\t"
> +                 "stp q8, q9, [%0, #16 * 8]\n\t"
> +                 "stp q10, q11, [%0, #16 * 10]\n\t"
> +                 "stp q12, q13, [%0, #16 * 12]\n\t"
> +                 "stp q14, q15, [%0, #16 * 14]\n\t"
> +                 "stp q16, q17, [%0, #16 * 16]\n\t"
> +                 "stp q18, q19, [%0, #16 * 18]\n\t"
> +                 "stp q20, q21, [%0, #16 * 20]\n\t"
> +                 "stp q22, q23, [%0, #16 * 22]\n\t"
> +                 "stp q24, q25, [%0, #16 * 24]\n\t"
> +                 "stp q26, q27, [%0, #16 * 26]\n\t"
> +                 "stp q28, q29, [%0, #16 * 28]\n\t"
> +                 "stp q30, q31, [%0, #16 * 30]\n\t"
> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> +
> +    v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
> +    v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
> +    v->arch.vfp.fpexc32_el2 = READ_SYSREG32(FPEXC32_EL2);
>  }
>  
>  void vfp_restore_state(struct vcpu *v)
>  {
>      /* TODO: implement it */
> +    if ( !cpu_has_fp )
> +        return;
> +
> +    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
> +                 "ldp q2, q3, [%0, #16 * 2]\n\t"
> +                 "ldp q4, q5, [%0, #16 * 4]\n\t"
> +                 "ldp q6, q7, [%0, #16 * 6]\n\t"
> +                 "ldp q8, q9, [%0, #16 * 8]\n\t"
> +                 "ldp q10, q11, [%0, #16 * 10]\n\t"
> +                 "ldp q12, q13, [%0, #16 * 12]\n\t"
> +                 "ldp q14, q15, [%0, #16 * 14]\n\t"
> +                 "ldp q16, q17, [%0, #16 * 16]\n\t"
> +                 "ldp q18, q19, [%0, #16 * 18]\n\t"
> +                 "ldp q20, q21, [%0, #16 * 20]\n\t"
> +                 "ldp q22, q23, [%0, #16 * 22]\n\t"
> +                 "ldp q24, q25, [%0, #16 * 24]\n\t"
> +                 "ldp q26, q27, [%0, #16 * 26]\n\t"
> +                 "ldp q28, q29, [%0, #16 * 28]\n\t"
> +                 "ldp q30, q31, [%0, #16 * 30]\n\t"
> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> +
> +    WRITE_SYSREG32(v->arch.vfp.fpsr, FPSR);
> +    WRITE_SYSREG32(v->arch.vfp.fpcr, FPCR);
> +    WRITE_SYSREG32(v->arch.vfp.fpexc32_el2, FPEXC32_EL2);
>  }
> diff --git a/xen/include/asm-arm/arm64/vfp.h b/xen/include/asm-arm/arm64/vfp.h
> index 3733d2c..373f156 100644
> --- a/xen/include/asm-arm/arm64/vfp.h
> +++ b/xen/include/asm-arm/arm64/vfp.h
> @@ -3,6 +3,10 @@
>  
>  struct vfp_state
>  {
> +    uint64_t fpregs[64];
> +    uint32_t fpcr;
> +    uint32_t fpexc32_el2;
> +    uint32_t fpsr;
>  };
>  
>  #endif /* _ARM_ARM64_VFP_H */
PranavkumarSawargaonkar Feb. 6, 2014, 10:41 a.m. UTC | #2
Hi Ian,

On 6 February 2014 15:45, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Thu, 2014-02-06 at 12:58 +0530, Pranavkumar Sawargaonkar wrote:
>> This patch adds VFP save/restore support form arm64 across context switch.
>>
>> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
>> Signed-off-by: Anup Patel <anup.patel@linaro.org>
>
> This should go in for 4.4 -- not context switching floating point
> registers is obviously a big problem. (bit embarrassed that I forgot
> about this...)
>
>> ---
>>  xen/arch/arm/arm64/vfp.c        |   49 +++++++++++++++++++++++++++++++++++++++
>>  xen/include/asm-arm/arm64/vfp.h |    4 ++++
>>  2 files changed, 53 insertions(+)
>>
>> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
>> index 74e6a50..8c1479a 100644
>> --- a/xen/arch/arm/arm64/vfp.c
>> +++ b/xen/arch/arm/arm64/vfp.c
>> @@ -1,13 +1,62 @@
>>  #include <xen/sched.h>
>>  #include <asm/processor.h>
>> +#include <asm/cpufeature.h>
>>  #include <asm/vfp.h>
>>
>>  void vfp_save_state(struct vcpu *v)
>>  {
>>      /* TODO: implement it */
>
> You can probably remove this comment, and the one in restore, unless
> there is something still left to do?
Oops, sorry let me remove and repost it.

>
> Actually, I'll do it on commit, so:
> Acked-by: Ian Campbell <ian.campbell@citrix.com>
>
>> +    if ( !cpu_has_fp )
>> +        return;
>> +
>> +    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
>> +                 "stp q2, q3, [%0, #16 * 2]\n\t"
>> +                 "stp q4, q5, [%0, #16 * 4]\n\t"
>> +                 "stp q6, q7, [%0, #16 * 6]\n\t"
>> +                 "stp q8, q9, [%0, #16 * 8]\n\t"
>> +                 "stp q10, q11, [%0, #16 * 10]\n\t"
>> +                 "stp q12, q13, [%0, #16 * 12]\n\t"
>> +                 "stp q14, q15, [%0, #16 * 14]\n\t"
>> +                 "stp q16, q17, [%0, #16 * 16]\n\t"
>> +                 "stp q18, q19, [%0, #16 * 18]\n\t"
>> +                 "stp q20, q21, [%0, #16 * 20]\n\t"
>> +                 "stp q22, q23, [%0, #16 * 22]\n\t"
>> +                 "stp q24, q25, [%0, #16 * 24]\n\t"
>> +                 "stp q26, q27, [%0, #16 * 26]\n\t"
>> +                 "stp q28, q29, [%0, #16 * 28]\n\t"
>> +                 "stp q30, q31, [%0, #16 * 30]\n\t"
>> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>> +
>> +    v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
>> +    v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
>> +    v->arch.vfp.fpexc32_el2 = READ_SYSREG32(FPEXC32_EL2);
>>  }
>>
>>  void vfp_restore_state(struct vcpu *v)
>>  {
>>      /* TODO: implement it */
>> +    if ( !cpu_has_fp )
>> +        return;
>> +
>> +    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
>> +                 "ldp q2, q3, [%0, #16 * 2]\n\t"
>> +                 "ldp q4, q5, [%0, #16 * 4]\n\t"
>> +                 "ldp q6, q7, [%0, #16 * 6]\n\t"
>> +                 "ldp q8, q9, [%0, #16 * 8]\n\t"
>> +                 "ldp q10, q11, [%0, #16 * 10]\n\t"
>> +                 "ldp q12, q13, [%0, #16 * 12]\n\t"
>> +                 "ldp q14, q15, [%0, #16 * 14]\n\t"
>> +                 "ldp q16, q17, [%0, #16 * 16]\n\t"
>> +                 "ldp q18, q19, [%0, #16 * 18]\n\t"
>> +                 "ldp q20, q21, [%0, #16 * 20]\n\t"
>> +                 "ldp q22, q23, [%0, #16 * 22]\n\t"
>> +                 "ldp q24, q25, [%0, #16 * 24]\n\t"
>> +                 "ldp q26, q27, [%0, #16 * 26]\n\t"
>> +                 "ldp q28, q29, [%0, #16 * 28]\n\t"
>> +                 "ldp q30, q31, [%0, #16 * 30]\n\t"
>> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>> +
>> +    WRITE_SYSREG32(v->arch.vfp.fpsr, FPSR);
>> +    WRITE_SYSREG32(v->arch.vfp.fpcr, FPCR);
>> +    WRITE_SYSREG32(v->arch.vfp.fpexc32_el2, FPEXC32_EL2);
>>  }
>> diff --git a/xen/include/asm-arm/arm64/vfp.h b/xen/include/asm-arm/arm64/vfp.h
>> index 3733d2c..373f156 100644
>> --- a/xen/include/asm-arm/arm64/vfp.h
>> +++ b/xen/include/asm-arm/arm64/vfp.h
>> @@ -3,6 +3,10 @@
>>
>>  struct vfp_state
>>  {
>> +    uint64_t fpregs[64];
>> +    uint32_t fpcr;
>> +    uint32_t fpexc32_el2;
>> +    uint32_t fpsr;
>>  };
>>
>>  #endif /* _ARM_ARM64_VFP_H */
>
>
Thanks,
Pranav
PranavkumarSawargaonkar Feb. 6, 2014, 10:50 a.m. UTC | #3
Hi Ian,

On 6 February 2014 15:45, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Thu, 2014-02-06 at 12:58 +0530, Pranavkumar Sawargaonkar wrote:
>> This patch adds VFP save/restore support form arm64 across context switch.
>>
>> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
>> Signed-off-by: Anup Patel <anup.patel@linaro.org>
>
> This should go in for 4.4 -- not context switching floating point
> registers is obviously a big problem. (bit embarrassed that I forgot
> about this...)
>
>> ---
>>  xen/arch/arm/arm64/vfp.c        |   49 +++++++++++++++++++++++++++++++++++++++
>>  xen/include/asm-arm/arm64/vfp.h |    4 ++++
>>  2 files changed, 53 insertions(+)
>>
>> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
>> index 74e6a50..8c1479a 100644
>> --- a/xen/arch/arm/arm64/vfp.c
>> +++ b/xen/arch/arm/arm64/vfp.c
>> @@ -1,13 +1,62 @@
>>  #include <xen/sched.h>
>>  #include <asm/processor.h>
>> +#include <asm/cpufeature.h>
>>  #include <asm/vfp.h>
>>
>>  void vfp_save_state(struct vcpu *v)
>>  {
>>      /* TODO: implement it */
>
> You can probably remove this comment, and the one in restore, unless
> there is something still left to do?
>
> Actually, I'll do it on commit, so:
Missed out above line. Please remove it during commit.

> Acked-by: Ian Campbell <ian.campbell@citrix.com>
Thanks,
Pranav
>
>> +    if ( !cpu_has_fp )
>> +        return;
>> +
>> +    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
>> +                 "stp q2, q3, [%0, #16 * 2]\n\t"
>> +                 "stp q4, q5, [%0, #16 * 4]\n\t"
>> +                 "stp q6, q7, [%0, #16 * 6]\n\t"
>> +                 "stp q8, q9, [%0, #16 * 8]\n\t"
>> +                 "stp q10, q11, [%0, #16 * 10]\n\t"
>> +                 "stp q12, q13, [%0, #16 * 12]\n\t"
>> +                 "stp q14, q15, [%0, #16 * 14]\n\t"
>> +                 "stp q16, q17, [%0, #16 * 16]\n\t"
>> +                 "stp q18, q19, [%0, #16 * 18]\n\t"
>> +                 "stp q20, q21, [%0, #16 * 20]\n\t"
>> +                 "stp q22, q23, [%0, #16 * 22]\n\t"
>> +                 "stp q24, q25, [%0, #16 * 24]\n\t"
>> +                 "stp q26, q27, [%0, #16 * 26]\n\t"
>> +                 "stp q28, q29, [%0, #16 * 28]\n\t"
>> +                 "stp q30, q31, [%0, #16 * 30]\n\t"
>> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>> +
>> +    v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
>> +    v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
>> +    v->arch.vfp.fpexc32_el2 = READ_SYSREG32(FPEXC32_EL2);
>>  }
>>
>>  void vfp_restore_state(struct vcpu *v)
>>  {
>>      /* TODO: implement it */
>> +    if ( !cpu_has_fp )
>> +        return;
>> +
>> +    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
>> +                 "ldp q2, q3, [%0, #16 * 2]\n\t"
>> +                 "ldp q4, q5, [%0, #16 * 4]\n\t"
>> +                 "ldp q6, q7, [%0, #16 * 6]\n\t"
>> +                 "ldp q8, q9, [%0, #16 * 8]\n\t"
>> +                 "ldp q10, q11, [%0, #16 * 10]\n\t"
>> +                 "ldp q12, q13, [%0, #16 * 12]\n\t"
>> +                 "ldp q14, q15, [%0, #16 * 14]\n\t"
>> +                 "ldp q16, q17, [%0, #16 * 16]\n\t"
>> +                 "ldp q18, q19, [%0, #16 * 18]\n\t"
>> +                 "ldp q20, q21, [%0, #16 * 20]\n\t"
>> +                 "ldp q22, q23, [%0, #16 * 22]\n\t"
>> +                 "ldp q24, q25, [%0, #16 * 24]\n\t"
>> +                 "ldp q26, q27, [%0, #16 * 26]\n\t"
>> +                 "ldp q28, q29, [%0, #16 * 28]\n\t"
>> +                 "ldp q30, q31, [%0, #16 * 30]\n\t"
>> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>> +
>> +    WRITE_SYSREG32(v->arch.vfp.fpsr, FPSR);
>> +    WRITE_SYSREG32(v->arch.vfp.fpcr, FPCR);
>> +    WRITE_SYSREG32(v->arch.vfp.fpexc32_el2, FPEXC32_EL2);
>>  }
>> diff --git a/xen/include/asm-arm/arm64/vfp.h b/xen/include/asm-arm/arm64/vfp.h
>> index 3733d2c..373f156 100644
>> --- a/xen/include/asm-arm/arm64/vfp.h
>> +++ b/xen/include/asm-arm/arm64/vfp.h
>> @@ -3,6 +3,10 @@
>>
>>  struct vfp_state
>>  {
>> +    uint64_t fpregs[64];
>> +    uint32_t fpcr;
>> +    uint32_t fpexc32_el2;
>> +    uint32_t fpsr;
>>  };
>>
>>  #endif /* _ARM_ARM64_VFP_H */
>
>
Ian Campbell Feb. 6, 2014, 12:40 p.m. UTC | #4
On Thu, 2014-02-06 at 10:53 +0000, George Dunlap wrote:
> On 02/06/2014 10:15 AM, Ian Campbell wrote:
> > On Thu, 2014-02-06 at 12:58 +0530, Pranavkumar Sawargaonkar wrote:
> >> This patch adds VFP save/restore support form arm64 across context switch.
> >>
> >> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> >> Signed-off-by: Anup Patel <anup.patel@linaro.org>
> > This should go in for 4.4 -- not context switching floating point
> > registers is obviously a big problem. (bit embarrassed that I forgot
> > about this...)
> 
> Yes, absolutely.
> 
> Release-acked-by: George Dunlap <george.dunlap@eu.citrix.com>

Applied, thanks all.
Julien Grall Feb. 6, 2014, 12:44 p.m. UTC | #5
Hello,

On 06/02/14 07:28, Pranavkumar Sawargaonkar wrote:
> This patch adds VFP save/restore support form arm64 across context switch.
>
> Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
> Signed-off-by: Anup Patel <anup.patel@linaro.org>
> ---
>   xen/arch/arm/arm64/vfp.c        |   49 +++++++++++++++++++++++++++++++++++++++
>   xen/include/asm-arm/arm64/vfp.h |    4 ++++
>   2 files changed, 53 insertions(+)
>
> diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
> index 74e6a50..8c1479a 100644
> --- a/xen/arch/arm/arm64/vfp.c
> +++ b/xen/arch/arm/arm64/vfp.c
> @@ -1,13 +1,62 @@
>   #include <xen/sched.h>
>   #include <asm/processor.h>
> +#include <asm/cpufeature.h>
>   #include <asm/vfp.h>
>
>   void vfp_save_state(struct vcpu *v)
>   {
>       /* TODO: implement it */
> +    if ( !cpu_has_fp )
> +        return;
> +
> +    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
> +                 "stp q2, q3, [%0, #16 * 2]\n\t"
> +                 "stp q4, q5, [%0, #16 * 4]\n\t"
> +                 "stp q6, q7, [%0, #16 * 6]\n\t"
> +                 "stp q8, q9, [%0, #16 * 8]\n\t"
> +                 "stp q10, q11, [%0, #16 * 10]\n\t"
> +                 "stp q12, q13, [%0, #16 * 12]\n\t"
> +                 "stp q14, q15, [%0, #16 * 14]\n\t"
> +                 "stp q16, q17, [%0, #16 * 16]\n\t"
> +                 "stp q18, q19, [%0, #16 * 18]\n\t"
> +                 "stp q20, q21, [%0, #16 * 20]\n\t"
> +                 "stp q22, q23, [%0, #16 * 22]\n\t"
> +                 "stp q24, q25, [%0, #16 * 24]\n\t"
> +                 "stp q26, q27, [%0, #16 * 26]\n\t"
> +                 "stp q28, q29, [%0, #16 * 28]\n\t"
> +                 "stp q30, q31, [%0, #16 * 30]\n\t"
> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");

I remember we had a discussion when I have implemented vfp context 
switch for arm32 for the memory constraints 
(http://lists.xen.org/archives/html/xen-devel/2013-06/msg00110.html).

I think you should use "=Q" also here to avoid cloberring the whole memory.

At the same time, is it necessary the (char *)?

> +
> +    v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
> +    v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
> +    v->arch.vfp.fpexc32_el2 = READ_SYSREG32(FPEXC32_EL2);
>   }
>
>   void vfp_restore_state(struct vcpu *v)
>   {
>       /* TODO: implement it */
> +    if ( !cpu_has_fp )
> +        return;
> +
> +    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
> +                 "ldp q2, q3, [%0, #16 * 2]\n\t"
> +                 "ldp q4, q5, [%0, #16 * 4]\n\t"
> +                 "ldp q6, q7, [%0, #16 * 6]\n\t"
> +                 "ldp q8, q9, [%0, #16 * 8]\n\t"
> +                 "ldp q10, q11, [%0, #16 * 10]\n\t"
> +                 "ldp q12, q13, [%0, #16 * 12]\n\t"
> +                 "ldp q14, q15, [%0, #16 * 14]\n\t"
> +                 "ldp q16, q17, [%0, #16 * 16]\n\t"
> +                 "ldp q18, q19, [%0, #16 * 18]\n\t"
> +                 "ldp q20, q21, [%0, #16 * 20]\n\t"
> +                 "ldp q22, q23, [%0, #16 * 22]\n\t"
> +                 "ldp q24, q25, [%0, #16 * 24]\n\t"
> +                 "ldp q26, q27, [%0, #16 * 26]\n\t"
> +                 "ldp q28, q29, [%0, #16 * 28]\n\t"
> +                 "ldp q30, q31, [%0, #16 * 30]\n\t"
> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");

Same here.

Regards,
Ian Campbell Feb. 6, 2014, 12:57 p.m. UTC | #6
On Thu, 2014-02-06 at 12:44 +0000, Julien Grall wrote:
> > +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> 
> I remember we had a discussion when I have implemented vfp context 
> switch for arm32 for the memory constraints 
> (http://lists.xen.org/archives/html/xen-devel/2013-06/msg00110.html).
> 
> I think you should use "=Q" also here to avoid cloberring the whole memory.

Yes, I forgot to say: I think getting something in now is the priority,
which is why I committed it, but this should be tightened up, probably
for 4.5 unless the difference is benchmarkable.

> At the same time, is it necessary the (char *)?

I missed that, yes I suspect it is unnecessary.

Ian.
Julien Grall Feb. 6, 2014, 1:08 p.m. UTC | #7
On 06/02/14 12:57, Ian Campbell wrote:
> On Thu, 2014-02-06 at 12:44 +0000, Julien Grall wrote:
>>> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>>
>> I remember we had a discussion when I have implemented vfp context
>> switch for arm32 for the memory constraints
>> (http://lists.xen.org/archives/html/xen-devel/2013-06/msg00110.html).
>>
>> I think you should use "=Q" also here to avoid cloberring the whole memory.
>
> Yes, I forgot to say: I think getting something in now is the priority,
> which is why I committed it, but this should be tightened up, probably
> for 4.5 unless the difference is benchmarkable.

The fix is very simple (a matter of 2 lines changes). I would prefer to 
delay this patch for a couple of days and having a correct 
implementation from the beginning, so we will not forgot to change the 
code for Xen 4.5.

Moreover Pranav usually answer quickly :).
Ian Campbell Feb. 6, 2014, 1:11 p.m. UTC | #8
On Thu, 2014-02-06 at 13:08 +0000, Julien Grall wrote:
> 
> On 06/02/14 12:57, Ian Campbell wrote:
> > On Thu, 2014-02-06 at 12:44 +0000, Julien Grall wrote:
> >>> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> >>
> >> I remember we had a discussion when I have implemented vfp context
> >> switch for arm32 for the memory constraints
> >> (http://lists.xen.org/archives/html/xen-devel/2013-06/msg00110.html).
> >>
> >> I think you should use "=Q" also here to avoid cloberring the whole memory.
> >
> > Yes, I forgot to say: I think getting something in now is the priority,
> > which is why I committed it, but this should be tightened up, probably
> > for 4.5 unless the difference is benchmarkable.
> 
> The fix is very simple (a matter of 2 lines changes). I would prefer to 
> delay this patch for a couple of days and having a correct 
> implementation from the beginning, so we will not forgot to change the 
> code for Xen 4.5.

And I would rather close this rather large hole right now and not wait
for two more days when we are looking at doing what might be the final
rc soon.

I had already applied before you said anything, so the point is moot.

Anyway, if someone wants to submit for 4.4 with a case for a release
exception then I'm sure George will consider it.

Otherwise this thread is now in my QUEUE-4.5 folder so I'll get a
reminder shortly after the release when I go through that.

> Moreover Pranav usually answer quickly :).

If he's awake/at work, it's out of office hours for him now.

Ian.
PranavkumarSawargaonkar Feb. 7, 2014, 5:31 a.m. UTC | #9
Hi Ian,

On 6 February 2014 18:41, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Thu, 2014-02-06 at 13:08 +0000, Julien Grall wrote:
>>
>> On 06/02/14 12:57, Ian Campbell wrote:
>> > On Thu, 2014-02-06 at 12:44 +0000, Julien Grall wrote:
>> >>> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>> >>
>> >> I remember we had a discussion when I have implemented vfp context
>> >> switch for arm32 for the memory constraints
>> >> (http://lists.xen.org/archives/html/xen-devel/2013-06/msg00110.html).
>> >>
>> >> I think you should use "=Q" also here to avoid cloberring the whole memory.
>> >
>> > Yes, I forgot to say: I think getting something in now is the priority,
>> > which is why I committed it, but this should be tightened up, probably
>> > for 4.5 unless the difference is benchmarkable.
>>
>> The fix is very simple (a matter of 2 lines changes). I would prefer to
>> delay this patch for a couple of days and having a correct
>> implementation from the beginning, so we will not forgot to change the
>> code for Xen 4.5.
>
> And I would rather close this rather large hole right now and not wait
> for two more days when we are looking at doing what might be the final
> rc soon.
>
> I had already applied before you said anything, so the point is moot.
>
> Anyway, if someone wants to submit for 4.4 with a case for a release
> exception then I'm sure George will consider it.
>
> Otherwise this thread is now in my QUEUE-4.5 folder so I'll get a
> reminder shortly after the release when I go through that.
>
>> Moreover Pranav usually answer quickly :).
>
> If he's awake/at work, it's out of office hours for him now.

: ) :) , sorry somehow I missed this yesterday.

If "=Q"  is really critical i can quickly send you a new patch against
your commit in staging branch
(http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=712eb2e04da2cbcd9908f74ebd47c6df60d6d12f)
>
> Ian.
>
Thanks,
Pranav
PranavkumarSawargaonkar Feb. 7, 2014, 10:43 a.m. UTC | #10
Hi Ian/ Julien,

On 7 February 2014 11:01, Pranavkumar Sawargaonkar
<pranavkumar@linaro.org> wrote:
> Hi Ian,
>
> On 6 February 2014 18:41, Ian Campbell <Ian.Campbell@citrix.com> wrote:
>> On Thu, 2014-02-06 at 13:08 +0000, Julien Grall wrote:
>>>
>>> On 06/02/14 12:57, Ian Campbell wrote:
>>> > On Thu, 2014-02-06 at 12:44 +0000, Julien Grall wrote:
>>> >>> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
>>> >>
>>> >> I remember we had a discussion when I have implemented vfp context
>>> >> switch for arm32 for the memory constraints
>>> >> (http://lists.xen.org/archives/html/xen-devel/2013-06/msg00110.html).
>>> >>
>>> >> I think you should use "=Q" also here to avoid cloberring the whole memory.
>>> >
>>> > Yes, I forgot to say: I think getting something in now is the priority,
>>> > which is why I committed it, but this should be tightened up, probably
>>> > for 4.5 unless the difference is benchmarkable.
>>>
>>> The fix is very simple (a matter of 2 lines changes). I would prefer to
>>> delay this patch for a couple of days and having a correct
>>> implementation from the beginning, so we will not forgot to change the
>>> code for Xen 4.5.
>>
>> And I would rather close this rather large hole right now and not wait
>> for two more days when we are looking at doing what might be the final
>> rc soon.
>>
>> I had already applied before you said anything, so the point is moot.
>>
>> Anyway, if someone wants to submit for 4.4 with a case for a release
>> exception then I'm sure George will consider it.
>>
>> Otherwise this thread is now in my QUEUE-4.5 folder so I'll get a
>> reminder shortly after the release when I go through that.
>>
>>> Moreover Pranav usually answer quickly :).
>>
>> If he's awake/at work, it's out of office hours for him now.
>
> : ) :) , sorry somehow I missed this yesterday.
>
> If "=Q"  is really critical i can quickly send you a new patch against
> your commit in staging branch
> (http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=712eb2e04da2cbcd9908f74ebd47c6df60d6d12f)

Posted a new patch with the desired fix -
"xen: arm: arm64: Fix memory cloberring issues during VFP save restore."

>>
>> Ian.
>>
> Thanks,
> Pranav
Thanks,
Pranav
Ian Campbell Feb. 7, 2014, 3:33 p.m. UTC | #11
On Fri, 2014-02-07 at 15:28 +0000, George Dunlap wrote:
> On Thu, Feb 6, 2014 at 1:11 PM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> > On Thu, 2014-02-06 at 13:08 +0000, Julien Grall wrote:
> >>
> >> On 06/02/14 12:57, Ian Campbell wrote:
> >> > On Thu, 2014-02-06 at 12:44 +0000, Julien Grall wrote:
> >> >>> +                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
> >> >>
> >> >> I remember we had a discussion when I have implemented vfp context
> >> >> switch for arm32 for the memory constraints
> >> >> (http://lists.xen.org/archives/html/xen-devel/2013-06/msg00110.html).
> >> >>
> >> >> I think you should use "=Q" also here to avoid cloberring the whole memory.
> >> >
> >> > Yes, I forgot to say: I think getting something in now is the priority,
> >> > which is why I committed it, but this should be tightened up, probably
> >> > for 4.5 unless the difference is benchmarkable.
> >>
> >> The fix is very simple (a matter of 2 lines changes). I would prefer to
> >> delay this patch for a couple of days and having a correct
> >> implementation from the beginning, so we will not forgot to change the
> >> code for Xen 4.5.
> >
> > And I would rather close this rather large hole right now and not wait
> > for two more days when we are looking at doing what might be the final
> > rc soon.
> >
> > I had already applied before you said anything, so the point is moot.
> 
> But just for future reference: Be on your guard against the "hurry and
> get in" mentality; it's likely to be counterproductive.  I'd rather
> have *more* care taken with the patches at this point than normally.

This was not a case of a careless rush to get something in. It was my
opinion that the minor shortcoming wasn't worth waiting for compared
with the benefit of getting that patch in sooner rather than later.

Note that the patch was not in any way wrong.

> If that means making a case to delay the release, that's what it will
> take.  This would obviously have been a complete blocker -- there's no
> way we could in good release without FPU support for guests. :-)
> 
>  -George
diff mbox

Patch

diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
index 74e6a50..8c1479a 100644
--- a/xen/arch/arm/arm64/vfp.c
+++ b/xen/arch/arm/arm64/vfp.c
@@ -1,13 +1,62 @@ 
 #include <xen/sched.h>
 #include <asm/processor.h>
+#include <asm/cpufeature.h>
 #include <asm/vfp.h>
 
 void vfp_save_state(struct vcpu *v)
 {
     /* TODO: implement it */
+    if ( !cpu_has_fp )
+        return;
+
+    asm volatile("stp q0, q1, [%0, #16 * 0]\n\t"
+                 "stp q2, q3, [%0, #16 * 2]\n\t"
+                 "stp q4, q5, [%0, #16 * 4]\n\t"
+                 "stp q6, q7, [%0, #16 * 6]\n\t"
+                 "stp q8, q9, [%0, #16 * 8]\n\t"
+                 "stp q10, q11, [%0, #16 * 10]\n\t"
+                 "stp q12, q13, [%0, #16 * 12]\n\t"
+                 "stp q14, q15, [%0, #16 * 14]\n\t"
+                 "stp q16, q17, [%0, #16 * 16]\n\t"
+                 "stp q18, q19, [%0, #16 * 18]\n\t"
+                 "stp q20, q21, [%0, #16 * 20]\n\t"
+                 "stp q22, q23, [%0, #16 * 22]\n\t"
+                 "stp q24, q25, [%0, #16 * 24]\n\t"
+                 "stp q26, q27, [%0, #16 * 26]\n\t"
+                 "stp q28, q29, [%0, #16 * 28]\n\t"
+                 "stp q30, q31, [%0, #16 * 30]\n\t"
+                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
+
+    v->arch.vfp.fpsr = READ_SYSREG32(FPSR);
+    v->arch.vfp.fpcr = READ_SYSREG32(FPCR);
+    v->arch.vfp.fpexc32_el2 = READ_SYSREG32(FPEXC32_EL2);
 }
 
 void vfp_restore_state(struct vcpu *v)
 {
     /* TODO: implement it */
+    if ( !cpu_has_fp )
+        return;
+
+    asm volatile("ldp q0, q1, [%0, #16 * 0]\n\t"
+                 "ldp q2, q3, [%0, #16 * 2]\n\t"
+                 "ldp q4, q5, [%0, #16 * 4]\n\t"
+                 "ldp q6, q7, [%0, #16 * 6]\n\t"
+                 "ldp q8, q9, [%0, #16 * 8]\n\t"
+                 "ldp q10, q11, [%0, #16 * 10]\n\t"
+                 "ldp q12, q13, [%0, #16 * 12]\n\t"
+                 "ldp q14, q15, [%0, #16 * 14]\n\t"
+                 "ldp q16, q17, [%0, #16 * 16]\n\t"
+                 "ldp q18, q19, [%0, #16 * 18]\n\t"
+                 "ldp q20, q21, [%0, #16 * 20]\n\t"
+                 "ldp q22, q23, [%0, #16 * 22]\n\t"
+                 "ldp q24, q25, [%0, #16 * 24]\n\t"
+                 "ldp q26, q27, [%0, #16 * 26]\n\t"
+                 "ldp q28, q29, [%0, #16 * 28]\n\t"
+                 "ldp q30, q31, [%0, #16 * 30]\n\t"
+                 :: "r" ((char *)(&v->arch.vfp.fpregs)): "memory");
+
+    WRITE_SYSREG32(v->arch.vfp.fpsr, FPSR);
+    WRITE_SYSREG32(v->arch.vfp.fpcr, FPCR);
+    WRITE_SYSREG32(v->arch.vfp.fpexc32_el2, FPEXC32_EL2);
 }
diff --git a/xen/include/asm-arm/arm64/vfp.h b/xen/include/asm-arm/arm64/vfp.h
index 3733d2c..373f156 100644
--- a/xen/include/asm-arm/arm64/vfp.h
+++ b/xen/include/asm-arm/arm64/vfp.h
@@ -3,6 +3,10 @@ 
 
 struct vfp_state
 {
+    uint64_t fpregs[64];
+    uint32_t fpcr;
+    uint32_t fpexc32_el2;
+    uint32_t fpsr;
 };
 
 #endif /* _ARM_ARM64_VFP_H */