diff mbox

[v2,2/2] xen/arm32: implement VFP context switch

Message ID 1369929719-26298-3-git-send-email-julien.grall@linaro.org
State Superseded, archived
Headers show

Commit Message

Julien Grall May 30, 2013, 4:01 p.m. UTC
Add support for VFP context switch on arm32 and a dummy support for arm64

Signed-off-by: Julien Grall <julien.grall@linaro.org>

Changes in v2:
    - Fix all the small errors (type, lost headers...)
    - Add some comments
---
 xen/arch/arm/arm32/Makefile     |    1 +
 xen/arch/arm/arm32/vfp.c        |   71 +++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/arm64/Makefile     |    1 +
 xen/arch/arm/arm64/vfp.c        |   13 +++++++
 xen/arch/arm/domain.c           |    7 ++--
 xen/include/asm-arm/arm32/vfp.h |   29 ++++++++++++++++
 xen/include/asm-arm/arm64/vfp.h |   16 +++++++++
 xen/include/asm-arm/cpregs.h    |    9 +++++
 xen/include/asm-arm/domain.h    |    4 +++
 xen/include/asm-arm/vfp.h       |   25 ++++++++++++++
 10 files changed, 174 insertions(+), 2 deletions(-)
 create mode 100644 xen/arch/arm/arm32/vfp.c
 create mode 100644 xen/arch/arm/arm64/vfp.c
 create mode 100644 xen/include/asm-arm/arm32/vfp.h
 create mode 100644 xen/include/asm-arm/arm64/vfp.h
 create mode 100644 xen/include/asm-arm/vfp.h

Comments

Ian Campbell May 31, 2013, 2:12 p.m. UTC | #1
On Thu, 2013-05-30 at 17:01 +0100, Julien Grall wrote:
> +void vfp_save_state(struct vcpu *v)
> +{
> +    uint32_t tmp;
> +
> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);

The docs seem to call for reading this via an explicit VMRS
instruction. 

Looking at the ARM ARM this seems to be an alias for the encoding of an
MRC instruction corresponding to reading FPEXC as you have done. Did you
have a reference for that aliasing? (I'm not finding it in the ARM ARM).

Are you avoiding the mnemonic to avoid issues with binutils providing
the instruction?

> +
> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);

This being a CP write, do we need an isb? Will this write complete
before the following read from FPSCR otherwise?

> +
> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
> +
> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
> +    {
> +        v->arch.vfp.fpinst = READ_CP32(FPINST);

Do we need to check that the sub arch in FPSID is vfp common subarch 1
(or 2 or 3?) here? Or better if that's the only thing we support we
could check during boot / outside the hot path.

> +
> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
> +        /* Disable FPEXC_EX */
> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
> +    }
> +
> +    /* Save {d0-d15} */
> +    asm volatile("stc p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs1));

Don't you need a memory clobber of the array at v->arch.vfp.fpregs1?

> +
> +    tmp = READ_CP32(MVFR0);
> +    if ( (tmp & MVFR0_A_SIMD_MASK) == 2 ) /* 32 x 64 bits registers */

tmp isn't really needed here.

> +    {
> +        /* Save {d16-d31} */
> +        asm volatile("stcl p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs2));

Likewise a clobber here.

> +    }
> +
> +    WRITE_CP32(v->arch.vfp.fpexc & ~(FPEXC_EN), FPEXC);

Is this needed? On restore we set it to whatever the next VCPU was
using. In particular if barriers turn out to be required it would be
good to omit as many of these as possible, including the ones where we
turn it on if the guest already had it enabled etc.

Do you know what happens to the values of d0..d31  and FPSCR etc when
FPEXC.EN is disabled? I'm wondering if we can avoid saving the registers
in that case, we'd still need to restore to prevent leaking the last
guests state if the VCPU enabled exceptions. I suppose it's not worth it
unless we implement a more full featured lazy saving regime.

> +}
> +
> +void vfp_restore_state(struct vcpu *v)
> +{

Some of the same comments (tmps, barriers etc) apply to restore as to
save.

Is there any chance that any of these loads could cause an FP fault?
e.g. if the guest had an FP exception pending when we saved it, could
reloading the register retrigger it?

> +    uint32_t tmp = READ_CP32(FPEXC);
> +
> +    WRITE_CP32(tmp | FPEXC_EN, FPEXC);
> +
> +    /* Restore {d0-d15} */
> +    asm volatile("ldc p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs1));
> +
> +    tmp = READ_CP32(MVFR0);
> +    if ( (tmp & MVFR0_A_SIMD_MASK) == 2 ) /* 32 x 64 bits registers */
> +        /* Restore {d16-d31} */
> +        asm volatile("ldcl p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs2));
> +
> +    if ( v->arch.vfp.fpexc & FPEXC_EX )
> +    {
> +        WRITE_CP32(v->arch.vfp.fpinst, FPINST);
> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
> +            WRITE_CP32(v->arch.vfp.fpinst2, FPINST2);
> +    }
> +
> +    WRITE_CP32(v->arch.vfp.fpscr, FPSCR);
> +
> +    WRITE_CP32(v->arch.vfp.fpexc, FPEXC);
> +}
> +
> +/*
> + * Local variables:
> + * mode: C
> + * c-file-style: "BSD"
> + * c-basic-offset: 4
> + * indent-tabs-mode: nil
> + * End:
> + */
> diff --git a/xen/include/asm-arm/cpregs.h b/xen/include/asm-arm/cpregs.h
> index f08d59a..6d7d6ae 100644
> --- a/xen/include/asm-arm/cpregs.h
> +++ b/xen/include/asm-arm/cpregs.h
> @@ -60,6 +60,14 @@
>   * arguments, which are cp,opc1,crn,crm,opc2.
>   */
>  
> +/* Coprocessor 10 */
> +
> +#define FPSCR           p10,7,c1,c0,0   /* Floating-Point Status and Control Register */
> +#define MVFR0           p10,7,c7,c0,0   /* Media and VFP Feature Register 0 */
> +#define FPEXC           p10,7,c8,c0,0   /* Floating-Point Exception Control Register */
> +#define FPINST          p10,7,c9,c0,0   /* Floating-Point Instruction Register */
> +#define FPINST2         p10,7,c10,c0,0  /* Floating-point Instruction Register 2 */
> +
>  /* Coprocessor 14 */
>  
>  /* CP14 CR0: */
> @@ -106,6 +114,7 @@
>  #define NSACR           p15,0,c1,c1,2   /* Non-Secure Access Control Register */
>  #define HSCTLR          p15,4,c1,c0,0   /* Hyp. System Control Register */
>  #define HCR             p15,4,c1,c1,0   /* Hyp. Configuration Register */
> +#define HCPTR           p15,4,c1,c1,2   /* Hyp. Coprocessor Trap Register */

Is this intentionally unused? I don't mind adding unused register
definitions, just wanted to check there wasn't a hunk missing or
anything.

The register resets to 0x0 which is OK but it might be wise to trap all
accesses the coprocessors which we haven't implemented ctx switching
for? So at least we find out about missing ones instead of silently
leaking information between guests and/or corrupting their state.

Ian.
Julien Grall May 31, 2013, 3:27 p.m. UTC | #2
On 05/31/2013 03:12 PM, Ian Campbell wrote:

> On Thu, 2013-05-30 at 17:01 +0100, Julien Grall wrote:
>> +void vfp_save_state(struct vcpu *v)
>> +{
>> +    uint32_t tmp;
>> +
>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> 
> The docs seem to call for reading this via an explicit VMRS
> instruction. 
> 
> Looking at the ARM ARM this seems to be an alias for the encoding of an
> MRC instruction corresponding to reading FPEXC as you have done. Did you
> have a reference for that aliasing? (I'm not finding it in the ARM ARM).


I didn't find anything on the aliasing. I looked at the linux VFP
include (arch/arm/include/asm/vfp.h).

> Are you avoiding the mnemonic to avoid issues with binutils providing
> the instruction?

This mnemonic can only be used if VFP is enabled by gcc. I think it's
safer to use mrc if we don't want to use VFP on the other part of XEN.

>> +
>> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
> 
> This being a CP write, do we need an isb? Will this write complete
> before the following read from FPSCR otherwise?


I will add an isb and only call this part if VFP is not enabled.

> 
>> +
>> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
>> +
>> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
>> +    {
>> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
> 
> Do we need to check that the sub arch in FPSID is vfp common subarch 1
> (or 2 or 3?) here? Or better if that's the only thing we support we
> could check during boot / outside the hot path.

I will add a check during boot to verify if the VFP is in version 3.

>> +
>> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
>> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
>> +        /* Disable FPEXC_EX */
>> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
>> +    }
>> +
>> +    /* Save {d0-d15} */
>> +    asm volatile("stc p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs1));
> 
> Don't you need a memory clobber of the array at v->arch.vfp.fpregs1?

Indeed. I will fix it on the next version.

>> +
>> +    tmp = READ_CP32(MVFR0);
>> +    if ( (tmp & MVFR0_A_SIMD_MASK) == 2 ) /* 32 x 64 bits registers */
> 
> tmp isn't really needed here.

I will remove it.

> 
>> +    {
>> +        /* Save {d16-d31} */
>> +        asm volatile("stcl p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs2));
> 
> Likewise a clobber here.
> 
>> +    }
>> +
>> +    WRITE_CP32(v->arch.vfp.fpexc & ~(FPEXC_EN), FPEXC);
> 
> Is this needed? On restore we set it to whatever the next VCPU was
> using. In particular if barriers turn out to be required it would be
> good to omit as many of these as possible, including the ones where we
> turn it on if the guest already had it enabled etc.


No if we consider that nobody will use VFP in Xen. I can remove it.

> Do you know what happens to the values of d0..d31  and FPSCR etc when

> FPEXC.EN is disabled? I'm wondering if we can avoid saving the registers
> in that case, we'd still need to restore to prevent leaking the last
> guests state if the VCPU enabled exceptions. I suppose it's not worth it
> unless we implement a more full featured lazy saving regime.


I didn't find something about the state of the registers when VFP is
disabled. I think the registers is not clobbered.
Linux seems to save vfp registers at each context switch only if VFP is
enabled. But we cannot rely on that for the other distributions.

>> +}
>> +
>> +void vfp_restore_state(struct vcpu *v)
>> +{
> 
> Some of the same comments (tmps, barriers etc) apply to restore as to
> save.
> 
> Is there any chance that any of these loads could cause an FP fault?
> e.g. if the guest had an FP exception pending when we saved it, could
> reloading the register retrigger it?

I don't know.

>> +    uint32_t tmp = READ_CP32(FPEXC);
>> +
>> +    WRITE_CP32(tmp | FPEXC_EN, FPEXC);
>> +
>> +    /* Restore {d0-d15} */
>> +    asm volatile("ldc p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs1));
>> +
>> +    tmp = READ_CP32(MVFR0);
>> +    if ( (tmp & MVFR0_A_SIMD_MASK) == 2 ) /* 32 x 64 bits registers */
>> +        /* Restore {d16-d31} */
>> +        asm volatile("ldcl p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs2));
>> +
>> +    if ( v->arch.vfp.fpexc & FPEXC_EX )
>> +    {
>> +        WRITE_CP32(v->arch.vfp.fpinst, FPINST);
>> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
>> +            WRITE_CP32(v->arch.vfp.fpinst2, FPINST2);
>> +    }
>> +
>> +    WRITE_CP32(v->arch.vfp.fpscr, FPSCR);
>> +
>> +    WRITE_CP32(v->arch.vfp.fpexc, FPEXC);
>> +}
>> +
>> +/*
>> + * Local variables:
>> + * mode: C
>> + * c-file-style: "BSD"
>> + * c-basic-offset: 4
>> + * indent-tabs-mode: nil
>> + * End:
>> + */
>> diff --git a/xen/include/asm-arm/cpregs.h b/xen/include/asm-arm/cpregs.h
>> index f08d59a..6d7d6ae 100644
>> --- a/xen/include/asm-arm/cpregs.h
>> +++ b/xen/include/asm-arm/cpregs.h
>> @@ -60,6 +60,14 @@
>>   * arguments, which are cp,opc1,crn,crm,opc2.
>>   */
>>  
>> +/* Coprocessor 10 */
>> +
>> +#define FPSCR           p10,7,c1,c0,0   /* Floating-Point Status and Control Register */
>> +#define MVFR0           p10,7,c7,c0,0   /* Media and VFP Feature Register 0 */
>> +#define FPEXC           p10,7,c8,c0,0   /* Floating-Point Exception Control Register */
>> +#define FPINST          p10,7,c9,c0,0   /* Floating-Point Instruction Register */
>> +#define FPINST2         p10,7,c10,c0,0  /* Floating-point Instruction Register 2 */
>> +
>>  /* Coprocessor 14 */
>>  
>>  /* CP14 CR0: */
>> @@ -106,6 +114,7 @@
>>  #define NSACR           p15,0,c1,c1,2   /* Non-Secure Access Control Register */
>>  #define HSCTLR          p15,4,c1,c0,0   /* Hyp. System Control Register */
>>  #define HCR             p15,4,c1,c1,0   /* Hyp. Configuration Register */
>> +#define HCPTR           p15,4,c1,c1,2   /* Hyp. Coprocessor Trap Register */
> 
> Is this intentionally unused? I don't mind adding unused register
> definitions, just wanted to check there wasn't a hunk missing or
> anything.


Another lost when I have created the patch. I was using it to trap VFP
when I tested the context switch.

> The register resets to 0x0 which is OK but it might be wise to trap all
> accesses the coprocessors which we haven't implemented ctx switching
> for? So at least we find out about missing ones instead of silently
> leaking information between guests and/or corrupting their state.


What about sending an undefined instruction to the guest if the
coprocessor is not implemented?
It seems that some software (sshd, ntpdate) which are using libcrypto,
are trying to access to the cryptographic coprocessor even if it doesn't
exist.

Cheers,
Ian Campbell May 31, 2013, 3:54 p.m. UTC | #3
On Fri, 2013-05-31 at 16:27 +0100, Julien Grall wrote:
> On 05/31/2013 03:12 PM, Ian Campbell wrote:
> 
> > On Thu, 2013-05-30 at 17:01 +0100, Julien Grall wrote:
> >> +void vfp_save_state(struct vcpu *v)
> >> +{
> >> +    uint32_t tmp;
> >> +
> >> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> > 
> > The docs seem to call for reading this via an explicit VMRS
> > instruction. 
> > 
> > Looking at the ARM ARM this seems to be an alias for the encoding of an
> > MRC instruction corresponding to reading FPEXC as you have done. Did you
> > have a reference for that aliasing? (I'm not finding it in the ARM ARM).
> 
> 
> I didn't find anything on the aliasing. I looked at the linux VFP
> include (arch/arm/include/asm/vfp.h).

I wonder if it would be better to do this via VFP specific macros?

Probably not.

> > Are you avoiding the mnemonic to avoid issues with binutils providing
> > the instruction?
> 
> This mnemonic can only be used if VFP is enabled by gcc. I think it's
> safer to use mrc if we don't want to use VFP on the other part of XEN.

Agreed.

> > Do you know what happens to the values of d0..d31  and FPSCR etc when
> 
> > FPEXC.EN is disabled? I'm wondering if we can avoid saving the registers
> > in that case, we'd still need to restore to prevent leaking the last
> > guests state if the VCPU enabled exceptions. I suppose it's not worth it
> > unless we implement a more full featured lazy saving regime.
> 
> 
> I didn't find something about the state of the registers when VFP is
> disabled. I think the registers is not clobbered.
> Linux seems to save vfp registers at each context switch only if VFP is
> enabled. But we cannot rely on that for the other distributions.

Linux likely traps if the process then goes on to use VFP, at which
point it can clear/restore the registers as necessary. We could do
something similar using HCPTR -- that's what I meant by lazy saving (and
restore).

> 
> >> +}
> >> +
> >> +void vfp_restore_state(struct vcpu *v)
> >> +{
> > 
> > Some of the same comments (tmps, barriers etc) apply to restore as to
> > save.
> > 
> > Is there any chance that any of these loads could cause an FP fault?
> > e.g. if the guest had an FP exception pending when we saved it, could
> > reloading the register retrigger it?
> 
> I don't know.

Hrm :-/

If there are no invalid encodings for d0..d31 then it should just be a
case of checking the ARM ARM for FPINST* and FPSCR.

> >> @@ -106,6 +114,7 @@
> >>  #define NSACR           p15,0,c1,c1,2   /* Non-Secure Access Control Register */
> >>  #define HSCTLR          p15,4,c1,c0,0   /* Hyp. System Control Register */
> >>  #define HCR             p15,4,c1,c1,0   /* Hyp. Configuration Register */
> >> +#define HCPTR           p15,4,c1,c1,2   /* Hyp. Coprocessor Trap Register */
> > 
> > Is this intentionally unused? I don't mind adding unused register
> > definitions, just wanted to check there wasn't a hunk missing or
> > anything.
> 
> 
> Another lost when I have created the patch. I was using it to trap VFP
> when I tested the context switch.

OK.

> 
> > The register resets to 0x0 which is OK but it might be wise to trap all
> > accesses the coprocessors which we haven't implemented ctx switching
> > for? So at least we find out about missing ones instead of silently
> > leaking information between guests and/or corrupting their state.
> 
> 
> What about sending an undefined instruction to the guest if the
> coprocessor is not implemented?
> It seems that some software (sshd, ntpdate) which are using libcrypto,
> are trying to access to the cryptographic coprocessor even if it doesn't
> exist.

Urk. We should either ctx switch it properly or we should hide it
properly (from all the feature registers) and inject undef.

Ian.
Julien Grall June 3, 2013, 11:10 a.m. UTC | #4
On 05/31/2013 03:12 PM, Ian Campbell wrote:

> On Thu, 2013-05-30 at 17:01 +0100, Julien Grall wrote:
>> +void vfp_save_state(struct vcpu *v)
>> +{
>> +    uint32_t tmp;
>> +
>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> 
> The docs seem to call for reading this via an explicit VMRS
> instruction. 
> 
> Looking at the ARM ARM this seems to be an alias for the encoding of an
> MRC instruction corresponding to reading FPEXC as you have done. Did you
> have a reference for that aliasing? (I'm not finding it in the ARM ARM).
> 
> Are you avoiding the mnemonic to avoid issues with binutils providing
> the instruction?
> 
>> +
>> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
> 
> This being a CP write, do we need an isb? Will this write complete
> before the following read from FPSCR otherwise?


In fact, isb seems to be unnecessary:
"Writes to the FPEXC can have side-effects on various aspects of
processor operation. All of these side-effects are
synchronous to the FPEXC write. This means they are guaranteed not to be
visible to earlier instructions in the
execution stream, and they are guaranteed to be visible to later
instructions in the execution stream."
Julien Grall June 3, 2013, 11:14 a.m. UTC | #5
On 05/31/2013 04:54 PM, Ian Campbell wrote:

> On Fri, 2013-05-31 at 16:27 +0100, Julien Grall wrote:
>> On 05/31/2013 03:12 PM, Ian Campbell wrote:
>>
>>> On Thu, 2013-05-30 at 17:01 +0100, Julien Grall wrote:
>>>> +void vfp_save_state(struct vcpu *v)
>>>> +{
>>>> +    uint32_t tmp;
>>>> +
>>>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
>>>
>>> The docs seem to call for reading this via an explicit VMRS
>>> instruction. 
>>>
>>> Looking at the ARM ARM this seems to be an alias for the encoding of an
>>> MRC instruction corresponding to reading FPEXC as you have done. Did you
>>> have a reference for that aliasing? (I'm not finding it in the ARM ARM).
>>
>>
>> I didn't find anything on the aliasing. I looked at the linux VFP
>> include (arch/arm/include/asm/vfp.h).
> 
> I wonder if it would be better to do this via VFP specific macros?


Except in this code we don't need the VFP macros. I don't see specific
reason to use it here.

> Probably not.
> 
>>> Are you avoiding the mnemonic to avoid issues with binutils providing
>>> the instruction?
>>
>> This mnemonic can only be used if VFP is enabled by gcc. I think it's
>> safer to use mrc if we don't want to use VFP on the other part of XEN.
> 
> Agreed.
> 
>>> Do you know what happens to the values of d0..d31  and FPSCR etc when
>>
>>> FPEXC.EN is disabled? I'm wondering if we can avoid saving the registers
>>> in that case, we'd still need to restore to prevent leaking the last
>>> guests state if the VCPU enabled exceptions. I suppose it's not worth it
>>> unless we implement a more full featured lazy saving regime.
>>
>>
>> I didn't find something about the state of the registers when VFP is
>> disabled. I think the registers is not clobbered.
>> Linux seems to save vfp registers at each context switch only if VFP is
>> enabled. But we cannot rely on that for the other distributions.
> 
> Linux likely traps if the process then goes on to use VFP, at which
> point it can clear/restore the registers as necessary. We could do
> something similar using HCPTR -- that's what I meant by lazy saving (and
> restore).

I will not have time before 4.3 to implement lazy context switch. I
think we can postpone this part and see later.

>>
>>>> +}
>>>> +
>>>> +void vfp_restore_state(struct vcpu *v)
>>>> +{
>>>
>>> Some of the same comments (tmps, barriers etc) apply to restore as to
>>> save.
>>>
>>> Is there any chance that any of these loads could cause an FP fault?
>>> e.g. if the guest had an FP exception pending when we saved it, could
>>> reloading the register retrigger it?
>>
>> I don't know.
> 
> Hrm :-/
> 
> If there are no invalid encodings for d0..d31 then it should just be a
> case of checking the ARM ARM for FPINST* and FPSCR.


For FPINST*:
"Any value read from a Floating-Point Instruction Register can be
written back to the same register. This means
context switch and debugger software can save and restore Floating-Point
Instruction Register values."

For FPSCR:
"Writes to the FPSCR can have side-effects on various aspects of
processor operation. All of these side-effects are
synchronous to the FPSCR write. This means they are guaranteed not to be
visible to earlier instructions in the
execution stream, and they are guaranteed to be visible to later
instructions in the execution stream."

Except if I missed something in FPSCR encoding, we are safe during the
restore step.

>>>> @@ -106,6 +114,7 @@
>>>>  #define NSACR           p15,0,c1,c1,2   /* Non-Secure Access Control Register */
>>>>  #define HSCTLR          p15,4,c1,c0,0   /* Hyp. System Control Register */
>>>>  #define HCR             p15,4,c1,c1,0   /* Hyp. Configuration Register */
>>>> +#define HCPTR           p15,4,c1,c1,2   /* Hyp. Coprocessor Trap Register */
>>>
>>> Is this intentionally unused? I don't mind adding unused register
>>> definitions, just wanted to check there wasn't a hunk missing or
>>> anything.
>>
>>
>> Another lost when I have created the patch. I was using it to trap VFP
>> when I tested the context switch.
> 
> OK.
> 
>>
>>> The register resets to 0x0 which is OK but it might be wise to trap all
>>> accesses the coprocessors which we haven't implemented ctx switching
>>> for? So at least we find out about missing ones instead of silently
>>> leaking information between guests and/or corrupting their state.
>>
>>
>> What about sending an undefined instruction to the guest if the
>> coprocessor is not implemented?
>> It seems that some software (sshd, ntpdate) which are using libcrypto,
>> are trying to access to the cryptographic coprocessor even if it doesn't
>> exist.
> 
> Urk. We should either ctx switch it properly or we should hide it
> properly (from all the feature registers) and inject undef.


I will try to write a patch for that when I will have time.
Ian Campbell June 3, 2013, 11:23 a.m. UTC | #6
On Mon, 2013-06-03 at 12:10 +0100, Julien Grall wrote:
> On 05/31/2013 03:12 PM, Ian Campbell wrote:
> 
> > On Thu, 2013-05-30 at 17:01 +0100, Julien Grall wrote:
> >> +void vfp_save_state(struct vcpu *v)
> >> +{
> >> +    uint32_t tmp;
> >> +
> >> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> > 
> > The docs seem to call for reading this via an explicit VMRS
> > instruction. 
> > 
> > Looking at the ARM ARM this seems to be an alias for the encoding of an
> > MRC instruction corresponding to reading FPEXC as you have done. Did you
> > have a reference for that aliasing? (I'm not finding it in the ARM ARM).
> > 
> > Are you avoiding the mnemonic to avoid issues with binutils providing
> > the instruction?
> > 
> >> +
> >> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
> > 
> > This being a CP write, do we need an isb? Will this write complete
> > before the following read from FPSCR otherwise?
> 
> 
> In fact, isb seems to be unnecessary:
> "Writes to the FPEXC can have side-effects on various aspects of
> processor operation. All of these side-effects are
> synchronous to the FPEXC write. This means they are guaranteed not to be
> visible to earlier instructions in the
> execution stream, and they are guaranteed to be visible to later
> instructions in the execution stream."

Excellent, thanks for tracking down those words ;-)

Ian.
Ian Campbell June 3, 2013, 11:25 a.m. UTC | #7
On Mon, 2013-06-03 at 12:14 +0100, Julien Grall wrote:
> On 05/31/2013 04:54 PM, Ian Campbell wrote:
> 
> > On Fri, 2013-05-31 at 16:27 +0100, Julien Grall wrote:
> >> On 05/31/2013 03:12 PM, Ian Campbell wrote:
> >>
> >>> On Thu, 2013-05-30 at 17:01 +0100, Julien Grall wrote:
> >>>> +void vfp_save_state(struct vcpu *v)
> >>>> +{
> >>>> +    uint32_t tmp;
> >>>> +
> >>>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> >>>
> >>> The docs seem to call for reading this via an explicit VMRS
> >>> instruction. 
> >>>
> >>> Looking at the ARM ARM this seems to be an alias for the encoding of an
> >>> MRC instruction corresponding to reading FPEXC as you have done. Did you
> >>> have a reference for that aliasing? (I'm not finding it in the ARM ARM).
> >>
> >>
> >> I didn't find anything on the aliasing. I looked at the linux VFP
> >> include (arch/arm/include/asm/vfp.h).
> > 
> > I wonder if it would be better to do this via VFP specific macros?
> 
> 
> Except in this code we don't need the VFP macros. I don't see specific
> reason to use it here.
> 
> > Probably not.

As you can see I agree ;-)

> > 
> >>> Are you avoiding the mnemonic to avoid issues with binutils providing
> >>> the instruction?
> >>
> >> This mnemonic can only be used if VFP is enabled by gcc. I think it's
> >> safer to use mrc if we don't want to use VFP on the other part of XEN.
> > 
> > Agreed.
> > 
> >>> Do you know what happens to the values of d0..d31  and FPSCR etc when
> >>
> >>> FPEXC.EN is disabled? I'm wondering if we can avoid saving the registers
> >>> in that case, we'd still need to restore to prevent leaking the last
> >>> guests state if the VCPU enabled exceptions. I suppose it's not worth it
> >>> unless we implement a more full featured lazy saving regime.
> >>
> >>
> >> I didn't find something about the state of the registers when VFP is
> >> disabled. I think the registers is not clobbered.
> >> Linux seems to save vfp registers at each context switch only if VFP is
> >> enabled. But we cannot rely on that for the other distributions.
> > 
> > Linux likely traps if the process then goes on to use VFP, at which
> > point it can clear/restore the registers as necessary. We could do
> > something similar using HCPTR -- that's what I meant by lazy saving (and
> > restore).
> 
> I will not have time before 4.3 to implement lazy context switch. I
> think we can postpone this part and see later.

Yes, sorry I should have been clearer -- not worth it until/if we
implement lazy switching post 4.3 is what I meant to say.

> 
> >>
> >>>> +}
> >>>> +
> >>>> +void vfp_restore_state(struct vcpu *v)
> >>>> +{
> >>>
> >>> Some of the same comments (tmps, barriers etc) apply to restore as to
> >>> save.
> >>>
> >>> Is there any chance that any of these loads could cause an FP fault?
> >>> e.g. if the guest had an FP exception pending when we saved it, could
> >>> reloading the register retrigger it?
> >>
> >> I don't know.
> > 
> > Hrm :-/
> > 
> > If there are no invalid encodings for d0..d31 then it should just be a
> > case of checking the ARM ARM for FPINST* and FPSCR.
> 
> 
> For FPINST*:
> "Any value read from a Floating-Point Instruction Register can be
> written back to the same register. This means
> context switch and debugger software can save and restore Floating-Point
> Instruction Register values."

Good.

> For FPSCR:
> "Writes to the FPSCR can have side-effects on various aspects of
> processor operation. All of these side-effects are
> synchronous to the FPSCR write. This means they are guaranteed not to be
> visible to earlier instructions in the
> execution stream, and they are guaranteed to be visible to later
> instructions in the execution stream."
> 
> Except if I missed something in FPSCR encoding, we are safe during the
> restore step.

Yes, sounds like it. Thanks.

> >>> The register resets to 0x0 which is OK but it might be wise to trap all
> >>> accesses the coprocessors which we haven't implemented ctx switching
> >>> for? So at least we find out about missing ones instead of silently
> >>> leaking information between guests and/or corrupting their state.
> >>
> >>
> >> What about sending an undefined instruction to the guest if the
> >> coprocessor is not implemented?
> >> It seems that some software (sshd, ntpdate) which are using libcrypto,
> >> are trying to access to the cryptographic coprocessor even if it doesn't
> >> exist.
> > 
> > Urk. We should either ctx switch it properly or we should hide it
> > properly (from all the feature registers) and inject undef.
> 
> 
> I will try to write a patch for that when I will have time.


Thank you.

Ian.
diff mbox

Patch

diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
index aaf277a..b903803 100644
--- a/xen/arch/arm/arm32/Makefile
+++ b/xen/arch/arm/arm32/Makefile
@@ -6,5 +6,6 @@  obj-y += proc-ca15.o
 
 obj-y += traps.o
 obj-y += domain.o
+obj-y += vfp.o
 
 obj-$(EARLY_PRINTK) += debug.o
diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
new file mode 100644
index 0000000..16f635a
--- /dev/null
+++ b/xen/arch/arm/arm32/vfp.c
@@ -0,0 +1,71 @@ 
+#include <xen/sched.h>
+#include <asm/processor.h>
+#include <asm/vfp.h>
+
+void vfp_save_state(struct vcpu *v)
+{
+    uint32_t tmp;
+
+    v->arch.vfp.fpexc = READ_CP32(FPEXC);
+
+    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
+
+    v->arch.vfp.fpscr = READ_CP32(FPSCR);
+
+    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
+    {
+        v->arch.vfp.fpinst = READ_CP32(FPINST);
+
+        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
+            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
+        /* Disable FPEXC_EX */
+        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
+    }
+
+    /* Save {d0-d15} */
+    asm volatile("stc p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs1));
+
+    tmp = READ_CP32(MVFR0);
+    if ( (tmp & MVFR0_A_SIMD_MASK) == 2 ) /* 32 x 64 bits registers */
+    {
+        /* Save {d16-d31} */
+        asm volatile("stcl p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs2));
+    }
+
+    WRITE_CP32(v->arch.vfp.fpexc & ~(FPEXC_EN), FPEXC);
+}
+
+void vfp_restore_state(struct vcpu *v)
+{
+    uint32_t tmp = READ_CP32(FPEXC);
+
+    WRITE_CP32(tmp | FPEXC_EN, FPEXC);
+
+    /* Restore {d0-d15} */
+    asm volatile("ldc p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs1));
+
+    tmp = READ_CP32(MVFR0);
+    if ( (tmp & MVFR0_A_SIMD_MASK) == 2 ) /* 32 x 64 bits registers */
+        /* Restore {d16-d31} */
+        asm volatile("ldcl p11, cr0, [%0], #32*4" : : "r" (v->arch.vfp.fpregs2));
+
+    if ( v->arch.vfp.fpexc & FPEXC_EX )
+    {
+        WRITE_CP32(v->arch.vfp.fpinst, FPINST);
+        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
+            WRITE_CP32(v->arch.vfp.fpinst2, FPINST2);
+    }
+
+    WRITE_CP32(v->arch.vfp.fpscr, FPSCR);
+
+    WRITE_CP32(v->arch.vfp.fpexc, FPEXC);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
index 9484548..e06a0a9 100644
--- a/xen/arch/arm/arm64/Makefile
+++ b/xen/arch/arm/arm64/Makefile
@@ -5,5 +5,6 @@  obj-y += mode_switch.o
 
 obj-y += traps.o
 obj-y += domain.o
+obj-y += vfp.o
 
 obj-$(EARLY_PRINTK) += debug.o
diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
new file mode 100644
index 0000000..74e6a50
--- /dev/null
+++ b/xen/arch/arm/arm64/vfp.c
@@ -0,0 +1,13 @@ 
+#include <xen/sched.h>
+#include <asm/processor.h>
+#include <asm/vfp.h>
+
+void vfp_save_state(struct vcpu *v)
+{
+    /* TODO: implement it */
+}
+
+void vfp_restore_state(struct vcpu *v)
+{
+    /* TODO: implement it */
+}
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 4c434a1..f465ab7 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -27,6 +27,7 @@ 
 #include <asm/p2m.h>
 #include <asm/irq.h>
 #include <asm/cpufeature.h>
+#include <asm/vfp.h>
 
 #include <asm/gic.h>
 #include "vtimer.h"
@@ -117,7 +118,8 @@  static void ctxt_switch_from(struct vcpu *p)
 
     /* XXX MPU */
 
-    /* XXX VFP */
+    /* VFP */
+    vfp_save_state(p);
 
     /* VGIC */
     gic_save_state(p);
@@ -143,7 +145,8 @@  static void ctxt_switch_to(struct vcpu *n)
     /* VGIC */
     gic_restore_state(n);
 
-    /* XXX VFP */
+    /* VFP */
+    vfp_restore_state(n);
 
     /* XXX MPU */
 
diff --git a/xen/include/asm-arm/arm32/vfp.h b/xen/include/asm-arm/arm32/vfp.h
new file mode 100644
index 0000000..c32296e
--- /dev/null
+++ b/xen/include/asm-arm/arm32/vfp.h
@@ -0,0 +1,29 @@ 
+#ifndef _ARM_ARM32_VFP_H
+#define _ARM_ARM32_VFP_H
+
+#define FPEXC_EX                (1u << 31)
+#define FPEXC_EN                (1u << 30)
+#define FPEXC_FP2V              (1u << 28)
+
+#define MVFR0_A_SIMD_MASK       (0xf << 0)
+
+struct vfp_state
+{
+    uint64_t fpregs1[16]; /* {d0-d15} */
+    uint64_t fpregs2[16]; /* {d16-d31} */
+    uint32_t fpexc;
+    uint32_t fpscr;
+    /* VFP implementation specific state */
+    uint32_t fpinst;
+    uint32_t fpinst2;
+};
+
+#endif /* _ARM_ARM32_VFP_H */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-arm/arm64/vfp.h b/xen/include/asm-arm/arm64/vfp.h
new file mode 100644
index 0000000..3733d2c
--- /dev/null
+++ b/xen/include/asm-arm/arm64/vfp.h
@@ -0,0 +1,16 @@ 
+#ifndef _ARM_ARM64_VFP_H
+#define _ARM_ARM64_VFP_H
+
+struct vfp_state
+{
+};
+
+#endif /* _ARM_ARM64_VFP_H */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-arm/cpregs.h b/xen/include/asm-arm/cpregs.h
index f08d59a..6d7d6ae 100644
--- a/xen/include/asm-arm/cpregs.h
+++ b/xen/include/asm-arm/cpregs.h
@@ -60,6 +60,14 @@ 
  * arguments, which are cp,opc1,crn,crm,opc2.
  */
 
+/* Coprocessor 10 */
+
+#define FPSCR           p10,7,c1,c0,0   /* Floating-Point Status and Control Register */
+#define MVFR0           p10,7,c7,c0,0   /* Media and VFP Feature Register 0 */
+#define FPEXC           p10,7,c8,c0,0   /* Floating-Point Exception Control Register */
+#define FPINST          p10,7,c9,c0,0   /* Floating-Point Instruction Register */
+#define FPINST2         p10,7,c10,c0,0  /* Floating-point Instruction Register 2 */
+
 /* Coprocessor 14 */
 
 /* CP14 CR0: */
@@ -106,6 +114,7 @@ 
 #define NSACR           p15,0,c1,c1,2   /* Non-Secure Access Control Register */
 #define HSCTLR          p15,4,c1,c0,0   /* Hyp. System Control Register */
 #define HCR             p15,4,c1,c1,0   /* Hyp. Configuration Register */
+#define HCPTR           p15,4,c1,c1,2   /* Hyp. Coprocessor Trap Register */
 
 /* CP15 CR2: Translation Table Base and Control Registers */
 #define TTBCR           p15,0,c2,c0,2   /* Translatation Table Base Control Register */
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index cb251cc..339b6e6 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -6,6 +6,7 @@ 
 #include <xen/sched.h>
 #include <asm/page.h>
 #include <asm/p2m.h>
+#include <asm/vfp.h>
 #include <public/hvm/params.h>
 
 /* Represents state corresponding to a block of 32 interrupts */
@@ -188,6 +189,9 @@  struct arch_vcpu
     uint32_t joscr, jmcr;
 #endif
 
+    /* Float-pointer */
+    struct vfp_state vfp;
+
     /* CP 15 */
     uint32_t csselr;
 
diff --git a/xen/include/asm-arm/vfp.h b/xen/include/asm-arm/vfp.h
new file mode 100644
index 0000000..39cb9c1
--- /dev/null
+++ b/xen/include/asm-arm/vfp.h
@@ -0,0 +1,25 @@ 
+#ifndef _ASM_VFP_H
+#define _ASM_VFP_H
+
+#include <xen/sched.h>
+
+#if defined(CONFIG_ARM_32)
+# include <asm/arm32/vfp.h>
+#elif defined(COFNIG_ARM_64)
+# include <asm/arm64/vfp.h>
+#else
+# error "Unknown ARM variant"
+#endif
+
+void vfp_save_state(struct vcpu *v);
+void vfp_restore_state(struct vcpu *v);
+
+#endif /* _ASM_VFP_H */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */