diff mbox

[3/3] xen/arm: errata 766422: decode thumb store during data abort

Message ID 1374602713-716-4-git-send-email-julien.grall@linaro.org
State Superseded, archived
Headers show

Commit Message

Julien Grall July 23, 2013, 6:05 p.m. UTC
From the errata document:

When a non-secure non-hypervisor memory operation instruction generates a
stage2 page table translation fault, a trap to the hypervisor will be triggered.
For an architecturally defined subset of instructions, the Hypervisor Syndrome
Register (HSR) will have the Instruction Syndrome Valid (ISV) bit set to 1’b1,
and the Rt field should reflect the source register (for stores) or destination
register for loads.
On Cortex-A15, for Thumb and ThumbEE stores, the Rt value may be incorrect
and should not be used, even if the ISV bit is set. All loads, and all ARM
instruction set loads and stores, will have the correct Rt value if the ISV
bit is set.

To avoid this issue, Xen needs to decode thumb store instruction and update
the transfer register.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
---
 xen/arch/arm/traps.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

Comments

Ian Campbell July 23, 2013, 6:18 p.m. UTC | #1
On Tue, 2013-07-23 at 19:05 +0100, Julien Grall wrote:
> From the errata document:
> 
> When a non-secure non-hypervisor memory operation instruction generates a
> stage2 page table translation fault, a trap to the hypervisor will be triggered.
> For an architecturally defined subset of instructions, the Hypervisor Syndrome
> Register (HSR) will have the Instruction Syndrome Valid (ISV) bit set to 1’b1,
> and the Rt field should reflect the source register (for stores) or destination
> register for loads.
> On Cortex-A15, for Thumb and ThumbEE stores, the Rt value may be incorrect
> and should not be used, even if the ISV bit is set. All loads, and all ARM
> instruction set loads and stores, will have the correct Rt value if the ISV
> bit is set.
> 
> To avoid this issue, Xen needs to decode thumb store instruction and update
> the transfer register.
> 
> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> ---
>  xen/arch/arm/traps.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 47 insertions(+)
> 
> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> index d6dc37d..da2bef6 100644
> --- a/xen/arch/arm/traps.c
> +++ b/xen/arch/arm/traps.c
> @@ -35,6 +35,7 @@
>  #include <asm/regs.h>
>  #include <asm/cpregs.h>
>  #include <asm/psci.h>
> +#include <asm/guest_access.h>
>  
>  #include "io.h"
>  #include "vtimer.h"
> @@ -996,6 +997,31 @@ done:
>      if (first) unmap_domain_page(first);
>  }
>  
> +static int read_instruction(struct cpu_user_regs *regs, unsigned len,
> +                            uint32_t *instr)
> +{
> +    int rc;
> +
> +    rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);
> +
> +    if ( rc )
> +        return rc;
> +
> +    switch ( len )
> +    {
> +    /* 16-bit instruction */
> +    case 2:
> +        *instr &= 0xffff;
> +        break;
> +    /* 32-bit instruction */
> +    case 4:
> +        *instr = (*instr & 0xffff) << 16 | (*instr & 0xffff0000) >> 16;

Since you only ever consult bits 12..15 or 0..3 of the result couldn't
you just read two bytes from pc+2 instead of reading 4 bytes and
swapping etc?

> +        break;
> +    }
> +
> +    return 0;
> +}
> +
>  static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
>                                       struct hsr_dabt dabt)
>  {
> @@ -1021,6 +1047,27 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
>      if ( !dabt.valid )
>          goto bad_data_abort;
>  
> +    /*
> +     * Errata 766422: Thumb store translation fault to Hypervisor may
> +     * not have correct HSR Rt value.
> +     */
> +    if ( (regs->cpsr & PSR_THUMB) && dabt.write )

Is there some way to more precisely identify the processors with this
errata? It'd be better to avoid this rigmarole when we can...

I'd think about implementing this as a pseudo-cpuinfo thing setup either
via identify_cpu or perhaps via a processor callback in proc-v7.S and
friends. Then you can define and use something like cpu_has_errata766422
in the conditional and force it to zero for arm64 builds.


> +    {
> +        uint32_t instr = 0;
> +
> +        rc = read_instruction(regs, dabt.len ? 4 : 2, &instr);

You might as well just pass dabt.len directly I think, since you just
decode it again with a switch.

> +        if ( rc )
> +            goto bad_data_abort;
> +
> +        /* Retrieve the transfer register from the instruction */
> +        if ( dabt.len )
> +            /* With 32-bit store instruction, the register is in [12..15] */
> +            info.dabt.reg = (instr & 0xf000) >> 12;
> +        else
> +            /* With 16-bit store instruction, the register is in [0..3] */
> +            info.dabt.reg = instr & 0x7;
> +    }
> +
>      if (handle_mmio(&info))
>      {
>          regs->pc += dabt.len ? 4 : 2;
Julien Grall July 23, 2013, 9:41 p.m. UTC | #2
On 23 July 2013 19:18, Ian Campbell <ian.campbell@citrix.com> wrote:
> On Tue, 2013-07-23 at 19:05 +0100, Julien Grall wrote:
>> From the errata document:
>>
>> When a non-secure non-hypervisor memory operation instruction generates a
>> stage2 page table translation fault, a trap to the hypervisor will be triggered.
>> For an architecturally defined subset of instructions, the Hypervisor Syndrome
>> Register (HSR) will have the Instruction Syndrome Valid (ISV) bit set to 1’b1,
>> and the Rt field should reflect the source register (for stores) or destination
>> register for loads.
>> On Cortex-A15, for Thumb and ThumbEE stores, the Rt value may be incorrect
>> and should not be used, even if the ISV bit is set. All loads, and all ARM
>> instruction set loads and stores, will have the correct Rt value if the ISV
>> bit is set.
>>
>> To avoid this issue, Xen needs to decode thumb store instruction and update
>> the transfer register.
>>
>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>> ---
>>  xen/arch/arm/traps.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 47 insertions(+)
>>
>> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
>> index d6dc37d..da2bef6 100644
>> --- a/xen/arch/arm/traps.c
>> +++ b/xen/arch/arm/traps.c
>> @@ -35,6 +35,7 @@
>>  #include <asm/regs.h>
>>  #include <asm/cpregs.h>
>>  #include <asm/psci.h>
>> +#include <asm/guest_access.h>
>>
>>  #include "io.h"
>>  #include "vtimer.h"
>> @@ -996,6 +997,31 @@ done:
>>      if (first) unmap_domain_page(first);
>>  }
>>
>> +static int read_instruction(struct cpu_user_regs *regs, unsigned len,
>> +                            uint32_t *instr)
>> +{
>> +    int rc;
>> +
>> +    rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);
>> +
>> +    if ( rc )
>> +        return rc;
>> +
>> +    switch ( len )
>> +    {
>> +    /* 16-bit instruction */
>> +    case 2:
>> +        *instr &= 0xffff;
>> +        break;
>> +    /* 32-bit instruction */
>> +    case 4:
>> +        *instr = (*instr & 0xffff) << 16 | (*instr & 0xffff0000) >> 16;
>
> Since you only ever consult bits 12..15 or 0..3 of the result couldn't
> you just read two bytes from pc+2 instead of reading 4 bytes and
> swapping etc?

I was thinking to provide a "high level" function to retrieve an
instruction just in case we need it in the future.

>> +        break;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>  static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
>>                                       struct hsr_dabt dabt)
>>  {
>> @@ -1021,6 +1047,27 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
>>      if ( !dabt.valid )
>>          goto bad_data_abort;
>>
>> +    /*
>> +     * Errata 766422: Thumb store translation fault to Hypervisor may
>> +     * not have correct HSR Rt value.
>> +     */
>> +    if ( (regs->cpsr & PSR_THUMB) && dabt.write )
>
> Is there some way to more precisely identify the processors with this
> errata? It'd be better to avoid this rigmarole when we can...

Only r0p4 (for instance the arndale board) should be impacted by this errata.

> I'd think about implementing this as a pseudo-cpuinfo thing setup either
> via identify_cpu or perhaps via a processor callback in proc-v7.S and
> friends. Then you can define and use something like cpu_has_errata766422
> in the conditional and force it to zero for arm64 builds.

Sounds good. I will rework the patch with this solution.

>> +    {
>> +        uint32_t instr = 0;
>> +
>> +        rc = read_instruction(regs, dabt.len ? 4 : 2, &instr);
>
> You might as well just pass dabt.len directly I think, since you just
> decode it again with a switch.

If a high-level helper to read an instruction is not needed, I will directly
mix this function with the decode part below.

>> +        if ( rc )
>> +            goto bad_data_abort;
>> +
>> +        /* Retrieve the transfer register from the instruction */
>> +        if ( dabt.len )
>> +            /* With 32-bit store instruction, the register is in [12..15] */
>> +            info.dabt.reg = (instr & 0xf000) >> 12;
>> +        else
>> +            /* With 16-bit store instruction, the register is in [0..3] */
>> +            info.dabt.reg = instr & 0x7;
>> +    }
>> +
>>      if (handle_mmio(&info))
>>      {
>>          regs->pc += dabt.len ? 4 : 2;
>
>



--
Julien Grall
Ian Campbell July 23, 2013, 10:16 p.m. UTC | #3
On Tue, 2013-07-23 at 22:41 +0100, Julien Grall wrote:
> On 23 July 2013 19:18, Ian Campbell <ian.campbell@citrix.com> wrote:
> > On Tue, 2013-07-23 at 19:05 +0100, Julien Grall wrote:
> >> From the errata document:
> >>
> >> When a non-secure non-hypervisor memory operation instruction generates a
> >> stage2 page table translation fault, a trap to the hypervisor will be triggered.
> >> For an architecturally defined subset of instructions, the Hypervisor Syndrome
> >> Register (HSR) will have the Instruction Syndrome Valid (ISV) bit set to 1’b1,
> >> and the Rt field should reflect the source register (for stores) or destination
> >> register for loads.
> >> On Cortex-A15, for Thumb and ThumbEE stores, the Rt value may be incorrect
> >> and should not be used, even if the ISV bit is set. All loads, and all ARM
> >> instruction set loads and stores, will have the correct Rt value if the ISV
> >> bit is set.
> >>
> >> To avoid this issue, Xen needs to decode thumb store instruction and update
> >> the transfer register.
> >>
> >> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> >> ---
> >>  xen/arch/arm/traps.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 47 insertions(+)
> >>
> >> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> >> index d6dc37d..da2bef6 100644
> >> --- a/xen/arch/arm/traps.c
> >> +++ b/xen/arch/arm/traps.c
> >> @@ -35,6 +35,7 @@
> >>  #include <asm/regs.h>
> >>  #include <asm/cpregs.h>
> >>  #include <asm/psci.h>
> >> +#include <asm/guest_access.h>
> >>
> >>  #include "io.h"
> >>  #include "vtimer.h"
> >> @@ -996,6 +997,31 @@ done:
> >>      if (first) unmap_domain_page(first);
> >>  }
> >>
> >> +static int read_instruction(struct cpu_user_regs *regs, unsigned len,
> >> +                            uint32_t *instr)
> >> +{
> >> +    int rc;
> >> +
> >> +    rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);
> >> +
> >> +    if ( rc )
> >> +        return rc;
> >> +
> >> +    switch ( len )
> >> +    {
> >> +    /* 16-bit instruction */
> >> +    case 2:
> >> +        *instr &= 0xffff;
> >> +        break;
> >> +    /* 32-bit instruction */
> >> +    case 4:
> >> +        *instr = (*instr & 0xffff) << 16 | (*instr & 0xffff0000) >> 16;
> >
> > Since you only ever consult bits 12..15 or 0..3 of the result couldn't
> > you just read two bytes from pc+2 instead of reading 4 bytes and
> > swapping etc?
> 
> I was thinking to provide a "high level" function to retrieve an
> instruction just in case we need it in the future.

I suppose that does sound potentially useful.

Were does this the idea of swapping them come from though? The ARM ARM
seems (see e.g. section A6.3) to describe things in the order they are
in memory -- is doing otherwise not going to end up confusing us?

[...]
> > I'd think about implementing this as a pseudo-cpuinfo thing setup either
> > via identify_cpu or perhaps via a processor callback in proc-v7.S and
> > friends. Then you can define and use something like cpu_has_errata766422
> > in the conditional and force it to zero for arm64 builds.
> 
> Sounds good. I will rework the patch with this solution.

Thanks.

Ian.
Julien Grall July 23, 2013, 10:43 p.m. UTC | #4
On 23 July 2013 23:16, Ian Campbell <ian.campbell@citrix.com> wrote:
> On Tue, 2013-07-23 at 22:41 +0100, Julien Grall wrote:
>> On 23 July 2013 19:18, Ian Campbell <ian.campbell@citrix.com> wrote:
>> > On Tue, 2013-07-23 at 19:05 +0100, Julien Grall wrote:
>> >> From the errata document:
>> >>
>> >> When a non-secure non-hypervisor memory operation instruction generates a
>> >> stage2 page table translation fault, a trap to the hypervisor will be triggered.
>> >> For an architecturally defined subset of instructions, the Hypervisor Syndrome
>> >> Register (HSR) will have the Instruction Syndrome Valid (ISV) bit set to 1’b1,
>> >> and the Rt field should reflect the source register (for stores) or destination
>> >> register for loads.
>> >> On Cortex-A15, for Thumb and ThumbEE stores, the Rt value may be incorrect
>> >> and should not be used, even if the ISV bit is set. All loads, and all ARM
>> >> instruction set loads and stores, will have the correct Rt value if the ISV
>> >> bit is set.
>> >>
>> >> To avoid this issue, Xen needs to decode thumb store instruction and update
>> >> the transfer register.
>> >>
>> >> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>> >> ---
>> >>  xen/arch/arm/traps.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
>> >>  1 file changed, 47 insertions(+)
>> >>
>> >> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
>> >> index d6dc37d..da2bef6 100644
>> >> --- a/xen/arch/arm/traps.c
>> >> +++ b/xen/arch/arm/traps.c
>> >> @@ -35,6 +35,7 @@
>> >>  #include <asm/regs.h>
>> >>  #include <asm/cpregs.h>
>> >>  #include <asm/psci.h>
>> >> +#include <asm/guest_access.h>
>> >>
>> >>  #include "io.h"
>> >>  #include "vtimer.h"
>> >> @@ -996,6 +997,31 @@ done:
>> >>      if (first) unmap_domain_page(first);
>> >>  }
>> >>
>> >> +static int read_instruction(struct cpu_user_regs *regs, unsigned len,
>> >> +                            uint32_t *instr)
>> >> +{
>> >> +    int rc;
>> >> +
>> >> +    rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);
>> >> +
>> >> +    if ( rc )
>> >> +        return rc;
>> >> +
>> >> +    switch ( len )
>> >> +    {
>> >> +    /* 16-bit instruction */
>> >> +    case 2:
>> >> +        *instr &= 0xffff;
>> >> +        break;
>> >> +    /* 32-bit instruction */
>> >> +    case 4:
>> >> +        *instr = (*instr & 0xffff) << 16 | (*instr & 0xffff0000) >> 16;
>> >
>> > Since you only ever consult bits 12..15 or 0..3 of the result couldn't
>> > you just read two bytes from pc+2 instead of reading 4 bytes and
>> > swapping etc?
>>
>> I was thinking to provide a "high level" function to retrieve an
>> instruction just in case we need it in the future.
>
> I suppose that does sound potentially useful.
>
> Were does this the idea of swapping them come from though? The ARM ARM
> seems (see e.g. section A6.3) to describe things in the order they are
> in memory -- is doing otherwise not going to end up confusing us?

In THUMB set, a 32-bit instruction consisting of two consecutive
halfwords (see section A6.1).
So the instruction is in "big endian", but Xen will read the word as a
"little endian" things.

But in ARM set, a 32-bit instruction is a single word. I will add a
check to handle this case.
Ian Campbell July 24, 2013, 2:55 p.m. UTC | #5
On Tue, 2013-07-23 at 23:43 +0100, Julien Grall wrote:
> On 23 July 2013 23:16, Ian Campbell <ian.campbell@citrix.com> wrote:
> > On Tue, 2013-07-23 at 22:41 +0100, Julien Grall wrote:
> >> On 23 July 2013 19:18, Ian Campbell <ian.campbell@citrix.com> wrote:
> >> > On Tue, 2013-07-23 at 19:05 +0100, Julien Grall wrote:
> >> >> From the errata document:
> >> >>
> >> >> When a non-secure non-hypervisor memory operation instruction generates a
> >> >> stage2 page table translation fault, a trap to the hypervisor will be triggered.
> >> >> For an architecturally defined subset of instructions, the Hypervisor Syndrome
> >> >> Register (HSR) will have the Instruction Syndrome Valid (ISV) bit set to 1’b1,
> >> >> and the Rt field should reflect the source register (for stores) or destination
> >> >> register for loads.
> >> >> On Cortex-A15, for Thumb and ThumbEE stores, the Rt value may be incorrect
> >> >> and should not be used, even if the ISV bit is set. All loads, and all ARM
> >> >> instruction set loads and stores, will have the correct Rt value if the ISV
> >> >> bit is set.
> >> >>
> >> >> To avoid this issue, Xen needs to decode thumb store instruction and update
> >> >> the transfer register.
> >> >>
> >> >> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> >> >> ---
> >> >>  xen/arch/arm/traps.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
> >> >>  1 file changed, 47 insertions(+)
> >> >>
> >> >> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> >> >> index d6dc37d..da2bef6 100644
> >> >> --- a/xen/arch/arm/traps.c
> >> >> +++ b/xen/arch/arm/traps.c
> >> >> @@ -35,6 +35,7 @@
> >> >>  #include <asm/regs.h>
> >> >>  #include <asm/cpregs.h>
> >> >>  #include <asm/psci.h>
> >> >> +#include <asm/guest_access.h>
> >> >>
> >> >>  #include "io.h"
> >> >>  #include "vtimer.h"
> >> >> @@ -996,6 +997,31 @@ done:
> >> >>      if (first) unmap_domain_page(first);
> >> >>  }
> >> >>
> >> >> +static int read_instruction(struct cpu_user_regs *regs, unsigned len,
> >> >> +                            uint32_t *instr)
> >> >> +{
> >> >> +    int rc;
> >> >> +
> >> >> +    rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);
> >> >> +
> >> >> +    if ( rc )
> >> >> +        return rc;
> >> >> +
> >> >> +    switch ( len )
> >> >> +    {
> >> >> +    /* 16-bit instruction */
> >> >> +    case 2:
> >> >> +        *instr &= 0xffff;
> >> >> +        break;
> >> >> +    /* 32-bit instruction */
> >> >> +    case 4:
> >> >> +        *instr = (*instr & 0xffff) << 16 | (*instr & 0xffff0000) >> 16;
> >> >
> >> > Since you only ever consult bits 12..15 or 0..3 of the result couldn't
> >> > you just read two bytes from pc+2 instead of reading 4 bytes and
> >> > swapping etc?
> >>
> >> I was thinking to provide a "high level" function to retrieve an
> >> instruction just in case we need it in the future.
> >
> > I suppose that does sound potentially useful.
> >
> > Were does this the idea of swapping them come from though? The ARM ARM
> > seems (see e.g. section A6.3) to describe things in the order they are
> > in memory -- is doing otherwise not going to end up confusing us?
> 
> In THUMB set, a 32-bit instruction consisting of two consecutive
> halfwords (see section A6.1).
> So the instruction is in "big endian", but Xen will read the word as a
> "little endian" things.

Are you saying that the 16-bit sub-words are in the opposite order to
what I would have expected and what seems most natural from a decode
PoV?

Consider the T3 encoding of STR (imm, thumb) from A8.8.203, which is:
        1 1 1 1 1 0 0 0 1 1 0 0 Rn | Rt imm12

(where Rn == bits 0..3 of the first halfword, Rt is 15..12 of the second
and imm12 is the remainder of the second halfword).

I would have expected that the halfword with the "11111" pattern (which
identifies this as a 32-bit thumb instruction) would have to come first,
so the decode knows to look for the second. IOW the halfword with 11111
should be at PC and the Rt/imm12 halfword should be at PC+2. So if we
copy 4 bytes from guest PC we should end up with things in the order
given above (and in the manual) and swapping shouldn't be needed.
Perhaps I need to go have some coffee...

Have you tested and actually observed this case on real h/w?

> 
> But in ARM set, a 32-bit instruction is a single word. I will add a
> check to handle this case.
>
Julien Grall July 24, 2013, 3:19 p.m. UTC | #6
On 07/24/2013 03:55 PM, Ian Campbell wrote:
> On Tue, 2013-07-23 at 23:43 +0100, Julien Grall wrote:
>> On 23 July 2013 23:16, Ian Campbell <ian.campbell@citrix.com> wrote:
>>> On Tue, 2013-07-23 at 22:41 +0100, Julien Grall wrote:
>>>> On 23 July 2013 19:18, Ian Campbell <ian.campbell@citrix.com> wrote:
>>>>> On Tue, 2013-07-23 at 19:05 +0100, Julien Grall wrote:
>>>>>> From the errata document:
>>>>>>
>>>>>> When a non-secure non-hypervisor memory operation instruction generates a
>>>>>> stage2 page table translation fault, a trap to the hypervisor will be triggered.
>>>>>> For an architecturally defined subset of instructions, the Hypervisor Syndrome
>>>>>> Register (HSR) will have the Instruction Syndrome Valid (ISV) bit set to 1’b1,
>>>>>> and the Rt field should reflect the source register (for stores) or destination
>>>>>> register for loads.
>>>>>> On Cortex-A15, for Thumb and ThumbEE stores, the Rt value may be incorrect
>>>>>> and should not be used, even if the ISV bit is set. All loads, and all ARM
>>>>>> instruction set loads and stores, will have the correct Rt value if the ISV
>>>>>> bit is set.
>>>>>>
>>>>>> To avoid this issue, Xen needs to decode thumb store instruction and update
>>>>>> the transfer register.
>>>>>>
>>>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>>>>> ---
>>>>>>  xen/arch/arm/traps.c |   47 +++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  1 file changed, 47 insertions(+)
>>>>>>
>>>>>> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
>>>>>> index d6dc37d..da2bef6 100644
>>>>>> --- a/xen/arch/arm/traps.c
>>>>>> +++ b/xen/arch/arm/traps.c
>>>>>> @@ -35,6 +35,7 @@
>>>>>>  #include <asm/regs.h>
>>>>>>  #include <asm/cpregs.h>
>>>>>>  #include <asm/psci.h>
>>>>>> +#include <asm/guest_access.h>
>>>>>>
>>>>>>  #include "io.h"
>>>>>>  #include "vtimer.h"
>>>>>> @@ -996,6 +997,31 @@ done:
>>>>>>      if (first) unmap_domain_page(first);
>>>>>>  }
>>>>>>
>>>>>> +static int read_instruction(struct cpu_user_regs *regs, unsigned len,
>>>>>> +                            uint32_t *instr)
>>>>>> +{
>>>>>> +    int rc;
>>>>>> +
>>>>>> +    rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);
>>>>>> +
>>>>>> +    if ( rc )
>>>>>> +        return rc;
>>>>>> +
>>>>>> +    switch ( len )
>>>>>> +    {
>>>>>> +    /* 16-bit instruction */
>>>>>> +    case 2:
>>>>>> +        *instr &= 0xffff;
>>>>>> +        break;
>>>>>> +    /* 32-bit instruction */
>>>>>> +    case 4:
>>>>>> +        *instr = (*instr & 0xffff) << 16 | (*instr & 0xffff0000) >> 16;
>>>>>
>>>>> Since you only ever consult bits 12..15 or 0..3 of the result couldn't
>>>>> you just read two bytes from pc+2 instead of reading 4 bytes and
>>>>> swapping etc?
>>>>
>>>> I was thinking to provide a "high level" function to retrieve an
>>>> instruction just in case we need it in the future.
>>>
>>> I suppose that does sound potentially useful.
>>>
>>> Were does this the idea of swapping them come from though? The ARM ARM
>>> seems (see e.g. section A6.3) to describe things in the order they are
>>> in memory -- is doing otherwise not going to end up confusing us?
>>
>> In THUMB set, a 32-bit instruction consisting of two consecutive
>> halfwords (see section A6.1).
>> So the instruction is in "big endian", but Xen will read the word as a
>> "little endian" things.
> 
> Are you saying that the 16-bit sub-words are in the opposite order to
> what I would have expected and what seems most natural from a decode
> PoV?

> Consider the T3 encoding of STR (imm, thumb) from A8.8.203, which is:
>         1 1 1 1 1 0 0 0 1 1 0 0 Rn | Rt imm12
> 
> (where Rn == bits 0..3 of the first halfword, Rt is 15..12 of the second
> and imm12 is the remainder of the second halfword).
> 
> I would have expected that the halfword with the "11111" pattern (which
> identifies this as a 32-bit thumb instruction) would have to come first,
> so the decode knows to look for the second. IOW the halfword with 11111
> should be at PC and the Rt/imm12 halfword should be at PC+2. So if we
> copy 4 bytes from guest PC we should end up with things in the order
> given above (and in the manual) and swapping shouldn't be needed.
> Perhaps I need to go have some coffee...

The ARM ARM specification describes a THUMB 32 bit instruction with
   HW1 HW2

HW1 => [31:16] => most significant
HW2 => [15:0] => less significant

raw_copy_from_copy will directly read 4 bytes and store in an uint32_t.
As Xen is running in little endian, it ends up in:
HW2 => [31:16]
HW1 => [15:0]

Which is false. If it's more clear, I can do something like this

uint16_t a[2];
rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);

...

switch ( len )
{
...
case 4:
   instr = a[0] << 16 | a[1];
...
}

> Have you tested and actually observed this case on real h/w?

I tried on the arndale board.
Tim Deegan July 29, 2013, 2:46 p.m. UTC | #7
Hi,

At 16:19 +0100 on 24 Jul (1374682769), Julien Grall wrote:
> On 07/24/2013 03:55 PM, Ian Campbell wrote:
> > On Tue, 2013-07-23 at 23:43 +0100, Julien Grall wrote:
> >> On 23 July 2013 23:16, Ian Campbell <ian.campbell@citrix.com> wrote:
> >>> Were does this the idea of swapping them come from though? The ARM ARM
> >>> seems (see e.g. section A6.3) to describe things in the order they are
> >>> in memory -- is doing otherwise not going to end up confusing us?
> >>
> >> In THUMB set, a 32-bit instruction consisting of two consecutive
> >> halfwords (see section A6.1).
> >> So the instruction is in "big endian", but Xen will read the word as a
> >> "little endian" things.
> > 
> > Are you saying that the 16-bit sub-words are in the opposite order to
> > what I would have expected and what seems most natural from a decode
> > PoV?
> 
> > Consider the T3 encoding of STR (imm, thumb) from A8.8.203, which is:
> >         1 1 1 1 1 0 0 0 1 1 0 0 Rn | Rt imm12
> > 
> > (where Rn == bits 0..3 of the first halfword, Rt is 15..12 of the second
> > and imm12 is the remainder of the second halfword).
> > 
> > I would have expected that the halfword with the "11111" pattern (which
> > identifies this as a 32-bit thumb instruction) would have to come first,
> > so the decode knows to look for the second. IOW the halfword with 11111
> > should be at PC and the Rt/imm12 halfword should be at PC+2.

Yes.  This seems to be pretty much explicit in section A6.1 -- you can
read 16 bits and decide from those whether you need to read another 16.

"If bits [15:11] of the halfword being decoded take any of the following
 values, the halfword is the first halfword of a 32-bit instruction"

> > So if we
> > copy 4 bytes from guest PC we should end up with things in the order
> > given above (and in the manual) and swapping shouldn't be needed.

Sadly, no.  Instruction memory is always little-endian, but:

"The Thumb instruction stream is a sequence of halfword-aligned
 halfwords. Each Thumb instruction is either a single 16-bit halfword
 in that stream, or a 32-bit instruction consisting of two consecutive
 halfwords in that stream."

So a 32-bit thumb instruction with bytes ABCD (where byte A is the one 
with the magic 5-bit pattern) will be stored in memory as a mixed-endian
monstrosity:

PC:   B
PC+1: A
PC+2: D
PC+3: C

A little-endian halfword read of PC gives you AB; another halfword read
at PC+2 gives CD, and a 32-bit little-endian read gives CDAB.

We don't necessarily have to do the bit-shuffling explicitly: we could
do thumb32 decode with the shuffle implicit in the layout used for
decoding.

> uint16_t a[2];
> rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);

You definitely should read exactly the correct number of bytes -- if we
are decoding a 16-bit instruction at the end of a page we don't want to
trigger a fault by reading 32 bits and crossing a page boundary.

Oh, and one other comment that I've already lost the context for: can
you please rename the instruction fetch-and-shuffle function to have
'thumb' in its name somewhere?

Cheers,

Tim.
Ian Campbell July 29, 2013, 2:55 p.m. UTC | #8
On Mon, 2013-07-29 at 15:46 +0100, Tim Deegan wrote:
> Hi,
> 
> At 16:19 +0100 on 24 Jul (1374682769), Julien Grall wrote:
> > On 07/24/2013 03:55 PM, Ian Campbell wrote:
> > > On Tue, 2013-07-23 at 23:43 +0100, Julien Grall wrote:
> > >> On 23 July 2013 23:16, Ian Campbell <ian.campbell@citrix.com> wrote:
> > >>> Were does this the idea of swapping them come from though? The ARM ARM
> > >>> seems (see e.g. section A6.3) to describe things in the order they are
> > >>> in memory -- is doing otherwise not going to end up confusing us?
> > >>
> > >> In THUMB set, a 32-bit instruction consisting of two consecutive
> > >> halfwords (see section A6.1).
> > >> So the instruction is in "big endian", but Xen will read the word as a
> > >> "little endian" things.
> > > 
> > > Are you saying that the 16-bit sub-words are in the opposite order to
> > > what I would have expected and what seems most natural from a decode
> > > PoV?
> > 
> > > Consider the T3 encoding of STR (imm, thumb) from A8.8.203, which is:
> > >         1 1 1 1 1 0 0 0 1 1 0 0 Rn | Rt imm12
> > > 
> > > (where Rn == bits 0..3 of the first halfword, Rt is 15..12 of the second
> > > and imm12 is the remainder of the second halfword).
> > > 
> > > I would have expected that the halfword with the "11111" pattern (which
> > > identifies this as a 32-bit thumb instruction) would have to come first,
> > > so the decode knows to look for the second. IOW the halfword with 11111
> > > should be at PC and the Rt/imm12 halfword should be at PC+2.
> 
> Yes.  This seems to be pretty much explicit in section A6.1 -- you can
> read 16 bits and decide from those whether you need to read another 16.
> 
> "If bits [15:11] of the halfword being decoded take any of the following
>  values, the halfword is the first halfword of a 32-bit instruction"
> 
> > > So if we
> > > copy 4 bytes from guest PC we should end up with things in the order
> > > given above (and in the manual) and swapping shouldn't be needed.
> 
> Sadly, no.  Instruction memory is always little-endian, but:
> 
> "The Thumb instruction stream is a sequence of halfword-aligned
>  halfwords. Each Thumb instruction is either a single 16-bit halfword
>  in that stream, or a 32-bit instruction consisting of two consecutive
>  halfwords in that stream."
> 
> So a 32-bit thumb instruction with bytes ABCD (where byte A is the one 
> with the magic 5-bit pattern) will be stored in memory as a mixed-endian
> monstrosity:
> 
> PC:   B
> PC+1: A
> PC+2: D
> PC+3: C
> 
> A little-endian halfword read of PC gives you AB; another halfword read
> at PC+2 gives CD, and a 32-bit little-endian read gives CDAB.

Good lord. No wonder I didn't get what Julien was trying to explain to
me, he was trying to explain an insanity!

> We don't necessarily have to do the bit-shuffling explicitly: we could
> do thumb32 decode with the shuffle implicit in the layout used for
> decoding.

Since we already know the instruction length in this context that would
seem OK. Although it does make it harder to reason about code which
handles either 16 or 32 bit instructions -- which half contains "bit 0"
becomes different.

If we wanted to be more general we would do a 16-bit read and then check
for the magic pattern before doing a shift and a second 16-bit read,
which mimic the hardware and avoid my tiny brain having to think to hard
about mixed endianness.

> > uint16_t a[2];
> > rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);
> 
> You definitely should read exactly the correct number of bytes -- if we
> are decoding a 16-bit instruction at the end of a page we don't want to
> trigger a fault by reading 32 bits and crossing a page boundary.

Yes.

> Oh, and one other comment that I've already lost the context for: can
> you please rename the instruction fetch-and-shuffle function to have
> 'thumb' in its name somewhere?

ACK!

Ian.
Julien Grall July 29, 2013, 2:56 p.m. UTC | #9
On 07/29/2013 03:46 PM, Tim Deegan wrote:
> Hi,
> 
> At 16:19 +0100 on 24 Jul (1374682769), Julien Grall wrote:
>> On 07/24/2013 03:55 PM, Ian Campbell wrote:
>>> On Tue, 2013-07-23 at 23:43 +0100, Julien Grall wrote:
>>>> On 23 July 2013 23:16, Ian Campbell <ian.campbell@citrix.com> wrote:
>>>>> Were does this the idea of swapping them come from though? The ARM ARM
>>>>> seems (see e.g. section A6.3) to describe things in the order they are
>>>>> in memory -- is doing otherwise not going to end up confusing us?
>>>>
>>>> In THUMB set, a 32-bit instruction consisting of two consecutive
>>>> halfwords (see section A6.1).
>>>> So the instruction is in "big endian", but Xen will read the word as a
>>>> "little endian" things.
>>>
>>> Are you saying that the 16-bit sub-words are in the opposite order to
>>> what I would have expected and what seems most natural from a decode
>>> PoV?
>>
>>> Consider the T3 encoding of STR (imm, thumb) from A8.8.203, which is:
>>>         1 1 1 1 1 0 0 0 1 1 0 0 Rn | Rt imm12
>>>
>>> (where Rn == bits 0..3 of the first halfword, Rt is 15..12 of the second
>>> and imm12 is the remainder of the second halfword).
>>>
>>> I would have expected that the halfword with the "11111" pattern (which
>>> identifies this as a 32-bit thumb instruction) would have to come first,
>>> so the decode knows to look for the second. IOW the halfword with 11111
>>> should be at PC and the Rt/imm12 halfword should be at PC+2.
> 
> Yes.  This seems to be pretty much explicit in section A6.1 -- you can
> read 16 bits and decide from those whether you need to read another 16.
> 
> "If bits [15:11] of the halfword being decoded take any of the following
>  values, the halfword is the first halfword of a 32-bit instruction"
> 
>>> So if we
>>> copy 4 bytes from guest PC we should end up with things in the order
>>> given above (and in the manual) and swapping shouldn't be needed.
> 
> Sadly, no.  Instruction memory is always little-endian, but:
> 
> "The Thumb instruction stream is a sequence of halfword-aligned
>  halfwords. Each Thumb instruction is either a single 16-bit halfword
>  in that stream, or a 32-bit instruction consisting of two consecutive
>  halfwords in that stream."
> 
> So a 32-bit thumb instruction with bytes ABCD (where byte A is the one 
> with the magic 5-bit pattern) will be stored in memory as a mixed-endian
> monstrosity:
> 
> PC:   B
> PC+1: A
> PC+2: D
> PC+3: C
> 
> A little-endian halfword read of PC gives you AB; another halfword read
> at PC+2 gives CD, and a 32-bit little-endian read gives CDAB.
> 
> We don't necessarily have to do the bit-shuffling explicitly: we could
> do thumb32 decode with the shuffle implicit in the layout used for
> decoding.

I think we need to keep the bit-shuffling explicitly. It's less
confusing than having "non-coherent" shift during the decoding.

>> uint16_t a[2];
>> rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);
> 
> You definitely should read exactly the correct number of bytes -- if we
> are decoding a 16-bit instruction at the end of a page we don't want to
> trigger a fault by reading 32 bits and crossing a page boundary.

I already read either 16 or 32 bits depending of the instruction len.
The array is bigger to fit to the both instruction.

> Oh, and one other comment that I've already lost the context for: can
> you please rename the instruction fetch-and-shuffle function to have
> 'thumb' in its name somewhere?

Actually, I have sent a version for this patch series
(http://www.gossamer-threads.com/lists/xen/devel/291808). It's also
support ARM instruction decoding.

Cheers,
Ian Campbell July 29, 2013, 3 p.m. UTC | #10
On Mon, 2013-07-29 at 15:56 +0100, Julien Grall wrote:
> On 07/29/2013 03:46 PM, Tim Deegan wrote:
> > Hi,
> > 
> > At 16:19 +0100 on 24 Jul (1374682769), Julien Grall wrote:
> >> On 07/24/2013 03:55 PM, Ian Campbell wrote:
> >>> On Tue, 2013-07-23 at 23:43 +0100, Julien Grall wrote:
> >>>> On 23 July 2013 23:16, Ian Campbell <ian.campbell@citrix.com> wrote:
> >>>>> Were does this the idea of swapping them come from though? The ARM ARM
> >>>>> seems (see e.g. section A6.3) to describe things in the order they are
> >>>>> in memory -- is doing otherwise not going to end up confusing us?
> >>>>
> >>>> In THUMB set, a 32-bit instruction consisting of two consecutive
> >>>> halfwords (see section A6.1).
> >>>> So the instruction is in "big endian", but Xen will read the word as a
> >>>> "little endian" things.
> >>>
> >>> Are you saying that the 16-bit sub-words are in the opposite order to
> >>> what I would have expected and what seems most natural from a decode
> >>> PoV?
> >>
> >>> Consider the T3 encoding of STR (imm, thumb) from A8.8.203, which is:
> >>>         1 1 1 1 1 0 0 0 1 1 0 0 Rn | Rt imm12
> >>>
> >>> (where Rn == bits 0..3 of the first halfword, Rt is 15..12 of the second
> >>> and imm12 is the remainder of the second halfword).
> >>>
> >>> I would have expected that the halfword with the "11111" pattern (which
> >>> identifies this as a 32-bit thumb instruction) would have to come first,
> >>> so the decode knows to look for the second. IOW the halfword with 11111
> >>> should be at PC and the Rt/imm12 halfword should be at PC+2.
> > 
> > Yes.  This seems to be pretty much explicit in section A6.1 -- you can
> > read 16 bits and decide from those whether you need to read another 16.
> > 
> > "If bits [15:11] of the halfword being decoded take any of the following
> >  values, the halfword is the first halfword of a 32-bit instruction"
> > 
> >>> So if we
> >>> copy 4 bytes from guest PC we should end up with things in the order
> >>> given above (and in the manual) and swapping shouldn't be needed.
> > 
> > Sadly, no.  Instruction memory is always little-endian, but:
> > 
> > "The Thumb instruction stream is a sequence of halfword-aligned
> >  halfwords. Each Thumb instruction is either a single 16-bit halfword
> >  in that stream, or a 32-bit instruction consisting of two consecutive
> >  halfwords in that stream."
> > 
> > So a 32-bit thumb instruction with bytes ABCD (where byte A is the one 
> > with the magic 5-bit pattern) will be stored in memory as a mixed-endian
> > monstrosity:
> > 
> > PC:   B
> > PC+1: A
> > PC+2: D
> > PC+3: C
> > 
> > A little-endian halfword read of PC gives you AB; another halfword read
> > at PC+2 gives CD, and a 32-bit little-endian read gives CDAB.
> > 
> > We don't necessarily have to do the bit-shuffling explicitly: we could
> > do thumb32 decode with the shuffle implicit in the layout used for
> > decoding.
> 
> I think we need to keep the bit-shuffling explicitly. It's less
> confusing than having "non-coherent" shift during the decoding.

Whatever we do it needs a big comment along the lines of what Tim wrote
above.

Ian.
Julien Grall July 29, 2013, 3:04 p.m. UTC | #11
On 07/29/2013 04:00 PM, Ian Campbell wrote:
> On Mon, 2013-07-29 at 15:56 +0100, Julien Grall wrote:
>> On 07/29/2013 03:46 PM, Tim Deegan wrote:
>>> Hi,
>>>
>>> At 16:19 +0100 on 24 Jul (1374682769), Julien Grall wrote:
>>>> On 07/24/2013 03:55 PM, Ian Campbell wrote:
>>>>> On Tue, 2013-07-23 at 23:43 +0100, Julien Grall wrote:
>>>>>> On 23 July 2013 23:16, Ian Campbell <ian.campbell@citrix.com> wrote:
>>>>>>> Were does this the idea of swapping them come from though? The ARM ARM
>>>>>>> seems (see e.g. section A6.3) to describe things in the order they are
>>>>>>> in memory -- is doing otherwise not going to end up confusing us?
>>>>>>
>>>>>> In THUMB set, a 32-bit instruction consisting of two consecutive
>>>>>> halfwords (see section A6.1).
>>>>>> So the instruction is in "big endian", but Xen will read the word as a
>>>>>> "little endian" things.
>>>>>
>>>>> Are you saying that the 16-bit sub-words are in the opposite order to
>>>>> what I would have expected and what seems most natural from a decode
>>>>> PoV?
>>>>
>>>>> Consider the T3 encoding of STR (imm, thumb) from A8.8.203, which is:
>>>>>         1 1 1 1 1 0 0 0 1 1 0 0 Rn | Rt imm12
>>>>>
>>>>> (where Rn == bits 0..3 of the first halfword, Rt is 15..12 of the second
>>>>> and imm12 is the remainder of the second halfword).
>>>>>
>>>>> I would have expected that the halfword with the "11111" pattern (which
>>>>> identifies this as a 32-bit thumb instruction) would have to come first,
>>>>> so the decode knows to look for the second. IOW the halfword with 11111
>>>>> should be at PC and the Rt/imm12 halfword should be at PC+2.
>>>
>>> Yes.  This seems to be pretty much explicit in section A6.1 -- you can
>>> read 16 bits and decide from those whether you need to read another 16.
>>>
>>> "If bits [15:11] of the halfword being decoded take any of the following
>>>  values, the halfword is the first halfword of a 32-bit instruction"
>>>
>>>>> So if we
>>>>> copy 4 bytes from guest PC we should end up with things in the order
>>>>> given above (and in the manual) and swapping shouldn't be needed.
>>>
>>> Sadly, no.  Instruction memory is always little-endian, but:
>>>
>>> "The Thumb instruction stream is a sequence of halfword-aligned
>>>  halfwords. Each Thumb instruction is either a single 16-bit halfword
>>>  in that stream, or a 32-bit instruction consisting of two consecutive
>>>  halfwords in that stream."
>>>
>>> So a 32-bit thumb instruction with bytes ABCD (where byte A is the one 
>>> with the magic 5-bit pattern) will be stored in memory as a mixed-endian
>>> monstrosity:
>>>
>>> PC:   B
>>> PC+1: A
>>> PC+2: D
>>> PC+3: C
>>>
>>> A little-endian halfword read of PC gives you AB; another halfword read
>>> at PC+2 gives CD, and a 32-bit little-endian read gives CDAB.
>>>
>>> We don't necessarily have to do the bit-shuffling explicitly: we could
>>> do thumb32 decode with the shuffle implicit in the layout used for
>>> decoding.
>>
>> I think we need to keep the bit-shuffling explicitly. It's less
>> confusing than having "non-coherent" shift during the decoding.
> 
> Whatever we do it needs a big comment along the lines of what Tim wrote
> above.

I will add the comment in the third version of this patch series.
Ian Campbell July 29, 2013, 3:15 p.m. UTC | #12
On Mon, 2013-07-29 at 16:04 +0100, Julien Grall wrote:
> On 07/29/2013 04:00 PM, Ian Campbell wrote:
> > On Mon, 2013-07-29 at 15:56 +0100, Julien Grall wrote:
> >> On 07/29/2013 03:46 PM, Tim Deegan wrote:
> >>> Hi,
> >>>
> >>> At 16:19 +0100 on 24 Jul (1374682769), Julien Grall wrote:
> >>>> On 07/24/2013 03:55 PM, Ian Campbell wrote:
> >>>>> On Tue, 2013-07-23 at 23:43 +0100, Julien Grall wrote:
> >>>>>> On 23 July 2013 23:16, Ian Campbell <ian.campbell@citrix.com> wrote:
> >>>>>>> Were does this the idea of swapping them come from though? The ARM ARM
> >>>>>>> seems (see e.g. section A6.3) to describe things in the order they are
> >>>>>>> in memory -- is doing otherwise not going to end up confusing us?
> >>>>>>
> >>>>>> In THUMB set, a 32-bit instruction consisting of two consecutive
> >>>>>> halfwords (see section A6.1).
> >>>>>> So the instruction is in "big endian", but Xen will read the word as a
> >>>>>> "little endian" things.
> >>>>>
> >>>>> Are you saying that the 16-bit sub-words are in the opposite order to
> >>>>> what I would have expected and what seems most natural from a decode
> >>>>> PoV?
> >>>>
> >>>>> Consider the T3 encoding of STR (imm, thumb) from A8.8.203, which is:
> >>>>>         1 1 1 1 1 0 0 0 1 1 0 0 Rn | Rt imm12
> >>>>>
> >>>>> (where Rn == bits 0..3 of the first halfword, Rt is 15..12 of the second
> >>>>> and imm12 is the remainder of the second halfword).
> >>>>>
> >>>>> I would have expected that the halfword with the "11111" pattern (which
> >>>>> identifies this as a 32-bit thumb instruction) would have to come first,
> >>>>> so the decode knows to look for the second. IOW the halfword with 11111
> >>>>> should be at PC and the Rt/imm12 halfword should be at PC+2.
> >>>
> >>> Yes.  This seems to be pretty much explicit in section A6.1 -- you can
> >>> read 16 bits and decide from those whether you need to read another 16.
> >>>
> >>> "If bits [15:11] of the halfword being decoded take any of the following
> >>>  values, the halfword is the first halfword of a 32-bit instruction"
> >>>
> >>>>> So if we
> >>>>> copy 4 bytes from guest PC we should end up with things in the order
> >>>>> given above (and in the manual) and swapping shouldn't be needed.
> >>>
> >>> Sadly, no.  Instruction memory is always little-endian, but:
> >>>
> >>> "The Thumb instruction stream is a sequence of halfword-aligned
> >>>  halfwords. Each Thumb instruction is either a single 16-bit halfword
> >>>  in that stream, or a 32-bit instruction consisting of two consecutive
> >>>  halfwords in that stream."
> >>>
> >>> So a 32-bit thumb instruction with bytes ABCD (where byte A is the one 
> >>> with the magic 5-bit pattern) will be stored in memory as a mixed-endian
> >>> monstrosity:
> >>>
> >>> PC:   B
> >>> PC+1: A
> >>> PC+2: D
> >>> PC+3: C
> >>>
> >>> A little-endian halfword read of PC gives you AB; another halfword read
> >>> at PC+2 gives CD, and a 32-bit little-endian read gives CDAB.
> >>>
> >>> We don't necessarily have to do the bit-shuffling explicitly: we could
> >>> do thumb32 decode with the shuffle implicit in the layout used for
> >>> decoding.
> >>
> >> I think we need to keep the bit-shuffling explicitly. It's less
> >> confusing than having "non-coherent" shift during the decoding.
> > 
> > Whatever we do it needs a big comment along the lines of what Tim wrote
> > above.
> 
> I will add the comment in the third version of this patch series.

FYI I am in the process of applying the first 2 patches of this series,
so you might want to wait an hour or so.

Ian.
diff mbox

Patch

diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index d6dc37d..da2bef6 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -35,6 +35,7 @@ 
 #include <asm/regs.h>
 #include <asm/cpregs.h>
 #include <asm/psci.h>
+#include <asm/guest_access.h>
 
 #include "io.h"
 #include "vtimer.h"
@@ -996,6 +997,31 @@  done:
     if (first) unmap_domain_page(first);
 }
 
+static int read_instruction(struct cpu_user_regs *regs, unsigned len,
+                            uint32_t *instr)
+{
+    int rc;
+
+    rc = raw_copy_from_guest(instr, (void * __user)regs->pc, len);
+
+    if ( rc )
+        return rc;
+
+    switch ( len )
+    {
+    /* 16-bit instruction */
+    case 2:
+        *instr &= 0xffff;
+        break;
+    /* 32-bit instruction */
+    case 4:
+        *instr = (*instr & 0xffff) << 16 | (*instr & 0xffff0000) >> 16;
+        break;
+    }
+
+    return 0;
+}
+
 static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
                                      struct hsr_dabt dabt)
 {
@@ -1021,6 +1047,27 @@  static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
     if ( !dabt.valid )
         goto bad_data_abort;
 
+    /*
+     * Errata 766422: Thumb store translation fault to Hypervisor may
+     * not have correct HSR Rt value.
+     */
+    if ( (regs->cpsr & PSR_THUMB) && dabt.write )
+    {
+        uint32_t instr = 0;
+
+        rc = read_instruction(regs, dabt.len ? 4 : 2, &instr);
+        if ( rc )
+            goto bad_data_abort;
+
+        /* Retrieve the transfer register from the instruction */
+        if ( dabt.len )
+            /* With 32-bit store instruction, the register is in [12..15] */
+            info.dabt.reg = (instr & 0xf000) >> 12;
+        else
+            /* With 16-bit store instruction, the register is in [0..3] */
+            info.dabt.reg = instr & 0x7;
+    }
+
     if (handle_mmio(&info))
     {
         regs->pc += dabt.len ? 4 : 2;