diff mbox series

[Xen-devel,5/7] xen/arm32: Invalidate BTB on guest exit for Cortex A17 and 12

Message ID 20180119134103.3390-6-julien.grall@linaro.org
State Superseded
Headers show
Series xen/arm32: Branch predictor hardening (XSA-254 variant 2) | expand

Commit Message

Julien Grall Jan. 19, 2018, 1:41 p.m. UTC
In order to avoid aliasing attackes agains the branch predictor, let's
invalidate the BTB on guest exist. This is made complicated by the fact
that we cannot take a branch invalidating the BTB.

This is based on the first version posrted by Marc Zyngier on Linux-arm
mailing list (see [1]).

This is part of XSA-254.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Julien Grall <julien.grall@linaro.org>

[1] https://www.spinics.net/lists/arm-kernel/msg627032.html
---
 xen/arch/arm/arm32/entry.S | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/cpuerrata.c   | 19 ++++++++++++++++
 2 files changed, 74 insertions(+)

Comments

Konrad Rzeszutek Wilk Jan. 24, 2018, 10:22 p.m. UTC | #1
On Fri, Jan 19, 2018 at 01:41:01PM +0000, Julien Grall wrote:
> In order to avoid aliasing attackes agains the branch predictor, let's
> invalidate the BTB on guest exist. This is made complicated by the fact
> that we cannot take a branch invalidating the BTB.
> 
> This is based on the first version posrted by Marc Zyngier on Linux-arm
> mailing list (see [1]).
> 
> This is part of XSA-254.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> 
> [1] https://www.spinics.net/lists/arm-kernel/msg627032.html
> ---
>  xen/arch/arm/arm32/entry.S | 55 ++++++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/cpuerrata.c   | 19 ++++++++++++++++
>  2 files changed, 74 insertions(+)
> 
> diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
> index 54a1733f87..c6ec0aa399 100644
> --- a/xen/arch/arm/arm32/entry.S
> +++ b/xen/arch/arm/arm32/entry.S
> @@ -160,6 +160,61 @@ GLOBAL(hyp_traps_vector)
>          b trap_irq                      /* 0x18 - IRQ */
>          b trap_fiq                      /* 0x1c - FIQ */
>  
> +        .align 5

Not .align 8?
Julien Grall Jan. 24, 2018, 10:28 p.m. UTC | #2
On 24 January 2018 at 22:22, Konrad Rzeszutek Wilk
<konrad.wilk@oracle.com> wrote:
> On Fri, Jan 19, 2018 at 01:41:01PM +0000, Julien Grall wrote:
>> In order to avoid aliasing attackes agains the branch predictor, let's
>> invalidate the BTB on guest exist. This is made complicated by the fact
>> that we cannot take a branch invalidating the BTB.
>>
>> This is based on the first version posrted by Marc Zyngier on Linux-arm
>> mailing list (see [1]).
>>
>> This is part of XSA-254.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>
>> [1] https://www.spinics.net/lists/arm-kernel/msg627032.html
>> ---
>>  xen/arch/arm/arm32/entry.S | 55 ++++++++++++++++++++++++++++++++++++++++++++++
>>  xen/arch/arm/cpuerrata.c   | 19 ++++++++++++++++
>>  2 files changed, 74 insertions(+)
>>
>> diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
>> index 54a1733f87..c6ec0aa399 100644
>> --- a/xen/arch/arm/arm32/entry.S
>> +++ b/xen/arch/arm/arm32/entry.S
>> @@ -160,6 +160,61 @@ GLOBAL(hyp_traps_vector)
>>          b trap_irq                      /* 0x18 - IRQ */
>>          b trap_fiq                      /* 0x1c - FIQ */
>>
>> +        .align 5
>
> Not .align 8?

.align 5 is following was we already have for the other vector table.

Per [1], this will make sure the base address of the table is a
multiple of 32 (one instruction per vector).

Cheers,

[1] https://ftp.gnu.org/old-gnu/Manuals/gas/html_chapter/as_7.html#SEC70



>
Stefano Stabellini Jan. 25, 2018, 1:02 a.m. UTC | #3
On Fri, 19 Jan 2018, Julien Grall wrote:
> In order to avoid aliasing attackes agains the branch predictor, let's
> invalidate the BTB on guest exist. This is made complicated by the fact
> that we cannot take a branch invalidating the BTB.
> 
> This is based on the first version posrted by Marc Zyngier on Linux-arm
> mailing list (see [1]).
> 
> This is part of XSA-254.
> 
> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> 
> [1] https://www.spinics.net/lists/arm-kernel/msg627032.html
> ---
>  xen/arch/arm/arm32/entry.S | 55 ++++++++++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/cpuerrata.c   | 19 ++++++++++++++++
>  2 files changed, 74 insertions(+)
> 
> diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
> index 54a1733f87..c6ec0aa399 100644
> --- a/xen/arch/arm/arm32/entry.S
> +++ b/xen/arch/arm/arm32/entry.S
> @@ -160,6 +160,61 @@ GLOBAL(hyp_traps_vector)
>          b trap_irq                      /* 0x18 - IRQ */
>          b trap_fiq                      /* 0x1c - FIQ */
>  
> +        .align 5
> +GLOBAL(hyp_traps_vector_bp_inv)
> +        /*
> +         * We encode the exception entry in the bottom 3 bits of
> +         * SP, and we have to guarantee to be 8 bytes aligned.
> +         */
> +        add sp, sp, #1                  /* Reset            7 */
> +        add sp, sp, #1                  /* Undef            6 */
> +        add sp, sp, #1                  /* Hypervisor Call  5 */
> +        add sp, sp, #1                  /* Prefetch abort   4 */
> +        add sp, sp, #1                  /* Data abort       3 */
> +        add sp, sp, #1                  /* Hypervisor       2 */
> +        add sp, sp, #1                  /* IRQ              1 */
> +        nop                             /* FIQ              0 */

Clever! Things that you don't read every day :-)


> +        mcr	p15, 0, r0, c7, c5, 6	    /* BPIALL */
> +        isb
> +
> +        /*
> +         * As we cannot use any temporary registers and cannot
> +         * clobber SP, we can decode the exception entry using
> +         * an unrolled binary search.
> +         */
> +        tst sp, #4
> +        bne 1f
> +
> +        tst sp, #2
> +        bne 3f
> +
> +        tst sp, #1
> +        bic sp, sp, #0x7
> +        bne trap_irq
> +        b   trap_fiq

I might be confused, but this is the case where sp == 0x7, right?
Shouldn't we have b trap_reset here?

Similarly the branch just above corresponds to 0x6, which should be
bne trap_undefined_instruction.

What am I getting wrong?



> +1:
> +        tst sp, #2
> +        bne 2f
> +
> +        tst sp, #1
> +        bic sp, sp, #0x7
> +        bne trap_hypervisor_call
> +        b   trap_prefetch_abort
> +
> +2:
> +        tst sp, #1
> +        bic sp, sp, #0x7
> +        bne trap_reset
> +        b   trap_undefined_instruction
> +
> +3:
> +        tst sp, #1
> +        bic sp, sp, #0x7
> +        bne trap_data_abort
> +        b   trap_guest_sync
> +
>  DEFINE_TRAP_ENTRY(reset)
>  DEFINE_TRAP_ENTRY(undefined_instruction)
>  DEFINE_TRAP_ENTRY(hypervisor_call)
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 0a138fa735..c79e6d65d3 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -198,6 +198,13 @@ install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry,
>      this_cpu(bp_harden_vecs) = hyp_vecs;
>  }
>  
> +static int enable_bp_inv_hardening(void *data)
> +{
> +    install_bp_hardening_vecs(data, hyp_traps_vector_bp_inv,
> +                              "execute BPIALL");
> +    return 0;
> +}
> +
>  #endif
>  
>  #define MIDR_RANGE(model, min, max)     \
> @@ -284,6 +291,18 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>          .enable = enable_psci_bp_hardening,
>      },
>  #endif
> +#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
> +    {
> +        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
> +        MIDR_ALL_VERSIONS(MIDR_CORTEX_A12),
> +        .enable = enable_bp_inv_hardening,
> +    },
> +    {
> +        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
> +        MIDR_ALL_VERSIONS(MIDR_CORTEX_A17),
> +        .enable = enable_bp_inv_hardening,
> +    },
> +#endif
>      {},
>  };
>  
> -- 
> 2.11.0
>
Julien Grall Jan. 25, 2018, 11:50 a.m. UTC | #4
Hi Stefano,

On 25/01/18 01:02, Stefano Stabellini wrote:
> On Fri, 19 Jan 2018, Julien Grall wrote:
>> In order to avoid aliasing attackes agains the branch predictor, let's
>> invalidate the BTB on guest exist. This is made complicated by the fact
>> that we cannot take a branch invalidating the BTB.
>>
>> This is based on the first version posrted by Marc Zyngier on Linux-arm
>> mailing list (see [1]).
>>
>> This is part of XSA-254.
>>
>> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>
>> [1] https://www.spinics.net/lists/arm-kernel/msg627032.html
>> ---
>>   xen/arch/arm/arm32/entry.S | 55 ++++++++++++++++++++++++++++++++++++++++++++++
>>   xen/arch/arm/cpuerrata.c   | 19 ++++++++++++++++
>>   2 files changed, 74 insertions(+)
>>
>> diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
>> index 54a1733f87..c6ec0aa399 100644
>> --- a/xen/arch/arm/arm32/entry.S
>> +++ b/xen/arch/arm/arm32/entry.S
>> @@ -160,6 +160,61 @@ GLOBAL(hyp_traps_vector)
>>           b trap_irq                      /* 0x18 - IRQ */
>>           b trap_fiq                      /* 0x1c - FIQ */
>>   
>> +        .align 5
>> +GLOBAL(hyp_traps_vector_bp_inv)
>> +        /*
>> +         * We encode the exception entry in the bottom 3 bits of
>> +         * SP, and we have to guarantee to be 8 bytes aligned.
>> +         */
>> +        add sp, sp, #1                  /* Reset            7 */
>> +        add sp, sp, #1                  /* Undef            6 */
>> +        add sp, sp, #1                  /* Hypervisor Call  5 */
>> +        add sp, sp, #1                  /* Prefetch abort   4 */
>> +        add sp, sp, #1                  /* Data abort       3 */
>> +        add sp, sp, #1                  /* Hypervisor       2 */
>> +        add sp, sp, #1                  /* IRQ              1 */
>> +        nop                             /* FIQ              0 */
> 
> Clever! Things that you don't read every day :-)

Thanks Marc for the idea :).

> 
> 
>> +        mcr	p15, 0, r0, c7, c5, 6	    /* BPIALL */
>> +        isb
>> +
>> +        /*
>> +         * As we cannot use any temporary registers and cannot
>> +         * clobber SP, we can decode the exception entry using
>> +         * an unrolled binary search.
>> +         */
>> +        tst sp, #4
>> +        bne 1f
>> +
>> +        tst sp, #2
>> +        bne 3f
>> +
>> +        tst sp, #1
>> +        bic sp, sp, #0x7
>> +        bne trap_irq
>> +        b   trap_fiq
> 
> I might be confused, but this is the case where sp == 0x7, right?
> Shouldn't we have b trap_reset here?
> 
> Similarly the branch just above corresponds to 0x6, which should be
> bne trap_undefined_instruction.
> 
> What am I getting wrong?

The tst instruction performs a bitwise AND on a register value (here 
sp). The result will be used to update the condition flags.

So with tst sp, #4 the result will either be 0x100 or 0x000. The former 
will clear Z flag while the latter set Z flag.

This means that bne will branch only when bit 2 is set. So the only way 
to end here is because the first 3-bit are equal to 0x00X. This 
corresponds to IRQ/FIQ vector.

Cheers,
Stefano Stabellini Jan. 25, 2018, 7:35 p.m. UTC | #5
On Thu, 25 Jan 2018, Julien Grall wrote:
> Hi Stefano,
> 
> On 25/01/18 01:02, Stefano Stabellini wrote:
> > On Fri, 19 Jan 2018, Julien Grall wrote:
> > > In order to avoid aliasing attackes agains the branch predictor, let's
> > > invalidate the BTB on guest exist. This is made complicated by the fact
> > > that we cannot take a branch invalidating the BTB.
> > > 
> > > This is based on the first version posrted by Marc Zyngier on Linux-arm
> > > mailing list (see [1]).
> > > 
> > > This is part of XSA-254.
> > > 
> > > Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
> > > Signed-off-by: Julien Grall <julien.grall@linaro.org>
> > > 
> > > [1] https://www.spinics.net/lists/arm-kernel/msg627032.html
> > > ---
> > >   xen/arch/arm/arm32/entry.S | 55
> > > ++++++++++++++++++++++++++++++++++++++++++++++
> > >   xen/arch/arm/cpuerrata.c   | 19 ++++++++++++++++
> > >   2 files changed, 74 insertions(+)
> > > 
> > > diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
> > > index 54a1733f87..c6ec0aa399 100644
> > > --- a/xen/arch/arm/arm32/entry.S
> > > +++ b/xen/arch/arm/arm32/entry.S
> > > @@ -160,6 +160,61 @@ GLOBAL(hyp_traps_vector)
> > >           b trap_irq                      /* 0x18 - IRQ */
> > >           b trap_fiq                      /* 0x1c - FIQ */
> > >   +        .align 5
> > > +GLOBAL(hyp_traps_vector_bp_inv)
> > > +        /*
> > > +         * We encode the exception entry in the bottom 3 bits of
> > > +         * SP, and we have to guarantee to be 8 bytes aligned.
> > > +         */
> > > +        add sp, sp, #1                  /* Reset            7 */
> > > +        add sp, sp, #1                  /* Undef            6 */
> > > +        add sp, sp, #1                  /* Hypervisor Call  5 */
> > > +        add sp, sp, #1                  /* Prefetch abort   4 */
> > > +        add sp, sp, #1                  /* Data abort       3 */
> > > +        add sp, sp, #1                  /* Hypervisor       2 */
> > > +        add sp, sp, #1                  /* IRQ              1 */
> > > +        nop                             /* FIQ              0 */
> > 
> > Clever! Things that you don't read every day :-)
> 
> Thanks Marc for the idea :).
> 
> > 
> > 
> > > +        mcr	p15, 0, r0, c7, c5, 6	    /* BPIALL */
> > > +        isb
> > > +
> > > +        /*
> > > +         * As we cannot use any temporary registers and cannot
> > > +         * clobber SP, we can decode the exception entry using
> > > +         * an unrolled binary search.
> > > +         */
> > > +        tst sp, #4
> > > +        bne 1f
> > > +
> > > +        tst sp, #2
> > > +        bne 3f
> > > +
> > > +        tst sp, #1
> > > +        bic sp, sp, #0x7
> > > +        bne trap_irq
> > > +        b   trap_fiq
> > 
> > I might be confused, but this is the case where sp == 0x7, right?
> > Shouldn't we have b trap_reset here?
> > 
> > Similarly the branch just above corresponds to 0x6, which should be
> > bne trap_undefined_instruction.
> > 
> > What am I getting wrong?
> 
> The tst instruction performs a bitwise AND on a register value (here sp). The
> result will be used to update the condition flags.
> 
> So with tst sp, #4 the result will either be 0x100 or 0x000. The former will
> clear Z flag while the latter set Z flag.
> 
> This means that bne will branch only when bit 2 is set. So the only way to end
> here is because the first 3-bit are equal to 0x00X. This corresponds to
> IRQ/FIQ vector.

I got it the other way around, thanks for the explanation :-)

Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
Julien Grall Jan. 31, 2018, 3:31 p.m. UTC | #6
Hi Stefano,

On 25/01/18 19:35, Stefano Stabellini wrote:
>> This means that bne will branch only when bit 2 is set. So the only way to end
>> here is because the first 3-bit are equal to 0x00X. This corresponds to
>> IRQ/FIQ vector.
> 
> I got it the other way around, thanks for the explanation :-)
> 
> Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>

Did you mean Reviewed-by or Acked-by?

Cheers,
Stefano Stabellini Jan. 31, 2018, 4:40 p.m. UTC | #7
On Wed, 31 Jan 2018, Julien Grall wrote:
> Hi Stefano,
> 
> On 25/01/18 19:35, Stefano Stabellini wrote:
> > > This means that bne will branch only when bit 2 is set. So the only way to
> > > end
> > > here is because the first 3-bit are equal to 0x00X. This corresponds to
> > > IRQ/FIQ vector.
> > 
> > I got it the other way around, thanks for the explanation :-)
> > 
> > Signed-off-by: Stefano Stabellini <sstabellini@kernel.org>
> 
> Did you mean Reviewed-by or Acked-by?

LOL!  I meant Reviewed-by.
diff mbox series

Patch

diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
index 54a1733f87..c6ec0aa399 100644
--- a/xen/arch/arm/arm32/entry.S
+++ b/xen/arch/arm/arm32/entry.S
@@ -160,6 +160,61 @@  GLOBAL(hyp_traps_vector)
         b trap_irq                      /* 0x18 - IRQ */
         b trap_fiq                      /* 0x1c - FIQ */
 
+        .align 5
+GLOBAL(hyp_traps_vector_bp_inv)
+        /*
+         * We encode the exception entry in the bottom 3 bits of
+         * SP, and we have to guarantee to be 8 bytes aligned.
+         */
+        add sp, sp, #1                  /* Reset            7 */
+        add sp, sp, #1                  /* Undef            6 */
+        add sp, sp, #1                  /* Hypervisor Call  5 */
+        add sp, sp, #1                  /* Prefetch abort   4 */
+        add sp, sp, #1                  /* Data abort       3 */
+        add sp, sp, #1                  /* Hypervisor       2 */
+        add sp, sp, #1                  /* IRQ              1 */
+        nop                             /* FIQ              0 */
+
+        mcr	p15, 0, r0, c7, c5, 6	    /* BPIALL */
+        isb
+
+        /*
+         * As we cannot use any temporary registers and cannot
+         * clobber SP, we can decode the exception entry using
+         * an unrolled binary search.
+         */
+        tst sp, #4
+        bne 1f
+
+        tst sp, #2
+        bne 3f
+
+        tst sp, #1
+        bic sp, sp, #0x7
+        bne trap_irq
+        b   trap_fiq
+
+1:
+        tst sp, #2
+        bne 2f
+
+        tst sp, #1
+        bic sp, sp, #0x7
+        bne trap_hypervisor_call
+        b   trap_prefetch_abort
+
+2:
+        tst sp, #1
+        bic sp, sp, #0x7
+        bne trap_reset
+        b   trap_undefined_instruction
+
+3:
+        tst sp, #1
+        bic sp, sp, #0x7
+        bne trap_data_abort
+        b   trap_guest_sync
+
 DEFINE_TRAP_ENTRY(reset)
 DEFINE_TRAP_ENTRY(undefined_instruction)
 DEFINE_TRAP_ENTRY(hypervisor_call)
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 0a138fa735..c79e6d65d3 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -198,6 +198,13 @@  install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry,
     this_cpu(bp_harden_vecs) = hyp_vecs;
 }
 
+static int enable_bp_inv_hardening(void *data)
+{
+    install_bp_hardening_vecs(data, hyp_traps_vector_bp_inv,
+                              "execute BPIALL");
+    return 0;
+}
+
 #endif
 
 #define MIDR_RANGE(model, min, max)     \
@@ -284,6 +291,18 @@  static const struct arm_cpu_capabilities arm_errata[] = {
         .enable = enable_psci_bp_hardening,
     },
 #endif
+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
+    {
+        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A12),
+        .enable = enable_bp_inv_hardening,
+    },
+    {
+        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
+        MIDR_ALL_VERSIONS(MIDR_CORTEX_A17),
+        .enable = enable_bp_inv_hardening,
+    },
+#endif
     {},
 };