[Xen-devel,4/7] xen/arm32: Add skeleton to harden branch predictor aliasing attacks

Message ID 20180119134103.3390-5-julien.grall@linaro.org
State Superseded
Headers show
Series xen/arm32: Branch predictor hardening (XSA-254 variant 2) | expand

Commit Message

Julien Grall Jan. 19, 2018, 1:41 p.m. UTC
Aliasing attacks against CPU branch predictors can allow an attacker to
redirect speculative control flow on some CPUs and potentially divulge
information from one context to another.

This patch adds the initial skeleton code behind a new Kconfig option
to enable implementation-specific mitigations against these attacks
for CPUs that are affected.

Most of the mitigations will have to be applied when entering the
hypervisor from guest context.

Because the attack is against the branch predictor, it is not possible
to safely use branch instructions before the mitigation is applied.
Therefore this has to be done in the vector entry, before jumping to the
helper handling a given exception.

However, on arm32, each vector contains a single instruction. This means
that the hardened vector tables may rely on register state that does not
hold when running in the hypervisor (e.g. SP being 8-byte aligned).
Therefore, hypervisor code running with the guest vector table should be
minimized and always have interrupts masked to reduce the risk of using
them.

This patch provides the infrastructure to switch vector tables before
entering the guest and when leaving it.

Note that the alternatives framework could have been used, but older Xen
(4.8 or earlier) doesn't support it. So avoid using alternatives to ease
backporting.

This is part of XSA-254.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
---
 xen/arch/arm/Kconfig       |  3 +++
 xen/arch/arm/arm32/entry.S | 41 ++++++++++++++++++++++++++++++++++++++++-
 xen/arch/arm/cpuerrata.c   | 30 ++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)
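
At a glance, the flow the patch introduces is roughly the following (a
schematic pseudo-assembly sketch, not literal code from the patch):

    @ Guest -> hypervisor: the (possibly hardened) guest vector table is live.
    save_guest_regs:
        HVBAR := hyp_traps_vector         @ restore the default table very
        isb                               @ early, while interrupts are masked
        @ ... save state, run normal hypervisor code ...

    @ Hypervisor -> guest:
    return_to_hypervisor:
        cpsid ai                          @ mask IRQs and asynchronous aborts
        @ ... restore guest state ...
        HVBAR := this_cpu(bp_harden_vecs) @ install the hardened table very late
        eret                              @ enter the guest behind the hardened
                                          @ vectors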

Comments

Stefano Stabellini Jan. 24, 2018, 11:54 p.m. UTC | #1
On Fri, 19 Jan 2018, Julien Grall wrote:
> [...]

I only have a couple of questions. It looks good.


> [...]
> @@ -179,12 +193,37 @@ return_to_guest:
>          RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
>          /* Fall thru */
>  return_to_hypervisor:
> -        cpsid i
> +        cpsid ai

Why?


>          ldr lr, [sp, #UREGS_lr]
>          ldr r11, [sp, #UREGS_pc]
>          msr ELR_hyp, r11
>          ldr r11, [sp, #UREGS_cpsr]
>          msr SPSR_hyp, r11
> +#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
> +        /*
> +         * Hardening the branch predictor may require a different vector
> +         * table to be set up before returning to the guest. Those
> +         * vectors may rely on register state that does not hold when
> +         * running in the hypervisor (e.g. SP being 8-byte aligned). So
> +         * set up HVBAR very late.
> +         *
> +         * The default vector table will be restored on exit (see
> +         * save_guest_regs).
> +         */
> +        mov r9, #0                      /* vector table = NULL */
> +        /*
> +         * Load the vector table pointer from the per-CPU bp_harden_vecs
> +         * only when returning to the guest.
> +         */
> +        and r11, #PSR_MODE_MASK
> +        cmp r11, #PSR_MODE_HYP
> +        ldrne r11, =per_cpu__bp_harden_vecs
> +        mrcne p15, 4, r10, c13, c0, 2   /* r10 = per-cpu offset (HTPIDR) */
> +        addne r11, r11, r10             /* r11 = address of bp_harden_vecs */
> +        ldrne r9, [r11]                 /* r9  = vector table */
> +        cmp r9, #0                      /* Only update HVBAR when the vector */
> +        mcrne p15, 4, r9, c12, c0, 0    /* table is not NULL. */

Do we need/want an isb here? Or maybe it is not necessary because we are
about to eret?


> [...]
Julien Grall Jan. 25, 2018, 11:36 a.m. UTC | #2
Hi Stefano,

On 24/01/18 23:54, Stefano Stabellini wrote:
> On Fri, 19 Jan 2018, Julien Grall wrote:
>> [...]
>> @@ -179,12 +193,37 @@ return_to_guest:
>>           RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
>>           /* Fall thru */
>>   return_to_hypervisor:
>> -        cpsid i
>> +        cpsid ai
> 
> Why?

An asynchronous abort is a kind of interrupt; as we are going to switch to
the hardened vector table, you don't want an interrupt to come up.

This is because the hardened vector table requires SP to be 8-byte
aligned. When in the hypervisor, and particularly when restoring the
registers (as below), this assumption does not hold.

So masking all interrupts (as mentioned a few times within the patch and
the commit message) will reduce the risk of using the hardened vectors. I
say reduce because you may still receive a data abort (imagine an unlikely
error in the few instructions that restore state).

It is also why switching vector tables is done very early in the entry
path and very late in the exit path.
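
For reference, the a/i/f letters of cpsid map directly onto the CPSR mask
bits (a minimal sketch):

    cpsid i     @ masks IRQs only (CPSR.I) -- the old code
    cpsid ai    @ masks IRQs and asynchronous aborts (CPSR.A) -- the new code
    cpsid aif   @ would additionally mask FIQs (CPSR.F)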

>> [...]
>> +        cmp r9, #0                      /* Only update HVBAR when the vector */
>> +        mcrne p15, 4, r9, c12, c0, 0    /* table is not NULL. */
> 
> Do we need/want an isb here? Or maybe it is not necessary because we are
> about to eret?
The eret is a context synchronization barrier. HVBAR may not be updated
until the eret, but we don't much care, as the hardened vector table only
matters when running in guest context.
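
Concretely, the tail of the exit path relies on exactly that (annotated
sketch; the eret sits just below the quoted hunk):

    mcrne p15, 4, r9, c12, c0, 0    @ HVBAR write, possibly not visible yet
    pop {r0-r12}                    @ nothing below depends on the vector table
    add sp, #(UREGS_SP_usr - UREGS_sp)
    clrex
    eret                            @ context-synchronizing: the guest's first
                                    @ instruction sees the updated HVBAR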

>> [...]

Cheers,
Stefano Stabellini Jan. 25, 2018, 6:45 p.m. UTC | #3
On Thu, 25 Jan 2018, Julien Grall wrote:
> Hi Stefano,
> 
> On 24/01/18 23:54, Stefano Stabellini wrote:
> > On Fri, 19 Jan 2018, Julien Grall wrote:
> > > [...]
> > > @@ -179,12 +193,37 @@ return_to_guest:
> > >           RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
> > >           /* Fall thru */
> > >   return_to_hypervisor:
> > > -        cpsid i
> > > +        cpsid ai
> > 
> > Why?
> 
> An asynchronous abort is a kind of interrupt; as we are going to switch to
> the hardened vector table, you don't want an interrupt to come up.
> 
> This is because the hardened vector table requires SP to be 8-byte aligned.
> When in the hypervisor, and particularly when restoring the registers (as
> below), this assumption does not hold.
> 
> So masking all interrupts (as mentioned a few times within the patch and the
> commit message) will reduce the risk of using the hardened vectors. I say
> reduce because you may still receive a data abort (imagine an unlikely error
> in the few instructions that restore state).
> 
> It is also why switching vector tables is done very early in the entry path
> and very late in the exit path.

All right, thanks for the explanation. Please add "and mask asynchronous
aborts" in the commit message.


> > > [...]
> > Do we need/want an isb here? Or maybe it is not necessary because we are
> > about to eret?
> The eret is a context synchronization barrier. HVBAR may not be updated until
> the eret, but we don't much care, as the hardened vector table only matters
> when running in guest context.

OK, I understand. Please add my

Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>

Julien Grall Jan. 25, 2018, 6:50 p.m. UTC | #4
Hi,

On 25/01/18 18:45, Stefano Stabellini wrote:
> On Thu, 25 Jan 2018, Julien Grall wrote:
>> [...]
> 
> All right, thanks for the explanation. Please add "and mask asynchronous
> aborts" in the commit message.

I am not sure what exactly you are suggesting here. The commit message
currently contains:

"Therefore hypervisor code running with guest vectors table should be
minimized and always have interrupts masked to reduce the risk to use
them."

What are you suggesting?

Cheers,
Stefano Stabellini Jan. 25, 2018, 7:37 p.m. UTC | #5
On Thu, 25 Jan 2018, Julien Grall wrote:
> Hi,
> 
> On 25/01/18 18:45, Stefano Stabellini wrote:
> > [...]
> > 
> > All right, thanks for the explanation. Please add "and mask asynchronous
> > aborts" in the commit message.
> 
> I am not sure what exactly you are suggesting here. The commit message
> currently contains:
> 
> "Therefore hypervisor code running with guest vectors table should be
> minimized and always have interrupts masked to reduce the risk to use
> them."
> 
> What are you suggesting?

"Therefore hypervisor code running with guest vectors table should be
minimized and always have interrupts and async aborts masked to reduce
the risk to use them."

Do you think that it is clearer?
Julien Grall Jan. 26, 2018, 4:21 p.m. UTC | #6
Hi Stefano,

On 25/01/18 19:37, Stefano Stabellini wrote:
> On Thu, 25 Jan 2018, Julien Grall wrote:
>> [...]
> 
> "Therefore hypervisor code running with guest vectors table should be
> minimized and always have interrupts and async aborts masked to reduce
> the risk to use them."
> 
> Do you think that it is clearer?

Well, that was covered by "interrupts". If you look at the Arm ARM, A,
I and F are all considered interrupts.

Cheers,
Julien Grall Jan. 31, 2018, 3:29 p.m. UTC | #7
On 26/01/18 16:21, Julien Grall wrote:
>> "Therefore hypervisor code running with guest vectors table should be
>> minimized and always have interrupts and async aborts masked to reduce
>> the risk to use them."
>>
>> Do you think that it is clearer?
> 
> Well, that was covered by "interrupts". If you look at the Arm ARM, A,
> I and F are all considered interrupts.

I reworked the paragraph and it is now:

"However, on arm32, each vector contain a single instruction. This means 
that the hardened vector tables may rely on the state of registers that 
does not hold when in the hypervisor (e.g SP is 8 bytes aligned). 
Therefore hypervisor code running with guest vectors table should be
minimized and always have IRQ and SError masked to reduce the risk to 
use them."

Cheers,
Stefano Stabellini Jan. 31, 2018, 4:40 p.m. UTC | #8
On Wed, 31 Jan 2018, Julien Grall wrote:
> On 26/01/18 16:21, Julien Grall wrote:
> > > "Therefore hypervisor code running with guest vectors table should be
> > > minimized and always have interrupts and async aborts masked to reduce
> > > the risk to use them."
> > > 
> > > Do you think that it is clearer?
> > 
> > Well, that was covered by "interrupts". If you look at the Arm Arm, A, I, F
> > are considered all interrupts.
> 
> I reworked the paragraph and it is now:
> 
> "However, on arm32, each vector contain a single instruction. This means that
> the hardened vector tables may rely on the state of registers that does not
> hold when in the hypervisor (e.g SP is 8 bytes aligned). Therefore hypervisor
> code running with guest vectors table should be
> minimized and always have IRQ and SError masked to reduce the risk to use
> them."

I think it's much better, thanks!
Patch

diff --git a/xen/arch/arm/Kconfig b/xen/arch/arm/Kconfig
index 06fd85cc77..2782ee6589 100644
--- a/xen/arch/arm/Kconfig
+++ b/xen/arch/arm/Kconfig
@@ -191,6 +191,9 @@  config HARDEN_BRANCH_PREDICTOR
 config ARM64_HARDEN_BRANCH_PREDICTOR
     def_bool y if ARM_64 && HARDEN_BRANCH_PREDICTOR
 
+config ARM32_HARDEN_BRANCH_PREDICTOR
+    def_bool y if ARM_32 && HARDEN_BRANCH_PREDICTOR
+
 source "common/Kconfig"
 
 source "drivers/Kconfig"
diff --git a/xen/arch/arm/arm32/entry.S b/xen/arch/arm/arm32/entry.S
index c2fad5fe9b..54a1733f87 100644
--- a/xen/arch/arm/arm32/entry.S
+++ b/xen/arch/arm/arm32/entry.S
@@ -34,6 +34,20 @@ 
         blne save_guest_regs
 
 save_guest_regs:
+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
+        /*
+         * Restore the vector table to the default as it may have been
+         * changed when returning to the guest (see
+         * return_to_hypervisor). We need to do that early (i.e. before
+         * any interrupts are unmasked) because the hardened vectors
+         * require SP to be 8-byte aligned. This does not hold when
+         * running in the hypervisor.
+         */
+        ldr r1, =hyp_traps_vector
+        mcr p15, 4, r1, c12, c0, 0
+        isb
+#endif
+
         ldr r11, =0xffffffff  /* Clobber SP which is only valid for hypervisor frames. */
         str r11, [sp, #UREGS_sp]
         SAVE_ONE_BANKED(SP_usr)
@@ -179,12 +193,37 @@  return_to_guest:
         RESTORE_ONE_BANKED(R11_fiq); RESTORE_ONE_BANKED(R12_fiq);
         /* Fall thru */
 return_to_hypervisor:
-        cpsid i
+        cpsid ai
         ldr lr, [sp, #UREGS_lr]
         ldr r11, [sp, #UREGS_pc]
         msr ELR_hyp, r11
         ldr r11, [sp, #UREGS_cpsr]
         msr SPSR_hyp, r11
+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
+        /*
+         * Hardening the branch predictor may require a different vector
+         * table to be set up before returning to the guest. Those
+         * vectors may rely on register state that does not hold when
+         * running in the hypervisor (e.g. SP being 8-byte aligned). So
+         * set up HVBAR very late.
+         *
+         * The default vector table will be restored on exit (see
+         * save_guest_regs).
+         */
+        mov r9, #0                      /* vector table = NULL */
+        /*
+         * Load the vector table pointer from the per-CPU bp_harden_vecs
+         * only when returning to the guest.
+         */
+        and r11, #PSR_MODE_MASK
+        cmp r11, #PSR_MODE_HYP
+        ldrne r11, =per_cpu__bp_harden_vecs
+        mrcne p15, 4, r10, c13, c0, 2   /* r10 = per-cpu offset (HTPIDR) */
+        addne r11, r11, r10             /* r11 = address of bp_harden_vecs */
+        ldrne r9, [r11]                 /* r9  = vector table */
+        cmp r9, #0                      /* Only update HVBAR when the vector */
+        mcrne p15, 4, r9, c12, c0, 0    /* table is not NULL. */
+#endif
         pop {r0-r12}
         add sp, #(UREGS_SP_usr - UREGS_sp); /* SP, LR, SPSR, PC */
         clrex
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index f1ea7f3c5b..0a138fa735 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -170,6 +170,36 @@  static int enable_psci_bp_hardening(void *data)
 
 #endif /* CONFIG_ARM64_HARDEN_BRANCH_PREDICTOR */
 
+/* Branch predictor hardening code for Arm32 */
+#ifdef CONFIG_ARM32_HARDEN_BRANCH_PREDICTOR
+
+/*
+ * Per-CPU vector tables to use when returning to the guest. They will
+ * only be used on platforms that require branch predictor hardening.
+ */
+DEFINE_PER_CPU_READ_MOSTLY(const char *, bp_harden_vecs);
+
+extern char hyp_traps_vector_bp_inv[];
+
+static void __maybe_unused
+install_bp_hardening_vecs(const struct arm_cpu_capabilities *entry,
+                          const char *hyp_vecs, const char *desc)
+{
+    /*
+     * Enable callbacks are called on every CPU based on the
+     * capabilities. So double-check whether the CPU matches the
+     * entry.
+     */
+    if ( !entry->matches(entry) )
+        return;
+
+    printk(XENLOG_INFO "CPU%u will %s on guest exit\n",
+           smp_processor_id(), desc);
+    this_cpu(bp_harden_vecs) = hyp_vecs;
+}
+
+#endif
+
 #define MIDR_RANGE(model, min, max)     \
     .matches = is_affected_midr_range,  \
     .midr_model = model,                \
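
Note that install_bp_hardening_vecs() is marked __maybe_unused because its
callers only arrive in later patches of this series. As a purely
illustrative sketch of how a processor might be wired up -- the callback
follows the enable_psci_bp_hardening() pattern above, but the capability
name and MIDR macro in the entry are assumptions, not the actual follow-up
code:

    /* Illustrative sketch only -- not the actual follow-up patch. */
    static int enable_bp_inv_hardening(void *data)
    {
        /* data points at the matching arm_cpu_capabilities entry. */
        install_bp_hardening_vecs(data, hyp_traps_vector_bp_inv,
                                  "execute BPIALL");
        return 0;
    }

    /* Hypothetical cpu_errata[] entry (names assumed for illustration): */
    {
        .capability = ARM_HARDEN_BRANCH_PREDICTOR,
        MIDR_ALL_VERSIONS(MIDR_CORTEX_A15),
        .enable = enable_bp_inv_hardening,
    },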