[Xen-devel,10/13] xen/arm64: Implement a fast path for handling SMCCC_ARCH_WORKAROUND_2

Message ID 20180522174254.27551-11-julien.grall@arm.com
State Superseded
Series
  • xen/arm: SSBD (aka Spectre-v4) mitigation (XSA-263)

Commit Message

Julien Grall May 22, 2018, 5:42 p.m.
The function ARM_SMCCC_ARCH_WORKAROUND_2 will be called by the guest to
enable/disable the SSBD mitigation, so we want the handling to be as
fast as possible.

The new sequence will forward guest's ARCH_WORKAROUND_2 call to EL3 and
also track the state of the workaround per-vCPU.

Note that since we need to execute branches, this always executes after
the spectre-v2 mitigation.

This code is based on the KVM counterpart "arm64: KVM: Handle guest's
ARCH_WORKAROUND_2 requests" written by Marc Zyngier.

This is part of XSA-263.

Signed-off-by: Julien Grall <julien.grall@arm.com>
---
 xen/arch/arm/arm64/asm-offsets.c |  2 ++
 xen/arch/arm/arm64/entry.S       | 43 +++++++++++++++++++++++++++++++++++++++-
 xen/arch/arm/cpuerrata.c         | 18 +++++++++++++++++
 3 files changed, 62 insertions(+), 1 deletion(-)
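
For reference, here is a minimal C sketch of the logic the assembly fast path implements. The type, flag value and helper names below (get_cpu_info(), this_cpu_ssbd_callback_required(), smc_arch_workaround_2()) are simplified stand-ins for the real Xen interfaces, not code added by this patch:

#include <stdbool.h>
#include <stdint.h>

/* Simplified stand-ins for the real Xen structures and helpers. */
#define CPUINFO_WORKAROUND_2_FLAG (1u << 0)

struct cpu_info {
    uint64_t flags;                           /* per-vCPU workaround state */
};

struct cpu_info *get_cpu_info(void);          /* derived from SP in Xen */
bool this_cpu_ssbd_callback_required(void);   /* per-CPU variable in Xen */
void smc_arch_workaround_2(bool enable);      /* SMC #0 to EL3 firmware */

/* What the guest_sync fast path does when the guest calls ARCH_WORKAROUND_2. */
void handle_arch_workaround_2(uint64_t guest_x1)
{
    struct cpu_info *info = get_cpu_info();
    bool enable = (guest_x1 != 0);            /* sanitize the argument to 0/1 */

    /* Track the per-vCPU state so it can be restored on vCPU switch. */
    if ( enable )
        info->flags |= CPUINFO_WORKAROUND_2_FLAG;
    else
        info->flags &= ~CPUINFO_WORKAROUND_2_FLAG;

    /* Forward to EL3 only if this CPU actually needs the firmware call. */
    if ( this_cpu_ssbd_callback_required() )
        smc_arch_workaround_2(enable);

    /* x0-x3 are then zeroed before eret to avoid leaking data to the guest. */
}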

Comments

Stefano Stabellini May 25, 2018, 7:18 p.m. | #1
On Tue, 22 May 2018, Julien Grall wrote:
> The function ARM_SMCCC_ARCH_WORKAROUND_2 will be called by the guest for
> enabling/disabling the ssbd mitigation. So we want the handling to
> be as fast as possible.
> 
> The new sequence will forward guest's ARCH_WORKAROUND_2 call to EL3 and
> also track the state of the workaround per-vCPU.
> 
> Note that since we need to execute branches, this always executes after
> the spectre-v2 mitigation.
> 
> This code is based on KVM counterpart "arm64: KVM: Handle guest's
> ARCH_WORKAROUND_2 requests" written by Marc Zyngier.
> 
> This is part of XSA-263.
> 
> Signed-off-by: Julien Grall <julien.grall@arm.com>

I think the patch works as intended.


> ---
>  xen/arch/arm/arm64/asm-offsets.c |  2 ++
>  xen/arch/arm/arm64/entry.S       | 43 +++++++++++++++++++++++++++++++++++++++-
>  xen/arch/arm/cpuerrata.c         | 18 +++++++++++++++++
>  3 files changed, 62 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/arm/arm64/asm-offsets.c b/xen/arch/arm/arm64/asm-offsets.c
> index ce24e44473..f5c696d092 100644
> --- a/xen/arch/arm/arm64/asm-offsets.c
> +++ b/xen/arch/arm/arm64/asm-offsets.c
> @@ -22,6 +22,7 @@
>  void __dummy__(void)
>  {
>     OFFSET(UREGS_X0, struct cpu_user_regs, x0);
> +   OFFSET(UREGS_X1, struct cpu_user_regs, x1);
>     OFFSET(UREGS_LR, struct cpu_user_regs, lr);
>  
>     OFFSET(UREGS_SP, struct cpu_user_regs, sp);
> @@ -45,6 +46,7 @@ void __dummy__(void)
>     BLANK();
>  
>     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
> +   OFFSET(CPUINFO_flags, struct cpu_info, flags);
>  
>     OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context);
>  
> diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
> index e2344e565f..8e25ff3997 100644
> --- a/xen/arch/arm/arm64/entry.S
> +++ b/xen/arch/arm/arm64/entry.S
> @@ -1,4 +1,6 @@
>  #include <asm/asm_defns.h>
> +#include <asm/current.h>
> +#include <asm/macros.h>
>  #include <asm/regs.h>
>  #include <asm/alternative.h>
>  #include <asm/smccc.h>
> @@ -241,7 +243,7 @@ guest_sync:
>           * be encoded as an immediate for cmp.
>           */
>          eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
> -        cbnz    w0, guest_sync_slowpath
> +        cbnz    w0, check_wa2
>  
>          /*
>           * Clobber both x0 and x1 to prevent leakage. Note that thanks
> @@ -250,6 +252,45 @@ guest_sync:
>          mov     x1, xzr
>          eret
>  
> +check_wa2:
> +        /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
> +        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID

We come to check_wa2 after checking on #ARM_SMCCC_ARCH_WORKAROUND_1_FID,
so maybe we can skip this?


> +        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID
> +        cbnz    w0, guest_sync_slowpath
> +#ifdef CONFIG_ARM_SSBD
> +alternative_cb arm_enable_wa2_handling
> +        b       wa2_end
> +alternative_cb_end
> +        /* Sanitize the argument */
> +        mov     x0, #-(UREGS_kernel_sizeof - UREGS_X1)  /* x0 := offset of guest's x1 on the stack */
> +        ldr     x1, [sp, x0]                            /* Load guest's x1 */
> +        cmp     w1, wzr
> +        cset    x1, ne
> +
> +        /*
> +         * Update the guest flag. At this stage sp point after the field
> +         * guest_cpu_user_regs in cpu_info.
> +         */
> +        adr_cpu_info x2
> +        ldr     x0, [x2, #CPUINFO_flags]
> +        bfi     x0, x1, #CPUINFO_WORKAROUND_2_FLAG_SHIFT, #1
> +        str     x0, [x2, #CPUINFO_flags]
> +
> +        /* Check that we actually need to perform the call */
> +        ldr_this_cpu x0, ssbd_callback_required, x2
> +        cbz     x0, wa2_end
> +        mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID
> +        smc     #0

Shouldn't we make the call only if get_cpu_info()->flags changed?


> +wa2_end:
> +        /* Don't leak data from the SMC call */
> +        mov     x1, xzr
> +        mov     x2, xzr
> +        mov     x3, xzr
> +#endif /* !CONFIG_ARM_SSBD */
> +        mov     x0, xzr
> +        eret
>  guest_sync_slowpath:
>          /*
>           * x0/x1 may have been scratch by the fast path above, so avoid
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index f921721a66..54df4ff445 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -7,6 +7,7 @@
>  #include <xen/warning.h>
>  #include <asm/cpufeature.h>
>  #include <asm/cpuerrata.h>
> +#include <asm/insn.h>
>  #include <asm/psci.h>
>  
>  /* Override macros from asm/page.h to make them work with mfn_t */
> @@ -272,6 +273,23 @@ static int __init parse_spec_ctrl(const char *s)
>  }
>  custom_param("spec-ctrl", parse_spec_ctrl);
>  
> +/* Arm64 only for now as for Arm32 the workaround is currently handled in C. */
> +#ifdef CONFIG_ARM_64
> +void __init arm_enable_wa2_handling(const struct alt_instr *alt,
> +                                    const uint32_t *origptr,
> +                                    uint32_t *updptr, int nr_inst)
> +{
> +    BUG_ON(nr_inst != 1);
> +
> +    /*
> +     * Only allow mitigation on guest ARCH_WORKAROUND_2 if the SSBD
> +     * state allow it to be flipped.
> +     */
> +    if ( get_ssbd_state() == ARM_SSBD_RUNTIME )
> +        *updptr = aarch64_insn_gen_nop();
> +}
> +#endif
> +
>  /*
>   * Assembly code may use the variable directly, so we need to make sure
>   * it fits in a register.
> -- 
> 2.11.0
>
Julien Grall May 29, 2018, 12:16 p.m. | #2
Hi Stefano,

On 25/05/18 20:18, Stefano Stabellini wrote:
> On Tue, 22 May 2018, Julien Grall wrote:
>> The function ARM_SMCCC_ARCH_WORKAROUND_2 will be called by the guest for
>> enabling/disabling the ssbd mitigation. So we want the handling to
>> be as fast as possible.
>>
>> The new sequence will forward guest's ARCH_WORKAROUND_2 call to EL3 and
>> also track the state of the workaround per-vCPU.
>>
>> Note that since we need to execute branches, this always executes after
>> the spectre-v2 mitigation.
>>
>> This code is based on KVM counterpart "arm64: KVM: Handle guest's
>> ARCH_WORKAROUND_2 requests" written by Marc Zyngier.
>>
>> This is part of XSA-263.
>>
>> Signed-off-by: Julien Grall <julien.grall@arm.com>
> 
> I think the patch works as intended.
> 
> 
>> ---
>>   xen/arch/arm/arm64/asm-offsets.c |  2 ++
>>   xen/arch/arm/arm64/entry.S       | 43 +++++++++++++++++++++++++++++++++++++++-
>>   xen/arch/arm/cpuerrata.c         | 18 +++++++++++++++++
>>   3 files changed, 62 insertions(+), 1 deletion(-)
>>
>> diff --git a/xen/arch/arm/arm64/asm-offsets.c b/xen/arch/arm/arm64/asm-offsets.c
>> index ce24e44473..f5c696d092 100644
>> --- a/xen/arch/arm/arm64/asm-offsets.c
>> +++ b/xen/arch/arm/arm64/asm-offsets.c
>> @@ -22,6 +22,7 @@
>>   void __dummy__(void)
>>   {
>>      OFFSET(UREGS_X0, struct cpu_user_regs, x0);
>> +   OFFSET(UREGS_X1, struct cpu_user_regs, x1);
>>      OFFSET(UREGS_LR, struct cpu_user_regs, lr);
>>   
>>      OFFSET(UREGS_SP, struct cpu_user_regs, sp);
>> @@ -45,6 +46,7 @@ void __dummy__(void)
>>      BLANK();
>>   
>>      DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
>> +   OFFSET(CPUINFO_flags, struct cpu_info, flags);
>>   
>>      OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context);
>>   
>> diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
>> index e2344e565f..8e25ff3997 100644
>> --- a/xen/arch/arm/arm64/entry.S
>> +++ b/xen/arch/arm/arm64/entry.S
>> @@ -1,4 +1,6 @@
>>   #include <asm/asm_defns.h>
>> +#include <asm/current.h>
>> +#include <asm/macros.h>
>>   #include <asm/regs.h>
>>   #include <asm/alternative.h>
>>   #include <asm/smccc.h>
>> @@ -241,7 +243,7 @@ guest_sync:
>>            * be encoded as an immediate for cmp.
>>            */
>>           eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
>> -        cbnz    w0, guest_sync_slowpath
>> +        cbnz    w0, check_wa2
>>   
>>           /*
>>            * Clobber both x0 and x1 to prevent leakage. Note that thanks
>> @@ -250,6 +252,45 @@ guest_sync:
>>           mov     x1, xzr
>>           eret
>>   
>> +check_wa2:
>> +        /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
>> +        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
> 
> We come to check_wa2 after checking on #ARM_SMCCC_ARCH_WORKAROUND_1_FID,
> so maybe we can skip this?

This is necessary. w0 contains "guest x0" xor
"ARM_SMCCC_ARCH_WORKAROUND_1_FID", so we first need to undo that xor to
recover "guest x0".

Note, it would be possible to combine the two xors. Something like:

eor     w0, w0, #(ARM_SMCCC_ARCH_WORKAROUND_1_FID ^ ARM_SMCCC_ARCH_WORKAROUND_2_FID)

Which version do you prefer?
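
The two forms are equivalent because xor is associative and self-inverse: w0 already holds "guest x0" ^ ARM_SMCCC_ARCH_WORKAROUND_1_FID, so a single xor with the constant (WA1_FID ^ WA2_FID) leaves "guest x0" ^ WA2_FID, exactly as if the first xor had been undone and the second applied. A small standalone check, using made-up values rather than the real FIDs from asm/smccc.h:

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* Placeholder values; the real function IDs live in asm/smccc.h. */
    const uint32_t WA1_FID = 0x80008000u;
    const uint32_t WA2_FID = 0x80007fffu;
    const uint32_t guest_x0 = WA2_FID;       /* guest issued WORKAROUND_2 */

    uint32_t w0 = guest_x0 ^ WA1_FID;        /* value on entry to check_wa2 */

    /* Two-step form currently in the patch... */
    uint32_t two_step = (w0 ^ WA1_FID) ^ WA2_FID;
    /* ...and the combined single-eor form suggested above. */
    uint32_t combined = w0 ^ (WA1_FID ^ WA2_FID);

    assert(two_step == combined);
    assert(two_step == 0);                   /* zero means "this was WA2" */
    return 0;
}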

> 
> 
>> +        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID
>> +        cbnz    w0, guest_sync_slowpath
>> +#ifdef CONFIG_ARM_SSBD
>> +alternative_cb arm_enable_wa2_handling
>> +        b       wa2_end
>> +alternative_cb_end
>> +        /* Sanitize the argument */
>> +        mov     x0, #-(UREGS_kernel_sizeof - UREGS_X1)  /* x0 := offset of guest's x1 on the stack */
>> +        ldr     x1, [sp, x0]                            /* Load guest's x1 */
>> +        cmp     w1, wzr
>> +        cset    x1, ne
>> +
>> +        /*
>> +         * Update the guest flag. At this stage sp point after the field
>> +         * guest_cpu_user_regs in cpu_info.
>> +         */
>> +        adr_cpu_info x2
>> +        ldr     x0, [x2, #CPUINFO_flags]
>> +        bfi     x0, x1, #CPUINFO_WORKAROUND_2_FLAG_SHIFT, #1
>> +        str     x0, [x2, #CPUINFO_flags]
>> +
>> +        /* Check that we actually need to perform the call */
>> +        ldr_this_cpu x0, ssbd_callback_required, x2
>> +        cbz     x0, wa2_end
>> +        mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID
>> +        smc     #0
> 
> Shouldn't we make the call only if get_cpu_info()->flags changed?

There is no harm in calling ARCH_WORKAROUND_2 if the flag didn't change.
However, the guest should already avoid making the call when it is not
necessary, so that's not a common case we should care about.

Cheers,
Stefano Stabellini May 29, 2018, 9:39 p.m. | #3
On Tue, 29 May 2018, Julien Grall wrote:
> Hi Stefano,
> 
> On 25/05/18 20:18, Stefano Stabellini wrote:
> > On Tue, 22 May 2018, Julien Grall wrote:
> > > The function ARM_SMCCC_ARCH_WORKAROUND_2 will be called by the guest for
> > > enabling/disabling the ssbd mitigation. So we want the handling to
> > > be as fast as possible.
> > > 
> > > The new sequence will forward guest's ARCH_WORKAROUND_2 call to EL3 and
> > > also track the state of the workaround per-vCPU.
> > > 
> > > Note that since we need to execute branches, this always executes after
> > > the spectre-v2 mitigation.
> > > 
> > > This code is based on KVM counterpart "arm64: KVM: Handle guest's
> > > ARCH_WORKAROUND_2 requests" written by Marc Zyngier.
> > > 
> > > This is part of XSA-263.
> > > 
> > > Signed-off-by: Julien Grall <julien.grall@arm.com>
> > 
> > I think the patch works as intended.
> > 
> > 
> > > ---
> > >   xen/arch/arm/arm64/asm-offsets.c |  2 ++
> > >   xen/arch/arm/arm64/entry.S       | 43
> > > +++++++++++++++++++++++++++++++++++++++-
> > >   xen/arch/arm/cpuerrata.c         | 18 +++++++++++++++++
> > >   3 files changed, 62 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/xen/arch/arm/arm64/asm-offsets.c
> > > b/xen/arch/arm/arm64/asm-offsets.c
> > > index ce24e44473..f5c696d092 100644
> > > --- a/xen/arch/arm/arm64/asm-offsets.c
> > > +++ b/xen/arch/arm/arm64/asm-offsets.c
> > > @@ -22,6 +22,7 @@
> > >   void __dummy__(void)
> > >   {
> > >      OFFSET(UREGS_X0, struct cpu_user_regs, x0);
> > > +   OFFSET(UREGS_X1, struct cpu_user_regs, x1);
> > >      OFFSET(UREGS_LR, struct cpu_user_regs, lr);
> > >        OFFSET(UREGS_SP, struct cpu_user_regs, sp);
> > > @@ -45,6 +46,7 @@ void __dummy__(void)
> > >      BLANK();
> > >        DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
> > > +   OFFSET(CPUINFO_flags, struct cpu_info, flags);
> > >        OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context);
> > >   diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
> > > index e2344e565f..8e25ff3997 100644
> > > --- a/xen/arch/arm/arm64/entry.S
> > > +++ b/xen/arch/arm/arm64/entry.S
> > > @@ -1,4 +1,6 @@
> > >   #include <asm/asm_defns.h>
> > > +#include <asm/current.h>
> > > +#include <asm/macros.h>
> > >   #include <asm/regs.h>
> > >   #include <asm/alternative.h>
> > >   #include <asm/smccc.h>
> > > @@ -241,7 +243,7 @@ guest_sync:
> > >            * be encoded as an immediate for cmp.
> > >            */
> > >           eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
> > > -        cbnz    w0, guest_sync_slowpath
> > > +        cbnz    w0, check_wa2
> > >             /*
> > >            * Clobber both x0 and x1 to prevent leakage. Note that thanks
> > > @@ -250,6 +252,45 @@ guest_sync:
> > >           mov     x1, xzr
> > >           eret
> > >   +check_wa2:
> > > +        /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
> > > +        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
> > 
> > We come to check_wa2 after checking on #ARM_SMCCC_ARCH_WORKAROUND_1_FID,
> > so maybe we can skip this?
> 
> This is necessary. w0 contains "guest x0" xor
> "ARM_SMCCC_ARCH_WORKAROUND_1_FID", so we first need to undo that xor to
> recover "guest x0".
> 
> Note, it would be possible to combine the two xors. Something like:
> 
> eor     w0, w0, #(ARM_SMCCC_ARCH_WORKAROUND_1_FID ^ ARM_SMCCC_ARCH_WORKAROUND_2_FID)
> 
> Which version do you prefer?

I understand now. Let's combine the two xors.


> > > +        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID
> > > +        cbnz    w0, guest_sync_slowpath
> > > +#ifdef CONFIG_ARM_SSBD
> > > +alternative_cb arm_enable_wa2_handling
> > > +        b       wa2_end
> > > +alternative_cb_end
> > > +        /* Sanitize the argument */
> > > +        mov     x0, #-(UREGS_kernel_sizeof - UREGS_X1)  /* x0 := offset
> > > of guest's x1 on the stack */
> > > +        ldr     x1, [sp, x0]                            /* Load guest's
> > > x1 */
> > > +        cmp     w1, wzr
> > > +        cset    x1, ne
> > > +
> > > +        /*
> > > +         * Update the guest flag. At this stage sp point after the field
> > > +         * guest_cpu_user_regs in cpu_info.
> > > +         */
> > > +        adr_cpu_info x2
> > > +        ldr     x0, [x2, #CPUINFO_flags]
> > > +        bfi     x0, x1, #CPUINFO_WORKAROUND_2_FLAG_SHIFT, #1
> > > +        str     x0, [x2, #CPUINFO_flags]
> > > +
> > > +        /* Check that we actually need to perform the call */
> > > +        ldr_this_cpu x0, ssbd_callback_required, x2
> > > +        cbz     x0, wa2_end
> > > +        mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID
> > > +        smc     #0
> > 
> > Shouldn't we make the call only if get_cpu_info()->flags changed?
> 
> There is no harm in calling ARCH_WORKAROUND_2 if the flag didn't change.
> However, the guest should already avoid making the call when it is not
> necessary, so that's not a common case we should care about.

All right

Patch

diff --git a/xen/arch/arm/arm64/asm-offsets.c b/xen/arch/arm/arm64/asm-offsets.c
index ce24e44473..f5c696d092 100644
--- a/xen/arch/arm/arm64/asm-offsets.c
+++ b/xen/arch/arm/arm64/asm-offsets.c
@@ -22,6 +22,7 @@ 
 void __dummy__(void)
 {
    OFFSET(UREGS_X0, struct cpu_user_regs, x0);
+   OFFSET(UREGS_X1, struct cpu_user_regs, x1);
    OFFSET(UREGS_LR, struct cpu_user_regs, lr);
 
    OFFSET(UREGS_SP, struct cpu_user_regs, sp);
@@ -45,6 +46,7 @@  void __dummy__(void)
    BLANK();
 
    DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
+   OFFSET(CPUINFO_flags, struct cpu_info, flags);
 
    OFFSET(VCPU_arch_saved_context, struct vcpu, arch.saved_context);
 
diff --git a/xen/arch/arm/arm64/entry.S b/xen/arch/arm/arm64/entry.S
index e2344e565f..8e25ff3997 100644
--- a/xen/arch/arm/arm64/entry.S
+++ b/xen/arch/arm/arm64/entry.S
@@ -1,4 +1,6 @@ 
 #include <asm/asm_defns.h>
+#include <asm/current.h>
+#include <asm/macros.h>
 #include <asm/regs.h>
 #include <asm/alternative.h>
 #include <asm/smccc.h>
@@ -241,7 +243,7 @@  guest_sync:
          * be encoded as an immediate for cmp.
          */
         eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
-        cbnz    w0, guest_sync_slowpath
+        cbnz    w0, check_wa2
 
         /*
          * Clobber both x0 and x1 to prevent leakage. Note that thanks
@@ -250,6 +252,45 @@  guest_sync:
         mov     x1, xzr
         eret
 
+check_wa2:
+        /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */
+        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
+        eor     w0, w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID
+        cbnz    w0, guest_sync_slowpath
+#ifdef CONFIG_ARM_SSBD
+alternative_cb arm_enable_wa2_handling
+        b       wa2_end
+alternative_cb_end
+        /* Sanitize the argument */
+        mov     x0, #-(UREGS_kernel_sizeof - UREGS_X1)  /* x0 := offset of guest's x1 on the stack */
+        ldr     x1, [sp, x0]                            /* Load guest's x1 */
+        cmp     w1, wzr
+        cset    x1, ne
+
+        /*
+         * Update the guest flag. At this stage sp point after the field
+         * guest_cpu_user_regs in cpu_info.
+         */
+        adr_cpu_info x2
+        ldr     x0, [x2, #CPUINFO_flags]
+        bfi     x0, x1, #CPUINFO_WORKAROUND_2_FLAG_SHIFT, #1
+        str     x0, [x2, #CPUINFO_flags]
+
+        /* Check that we actually need to perform the call */
+        ldr_this_cpu x0, ssbd_callback_required, x2
+        cbz     x0, wa2_end
+
+        mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_2_FID
+        smc     #0
+
+wa2_end:
+        /* Don't leak data from the SMC call */
+        mov     x1, xzr
+        mov     x2, xzr
+        mov     x3, xzr
+#endif /* !CONFIG_ARM_SSBD */
+        mov     x0, xzr
+        eret
 guest_sync_slowpath:
         /*
          * x0/x1 may have been scratch by the fast path above, so avoid
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index f921721a66..54df4ff445 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -7,6 +7,7 @@ 
 #include <xen/warning.h>
 #include <asm/cpufeature.h>
 #include <asm/cpuerrata.h>
+#include <asm/insn.h>
 #include <asm/psci.h>
 
 /* Override macros from asm/page.h to make them work with mfn_t */
@@ -272,6 +273,23 @@  static int __init parse_spec_ctrl(const char *s)
 }
 custom_param("spec-ctrl", parse_spec_ctrl);
 
+/* Arm64 only for now as for Arm32 the workaround is currently handled in C. */
+#ifdef CONFIG_ARM_64
+void __init arm_enable_wa2_handling(const struct alt_instr *alt,
+                                    const uint32_t *origptr,
+                                    uint32_t *updptr, int nr_inst)
+{
+    BUG_ON(nr_inst != 1);
+
+    /*
+     * Only allow mitigation on guest ARCH_WORKAROUND_2 if the SSBD
+     * state allow it to be flipped.
+     */
+    if ( get_ssbd_state() == ARM_SSBD_RUNTIME )
+        *updptr = aarch64_insn_gen_nop();
+}
+#endif
+
 /*
  * Assembly code may use the variable directly, so we need to make sure
  * it fits in a register.