[Xen-devel,05/13] xen/arm: Add command line option to control SSBD mitigation

Message ID 20180522174254.27551-6-julien.grall@arm.com
State New
Headers show
Series
  • xen/arm: SSBD (aka Spectre-v4) mitigation (XSA-263)
Related show

Commit Message

Julien Grall May 22, 2018, 5:42 p.m.
On a system where the firmware implements ARCH_WORKAROUND_2, it may be
useful to either permanently enable or disable the workaround for cases
where the user decides that they'd rather not get a trap overhead, and
keep the mitigation permanently on or off instead of switching it on
exception entry/exit.

In any case, default to mitigation being enabled.

At the same time provide a accessor to know the state of the mitigation.

SIgned-off-by: Julien Grall <julien.grall@arm.com>
---
 docs/misc/xen-command-line.markdown |  18 ++++++
 xen/arch/arm/cpuerrata.c            | 115 ++++++++++++++++++++++++++++++++----
 xen/include/asm-arm/cpuerrata.h     |  21 +++++++
 xen/include/asm-arm/smccc.h         |   1 +
 4 files changed, 144 insertions(+), 11 deletions(-)

Comments

Stefano Stabellini May 23, 2018, 10:34 p.m. | #1
On Tue, 22 May 2018, Julien Grall wrote:
> On a system where the firmware implements ARCH_WORKAROUND_2, it may be
> useful to either permanently enable or disable the workaround for cases
> where the user decides that they'd rather not get a trap overhead, and
> keep the mitigation permanently on or off instead of switching it on
> exception entry/exit.
> 
> In any case, default to mitigation being enabled.
> 
> At the same time provide a accessor to know the state of the mitigation.
> 
> SIgned-off-by: Julien Grall <julien.grall@arm.com>
> ---
>  docs/misc/xen-command-line.markdown |  18 ++++++
>  xen/arch/arm/cpuerrata.c            | 115 ++++++++++++++++++++++++++++++++----
>  xen/include/asm-arm/cpuerrata.h     |  21 +++++++
>  xen/include/asm-arm/smccc.h         |   1 +
>  4 files changed, 144 insertions(+), 11 deletions(-)
> 
> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
> index 8712a833a2..962028b6ed 100644
> --- a/docs/misc/xen-command-line.markdown
> +++ b/docs/misc/xen-command-line.markdown
> @@ -1756,6 +1756,24 @@ enforces the maximum theoretically necessary timeout of 670ms. Any number
>  is being interpreted as a custom timeout in milliseconds. Zero or boolean
>  false disable the quirk workaround, which is also the default.
>  
> +### spec-ctrl (Arm)
> +> `= List of [ ssbd=force-disable|runtime|force-enable ]`

Why a list? Shouldn't it be one or the other?

> +Controls for speculative execution sidechannel mitigations.
> +
> +The option `ssbd=` is used to control the state of Speculative Store
> +Bypass Disable (SSBD) mitigation.
> +
> +* `ssbd=force-disable` will keep the mitigation permanently off. The guest
> +will not be able to control the state of the mitigation.
> +* `ssbd=runtime` will always turn on the mitigation when running in the
> +hypervisor context. The guest will be to turn on/off the mitigation for
> +itself by using the firmware interface ARCH\_WORKAROUND\_2.
> +* `ssbd=force-enable` will keep the mitigation permanently on. The guest will
> +not be able to control the state of the mitigation.
> +
> +By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
> +
>  ### spec-ctrl (x86)
>  > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
>  >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd}=<bool> ]`
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index bcea2eb6e5..f921721a66 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -237,6 +237,41 @@ static int enable_ic_inv_hardening(void *data)
>  
>  #ifdef CONFIG_ARM_SSBD
>  
> +enum ssbd_state ssbd_state = ARM_SSBD_RUNTIME;
> +
> +static int __init parse_spec_ctrl(const char *s)
> +{
> +    const char *ss;
> +    int rc = 0;
> +
> +    do {
> +        ss = strchr(s, ',');
> +        if ( !ss )
> +            ss = strchr(s, '\0');

It doesn't look like it is necessary to parse ',' at all. I would remove
the while loop too.


> +        if ( !strncmp(s, "ssbd=", 5) )
> +        {
> +            s += 5;
> +
> +            if ( !strncmp(s, "force-disable", ss - s) )
> +                ssbd_state = ARM_SSBD_FORCE_DISABLE;
> +            else if ( !strncmp(s, "runtime", ss - s) )
> +                ssbd_state = ARM_SSBD_RUNTIME;
> +            else if ( !strncmp(s, "force-enable", ss - s) )
> +                ssbd_state = ARM_SSBD_FORCE_ENABLE;
> +            else
> +                rc = -EINVAL;
> +        }
> +        else
> +            rc = -EINVAL;
> +
> +        s = ss + 1;
> +    } while ( *ss );
> +
> +    return rc;
> +}
> +custom_param("spec-ctrl", parse_spec_ctrl);
> +
>  /*
>   * Assembly code may use the variable directly, so we need to make sure
>   * it fits in a register.
> @@ -246,25 +281,82 @@ DEFINE_PER_CPU_READ_MOSTLY(register_t, ssbd_callback_required);
>  static bool has_ssbd_mitigation(const struct arm_cpu_capabilities *entry)
>  {
>      struct arm_smccc_res res;
> -    bool supported = true;
> +    bool required = true;

Please avoid this renaming. Choose one name or the other from the start.


>      if ( smccc_ver < SMCCC_VERSION(1, 1) )
>          return false;
>  
> -    /*
> -     * The probe function return value is either negative (unsupported
> -     * or mitigated), positive (unaffected), or zero (requires
> -     * mitigation). We only need to do anything in the last case.
> -     */

I would keep the comment


>      arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
>                        ARM_SMCCC_ARCH_WORKAROUND_2_FID, &res);
> -    if ( (int)res.a0 != 0 )
> -        supported = false;
>  
> -    if ( supported )
> -        this_cpu(ssbd_callback_required) = 1;
> +    switch ( (int)res.a0 )

Please introduce this switch in the previous patch. But it makes sense
to add the ssbd_state variable in this patch.


> +    {
> +    case ARM_SMCCC_NOT_SUPPORTED:
> +        ssbd_state = ARM_SSBD_UNKNOWN;
> +        return false;
> +
> +    case ARM_SMCCC_NOT_REQUIRED:
> +        ssbd_state = ARM_SSBD_MITIGATED;
> +        return false;
> +
> +    case ARM_SMCCC_SUCCESS:
> +        required = true;
> +        break;
> +
> +    case 1: /* Mitigation not required on this CPU. */
> +        required = false;
> +        break;

This should "return false". Also, it might make sense to set ssbd_state
to ARM_SSBD_MITIGATED?


> +
> +    default:
> +        ASSERT_UNREACHABLE();
> +        return false;
> +    }
> +
> +    switch ( ssbd_state )
> +    {
> +    case ARM_SSBD_FORCE_DISABLE:
> +    {
> +        static bool once = true;
> +
> +        if ( once )
> +            printk("%s disabled from command-line\n", entry->desc);
> +        once = false;
> +
> +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL);
> +        required = false;
> +
> +        break;
> +    }
> +
> +    case ARM_SSBD_RUNTIME:
> +        if ( required )
> +        {
> +            this_cpu(ssbd_callback_required) = 1;

We have the ARM_SSBD bit, the ssbd_state variable and
ssbd_callback_required. Both ARM_SSBD and ssbd_state are shared across
cores while ssbd_callback_required is per-cpu. Does
ssbd_callback_required really need to be per-cpu? Do we need both
variables? For instance, we could just return ssbd_state ==
ARM_SSBD_RUNTIME instead of this_cpu(ssbd_callback_required)?


> +            arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
> +        }
> +
> +        break;
> +
> +    case ARM_SSBD_FORCE_ENABLE:
> +    {
> +        static bool once = true;
> +
> +        if ( once )
> +            printk("%s forced from command-line\n", entry->desc);
> +        once = false;
> +
> +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
> +        required = true;

This function is supposed to detect whether a workaround is needed, not
enable it, right? Should this switch and relative code be moved to the
.enable function for this capability?


> +        break;
> +    }
> +
> +    default:
> +        ASSERT_UNREACHABLE();
> +        return false;
> +    }
>  
> -    return supported;
> +    return required;
>  }
>  #endif
>  
> @@ -371,6 +463,7 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>  #endif
>  #ifdef CONFIG_ARM_SSBD
>      {
> +        .desc = "Speculative Store Bypass Disabled",
>          .capability = ARM_SSBD,
>          .matches = has_ssbd_mitigation,
>      },
> diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
> index e628d3ff56..7fbb3dc0be 100644
> --- a/xen/include/asm-arm/cpuerrata.h
> +++ b/xen/include/asm-arm/cpuerrata.h
> @@ -31,10 +31,26 @@ CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
>  
>  #undef CHECK_WORKAROUND_HELPER
>  
> +enum ssbd_state
> +{
> +    ARM_SSBD_UNKNOWN,
> +    ARM_SSBD_FORCE_DISABLE,
> +    ARM_SSBD_RUNTIME,
> +    ARM_SSBD_FORCE_ENABLE,
> +    ARM_SSBD_MITIGATED,
> +};
> +
>  #ifdef CONFIG_ARM_SSBD
>  
>  #include <asm/current.h>
>  
> +extern enum ssbd_state ssbd_state;
> +
> +static inline enum ssbd_state get_ssbd_state(void)
> +{
> +    return ssbd_state;
> +}
> +
>  DECLARE_PER_CPU(register_t, ssbd_callback_required);
>  
>  static inline bool cpu_require_ssbd_mitigation(void)
> @@ -49,6 +65,11 @@ static inline bool cpu_require_ssbd_mitigation(void)
>      return false;
>  }
>  
> +static inline enum ssbd_state get_sbdd_state(void)
> +{
> +    return ARM_SSBD_UNKNOWN;
> +}
> +
>  #endif
>  
>  #endif /* __ARM_CPUERRATA_H__ */
> diff --git a/xen/include/asm-arm/smccc.h b/xen/include/asm-arm/smccc.h
> index 650744d28b..a6804cec99 100644
> --- a/xen/include/asm-arm/smccc.h
> +++ b/xen/include/asm-arm/smccc.h
> @@ -265,6 +265,7 @@ struct arm_smccc_res {
>                         0x7FFF)
>  
>  /* SMCCC error codes */
> +#define ARM_SMCCC_NOT_REQUIRED          (-2)
>  #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION  (-1)
>  #define ARM_SMCCC_NOT_SUPPORTED         (-1)
>  #define ARM_SMCCC_SUCCESS               (0)
> -- 
> 2.11.0
>
Stefano Stabellini May 23, 2018, 11:23 p.m. | #2
On Tue, 22 May 2018, Julien Grall wrote:
> +extern enum ssbd_state ssbd_state;
> +
> +static inline enum ssbd_state get_ssbd_state(void)
> +{
> +    return ssbd_state;
> +}
> +
>  DECLARE_PER_CPU(register_t, ssbd_callback_required);
>  
>  static inline bool cpu_require_ssbd_mitigation(void)
> @@ -49,6 +65,11 @@ static inline bool cpu_require_ssbd_mitigation(void)
>      return false;
>  }
>  
> +static inline enum ssbd_state get_sbdd_state(void)

The function name is mispelled


> +{
> +    return ARM_SSBD_UNKNOWN;
> +}
> +
>  #endif
Stefano Stabellini May 24, 2018, 12:48 a.m. | #3
On Wed, 23 May 2018, Stefano Stabellini wrote:
> On Tue, 22 May 2018, Julien Grall wrote:
> > On a system where the firmware implements ARCH_WORKAROUND_2, it may be
> > useful to either permanently enable or disable the workaround for cases
> > where the user decides that they'd rather not get a trap overhead, and
> > keep the mitigation permanently on or off instead of switching it on
> > exception entry/exit.
> > 
> > In any case, default to mitigation being enabled.
> > 
> > At the same time provide a accessor to know the state of the mitigation.
> > 
> > SIgned-off-by: Julien Grall <julien.grall@arm.com>
> > ---
> >  docs/misc/xen-command-line.markdown |  18 ++++++
> >  xen/arch/arm/cpuerrata.c            | 115 ++++++++++++++++++++++++++++++++----
> >  xen/include/asm-arm/cpuerrata.h     |  21 +++++++
> >  xen/include/asm-arm/smccc.h         |   1 +
> >  4 files changed, 144 insertions(+), 11 deletions(-)
> > 
> > diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
> > index 8712a833a2..962028b6ed 100644
> > --- a/docs/misc/xen-command-line.markdown
> > +++ b/docs/misc/xen-command-line.markdown
> > @@ -1756,6 +1756,24 @@ enforces the maximum theoretically necessary timeout of 670ms. Any number
> >  is being interpreted as a custom timeout in milliseconds. Zero or boolean
> >  false disable the quirk workaround, which is also the default.
> >  
> > +### spec-ctrl (Arm)
> > +> `= List of [ ssbd=force-disable|runtime|force-enable ]`
> 
> Why a list? Shouldn't it be one or the other?
> 
> > +Controls for speculative execution sidechannel mitigations.
> > +
> > +The option `ssbd=` is used to control the state of Speculative Store
> > +Bypass Disable (SSBD) mitigation.
> > +
> > +* `ssbd=force-disable` will keep the mitigation permanently off. The guest
> > +will not be able to control the state of the mitigation.
> > +* `ssbd=runtime` will always turn on the mitigation when running in the
> > +hypervisor context. The guest will be to turn on/off the mitigation for
> > +itself by using the firmware interface ARCH\_WORKAROUND\_2.
> > +* `ssbd=force-enable` will keep the mitigation permanently on. The guest will
> > +not be able to control the state of the mitigation.
> > +
> > +By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
> > +
> >  ### spec-ctrl (x86)
> >  > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
> >  >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd}=<bool> ]`
> > diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> > index bcea2eb6e5..f921721a66 100644
> > --- a/xen/arch/arm/cpuerrata.c
> > +++ b/xen/arch/arm/cpuerrata.c
> > @@ -237,6 +237,41 @@ static int enable_ic_inv_hardening(void *data)
> >  
> >  #ifdef CONFIG_ARM_SSBD
> >  
> > +enum ssbd_state ssbd_state = ARM_SSBD_RUNTIME;
> > +
> > +static int __init parse_spec_ctrl(const char *s)
> > +{
> > +    const char *ss;
> > +    int rc = 0;
> > +
> > +    do {
> > +        ss = strchr(s, ',');
> > +        if ( !ss )
> > +            ss = strchr(s, '\0');
> 
> It doesn't look like it is necessary to parse ',' at all. I would remove
> the while loop too.
> 
> 
> > +        if ( !strncmp(s, "ssbd=", 5) )
> > +        {
> > +            s += 5;
> > +
> > +            if ( !strncmp(s, "force-disable", ss - s) )
> > +                ssbd_state = ARM_SSBD_FORCE_DISABLE;
> > +            else if ( !strncmp(s, "runtime", ss - s) )
> > +                ssbd_state = ARM_SSBD_RUNTIME;
> > +            else if ( !strncmp(s, "force-enable", ss - s) )
> > +                ssbd_state = ARM_SSBD_FORCE_ENABLE;
> > +            else
> > +                rc = -EINVAL;
> > +        }
> > +        else
> > +            rc = -EINVAL;
> > +
> > +        s = ss + 1;
> > +    } while ( *ss );
> > +
> > +    return rc;
> > +}
> > +custom_param("spec-ctrl", parse_spec_ctrl);
> > +
> >  /*
> >   * Assembly code may use the variable directly, so we need to make sure
> >   * it fits in a register.
> > @@ -246,25 +281,82 @@ DEFINE_PER_CPU_READ_MOSTLY(register_t, ssbd_callback_required);
> >  static bool has_ssbd_mitigation(const struct arm_cpu_capabilities *entry)
> >  {
> >      struct arm_smccc_res res;
> > -    bool supported = true;
> > +    bool required = true;
> 
> Please avoid this renaming. Choose one name or the other from the start.
> 
> 
> >      if ( smccc_ver < SMCCC_VERSION(1, 1) )
> >          return false;
> >  
> > -    /*
> > -     * The probe function return value is either negative (unsupported
> > -     * or mitigated), positive (unaffected), or zero (requires
> > -     * mitigation). We only need to do anything in the last case.
> > -     */
> 
> I would keep the comment
> 
> 
> >      arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
> >                        ARM_SMCCC_ARCH_WORKAROUND_2_FID, &res);
> > -    if ( (int)res.a0 != 0 )
> > -        supported = false;
> >  
> > -    if ( supported )
> > -        this_cpu(ssbd_callback_required) = 1;
> > +    switch ( (int)res.a0 )
> 
> Please introduce this switch in the previous patch. But it makes sense
> to add the ssbd_state variable in this patch.
> 
> 
> > +    {
> > +    case ARM_SMCCC_NOT_SUPPORTED:
> > +        ssbd_state = ARM_SSBD_UNKNOWN;
> > +        return false;
> > +
> > +    case ARM_SMCCC_NOT_REQUIRED:
> > +        ssbd_state = ARM_SSBD_MITIGATED;
> > +        return false;
> > +
> > +    case ARM_SMCCC_SUCCESS:
> > +        required = true;
> > +        break;
> > +
> > +    case 1: /* Mitigation not required on this CPU. */
> > +        required = false;
> > +        break;
> 
> This should "return false". Also, it might make sense to set ssbd_state
> to ARM_SSBD_MITIGATED?
> 
> 
> > +
> > +    default:
> > +        ASSERT_UNREACHABLE();
> > +        return false;
> > +    }
> > +
> > +    switch ( ssbd_state )
> > +    {
> > +    case ARM_SSBD_FORCE_DISABLE:
> > +    {
> > +        static bool once = true;
> > +
> > +        if ( once )
> > +            printk("%s disabled from command-line\n", entry->desc);
> > +        once = false;
> > +
> > +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL);
> > +        required = false;
> > +
> > +        break;
> > +    }
> > +
> > +    case ARM_SSBD_RUNTIME:
> > +        if ( required )
> > +        {
> > +            this_cpu(ssbd_callback_required) = 1;
> 
> We have the ARM_SSBD bit, the ssbd_state variable and
> ssbd_callback_required. Both ARM_SSBD and ssbd_state are shared across
> cores while ssbd_callback_required is per-cpu. Does
> ssbd_callback_required really need to be per-cpu? Do we need both
> variables? For instance, we could just return ssbd_state ==
> ARM_SSBD_RUNTIME instead of this_cpu(ssbd_callback_required)?

After reading the whole series, I think ssbd_state should be a per_cpu
variable. parse_spec_ctrl initializes ssbd_state to the same value on
all cpus. has_ssbd_mitigation modifies ssbd_state only on the CPUs it is
running on. We get rid of ssbd_callback_required. The assembly fast past
reads ssbd_state instead of ssbd_callback_required.

What do you think?


 
> > +            arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
> > +        }
> > +
> > +        break;
> > +
> > +    case ARM_SSBD_FORCE_ENABLE:
> > +    {
> > +        static bool once = true;
> > +
> > +        if ( once )
> > +            printk("%s forced from command-line\n", entry->desc);
> > +        once = false;
> > +
> > +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
> > +        required = true;
> 
> This function is supposed to detect whether a workaround is needed, not
> enable it, right? Should this switch and relative code be moved to the
> .enable function for this capability?
> 
> 
> > +        break;
> > +    }
> > +
> > +    default:
> > +        ASSERT_UNREACHABLE();
> > +        return false;
> > +    }
> >  
> > -    return supported;
> > +    return required;
> >  }
> >  #endif
> >  
> > @@ -371,6 +463,7 @@ static const struct arm_cpu_capabilities arm_errata[] = {
> >  #endif
> >  #ifdef CONFIG_ARM_SSBD
> >      {
> > +        .desc = "Speculative Store Bypass Disabled",
> >          .capability = ARM_SSBD,
> >          .matches = has_ssbd_mitigation,
> >      },
> > diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
> > index e628d3ff56..7fbb3dc0be 100644
> > --- a/xen/include/asm-arm/cpuerrata.h
> > +++ b/xen/include/asm-arm/cpuerrata.h
> > @@ -31,10 +31,26 @@ CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
> >  
> >  #undef CHECK_WORKAROUND_HELPER
> >  
> > +enum ssbd_state
> > +{
> > +    ARM_SSBD_UNKNOWN,
> > +    ARM_SSBD_FORCE_DISABLE,
> > +    ARM_SSBD_RUNTIME,
> > +    ARM_SSBD_FORCE_ENABLE,
> > +    ARM_SSBD_MITIGATED,
> > +};
> > +
> >  #ifdef CONFIG_ARM_SSBD
> >  
> >  #include <asm/current.h>
> >  
> > +extern enum ssbd_state ssbd_state;
> > +
> > +static inline enum ssbd_state get_ssbd_state(void)
> > +{
> > +    return ssbd_state;
> > +}
> > +
> >  DECLARE_PER_CPU(register_t, ssbd_callback_required);
> >  
> >  static inline bool cpu_require_ssbd_mitigation(void)
> > @@ -49,6 +65,11 @@ static inline bool cpu_require_ssbd_mitigation(void)
> >      return false;
> >  }
> >  
> > +static inline enum ssbd_state get_sbdd_state(void)
> > +{
> > +    return ARM_SSBD_UNKNOWN;
> > +}
> > +
> >  #endif
> >  
> >  #endif /* __ARM_CPUERRATA_H__ */
> > diff --git a/xen/include/asm-arm/smccc.h b/xen/include/asm-arm/smccc.h
> > index 650744d28b..a6804cec99 100644
> > --- a/xen/include/asm-arm/smccc.h
> > +++ b/xen/include/asm-arm/smccc.h
> > @@ -265,6 +265,7 @@ struct arm_smccc_res {
> >                         0x7FFF)
> >  
> >  /* SMCCC error codes */
> > +#define ARM_SMCCC_NOT_REQUIRED          (-2)
> >  #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION  (-1)
> >  #define ARM_SMCCC_NOT_SUPPORTED         (-1)
> >  #define ARM_SMCCC_SUCCESS               (0)
> > -- 
> > 2.11.0
> > 
>
Julien Grall May 24, 2018, 9:52 a.m. | #4
Hi Stefano,

On 23/05/18 23:34, Stefano Stabellini wrote:
> On Tue, 22 May 2018, Julien Grall wrote:
>> On a system where the firmware implements ARCH_WORKAROUND_2, it may be
>> useful to either permanently enable or disable the workaround for cases
>> where the user decides that they'd rather not get a trap overhead, and
>> keep the mitigation permanently on or off instead of switching it on
>> exception entry/exit.
>>
>> In any case, default to mitigation being enabled.
>>
>> At the same time provide a accessor to know the state of the mitigation.
>>
>> SIgned-off-by: Julien Grall <julien.grall@arm.com>
>> ---
>>   docs/misc/xen-command-line.markdown |  18 ++++++
>>   xen/arch/arm/cpuerrata.c            | 115 ++++++++++++++++++++++++++++++++----
>>   xen/include/asm-arm/cpuerrata.h     |  21 +++++++
>>   xen/include/asm-arm/smccc.h         |   1 +
>>   4 files changed, 144 insertions(+), 11 deletions(-)
>>
>> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
>> index 8712a833a2..962028b6ed 100644
>> --- a/docs/misc/xen-command-line.markdown
>> +++ b/docs/misc/xen-command-line.markdown
>> @@ -1756,6 +1756,24 @@ enforces the maximum theoretically necessary timeout of 670ms. Any number
>>   is being interpreted as a custom timeout in milliseconds. Zero or boolean
>>   false disable the quirk workaround, which is also the default.
>>   
>> +### spec-ctrl (Arm)
>> +> `= List of [ ssbd=force-disable|runtime|force-enable ]`
> 
> Why a list? Shouldn't it be one or the other?

Because I am thinking to extend it and add the possibility to disable 
branch predictor hardening. So I decided to get the code and 
documentation ready right now.

> 
>> +Controls for speculative execution sidechannel mitigations.
>> +
>> +The option `ssbd=` is used to control the state of Speculative Store
>> +Bypass Disable (SSBD) mitigation.
>> +
>> +* `ssbd=force-disable` will keep the mitigation permanently off. The guest
>> +will not be able to control the state of the mitigation.
>> +* `ssbd=runtime` will always turn on the mitigation when running in the
>> +hypervisor context. The guest will be to turn on/off the mitigation for
>> +itself by using the firmware interface ARCH\_WORKAROUND\_2.
>> +* `ssbd=force-enable` will keep the mitigation permanently on. The guest will
>> +not be able to control the state of the mitigation.
>> +
>> +By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
>> +
>>   ### spec-ctrl (x86)
>>   > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
>>   >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd}=<bool> ]`
>> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
>> index bcea2eb6e5..f921721a66 100644
>> --- a/xen/arch/arm/cpuerrata.c
>> +++ b/xen/arch/arm/cpuerrata.c
>> @@ -237,6 +237,41 @@ static int enable_ic_inv_hardening(void *data)
>>   
>>   #ifdef CONFIG_ARM_SSBD
>>   
>> +enum ssbd_state ssbd_state = ARM_SSBD_RUNTIME;
>> +
>> +static int __init parse_spec_ctrl(const char *s)
>> +{
>> +    const char *ss;
>> +    int rc = 0;
>> +
>> +    do {
>> +        ss = strchr(s, ',');
>> +        if ( !ss )
>> +            ss = strchr(s, '\0');
> 
> It doesn't look like it is necessary to parse ',' at all. I would remove
> the while loop too.

It matters, you want to catch and warn user that the command line is not 
valid. Imagine someone decide to add ",..." after. It also make easier 
to integrate new option without reworking it.

> 
> 
>> +        if ( !strncmp(s, "ssbd=", 5) )
>> +        {
>> +            s += 5;
>> +
>> +            if ( !strncmp(s, "force-disable", ss - s) )
>> +                ssbd_state = ARM_SSBD_FORCE_DISABLE;
>> +            else if ( !strncmp(s, "runtime", ss - s) )
>> +                ssbd_state = ARM_SSBD_RUNTIME;
>> +            else if ( !strncmp(s, "force-enable", ss - s) )
>> +                ssbd_state = ARM_SSBD_FORCE_ENABLE;
>> +            else
>> +                rc = -EINVAL;
>> +        }
>> +        else
>> +            rc = -EINVAL;
>> +
>> +        s = ss + 1;
>> +    } while ( *ss );
>> +
>> +    return rc;
>> +}
>> +custom_param("spec-ctrl", parse_spec_ctrl);
>> +
>>   /*
>>    * Assembly code may use the variable directly, so we need to make sure
>>    * it fits in a register.
>> @@ -246,25 +281,82 @@ DEFINE_PER_CPU_READ_MOSTLY(register_t, ssbd_callback_required);
>>   static bool has_ssbd_mitigation(const struct arm_cpu_capabilities *entry)
>>   {
>>       struct arm_smccc_res res;
>> -    bool supported = true;
>> +    bool required = true;
> 
> Please avoid this renaming. Choose one name or the other from the start.

This is what happen when you want to split a series in a logical way. 
The name "required" does not make sense with the previous patch. So the 
renaming make sense here.

> 
> 
>>       if ( smccc_ver < SMCCC_VERSION(1, 1) )
>>           return false;
>>   
>> -    /*
>> -     * The probe function return value is either negative (unsupported
>> -     * or mitigated), positive (unaffected), or zero (requires
>> -     * mitigation). We only need to do anything in the last case.
>> -     */
> 
> I would keep the comment

The comment is not correct after this patch. The may need to act 
differently depending on the comment line. Regarding the values, the 
switch is more explanatory than those 3 lines.

> 
> 
>>       arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
>>                         ARM_SMCCC_ARCH_WORKAROUND_2_FID, &res);
>> -    if ( (int)res.a0 != 0 )
>> -        supported = false;
>>   
>> -    if ( supported )
>> -        this_cpu(ssbd_callback_required) = 1;
>> +    switch ( (int)res.a0 )
> 
> Please introduce this switch in the previous patch. But it makes sense
> to add the ssbd_state variable in this patch.

Well, that's not going to make the diff simpler here as the switch will 
be different. So I would keep the patch like that.

> 
> 
>> +    {
>> +    case ARM_SMCCC_NOT_SUPPORTED:
>> +        ssbd_state = ARM_SSBD_UNKNOWN;
>> +        return false;
>> +
>> +    case ARM_SMCCC_NOT_REQUIRED:
>> +        ssbd_state = ARM_SSBD_MITIGATED;
>> +        return false;
>> +
>> +    case ARM_SMCCC_SUCCESS:
>> +        required = true;
>> +        break;
>> +
>> +    case 1: /* Mitigation not required on this CPU. */
>> +        required = false;
>> +        break;
> 
> This should "return false". 

It is perfectly fine to continue as it is safe to execute 
ARCH_WORKAROUND_2 on that CPU.


Also, it might make sense to set ssbd_state
> to ARM_SSBD_MITIGATED?

No, the mitigation is not required on *that* CPU. It does not mean it 
will not be required for all CPUs. So it makes sense to not update 
ssbd_state.

> 
> 
>> +
>> +    default:
>> +        ASSERT_UNREACHABLE();
>> +        return false;
>> +    }
>> +
>> +    switch ( ssbd_state )
>> +    {
>> +    case ARM_SSBD_FORCE_DISABLE:
>> +    {
>> +        static bool once = true;
>> +
>> +        if ( once )
>> +            printk("%s disabled from command-line\n", entry->desc);
>> +        once = false;
>> +
>> +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL);
>> +        required = false;
>> +
>> +        break;
>> +    }
>> +
>> +    case ARM_SSBD_RUNTIME:
>> +        if ( required )
>> +        {
>> +            this_cpu(ssbd_callback_required) = 1;
> 
> We have the ARM_SSBD bit, the ssbd_state variable and
> ssbd_callback_required. Both ARM_SSBD and ssbd_state are shared across
> cores while ssbd_callback_required is per-cpu. Does
> ssbd_callback_required really need to be per-cpu? > Do we need both
> variables? For instance, we could just return ssbd_state ==
> ARM_SSBD_RUNTIME instead of this_cpu(ssbd_callback_required)?

Let me start with because a guest vCPU may run on any pCPU, you always 
have to tell the guest the mitigation is required for all vCPUs.

By default, Linux is calling the workaround at entry from EL0 to enable 
it and at exit to EL0 to disable it. The workaround will first trap in 
EL2 and then get forwarded to EL3.

You can imagine that the trap to EL2 and then EL3 has a cost. If the 
workaround is not necessary, then you can reduce that cost by avoiding 
to trap at EL3. As you can have a platform with heterogenous CPUs, you 
need that workaround per-CPU.

The ARM_SSBD feature bit is useful in order to put shortcut in place 
using alternative (see check_workaround_ssbd). So on platform where the 
mitigation is not required, all the new code is nearly a NOP.

The ssbd_state is used in various place to know what is the global state 
of the mitigation:
	- To initialize the vCPU state for the mitigation
	- To report the guest what is the state of the mitigation using SMCCC

So all those variables have a specific purposes and cannot really be 
replaced by another way.

> 
> 
>> +            arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
>> +        }
>> +
>> +        break;
>> +
>> +    case ARM_SSBD_FORCE_ENABLE:
>> +    {
>> +        static bool once = true;
>> +
>> +        if ( once )
>> +            printk("%s forced from command-line\n", entry->desc);
>> +        once = false;
>> +
>> +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
>> +        required = true;
> 
> This function is supposed to detect whether a workaround is needed, not
> enable it, right? Should this switch and relative code be moved to the
> .enable function for this capability?

I had the split before but it is difficult to get a nice split between 
.enable and .matches. So I decided to follow what Linux/KVM did and put 
everything in has_.

> 
> 
>> +        break;
>> +    }
>> +
>> +    default:
>> +        ASSERT_UNREACHABLE();
>> +        return false;
>> +    }
>>   
>> -    return supported;
>> +    return required;
>>   }
>>   #endif
>>   
>> @@ -371,6 +463,7 @@ static const struct arm_cpu_capabilities arm_errata[] = {
>>   #endif
>>   #ifdef CONFIG_ARM_SSBD
>>       {
>> +        .desc = "Speculative Store Bypass Disabled",
>>           .capability = ARM_SSBD,
>>           .matches = has_ssbd_mitigation,
>>       },
>> diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
>> index e628d3ff56..7fbb3dc0be 100644
>> --- a/xen/include/asm-arm/cpuerrata.h
>> +++ b/xen/include/asm-arm/cpuerrata.h
>> @@ -31,10 +31,26 @@ CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
>>   
>>   #undef CHECK_WORKAROUND_HELPER
>>   
>> +enum ssbd_state
>> +{
>> +    ARM_SSBD_UNKNOWN,
>> +    ARM_SSBD_FORCE_DISABLE,
>> +    ARM_SSBD_RUNTIME,
>> +    ARM_SSBD_FORCE_ENABLE,
>> +    ARM_SSBD_MITIGATED,
>> +};
>> +
>>   #ifdef CONFIG_ARM_SSBD
>>   
>>   #include <asm/current.h>
>>   
>> +extern enum ssbd_state ssbd_state;
>> +
>> +static inline enum ssbd_state get_ssbd_state(void)
>> +{
>> +    return ssbd_state;
>> +}
>> +
>>   DECLARE_PER_CPU(register_t, ssbd_callback_required);
>>   
>>   static inline bool cpu_require_ssbd_mitigation(void)
>> @@ -49,6 +65,11 @@ static inline bool cpu_require_ssbd_mitigation(void)
>>       return false;
>>   }
>>   
>> +static inline enum ssbd_state get_sbdd_state(void)
>> +{
>> +    return ARM_SSBD_UNKNOWN;
>> +}
>> +
>>   #endif
>>   
>>   #endif /* __ARM_CPUERRATA_H__ */
>> diff --git a/xen/include/asm-arm/smccc.h b/xen/include/asm-arm/smccc.h
>> index 650744d28b..a6804cec99 100644
>> --- a/xen/include/asm-arm/smccc.h
>> +++ b/xen/include/asm-arm/smccc.h
>> @@ -265,6 +265,7 @@ struct arm_smccc_res {
>>                          0x7FFF)
>>   
>>   /* SMCCC error codes */
>> +#define ARM_SMCCC_NOT_REQUIRED          (-2)
>>   #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION  (-1)
>>   #define ARM_SMCCC_NOT_SUPPORTED         (-1)
>>   #define ARM_SMCCC_SUCCESS               (0)
>> -- 
>> 2.11.0
>>

Cheers,
Julien Grall May 24, 2018, 9:53 a.m. | #5
Hi Stefano,

On 24/05/18 00:23, Stefano Stabellini wrote:
> On Tue, 22 May 2018, Julien Grall wrote:
>> +extern enum ssbd_state ssbd_state;
>> +
>> +static inline enum ssbd_state get_ssbd_state(void)
>> +{
>> +    return ssbd_state;
>> +}
>> +
>>   DECLARE_PER_CPU(register_t, ssbd_callback_required);
>>   
>>   static inline bool cpu_require_ssbd_mitigation(void)
>> @@ -49,6 +65,11 @@ static inline bool cpu_require_ssbd_mitigation(void)
>>       return false;
>>   }
>>   
>> +static inline enum ssbd_state get_sbdd_state(void)
> 
> The function name is mispelled

Good catch. I will update it.

Cheers,

> 
> 
>> +{
>> +    return ARM_SSBD_UNKNOWN;
>> +}
>> +
>>   #endif
>
Julien Grall May 25, 2018, 7:56 p.m. | #6
Hi Stefano,

On 05/24/2018 01:48 AM, Stefano Stabellini wrote:
> On Wed, 23 May 2018, Stefano Stabellini wrote:
>> On Tue, 22 May 2018, Julien Grall wrote:
>>> +
>>> +    default:
>>> +        ASSERT_UNREACHABLE();
>>> +        return false;
>>> +    }
>>> +
>>> +    switch ( ssbd_state )
>>> +    {
>>> +    case ARM_SSBD_FORCE_DISABLE:
>>> +    {
>>> +        static bool once = true;
>>> +
>>> +        if ( once )
>>> +            printk("%s disabled from command-line\n", entry->desc);
>>> +        once = false;
>>> +
>>> +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL);
>>> +        required = false;
>>> +
>>> +        break;
>>> +    }
>>> +
>>> +    case ARM_SSBD_RUNTIME:
>>> +        if ( required )
>>> +        {
>>> +            this_cpu(ssbd_callback_required) = 1;
>>
>> We have the ARM_SSBD bit, the ssbd_state variable and
>> ssbd_callback_required. Both ARM_SSBD and ssbd_state are shared across
>> cores while ssbd_callback_required is per-cpu. Does
>> ssbd_callback_required really need to be per-cpu? Do we need both
>> variables? For instance, we could just return ssbd_state ==
>> ARM_SSBD_RUNTIME instead of this_cpu(ssbd_callback_required)?
> 
> After reading the whole series, I think ssbd_state should be a per_cpu
> variable. parse_spec_ctrl initializes ssbd_state to the same value on
> all cpus. has_ssbd_mitigation modifies ssbd_state only on the CPUs it is
> running on. We get rid of ssbd_callback_required. The assembly fast past
> reads ssbd_state instead of ssbd_callback_required.
> 
> What do you think?

We need to keep the global ssbd_state around for the vsmc code as we 
need to tell the guest what is the system-wide decision.

This is because a vCPU may move from a affected CPU to a non-affected 
one. So we need to inform the same on every vCPU (i.e mitigated, 
dynamic...).

Cheers,
Stefano Stabellini May 25, 2018, 8:51 p.m. | #7
On Thu, 24 May 2018, Julien Grall wrote:
> Hi Stefano,
> 
> On 23/05/18 23:34, Stefano Stabellini wrote:
> > On Tue, 22 May 2018, Julien Grall wrote:
> > > On a system where the firmware implements ARCH_WORKAROUND_2, it may be
> > > useful to either permanently enable or disable the workaround for cases
> > > where the user decides that they'd rather not get a trap overhead, and
> > > keep the mitigation permanently on or off instead of switching it on
> > > exception entry/exit.
> > > 
> > > In any case, default to mitigation being enabled.
> > > 
> > > At the same time provide a accessor to know the state of the mitigation.
> > > 
> > > SIgned-off-by: Julien Grall <julien.grall@arm.com>
> > > ---
> > >   docs/misc/xen-command-line.markdown |  18 ++++++
> > >   xen/arch/arm/cpuerrata.c            | 115
> > > ++++++++++++++++++++++++++++++++----
> > >   xen/include/asm-arm/cpuerrata.h     |  21 +++++++
> > >   xen/include/asm-arm/smccc.h         |   1 +
> > >   4 files changed, 144 insertions(+), 11 deletions(-)
> > > 
> > > diff --git a/docs/misc/xen-command-line.markdown
> > > b/docs/misc/xen-command-line.markdown
> > > index 8712a833a2..962028b6ed 100644
> > > --- a/docs/misc/xen-command-line.markdown
> > > +++ b/docs/misc/xen-command-line.markdown
> > > @@ -1756,6 +1756,24 @@ enforces the maximum theoretically necessary
> > > timeout of 670ms. Any number
> > >   is being interpreted as a custom timeout in milliseconds. Zero or
> > > boolean
> > >   false disable the quirk workaround, which is also the default.
> > >   +### spec-ctrl (Arm)
> > > +> `= List of [ ssbd=force-disable|runtime|force-enable ]`
> > 
> > Why a list? Shouldn't it be one or the other?
> 
> Because I am thinking to extend it and add the possibility to disable branch
> predictor hardening. So I decided to get the code and documentation ready
> right now.

OK, maybe it would be good to explain this in the commit message but it
is not necessary.


> > > +Controls for speculative execution sidechannel mitigations.
> > > +
> > > +The option `ssbd=` is used to control the state of Speculative Store
> > > +Bypass Disable (SSBD) mitigation.
> > > +
> > > +* `ssbd=force-disable` will keep the mitigation permanently off. The
> > > guest
> > > +will not be able to control the state of the mitigation.
> > > +* `ssbd=runtime` will always turn on the mitigation when running in the
> > > +hypervisor context. The guest will be to turn on/off the mitigation for
> > > +itself by using the firmware interface ARCH\_WORKAROUND\_2.
> > > +* `ssbd=force-enable` will keep the mitigation permanently on. The guest
> > > will
> > > +not be able to control the state of the mitigation.
> > > +
> > > +By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
> > > +
> > >   ### spec-ctrl (x86)
> > >   > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
> > >   >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd}=<bool>
> > > ]`
> > > diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> > > index bcea2eb6e5..f921721a66 100644
> > > --- a/xen/arch/arm/cpuerrata.c
> > > +++ b/xen/arch/arm/cpuerrata.c
> > > @@ -237,6 +237,41 @@ static int enable_ic_inv_hardening(void *data)
> > >     #ifdef CONFIG_ARM_SSBD
> > >   +enum ssbd_state ssbd_state = ARM_SSBD_RUNTIME;
> > > +
> > > +static int __init parse_spec_ctrl(const char *s)
> > > +{
> > > +    const char *ss;
> > > +    int rc = 0;
> > > +
> > > +    do {
> > > +        ss = strchr(s, ',');
> > > +        if ( !ss )
> > > +            ss = strchr(s, '\0');
> > 
> > It doesn't look like it is necessary to parse ',' at all. I would remove
> > the while loop too.
> 
> It matters, you want to catch and warn user that the command line is not
> valid. Imagine someone decide to add ",..." after. It also make easier to
> integrate new option without reworking it.

All right, I re-read the whole loop and looks fine to me.


> > 
> > 
> > > +        if ( !strncmp(s, "ssbd=", 5) )
> > > +        {
> > > +            s += 5;
> > > +
> > > +            if ( !strncmp(s, "force-disable", ss - s) )
> > > +                ssbd_state = ARM_SSBD_FORCE_DISABLE;
> > > +            else if ( !strncmp(s, "runtime", ss - s) )
> > > +                ssbd_state = ARM_SSBD_RUNTIME;
> > > +            else if ( !strncmp(s, "force-enable", ss - s) )
> > > +                ssbd_state = ARM_SSBD_FORCE_ENABLE;
> > > +            else
> > > +                rc = -EINVAL;
> > > +        }
> > > +        else
> > > +            rc = -EINVAL;
> > > +
> > > +        s = ss + 1;
> > > +    } while ( *ss );
> > > +
> > > +    return rc;
> > > +}
> > > +custom_param("spec-ctrl", parse_spec_ctrl);
> > > +
> > >   /*
> > >    * Assembly code may use the variable directly, so we need to make sure
> > >    * it fits in a register.
> > > @@ -246,25 +281,82 @@ DEFINE_PER_CPU_READ_MOSTLY(register_t,
> > > ssbd_callback_required);
> > >   static bool has_ssbd_mitigation(const struct arm_cpu_capabilities
> > > *entry)
> > >   {
> > >       struct arm_smccc_res res;
> > > -    bool supported = true;
> > > +    bool required = true;
> > 
> > Please avoid this renaming. Choose one name or the other from the start.
> 
> This is what happen when you want to split a series in a logical way. The name
> "required" does not make sense with the previous patch. So the renaming make
> sense here.

Why does "required" not make sense in the previous patch? I think it
would be fine.

In any case, I prefer that both of us spend time on useful stuff rather
than choosing which patch should set the variable name :-) So I am going
to drop this regardless.


> > 
> > 
> > >       if ( smccc_ver < SMCCC_VERSION(1, 1) )
> > >           return false;
> > >   -    /*
> > > -     * The probe function return value is either negative (unsupported
> > > -     * or mitigated), positive (unaffected), or zero (requires
> > > -     * mitigation). We only need to do anything in the last case.
> > > -     */
> > 
> > I would keep the comment
> 
> The comment is not correct after this patch. The may need to act differently
> depending on the comment line. Regarding the values, the switch is more
> explanatory than those 3 lines.

I see. You could keep the comment, removing "We only need to do anything
in the last case." Either way is OK.


> > 
> > 
> > >       arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
> > >                         ARM_SMCCC_ARCH_WORKAROUND_2_FID, &res);
> > > -    if ( (int)res.a0 != 0 )
> > > -        supported = false;
> > >   -    if ( supported )
> > > -        this_cpu(ssbd_callback_required) = 1;
> > > +    switch ( (int)res.a0 )
> > 
> > Please introduce this switch in the previous patch. But it makes sense
> > to add the ssbd_state variable in this patch.
> 
> Well, that's not going to make the diff simpler here as the switch will be
> different. So I would keep the patch like that.

The split is a bit iffy to me, but if you don't want to change it, I can
live with it anyway.

 
> > 
> > > +    {
> > > +    case ARM_SMCCC_NOT_SUPPORTED:
> > > +        ssbd_state = ARM_SSBD_UNKNOWN;
> > > +        return false;
> > > +
> > > +    case ARM_SMCCC_NOT_REQUIRED:
> > > +        ssbd_state = ARM_SSBD_MITIGATED;
> > > +        return false;
> > > +
> > > +    case ARM_SMCCC_SUCCESS:
> > > +        required = true;
> > > +        break;
> > > +
> > > +    case 1: /* Mitigation not required on this CPU. */
> > > +        required = false;
> > > +        break;
> > 
> > This should "return false". 
> 
> It is perfectly fine to continue as it is safe to execute ARCH_WORKAROUND_2 on
> that CPU.

This is the case where mitigation is not required but issuing the SMCCC
is safe. Instead of returning immediately, we go through the next
switch:

1) if ARM_SSBD_FORCE_DISABLE, we make the SMCCC
2) if ARM_SSBD_RUNTIME, we do nothing
3) if ARM_SSBD_FORCE_ENABLE, we make the SMCCC

What is the desired outcome for this situation? Obviously, continuing for
case 2) is pointless, we might as well return immediately. For 1) and 3)
is the intention that the SMCCC will actually have an effect even if the
mitigation is not required?

 
> Also, it might make sense to set ssbd_state
> > to ARM_SSBD_MITIGATED?
> 
> No, the mitigation is not required on *that* CPU. It does not mean it will not
> be required for all CPUs. So it makes sense to not update ssbd_state.

I understand now, and thanks for the clarification on the call as well.


> > 
> > 
> > > +
> > > +    default:
> > > +        ASSERT_UNREACHABLE();
> > > +        return false;
> > > +    }
> > > +
> > > +    switch ( ssbd_state )
> > > +    {
> > > +    case ARM_SSBD_FORCE_DISABLE:
> > > +    {
> > > +        static bool once = true;
> > > +
> > > +        if ( once )
> > > +            printk("%s disabled from command-line\n", entry->desc);
> > > +        once = false;
> > > +
> > > +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL);
> > > +        required = false;
> > > +
> > > +        break;
> > > +    }
> > > +
> > > +    case ARM_SSBD_RUNTIME:
> > > +        if ( required )
> > > +        {
> > > +            this_cpu(ssbd_callback_required) = 1;
> > 
> > We have the ARM_SSBD bit, the ssbd_state variable and
> > ssbd_callback_required. Both ARM_SSBD and ssbd_state are shared across
> > cores while ssbd_callback_required is per-cpu. Does
> > ssbd_callback_required really need to be per-cpu? > Do we need both
> > variables? For instance, we could just return ssbd_state ==
> > ARM_SSBD_RUNTIME instead of this_cpu(ssbd_callback_required)?
> 
> Let me start with because a guest vCPU may run on any pCPU, you always have to
> tell the guest the mitigation is required for all vCPUs.
> 
> By default, Linux is calling the workaround at entry from EL0 to enable it and
> at exit to EL0 to disable it. The workaround will first trap in EL2 and then
> get forwarded to EL3.
> 
> You can imagine that the trap to EL2 and then EL3 has a cost. If the
> workaround is not necessary, then you can reduce that cost by avoiding to trap
> at EL3. As you can have a platform with heterogenous CPUs, you need that
> workaround per-CPU.
> 
> The ARM_SSBD feature bit is useful in order to put shortcut in place using
> alternative (see check_workaround_ssbd). So on platform where the mitigation
> is not required, all the new code is nearly a NOP.
> 
> The ssbd_state is used in various place to know what is the global state of
> the mitigation:
> 	- To initialize the vCPU state for the mitigation
> 	- To report the guest what is the state of the mitigation using SMCCC
> 
> So all those variables have a specific purposes and cannot really be replaced
> by another way.

Good explanation. Please add something like this to one of the commit
messages. Please also consider the following suggestion.

Wouldn't it make sense to remove ssbd_callback_required and make
ssbd_state a per-cpu variable? The Xen command line option would remain
the same, global, but it would initialize the value of ssbd_state on all
cpus. Then, has_ssbd_mitigation would further modify ssbd_state on a
specific cpu to ARM_SSBD_UNKNOWN (if ARM_SMCCC_NOT_SUPPORTED),
ARM_SSBD_MITIGATED (if ARM_SMCCC_NOT_REQUIRED"), etc. In the common
case, the CPUs that need the workaround will have ssbd_state set to
ARM_SSBD_RUNTIME, and the others will have ARM_SSBD_UNKNOWN or
ARM_SSBD_MITIGATED, or maybe a new value ARM_SSBD_UNNECESSARY. It looks
like it would still be simple to check on ssbd_state from assembly as
well, it can still be done with one instruction, we just need to make
sure to assign integer values to the enum, such as:
  
  ARM_SSBD_UNKNOWN = 0,
  ARM_SSBD_FORCE_DISABLE = 1,

etc.

 
 
> > > +            arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
> > > +        }
> > > +
> > > +        break;
> > > +
> > > +    case ARM_SSBD_FORCE_ENABLE:
> > > +    {
> > > +        static bool once = true;
> > > +
> > > +        if ( once )
> > > +            printk("%s forced from command-line\n", entry->desc);
> > > +        once = false;
> > > +
> > > +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
> > > +        required = true;
> > 
> > This function is supposed to detect whether a workaround is needed, not
> > enable it, right? Should this switch and relative code be moved to the
> > .enable function for this capability?
> 
> I had the split before but it is difficult to get a nice split between .enable
> and .matches. So I decided to follow what Linux/KVM did and put everything in
> has_.

All right. Please add a note about this in the commit message or the code.


> > 
> > > +        break;
> > > +    }
> > > +
> > > +    default:
> > > +        ASSERT_UNREACHABLE();
> > > +        return false;
> > > +    }
> > >   -    return supported;
> > > +    return required;
> > >   }
> > >   #endif
> > >   @@ -371,6 +463,7 @@ static const struct arm_cpu_capabilities
> > > arm_errata[] = {
> > >   #endif
> > >   #ifdef CONFIG_ARM_SSBD
> > >       {
> > > +        .desc = "Speculative Store Bypass Disabled",
> > >           .capability = ARM_SSBD,
> > >           .matches = has_ssbd_mitigation,
> > >       },
> > > diff --git a/xen/include/asm-arm/cpuerrata.h
> > > b/xen/include/asm-arm/cpuerrata.h
> > > index e628d3ff56..7fbb3dc0be 100644
> > > --- a/xen/include/asm-arm/cpuerrata.h
> > > +++ b/xen/include/asm-arm/cpuerrata.h
> > > @@ -31,10 +31,26 @@ CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD,
> > > CONFIG_ARM_SSBD)
> > >     #undef CHECK_WORKAROUND_HELPER
> > >   +enum ssbd_state
> > > +{
> > > +    ARM_SSBD_UNKNOWN,
> > > +    ARM_SSBD_FORCE_DISABLE,
> > > +    ARM_SSBD_RUNTIME,
> > > +    ARM_SSBD_FORCE_ENABLE,
> > > +    ARM_SSBD_MITIGATED,
> > > +};
> > > +
> > >   #ifdef CONFIG_ARM_SSBD
> > >     #include <asm/current.h>
> > >   +extern enum ssbd_state ssbd_state;
> > > +
> > > +static inline enum ssbd_state get_ssbd_state(void)
> > > +{
> > > +    return ssbd_state;
> > > +}
> > > +
> > >   DECLARE_PER_CPU(register_t, ssbd_callback_required);
> > >     static inline bool cpu_require_ssbd_mitigation(void)
> > > @@ -49,6 +65,11 @@ static inline bool cpu_require_ssbd_mitigation(void)
> > >       return false;
> > >   }
> > >   +static inline enum ssbd_state get_sbdd_state(void)
> > > +{
> > > +    return ARM_SSBD_UNKNOWN;
> > > +}
> > > +
> > >   #endif
> > >     #endif /* __ARM_CPUERRATA_H__ */
> > > diff --git a/xen/include/asm-arm/smccc.h b/xen/include/asm-arm/smccc.h
> > > index 650744d28b..a6804cec99 100644
> > > --- a/xen/include/asm-arm/smccc.h
> > > +++ b/xen/include/asm-arm/smccc.h
> > > @@ -265,6 +265,7 @@ struct arm_smccc_res {
> > >                          0x7FFF)
> > >     /* SMCCC error codes */
> > > +#define ARM_SMCCC_NOT_REQUIRED          (-2)
> > >   #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION  (-1)
> > >   #define ARM_SMCCC_NOT_SUPPORTED         (-1)
> > >   #define ARM_SMCCC_SUCCESS               (0)
> > > -- 
> > > 2.11.0
> > > 
> 
> Cheers,
> 
> -- 
> Julien Grall
>
Julien Grall May 29, 2018, 11:31 a.m. | #8
On 25/05/18 21:51, Stefano Stabellini wrote:
> On Thu, 24 May 2018, Julien Grall wrote:
>> On 23/05/18 23:34, Stefano Stabellini wrote:
>>> On Tue, 22 May 2018, Julien Grall  >>>>        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
>>>>                          ARM_SMCCC_ARCH_WORKAROUND_2_FID, &res);
>>>> -    if ( (int)res.a0 != 0 )
>>>> -        supported = false;
>>>>    -    if ( supported )
>>>> -        this_cpu(ssbd_callback_required) = 1;
>>>> +    switch ( (int)res.a0 )
>>>
>>> Please introduce this switch in the previous patch. But it makes sense
>>> to add the ssbd_state variable in this patch.
>>
>> Well, that's not going to make the diff simpler here as the switch will be
>> different. So I would keep the patch like that.
> 
> The split is a bit iffy to me, but if you don't want to change it, I can
> live with it anyway.

I don't think the other way will help. But I will do it.

>>>
>>>> +    {
>>>> +    case ARM_SMCCC_NOT_SUPPORTED:
>>>> +        ssbd_state = ARM_SSBD_UNKNOWN;
>>>> +        return false;
>>>> +
>>>> +    case ARM_SMCCC_NOT_REQUIRED:
>>>> +        ssbd_state = ARM_SSBD_MITIGATED;
>>>> +        return false;
>>>> +
>>>> +    case ARM_SMCCC_SUCCESS:
>>>> +        required = true;
>>>> +        break;
>>>> +
>>>> +    case 1: /* Mitigation not required on this CPU. */
>>>> +        required = false;
>>>> +        break;
>>>
>>> This should "return false".
>>
>> It is perfectly fine to continue as it is safe to execute ARCH_WORKAROUND_2 on
>> that CPU.
> 
> This is the case where mitigation is not required but issuing the SMCCC
> is safe. Instead of returning immediately, we go through the next
> switch:
> 
> 1) if ARM_SSBD_FORCE_DISABLE, we make the SMCCC
> 2) if ARM_SSBD_RUNTIME, we do nothing
> 3) if ARM_SSBD_FORCE_ENABLE, we make the SMCCC
> 
> What is the desired outcome for this situation? Obviously, continuing for
> case 2) is pointless, we might as well return immediately. For 1) and 3)
> is the intention that the SMCCC will actually have an effect even if the
> mitigation is not required?

While the SMCCC call in 1) and 3) will do nothing for those CPUs, you 
will still print a warning message if the user choose to force 
enable/disable the mitigation.
>>>
>>>
>>>> +
>>>> +    default:
>>>> +        ASSERT_UNREACHABLE();
>>>> +        return false;
>>>> +    }
>>>> +
>>>> +    switch ( ssbd_state )
>>>> +    {
>>>> +    case ARM_SSBD_FORCE_DISABLE:
>>>> +    {
>>>> +        static bool once = true;
>>>> +
>>>> +        if ( once )
>>>> +            printk("%s disabled from command-line\n", entry->desc);
>>>> +        once = false;
>>>> +
>>>> +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL);
>>>> +        required = false;
>>>> +
>>>> +        break;
>>>> +    }
>>>> +
>>>> +    case ARM_SSBD_RUNTIME:
>>>> +        if ( required )
>>>> +        {
>>>> +            this_cpu(ssbd_callback_required) = 1;
>>>
>>> We have the ARM_SSBD bit, the ssbd_state variable and
>>> ssbd_callback_required. Both ARM_SSBD and ssbd_state are shared across
>>> cores while ssbd_callback_required is per-cpu. Does
>>> ssbd_callback_required really need to be per-cpu? > Do we need both
>>> variables? For instance, we could just return ssbd_state ==
>>> ARM_SSBD_RUNTIME instead of this_cpu(ssbd_callback_required)?
>>
>> Let me start with because a guest vCPU may run on any pCPU, you always have to
>> tell the guest the mitigation is required for all vCPUs.
>>
>> By default, Linux is calling the workaround at entry from EL0 to enable it and
>> at exit to EL0 to disable it. The workaround will first trap in EL2 and then
>> get forwarded to EL3.
>>
>> You can imagine that the trap to EL2 and then EL3 has a cost. If the
>> workaround is not necessary, then you can reduce that cost by avoiding to trap
>> at EL3. As you can have a platform with heterogenous CPUs, you need that
>> workaround per-CPU.
>>
>> The ARM_SSBD feature bit is useful in order to put shortcut in place using
>> alternative (see check_workaround_ssbd). So on platform where the mitigation
>> is not required, all the new code is nearly a NOP.
>>
>> The ssbd_state is used in various place to know what is the global state of
>> the mitigation:
>> 	- To initialize the vCPU state for the mitigation
>> 	- To report the guest what is the state of the mitigation using SMCCC
>>
>> So all those variables have a specific purposes and cannot really be replaced
>> by another way.
> 
> Good explanation. Please add something like this to one of the commit
> messages. Please also consider the following suggestion.
> 
> Wouldn't it make sense to remove ssbd_callback_required and make
> ssbd_state a per-cpu variable? The Xen command line option would remain
> the same, global, but it would initialize the value of ssbd_state on all
> cpus. Then, has_ssbd_mitigation would further modify ssbd_state on a
> specific cpu to ARM_SSBD_UNKNOWN (if ARM_SMCCC_NOT_SUPPORTED),
> ARM_SSBD_MITIGATED (if ARM_SMCCC_NOT_REQUIRED"), etc. In the common
> case, the CPUs that need the workaround will have ssbd_state set to
> ARM_SSBD_RUNTIME, and the others will have ARM_SSBD_UNKNOWN or
> ARM_SSBD_MITIGATED, or maybe a new value ARM_SSBD_UNNECESSARY. It looks
> like it would still be simple to check on ssbd_state from assembly as
> well, it can still be done with one instruction, we just need to make
> sure to assign integer values to the enum, such as:
>    
>    ARM_SSBD_UNKNOWN = 0,
>    ARM_SSBD_FORCE_DISABLE = 1,
> 
> etc.

As I said in my previous e-mail, we need to know the global state of the 
mitigation. This is because a vCPU may move from a affected CPU to a 
non-affected one. Therefore we need to inform the same on every vCPU 
(i.e mitigated, dynamic...).

Your suggestion will just save 4 bytes but add more code to find out 
what is the system-wide decision for the mitigation.

Cheers,
Stefano Stabellini May 29, 2018, 10:34 p.m. | #9
On Tue, 29 May 2018, Julien Grall wrote:
> On 25/05/18 21:51, Stefano Stabellini wrote:
> > On Thu, 24 May 2018, Julien Grall wrote:
> > > On 23/05/18 23:34, Stefano Stabellini wrote:
> > > > On Tue, 22 May 2018, Julien Grall  >>>>
> > > > arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
> > > > >                          ARM_SMCCC_ARCH_WORKAROUND_2_FID, &res);
> > > > > -    if ( (int)res.a0 != 0 )
> > > > > -        supported = false;
> > > > >    -    if ( supported )
> > > > > -        this_cpu(ssbd_callback_required) = 1;
> > > > > +    switch ( (int)res.a0 )
> > > > 
> > > > Please introduce this switch in the previous patch. But it makes sense
> > > > to add the ssbd_state variable in this patch.
> > > 
> > > Well, that's not going to make the diff simpler here as the switch will be
> > > different. So I would keep the patch like that.
> > 
> > The split is a bit iffy to me, but if you don't want to change it, I can
> > live with it anyway.
> 
> I don't think the other way will help. But I will do it.

Thank you


> > > > 
> > > > > +    {
> > > > > +    case ARM_SMCCC_NOT_SUPPORTED:
> > > > > +        ssbd_state = ARM_SSBD_UNKNOWN;
> > > > > +        return false;
> > > > > +
> > > > > +    case ARM_SMCCC_NOT_REQUIRED:
> > > > > +        ssbd_state = ARM_SSBD_MITIGATED;
> > > > > +        return false;
> > > > > +
> > > > > +    case ARM_SMCCC_SUCCESS:
> > > > > +        required = true;
> > > > > +        break;
> > > > > +
> > > > > +    case 1: /* Mitigation not required on this CPU. */
> > > > > +        required = false;
> > > > > +        break;
> > > > 
> > > > This should "return false".
> > > 
> > > It is perfectly fine to continue as it is safe to execute
> > > ARCH_WORKAROUND_2 on
> > > that CPU.
> > 
> > This is the case where mitigation is not required but issuing the SMCCC
> > is safe. Instead of returning immediately, we go through the next
> > switch:
> > 
> > 1) if ARM_SSBD_FORCE_DISABLE, we make the SMCCC
> > 2) if ARM_SSBD_RUNTIME, we do nothing
> > 3) if ARM_SSBD_FORCE_ENABLE, we make the SMCCC
> > 
> > What is the desired outcome for this situation? Obviously, continuing for
> > case 2) is pointless, we might as well return immediately. For 1) and 3)
> > is the intention that the SMCCC will actually have an effect even if the
> > mitigation is not required?
> 
> While the SMCCC call in 1) and 3) will do nothing for those CPUs, you will
> still print a warning message if the user choose to force enable/disable the
> mitigation.

Printing warnings could be a good idea. However, I think we should do
the same thing for "1" and for "ARM_SMCCC_NOT_REQUIRED", and maybe even
for "ARM_SMCCC_NOT_SUPPORTED": printing warnings for all or for none.

I also noticed that if the first SMCCC returns "1" and we continue, in
case ssbd_state == ARM_SSBD_FORCE_ENABLE, "required" gets changed to
"true".  Do we want to let the user force-enable the mitigation even
when it will do nothing? I am not really sure, probably not? In any case
I would prefer if we kept the same behavior across "1" and
"ARM_SMCCC_NOT_REQUIRED".


> > > > 
> > > > 
> > > > > +
> > > > > +    default:
> > > > > +        ASSERT_UNREACHABLE();
> > > > > +        return false;
> > > > > +    }
> > > > > +
> > > > > +    switch ( ssbd_state )
> > > > > +    {
> > > > > +    case ARM_SSBD_FORCE_DISABLE:
> > > > > +    {
> > > > > +        static bool once = true;
> > > > > +
> > > > > +        if ( once )
> > > > > +            printk("%s disabled from command-line\n", entry->desc);
> > > > > +        once = false;
> > > > > +
> > > > > +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL);
> > > > > +        required = false;
> > > > > +
> > > > > +        break;
> > > > > +    }
> > > > > +
> > > > > +    case ARM_SSBD_RUNTIME:
> > > > > +        if ( required )
> > > > > +        {
> > > > > +            this_cpu(ssbd_callback_required) = 1;
> > > > 
> > > > We have the ARM_SSBD bit, the ssbd_state variable and
> > > > ssbd_callback_required. Both ARM_SSBD and ssbd_state are shared across
> > > > cores while ssbd_callback_required is per-cpu. Does
> > > > ssbd_callback_required really need to be per-cpu? > Do we need both
> > > > variables? For instance, we could just return ssbd_state ==
> > > > ARM_SSBD_RUNTIME instead of this_cpu(ssbd_callback_required)?
> > > 
> > > Let me start with because a guest vCPU may run on any pCPU, you always
> > > have to
> > > tell the guest the mitigation is required for all vCPUs.
> > > 
> > > By default, Linux is calling the workaround at entry from EL0 to enable it
> > > and
> > > at exit to EL0 to disable it. The workaround will first trap in EL2 and
> > > then
> > > get forwarded to EL3.
> > > 
> > > You can imagine that the trap to EL2 and then EL3 has a cost. If the
> > > workaround is not necessary, then you can reduce that cost by avoiding to
> > > trap
> > > at EL3. As you can have a platform with heterogenous CPUs, you need that
> > > workaround per-CPU.
> > > 
> > > The ARM_SSBD feature bit is useful in order to put shortcut in place using
> > > alternative (see check_workaround_ssbd). So on platform where the
> > > mitigation
> > > is not required, all the new code is nearly a NOP.
> > > 
> > > The ssbd_state is used in various place to know what is the global state
> > > of
> > > the mitigation:
> > > 	- To initialize the vCPU state for the mitigation
> > > 	- To report the guest what is the state of the mitigation using SMCCC
> > > 
> > > So all those variables have a specific purposes and cannot really be
> > > replaced
> > > by another way.
> > 
> > Good explanation. Please add something like this to one of the commit
> > messages. Please also consider the following suggestion.
> > 
> > Wouldn't it make sense to remove ssbd_callback_required and make
> > ssbd_state a per-cpu variable? The Xen command line option would remain
> > the same, global, but it would initialize the value of ssbd_state on all
> > cpus. Then, has_ssbd_mitigation would further modify ssbd_state on a
> > specific cpu to ARM_SSBD_UNKNOWN (if ARM_SMCCC_NOT_SUPPORTED),
> > ARM_SSBD_MITIGATED (if ARM_SMCCC_NOT_REQUIRED"), etc. In the common
> > case, the CPUs that need the workaround will have ssbd_state set to
> > ARM_SSBD_RUNTIME, and the others will have ARM_SSBD_UNKNOWN or
> > ARM_SSBD_MITIGATED, or maybe a new value ARM_SSBD_UNNECESSARY. It looks
> > like it would still be simple to check on ssbd_state from assembly as
> > well, it can still be done with one instruction, we just need to make
> > sure to assign integer values to the enum, such as:
> >       ARM_SSBD_UNKNOWN = 0,
> >    ARM_SSBD_FORCE_DISABLE = 1,
> > 
> > etc.
> 
> As I said in my previous e-mail, we need to know the global state of the
> mitigation. This is because a vCPU may move from a affected CPU to a
> non-affected one. Therefore we need to inform the same on every vCPU (i.e
> mitigated, dynamic...).

All right, but if the SMCCC(ARM_SMCCC_ARCH_FEATURES_FID,
ARM_SMCCC_ARCH_WORKAROUND_2_FID) returns ARM_SMCCC_SUCCESS on cpu0 and
ARM_SMCCC_NOT_REQUIRED on cpu1, the result will be that ssbd_state is
set to ARM_SSBD_MITIGATED for all cpus. Which is not what we want?
It doesn't look like the vsmc would return the right value anymore.

One solution would be that has_ssbd_mitigation is not allowed to set
ssbd_state to ARM_SSBD_UNKNOWN or ARM_SSBD_MITIGATED if previously, or
afterwards, the SMCCC returns ARM_SMCCC_SUCCESS. In other words,
ARM_SMCCC_SUCCESS trumps any other return values.
Julien Grall May 30, 2018, 10:39 a.m. | #10
Hi Stefano,

On 05/29/2018 11:34 PM, Stefano Stabellini wrote:
> On Tue, 29 May 2018, Julien Grall wrote:
>> On 25/05/18 21:51, Stefano Stabellini wrote:
>>> On Thu, 24 May 2018, Julien Grall wrote:
>>>> On 23/05/18 23:34, Stefano Stabellini wrote:
>>>>> On Tue, 22 May 2018, Julien Grall  >>>>
>>>>> arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
>>>>>>                           ARM_SMCCC_ARCH_WORKAROUND_2_FID, &res);
>>>>>> -    if ( (int)res.a0 != 0 )
>>>>>> -        supported = false;
>>>>>>     -    if ( supported )
>>>>>> -        this_cpu(ssbd_callback_required) = 1;
>>>>>> +    switch ( (int)res.a0 )
>>>>>
>>>>> Please introduce this switch in the previous patch. But it makes sense
>>>>> to add the ssbd_state variable in this patch.
>>>>
>>>> Well, that's not going to make the diff simpler here as the switch will be
>>>> different. So I would keep the patch like that.
>>>
>>> The split is a bit iffy to me, but if you don't want to change it, I can
>>> live with it anyway.
>>
>> I don't think the other way will help. But I will do it.
> 
> Thank you
> 
> 
>>>>>
>>>>>> +    {
>>>>>> +    case ARM_SMCCC_NOT_SUPPORTED:
>>>>>> +        ssbd_state = ARM_SSBD_UNKNOWN;
>>>>>> +        return false;
>>>>>> +
>>>>>> +    case ARM_SMCCC_NOT_REQUIRED:
>>>>>> +        ssbd_state = ARM_SSBD_MITIGATED;
>>>>>> +        return false;
>>>>>> +
>>>>>> +    case ARM_SMCCC_SUCCESS:
>>>>>> +        required = true;
>>>>>> +        break;
>>>>>> +
>>>>>> +    case 1: /* Mitigation not required on this CPU. */
>>>>>> +        required = false;
>>>>>> +        break;
>>>>>
>>>>> This should "return false".
>>>>
>>>> It is perfectly fine to continue as it is safe to execute
>>>> ARCH_WORKAROUND_2 on
>>>> that CPU.
>>>
>>> This is the case where mitigation is not required but issuing the SMCCC
>>> is safe. Instead of returning immediately, we go through the next
>>> switch:
>>>
>>> 1) if ARM_SSBD_FORCE_DISABLE, we make the SMCCC
>>> 2) if ARM_SSBD_RUNTIME, we do nothing
>>> 3) if ARM_SSBD_FORCE_ENABLE, we make the SMCCC
>>>
>>> What is the desired outcome for this situation? Obviously, continuing for
>>> case 2) is pointless, we might as well return immediately. For 1) and 3)
>>> is the intention that the SMCCC will actually have an effect even if the
>>> mitigation is not required?
>>
>> While the SMCCC call in 1) and 3) will do nothing for those CPUs, you will
>> still print a warning message if the user choose to force enable/disable the
>> mitigation.
> 
> Printing warnings could be a good idea. However, I think we should do
> the same thing for "1" and for "ARM_SMCCC_NOT_REQUIRED", and maybe even
> for "ARM_SMCCC_NOT_SUPPORTED": printing warnings for all or for none.
> 
> I also noticed that if the first SMCCC returns "1" and we continue, in 
> case ssbd_state == ARM_SSBD_FORCE_ENABLE, "required" gets changed to
> "true".  Do we want to let the user force-enable the mitigation even
> when it will do nothing? I am not really sure, probably not? In any case
> I would prefer if we kept the same behavior across "1" and
> "ARM_SMCCC_NOT_REQUIRED".

I don't think it is right ot expect the same behavior for "1"and 
"ARM_SMCCC_NOT_REQUIRED". There are majors difference between those 2 
and ARM_SMCCC_NOT_SUPPORTED.

 From the spec ARM DEN 0070A section 2.2.5:
	- If your firmware does not support the call, ARM_SMCCC_NOT_SUPPORTED 
will be returned for *all* CPUs. This will happen on current firmwares.
	- If your firmware has mitigation permanently disabled/enabled for 
*all* CPUs, ARM_SMCCC_NOT_REQUIRED will be returned.
	- If one of the CPUs in the platform require dynamic mitigation, the 
call will return 0 for them. The others CPUs will return 1.

Printing a warning for the first two will likely scare the user because 
it is going to be printed on most of the current platforms.

Printing a warning for the last one makes sense because you know that 
one of the CPU may be affected. That CPU may be bring up later on. So 
you offer a print in similar place in the logs whatever the platform is.

> 
> 
>>>>>
>>>>>
>>>>>> +
>>>>>> +    default:
>>>>>> +        ASSERT_UNREACHABLE();
>>>>>> +        return false;
>>>>>> +    }
>>>>>> +
>>>>>> +    switch ( ssbd_state )
>>>>>> +    {
>>>>>> +    case ARM_SSBD_FORCE_DISABLE:
>>>>>> +    {
>>>>>> +        static bool once = true;
>>>>>> +
>>>>>> +        if ( once )
>>>>>> +            printk("%s disabled from command-line\n", entry->desc);
>>>>>> +        once = false;
>>>>>> +
>>>>>> +        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL);
>>>>>> +        required = false;
>>>>>> +
>>>>>> +        break;
>>>>>> +    }
>>>>>> +
>>>>>> +    case ARM_SSBD_RUNTIME:
>>>>>> +        if ( required )
>>>>>> +        {
>>>>>> +            this_cpu(ssbd_callback_required) = 1;
>>>>>
>>>>> We have the ARM_SSBD bit, the ssbd_state variable and
>>>>> ssbd_callback_required. Both ARM_SSBD and ssbd_state are shared across
>>>>> cores while ssbd_callback_required is per-cpu. Does
>>>>> ssbd_callback_required really need to be per-cpu? > Do we need both
>>>>> variables? For instance, we could just return ssbd_state ==
>>>>> ARM_SSBD_RUNTIME instead of this_cpu(ssbd_callback_required)?
>>>>
>>>> Let me start with because a guest vCPU may run on any pCPU, you always
>>>> have to
>>>> tell the guest the mitigation is required for all vCPUs.
>>>>
>>>> By default, Linux is calling the workaround at entry from EL0 to enable it
>>>> and
>>>> at exit to EL0 to disable it. The workaround will first trap in EL2 and
>>>> then
>>>> get forwarded to EL3.
>>>>
>>>> You can imagine that the trap to EL2 and then EL3 has a cost. If the
>>>> workaround is not necessary, then you can reduce that cost by avoiding to
>>>> trap
>>>> at EL3. As you can have a platform with heterogenous CPUs, you need that
>>>> workaround per-CPU.
>>>>
>>>> The ARM_SSBD feature bit is useful in order to put shortcut in place using
>>>> alternative (see check_workaround_ssbd). So on platform where the
>>>> mitigation
>>>> is not required, all the new code is nearly a NOP.
>>>>
>>>> The ssbd_state is used in various place to know what is the global state
>>>> of
>>>> the mitigation:
>>>> 	- To initialize the vCPU state for the mitigation
>>>> 	- To report the guest what is the state of the mitigation using SMCCC
>>>>
>>>> So all those variables have a specific purposes and cannot really be
>>>> replaced
>>>> by another way.
>>>
>>> Good explanation. Please add something like this to one of the commit
>>> messages. Please also consider the following suggestion.
>>>
>>> Wouldn't it make sense to remove ssbd_callback_required and make
>>> ssbd_state a per-cpu variable? The Xen command line option would remain
>>> the same, global, but it would initialize the value of ssbd_state on all
>>> cpus. Then, has_ssbd_mitigation would further modify ssbd_state on a
>>> specific cpu to ARM_SSBD_UNKNOWN (if ARM_SMCCC_NOT_SUPPORTED),
>>> ARM_SSBD_MITIGATED (if ARM_SMCCC_NOT_REQUIRED"), etc. In the common
>>> case, the CPUs that need the workaround will have ssbd_state set to
>>> ARM_SSBD_RUNTIME, and the others will have ARM_SSBD_UNKNOWN or
>>> ARM_SSBD_MITIGATED, or maybe a new value ARM_SSBD_UNNECESSARY. It looks
>>> like it would still be simple to check on ssbd_state from assembly as
>>> well, it can still be done with one instruction, we just need to make
>>> sure to assign integer values to the enum, such as:
>>>        ARM_SSBD_UNKNOWN = 0,
>>>     ARM_SSBD_FORCE_DISABLE = 1,
>>>
>>> etc.
>>
>> As I said in my previous e-mail, we need to know the global state of the
>> mitigation. This is because a vCPU may move from a affected CPU to a
>> non-affected one. Therefore we need to inform the same on every vCPU (i.e
>> mitigated, dynamic...).
> 
> All right, but if the SMCCC(ARM_SMCCC_ARCH_FEATURES_FID,
> ARM_SMCCC_ARCH_WORKAROUND_2_FID) returns ARM_SMCCC_SUCCESS on cpu0 and
> ARM_SMCCC_NOT_REQUIRED on cpu1, the result will be that ssbd_state is
> set to ARM_SSBD_MITIGATED for all cpus. Which is not what we want?
 >
> It doesn't look like the vsmc would return the right value anymore.
> 
> One solution would be that has_ssbd_mitigation is not allowed to set
> ssbd_state to ARM_SSBD_UNKNOWN or ARM_SSBD_MITIGATED if previously, or
> afterwards, the SMCCC returns ARM_SMCCC_SUCCESS. In other words,
> ARM_SMCCC_SUCCESS trumps any other return values.

There seem to be a misunderstanding of the spec. If 
ARM_SMCCC_NOT_REQUIRED is returned on one CPU, it will also be returned 
for all the others. It means that *all* CPUs have the mitigations 
permanently enabled/disabled.

Cheers,
Stefano Stabellini May 30, 2018, 8:10 p.m. | #11
On Wed, 30 May 2018, Julien Grall wrote:
> On 05/29/2018 11:34 PM, Stefano Stabellini wrote:
> > On Tue, 29 May 2018, Julien Grall wrote:
> > > On 25/05/18 21:51, Stefano Stabellini wrote:
> > > > On Thu, 24 May 2018, Julien Grall wrote:
> > > > > On 23/05/18 23:34, Stefano Stabellini wrote:
> > > > > > On Tue, 22 May 2018, Julien Grall  >>>>
> > > > > > arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
> > > > > > >                           ARM_SMCCC_ARCH_WORKAROUND_2_FID, &res);
> > > > > > > -    if ( (int)res.a0 != 0 )
> > > > > > > -        supported = false;
> > > > > > >     -    if ( supported )
> > > > > > > -        this_cpu(ssbd_callback_required) = 1;
> > > > > > > +    switch ( (int)res.a0 )
> > > > > > 
> > > > > > Please introduce this switch in the previous patch. But it makes
> > > > > > sense
> > > > > > to add the ssbd_state variable in this patch.
> > > > > 
> > > > > Well, that's not going to make the diff simpler here as the switch
> > > > > will be
> > > > > different. So I would keep the patch like that.
> > > > 
> > > > The split is a bit iffy to me, but if you don't want to change it, I can
> > > > live with it anyway.
> > > 
> > > I don't think the other way will help. But I will do it.
> > 
> > Thank you
> > 
> > 
> > > > > > 
> > > > > > > +    {
> > > > > > > +    case ARM_SMCCC_NOT_SUPPORTED:
> > > > > > > +        ssbd_state = ARM_SSBD_UNKNOWN;
> > > > > > > +        return false;
> > > > > > > +
> > > > > > > +    case ARM_SMCCC_NOT_REQUIRED:
> > > > > > > +        ssbd_state = ARM_SSBD_MITIGATED;
> > > > > > > +        return false;
> > > > > > > +
> > > > > > > +    case ARM_SMCCC_SUCCESS:
> > > > > > > +        required = true;
> > > > > > > +        break;
> > > > > > > +
> > > > > > > +    case 1: /* Mitigation not required on this CPU. */
> > > > > > > +        required = false;
> > > > > > > +        break;
> > > > > > 
> > > > > > This should "return false".
> > > > > 
> > > > > It is perfectly fine to continue as it is safe to execute
> > > > > ARCH_WORKAROUND_2 on
> > > > > that CPU.
> > > > 
> > > > This is the case where mitigation is not required but issuing the SMCCC
> > > > is safe. Instead of returning immediately, we go through the next
> > > > switch:
> > > > 
> > > > 1) if ARM_SSBD_FORCE_DISABLE, we make the SMCCC
> > > > 2) if ARM_SSBD_RUNTIME, we do nothing
> > > > 3) if ARM_SSBD_FORCE_ENABLE, we make the SMCCC
> > > > 
> > > > What is the desired outcome for this situation? Obviously, continuing
> > > > for
> > > > case 2) is pointless, we might as well return immediately. For 1) and 3)
> > > > is the intention that the SMCCC will actually have an effect even if the
> > > > mitigation is not required?
> > > 
> > > While the SMCCC call in 1) and 3) will do nothing for those CPUs, you will
> > > still print a warning message if the user choose to force enable/disable
> > > the
> > > mitigation.
> > 
> > Printing warnings could be a good idea. However, I think we should do
> > the same thing for "1" and for "ARM_SMCCC_NOT_REQUIRED", and maybe even
> > for "ARM_SMCCC_NOT_SUPPORTED": printing warnings for all or for none.
> > 
> > I also noticed that if the first SMCCC returns "1" and we continue, in case
> > ssbd_state == ARM_SSBD_FORCE_ENABLE, "required" gets changed to
> > "true".  Do we want to let the user force-enable the mitigation even
> > when it will do nothing? I am not really sure, probably not? In any case
> > I would prefer if we kept the same behavior across "1" and
> > "ARM_SMCCC_NOT_REQUIRED".
> 
> I don't think it is right ot expect the same behavior for "1"and
> "ARM_SMCCC_NOT_REQUIRED". There are majors difference between those 2 and
> ARM_SMCCC_NOT_SUPPORTED.
> 
> From the spec ARM DEN 0070A section 2.2.5:
> 	- If your firmware does not support the call, ARM_SMCCC_NOT_SUPPORTED
> will be returned for *all* CPUs. This will happen on current firmwares.
> 	- If your firmware has mitigation permanently disabled/enabled for
> *all* CPUs, ARM_SMCCC_NOT_REQUIRED will be returned.
> 	- If one of the CPUs in the platform require dynamic mitigation, the
> call will return 0 for them. The others CPUs will return 1.
> 
> Printing a warning for the first two will likely scare the user because it is
> going to be printed on most of the current platforms.
> 
> Printing a warning for the last one makes sense because you know that one of
> the CPU may be affected. That CPU may be bring up later on. So you offer a
> print in similar place in the logs whatever the platform is.

I should have read the spec more carefully, thanks for the pointer.
Sorry about that. Finally, these patches are starting to make sense :-)

All right. I can see why ssbd_state and ssbd_callback_required are
separate and their purpose. Aside from adding more info to the commit
message, I'll make a couple of different suggestions:

1) Let's check if ssbd_state == ARM_SSBD_UNKNOWN || ssbd_state ==
ARM_SSBD_MITIGATED at the beginning of has_ssbd_mitigation and return
early in that case. This will help clarify the intended behavior and
mitigate broken firmware returning ARM_SMCCC_NOT_SUPPORTED only on some
cpus. This is just optional, I am fine either way.

2) Can we turn ssbd_callback_required from a this_cpu variable to a
single cpu bitmask? It is not great to introduce a new per-cpu varible
for just one bit. It would save space and make it easier to access from
assembly as a bitmask as it would remove the need for the ldr_this_cpu
macro. If I am wrong and the bitmask makes things more complicated
rather than simpler, then keep the code as is and just mention it in the
next version of the patch.
Julien Grall May 31, 2018, 10:34 a.m. | #12
Hi,

On 30/05/18 21:10, Stefano Stabellini wrote:
> On Wed, 30 May 2018, Julien Grall wrote:
>> On 05/29/2018 11:34 PM, Stefano Stabellini wrote:
>>> On Tue, 29 May 2018, Julien Grall wrote:
>>>> On 25/05/18 21:51, Stefano Stabellini wrote:
>>>>> On Thu, 24 May 2018, Julien Grall wrote:
>>>>>> On 23/05/18 23:34, Stefano Stabellini wrote:
>>>>>>> On Tue, 22 May 2018, Julien Grall  >>>>
> I should have read the spec more carefully, thanks for the pointer.
> Sorry about that. Finally, these patches are starting to make sense :-)
> 
> All right. I can see why ssbd_state and ssbd_callback_required are
> separate and their purpose. Aside from adding more info to the commit
> message, I'll make a couple of different suggestions:
> 
> 1) Let's check if ssbd_state == ARM_SSBD_UNKNOWN || ssbd_state ==
> ARM_SSBD_MITIGATED at the beginning of has_ssbd_mitigation and return	
> early in that case. This will help clarify the intended behavior and
> mitigate broken firmware returning ARM_SMCCC_NOT_SUPPORTED only on some
> cpus. This is just optional, I am fine either way.
A vendor not able to do a simple return "ARM_SMCCC_NOT_SUPPORTED" in 
their firmware are not worth to support it in Xen. Most likely, more 
important bits of that firmware would be broken.

> 
> 2) Can we turn ssbd_callback_required from a this_cpu variable to a
> single cpu bitmask? It is not great to introduce a new per-cpu varible
> for just one bit. It would save space and make it easier to access from
> assembly as a bitmask as it would remove the need for the ldr_this_cpu
> macro. If I am wrong and the bitmask makes things more complicated
> rather than simpler, then keep the code as is and just mention it in the
> next version of the patch.

I hope you are aware that this will only save 8 byte per-CPU. On most of 
embedded platform you will have less than 16 CPUs. So you would save at 
most 128 bytes (woah!). If you are that tight in memory, then there are 
better place to reduce the footprint.

I am also not sure to understand the problem of having ldr_this_cpu 
around. The macro is simple and in any case, you would still require at 
least a load for the bitmask.

Feel free to suggest an assembly version for the bitmask.

Cheers,
Stefano Stabellini May 31, 2018, 8:58 p.m. | #13
On Thu, 31 May 2018, Julien Grall wrote:
> Hi,
> 
> On 30/05/18 21:10, Stefano Stabellini wrote:
> > On Wed, 30 May 2018, Julien Grall wrote:
> > > On 05/29/2018 11:34 PM, Stefano Stabellini wrote:
> > > > On Tue, 29 May 2018, Julien Grall wrote:
> > > > > On 25/05/18 21:51, Stefano Stabellini wrote:
> > > > > > On Thu, 24 May 2018, Julien Grall wrote:
> > > > > > > On 23/05/18 23:34, Stefano Stabellini wrote:
> > > > > > > > On Tue, 22 May 2018, Julien Grall  >>>>
> > I should have read the spec more carefully, thanks for the pointer.
> > Sorry about that. Finally, these patches are starting to make sense :-)
> > 
> > All right. I can see why ssbd_state and ssbd_callback_required are
> > separate and their purpose. Aside from adding more info to the commit
> > message, I'll make a couple of different suggestions:
> > 
> > 1) Let's check if ssbd_state == ARM_SSBD_UNKNOWN || ssbd_state ==
> > ARM_SSBD_MITIGATED at the beginning of has_ssbd_mitigation and return	
> > early in that case. This will help clarify the intended behavior and
> > mitigate broken firmware returning ARM_SMCCC_NOT_SUPPORTED only on some
> > cpus. This is just optional, I am fine either way.
> A vendor not able to do a simple return "ARM_SMCCC_NOT_SUPPORTED" in their
> firmware are not worth to support it in Xen. Most likely, more important bits
> of that firmware would be broken.
> 
> > 
> > 2) Can we turn ssbd_callback_required from a this_cpu variable to a
> > single cpu bitmask? It is not great to introduce a new per-cpu varible
> > for just one bit. It would save space and make it easier to access from
> > assembly as a bitmask as it would remove the need for the ldr_this_cpu
> > macro. If I am wrong and the bitmask makes things more complicated
> > rather than simpler, then keep the code as is and just mention it in the
> > next version of the patch.
> 
> I hope you are aware that this will only save 8 byte per-CPU. On most of
> embedded platform you will have less than 16 CPUs. So you would save at most
> 128 bytes (woah!). If you are that tight in memory, then there are better
> place to reduce the footprint.
> 
> I am also not sure to understand the problem of having ldr_this_cpu around.
> The macro is simple and in any case, you would still require at least a load
> for the bitmask.
> 
> Feel free to suggest an assembly version for the bitmask.

OK, this is very simple, the first that came to mind, I am sure you can
improve it:

        // 65 is the cpu number, in this example
        MOV X1, #65

        // X1 tells us which doubleword to consider
        // X2 has the bit shift for right doubleword
        // X3 is the shifted X2, we'll use it to check the bitmask
        AND X2, X1, #(64-1)
        LSR X1, X1, #3
        MOV X3, #0x1
        LSL X3, X3, X2

        // we load the pointer to the bitmask in X4
        LDR X4, =cpumask
        // increase the pointer to point to the right doubleword
        ADD X4, X4, X1
        // load the doubleword
        LDR X4, [X4]
        // mask with X3, the result is in X1
        AND X1, X4, X3
Julien Grall May 31, 2018, 9:29 p.m. | #14
(sorry for the formatting)

On Thu, 31 May 2018, 22:00 Stefano Stabellini, <sstabellini@kernel.org>
wrote:

> On Thu, 31 May 2018, Julien Grall wrote:

> > Hi,

> >

> > On 30/05/18 21:10, Stefano Stabellini wrote:

> > > On Wed, 30 May 2018, Julien Grall wrote:

> > > > On 05/29/2018 11:34 PM, Stefano Stabellini wrote:

> > > > > On Tue, 29 May 2018, Julien Grall wrote:

> > > > > > On 25/05/18 21:51, Stefano Stabellini wrote:

> > > > > > > On Thu, 24 May 2018, Julien Grall wrote:

> > > > > > > > On 23/05/18 23:34, Stefano Stabellini wrote:

> > > > > > > > > On Tue, 22 May 2018, Julien Grall  >>>>

> > > I should have read the spec more carefully, thanks for the pointer.

> > > Sorry about that. Finally, these patches are starting to make sense :-)

> > >

> > > All right. I can see why ssbd_state and ssbd_callback_required are

> > > separate and their purpose. Aside from adding more info to the commit

> > > message, I'll make a couple of different suggestions:

> > >

> > > 1) Let's check if ssbd_state == ARM_SSBD_UNKNOWN || ssbd_state ==

> > > ARM_SSBD_MITIGATED at the beginning of has_ssbd_mitigation and return

>

> > > early in that case. This will help clarify the intended behavior and

> > > mitigate broken firmware returning ARM_SMCCC_NOT_SUPPORTED only on some

> > > cpus. This is just optional, I am fine either way.

> > A vendor not able to do a simple return "ARM_SMCCC_NOT_SUPPORTED" in

> their

> > firmware are not worth to support it in Xen. Most likely, more important

> bits

> > of that firmware would be broken.

> >

> > >

> > > 2) Can we turn ssbd_callback_required from a this_cpu variable to a

> > > single cpu bitmask? It is not great to introduce a new per-cpu varible

> > > for just one bit. It would save space and make it easier to access from

> > > assembly as a bitmask as it would remove the need for the ldr_this_cpu

> > > macro. If I am wrong and the bitmask makes things more complicated

> > > rather than simpler, then keep the code as is and just mention it in

> the

> > > next version of the patch.

> >

> > I hope you are aware that this will only save 8 byte per-CPU. On most of

> > embedded platform you will have less than 16 CPUs. So you would save at

> most

> > 128 bytes (woah!). If you are that tight in memory, then there are better

> > place to reduce the footprint.

> >

> > I am also not sure to understand the problem of having ldr_this_cpu

> around.

> > The macro is simple and in any case, you would still require at least a

> load

> > for the bitmask.

> >

> > Feel free to suggest an assembly version for the bitmask.

>

> OK, this is very simple, the first that came to mind, I am sure you can

> improve it:

>

>         // 65 is the cpu number, in this example

>         MOV X1, #65

>

>         // X1 tells us which doubleword to consider

>         // X2 has the bit shift for right doubleword

>         // X3 is the shifted X2, we'll use it to check the bitmask

>         AND X2, X1, #(64-1)

>         LSR X1, X1, #3

>         MOV X3, #0x1

>         LSL X3, X3, X2

>

>         // we load the pointer to the bitmask in X4

>         LDR X4, =cpumask

>         // increase the pointer to point to the right doubleword

>         ADD X4, X4, X1

>         // load the doubleword

>         LDR X4, [X4]

>         // mask with X3, the result is in X1

>         AND X1, X4, X3

>


Well, because of the SMCC v1.1 convention, you can only use x0-x3. So x4 is
a no go.

You also cannot use x1 because it contains the enable/disable boolean.

Furthermore ldr_this_cpu is only 3 instructions. With your current
solution, you have 9 instructions.
Even optimized, I honestly doubt you will manage 3 instructions. This is
30% more instructions!

So you are maybe going to save few bytes in memory, but everytime you
execute the SMC you will lose some time. As this is happening at every
entry/exit from EL0, this will have a significant impact on your workload.

At this stage, I will keep with the percpu variable. That's the best
trade-off between performance and footprint.

Cheers,
<span>(sorry for the formatting)</span><br><br><div class="gmail_quote"><div dir="ltr">On Thu, 31 May 2018, 22:00 Stefano Stabellini, &lt;<a href="mailto:sstabellini@kernel.org">sstabellini@kernel.org</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">On Thu, 31 May 2018, Julien Grall wrote:<br>
&gt; Hi,<br>
&gt; <br>
&gt; On 30/05/18 21:10, Stefano Stabellini wrote:<br>
&gt; &gt; On Wed, 30 May 2018, Julien Grall wrote:<br>
&gt; &gt; &gt; On 05/29/2018 11:34 PM, Stefano Stabellini wrote:<br>
&gt; &gt; &gt; &gt; On Tue, 29 May 2018, Julien Grall wrote:<br>
&gt; &gt; &gt; &gt; &gt; On 25/05/18 21:51, Stefano Stabellini wrote:<br>
&gt; &gt; &gt; &gt; &gt; &gt; On Thu, 24 May 2018, Julien Grall wrote:<br>
&gt; &gt; &gt; &gt; &gt; &gt; &gt; On 23/05/18 23:34, Stefano Stabellini wrote:<br>
&gt; &gt; &gt; &gt; &gt; &gt; &gt; &gt; On Tue, 22 May 2018, Julien Grall  &gt;&gt;&gt;&gt;<br>
&gt; &gt; I should have read the spec more carefully, thanks for the pointer.<br>
&gt; &gt; Sorry about that. Finally, these patches are starting to make sense :-)<br>
&gt; &gt; <br>
&gt; &gt; All right. I can see why ssbd_state and ssbd_callback_required are<br>
&gt; &gt; separate and their purpose. Aside from adding more info to the commit<br>
&gt; &gt; message, I&#39;ll make a couple of different suggestions:<br>
&gt; &gt; <br>
&gt; &gt; 1) Let&#39;s check if ssbd_state == ARM_SSBD_UNKNOWN || ssbd_state ==<br>
&gt; &gt; ARM_SSBD_MITIGATED at the beginning of has_ssbd_mitigation and return       <br>
&gt; &gt; early in that case. This will help clarify the intended behavior and<br>
&gt; &gt; mitigate broken firmware returning ARM_SMCCC_NOT_SUPPORTED only on some<br>
&gt; &gt; cpus. This is just optional, I am fine either way.<br>
&gt; A vendor not able to do a simple return &quot;ARM_SMCCC_NOT_SUPPORTED&quot; in their<br>
&gt; firmware are not worth to support it in Xen. Most likely, more important bits<br>
&gt; of that firmware would be broken.<br>
&gt; <br>
&gt; &gt; <br>
&gt; &gt; 2) Can we turn ssbd_callback_required from a this_cpu variable to a<br>
&gt; &gt; single cpu bitmask? It is not great to introduce a new per-cpu varible<br>
&gt; &gt; for just one bit. It would save space and make it easier to access from<br>
&gt; &gt; assembly as a bitmask as it would remove the need for the ldr_this_cpu<br>
&gt; &gt; macro. If I am wrong and the bitmask makes things more complicated<br>
&gt; &gt; rather than simpler, then keep the code as is and just mention it in the<br>
&gt; &gt; next version of the patch.<br>
&gt; <br>
&gt; I hope you are aware that this will only save 8 byte per-CPU. On most of<br>
&gt; embedded platform you will have less than 16 CPUs. So you would save at most<br>
&gt; 128 bytes (woah!). If you are that tight in memory, then there are better<br>
&gt; place to reduce the footprint.<br>
&gt; <br>
&gt; I am also not sure to understand the problem of having ldr_this_cpu around.<br>
&gt; The macro is simple and in any case, you would still require at least a load<br>
&gt; for the bitmask.<br>
&gt; <br>
&gt; Feel free to suggest an assembly version for the bitmask.<br>
<br>
OK, this is very simple, the first that came to mind, I am sure you can<br>
improve it:<br>
<br>
        // 65 is the cpu number, in this example<br>
        MOV X1, #65<br>
<br>
        // X1 tells us which doubleword to consider<br>
        // X2 has the bit shift for right doubleword<br>
        // X3 is the shifted X2, we&#39;ll use it to check the bitmask<br>
        AND X2, X1, #(64-1)<br>
        LSR X1, X1, #3<br>
        MOV X3, #0x1<br>
        LSL X3, X3, X2<br>
<br>
        // we load the pointer to the bitmask in X4<br>
        LDR X4, =cpumask<br>
        // increase the pointer to point to the right doubleword<br>
        ADD X4, X4, X1<br>
        // load the doubleword<br>
        LDR X4, [X4]<br>
        // mask with X3, the result is in X1<br>
        AND X1, X4, X3<br></blockquote></div><div><br></div><div>Well, because of the SMCC v1.1 convention, you can only use x0-x3. So x4 is a no go.</div><div><br></div><div>You also cannot use x1 because it contains the enable/disable boolean.</div><div><br></div><div>Furthermore ldr_this_cpu is only 3 instructions. With your current solution, you have 9 instructions.</div><div>Even optimized, I honestly doubt you will manage 3 instructions. This is 30% more instructions!</div><div><br></div><div>So you are maybe going to save few bytes in memory, but everytime you execute the SMC you will lose some time. As this is happening at every entry/exit from EL0, this will have a significant impact on your workload.</div><div><br></div><div>At this stage, I will keep with the percpu variable. That&#39;s the best trade-off between performance and footprint.</div><div><br></div><div>Cheers,</div>

Patch

diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
index 8712a833a2..962028b6ed 100644
--- a/docs/misc/xen-command-line.markdown
+++ b/docs/misc/xen-command-line.markdown
@@ -1756,6 +1756,24 @@  enforces the maximum theoretically necessary timeout of 670ms. Any number
 is being interpreted as a custom timeout in milliseconds. Zero or boolean
 false disable the quirk workaround, which is also the default.
 
+### spec-ctrl (Arm)
+> `= List of [ ssbd=force-disable|runtime|force-enable ]`
+
+Controls for speculative execution sidechannel mitigations.
+
+The option `ssbd=` is used to control the state of Speculative Store
+Bypass Disable (SSBD) mitigation.
+
+* `ssbd=force-disable` will keep the mitigation permanently off. The guest
+will not be able to control the state of the mitigation.
+* `ssbd=runtime` will always turn on the mitigation when running in the
+hypervisor context. The guest will be to turn on/off the mitigation for
+itself by using the firmware interface ARCH\_WORKAROUND\_2.
+* `ssbd=force-enable` will keep the mitigation permanently on. The guest will
+not be able to control the state of the mitigation.
+
+By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`).
+
 ### spec-ctrl (x86)
 > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
 >              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd}=<bool> ]`
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index bcea2eb6e5..f921721a66 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -237,6 +237,41 @@  static int enable_ic_inv_hardening(void *data)
 
 #ifdef CONFIG_ARM_SSBD
 
+enum ssbd_state ssbd_state = ARM_SSBD_RUNTIME;
+
+static int __init parse_spec_ctrl(const char *s)
+{
+    const char *ss;
+    int rc = 0;
+
+    do {
+        ss = strchr(s, ',');
+        if ( !ss )
+            ss = strchr(s, '\0');
+
+        if ( !strncmp(s, "ssbd=", 5) )
+        {
+            s += 5;
+
+            if ( !strncmp(s, "force-disable", ss - s) )
+                ssbd_state = ARM_SSBD_FORCE_DISABLE;
+            else if ( !strncmp(s, "runtime", ss - s) )
+                ssbd_state = ARM_SSBD_RUNTIME;
+            else if ( !strncmp(s, "force-enable", ss - s) )
+                ssbd_state = ARM_SSBD_FORCE_ENABLE;
+            else
+                rc = -EINVAL;
+        }
+        else
+            rc = -EINVAL;
+
+        s = ss + 1;
+    } while ( *ss );
+
+    return rc;
+}
+custom_param("spec-ctrl", parse_spec_ctrl);
+
 /*
  * Assembly code may use the variable directly, so we need to make sure
  * it fits in a register.
@@ -246,25 +281,82 @@  DEFINE_PER_CPU_READ_MOSTLY(register_t, ssbd_callback_required);
 static bool has_ssbd_mitigation(const struct arm_cpu_capabilities *entry)
 {
     struct arm_smccc_res res;
-    bool supported = true;
+    bool required = true;
 
     if ( smccc_ver < SMCCC_VERSION(1, 1) )
         return false;
 
-    /*
-     * The probe function return value is either negative (unsupported
-     * or mitigated), positive (unaffected), or zero (requires
-     * mitigation). We only need to do anything in the last case.
-     */
     arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
                       ARM_SMCCC_ARCH_WORKAROUND_2_FID, &res);
-    if ( (int)res.a0 != 0 )
-        supported = false;
 
-    if ( supported )
-        this_cpu(ssbd_callback_required) = 1;
+    switch ( (int)res.a0 )
+    {
+    case ARM_SMCCC_NOT_SUPPORTED:
+        ssbd_state = ARM_SSBD_UNKNOWN;
+        return false;
+
+    case ARM_SMCCC_NOT_REQUIRED:
+        ssbd_state = ARM_SSBD_MITIGATED;
+        return false;
+
+    case ARM_SMCCC_SUCCESS:
+        required = true;
+        break;
+
+    case 1: /* Mitigation not required on this CPU. */
+        required = false;
+        break;
+
+    default:
+        ASSERT_UNREACHABLE();
+        return false;
+    }
+
+    switch ( ssbd_state )
+    {
+    case ARM_SSBD_FORCE_DISABLE:
+    {
+        static bool once = true;
+
+        if ( once )
+            printk("%s disabled from command-line\n", entry->desc);
+        once = false;
+
+        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 0, NULL);
+        required = false;
+
+        break;
+    }
+
+    case ARM_SSBD_RUNTIME:
+        if ( required )
+        {
+            this_cpu(ssbd_callback_required) = 1;
+            arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
+        }
+
+        break;
+
+    case ARM_SSBD_FORCE_ENABLE:
+    {
+        static bool once = true;
+
+        if ( once )
+            printk("%s forced from command-line\n", entry->desc);
+        once = false;
+
+        arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2_FID, 1, NULL);
+        required = true;
+
+        break;
+    }
+
+    default:
+        ASSERT_UNREACHABLE();
+        return false;
+    }
 
-    return supported;
+    return required;
 }
 #endif
 
@@ -371,6 +463,7 @@  static const struct arm_cpu_capabilities arm_errata[] = {
 #endif
 #ifdef CONFIG_ARM_SSBD
     {
+        .desc = "Speculative Store Bypass Disabled",
         .capability = ARM_SSBD,
         .matches = has_ssbd_mitigation,
     },
diff --git a/xen/include/asm-arm/cpuerrata.h b/xen/include/asm-arm/cpuerrata.h
index e628d3ff56..7fbb3dc0be 100644
--- a/xen/include/asm-arm/cpuerrata.h
+++ b/xen/include/asm-arm/cpuerrata.h
@@ -31,10 +31,26 @@  CHECK_WORKAROUND_HELPER(ssbd, ARM_SSBD, CONFIG_ARM_SSBD)
 
 #undef CHECK_WORKAROUND_HELPER
 
+enum ssbd_state
+{
+    ARM_SSBD_UNKNOWN,
+    ARM_SSBD_FORCE_DISABLE,
+    ARM_SSBD_RUNTIME,
+    ARM_SSBD_FORCE_ENABLE,
+    ARM_SSBD_MITIGATED,
+};
+
 #ifdef CONFIG_ARM_SSBD
 
 #include <asm/current.h>
 
+extern enum ssbd_state ssbd_state;
+
+static inline enum ssbd_state get_ssbd_state(void)
+{
+    return ssbd_state;
+}
+
 DECLARE_PER_CPU(register_t, ssbd_callback_required);
 
 static inline bool cpu_require_ssbd_mitigation(void)
@@ -49,6 +65,11 @@  static inline bool cpu_require_ssbd_mitigation(void)
     return false;
 }
 
+static inline enum ssbd_state get_sbdd_state(void)
+{
+    return ARM_SSBD_UNKNOWN;
+}
+
 #endif
 
 #endif /* __ARM_CPUERRATA_H__ */
diff --git a/xen/include/asm-arm/smccc.h b/xen/include/asm-arm/smccc.h
index 650744d28b..a6804cec99 100644
--- a/xen/include/asm-arm/smccc.h
+++ b/xen/include/asm-arm/smccc.h
@@ -265,6 +265,7 @@  struct arm_smccc_res {
                        0x7FFF)
 
 /* SMCCC error codes */
+#define ARM_SMCCC_NOT_REQUIRED          (-2)
 #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION  (-1)
 #define ARM_SMCCC_NOT_SUPPORTED         (-1)
 #define ARM_SMCCC_SUCCESS               (0)