[Xen-devel,v2,11/15] xen/arm64: Add ARM_SMCCC_ARCH_WORKAROUND_1 BP hardening support

Message ID 20180208192203.9556-12-julien.grall@arm.com
State Superseded
Headers show
Series
  • xen/arm: PSCI 1.1 and SMCCC-1.1 support and XSA-254 variant 2 update
Related show

Commit Message

Julien Grall Feb. 8, 2018, 7:21 p.m.
Add the detection and runtime code for ARM_SMCCC_ARCH_WORKAROUND_1.

Signed-off-by: Julien Grall <julien.grall@arm.com>

---
    Changes in v2:
        - Patch added
---
 xen/arch/arm/arm64/bpi.S    | 12 ++++++++++++
 xen/arch/arm/cpuerrata.c    | 32 +++++++++++++++++++++++++++++++-
 xen/include/asm-arm/smccc.h |  1 +
 3 files changed, 44 insertions(+), 1 deletion(-)

Comments

Volodymyr Babchuk Feb. 12, 2018, 4:55 p.m. | #1
Hi Julien,

On 08.02.18 21:21, Julien Grall wrote:
> Add the detection and runtime code for ARM_SMCCC_ARCH_WORKAROUND_1.
> 
> Signed-off-by: Julien Grall <julien.grall@arm.com>
> 
> ---
>      Changes in v2:
>          - Patch added
> ---
>   xen/arch/arm/arm64/bpi.S    | 12 ++++++++++++
>   xen/arch/arm/cpuerrata.c    | 32 +++++++++++++++++++++++++++++++-
>   xen/include/asm-arm/smccc.h |  1 +
>   3 files changed, 44 insertions(+), 1 deletion(-)
> 
> diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S
> index 4b7f1dc21f..ef237de7bd 100644
> --- a/xen/arch/arm/arm64/bpi.S
> +++ b/xen/arch/arm/arm64/bpi.S
> @@ -16,6 +16,8 @@
>    * along with this program.  If not, see <http://www.gnu.org/licenses/>.
>    */
>   
> +#include <asm/smccc.h>
> +
>   .macro ventry target
>       .rept 31
>       nop
> @@ -81,6 +83,16 @@ ENTRY(__psci_hyp_bp_inval_start)
>       add     sp, sp, #(8 * 18)
>   ENTRY(__psci_hyp_bp_inval_end)
>   
> +ENTRY(__smccc_workaround_1_smc_start)
> +    sub     sp, sp, #(8 * 4)
> +    stp     x2, x3, [sp, #(8 * 0)]
> +    stp     x0, x1, [sp, #(8 * 2)]
> +    mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
> +    ldp     x2, x3, [sp, #(8 * 0)]
> +    ldp     x0, x1, [sp, #(8 * 2)]
> +    add     sp, sp, #(8 * 4)
> +ENTRY(__smccc_workaround_1_smc_end)
> +

This code confuses me. You allocate 32 bytes on the stack, save x0-x3 there, 
then you load ARM_SMCCC_ARCH_WORKAROUND_1_FID into w0 and restore the values 
of x0-x3, overwriting the value written into w0. Am I missing something?

Btw, you can use something like stp	x0, x1, [sp, #-16]! to avoid manual 
adjustment of sp. This will save you two instructions.

>   /*
>    * Local variables:
>    * mode: ASM
> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
> index 6704648b26..6557577bcb 100644
> --- a/xen/arch/arm/cpuerrata.c
> +++ b/xen/arch/arm/cpuerrata.c
> @@ -147,6 +147,34 @@ install_bp_hardening_vec(const struct arm_cpu_capabilities *entry,
>       return ret;
>   }
>   
> +extern char __smccc_workaround_1_smc_start[], __smccc_workaround_1_smc_end[];
> +
> +static bool
> +check_smccc_arch_workaround_1(const struct arm_cpu_capabilities *entry)
> +{
> +    struct arm_smccc_res res;
> +
> +    /*
> +     * Enable callbacks are called on every CPU based on the
> +     * capabilities. So double-check whether the CPU matches the
> +     * entry.
> +     */
> +    if ( !entry->matches(entry) )
> +        return false;
> +
> +    if ( smccc_ver < SMCCC_VERSION(1, 1) )
> +        return false;
> +
> +    arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
> +                      ARM_SMCCC_ARCH_WORKAROUND_1_FID, &res);
> +    if ( res.a0 != ARM_SMCCC_SUCCESS )
> +        return false;
> +
> +    return install_bp_hardening_vec(entry,__smccc_workaround_1_smc_start,
> +                                    __smccc_workaround_1_smc_end,
> +                                    "call ARM_SMCCC_ARCH_WORKAROUND_1");
> +}
> +
>   extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[];
>   
>   static int enable_psci_bp_hardening(void *data)
> @@ -154,12 +182,14 @@ static int enable_psci_bp_hardening(void *data)
>       bool ret = true;
>       static bool warned = false;
>   
> +    if ( check_smccc_arch_workaround_1(data) )
> +        return 0;
>       /*
>        * The mitigation is using PSCI version function to invalidate the
>        * branch predictor. This function is only available with PSCI 0.2
>        * and later.
>        */
> -    if ( psci_ver >= PSCI_VERSION(0, 2) )
> +    else if ( psci_ver >= PSCI_VERSION(0, 2) )
>           ret = install_bp_hardening_vec(data, __psci_hyp_bp_inval_start,
>                                          __psci_hyp_bp_inval_end,
>                                          "call PSCI get version");
> diff --git a/xen/include/asm-arm/smccc.h b/xen/include/asm-arm/smccc.h
> index 154772b728..8342cc33fe 100644
> --- a/xen/include/asm-arm/smccc.h
> +++ b/xen/include/asm-arm/smccc.h
> @@ -261,6 +261,7 @@ struct arm_smccc_res {
>   /* SMCCC error codes */
>   #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION  (-1)
>   #define ARM_SMCCC_NOT_SUPPORTED         (-1)
> +#define ARM_SMCCC_SUCCESS               (0)
>   
>   /* SMCCC function identifier range which is reserved for existing APIs */
>   #define ARM_SMCCC_RESERVED_RANGE_START  0x0
>
Julien Grall Feb. 12, 2018, 5:12 p.m. | #2
On 12/02/18 16:55, Volodymyr Babchuk wrote:
> Hi Julien,

Hi Volodymyr,

> On 08.02.18 21:21, Julien Grall wrote:
>> Add the detection and runtime code for ARM_SMCCC_ARCH_WORKAROUND_1.
>>
>> Signed-off-by: Julien Grall <julien.grall@arm.com>
>>
>> ---
>>      Changes in v2:
>>          - Patch added
>> ---
>>   xen/arch/arm/arm64/bpi.S    | 12 ++++++++++++
>>   xen/arch/arm/cpuerrata.c    | 32 +++++++++++++++++++++++++++++++-
>>   xen/include/asm-arm/smccc.h |  1 +
>>   3 files changed, 44 insertions(+), 1 deletion(-)
>>
>> diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S
>> index 4b7f1dc21f..ef237de7bd 100644
>> --- a/xen/arch/arm/arm64/bpi.S
>> +++ b/xen/arch/arm/arm64/bpi.S
>> @@ -16,6 +16,8 @@
>>    * along with this program.  If not, see 
>> <http://www.gnu.org/licenses/>.
>>    */
>> +#include <asm/smccc.h>
>> +
>>   .macro ventry target
>>       .rept 31
>>       nop
>> @@ -81,6 +83,16 @@ ENTRY(__psci_hyp_bp_inval_start)
>>       add     sp, sp, #(8 * 18)
>>   ENTRY(__psci_hyp_bp_inval_end)
>> +ENTRY(__smccc_workaround_1_smc_start)
>> +    sub     sp, sp, #(8 * 4)
>> +    stp     x2, x3, [sp, #(8 * 0)]
>> +    stp     x0, x1, [sp, #(8 * 2)]
>> +    mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
>> +    ldp     x2, x3, [sp, #(8 * 0)]
>> +    ldp     x0, x1, [sp, #(8 * 2)]
>> +    add     sp, sp, #(8 * 4)
>> +ENTRY(__smccc_workaround_1_smc_end)
>> +
> 
> This code confuses me. You allocate 32 bytes on stack, save x0-x4 there, 
> then you load ARM_SMCCC_ARCH_WORKAROUND_1_FID into w0 and restore values 
> of x0-x4, overwriting value written into w0. Am I missing something?

The call to ARM_SMCCC_ARCH_WORKAROUND_1 does not return any value. Even 
if it did, this code is executed on exception entry before jumping into 
the trap helper. So you want to restore all the registers saved.

> 
> Btw, you can use something like stp    x0, x1, [sp, #-16]! to avoid 
> manual adjustment of sp. This will save you two instructions.

It was pointed out on the Linux Arm mailing list that updating sp once 
*might* be faster on some uarch.

> 
>>   /*
>>    * Local variables:
>>    * mode: ASM
>> diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
>> index 6704648b26..6557577bcb 100644
>> --- a/xen/arch/arm/cpuerrata.c
>> +++ b/xen/arch/arm/cpuerrata.c
>> @@ -147,6 +147,34 @@ install_bp_hardening_vec(const struct 
>> arm_cpu_capabilities *entry,
>>       return ret;
>>   }
>> +extern char __smccc_workaround_1_smc_start[], 
>> __smccc_workaround_1_smc_end[];
>> +
>> +static bool
>> +check_smccc_arch_workaround_1(const struct arm_cpu_capabilities *entry)
>> +{
>> +    struct arm_smccc_res res;
>> +
>> +    /*
>> +     * Enable callbacks are called on every CPU based on the
>> +     * capabilities. So double-check whether the CPU matches the
>> +     * entry.
>> +     */
>> +    if ( !entry->matches(entry) )
>> +        return false;
>> +
>> +    if ( smccc_ver < SMCCC_VERSION(1, 1) )
>> +        return false;
>> +
>> +    arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
>> +                      ARM_SMCCC_ARCH_WORKAROUND_1_FID, &res);
>> +    if ( res.a0 != ARM_SMCCC_SUCCESS )
>> +        return false;
>> +
>> +    return 
>> install_bp_hardening_vec(entry,__smccc_workaround_1_smc_start,
>> +                                    __smccc_workaround_1_smc_end,
>> +                                    "call ARM_SMCCC_ARCH_WORKAROUND_1");
>> +}
>> +
>>   extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[];
>>   static int enable_psci_bp_hardening(void *data)
>> @@ -154,12 +182,14 @@ static int enable_psci_bp_hardening(void *data)
>>       bool ret = true;
>>       static bool warned = false;
>> +    if ( check_smccc_arch_workaround_1(data) )
>> +        return 0;
>>       /*
>>        * The mitigation is using PSCI version function to invalidate the
>>        * branch predictor. This function is only available with PSCI 0.2
>>        * and later.
>>        */
>> -    if ( psci_ver >= PSCI_VERSION(0, 2) )
>> +    else if ( psci_ver >= PSCI_VERSION(0, 2) )
>>           ret = install_bp_hardening_vec(data, __psci_hyp_bp_inval_start,
>>                                          __psci_hyp_bp_inval_end,
>>                                          "call PSCI get version");
>> diff --git a/xen/include/asm-arm/smccc.h b/xen/include/asm-arm/smccc.h
>> index 154772b728..8342cc33fe 100644
>> --- a/xen/include/asm-arm/smccc.h
>> +++ b/xen/include/asm-arm/smccc.h
>> @@ -261,6 +261,7 @@ struct arm_smccc_res {
>>   /* SMCCC error codes */
>>   #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION  (-1)
>>   #define ARM_SMCCC_NOT_SUPPORTED         (-1)
>> +#define ARM_SMCCC_SUCCESS               (0)
>>   /* SMCCC function identifier range which is reserved for existing 
>> APIs */
>>   #define ARM_SMCCC_RESERVED_RANGE_START  0x0
>>
> 

Cheers,
Volodymyr Babchuk Feb. 12, 2018, 5:20 p.m. | #3
Julien,

On 12.02.18 19:12, Julien Grall wrote:
> On 12/02/18 16:55, Volodymyr Babchuk wrote:
>> Hi Julien,
> 
> Hi Volodymyr,
> 
>> On 08.02.18 21:21, Julien Grall wrote:
>>> Add the detection and runtime code for ARM_SMCCC_ARCH_WORKAROUND_1.
>>>
>>> Signed-off-by: Julien Grall <julien.grall@arm.com>
>>>
>>> ---
>>>      Changes in v2:
>>>          - Patch added
>>> ---
>>>   xen/arch/arm/arm64/bpi.S    | 12 ++++++++++++
>>>   xen/arch/arm/cpuerrata.c    | 32 +++++++++++++++++++++++++++++++-
>>>   xen/include/asm-arm/smccc.h |  1 +
>>>   3 files changed, 44 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S
>>> index 4b7f1dc21f..ef237de7bd 100644
>>> --- a/xen/arch/arm/arm64/bpi.S
>>> +++ b/xen/arch/arm/arm64/bpi.S
>>> @@ -16,6 +16,8 @@
>>>    * along with this program.  If not, see 
>>> <http://www.gnu.org/licenses/>.
>>>    */
>>> +#include <asm/smccc.h>
>>> +
>>>   .macro ventry target
>>>       .rept 31
>>>       nop
>>> @@ -81,6 +83,16 @@ ENTRY(__psci_hyp_bp_inval_start)
>>>       add     sp, sp, #(8 * 18)
>>>   ENTRY(__psci_hyp_bp_inval_end)
>>> +ENTRY(__smccc_workaround_1_smc_start)
>>> +    sub     sp, sp, #(8 * 4)
>>> +    stp     x2, x3, [sp, #(8 * 0)]
>>> +    stp     x0, x1, [sp, #(8 * 2)]
>>> +    mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
>>> +    ldp     x2, x3, [sp, #(8 * 0)]
>>> +    ldp     x0, x1, [sp, #(8 * 2)]
>>> +    add     sp, sp, #(8 * 4)
>>> +ENTRY(__smccc_workaround_1_smc_end)
>>> +
>>
>> This code confuses me. You allocate 32 bytes on stack, save x0-x4 
>> there, then you load ARM_SMCCC_ARCH_WORKAROUND_1_FID into w0 and 
>> restore values of x0-x4, overwriting value written into w0. Am I 
>> missing something?
> 
> The call to ARM_SMCCC_ARCH_WORKAROUND_1 does not return any value. Even 
> if it were, this code is executed on exception entry before jumping into 
> the trap helper. So you want to restore all the registers saved.
> 
I believe you missed the smc instruction in the code above.

>>
>> Btw, you can use something like stp    x0, x1, [sp, #-16]! to avoid 
>> manual adjustment of sp. This will save you two instructions.
> 
> It was pointed out on Linux Arm that updating sp once *might* be faster 
> on some uarch.

So is this code targeted at some specific uarch? If so, I would 
like to see a comment describing why you chose this approach.

[...]
Julien Grall Feb. 12, 2018, 5:26 p.m. | #4
On 12/02/18 17:20, Volodymyr Babchuk wrote:
> Julien,

Hi,

> On 12.02.18 19:12, Julien Grall wrote:
>> On 12/02/18 16:55, Volodymyr Babchuk wrote:
>>> Hi Julien,
>>
>> Hi Volodymyr,
>>
>>> On 08.02.18 21:21, Julien Grall wrote:
>>>> Add the detection and runtime code for ARM_SMCCC_ARCH_WORKAROUND_1.
>>>>
>>>> Signed-off-by: Julien Grall <julien.grall@arm.com>
>>>>
>>>> ---
>>>>      Changes in v2:
>>>>          - Patch added
>>>> ---
>>>>   xen/arch/arm/arm64/bpi.S    | 12 ++++++++++++
>>>>   xen/arch/arm/cpuerrata.c    | 32 +++++++++++++++++++++++++++++++-
>>>>   xen/include/asm-arm/smccc.h |  1 +
>>>>   3 files changed, 44 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S
>>>> index 4b7f1dc21f..ef237de7bd 100644
>>>> --- a/xen/arch/arm/arm64/bpi.S
>>>> +++ b/xen/arch/arm/arm64/bpi.S
>>>> @@ -16,6 +16,8 @@
>>>>    * along with this program.  If not, see 
>>>> <http://www.gnu.org/licenses/>.
>>>>    */
>>>> +#include <asm/smccc.h>
>>>> +
>>>>   .macro ventry target
>>>>       .rept 31
>>>>       nop
>>>> @@ -81,6 +83,16 @@ ENTRY(__psci_hyp_bp_inval_start)
>>>>       add     sp, sp, #(8 * 18)
>>>>   ENTRY(__psci_hyp_bp_inval_end)
>>>> +ENTRY(__smccc_workaround_1_smc_start)
>>>> +    sub     sp, sp, #(8 * 4)
>>>> +    stp     x2, x3, [sp, #(8 * 0)]
>>>> +    stp     x0, x1, [sp, #(8 * 2)]
>>>> +    mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
>>>> +    ldp     x2, x3, [sp, #(8 * 0)]
>>>> +    ldp     x0, x1, [sp, #(8 * 2)]
>>>> +    add     sp, sp, #(8 * 4)
>>>> +ENTRY(__smccc_workaround_1_smc_end)
>>>> +
>>>
>>> This code confuses me. You allocate 32 bytes on stack, save x0-x4 
>>> there, then you load ARM_SMCCC_ARCH_WORKAROUND_1_FID into w0 and 
>>> restore values of x0-x4, overwriting value written into w0. Am I 
>>> missing something?
>>
>> The call to ARM_SMCCC_ARCH_WORKAROUND_1 does not return any value. 
>> Even if it were, this code is executed on exception entry before 
>> jumping into the trap helper. So you want to restore all the registers 
>> saved.
>>
> I believe you missed smc instruction in the code above.

Whoops yes. I will fix it.

> 
>>>
>>> Btw, you can use something like stp    x0, x1, [sp, #-16]! to avoid 
>>> manual adjustment of sp. This will save you two instructions.
>>
>> It was pointed out on Linux Arm that updating sp once *might* be 
>> faster on some uarch.
> 
> So is this code is targeted for that some specific uarch? Then I would 
> like to see a comment describing why you choose this approach.

I can't confirm whether this will improve uarch A, B, C or Z. I just 
followed the suggestion on Linux Arm (see [1]) and my personal preference 
on how to write assembly code. The two forms are quite similar, so why 
would I choose the other way around?

Cheers,

[1] https://www.spinics.net/lists/arm-kernel/msg626659.html

Patch

diff --git a/xen/arch/arm/arm64/bpi.S b/xen/arch/arm/arm64/bpi.S
index 4b7f1dc21f..ef237de7bd 100644
--- a/xen/arch/arm/arm64/bpi.S
+++ b/xen/arch/arm/arm64/bpi.S
@@ -16,6 +16,8 @@ 
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <asm/smccc.h>
+
 .macro ventry target
     .rept 31
     nop
@@ -81,6 +83,16 @@  ENTRY(__psci_hyp_bp_inval_start)
     add     sp, sp, #(8 * 18)
 ENTRY(__psci_hyp_bp_inval_end)
 
+ENTRY(__smccc_workaround_1_smc_start)
+    sub     sp, sp, #(8 * 4)
+    stp     x2, x3, [sp, #(8 * 0)]
+    stp     x0, x1, [sp, #(8 * 2)]
+    mov     w0, #ARM_SMCCC_ARCH_WORKAROUND_1_FID
+    ldp     x2, x3, [sp, #(8 * 0)]
+    ldp     x0, x1, [sp, #(8 * 2)]
+    add     sp, sp, #(8 * 4)
+ENTRY(__smccc_workaround_1_smc_end)
+
 /*
  * Local variables:
  * mode: ASM
diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c
index 6704648b26..6557577bcb 100644
--- a/xen/arch/arm/cpuerrata.c
+++ b/xen/arch/arm/cpuerrata.c
@@ -147,6 +147,34 @@  install_bp_hardening_vec(const struct arm_cpu_capabilities *entry,
     return ret;
 }
 
+extern char __smccc_workaround_1_smc_start[], __smccc_workaround_1_smc_end[];
+
+static bool
+check_smccc_arch_workaround_1(const struct arm_cpu_capabilities *entry)
+{
+    struct arm_smccc_res res;
+
+    /*
+     * Enable callbacks are called on every CPU based on the
+     * capabilities. So double-check whether the CPU matches the
+     * entry.
+     */
+    if ( !entry->matches(entry) )
+        return false;
+
+    if ( smccc_ver < SMCCC_VERSION(1, 1) )
+        return false;
+
+    arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FID,
+                      ARM_SMCCC_ARCH_WORKAROUND_1_FID, &res);
+    if ( res.a0 != ARM_SMCCC_SUCCESS )
+        return false;
+
+    return install_bp_hardening_vec(entry,__smccc_workaround_1_smc_start,
+                                    __smccc_workaround_1_smc_end,
+                                    "call ARM_SMCCC_ARCH_WORKAROUND_1");
+}
+
 extern char __psci_hyp_bp_inval_start[], __psci_hyp_bp_inval_end[];
 
 static int enable_psci_bp_hardening(void *data)
@@ -154,12 +182,14 @@  static int enable_psci_bp_hardening(void *data)
     bool ret = true;
     static bool warned = false;
 
+    if ( check_smccc_arch_workaround_1(data) )
+        return 0;
     /*
      * The mitigation is using PSCI version function to invalidate the
      * branch predictor. This function is only available with PSCI 0.2
      * and later.
      */
-    if ( psci_ver >= PSCI_VERSION(0, 2) )
+    else if ( psci_ver >= PSCI_VERSION(0, 2) )
         ret = install_bp_hardening_vec(data, __psci_hyp_bp_inval_start,
                                        __psci_hyp_bp_inval_end,
                                        "call PSCI get version");
diff --git a/xen/include/asm-arm/smccc.h b/xen/include/asm-arm/smccc.h
index 154772b728..8342cc33fe 100644
--- a/xen/include/asm-arm/smccc.h
+++ b/xen/include/asm-arm/smccc.h
@@ -261,6 +261,7 @@  struct arm_smccc_res {
 /* SMCCC error codes */
 #define ARM_SMCCC_ERR_UNKNOWN_FUNCTION  (-1)
 #define ARM_SMCCC_NOT_SUPPORTED         (-1)
+#define ARM_SMCCC_SUCCESS               (0)
 
 /* SMCCC function identifier range which is reserved for existing APIs */
 #define ARM_SMCCC_RESERVED_RANGE_START  0x0