diff mbox series

[v2,1/6] linux-user/aarch64: Extend PR_SET_TAGGED_ADDR_CTRL for FEAT_MTE3

Message ID 20240206030527.169147-2-richard.henderson@linaro.org
State New
Headers show
Series target/arm: assorted mte fixes | expand

Commit Message

Richard Henderson Feb. 6, 2024, 3:05 a.m. UTC
When MTE3 is supported, the kernel maps
  PR_MTE_TCF_ASYNC | PR_MTE_TCF_SYNC
to
  MTE_CTRL_TCF_ASYMM
and from there to
  SCTLR_EL1.TCF0 = 3

There is no error reported for setting ASYNC | SYNC when MTE3 is not
supported; the kernel simply selects the ASYNC behavior of TCF0=2.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 linux-user/aarch64/target_prctl.h | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

Comments

Peter Maydell Feb. 6, 2024, 2:23 p.m. UTC | #1
On Tue, 6 Feb 2024 at 03:06, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> When MTE3 is supported, the kernel maps
>   PR_MTE_TCF_ASYNC | PR_MTE_TCF_SYNC
> to
>   MTE_CTRL_TCF_ASYMM
> and from there to
>   SCTLR_EL1.TCF0 = 3

This depends on the setting of
/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred :
I think you only get asymm here if the sysadmin has set
mte_tcf_preferred to 'asymm'; the default is 'async'.

https://www.kernel.org/doc/Documentation/arch/arm64/memory-tagging-extension.rst
documents the intended semantics of the prctl, though it does have
one error (the default-order is asymm; async; sync, not async; asymm; sync).

> There is no error reported for setting ASYNC | SYNC when MTE3 is not
> supported; the kernel simply selects the ASYNC behavior of TCF0=2.

For QEMU's implementation, are there any particular
performance differences between sync, async and asymm ?
That might determine what we effectively consider to be the
mte_tcf_preferred setting for our user-mode CPUs.

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  linux-user/aarch64/target_prctl.h | 25 +++++++++++++------------
>  1 file changed, 13 insertions(+), 12 deletions(-)
>
> diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
> index 5067e7d731..49bd16aa95 100644
> --- a/linux-user/aarch64/target_prctl.h
> +++ b/linux-user/aarch64/target_prctl.h
> @@ -173,21 +173,22 @@ static abi_long do_prctl_set_tagged_addr_ctrl(CPUArchState *env, abi_long arg2)
>      env->tagged_addr_enable = arg2 & PR_TAGGED_ADDR_ENABLE;
>
>      if (cpu_isar_feature(aa64_mte, cpu)) {
> -        switch (arg2 & PR_MTE_TCF_MASK) {
> -        case PR_MTE_TCF_NONE:
> -        case PR_MTE_TCF_SYNC:
> -        case PR_MTE_TCF_ASYNC:
> -            break;
> -        default:
> -            return -EINVAL;
> -        }

We should probably check here and reject unknown bits being
set in arg2, as set_tagged_addr_ctrl() does; but the old
code didn't get that right either.

> -
>          /*
>           * Write PR_MTE_TCF to SCTLR_EL1[TCF0].
> -         * Note that the syscall values are consistent with hw.
> +         * Note that SYNC | ASYNC -> ASYMM with FEAT_MTE3,
> +         * otherwise mte_update_sctlr_user chooses ASYNC.
>           */
> -        env->cp15.sctlr_el[1] =
> -            deposit64(env->cp15.sctlr_el[1], 38, 2, arg2 >> PR_MTE_TCF_SHIFT);
> +        unsigned tcf = 0;
> +        if (arg2 & PR_MTE_TCF_ASYNC) {
> +            if ((arg2 & PR_MTE_TCF_SYNC) && cpu_isar_feature(aa64_mte3, cpu)) {
> +                tcf = 3;
> +            } else {
> +                tcf = 2;
> +            }
> +        } else if (arg2 & PR_MTE_TCF_SYNC) {
> +            tcf = 1;
> +        }
> +        env->cp15.sctlr_el[1] = deposit64(env->cp15.sctlr_el[1], 38, 2, tcf);
>
>          /*
>           * Write PR_MTE_TAG to GCR_EL1[Exclude].
> --
> 2.34.1

thanks
-- PMM
Gustavo Romero Feb. 6, 2024, 3:18 p.m. UTC | #2
On 2/6/24 12:05 AM, Richard Henderson wrote:
> When MTE3 is supported, the kernel maps
>    PR_MTE_TCF_ASYNC | PR_MTE_TCF_SYNC
> to
>    MTE_CTRL_TCF_ASYMM
> and from there to
>    SCTLR_EL1.TCF0 = 3
> 
> There is no error reported for setting ASYNC | SYNC when MTE3 is not
> supported; the kernel simply selects the ASYNC behavior of TCF0=2.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   linux-user/aarch64/target_prctl.h | 25 +++++++++++++------------
>   1 file changed, 13 insertions(+), 12 deletions(-)
> 
> diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
> index 5067e7d731..49bd16aa95 100644
> --- a/linux-user/aarch64/target_prctl.h
> +++ b/linux-user/aarch64/target_prctl.h
> @@ -173,21 +173,22 @@ static abi_long do_prctl_set_tagged_addr_ctrl(CPUArchState *env, abi_long arg2)
>       env->tagged_addr_enable = arg2 & PR_TAGGED_ADDR_ENABLE;
>   
>       if (cpu_isar_feature(aa64_mte, cpu)) {
> -        switch (arg2 & PR_MTE_TCF_MASK) {
> -        case PR_MTE_TCF_NONE:
> -        case PR_MTE_TCF_SYNC:
> -        case PR_MTE_TCF_ASYNC:
> -            break;
> -        default:
> -            return -EINVAL;
> -        }
> -
>           /*
>            * Write PR_MTE_TCF to SCTLR_EL1[TCF0].
> -         * Note that the syscall values are consistent with hw.
> +         * Note that SYNC | ASYNC -> ASYMM with FEAT_MTE3,
> +         * otherwise mte_update_sctlr_user chooses ASYNC.
>            */
> -        env->cp15.sctlr_el[1] =
> -            deposit64(env->cp15.sctlr_el[1], 38, 2, arg2 >> PR_MTE_TCF_SHIFT);
> +        unsigned tcf = 0;
> +        if (arg2 & PR_MTE_TCF_ASYNC) {
> +            if ((arg2 & PR_MTE_TCF_SYNC) && cpu_isar_feature(aa64_mte3, cpu)) {
> +                tcf = 3;
> +            } else {
> +                tcf = 2;
> +            }
> +        } else if (arg2 & PR_MTE_TCF_SYNC) {
> +            tcf = 1;
> +        }
> +        env->cp15.sctlr_el[1] = deposit64(env->cp15.sctlr_el[1], 38, 2, tcf);
>   
>           /*
>            * Write PR_MTE_TAG to GCR_EL1[Exclude].
> 

Reviewed-by: Gustavo Romero <gustavo.romero@linaro.org>
Richard Henderson Feb. 7, 2024, 12:31 a.m. UTC | #3
On 2/7/24 00:23, Peter Maydell wrote:
> On Tue, 6 Feb 2024 at 03:06, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> When MTE3 is supported, the kernel maps
>>    PR_MTE_TCF_ASYNC | PR_MTE_TCF_SYNC
>> to
>>    MTE_CTRL_TCF_ASYMM
>> and from there to
>>    SCTLR_EL1.TCF0 = 3
> 
> This depends on the setting of
> /sys/devices/system/cpu/cpu<N>/mte_tcf_preferred :
> I think you only get asymm here if the sysadmin has set
> mte_tcf_preferred to 'asymm'; the default is 'async'.

Hmm, I missed that somewhere in the rat's nest.
I suspect this is over-engineered, such that no one will understand how to use it.

> For QEMU's implementation, are there any particular
> performance differences between sync, async and asymm ?

I doubt it.  Getting to the error path at all is the bulk of the work.

I think "performance" in this case would be highly test-case-centric.
Does the test "perform better" with async, which would allow the entire vector operation 
to finish in one go?

I suspect that for debugging purposes, sync is always preferred.
That might be the best setting for qemu.


r~
Richard Henderson Feb. 7, 2024, 2:10 a.m. UTC | #4
On 2/7/24 00:23, Peter Maydell wrote:
>> +++ b/linux-user/aarch64/target_prctl.h
>> @@ -173,21 +173,22 @@ static abi_long do_prctl_set_tagged_addr_ctrl(CPUArchState *env, abi_long arg2)
>>       env->tagged_addr_enable = arg2 & PR_TAGGED_ADDR_ENABLE;
>>
>>       if (cpu_isar_feature(aa64_mte, cpu)) {
>> -        switch (arg2 & PR_MTE_TCF_MASK) {
>> -        case PR_MTE_TCF_NONE:
>> -        case PR_MTE_TCF_SYNC:
>> -        case PR_MTE_TCF_ASYNC:
>> -            break;
>> -        default:
>> -            return -EINVAL;
>> -        }
> 
> We should probably check here and reject unknown bits being
> set in arg2, as set_tagged_addr_ctrl() does; but the old
> code didn't get that right either.

This is done higher up in this function:

     if (arg2 & ~valid_mask) {
         return -TARGET_EINVAL;
     }

The rejection of ASYNC | SYNC here was either a bug in my original implementation, or the 
kernel API changed since the initial implementation in June 2020 (not worth digging to 
find out).


r~
diff mbox series

Patch

diff --git a/linux-user/aarch64/target_prctl.h b/linux-user/aarch64/target_prctl.h
index 5067e7d731..49bd16aa95 100644
--- a/linux-user/aarch64/target_prctl.h
+++ b/linux-user/aarch64/target_prctl.h
@@ -173,21 +173,22 @@  static abi_long do_prctl_set_tagged_addr_ctrl(CPUArchState *env, abi_long arg2)
     env->tagged_addr_enable = arg2 & PR_TAGGED_ADDR_ENABLE;
 
     if (cpu_isar_feature(aa64_mte, cpu)) {
-        switch (arg2 & PR_MTE_TCF_MASK) {
-        case PR_MTE_TCF_NONE:
-        case PR_MTE_TCF_SYNC:
-        case PR_MTE_TCF_ASYNC:
-            break;
-        default:
-            return -EINVAL;
-        }
-
         /*
          * Write PR_MTE_TCF to SCTLR_EL1[TCF0].
-         * Note that the syscall values are consistent with hw.
+         * Note that SYNC | ASYNC -> ASYMM with FEAT_MTE3,
+         * otherwise mte_update_sctlr_user chooses ASYNC.
          */
-        env->cp15.sctlr_el[1] =
-            deposit64(env->cp15.sctlr_el[1], 38, 2, arg2 >> PR_MTE_TCF_SHIFT);
+        unsigned tcf = 0;
+        if (arg2 & PR_MTE_TCF_ASYNC) {
+            if ((arg2 & PR_MTE_TCF_SYNC) && cpu_isar_feature(aa64_mte3, cpu)) {
+                tcf = 3;
+            } else {
+                tcf = 2;
+            }
+        } else if (arg2 & PR_MTE_TCF_SYNC) {
+            tcf = 1;
+        }
+        env->cp15.sctlr_el[1] = deposit64(env->cp15.sctlr_el[1], 38, 2, tcf);
 
         /*
          * Write PR_MTE_TAG to GCR_EL1[Exclude].