diff mbox series

[v2,067/101] target/arm: Implement FCLAMP for SME2, SVE2p1

Message ID 20250621235037.74091-68-richard.henderson@linaro.org
State New
Headers show
Series target/arm: Implement FEAT_SME2p1 | expand

Commit Message

Richard Henderson June 21, 2025, 11:50 p.m. UTC
This is the single vector version within SVE decode space.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-sve.c | 22 ++++++++++++++++++++++
 target/arm/tcg/sve.decode      |  2 ++
 2 files changed, 24 insertions(+)

Comments

Richard Henderson June 22, 2025, 5:22 p.m. UTC | #1
On 6/21/25 16:50, Richard Henderson wrote:
> This is the single vector version within SVE decode space.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/arm/tcg/translate-sve.c | 22 ++++++++++++++++++++++
>   target/arm/tcg/sve.decode      |  2 ++
>   2 files changed, 24 insertions(+)
> 
> diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
> index abdcafd952..235022110f 100644
> --- a/target/arm/tcg/translate-sve.c
> +++ b/target/arm/tcg/translate-sve.c
> @@ -7381,6 +7381,28 @@ static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
>   
>   TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
>   
> +static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a)
> +{
> +    static gen_helper_gvec_3_ptr * const fn[] = {
> +        gen_helper_sme2_bfclamp,
> +        gen_helper_sme2_fclamp_h,
> +        gen_helper_sme2_fclamp_s,
> +        gen_helper_sme2_fclamp_d,
> +    };
> +
> +    /* This insn uses MO_8 to encode BFloat16. */
> +    if (a->esz == MO_8
> +        ? dc_isar_feature(aa64_sve_b16b16, s)
> +        : dc_isar_feature(aa64_sme2_or_sve2p1, s)) {

Missing !'s.  Fixed.


r~
Alex Bennée June 23, 2025, 9:24 a.m. UTC | #2
Richard Henderson <richard.henderson@linaro.org> writes:

> On 6/21/25 16:50, Richard Henderson wrote:
>> This is the single vector version within SVE decode space.
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>>   target/arm/tcg/translate-sve.c | 22 ++++++++++++++++++++++
>>   target/arm/tcg/sve.decode      |  2 ++
>>   2 files changed, 24 insertions(+)
>> diff --git a/target/arm/tcg/translate-sve.c
>> b/target/arm/tcg/translate-sve.c
>> index abdcafd952..235022110f 100644
>> --- a/target/arm/tcg/translate-sve.c
>> +++ b/target/arm/tcg/translate-sve.c
>> @@ -7381,6 +7381,28 @@ static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
>>     TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz,
>> gen_uclamp, a)
>>   +static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a)
>> +{
>> +    static gen_helper_gvec_3_ptr * const fn[] = {
>> +        gen_helper_sme2_bfclamp,
>> +        gen_helper_sme2_fclamp_h,
>> +        gen_helper_sme2_fclamp_s,
>> +        gen_helper_sme2_fclamp_d,
>> +    };
>> +
>> +    /* This insn uses MO_8 to encode BFloat16. */
>> +    if (a->esz == MO_8
>> +        ? dc_isar_feature(aa64_sve_b16b16, s)
>> +        : dc_isar_feature(aa64_sme2_or_sve2p1, s)) {
>
> Missing !'s.  Fixed.

With the fix:

Tested-by: Alex Bennée <alex.bennee@linaro.org>

That looks like it was the only blocker for the kleidiai tests (causing
an unexpected SIGILL). Some of the tests still fail after that, but there
are no crashes.
diff mbox series

Patch

diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index abdcafd952..235022110f 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -7381,6 +7381,28 @@  static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
 
 TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
 
+static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a)
+{
+    static gen_helper_gvec_3_ptr * const fn[] = {
+        gen_helper_sme2_bfclamp,
+        gen_helper_sme2_fclamp_h,
+        gen_helper_sme2_fclamp_s,
+        gen_helper_sme2_fclamp_d,
+    };
+
+    /* MO_8 encodes BFloat16 here; return false if the feature is absent. */
+    if (a->esz == MO_8
+        ? !dc_isar_feature(aa64_sve_b16b16, s)
+        : !dc_isar_feature(aa64_sme2_or_sve2p1, s)) {
+        return false;
+    }
+
+    /* So far we never optimize rda with MOVPRFX */
+    assert(a->rd == a->ra);
+    return gen_gvec_fpst_zzz(s, fn[a->esz], a->rd, a->rn, a->rm, 1,
+                             a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+}
+
 TRANS_FEAT(SQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
            gen_helper_sme2_sqcvtn_sh, a->rd, a->rn, 0)
 TRANS_FEAT(UQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index 7e29882a71..b0c7b58271 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -1722,3 +1722,5 @@  PSEL            00100101 .1 1 000 .. 01 .... 0 .... 0 ....  \
 
 SCLAMP          01000100 .. 0 ..... 110000 ..... .....          @rda_rn_rm
 UCLAMP          01000100 .. 0 ..... 110001 ..... .....          @rda_rn_rm
+
+FCLAMP          01100100 .. 1 ..... 001001 ..... .....          @rda_rn_rm