Message ID | 20250621235037.74091-68-richard.henderson@linaro.org |
---|---|
State | New |
Headers | show |
Series | target/arm: Implement FEAT_SME2p1 | expand |
On 6/21/25 16:50, Richard Henderson wrote: > This is the single vector version within SVE decode space. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/tcg/translate-sve.c | 22 ++++++++++++++++++++++ > target/arm/tcg/sve.decode | 2 ++ > 2 files changed, 24 insertions(+) > > diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c > index abdcafd952..235022110f 100644 > --- a/target/arm/tcg/translate-sve.c > +++ b/target/arm/tcg/translate-sve.c > @@ -7381,6 +7381,28 @@ static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, > > TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a) > > +static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a) > +{ > + static gen_helper_gvec_3_ptr * const fn[] = { > + gen_helper_sme2_bfclamp, > + gen_helper_sme2_fclamp_h, > + gen_helper_sme2_fclamp_s, > + gen_helper_sme2_fclamp_d, > + }; > + > + /* This insn uses MO_8 to encode BFloat16. */ > + if (a->esz == MO_8 > + ? dc_isar_feature(aa64_sve_b16b16, s) > + : dc_isar_feature(aa64_sme2_or_sve2p1, s)) { Missing !'s. Fixed. r~
Richard Henderson <richard.henderson@linaro.org> writes: > On 6/21/25 16:50, Richard Henderson wrote: >> This is the single vector version within SVE decode space. >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> target/arm/tcg/translate-sve.c | 22 ++++++++++++++++++++++ >> target/arm/tcg/sve.decode | 2 ++ >> 2 files changed, 24 insertions(+) >> diff --git a/target/arm/tcg/translate-sve.c >> b/target/arm/tcg/translate-sve.c >> index abdcafd952..235022110f 100644 >> --- a/target/arm/tcg/translate-sve.c >> +++ b/target/arm/tcg/translate-sve.c >> @@ -7381,6 +7381,28 @@ static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m, >> TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, >> gen_uclamp, a) >> +static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a) >> +{ >> + static gen_helper_gvec_3_ptr * const fn[] = { >> + gen_helper_sme2_bfclamp, >> + gen_helper_sme2_fclamp_h, >> + gen_helper_sme2_fclamp_s, >> + gen_helper_sme2_fclamp_d, >> + }; >> + >> + /* This insn uses MO_8 to encode BFloat16. */ >> + if (a->esz == MO_8 >> + ? dc_isar_feature(aa64_sve_b16b16, s) >> + : dc_isar_feature(aa64_sme2_or_sve2p1, s)) { > > Missing !'s. Fixed. With the fix: Tested-by: Alex Bennée <alex.bennee@linaro.org> That looks like it was the only blocker for the kleidiai tests (causing an unexpected SIGILL). There are failures in some of the tests after but no crashes.
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index abdcafd952..235022110f 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -7381,6 +7381,28 @@ static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
 
 TRANS_FEAT(UCLAMP, aa64_sme_or_sve2p1, gen_gvec_fn_arg_zzzz, gen_uclamp, a)
 
+static bool trans_FCLAMP(DisasContext *s, arg_FCLAMP *a)
+{
+    static gen_helper_gvec_3_ptr * const fn[] = {
+        gen_helper_sme2_bfclamp,
+        gen_helper_sme2_fclamp_h,
+        gen_helper_sme2_fclamp_s,
+        gen_helper_sme2_fclamp_d,
+    };
+
+    /* MO_8 encodes BFloat16 here; reject if the needed feature is absent. */
+    if (a->esz == MO_8
+        ? !dc_isar_feature(aa64_sve_b16b16, s)
+        : !dc_isar_feature(aa64_sme2_or_sve2p1, s)) {
+        return false;
+    }
+
+    /* So far we never optimize rda with MOVPRFX */
+    assert(a->rd == a->ra);
+    return gen_gvec_fpst_zzz(s, fn[a->esz], a->rd, a->rn, a->rm, 1,
+                             a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
+}
+
 TRANS_FEAT(SQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
            gen_helper_sme2_sqcvtn_sh, a->rd, a->rn, 0)
 TRANS_FEAT(UQCVTN_sh, aa64_sme2_or_sve2p1, gen_gvec_ool_zz,
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index 7e29882a71..b0c7b58271 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -1722,3 +1722,5 @@ PSEL 00100101 .1 1 000 .. 01 .... 0 .... 0 .... \
 SCLAMP 01000100 .. 0 ..... 110000 ..... ..... @rda_rn_rm
 UCLAMP 01000100 .. 0 ..... 110001 ..... ..... @rda_rn_rm
+
+FCLAMP 01100100 .. 1 ..... 001001 ..... ..... @rda_rn_rm
This is the single vector version within SVE decode space. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/translate-sve.c | 22 ++++++++++++++++++++++ target/arm/tcg/sve.decode | 2 ++ 2 files changed, 24 insertions(+)