@@ -213,3 +213,8 @@ DEF_HELPER_FLAGS_5(sme2_umlsll_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr,
DEF_HELPER_FLAGS_5(sme2_umlsll_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sme2_usmlall_idx_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sme2_usmlall_idx_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_4(sme2_bfcvt, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sme2_bfcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sme2_fcvt_n, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sme2_fcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
@@ -1488,3 +1488,77 @@ DO_MLALL_IDX(sme2_usmlall_idx_s, uint32_t, uint8_t, int8_t, H4, H1, +)
DO_MLALL_IDX(sme2_usmlall_idx_d, uint64_t, uint16_t, int16_t, H8, H2, +)
#undef DO_MLALL_IDX
+
+/* Convert and compress */
+void HELPER(sme2_bfcvt)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+ ARMVectorReg scratch __attribute__((uninitialized));
+ size_t oprsz = simd_oprsz(desc);
+ size_t i, n = oprsz / 4;
+ float32 *s0 = vs;
+ float32 *s1 = vs + sizeof(ARMVectorReg);
+ bfloat16 *d = vd;
+
+ if (vd == s1) {
+ s1 = memcpy(&scratch, s1, oprsz);
+ }
+
+ for (i = 0; i < n; ++i) {
+ d[H2(i)] = float32_to_bfloat16(s0[H4(i)], fpst);
+ }
+ for (i = 0; i < n; ++i) {
+ d[H2(i) + n] = float32_to_bfloat16(s1[H4(i)], fpst);
+ }
+}
+
+void HELPER(sme2_fcvt_n)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+ ARMVectorReg scratch __attribute__((uninitialized));
+ size_t oprsz = simd_oprsz(desc);
+ size_t i, n = oprsz / 4;
+ float32 *s0 = vs;
+ float32 *s1 = vs + sizeof(ARMVectorReg);
+ float16 *d = vd;
+
+ if (vd == s1) {
+ s1 = memcpy(&scratch, s1, oprsz);
+ }
+
+ for (i = 0; i < n; ++i) {
+ d[H2(i)] = float32_to_float16(s0[H4(i)], true, fpst);
+ }
+ for (i = 0; i < n; ++i) {
+ d[H2(i) + n] = float32_to_float16(s1[H4(i)], true, fpst);
+ }
+}
+
+/* Convert and interleave */
+void HELPER(sme2_bfcvtn)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+ size_t i, n = simd_oprsz(desc) / 4;
+ float32 *s0 = vs;
+ float32 *s1 = vs + sizeof(ARMVectorReg);
+ bfloat16 *d = vd;
+
+ for (i = 0; i < n; ++i) {
+ bfloat16 d0 = float32_to_bfloat16(s0[H4(i)], fpst);
+ bfloat16 d1 = float32_to_bfloat16(s1[H4(i)], fpst);
+ d[H2(i * 2 + 0)] = d0;
+ d[H2(i * 2 + 1)] = d1;
+ }
+}
+
+void HELPER(sme2_fcvtn)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+ size_t i, n = simd_oprsz(desc) / 4;
+ float32 *s0 = vs;
+ float32 *s1 = vs + sizeof(ARMVectorReg);
+ bfloat16 *d = vd;
+
+ for (i = 0; i < n; ++i) {
+ bfloat16 d0 = float32_to_float16(s0[H4(i)], true, fpst);
+ bfloat16 d1 = float32_to_float16(s1[H4(i)], true, fpst);
+ d[H2(i * 2 + 0)] = d0;
+ d[H2(i * 2 + 1)] = d1;
+ }
+}
@@ -1263,3 +1263,28 @@ TRANS_FEAT(UMLALL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_umlall
TRANS_FEAT(UMLSLL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_smlsll_idx_d)
TRANS_FEAT(USMLALL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_usmlall_idx_d)
TRANS_FEAT(SUMLALL_nx_d, aa64_sme_i16i64, do_smlall_nx, a, gen_helper_sme2_sumlall_idx_d)
+
+static bool do_zz_fpst(DisasContext *s, arg_zz_n *a, int data,
+ ARMFPStatusFlavour type, gen_helper_gvec_2_ptr *fn)
+{
+ if (sme_sm_enabled_check(s)) {
+ int svl = streaming_vec_reg_size(s);
+ TCGv_ptr fpst = fpstatus_ptr(type);
+
+ for (int i = 0, n = a->n; i < n; ++i) {
+ tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->zd + i),
+ vec_full_reg_offset(s, a->zn + i),
+ fpst, svl, svl, data, fn);
+ }
+ }
+ return true;
+}
+
+TRANS_FEAT(BFCVT, aa64_sme2, do_zz_fpst, a, 0,
+ FPST_A64, gen_helper_sme2_bfcvt)
+TRANS_FEAT(BFCVTN, aa64_sme2, do_zz_fpst, a, 0,
+ FPST_A64, gen_helper_sme2_bfcvtn)
+TRANS_FEAT(FCVT_n, aa64_sme2, do_zz_fpst, a, 0,
+ FPST_A64, gen_helper_sme2_fcvt_n)
+TRANS_FEAT(FCVTN, aa64_sme2, do_zz_fpst, a, 0,
+ FPST_A64, gen_helper_sme2_fcvtn)
@@ -719,3 +719,15 @@ BFMLS_nx 11000001 0001 .... 1 .. 1.. ...01 1 .... @azx_4x1_i3_o3
FMLS_nx_h 11000001 0001 .... 1 .. 1.. ...00 1 .... @azx_4x1_i3_o3
FMLS_nx_s 11000001 0101 .... 1 .. 0.. ...00 10 ... @azx_4x1_i2_o3
FMLS_nx_d 11000001 1101 .... 1 .. 00. ...00 10 ... @azx_4x1_i1_o3
+
+### SME2 Multi-vector SVE Constructive Unary
+
+&zz_n zd zn n
+@zz_1x2 ........ ... ..... ...... ..... zd:5 \
+ &zz_n n=1 zn=%zn_ax2
+
+BFCVT 11000001 011 00000 111000 ....0 ..... @zz_1x2
+BFCVTN 11000001 011 00000 111000 ....1 ..... @zz_1x2
+
+FCVT_n 11000001 001 00000 111000 ....0 ..... @zz_1x2
+FCVTN 11000001 001 00000 111000 ....1 ..... @zz_1x2
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/helper-sme.h | 5 +++ target/arm/tcg/sme_helper.c | 74 ++++++++++++++++++++++++++++++++++ target/arm/tcg/translate-sme.c | 25 ++++++++++++ target/arm/tcg/sme.decode | 12 ++++++ 4 files changed, 116 insertions(+)