@@ -717,10 +717,12 @@ DEF_HELPER_FLAGS_4(gvec_fclt0_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_fadd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_bfadd, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_fsub_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_fsub_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_fsub_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_5(gvec_bfsub, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_5(gvec_fmul_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
@@ -1005,6 +1005,50 @@ TRANS_FEAT(BFMLA_nx, aa64_sme2_b16b16, do_fmla_nx, a, FPST_ZA,
TRANS_FEAT(BFMLS_nx, aa64_sme2_b16b16, do_fmla_nx, a, FPST_ZA,
s->fpcr_ah ? gen_helper_gvec_ah_bfmls_idx : gen_helper_gvec_bfmls_idx)
+/*
+ * Expand ZA-array FADD/FSUB/BFADD/BFSUB: for each r in [0, a->n), FN updates
+ * one ZA slice in place using Z register a->zm + r.  Always returns true.
+ */
+static bool do_faddsub(DisasContext *s, arg_az_n *a, ARMFPStatusFlavour fpst,
+                       gen_helper_gvec_3_ptr *fn)
+{
+    /* ZA is accessed, so require PSTATE.ZA in addition to streaming mode. */
+    if (sme_smza_enabled_check(s)) {
+        int svl = streaming_vec_reg_size(s);
+        int vstride = svl / a->n;
+        TCGv_ptr t_za = get_zarray(s, a->rv, a->off, a->n);
+        TCGv_ptr ptr = fpstatus_ptr(fpst);
+        TCGv_ptr t = tcg_temp_new_ptr();
+        for (int r = 0; r < a->n; ++r) {
+            TCGv_ptr t_zm = vec_full_reg_ptr(s, a->zm + r);
+            int o_za = r * vstride * sizeof(ARMVectorReg);
+            tcg_gen_addi_ptr(t, t_za, o_za);
+            fn(t, t, t_zm, ptr, tcg_constant_i32(simd_desc(svl, svl, 0)));
+        }
+    }
+    return true;
+}
+/* Half precision: gated on aa64_sme2_f16f16, using FPST_ZA_F16. */
+TRANS_FEAT(FADD_nn_h, aa64_sme2_f16f16, do_faddsub, a,
+ FPST_ZA_F16, gen_helper_gvec_fadd_h)
+TRANS_FEAT(FSUB_nn_h, aa64_sme2_f16f16, do_faddsub, a,
+ FPST_ZA_F16, gen_helper_gvec_fsub_h)
+/* Single precision: gated on aa64_sme2, using FPST_ZA. */
+TRANS_FEAT(FADD_nn_s, aa64_sme2, do_faddsub, a,
+ FPST_ZA, gen_helper_gvec_fadd_s)
+TRANS_FEAT(FSUB_nn_s, aa64_sme2, do_faddsub, a,
+ FPST_ZA, gen_helper_gvec_fsub_s)
+/* Double precision: gated on aa64_sme2_f64f64, using FPST_ZA. */
+TRANS_FEAT(FADD_nn_d, aa64_sme2_f64f64, do_faddsub, a,
+ FPST_ZA, gen_helper_gvec_fadd_d)
+TRANS_FEAT(FSUB_nn_d, aa64_sme2_f64f64, do_faddsub, a,
+ FPST_ZA, gen_helper_gvec_fsub_d)
+/* BFloat16: gated on aa64_sme2_b16b16, using FPST_ZA. */
+TRANS_FEAT(BFADD_nn, aa64_sme2_b16b16, do_faddsub, a,
+ FPST_ZA, gen_helper_gvec_bfadd)
+TRANS_FEAT(BFSUB_nn, aa64_sme2_b16b16, do_faddsub, a,
+ FPST_ZA, gen_helper_gvec_bfsub)
+
/*
* Expand array multi-vector single (n1), array multi-vector (nn),
* and array multi-vector indexed (nx), for integer accumulate.
@@ -1469,10 +1469,12 @@ void HELPER(NAME)(void *vd, void *vn, void *vm, \
DO_3OP(gvec_fadd_h, float16_add, float16)
DO_3OP(gvec_fadd_s, float32_add, float32)
DO_3OP(gvec_fadd_d, float64_add, float64)
+DO_3OP(gvec_bfadd, bfloat16_add, bfloat16)
DO_3OP(gvec_fsub_h, float16_sub, float16)
DO_3OP(gvec_fsub_s, float32_sub, float32)
DO_3OP(gvec_fsub_d, float64_sub, float64)
+DO_3OP(gvec_bfsub, bfloat16_sub, bfloat16)
DO_3OP(gvec_fmul_h, float16_mul, float16)
DO_3OP(gvec_fmul_s, float32_mul, float32)
@@ -515,6 +515,31 @@ FMLS_nn_h 11000001 101 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3
FMLS_nn_s 11000001 101 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3
FMLS_nn_d 11000001 111 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3
+&az_n n off rv zm
+@az_2x2_o3 ........ ... ..... . .. ... ..... .. off:3 \
+ &az_n n=2 rv=%mova_rv zm=%zn_ax2
+@az_4x4_o3 ........ ... ..... . .. ... ..... .. off:3 \
+ &az_n n=4 rv=%mova_rv zm=%zn_ax4
+# FADD: the n=2 patterns use @az_2x2_o3, the n=4 patterns use @az_4x4_o3.
+FADD_nn_h 11000001 101 00100 0 .. 111 ....0 00 ... @az_2x2_o3
+FADD_nn_s 11000001 101 00000 0 .. 111 ....0 00 ... @az_2x2_o3
+FADD_nn_d 11000001 111 00000 0 .. 111 ....0 00 ... @az_2x2_o3
+FADD_nn_h 11000001 101 00101 0 .. 111 ...00 00 ... @az_4x4_o3
+FADD_nn_s 11000001 101 00001 0 .. 111 ...00 00 ... @az_4x4_o3
+FADD_nn_d 11000001 111 00001 0 .. 111 ...00 00 ... @az_4x4_o3
+# FSUB: same patterns as FADD except the two bits before off are 01, not 00.
+FSUB_nn_h 11000001 101 00100 0 .. 111 ....0 01 ... @az_2x2_o3
+FSUB_nn_s 11000001 101 00000 0 .. 111 ....0 01 ... @az_2x2_o3
+FSUB_nn_d 11000001 111 00000 0 .. 111 ....0 01 ... @az_2x2_o3
+FSUB_nn_h 11000001 101 00101 0 .. 111 ...00 01 ... @az_4x4_o3
+FSUB_nn_s 11000001 101 00001 0 .. 111 ...00 01 ... @az_4x4_o3
+FSUB_nn_d 11000001 111 00001 0 .. 111 ...00 01 ... @az_4x4_o3
+# BFADD/BFSUB: as the _h patterns above, but with 111 in place of 101.
+BFADD_nn 11000001 111 00100 0 .. 111 ....0 00 ... @az_2x2_o3
+BFADD_nn 11000001 111 00101 0 .. 111 ...00 00 ... @az_4x4_o3
+BFSUB_nn 11000001 111 00100 0 .. 111 ....0 01 ... @az_2x2_o3
+BFSUB_nn 11000001 111 00101 0 .. 111 ...00 01 ... @az_4x4_o3
+
### SME2 Multi-vector Indexed
&azx_n n off rv zn zm idx
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper.h            |  2 ++
 target/arm/tcg/translate-sme.c | 44 ++++++++++++++++++++++++++++++++++
 target/arm/tcg/vec_helper.c    |  2 ++
 target/arm/tcg/sme.decode      | 25 +++++++++++++++++++
 4 files changed, 73 insertions(+)