@@ -761,6 +761,47 @@ TRANS_FEAT(SUB_azz_nn_d, aa64_sme2_i16i64, do_azz_nn, a, MO_64, tcg_gen_gvec_sub
*/
#define FPST_ENV -1
+/*
+ * Expand a floating-point operation over array vectors, calling FN once
+ * for every (register, selection) pair.
+ *
+ * The outer loop runs NREG times, taking a fresh pointer to Zn and Zm on
+ * each pass; the inner loop runs NSEL times.  The destination of each
+ * call is the pointer returned by get_zarray() advanced by
+ * (r * vstride + i) ARMVectorReg-sized slots, and the helper descriptor
+ * carries DATA | (i << SHSEL).
+ *
+ * FPST chooses the fourth helper argument: a non-negative value is
+ * passed through fpstatus_ptr(), anything negative (e.g. FPST_ENV)
+ * passes tcg_env instead.
+ *
+ * Always returns true; no code is emitted when sme_sm_enabled_check()
+ * fails.
+ */
+static bool do_azz_fp(DisasContext *s, int nreg, int nsel,
+                      int rv, int off, int zn, int zm,
+                      int data, int shsel, bool multi, int fpst,
+                      gen_helper_gvec_3_ptr *fn)
+{
+    int svl, vstride;
+    TCGv_ptr za_base, za_slot, status;
+
+    if (!sme_sm_enabled_check(s)) {
+        return true;
+    }
+
+    svl = streaming_vec_reg_size(s);
+    vstride = svl / nreg;
+    za_base = get_zarray(s, rv, off, nreg);
+    status = fpst >= 0 ? fpstatus_ptr(fpst) : tcg_env;
+    za_slot = tcg_temp_new_ptr();
+
+    for (int r = 0; r < nreg; ++r) {
+        TCGv_ptr zn_ptr = vec_full_reg_ptr(s, zn);
+        TCGv_ptr zm_ptr = vec_full_reg_ptr(s, zm);
+        int first_slot = r * vstride;
+
+        for (int i = 0; i < nsel; ++i) {
+            int za_ofs = (first_slot + i) * sizeof(ARMVectorReg);
+            int desc = simd_desc(svl, svl, data | (i << shsel));
+
+            tcg_gen_addi_ptr(za_slot, za_base, za_ofs);
+            fn(za_slot, zn_ptr, zm_ptr, status, tcg_constant_i32(desc));
+        }
+
+        /*
+         * For multiple-and-single vectors, Zn may wrap.
+         * For multiple vectors, both Zn and Zm are aligned.
+         */
+        zn = (zn + 1) % 32;
+        if (multi) {
+            /* Only the multi-vector form steps Zm alongside Zn. */
+            zm++;
+        }
+    }
+    return true;
+}
+
static bool do_azz_acc_fp(DisasContext *s, int nreg, int nsel,
int rv, int off, int zn, int zm,
int data, int shsel, bool multi, int fpst,
@@ -896,6 +937,60 @@ static bool trans_BFVDOT(DisasContext *s, arg_azx_n *a)
gen_helper_sme2_bfvdot_idx);
}
+/*
+ * Common expander for the FMLA/FMLS TRANS_FEAT entries that use arg_azz_n.
+ * Forwards the decoded fields to do_azz_fp() with nsel = 1 and with both
+ * data and shsel set to 0; MULTI, FPST and FN are passed through unchanged.
+ */
+static bool do_fmla(DisasContext *s, arg_azz_n *a, bool multi,
+ ARMFPStatusFlavour fpst, gen_helper_gvec_3_ptr *fn)
+{
+ return do_azz_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm,
+ 0, 0, multi, fpst, fn);
+}
+
+/*
+ * FMLA/FMLS translators built on do_fmla(): the _n1_ entries pass
+ * multi = false and the _nn_ entries pass multi = true.  The _h entries
+ * use FPST_ZA_F16; the _s and _d entries use FPST_ZA.  Each FMLS entry
+ * selects the gvec_ah_vfms_* helper when s->fpcr_ah is set and the
+ * gvec_vfms_* helper otherwise.
+ */
+TRANS_FEAT(FMLA_n1_h, aa64_sme2_f16f16, do_fmla, a, false, FPST_ZA_F16,
+ gen_helper_gvec_vfma_h)
+TRANS_FEAT(FMLS_n1_h, aa64_sme2_f16f16, do_fmla, a, false, FPST_ZA_F16,
+ s->fpcr_ah ? gen_helper_gvec_ah_vfms_h : gen_helper_gvec_vfms_h)
+TRANS_FEAT(FMLA_nn_h, aa64_sme2_f16f16, do_fmla, a, true, FPST_ZA_F16,
+ gen_helper_gvec_vfma_h)
+TRANS_FEAT(FMLS_nn_h, aa64_sme2_f16f16, do_fmla, a, true, FPST_ZA_F16,
+ s->fpcr_ah ? gen_helper_gvec_ah_vfms_h : gen_helper_gvec_vfms_h)
+
+TRANS_FEAT(FMLA_n1_s, aa64_sme2, do_fmla, a, false, FPST_ZA,
+ gen_helper_gvec_vfma_s)
+TRANS_FEAT(FMLS_n1_s, aa64_sme2, do_fmla, a, false, FPST_ZA,
+ s->fpcr_ah ? gen_helper_gvec_ah_vfms_s : gen_helper_gvec_vfms_s)
+TRANS_FEAT(FMLA_nn_s, aa64_sme2, do_fmla, a, true, FPST_ZA,
+ gen_helper_gvec_vfma_s)
+TRANS_FEAT(FMLS_nn_s, aa64_sme2, do_fmla, a, true, FPST_ZA,
+ s->fpcr_ah ? gen_helper_gvec_ah_vfms_s : gen_helper_gvec_vfms_s)
+
+TRANS_FEAT(FMLA_n1_d, aa64_sme2_f64f64, do_fmla, a, false, FPST_ZA,
+ gen_helper_gvec_vfma_d)
+TRANS_FEAT(FMLS_n1_d, aa64_sme2_f64f64, do_fmla, a, false, FPST_ZA,
+ s->fpcr_ah ? gen_helper_gvec_ah_vfms_d : gen_helper_gvec_vfms_d)
+TRANS_FEAT(FMLA_nn_d, aa64_sme2_f64f64, do_fmla, a, true, FPST_ZA,
+ gen_helper_gvec_vfma_d)
+TRANS_FEAT(FMLS_nn_d, aa64_sme2_f64f64, do_fmla, a, true, FPST_ZA,
+ s->fpcr_ah ? gen_helper_gvec_ah_vfms_d : gen_helper_gvec_vfms_d)
+
+/*
+ * Common expander for the indexed (nx) FMLA/FMLS TRANS_FEAT entries.
+ * Forwards the decoded arg_azx_n fields to do_azz_acc_fp() with nsel = 1,
+ * a->idx as the descriptor data, shsel = 0 and multi = false; FPST and FN
+ * are passed through unchanged.
+ */
+static bool do_fmla_nx(DisasContext *s, arg_azx_n *a,
+ ARMFPStatusFlavour fpst, gen_helper_gvec_4_ptr *fn)
+{
+ return do_azz_acc_fp(s, a->n, 1, a->rv, a->off, a->zn, a->zm,
+ a->idx, 0, false, fpst, fn);
+}
+
+/*
+ * Indexed FMLA/FMLS translators built on do_fmla_nx().  The _h entries
+ * use FPST_ZA_F16; the _s and _d entries use FPST_ZA.  Each FMLS entry
+ * selects the gvec_ah_fmls_idx_* helper when s->fpcr_ah is set and the
+ * gvec_fmls_idx_* helper otherwise.
+ */
+TRANS_FEAT(FMLA_nx_h, aa64_sme2_f16f16, do_fmla_nx, a, FPST_ZA_F16,
+ gen_helper_gvec_fmla_idx_h)
+TRANS_FEAT(FMLS_nx_h, aa64_sme2_f16f16, do_fmla_nx, a, FPST_ZA_F16,
+ s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_h : gen_helper_gvec_fmls_idx_h)
+TRANS_FEAT(FMLA_nx_s, aa64_sme2, do_fmla_nx, a, FPST_ZA,
+ gen_helper_gvec_fmla_idx_s)
+TRANS_FEAT(FMLS_nx_s, aa64_sme2, do_fmla_nx, a, FPST_ZA,
+ s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_s : gen_helper_gvec_fmls_idx_s)
+TRANS_FEAT(FMLA_nx_d, aa64_sme2_f64f64, do_fmla_nx, a, FPST_ZA,
+ gen_helper_gvec_fmla_idx_d)
+TRANS_FEAT(FMLS_nx_d, aa64_sme2_f64f64, do_fmla_nx, a, FPST_ZA,
+ s->fpcr_ah ? gen_helper_gvec_ah_fmls_idx_d : gen_helper_gvec_fmls_idx_d)
+
/*
* Expand array multi-vector single (n1), array multi-vector (nn),
* and array multi-vector indexed (nx), for integer accumulate.
@@ -373,6 +373,20 @@ SUMLALL_n1_d 11000001 011 0 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=2
SUMLALL_n1_s 11000001 001 1 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=4
SUMLALL_n1_d 11000001 011 1 .... 0 .. 000 ..... 1010 . @azz_nx1_o1x4 n=4
+# FMLA (n1): _h, _s and _d patterns, each given once with n=2 and once
+# with n=4.  All use the @azz_nx1_o3 format.
+FMLA_n1_h 11000001 001 0 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=2
+FMLA_n1_s 11000001 001 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2
+FMLA_n1_d 11000001 011 0 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=2
+FMLA_n1_h 11000001 001 1 .... 0 .. 111 ..... 00 ... @azz_nx1_o3 n=4
+FMLA_n1_s 11000001 001 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4
+FMLA_n1_d 11000001 011 1 .... 0 .. 110 ..... 00 ... @azz_nx1_o3 n=4
+
+# FMLS (n1): same encodings as FMLA (n1) except that insn bit 3 is set.
+FMLS_n1_h 11000001 001 0 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=2
+FMLS_n1_s 11000001 001 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2
+FMLS_n1_d 11000001 011 0 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=2
+FMLS_n1_h 11000001 001 1 .... 0 .. 111 ..... 01 ... @azz_nx1_o3 n=4
+FMLS_n1_s 11000001 001 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4
+FMLS_n1_d 11000001 011 1 .... 0 .. 110 ..... 01 ... @azz_nx1_o3 n=4
+
+
### SME2 Multi-vector Multiple Array Vectors
%zn_ax2 6:4 !function=times_2
@@ -475,6 +489,20 @@ USMLALL_nn_d 11000001 111 ....0 0 .. 000 ....0 1010 . @azz_2x2_o1x4
USMLALL_nn_s 11000001 101 ...01 0 .. 000 ...00 1010 . @azz_4x4_o1x4
USMLALL_nn_d 11000001 111 ...01 0 .. 000 ...00 1010 . @azz_4x4_o1x4
+# FMLA (nn): _h, _s and _d patterns, each given once with the
+# @azz_2x2_o3 format and once with the @azz_4x4_o3 format.
+FMLA_nn_h 11000001 101 ....0 0 .. 100 ....0 01 ... @azz_2x2_o3
+FMLA_nn_s 11000001 101 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3
+FMLA_nn_d 11000001 111 ....0 0 .. 110 ....0 00 ... @azz_2x2_o3
+FMLA_nn_h 11000001 101 ...01 0 .. 100 ...00 01 ... @azz_4x4_o3
+FMLA_nn_s 11000001 101 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3
+FMLA_nn_d 11000001 111 ...01 0 .. 110 ...00 00 ... @azz_4x4_o3
+
+# FMLS (nn): differs from FMLA (nn) in insn bit 4 for the _h patterns
+# and in insn bit 3 for the _s and _d patterns.
+FMLS_nn_h 11000001 101 ....0 0 .. 100 ....0 11 ... @azz_2x2_o3
+FMLS_nn_s 11000001 101 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3
+FMLS_nn_d 11000001 111 ....0 0 .. 110 ....0 01 ... @azz_2x2_o3
+FMLS_nn_h 11000001 101 ...01 0 .. 100 ...00 11 ... @azz_4x4_o3
+FMLS_nn_s 11000001 101 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3
+FMLS_nn_d 11000001 111 ...01 0 .. 110 ...00 01 ... @azz_4x4_o3
+
+
### SME2 Multi-vector Indexed
&azx_n n off rv zn zm idx
@@ -628,3 +656,23 @@ SUMLALL_nx_s 11000001 0001 .... 0 .. 0.. ....1 10 ... @azx_2x1_i4_o1
SUMLALL_nx_d 11000001 1001 .... 0 .. 00. ....1 10 ... @azx_2x1_i3_o1
SUMLALL_nx_s 11000001 0001 .... 1 .. 0.. ...01 10 ... @azx_4x1_i4_o1
SUMLALL_nx_d 11000001 1001 .... 1 .. 00. ...01 10 ... @azx_4x1_i3_o1
+
+# 3-bit index assembled from insn bits [11:10] (high part) and bit [3]
+# (low part).
+%idx3_10_3 10:2 3:1
+# Indexed formats taking that 3-bit index, a 4-bit zm and a 3-bit off;
+# the two differ in n (2 or 4) and in the zn field used.
+@azx_2x1_i3_o3 ........ .... zm:4 . .. ... ..... .. off:3 \
+ &azx_n n=2 rv=%mova_rv zn=%zn_ax2 idx=%idx3_10_3
+@azx_4x1_i3_o3 ........ .... zm:4 . .. ... ..... .. off:3 \
+ &azx_n n=4 rv=%mova_rv zn=%zn_ax4 idx=%idx3_10_3
+
+# FMLA (nx): the _h patterns use the i3 formats above; the _s and _d
+# patterns use the existing i2 and i1 formats respectively.
+FMLA_nx_h 11000001 0001 .... 0 .. 1.. ....0 0 .... @azx_2x1_i3_o3
+FMLA_nx_s 11000001 0101 .... 0 .. 0.. ....0 00 ... @azx_2x1_i2_o3
+FMLA_nx_d 11000001 1101 .... 0 .. 00. ....0 00 ... @azx_2x1_i1_o3
+FMLA_nx_h 11000001 0001 .... 1 .. 1.. ...00 0 .... @azx_4x1_i3_o3
+FMLA_nx_s 11000001 0101 .... 1 .. 0.. ...00 00 ... @azx_4x1_i2_o3
+FMLA_nx_d 11000001 1101 .... 1 .. 00. ...00 00 ... @azx_4x1_i1_o3
+
+# FMLS (nx): same encodings as FMLA (nx) except that insn bit 4 is set.
+FMLS_nx_h 11000001 0001 .... 0 .. 1.. ....0 1 .... @azx_2x1_i3_o3
+FMLS_nx_s 11000001 0101 .... 0 .. 0.. ....0 10 ... @azx_2x1_i2_o3
+FMLS_nx_d 11000001 1101 .... 0 .. 00. ....0 10 ... @azx_2x1_i1_o3
+FMLS_nx_h 11000001 0001 .... 1 .. 1.. ...00 1 .... @azx_4x1_i3_o3
+FMLS_nx_s 11000001 0101 .... 1 .. 0.. ...00 10 ... @azx_4x1_i2_o3
+FMLS_nx_d 11000001 1101 .... 1 .. 00. ...00 10 ... @azx_4x1_i1_o3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-sme.c | 95 ++++++++++++++++++++++++++++++++++
 target/arm/tcg/sme.decode      | 48 +++++++++++++++++
 2 files changed, 143 insertions(+)