@@ -692,6 +692,7 @@ static gen_helper_gvec_3_ptr * const f_vector_fminnm[4] = {
TRANS_FEAT(FMINNM_n1, aa64_sme2, do_z2z_n1_fpst, a, f_vector_fminnm)
TRANS_FEAT(FMINNM_nn, aa64_sme2, do_z2z_nn_fpst, a, f_vector_fminnm)
+/* Add/Sub vector Z[m] to each Z[n*N] with result in ZA[d*N]. */
static bool do_azz_n1(DisasContext *s, arg_azz_n *a, int esz,
GVecGen3FnVar *fn)
{
@@ -720,3 +721,33 @@ TRANS_FEAT(ADD_azz_n1_s, aa64_sme2, do_azz_n1, a, MO_32, tcg_gen_gvec_add_var)
TRANS_FEAT(SUB_azz_n1_s, aa64_sme2, do_azz_n1, a, MO_32, tcg_gen_gvec_sub_var)
TRANS_FEAT(ADD_azz_n1_d, aa64_sme2_i16i64, do_azz_n1, a, MO_64, tcg_gen_gvec_add_var)
TRANS_FEAT(SUB_azz_n1_d, aa64_sme2_i16i64, do_azz_n1, a, MO_64, tcg_gen_gvec_sub_var)
+
+/*
+ * Add/Sub each vector Z[m*N] to each Z[n*N] with result in ZA[d*N].
+ * Expands @fn once per register pair Z[zn + i], Z[zm + i]; always true.
+ */
+static bool do_azz_nn(DisasContext *s, arg_azz_n *a, int esz,
+                      GVecGen3FnVar *fn)
+{
+    const int count = a->n;
+
+    if (sme_smza_enabled_check(s)) {
+        TCGv_ptr za_base = get_zarray(s, a->rv, a->off, count);
+        int vl_bytes = streaming_vec_reg_size(s);
+
+        for (int idx = 0; idx < count; idx++) {
+            /* Destinations sit svl / N ARMVectorReg-sized entries apart. */
+            int za_off = idx * (vl_bytes / count * sizeof(ARMVectorReg));
+            int zn_off = vec_full_reg_offset(s, a->zn + idx);
+            int zm_off = vec_full_reg_offset(s, a->zm + idx);
+            fn(esz, za_base, za_off, tcg_env, zn_off, tcg_env, zm_off,
+               vl_bytes, vl_bytes);
+        }
+    }
+    return true;
+}
+/* do_azz_nn ADD/SUB: MO_32 with aa64_sme2, MO_64 with aa64_sme2_i16i64. */
+TRANS_FEAT(ADD_azz_nn_s, aa64_sme2, do_azz_nn, a, MO_32, tcg_gen_gvec_add_var)
+TRANS_FEAT(SUB_azz_nn_s, aa64_sme2, do_azz_nn, a, MO_32, tcg_gen_gvec_sub_var)
+TRANS_FEAT(ADD_azz_nn_d, aa64_sme2_i16i64, do_azz_nn, a, MO_64, tcg_gen_gvec_add_var)
+TRANS_FEAT(SUB_azz_nn_d, aa64_sme2_i16i64, do_azz_nn, a, MO_64, tcg_gen_gvec_sub_var)
@@ -260,3 +260,23 @@ SUB_azz_n1_s 11000001 0010 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=2
SUB_azz_n1_s 11000001 0011 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=4
SUB_azz_n1_d 11000001 0110 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=2
SUB_azz_n1_d 11000001 0111 .... 0 .. 110 ..... 11 ... @azz_nx1_o3 n=4
+
+### SME2 Multi-vector Multiple Array Vectors
+# Declares the zn fields; %zm_ax*, %mova_rv and &azz_n are declared elsewhere.
+%zn_ax2 6:4 !function=times_2
+%zn_ax4 7:3 !function=times_4
+
+@azz_2x2_o3 ........ ... ..... . .. ... ..... .. off:3 \
+ &azz_n n=2 rv=%mova_rv zn=%zn_ax2 zm=%zm_ax2
+@azz_4x4_o3 ........ ... ..... . .. ... ..... .. off:3 \
+ &azz_n n=4 rv=%mova_rv zn=%zn_ax4 zm=%zm_ax4
+# ADD: 101 (_s) or 111 (_d) follows the leading byte; 10 precedes off.
+ADD_azz_nn_s 11000001 101 ....0 0 .. 110 ....0 10 ... @azz_2x2_o3
+ADD_azz_nn_s 11000001 101 ...01 0 .. 110 ...00 10 ... @azz_4x4_o3
+ADD_azz_nn_d 11000001 111 ....0 0 .. 110 ....0 10 ... @azz_2x2_o3
+ADD_azz_nn_d 11000001 111 ...01 0 .. 110 ...00 10 ... @azz_4x4_o3
+# SUB: same as ADD except that 11 precedes off.
+SUB_azz_nn_s 11000001 101 ....0 0 .. 110 ....0 11 ... @azz_2x2_o3
+SUB_azz_nn_s 11000001 101 ...01 0 .. 110 ...00 11 ... @azz_4x4_o3
+SUB_azz_nn_d 11000001 111 ....0 0 .. 110 ....0 11 ... @azz_2x2_o3
+SUB_azz_nn_d 11000001 111 ...01 0 .. 110 ...00 11 ... @azz_4x4_o3
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-sme.c | 31 +++++++++++++++++++++++++++++++
 target/arm/tcg/sme.decode      | 20 ++++++++++++++++++++
 2 files changed, 51 insertions(+)