@@ -42,6 +42,12 @@ DEF_HELPER_FLAGS_3(sme2_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sme2_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sme2_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2p1_movaz_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
@@ -250,6 +250,66 @@ void HELPER(sme2_mova_zc_d)(void *vdst, void *vsrc, uint32_t desc)
}
}
+void HELPER(sme2p1_movaz_zc_b)(void *vdst, void *vsrc, uint32_t desc)
+{
+ uint8_t *src = vsrc;
+ uint8_t *dst = vdst;
+ size_t i, n = simd_oprsz(desc);
+
+ for (i = 0; i < n; ++i) {
+ dst[i] = src[tile_vslice_index(i)];
+ src[tile_vslice_index(i)] = 0;
+ }
+}
+
+void HELPER(sme2p1_movaz_zc_h)(void *vdst, void *vsrc, uint32_t desc)
+{
+ uint16_t *src = vsrc;
+ uint16_t *dst = vdst;
+ size_t i, n = simd_oprsz(desc) / 2;
+
+ for (i = 0; i < n; ++i) {
+ dst[i] = src[tile_vslice_index(i)];
+ src[tile_vslice_index(i)] = 0;
+ }
+}
+
+void HELPER(sme2p1_movaz_zc_s)(void *vdst, void *vsrc, uint32_t desc)
+{
+ uint32_t *src = vsrc;
+ uint32_t *dst = vdst;
+ size_t i, n = simd_oprsz(desc) / 4;
+
+ for (i = 0; i < n; ++i) {
+ dst[i] = src[tile_vslice_index(i)];
+ src[tile_vslice_index(i)] = 0;
+ }
+}
+
+void HELPER(sme2p1_movaz_zc_d)(void *vdst, void *vsrc, uint32_t desc)
+{
+ uint64_t *src = vsrc;
+ uint64_t *dst = vdst;
+ size_t i, n = simd_oprsz(desc) / 8;
+
+ for (i = 0; i < n; ++i) {
+ dst[i] = src[tile_vslice_index(i)];
+ src[tile_vslice_index(i)] = 0;
+ }
+}
+
+void HELPER(sme2p1_movaz_zc_q)(void *vdst, void *vsrc, uint32_t desc)
+{
+ Int128 *src = vsrc;
+ Int128 *dst = vdst;
+ size_t i, n = simd_oprsz(desc) / 16;
+
+ for (i = 0; i < n; ++i) {
+ dst[i] = src[tile_vslice_index(i)];
+ memset(&src[tile_vslice_index(i)], 0, 16);
+ }
+}
+
/*
* Clear elements in a tile slice comprising len bytes.
*/
@@ -232,7 +232,8 @@ static bool do_mova_tile(DisasContext *s, arg_mova_p *a, bool to_vec)
TRANS_FEAT(MOVA_tz, aa64_sme, do_mova_tile, a, false)
TRANS_FEAT(MOVA_zt, aa64_sme, do_mova_tile, a, true)
-static bool do_mova_tile_n(DisasContext *s, arg_mova_t *a, int n, bool to_vec)
+static bool do_mova_tile_n(DisasContext *s, arg_mova_t *a, int n,
+ bool to_vec, bool zero)
{
static gen_helper_gvec_2 * const cz_fns[] = {
gen_helper_sme2_mova_cz_b, gen_helper_sme2_mova_cz_h,
@@ -242,9 +243,16 @@ static bool do_mova_tile_n(DisasContext *s, arg_mova_t *a, int n, bool to_vec)
gen_helper_sme2_mova_zc_b, gen_helper_sme2_mova_zc_h,
gen_helper_sme2_mova_zc_s, gen_helper_sme2_mova_zc_d,
};
+ static gen_helper_gvec_2 * const zc_z_fns[] = {
+ gen_helper_sme2p1_movaz_zc_b, gen_helper_sme2p1_movaz_zc_h,
+ gen_helper_sme2p1_movaz_zc_s, gen_helper_sme2p1_movaz_zc_d,
+ gen_helper_sme2p1_movaz_zc_q,
+ };
TCGv_ptr t_za;
int svl;
+ assert(a->esz <= MO_64 + zero);
+
if (!sme_smza_enabled_check(s)) {
return true;
}
@@ -262,7 +270,9 @@ static bool do_mova_tile_n(DisasContext *s, arg_mova_t *a, int n, bool to_vec)
TCGv_ptr t_zr = vec_full_reg_ptr(s, a->zr * n + i);
t_za = get_tile_rowcol(s, a->esz, a->rs, a->za,
a->off * n + i, 1, a->v);
- if (to_vec) {
+ if (zero) {
+ zc_z_fns[a->esz](t_zr, t_za, t_desc);
+ } else if (to_vec) {
zc_fns[a->esz](t_zr, t_za, t_desc);
} else {
cz_fns[a->esz](t_za, t_zr, t_desc);
@@ -275,6 +285,9 @@ static bool do_mova_tile_n(DisasContext *s, arg_mova_t *a, int n, bool to_vec)
a->off * n + i, 1, a->v);
if (to_vec) {
tcg_gen_gvec_mov_var(MO_8, tcg_env, o_zr, t_za, 0, svl, svl);
+ if (zero) {
+ tcg_gen_gvec_dup_imm_var(MO_8, t_za, 0, svl, svl, 0);
+ }
} else {
tcg_gen_gvec_mov_var(MO_8, t_za, 0, tcg_env, o_zr, svl, svl);
}
@@ -283,12 +296,17 @@ static bool do_mova_tile_n(DisasContext *s, arg_mova_t *a, int n, bool to_vec)
return true;
}
-TRANS_FEAT(MOVA_tz2, aa64_sme2, do_mova_tile_n, a, 2, false)
-TRANS_FEAT(MOVA_tz4, aa64_sme2, do_mova_tile_n, a, 4, false)
-TRANS_FEAT(MOVA_zt2, aa64_sme2, do_mova_tile_n, a, 2, true)
-TRANS_FEAT(MOVA_zt4, aa64_sme2, do_mova_tile_n, a, 4, true)
+TRANS_FEAT(MOVA_tz2, aa64_sme2, do_mova_tile_n, a, 2, false, false)
+TRANS_FEAT(MOVA_tz4, aa64_sme2, do_mova_tile_n, a, 4, false, false)
+TRANS_FEAT(MOVA_zt2, aa64_sme2, do_mova_tile_n, a, 2, true, false)
+TRANS_FEAT(MOVA_zt4, aa64_sme2, do_mova_tile_n, a, 4, true, false)
-static bool do_mova_array_n(DisasContext *s, arg_mova_a *a, int n, bool to_vec)
+TRANS_FEAT(MOVAZ_zt, aa64_sme2p1, do_mova_tile_n, a, 1, true, true)
+TRANS_FEAT(MOVAZ_zt2, aa64_sme2p1, do_mova_tile_n, a, 2, true, true)
+TRANS_FEAT(MOVAZ_zt4, aa64_sme2p1, do_mova_tile_n, a, 4, true, true)
+
+static bool do_mova_array_n(DisasContext *s, arg_mova_a *a, int n,
+ bool to_vec, bool zero)
{
TCGv_ptr t_za;
int svl;
@@ -306,6 +324,9 @@ static bool do_mova_array_n(DisasContext *s, arg_mova_a *a, int n, bool to_vec)
if (to_vec) {
tcg_gen_gvec_mov_var(MO_8, tcg_env, o_zr, t_za, o_za, svl, svl);
+ if (zero) {
+ tcg_gen_gvec_dup_imm_var(MO_8, t_za, o_za, svl, svl, 0);
+ }
} else {
tcg_gen_gvec_mov_var(MO_8, t_za, o_za, tcg_env, o_zr, svl, svl);
}
@@ -313,10 +334,13 @@ static bool do_mova_array_n(DisasContext *s, arg_mova_a *a, int n, bool to_vec)
return true;
}
-TRANS_FEAT(MOVA_az2, aa64_sme2, do_mova_array_n, a, 2, false)
-TRANS_FEAT(MOVA_az4, aa64_sme2, do_mova_array_n, a, 4, false)
-TRANS_FEAT(MOVA_za2, aa64_sme2, do_mova_array_n, a, 2, true)
-TRANS_FEAT(MOVA_za4, aa64_sme2, do_mova_array_n, a, 4, true)
+TRANS_FEAT(MOVA_az2, aa64_sme2, do_mova_array_n, a, 2, false, false)
+TRANS_FEAT(MOVA_az4, aa64_sme2, do_mova_array_n, a, 4, false, false)
+TRANS_FEAT(MOVA_za2, aa64_sme2, do_mova_array_n, a, 2, true, false)
+TRANS_FEAT(MOVA_za4, aa64_sme2, do_mova_array_n, a, 4, true, false)
+
+TRANS_FEAT(MOVAZ_za2, aa64_sme2p1, do_mova_array_n, a, 2, true, true)
+TRANS_FEAT(MOVAZ_za4, aa64_sme2p1, do_mova_array_n, a, 4, true, true)
static bool do_movt(DisasContext *s, arg_MOVT_rzt *a,
void (*func)(TCGv_i64, TCGv_ptr, tcg_target_long))
@@ -100,6 +100,42 @@ MOVA_za2 11000000 00 00011 00 .. 010 00 off:3 zr:4 0 \
MOVA_za4 11000000 00 00011 00 .. 011 00 off:3 zr:3 00 \
&mova_a rv=%mova_rv
+### SME Move and Zero
+
+MOVAZ_za2 11000000 00000110 0 .. 01010 off:3 zr:4 0 \
+ &mova_a rv=%mova_rv
+MOVAZ_za4 11000000 00000110 0 .. 01110 off:3 zr:3 00 \
+ &mova_a rv=%mova_rv
+
+MOVAZ_zt 11000000 00 00001 0 v:1 .. 0001 off:4 zr:5 \
+ &mova_t rs=%mova_rs esz=0 za=0
+MOVAZ_zt 11000000 01 00001 0 v:1 .. 0001 za:1 off:3 zr:5 \
+ &mova_t rs=%mova_rs esz=1
+MOVAZ_zt 11000000 10 00001 0 v:1 .. 0001 za:2 off:2 zr:5 \
+ &mova_t rs=%mova_rs esz=2
+MOVAZ_zt 11000000 11 00001 0 v:1 .. 0001 za:3 off:1 zr:5 \
+ &mova_t rs=%mova_rs esz=3
+MOVAZ_zt 11000000 11 00001 1 v:1 .. 0001 za:4 zr:5 \
+ &mova_t rs=%mova_rs esz=4 off=0
+
+MOVAZ_zt2 11000000 00 00011 0 v:1 .. 00010 off:3 zr:4 0 \
+ &mova_t rs=%mova_rs esz=0 za=0
+MOVAZ_zt2 11000000 01 00011 0 v:1 .. 00010 za:1 off:2 zr:4 0 \
+ &mova_t rs=%mova_rs esz=1
+MOVAZ_zt2 11000000 10 00011 0 v:1 .. 00010 za:2 off:1 zr:4 0 \
+ &mova_t rs=%mova_rs esz=2
+MOVAZ_zt2 11000000 11 00011 0 v:1 .. 00010 za:3 zr:4 0 \
+ &mova_t rs=%mova_rs esz=3 off=0
+
+MOVAZ_zt4 11000000 00 00011 0 v:1 .. 001100 off:2 zr:3 00 \
+ &mova_t rs=%mova_rs esz=0 za=0
+MOVAZ_zt4 11000000 01 00011 0 v:1 .. 001100 za:1 off:1 zr:3 00 \
+ &mova_t rs=%mova_rs esz=1
+MOVAZ_zt4 11000000 10 00011 0 v:1 .. 001100 za:2 zr:3 00 \
+ &mova_t rs=%mova_rs esz=2 off=0
+MOVAZ_zt4 11000000 11 00011 0 v:1 .. 00110 za:3 zr:3 00 \
+ &mova_t rs=%mova_rs esz=3 off=0
+
### SME Move into/from ZT0
MOVT_rzt 1100 0000 0100 1100 0 off:3 00 11111 rt:5
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/helper-sme.h | 6 ++++ target/arm/tcg/sme_helper.c | 60 ++++++++++++++++++++++++++++++++++ target/arm/tcg/translate-sme.c | 46 +++++++++++++++++++------- target/arm/tcg/sme.decode | 36 ++++++++++++++++++++ 4 files changed, 137 insertions(+), 11 deletions(-)