@@ -238,3 +238,16 @@ DEF_HELPER_FLAGS_3(sme2_sqcvtun_sb, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sme2_sqcvtn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sme2_uqcvtn_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sme2_sqcvtun_dh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+/* SME2 SUNPK/UUNPK: sunpk* widen signed elements, uunpk* unsigned; 2 or 4 = destination vectors written. */
+DEF_HELPER_FLAGS_3(sme2_sunpk2_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32) /* _bh: 8-bit -> 16-bit elements */
+DEF_HELPER_FLAGS_3(sme2_sunpk2_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32) /* _hs: 16-bit -> 32-bit elements */
+DEF_HELPER_FLAGS_3(sme2_sunpk2_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32) /* _sd: 32-bit -> 64-bit elements */
+DEF_HELPER_FLAGS_3(sme2_sunpk4_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_sunpk4_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_sunpk4_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_uunpk2_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_uunpk2_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_uunpk2_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_uunpk4_bh, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_uunpk4_hs, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sme2_uunpk4_sd, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
@@ -1676,6 +1676,44 @@ void HELPER(sme2_fcvt_w)(void *vd, void *vs, float_status *fpst, uint32_t desc)
}
}
+/* Define HELPER(NAME): widen SREG vectors of TN at vs into 2 * SREG vectors of TW at vd. */
+#define UNPK(NAME, SREG, TW, TN, HW, HN) \
+void HELPER(NAME)(void *vd, void *vs, uint32_t desc) \
+{ \
+    ARMVectorReg scratch[SREG] __attribute__((uninitialized)); \
+    size_t vec_bytes = simd_oprsz(desc); \
+    size_t nelem = vec_bytes / sizeof(TW); /* TW elements written per output vector */ \
+    /* Input that starts inside the output span is read from a private copy. */ \
+    if ((vs - vd) < 2 * SREG * sizeof(ARMVectorReg)) { \
+        vs = memcpy(scratch, vs, sizeof(scratch)); \
+    } \
+    for (size_t out = 0; out < 2 * SREG; ++out) { \
+        TN *src = vs + (out / 2) * sizeof(ARMVectorReg); /* one input feeds two outputs */ \
+        TW *dst = vd + out * sizeof(ARMVectorReg); \
+        for (size_t k = 0; k < nelem; ++k) { \
+            dst[HW(k)] = src[(out % 2) * nelem + HN(k)]; /* even out: first nelem inputs; odd: next */ \
+        } \
+    } \
+}
+/* Signed, one source vector -> two destination vectors (args: NAME, SREG, TW, TN, HW, HN). */
+UNPK(sme2_sunpk2_bh, 1, int16_t, int8_t, H2, H1)
+UNPK(sme2_sunpk2_hs, 1, int32_t, int16_t, H4, H2)
+UNPK(sme2_sunpk2_sd, 1, int64_t, int32_t, H8, H4)
+/* Signed, two source vectors -> four destination vectors. */
+UNPK(sme2_sunpk4_bh, 2, int16_t, int8_t, H2, H1)
+UNPK(sme2_sunpk4_hs, 2, int32_t, int16_t, H4, H2)
+UNPK(sme2_sunpk4_sd, 2, int64_t, int32_t, H8, H4)
+/* Unsigned, one source vector -> two destination vectors. */
+UNPK(sme2_uunpk2_bh, 1, uint16_t, uint8_t, H2, H1)
+UNPK(sme2_uunpk2_hs, 1, uint32_t, uint16_t, H4, H2)
+UNPK(sme2_uunpk2_sd, 1, uint64_t, uint32_t, H8, H4)
+/* Unsigned, two source vectors -> four destination vectors. */
+UNPK(sme2_uunpk4_bh, 2, uint16_t, uint8_t, H2, H1)
+UNPK(sme2_uunpk4_hs, 2, uint32_t, uint16_t, H4, H2)
+UNPK(sme2_uunpk4_sd, 2, uint64_t, uint32_t, H8, H4)
+
+#undef UNPK
+
/* Deinterleave and convert. */
void HELPER(sme2_fcvtl)(void *vd, void *vs, float_status *fpst, uint32_t desc)
{
@@ -1347,3 +1347,19 @@ TRANS_FEAT(SQCVTUN_sb, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtun_sb)
TRANS_FEAT(SQCVTN_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtn_dh)
TRANS_FEAT(UQCVTN_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uqcvtn_dh)
TRANS_FEAT(SQCVTUN_dh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sqcvtun_dh)
+/* SUNPK into two vectors; every entry below names aa64_sme2, do_zz and its own helper. */
+TRANS_FEAT(SUNPK_2bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk2_bh)
+TRANS_FEAT(SUNPK_2hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk2_hs)
+TRANS_FEAT(SUNPK_2sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk2_sd)
+/* SUNPK into four vectors. */
+TRANS_FEAT(SUNPK_4bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk4_bh)
+TRANS_FEAT(SUNPK_4hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk4_hs)
+TRANS_FEAT(SUNPK_4sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_sunpk4_sd)
+/* UUNPK into two vectors. */
+TRANS_FEAT(UUNPK_2bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk2_bh)
+TRANS_FEAT(UUNPK_2hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk2_hs)
+TRANS_FEAT(UUNPK_2sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk2_sd)
+/* UUNPK into four vectors. */
+TRANS_FEAT(UUNPK_4bh, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk4_bh)
+TRANS_FEAT(UUNPK_4hs, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk4_hs)
+TRANS_FEAT(UUNPK_4sd, aa64_sme2, do_zz, a, 0, gen_helper_sme2_uunpk4_sd)
@@ -733,6 +733,8 @@ FMLS_nx_d 11000001 1101 .... 1 .. 00. ...00 10 ... @azx_4x1_i1_o3
&zz_n n=2 zd=%zd_ax2 zn=%zn_ax2
@zz_4x4 ........ ... ..... ...... .... . ..... \
&zz_n n=4 zd=%zd_ax4 zn=%zn_ax4
+@zz_4x2_n1 ........ ... ..... ...... .... . ..... \
+ &zz_n n=1 zd=%zd_ax4 zn=%zn_ax2 # n=1 here, unlike @zz_4x4 (n=4); NOTE(review): confirm how do_zz uses n
BFCVT 11000001 011 00000 111000 ....0 ..... @zz_1x2
BFCVTN 11000001 011 00000 111000 ....1 ..... @zz_1x2
@@ -781,3 +783,19 @@ SQCVTUN_sb 11000001 011 10011 111000 ...10 ..... @zz_1x4
SQCVTN_dh 11000001 101 10011 111000 ...10 ..... @zz_1x4
UQCVTN_dh 11000001 101 10011 111000 ...11 ..... @zz_1x4
SQCVTUN_dh 11000001 111 10011 111000 ...10 ..... @zz_1x4
+# SUNPK, two-vector forms: bits 23:21 differ per element-size variant, bit 20 = 0, bit 0 = 0.
+SUNPK_2bh 11000001 011 00101 111000 ..... ....0 @zz_2x1
+SUNPK_2hs 11000001 101 00101 111000 ..... ....0 @zz_2x1
+SUNPK_2sd 11000001 111 00101 111000 ..... ....0 @zz_2x1
+# UUNPK, two-vector forms: same encodings with bit 0 = 1.
+UUNPK_2bh 11000001 011 00101 111000 ..... ....1 @zz_2x1
+UUNPK_2hs 11000001 101 00101 111000 ..... ....1 @zz_2x1
+UUNPK_2sd 11000001 111 00101 111000 ..... ....1 @zz_2x1
+# SUNPK, four-vector forms: bit 20 = 1, bits 5 and 1 fixed at 0, bit 0 = 0.
+SUNPK_4bh 11000001 011 10101 111000 ....0 ...00 @zz_4x2_n1
+SUNPK_4hs 11000001 101 10101 111000 ....0 ...00 @zz_4x2_n1
+SUNPK_4sd 11000001 111 10101 111000 ....0 ...00 @zz_4x2_n1
+# UUNPK, four-vector forms: same encodings with bit 0 = 1.
+UUNPK_4bh 11000001 011 10101 111000 ....0 ...01 @zz_4x2_n1
+UUNPK_4hs 11000001 101 10101 111000 ....0 ...01 @zz_4x2_n1
+UUNPK_4sd 11000001 111 10101 111000 ....0 ...01 @zz_4x2_n1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/helper-sme.h    | 13 ++++++++++++
 target/arm/tcg/sme_helper.c    | 38 ++++++++++++++++++++++++++++++++++
 target/arm/tcg/translate-sme.c | 16 ++++++++++++++
 target/arm/tcg/sme.decode      | 18 ++++++++++++++++
 4 files changed, 85 insertions(+)