@@ -701,12 +701,22 @@ DEF_HELPER_FLAGS_4(sve_zip_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_zip_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_zip_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_zipq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_zipq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_zipq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_zipq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(sve_uzp_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_uzp_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_uzp_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uzpq_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uzpq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uzpq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sve2p1_uzpq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(sve_trn_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_trn_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_trn_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
@@ -3817,6 +3817,35 @@ DO_UZP(sve_uzp_s, uint32_t, H1_4)
DO_UZP(sve_uzp_d, uint64_t, H1_8)
DO_UZP(sve2_uzp_q, Int128, )
+typedef void perseg_zzz_fn(void *vd, void *vn, void *vm, uint32_t desc);
+
+static void do_perseg_zzz(void *vd, void *vn, void *vm,
+ uint32_t desc, perseg_zzz_fn *fn)
+{
+ intptr_t oprsz = simd_oprsz(desc);
+
+ desc = simd_desc(16, 16, simd_data(desc));
+ for (intptr_t i = 0; i < oprsz; i += 16) {
+ fn(vd + i, vn + i, vm + i, desc);
+ }
+}
+
+#define DO_PERSEG_ZZZ(NAME, FUNC) \
+ void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
+ { do_perseg_zzz(vd, vn, vm, desc, FUNC); }
+
+DO_PERSEG_ZZZ(sve2p1_uzpq_b, helper_sve_uzp_b)
+DO_PERSEG_ZZZ(sve2p1_uzpq_h, helper_sve_uzp_h)
+DO_PERSEG_ZZZ(sve2p1_uzpq_s, helper_sve_uzp_s)
+DO_PERSEG_ZZZ(sve2p1_uzpq_d, helper_sve_uzp_d)
+
+DO_PERSEG_ZZZ(sve2p1_zipq_b, helper_sve_zip_b)
+DO_PERSEG_ZZZ(sve2p1_zipq_h, helper_sve_zip_h)
+DO_PERSEG_ZZZ(sve2p1_zipq_s, helper_sve_zip_s)
+DO_PERSEG_ZZZ(sve2p1_zipq_d, helper_sve_zip_d)
+
+#undef DO_PERSEG_ZZZ
+
#define DO_TRN(NAME, TYPE, H) \
void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
{ \
@@ -2584,11 +2584,19 @@ TRANS_FEAT(ZIP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
gen_helper_sve2_zip_q, a,
QEMU_ALIGN_DOWN(vec_full_reg_size(s), 32) / 2)
+static gen_helper_gvec_3 * const zipq_fns[4] = {
+ gen_helper_sve2p1_zipq_b, gen_helper_sve2p1_zipq_h,
+ gen_helper_sve2p1_zipq_s, gen_helper_sve2p1_zipq_d,
+};
+TRANS_FEAT(ZIPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ zipq_fns[a->esz], a, 0)
+TRANS_FEAT(ZIPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ zipq_fns[a->esz], a, 16 / 2)
+
static gen_helper_gvec_3 * const uzp_fns[4] = {
gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
};
-
TRANS_FEAT(UZP1_z, aa64_sve, gen_gvec_ool_arg_zzz,
uzp_fns[a->esz], a, 0)
TRANS_FEAT(UZP2_z, aa64_sve, gen_gvec_ool_arg_zzz,
@@ -2599,6 +2607,15 @@ TRANS_FEAT(UZP1_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
TRANS_FEAT(UZP2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
gen_helper_sve2_uzp_q, a, 16)
+static gen_helper_gvec_3 * const uzpq_fns[4] = {
+ gen_helper_sve2p1_uzpq_b, gen_helper_sve2p1_uzpq_h,
+ gen_helper_sve2p1_uzpq_s, gen_helper_sve2p1_uzpq_d,
+};
+TRANS_FEAT(UZPQ1, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ uzpq_fns[a->esz], a, 0)
+TRANS_FEAT(UZPQ2, aa64_sme2p1_or_sve2p1, gen_gvec_ool_arg_zzz,
+ uzpq_fns[a->esz], a, 1 << a->esz)
+
static gen_helper_gvec_3 * const trn_fns[4] = {
gen_helper_sve_trn_b, gen_helper_sve_trn_h,
gen_helper_sve_trn_s, gen_helper_sve_trn_d,
@@ -657,6 +657,12 @@ UZP2_q 00000101 10 1 ..... 000 011 ..... ..... @rd_rn_rm_e0
TRN1_q 00000101 10 1 ..... 000 110 ..... ..... @rd_rn_rm_e0
TRN2_q 00000101 10 1 ..... 000 111 ..... ..... @rd_rn_rm_e0
+# SVE2.1 permute vector elements (quadwords)
+ZIPQ1 01000100 .. 0 ..... 111 000 ..... ..... @rd_rn_rm
+ZIPQ2 01000100 .. 0 ..... 111 001 ..... ..... @rd_rn_rm
+UZPQ1 01000100 .. 0 ..... 111 010 ..... ..... @rd_rn_rm
+UZPQ2 01000100 .. 0 ..... 111 011 ..... ..... @rd_rn_rm
+
### SVE Permute - Predicated Group
# SVE compress active elements
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/helper-sve.h | 10 ++++++++++ target/arm/tcg/sve_helper.c | 29 +++++++++++++++++++++++++++++ target/arm/tcg/translate-sve.c | 19 ++++++++++++++++++- target/arm/tcg/sve.decode | 6 ++++++ 4 files changed, 63 insertions(+), 1 deletion(-)