@@ -2543,3 +2543,9 @@ DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(sve2_fminp_zpzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_eor3, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_bcax, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_bsl1n, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_bsl2n, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_nbsl, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
@@ -124,6 +124,10 @@
@rda_rn_rm ........ esz:2 . rm:5 ... ... rn:5 rd:5 \
&rrrr_esz ra=%reg_movprfx
+# Four operand with unused vector element size
+@rdn_ra_rm_e0 ........ ... rm:5 ... ... ra:5 rd:5 \
+ &rrrr_esz esz=0 rn=%reg_movprfx
+
# Three operand with "memory" size, aka immediate left shift
@rd_rn_msz_rm ........ ... rm:5 .... imm:2 rn:5 rd:5 &rrri
@@ -379,6 +383,14 @@ ORR_zzz 00000100 01 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
EOR_zzz 00000100 10 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
BIC_zzz 00000100 11 1 ..... 001 100 ..... ..... @rd_rn_rm_e0
+# SVE2 bitwise ternary operations
+EOR3 00000100 00 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0
+BSL 00000100 00 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
+BCAX 00000100 01 1 ..... 001 110 ..... ..... @rdn_ra_rm_e0
+BSL1N 00000100 01 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
+BSL2N 00000100 10 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
+NBSL 00000100 11 1 ..... 001 111 ..... ..... @rdn_ra_rm_e0
+
### SVE Index Generation Group
# SVE index generation (immediate start, immediate increment)
@@ -6825,3 +6825,53 @@ DO_ST1_ZPZ_D(dd_be, zd, MO_64)
#undef DO_ST1_ZPZ_S
#undef DO_ST1_ZPZ_D
+
+void HELPER(sve2_eor3)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = n[i] ^ m[i] ^ k[i];
+ }
+}
+
+void HELPER(sve2_bcax)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = n[i] ^ (m[i] & ~k[i]);
+ }
+}
+
+void HELPER(sve2_bsl1n)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = (~n[i] & k[i]) | (m[i] & ~k[i]);
+ }
+}
+
+void HELPER(sve2_bsl2n)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = (n[i] & k[i]) | (~m[i] & ~k[i]);
+ }
+}
+
+void HELPER(sve2_nbsl)(void *vd, void *vn, void *vm, void *vk, uint32_t desc)
+{
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+ uint64_t *d = vd, *n = vn, *m = vm, *k = vk;
+
+ for (i = 0; i < opr_sz; ++i) {
+ d[i] = ~((n[i] & k[i]) | (m[i] & ~k[i]));
+ }
+}
@@ -217,6 +217,17 @@ static void gen_gvec_fn_zzz(DisasContext *s, GVecGen3Fn *gvec_fn,
vec_full_reg_offset(s, rm), vsz, vsz);
}
+/* Invoke a vector expander on four Zregs. */
+static void gen_gvec_fn_zzzz(DisasContext *s, GVecGen4Fn *gvec_fn,
+ int esz, int rd, int rn, int rm, int ra)
+{
+ unsigned vsz = vec_full_reg_size(s);
+ gvec_fn(esz, vec_full_reg_offset(s, rd),
+ vec_full_reg_offset(s, rn),
+ vec_full_reg_offset(s, rm),
+ vec_full_reg_offset(s, ra), vsz, vsz);
+}
+
/* Invoke a vector move on two Zregs. */
static bool do_mov_z(DisasContext *s, int rd, int rn)
{
@@ -329,6 +340,208 @@ static bool trans_BIC_zzz(DisasContext *s, arg_rrr_esz *a)
return do_zzz_fn(s, a, tcg_gen_gvec_andc);
}
+static bool do_sve2_zzzz_fn(DisasContext *s, arg_rrrr_esz *a, GVecGen4Fn *fn)
+{
+ if (!dc_isar_feature(aa64_sve2, s)) {
+ return false;
+ }
+ if (sve_access_check(s)) {
+ gen_gvec_fn_zzzz(s, fn, a->esz, a->rd, a->rn, a->rm, a->ra);
+ }
+ return true;
+}
+
+static void gen_eor3_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_xor_i64(d, n, m);
+ tcg_gen_xor_i64(d, d, k);
+}
+
+static void gen_eor3_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ tcg_gen_xor_vec(vece, d, n, m);
+ tcg_gen_xor_vec(vece, d, d, k);
+}
+
+static void gen_eor3(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_eor3_i64,
+ .fniv = gen_eor3_vec,
+ .fno = gen_helper_sve2_eor3,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+static bool trans_EOR3(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_sve2_zzzz_fn(s, a, gen_eor3);
+}
+
+static void gen_bcax_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_andc_i64(d, m, k);
+ tcg_gen_xor_i64(d, d, n);
+}
+
+static void gen_bcax_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ tcg_gen_andc_vec(vece, d, m, k);
+ tcg_gen_xor_vec(vece, d, d, n);
+}
+
+static void gen_bcax(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_bcax_i64,
+ .fniv = gen_bcax_vec,
+ .fno = gen_helper_sve2_bcax,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+static bool trans_BCAX(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_sve2_zzzz_fn(s, a, gen_bcax);
+}
+
+static void gen_bsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ /* BSL differs from the generic bitsel in argument ordering. */
+ tcg_gen_gvec_bitsel(vece, d, a, n, m, oprsz, maxsz);
+}
+
+static bool trans_BSL(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_sve2_zzzz_fn(s, a, gen_bsl);
+}
+
+static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_andc_i64(n, k, n);
+ tcg_gen_andc_i64(m, m, k);
+ tcg_gen_or_i64(d, n, m);
+}
+
+static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ if (TCG_TARGET_HAS_bitsel_vec) {
+ tcg_gen_not_vec(vece, n, n);
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
+ } else {
+ tcg_gen_andc_vec(vece, n, k, n);
+ tcg_gen_andc_vec(vece, m, m, k);
+ tcg_gen_or_vec(vece, d, n, m);
+ }
+}
+
+static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_bsl1n_i64,
+ .fniv = gen_bsl1n_vec,
+ .fno = gen_helper_sve2_bsl1n,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+static bool trans_BSL1N(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_sve2_zzzz_fn(s, a, gen_bsl1n);
+}
+
+static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ /*
+ * Z[dn] = (n & k) | (~m & ~k)
+ * = | ~(m | k)
+ */
+ tcg_gen_and_i64(n, n, k);
+ if (TCG_TARGET_HAS_orc_i64) {
+ tcg_gen_or_i64(m, m, k);
+ tcg_gen_orc_i64(d, n, m);
+ } else {
+ tcg_gen_nor_i64(m, m, k);
+ tcg_gen_or_i64(d, n, m);
+ }
+}
+
+static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ if (TCG_TARGET_HAS_bitsel_vec) {
+ tcg_gen_not_vec(vece, m, m);
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
+ } else {
+ tcg_gen_and_vec(vece, n, n, k);
+ tcg_gen_or_vec(vece, m, m, k);
+ tcg_gen_orc_vec(vece, d, n, m);
+ }
+}
+
+static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_bsl2n_i64,
+ .fniv = gen_bsl2n_vec,
+ .fno = gen_helper_sve2_bsl2n,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+static bool trans_BSL2N(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_sve2_zzzz_fn(s, a, gen_bsl2n);
+}
+
+static void gen_nbsl_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k)
+{
+ tcg_gen_and_i64(n, n, k);
+ tcg_gen_andc_i64(m, m, k);
+ tcg_gen_nor_i64(d, n, m);
+}
+
+static void gen_nbsl_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+ TCGv_vec m, TCGv_vec k)
+{
+ tcg_gen_bitsel_vec(vece, d, k, n, m);
+ tcg_gen_not_vec(vece, d, d);
+}
+
+static void gen_nbsl(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+ uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+ static const GVecGen4 op = {
+ .fni8 = gen_nbsl_i64,
+ .fniv = gen_nbsl_vec,
+ .fno = gen_helper_sve2_nbsl,
+ .vece = MO_64,
+ .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+ };
+ tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &op);
+}
+
+static bool trans_NBSL(DisasContext *s, arg_rrrr_esz *a)
+{
+ return do_sve2_zzzz_fn(s, a, gen_nbsl);
+}
+
/*
*** SVE Integer Arithmetic - Unpredicated Group
*/
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/helper-sve.h | 6 ++ target/arm/sve.decode | 12 +++ target/arm/sve_helper.c | 50 +++++++++ target/arm/translate-sve.c | 213 +++++++++++++++++++++++++++++++++++++ 4 files changed, 281 insertions(+)