Message ID | 20210208024625.271018-13-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | tcg/arm: host neon support | expand |
On Mon, 8 Feb 2021 at 04:02, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> NEON has 3 instructions implementing this 4 argument operation,
> with each insn overlapping a different logical input onto the
> destination register.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> @@ -2899,6 +2904,18 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>          }
>          return;
>
> +    case INDEX_op_bitsel_vec:
> +        a3 = args[3];
> +        if (a0 == a3) {
> +            tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
> +        } else if (a0 == a2) {
> +            tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
> +        } else {
> +            tcg_out_mov(s, type, a0, a1);

Side note: aarch64 tcg guards this tcg_out_mov with "if (a0 != a1)",
which if I understand correctly is superfluous and could be removed.

> +            tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
> +        }
> +        return;
> +

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
On 2/8/21 11:55 AM, Peter Maydell wrote:
> On Mon, 8 Feb 2021 at 04:02, Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> NEON has 3 instructions implementing this 4 argument operation,
>> with each insn overlapping a different logical input onto the
>> destination register.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>
>> @@ -2899,6 +2904,18 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
>>          }
>>          return;
>>
>> +    case INDEX_op_bitsel_vec:
>> +        a3 = args[3];
>> +        if (a0 == a3) {
>> +            tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
>> +        } else if (a0 == a2) {
>> +            tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
>> +        } else {
>> +            tcg_out_mov(s, type, a0, a1);
>
> Side note: aarch64 tcg guards this tcg_out_mov with "if (a0 != a1)",
> which if I understand correctly is superfluous and could be removed.

Yep, tcg_out_mov already does that test.

r~
diff --git a/tcg/arm/tcg-target-con-set.h b/tcg/arm/tcg-target-con-set.h
index cc006f99cd..d02797cbf4 100644
--- a/tcg/arm/tcg-target-con-set.h
+++ b/tcg/arm/tcg-target-con-set.h
@@ -34,6 +34,7 @@ C_O1_I2(w, w, w)
 C_O1_I2(w, w, wO)
 C_O1_I2(w, w, wV)
 C_O1_I2(w, w, wZ)
+C_O1_I3(w, w, w, w)
 C_O1_I4(r, r, r, rI, rI)
 C_O1_I4(r, r, rIN, rIK, 0)
 C_O2_I1(r, r, l)
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 4815a34e75..d6222ba2db 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -169,7 +169,7 @@ extern bool use_neon_instructions;
 #define TCG_TARGET_HAS_mul_vec 1
 #define TCG_TARGET_HAS_sat_vec 1
 #define TCG_TARGET_HAS_minmax_vec 1
-#define TCG_TARGET_HAS_bitsel_vec 0
+#define TCG_TARGET_HAS_bitsel_vec 1
 #define TCG_TARGET_HAS_cmpsel_vec 0
 
 #define TCG_TARGET_DEFAULT_MO (0)
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index afd2807c09..875d975d4b 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -216,6 +216,10 @@ typedef enum {
     INSN_VSARI = 0xf2800010, /* VSHR.S */
     INSN_VSHRI = 0xf3800010, /* VSHR.U */
 
+    INSN_VBSL = 0xf3100110,
+    INSN_VBIT = 0xf3200110,
+    INSN_VBIF = 0xf3300110,
+
     INSN_VTST = 0xf2000810,
 
     INSN_VDUP_G = 0xee800b10, /* VDUP (ARM core register) */
@@ -2427,7 +2431,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
         return C_O1_I2(w, w, wV);
     case INDEX_op_cmp_vec:
         return C_O1_I2(w, w, wZ);
-
+    case INDEX_op_bitsel_vec:
+        return C_O1_I3(w, w, w, w);
     default:
         g_assert_not_reached();
     }
@@ -2748,7 +2753,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
 {
     TCGType type = vecl + TCG_TYPE_V64;
     unsigned q = vecl;
-    TCGArg a0, a1, a2;
+    TCGArg a0, a1, a2, a3;
     int cmode, imm8;
 
     a0 = args[0];
@@ -2899,6 +2904,18 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         }
         return;
 
+    case INDEX_op_bitsel_vec:
+        a3 = args[3];
+        if (a0 == a3) {
+            tcg_out_vreg3(s, INSN_VBIT, q, 0, a0, a2, a1);
+        } else if (a0 == a2) {
+            tcg_out_vreg3(s, INSN_VBIF, q, 0, a0, a3, a1);
+        } else {
+            tcg_out_mov(s, type, a0, a1);
+            tcg_out_vreg3(s, INSN_VBSL, q, 0, a0, a2, a3);
+        }
+        return;
+
     case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
     case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
     default:
@@ -2924,6 +2941,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
     case INDEX_op_sssub_vec:
     case INDEX_op_usadd_vec:
     case INDEX_op_ussub_vec:
+    case INDEX_op_bitsel_vec:
         return 1;
     case INDEX_op_abs_vec:
     case INDEX_op_cmp_vec:
NEON has 3 instructions implementing this 4 argument operation,
with each insn overlapping a different logical input onto the
destination register.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/arm/tcg-target-con-set.h |  1 +
 tcg/arm/tcg-target.h         |  2 +-
 tcg/arm/tcg-target.c.inc     | 22 ++++++++++++++++++++--
 3 files changed, 22 insertions(+), 3 deletions(-)

--
2.25.1