Message ID | 20210208024625.271018-14-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | tcg/arm: host neon support | expand |
On Mon, 8 Feb 2021 at 03:28, Richard Henderson <richard.henderson@linaro.org> wrote: > > The three vector shift by vector operations are all implemented via > expansion. Therefore do not actually set TCG_TARGET_HAS_shv_vec, > as none of shlv_vec, shrv_vec, sarv_vec may actually appear in the > instruction stream, and therefore also do not appear in tcg_target_op_def. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > tcg/arm/tcg-target.opc.h | 3 ++ > tcg/arm/tcg-target.c.inc | 61 +++++++++++++++++++++++++++++++++++++++- > 2 files changed, 63 insertions(+), 1 deletion(-) > + switch (opc) { > + case INDEX_op_shlv_vec: > + /* > + * Merely propagate shlv_vec to arm_ushl_vec. > + * In this way we don't set TCG_TARGET_HAS_shv_vec > + * because everything is done via expansion. > + */ > + v2 = temp_tcgv_vec(arg_temp(a2)); > + vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0), > + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); > + break; tcg/aarch64 seems to set TCG_TARGET_HAS_shv_vec and only do the right-shifts via expand_op. Is there a difference between the two that means Neon has to do it this way, or is it just a "works either way" thing? > + Reviewed-by: Peter Maydell <peter.maydell@linaro.org> thanks -- PMM
On 2/8/21 12:50 PM, Peter Maydell wrote: > On Mon, 8 Feb 2021 at 03:28, Richard Henderson > <richard.henderson@linaro.org> wrote: >> >> The three vector shift by vector operations are all implemented via >> expansion. Therefore do not actually set TCG_TARGET_HAS_shv_vec, >> as none of shlv_vec, shrv_vec, sarv_vec may actually appear in the >> instruction stream, and therefore also do not appear in tcg_target_op_def. >> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> tcg/arm/tcg-target.opc.h | 3 ++ >> tcg/arm/tcg-target.c.inc | 61 +++++++++++++++++++++++++++++++++++++++- >> 2 files changed, 63 insertions(+), 1 deletion(-) > >> + switch (opc) { >> + case INDEX_op_shlv_vec: >> + /* >> + * Merely propagate shlv_vec to arm_ushl_vec. >> + * In this way we don't set TCG_TARGET_HAS_shv_vec >> + * because everything is done via expansion. >> + */ >> + v2 = temp_tcgv_vec(arg_temp(a2)); >> + vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0), >> + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); >> + break; > > tcg/aarch64 seems to set TCG_TARGET_HAS_shv_vec and > only do the right-shifts via expand_op. Is there a difference > between the two that means Neon has to do it this way, or is it > just a "works either way" thing? It's a works either way thing. r~ > >> + > > Reviewed-by: Peter Maydell <peter.maydell@linaro.org> > > thanks > -- PMM >
diff --git a/tcg/arm/tcg-target.opc.h b/tcg/arm/tcg-target.opc.h index 7a4578e9b4..d19153dcb9 100644 --- a/tcg/arm/tcg-target.opc.h +++ b/tcg/arm/tcg-target.opc.h @@ -10,3 +10,6 @@ * emitted by tcg_expand_vec_op. For those familiar with GCC internals, * consider these to be UNSPEC with names. */ + +DEF(arm_sshl_vec, 1, 2, 0, IMPLVEC) +DEF(arm_ushl_vec, 1, 2, 0, IMPLVEC) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 875d975d4b..b088f61a99 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -215,6 +215,8 @@ typedef enum { INSN_VSHLI = 0xf2800510, /* VSHL (immediate) */ INSN_VSARI = 0xf2800010, /* VSHR.S */ INSN_VSHRI = 0xf3800010, /* VSHR.U */ + INSN_VSHL_S = 0xf2000400, /* VSHL.S (register) */ + INSN_VSHL_U = 0xf3000400, /* VSHL.U (register) */ INSN_VBSL = 0xf3100110, INSN_VBIT = 0xf3200110, @@ -2422,6 +2424,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_usadd_vec: case INDEX_op_ussub_vec: case INDEX_op_xor_vec: + case INDEX_op_arm_sshl_vec: + case INDEX_op_arm_ushl_vec: return C_O1_I2(w, w, w); case INDEX_op_or_vec: case INDEX_op_andc_vec: @@ -2818,6 +2822,17 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, case INDEX_op_xor_vec: tcg_out_vreg3(s, INSN_VEOR, q, 0, a0, a1, a2); return; + case INDEX_op_arm_sshl_vec: + /* + * Note that Vm is the data and Vn is the shift count, + * therefore the arguments appear reversed. + */ + tcg_out_vreg3(s, INSN_VSHL_S, q, vece, a0, a2, a1); + return; + case INDEX_op_arm_ushl_vec: + /* See above. */ + tcg_out_vreg3(s, INSN_VSHL_U, q, vece, a0, a2, a1); + return; case INDEX_op_shli_vec: tcg_out_vshifti(s, INSN_VSHLI, q, a0, a1, a2 + (8 << vece)); return; @@ -2952,6 +2967,10 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) case INDEX_op_umax_vec: case INDEX_op_umin_vec: return vece < MO_64; + case INDEX_op_shlv_vec: + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + return -1; default: return 0; } @@ -2960,7 +2979,47 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece) void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece, TCGArg a0, ...) { - g_assert_not_reached(); + va_list va; + TCGv_vec v0, v1, v2, t1; + TCGArg a2; + + va_start(va, a0); + v0 = temp_tcgv_vec(arg_temp(a0)); + v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg))); + a2 = va_arg(va, TCGArg); + va_end(va); + + switch (opc) { + case INDEX_op_shlv_vec: + /* + * Merely propagate shlv_vec to arm_ushl_vec. + * In this way we don't set TCG_TARGET_HAS_shv_vec + * because everything is done via expansion. + */ + v2 = temp_tcgv_vec(arg_temp(a2)); + vec_gen_3(INDEX_op_arm_ushl_vec, type, vece, tcgv_vec_arg(v0), + tcgv_vec_arg(v1), tcgv_vec_arg(v2)); + break; + + case INDEX_op_shrv_vec: + case INDEX_op_sarv_vec: + /* Right shifts are negative left shifts for NEON. */ + v2 = temp_tcgv_vec(arg_temp(a2)); + t1 = tcg_temp_new_vec(type); + tcg_gen_neg_vec(vece, t1, v2); + if (opc == INDEX_op_shrv_vec) { + opc = INDEX_op_arm_ushl_vec; + } else { + opc = INDEX_op_arm_sshl_vec; + } + vec_gen_3(opc, type, vece, tcgv_vec_arg(v0), + tcgv_vec_arg(v1), tcgv_vec_arg(t1)); + tcg_temp_free_vec(t1); + break; + + default: + g_assert_not_reached(); + } } static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
The three vector shift by vector operations are all implemented via expansion. Therefore do not actually set TCG_TARGET_HAS_shv_vec, as none of shlv_vec, shrv_vec, sarv_vec may actually appear in the instruction stream, and therefore also do not appear in tcg_target_op_def. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/arm/tcg-target.opc.h | 3 ++ tcg/arm/tcg-target.c.inc | 61 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 1 deletion(-) -- 2.25.1