@@ -297,10 +297,9 @@ void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, sh - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_sari_vec(vece, d, a, sh);
tcg_gen_add_vec(vece, d, d, t);
@@ -492,10 +491,9 @@ void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec ones = tcg_temp_new_vec_matching(d);
+ TCGv_vec ones = tcg_constant_vec_matching(d, vece, 1);
tcg_gen_shri_vec(vece, t, a, shift - 1);
- tcg_gen_dupi_vec(vece, ones, 1);
tcg_gen_and_vec(vece, t, t, ones);
tcg_gen_shri_vec(vece, d, a, shift);
tcg_gen_add_vec(vece, d, d, t);
@@ -685,9 +683,9 @@ static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ int64_t mi = MAKE_64BIT_MASK((8 << vece) - sh, sh);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, mi);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
tcg_gen_shri_vec(vece, t, a, sh);
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
@@ -773,10 +771,9 @@ static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
TCGv_vec t = tcg_temp_new_vec_matching(d);
- TCGv_vec m = tcg_temp_new_vec_matching(d);
+ TCGv_vec m = tcg_constant_vec_matching(d, vece, MAKE_64BIT_MASK(0, sh));
tcg_gen_shli_vec(vece, t, a, sh);
- tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
tcg_gen_and_vec(vece, d, d, m);
tcg_gen_or_vec(vece, d, d, t);
}
@@ -1044,14 +1041,13 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec rval = tcg_temp_new_vec_matching(dst);
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
- TCGv_vec msk, max;
+ TCGv_vec max;
tcg_gen_neg_vec(vece, rsh, shift);
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- msk = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, msk, 0xff);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
tcg_gen_and_vec(vece, lsh, shift, msk);
tcg_gen_and_vec(vece, rsh, rsh, msk);
}
@@ -1064,9 +1060,6 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_shrv_vec(vece, rval, src, rsh);
- max = tcg_temp_new_vec_matching(dst);
- tcg_gen_dupi_vec(vece, max, 8 << vece);
-
/*
* The choice of LT (signed) and GEU (unsigned) are biased toward
* the instructions of the x86_64 host. For MO_8, the whole byte
@@ -1074,6 +1067,7 @@ static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
* have already masked to a byte and so a signed compare works.
* Other tcg hosts have a full set of comparisons and do not care.
*/
+ max = tcg_constant_vec_matching(dst, vece, 8 << vece);
if (vece == MO_8) {
tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
@@ -1170,6 +1164,7 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
+ TCGv_vec max, zero;
/*
* Rely on the TCG guarantee that out of range shifts produce
@@ -1180,15 +1175,15 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
if (vece == MO_8) {
tcg_gen_mov_vec(lsh, shift);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0xff);
- tcg_gen_and_vec(vece, lsh, shift, tmp);
- tcg_gen_and_vec(vece, rsh, rsh, tmp);
+ TCGv_vec msk = tcg_constant_vec_matching(dst, vece, 0xff);
+ tcg_gen_and_vec(vece, lsh, shift, msk);
+ tcg_gen_and_vec(vece, rsh, rsh, msk);
}
/* Bound rsh so out of bound right shift gets -1. */
- tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
- tcg_gen_umin_vec(vece, rsh, rsh, tmp);
- tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
+ max = tcg_constant_vec_matching(dst, vece, (8 << vece) - 1);
+ tcg_gen_umin_vec(vece, rsh, rsh, max);
+ tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, max);
tcg_gen_shlv_vec(vece, lval, src, lsh);
tcg_gen_sarv_vec(vece, rval, src, rsh);
@@ -1197,12 +1192,12 @@ static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
tcg_gen_andc_vec(vece, lval, lval, tmp);
/* Select between left and right shift. */
+ zero = tcg_constant_vec_matching(dst, vece, 0);
if (vece == MO_8) {
- tcg_gen_dupi_vec(vece, tmp, 0);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, zero, rval, lval);
} else {
- tcg_gen_dupi_vec(vece, tmp, 0x80);
- tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
+ TCGv_vec sgn = tcg_constant_vec_matching(dst, vece, 0x80);
+ tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, sgn, lval, rval);
}
}
Instead of copying a constant into a temporary with dupi,
use a vector constant directly.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/gengvec.c | 43 ++++++++++++++++++----------------------
 1 file changed, 19 insertions(+), 24 deletions(-)