Message ID | 20250107080112.1175095-8-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | tcg: Merge *_i32 and *_i64 opcodes | expand |
On 7/1/25 08:59, Richard Henderson wrote: > Rely on tcg-op-vec.c to expand the opcode if missing. > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/tcg/translate-sve.c | 20 ++++---------------- > 1 file changed, 4 insertions(+), 16 deletions(-) > > diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c > index 49d32fabc9..732453db6f 100644 > --- a/target/arm/tcg/translate-sve.c > +++ b/target/arm/tcg/translate-sve.c > @@ -596,14 +596,8 @@ static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) > static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, > TCGv_vec m, TCGv_vec k) > { > - if (TCG_TARGET_HAS_bitsel_vec) { > - tcg_gen_not_vec(vece, n, n); > - tcg_gen_bitsel_vec(vece, d, k, n, m); > - } else { Why aren't we doing the NOT n operation here? > - tcg_gen_andc_vec(vece, n, k, n); > - tcg_gen_andc_vec(vece, m, m, k); > - tcg_gen_or_vec(vece, d, n, m); > - } > + tcg_gen_not_vec(vece, n, n); > + tcg_gen_bitsel_vec(vece, d, k, n, m); > } > > static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, > @@ -640,14 +634,8 @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) > static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, > TCGv_vec m, TCGv_vec k) > { > - if (TCG_TARGET_HAS_bitsel_vec) { > - tcg_gen_not_vec(vece, m, m); > - tcg_gen_bitsel_vec(vece, d, k, n, m); > - } else { > - tcg_gen_and_vec(vece, n, n, k); > - tcg_gen_or_vec(vece, m, m, k); > - tcg_gen_orc_vec(vece, d, n, m); > - } > + tcg_gen_not_vec(vece, m, m); > + tcg_gen_bitsel_vec(vece, d, k, n, m); > } > > static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
On 1/8/25 09:46, Philippe Mathieu-Daudé wrote: > On 7/1/25 08:59, Richard Henderson wrote: >> Rely on tcg-op-vec.c to expand the opcode if missing. >> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> target/arm/tcg/translate-sve.c | 20 ++++---------------- >> 1 file changed, 4 insertions(+), 16 deletions(-) >> >> diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c >> index 49d32fabc9..732453db6f 100644 >> --- a/target/arm/tcg/translate-sve.c >> +++ b/target/arm/tcg/translate-sve.c >> @@ -596,14 +596,8 @@ static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, >> TCGv_i64 k) >> static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, >> TCGv_vec m, TCGv_vec k) >> { >> - if (TCG_TARGET_HAS_bitsel_vec) { >> - tcg_gen_not_vec(vece, n, n); >> - tcg_gen_bitsel_vec(vece, d, k, n, m); >> - } else { > > Why aren't we doing the NOT n operation here? > >> - tcg_gen_andc_vec(vece, n, k, n); >> - tcg_gen_andc_vec(vece, m, m, k); >> - tcg_gen_or_vec(vece, d, n, m); >> - } >> + tcg_gen_not_vec(vece, n, n); >> + tcg_gen_bitsel_vec(vece, d, k, n, m); Pardon? It's right there, unindented. Anyway, maybe I'll keep this, as it's still used on pre-avx512 x86 hosts. r~
On 8/1/25 22:38, Richard Henderson wrote: > On 1/8/25 09:46, Philippe Mathieu-Daudé wrote: >> On 7/1/25 08:59, Richard Henderson wrote: >>> Rely on tcg-op-vec.c to expand the opcode if missing. >>> >>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >>> --- >>> target/arm/tcg/translate-sve.c | 20 ++++---------------- >>> 1 file changed, 4 insertions(+), 16 deletions(-) >>> >>> diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/ >>> translate-sve.c >>> index 49d32fabc9..732453db6f 100644 >>> --- a/target/arm/tcg/translate-sve.c >>> +++ b/target/arm/tcg/translate-sve.c >>> @@ -596,14 +596,8 @@ static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 >>> n, TCGv_i64 m, TCGv_i64 k) >>> static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, >>> TCGv_vec m, TCGv_vec k) >>> { >>> - if (TCG_TARGET_HAS_bitsel_vec) { >>> - tcg_gen_not_vec(vece, n, n); >>> - tcg_gen_bitsel_vec(vece, d, k, n, m); >>> - } else { >> >> Why aren't we doing the NOT n operation here? >> >>> - tcg_gen_andc_vec(vece, n, k, n); >>> - tcg_gen_andc_vec(vece, m, m, k); >>> - tcg_gen_or_vec(vece, d, n, m); >>> - } >>> + tcg_gen_not_vec(vece, n, n); >>> + tcg_gen_bitsel_vec(vece, d, k, n, m); > > Pardon? It's right there, unindented. Sorry I'm not clear. Previous to your change, in the TCG_TARGET_HAS_bitsel_vec side we use the NOT opcode, but not in the other side where we expand, why? > Anyway, maybe I'll keep this, as it's still used on pre-avx512 x86 hosts. > > > r~
On 1/8/25 14:14, Philippe Mathieu-Daudé wrote: >>>> static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, >>>> TCGv_vec m, TCGv_vec k) >>>> { >>>> - if (TCG_TARGET_HAS_bitsel_vec) { >>>> - tcg_gen_not_vec(vece, n, n); >>>> - tcg_gen_bitsel_vec(vece, d, k, n, m); >>>> - } else { >>> >>> Why aren't we doing the NOT n operation here? >>> >>>> - tcg_gen_andc_vec(vece, n, k, n); >>>> - tcg_gen_andc_vec(vece, m, m, k); >>>> - tcg_gen_or_vec(vece, d, n, m); >>>> - } >>>> + tcg_gen_not_vec(vece, n, n); >>>> + tcg_gen_bitsel_vec(vece, d, k, n, m); >> >> Pardon? It's right there, unindented. > > Sorry I'm not clear. Previous to your change, in the > TCG_TARGET_HAS_bitsel_vec side we use the NOT opcode, > but not in the other side where we expand, why? Are you asking about the code being removed? Recall that bitsel = (n & k) | (m & ~k). Passing n = ~n' we get (~n & k) | (m & ~k), = (k & ~n) | (m & ~k). which is the two andc + or operations above. r~
On 8/1/25 23:30, Richard Henderson wrote: > On 1/8/25 14:14, Philippe Mathieu-Daudé wrote: >>>>> static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, >>>>> TCGv_vec m, TCGv_vec k) >>>>> { >>>>> - if (TCG_TARGET_HAS_bitsel_vec) { >>>>> - tcg_gen_not_vec(vece, n, n); >>>>> - tcg_gen_bitsel_vec(vece, d, k, n, m); >>>>> - } else { >>>> >>>> Why aren't we doing the NOT n operation here? >>>> >>>>> - tcg_gen_andc_vec(vece, n, k, n); [*] >>>>> - tcg_gen_andc_vec(vece, m, m, k); >>>>> - tcg_gen_or_vec(vece, d, n, m); >>>>> - } >>>>> + tcg_gen_not_vec(vece, n, n); >>>>> + tcg_gen_bitsel_vec(vece, d, k, n, m); >>> >>> Pardon? It's right there, unindented. >> >> Sorry I'm not clear. Previous to your change, in the >> TCG_TARGET_HAS_bitsel_vec side we use the NOT opcode, >> but not in the other side where we expand, why? > > Are you asking about the code being removed? > > Recall that bitsel = (n & k) | (m & ~k). > > Passing n = ~n' we get (~n & k) | (m & ~k), > = (k & ~n) | (m & ~k). > > which is the two andc + or operations above. Sorry, I misread the first ANDC [*] as AND... Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c index 49d32fabc9..732453db6f 100644 --- a/target/arm/tcg/translate-sve.c +++ b/target/arm/tcg/translate-sve.c @@ -596,14 +596,8 @@ static void gen_bsl1n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) static void gen_bsl1n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m, TCGv_vec k) { - if (TCG_TARGET_HAS_bitsel_vec) { - tcg_gen_not_vec(vece, n, n); - tcg_gen_bitsel_vec(vece, d, k, n, m); - } else { - tcg_gen_andc_vec(vece, n, k, n); - tcg_gen_andc_vec(vece, m, m, k); - tcg_gen_or_vec(vece, d, n, m); - } + tcg_gen_not_vec(vece, n, n); + tcg_gen_bitsel_vec(vece, d, k, n, m); } static void gen_bsl1n(unsigned vece, uint32_t d, uint32_t n, uint32_t m, @@ -640,14 +634,8 @@ static void gen_bsl2n_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 k) static void gen_bsl2n_vec(unsigned vece, TCGv_vec d, TCGv_vec n, TCGv_vec m, TCGv_vec k) { - if (TCG_TARGET_HAS_bitsel_vec) { - tcg_gen_not_vec(vece, m, m); - tcg_gen_bitsel_vec(vece, d, k, n, m); - } else { - tcg_gen_and_vec(vece, n, n, k); - tcg_gen_or_vec(vece, m, m, k); - tcg_gen_orc_vec(vece, d, n, m); - } + tcg_gen_not_vec(vece, m, m); + tcg_gen_bitsel_vec(vece, d, k, n, m); } static void gen_bsl2n(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
Rely on tcg-op-vec.c to expand the opcode if missing. Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/translate-sve.c | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-)