
[RFC,9/9] target/ppc: Use tcg_gen_sextract_tl

Message ID 20231023160944.10692-10-philmd@linaro.org
State New
Series tcg: Use tcg_gen_[s]extract_{i32,i64,tl}

Commit Message

Philippe Mathieu-Daudé Oct. 23, 2023, 4:09 p.m. UTC
Inspired-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
RFC: Please double-check 32/64 & bits
---
 target/ppc/translate.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)
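
For context: the old shli+sari pair and the new sextract compute the same
all-ones/all-zeros mask from a single bit of rB (bit 5 for the 32-bit
shifts, bit 6 for sld/srd, where the old constants are 0x39/0x3f). A
minimal standalone C sketch of the identity — illustrative only; the
function names and test harness are not part of the patch:

    #include <assert.h>
    #include <stdint.h>

    /* Old sequence (64-bit case): shift bit 5 up to the sign bit, then
     * arithmetic-shift it back, replicating it across all 64 bits.
     * (Relies on arithmetic right shift of signed values, as gcc/clang
     * implement it and as TCG's sari defines it.) */
    static int64_t mask_shl_sar(uint64_t rb)
    {
        return (int64_t)(rb << 58) >> 63;
    }

    /* New sequence: sextract(rb, 5, 1) sign-extends the one-bit field
     * at position 5, yielding 0 or -1 (all ones). */
    static int64_t mask_sextract(uint64_t rb)
    {
        return -(int64_t)((rb >> 5) & 1);
    }

    int main(void)
    {
        for (uint64_t rb = 0; rb < 0x100; rb++) {
            assert(mask_shl_sar(rb) == mask_sextract(rb));
        }
        return 0;
    }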

Comments

Richard Henderson Oct. 24, 2023, 1:04 a.m. UTC | #1
On 10/23/23 09:09, Philippe Mathieu-Daudé wrote:
> Inspired-by: Richard Henderson <richard.henderson@linaro.org>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
> RFC: Please double-check 32/64 & bits
> ---
>   target/ppc/translate.c | 22 ++++------------------
>   1 file changed, 4 insertions(+), 18 deletions(-)
> 
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index c6e1f7c2ca..1370db9bd5 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -2892,13 +2892,7 @@ static void gen_slw(DisasContext *ctx)
>   
>       t0 = tcg_temp_new();
>       /* AND rS with a mask that is 0 when rB >= 0x20 */
> -#if defined(TARGET_PPC64)
> -    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x3a);
> -    tcg_gen_sari_tl(t0, t0, 0x3f);
> -#else
> -    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1a);
> -    tcg_gen_sari_tl(t0, t0, 0x1f);
> -#endif
> +    tcg_gen_sextract_tl(t0, cpu_gpr[rB(ctx->opcode)], 5, 1);
>       tcg_gen_andc_tl(t0, cpu_gpr[rS(ctx->opcode)], t0);

Patch looks correct as is, so
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


However:
I'd be tempted to use and+movcond instead of sext+andc.
Also there is a special case of 32-bit shifts with a 64-bit shift count on ppc64.

#ifdef TARGET_PPC64
     tcg_gen_andi_tl(t0, rb, 0x3f);
#else
     tcg_gen_andi_tl(t0, rb, 0x1f);
     tcg_gen_andi_tl(t1, rb, 0x20);
     tcg_gen_movcond_tl(TCG_COND_NE, t1, t1, zero, zero, rs);
     rs = t1;
#endif
     tcg_gen_shl_tl(ra, rs, t0);
     tcg_gen_ext32u_tl(ra, ra);
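
[Reading aid: TCG's movcond has the semantics ret = (c1 cond c2) ? v1 : v2,
so the 32-bit #else branch above behaves like the following scalar C model —
a sketch of the suggestion with assumed names, not code from the patch:]

    #include <stdint.h>

    /* Scalar model of the and+movcond sequence (32-bit case), assuming
     * movcond semantics ret = (c1 cond c2) ? v1 : v2. */
    static uint32_t slw_model(uint32_t rs, uint32_t rb)
    {
        uint32_t sh  = rb & 0x1f;            /* usable shift amount */
        uint32_t src = (rb & 0x20) ? 0 : rs; /* NE: bit 5 set -> zero */
        return src << sh;                    /* ext32u is a no-op at 32 bits */
    }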


It also makes me wonder about adding some TCGCond for bit-test so that this could be

     tcg_gen_movcond_tl(TCG_COND_TSTNE, t1, rb, 0x20, 0, 0, rs);

and make use of the "test" vs "cmp" instructions on most hosts, but especially x86.
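
[For reference: in the bit-test conditions later merged into TCG (see the
series linked downthread), TCG_COND_TSTNE evaluates to true when
(c1 & c2) != 0, so the movcond above would select rs unless rb bit 5 is set.]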


r~
Nicholas Piggin Oct. 25, 2023, 7:09 a.m. UTC | #2
On Tue Oct 24, 2023 at 11:04 AM AEST, Richard Henderson wrote:
> On 10/23/23 09:09, Philippe Mathieu-Daudé wrote:
> > Inspired-by: Richard Henderson <richard.henderson@linaro.org>
> > Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> > ---
> > RFC: Please double-check 32/64 & bits
> > ---
> >   target/ppc/translate.c | 22 ++++------------------
> >   1 file changed, 4 insertions(+), 18 deletions(-)
> > 
> > diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> > index c6e1f7c2ca..1370db9bd5 100644
> > --- a/target/ppc/translate.c
> > +++ b/target/ppc/translate.c
> > @@ -2892,13 +2892,7 @@ static void gen_slw(DisasContext *ctx)
> >   
> >       t0 = tcg_temp_new();
> >       /* AND rS with a mask that is 0 when rB >= 0x20 */
> > -#if defined(TARGET_PPC64)
> > -    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x3a);
> > -    tcg_gen_sari_tl(t0, t0, 0x3f);
> > -#else
> > -    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1a);
> > -    tcg_gen_sari_tl(t0, t0, 0x1f);
> > -#endif
> > +    tcg_gen_sextract_tl(t0, cpu_gpr[rB(ctx->opcode)], 5, 1);
> >       tcg_gen_andc_tl(t0, cpu_gpr[rS(ctx->opcode)], t0);
>
> Patch looks correct as is, so
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

>
>
> However:
> I'd be tempted to use and+movcond instead of sext+andc.

That would be simpler / a more mechanical following of the specification
in the ISA. Might be better to save that for a later patch, though.
Any downsides for backend generation? On hosts without cmov?

> Also there is a special case of 32-bit shifts with a 64-bit shift count on ppc64.
>
> #ifdef TARGET_PPC64
>      tcg_gen_andi_tl(t0, rb, 0x3f);
> #else
>      tcg_gen_andi_tl(t0, rb, 0x1f);
>      tcg_gen_andi_tl(t1, rb, 0x20);
>      tcg_gen_movcond_tl(TCG_COND_NE, t1, t1, zero, zero, rs);
>      rs = t1;
> #endif
>      tcg_gen_shl_tl(ra, rs, t0);
>      tcg_gen_ext32u_tl(ra, ra);
>
>
> It also makes me wonder about adding some TCGCond for bit-test so that this could be
>
>      tcg_gen_movcond_tl(TCG_COND_TSTNE, t1, rb, 0x20, 0, 0, rs);
>
> and make use of the "test" vs "cmp" instructions on most hosts, but especially x86.

Might be useful.

Thanks,
Nick
Philippe Mathieu-Daudé Oct. 25, 2023, 7:33 a.m. UTC | #3
Hi Nicholas,

On 25/10/23 09:09, Nicholas Piggin wrote:
> On Tue Oct 24, 2023 at 11:04 AM AEST, Richard Henderson wrote:
>> On 10/23/23 09:09, Philippe Mathieu-Daudé wrote:
>>> Inspired-by: Richard Henderson <richard.henderson@linaro.org>
>>> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>>> ---
>>> RFC: Please double-check 32/64 & bits
>>> ---
>>>    target/ppc/translate.c | 22 ++++------------------
>>>    1 file changed, 4 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>>> index c6e1f7c2ca..1370db9bd5 100644
>>> --- a/target/ppc/translate.c
>>> +++ b/target/ppc/translate.c
>>> @@ -2892,13 +2892,7 @@ static void gen_slw(DisasContext *ctx)
>>>    
>>>        t0 = tcg_temp_new();
>>>        /* AND rS with a mask that is 0 when rB >= 0x20 */
>>> -#if defined(TARGET_PPC64)
>>> -    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x3a);
>>> -    tcg_gen_sari_tl(t0, t0, 0x3f);
>>> -#else
>>> -    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1a);
>>> -    tcg_gen_sari_tl(t0, t0, 0x1f);
>>> -#endif
>>> +    tcg_gen_sextract_tl(t0, cpu_gpr[rB(ctx->opcode)], 5, 1);
>>>        tcg_gen_andc_tl(t0, cpu_gpr[rS(ctx->opcode)], t0);
>>
>> Patch looks correct as is, so
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 
> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

So are you OK to take this patch as-is as a first iteration?

>> However:
>> I'd be tempted to use and+movcond instead of sext+andc.
> 
> That would be simpler / a more mechanical following of the specification
> in the ISA. Might be better to save that for a later patch, though.
> Any downsides for backend generation? On hosts without cmov?
> 
>> Also there is a special case of 32-bit shifts with a 64-bit shift count on ppc64.
>>
>> #ifdef TARGET_PPC64
>>       tcg_gen_andi_tl(t0, rb, 0x3f);
>> #else
>>       tcg_gen_andi_tl(t0, rb, 0x1f);
>>       tcg_gen_andi_tl(t1, rb, 0x20);
>>       tcg_gen_movcond_tl(TCG_COND_NE, t1, t1, zero, zero, rs);
>>       rs = t1;
>> #endif
>>       tcg_gen_shl_tl(ra, rs, t0);
>>       tcg_gen_ext32u_tl(ra, ra);
>>
>>
>> It also makes me wonder about adding some TCGCond for bit-test so that this could be
>>
>>       tcg_gen_movcond_tl(TCG_COND_TSTNE, t1, rb, 0x20, 0, 0, rs);
>>
>> and make use of the "test" vs "cmp" instructions on most hosts, but especially x86.
> 
> Might be useful.

We're now closer:
https://lore.kernel.org/qemu-devel/20231025072707.833943-1-richard.henderson@linaro.org/

:)
Richard Henderson Oct. 25, 2023, 7:38 a.m. UTC | #4
On 10/25/23 00:09, Nicholas Piggin wrote:
>> I'd be tempted to use and+movcond instead of sext+andc.
> 
> That would be simpler / a more mechanical following of the specification
> in the ISA. Might be better to save that for a later patch, though.
> Any downsides for backend generation? On hosts without cmov?

Probably not enough to worry about.

Virtually all extant hosts do have cmov.  Those that don't have it as part of the ISA *do*
have it in the TCG backend, implemented as a branch over the next instruction, for minimal
code size.
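
[Illustration: a hypothetical C model of that branch-over-next expansion for
movcond(TCG_COND_NE, ret, c1, c2, v1, v2); not taken from any particular
backend:]

    #include <stdint.h>

    static uint64_t movcond_ne_model(uint64_t c1, uint64_t c2,
                                     uint64_t v1, uint64_t v2)
    {
        uint64_t ret = v2;      /* assume the condition is false */
        if (c1 == c2) {         /* inverted test: branch over next insn */
            goto done;
        }
        ret = v1;               /* TCG_COND_NE held: take the true value */
    done:
        return ret;
    }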


r~
Richard Henderson Oct. 25, 2023, 8:26 p.m. UTC | #5
On 10/25/23 00:33, Philippe Mathieu-Daudé wrote:
> Hi Nicholas,
> 
> On 25/10/23 09:09, Nicholas Piggin wrote:
>> On Tue Oct 24, 2023 at 11:04 AM AEST, Richard Henderson wrote:
>>> On 10/23/23 09:09, Philippe Mathieu-Daudé wrote:
>>>> Inspired-by: Richard Henderson <richard.henderson@linaro.org>
>>>> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>>>> ---
>>>> RFC: Please double-check 32/64 & bits
>>>> ---
>>>>    target/ppc/translate.c | 22 ++++------------------
>>>>    1 file changed, 4 insertions(+), 18 deletions(-)
>>>>
>>>> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
>>>> index c6e1f7c2ca..1370db9bd5 100644
>>>> --- a/target/ppc/translate.c
>>>> +++ b/target/ppc/translate.c
>>>> @@ -2892,13 +2892,7 @@ static void gen_slw(DisasContext *ctx)
>>>>        t0 = tcg_temp_new();
>>>>        /* AND rS with a mask that is 0 when rB >= 0x20 */
>>>> -#if defined(TARGET_PPC64)
>>>> -    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x3a);
>>>> -    tcg_gen_sari_tl(t0, t0, 0x3f);
>>>> -#else
>>>> -    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1a);
>>>> -    tcg_gen_sari_tl(t0, t0, 0x1f);
>>>> -#endif
>>>> +    tcg_gen_sextract_tl(t0, cpu_gpr[rB(ctx->opcode)], 5, 1);
>>>>        tcg_gen_andc_tl(t0, cpu_gpr[rS(ctx->opcode)], t0);
>>>
>>> Patch looks correct as is, so
>>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>
>> Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
> 
> So are you OK to take this patch as-is as a first iteration?

I am, yes.  The simple fact of the ifdef removal is worth it.


r~

Patch

diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index c6e1f7c2ca..1370db9bd5 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -2892,13 +2892,7 @@ static void gen_slw(DisasContext *ctx)
 
     t0 = tcg_temp_new();
     /* AND rS with a mask that is 0 when rB >= 0x20 */
-#if defined(TARGET_PPC64)
-    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x3a);
-    tcg_gen_sari_tl(t0, t0, 0x3f);
-#else
-    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1a);
-    tcg_gen_sari_tl(t0, t0, 0x1f);
-#endif
+    tcg_gen_sextract_tl(t0, cpu_gpr[rB(ctx->opcode)], 5, 1);
     tcg_gen_andc_tl(t0, cpu_gpr[rS(ctx->opcode)], t0);
     t1 = tcg_temp_new();
     tcg_gen_andi_tl(t1, cpu_gpr[rB(ctx->opcode)], 0x1f);
@@ -2956,13 +2950,7 @@ static void gen_srw(DisasContext *ctx)
 
     t0 = tcg_temp_new();
     /* AND rS with a mask that is 0 when rB >= 0x20 */
-#if defined(TARGET_PPC64)
-    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x3a);
-    tcg_gen_sari_tl(t0, t0, 0x3f);
-#else
-    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1a);
-    tcg_gen_sari_tl(t0, t0, 0x1f);
-#endif
+    tcg_gen_sextract_tl(t0, cpu_gpr[rB(ctx->opcode)], 5, 1);
     tcg_gen_andc_tl(t0, cpu_gpr[rS(ctx->opcode)], t0);
     tcg_gen_ext32u_tl(t0, t0);
     t1 = tcg_temp_new();
@@ -2981,8 +2969,7 @@ static void gen_sld(DisasContext *ctx)
 
     t0 = tcg_temp_new();
     /* AND rS with a mask that is 0 when rB >= 0x40 */
-    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x39);
-    tcg_gen_sari_tl(t0, t0, 0x3f);
+    tcg_gen_sextract_tl(t0, cpu_gpr[rB(ctx->opcode)], 6, 1);
     tcg_gen_andc_tl(t0, cpu_gpr[rS(ctx->opcode)], t0);
     t1 = tcg_temp_new();
     tcg_gen_andi_tl(t1, cpu_gpr[rB(ctx->opcode)], 0x3f);
@@ -3071,8 +3058,7 @@ static void gen_srd(DisasContext *ctx)
 
     t0 = tcg_temp_new();
     /* AND rS with a mask that is 0 when rB >= 0x40 */
-    tcg_gen_shli_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x39);
-    tcg_gen_sari_tl(t0, t0, 0x3f);
+    tcg_gen_sextract_tl(t0, cpu_gpr[rB(ctx->opcode)], 6, 1);
     tcg_gen_andc_tl(t0, cpu_gpr[rS(ctx->opcode)], t0);
     t1 = tcg_temp_new();
     tcg_gen_andi_tl(t1, cpu_gpr[rB(ctx->opcode)], 0x3f);