diff mbox series

[13/29] tcg/i386: Support TCG_COND_TST{EQ,NE}

Message ID 20231026001542.1141412-27-richard.henderson@linaro.org
State New
Headers show
Series [PULL,01/94] target/sparc: Clear may_lookup for npc == DYNAMIC_PC | expand

Commit Message

Richard Henderson Oct. 26, 2023, 12:14 a.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/i386/tcg-target.c.inc | 43 +++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 13 deletions(-)

Comments

Paolo Bonzini Oct. 26, 2023, 11:29 a.m. UTC | #1
On 10/26/23 02:14, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---

Also, a TST{EQ,NE} with a one-bit immediate argument can be changed to:

- a TEST reg, reg + js/jns (or sets/setns, or cmovs/cmovns) when testing 
bits 7, 15 or 31

- a BT reg, imm + jc/jnc (or setc/setnc, or cmovc/cmovnc) when testing 
other bits in the 8..63 range.

I will take a look at using this to get rid of the mask field in 
CCPrepare, but I would not mind if someone else took a look at these 
code generation optimizations in tcg/i386.

Paolo

>   tcg/i386/tcg-target.c.inc | 43 +++++++++++++++++++++++++++------------
>   1 file changed, 30 insertions(+), 13 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
> index f4f456a2c0..0d97864174 100644
> --- a/tcg/i386/tcg-target.c.inc
> +++ b/tcg/i386/tcg-target.c.inc
> @@ -504,6 +504,8 @@ static const uint8_t tcg_cond_to_jcc[] = {
>       [TCG_COND_GEU] = JCC_JAE,
>       [TCG_COND_LEU] = JCC_JBE,
>       [TCG_COND_GTU] = JCC_JA,
> +    [TCG_COND_TSTEQ] = JCC_JE,
> +    [TCG_COND_TSTNE] = JCC_JNE,
>   };
>   
>   #if TCG_TARGET_REG_BITS == 64
> @@ -1419,12 +1421,14 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small)
>   }
>   
>   /* Test register R vs immediate bits I, setting Z flag for EQ/NE. */
> -static void __attribute__((unused))
> -tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i, int rexw)
> +static void tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i, int rexw)
>   {
>       if (i <= 0xff && (TCG_TARGET_REG_BITS == 64 || r < 4)) {
>           tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, r);
>           tcg_out8(s, i);
> +    } else if ((i & ~0xff00) == 0 && r < 4) {
> +        tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, r);
> +        tcg_out8(s, i >> 8);
>       } else {
>           tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_TESTi, r);
>           tcg_out32(s, i);
> @@ -1434,15 +1438,25 @@ tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i, int rexw)
>   static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
>                          TCGArg arg2, int const_arg2, int rexw)
>   {
> -    if (const_arg2) {
> -        if (arg2 == 0) {
> -            /* test r, r */
> +    if (is_tst_cond(cond)) {
> +        if (!const_arg2) {
> +            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg2);
> +        } else {
> +            if (!rexw) {
> +                arg2 = (uint32_t)arg2;
> +            } else if ((arg2 >> 31 >> 1) == 0) {
> +                rexw = 0;
> +            }
> +            tcg_out_testi(s, arg1, arg2, rexw);
> +        }
> +    } else {
> +        if (!const_arg2) {
> +            tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
> +        } else if (arg2 == 0) {
>               tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
>           } else {
>               tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
>           }
> -    } else {
> -        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
>       }
>       return tcg_cond_to_jcc[cond];
>   }
> @@ -1461,18 +1475,21 @@ static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
>   {
>       TCGLabel *label_next = gen_new_label();
>       TCGLabel *label_this = arg_label(args[5]);
> +    TCGCond cond = args[4];
>   
> -    switch(args[4]) {
> +    switch (cond) {
>       case TCG_COND_EQ:
> -        tcg_out_brcond(s, 0, TCG_COND_NE, args[0], args[2], const_args[2],
> -                       label_next, 1);
> -        tcg_out_brcond(s, 0, TCG_COND_EQ, args[1], args[3], const_args[3],
> +    case TCG_COND_TSTEQ:
> +        tcg_out_brcond(s, 0, tcg_invert_cond(cond),
> +                       args[0], args[2], const_args[2], label_next, 1);
> +        tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3],
>                          label_this, small);
>           break;
>       case TCG_COND_NE:
> -        tcg_out_brcond(s, 0, TCG_COND_NE, args[0], args[2], const_args[2],
> +    case TCG_COND_TSTNE:
> +        tcg_out_brcond(s, 0, cond, args[0], args[2], const_args[2],
>                          label_this, small);
> -        tcg_out_brcond(s, 0, TCG_COND_NE, args[1], args[3], const_args[3],
> +        tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3],
>                          label_this, small);
>           break;
>       case TCG_COND_LT:
Richard Henderson Oct. 26, 2023, 4:07 p.m. UTC | #2
On 10/26/23 04:29, Paolo Bonzini wrote:
> On 10/26/23 02:14, Richard Henderson wrote:
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
> 
> Also, a TST{EQ,NE} with a one-bit immediate argument can be changed to:
> 
> - a TEST reg, reg + js/jns (or sets/setns, or cmovs/cmovns) when testing bits 7, 15 or 31
> 
> - a BT reg, imm + jc/jnc (or setc/setnc, or cmovc/cmovnc) when testing other bits in the 
> 8..63 range.
> 
> I will take a look at using this to get rid of the mask field in CCPrepare, but I would 
> not mind if someone else took a look at these code generation optimizations in tcg/i386.

I thought about that while working on this series, and is part of the reason why 
tcg_out_cmp now returns a JCC_* value rather than having the caller look it up.

I thought I'd start simpler before adding these optimizations.


r~
Paolo Bonzini Oct. 27, 2023, 2:15 a.m. UTC | #3
On 10/26/23 02:14, Richard Henderson wrote:
> 
> +    } else if ((i & ~0xff00) == 0 && r < 4) {
> +        tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, r);

Should be "r + 4".

Paolo

> +        tcg_out8(s, i >> 8);
>      } else {
diff mbox series

Patch

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index f4f456a2c0..0d97864174 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -504,6 +504,8 @@  static const uint8_t tcg_cond_to_jcc[] = {
     [TCG_COND_GEU] = JCC_JAE,
     [TCG_COND_LEU] = JCC_JBE,
     [TCG_COND_GTU] = JCC_JA,
+    [TCG_COND_TSTEQ] = JCC_JE,
+    [TCG_COND_TSTNE] = JCC_JNE,
 };
 
 #if TCG_TARGET_REG_BITS == 64
@@ -1419,12 +1421,14 @@  static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small)
 }
 
 /* Test register R vs immediate bits I, setting Z flag for EQ/NE. */
-static void __attribute__((unused))
-tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i, int rexw)
+static void tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i, int rexw)
 {
     if (i <= 0xff && (TCG_TARGET_REG_BITS == 64 || r < 4)) {
         tcg_out_modrm(s, OPC_GRP3_Eb | P_REXB_RM, EXT3_TESTi, r);
         tcg_out8(s, i);
+    } else if ((i & ~0xff00) == 0 && r < 4) {
+        tcg_out_modrm(s, OPC_GRP3_Eb, EXT3_TESTi, r);
+        tcg_out8(s, i >> 8);
     } else {
         tcg_out_modrm(s, OPC_GRP3_Ev + rexw, EXT3_TESTi, r);
         tcg_out32(s, i);
@@ -1434,15 +1438,25 @@  tcg_out_testi(TCGContext *s, TCGReg r, uint32_t i, int rexw)
 static int tcg_out_cmp(TCGContext *s, TCGCond cond, TCGArg arg1,
                        TCGArg arg2, int const_arg2, int rexw)
 {
-    if (const_arg2) {
-        if (arg2 == 0) {
-            /* test r, r */
+    if (is_tst_cond(cond)) {
+        if (!const_arg2) {
+            tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg2);
+        } else {
+            if (!rexw) {
+                arg2 = (uint32_t)arg2;
+            } else if ((arg2 >> 31 >> 1) == 0) {
+                rexw = 0;
+            }
+            tcg_out_testi(s, arg1, arg2, rexw);
+        }
+    } else {
+        if (!const_arg2) {
+            tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
+        } else if (arg2 == 0) {
             tcg_out_modrm(s, OPC_TESTL + rexw, arg1, arg1);
         } else {
             tgen_arithi(s, ARITH_CMP + rexw, arg1, arg2, 0);
         }
-    } else {
-        tgen_arithr(s, ARITH_CMP + rexw, arg1, arg2);
     }
     return tcg_cond_to_jcc[cond];
 }
@@ -1461,18 +1475,21 @@  static void tcg_out_brcond2(TCGContext *s, const TCGArg *args,
 {
     TCGLabel *label_next = gen_new_label();
     TCGLabel *label_this = arg_label(args[5]);
+    TCGCond cond = args[4];
 
-    switch(args[4]) {
+    switch (cond) {
     case TCG_COND_EQ:
-        tcg_out_brcond(s, 0, TCG_COND_NE, args[0], args[2], const_args[2],
-                       label_next, 1);
-        tcg_out_brcond(s, 0, TCG_COND_EQ, args[1], args[3], const_args[3],
+    case TCG_COND_TSTEQ:
+        tcg_out_brcond(s, 0, tcg_invert_cond(cond),
+                       args[0], args[2], const_args[2], label_next, 1);
+        tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3],
                        label_this, small);
         break;
     case TCG_COND_NE:
-        tcg_out_brcond(s, 0, TCG_COND_NE, args[0], args[2], const_args[2],
+    case TCG_COND_TSTNE:
+        tcg_out_brcond(s, 0, cond, args[0], args[2], const_args[2],
                        label_this, small);
-        tcg_out_brcond(s, 0, TCG_COND_NE, args[1], args[3], const_args[3],
+        tcg_out_brcond(s, 0, cond, args[1], args[3], const_args[3],
                        label_this, small);
         break;
     case TCG_COND_LT: