diff mbox series

[PULL,4/9] tcg/optimize: Optimize setcond with zmask

Message ID 20240507143309.5528-5-richard.henderson@linaro.org
State Accepted
Commit 8d65cda7284edf31998778f92813bc6ef1e6ab77
Headers show
Series [PULL,1/9] tcg: Add write_aofs to GVecGen3i | expand

Commit Message

Richard Henderson May 7, 2024, 2:33 p.m. UTC
If we can show that high bits of an input are zero,
then we may optimize away some comparisons.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/optimize.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 110 insertions(+)
diff mbox series

Patch

diff --git a/tcg/optimize.c b/tcg/optimize.c
index 2e9e5725a9..8886f7037a 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2099,6 +2099,108 @@  static bool fold_remainder(OptContext *ctx, TCGOp *op)
     return false;
 }
 
+static bool fold_setcond_zmask(OptContext *ctx, TCGOp *op, bool neg)
+{   /* Simplify [neg]setcond of A vs constant B using A's z_mask. */
+    uint64_t a_zmask, b_val;    /* z_mask of A; constant value of B */
+    TCGCond cond;               /* comparison, taken from op->args[3] */
+
+    if (!arg_is_const(op->args[2])) {
+        return false;           /* only a constant B is handled */
+    }
+
+    a_zmask = arg_info(op->args[1])->z_mask;
+    b_val = arg_info(op->args[2])->val;
+    cond = op->args[3];
+
+    if (ctx->type == TCG_TYPE_I32) {    /* consider only the low 32 bits */
+        a_zmask = (uint32_t)a_zmask;
+        b_val = (uint32_t)b_val;
+    }
+
+    /*
+     * A cannot exceed a_zmask, so a_zmask < B proves A < B (unsigned).
+     */
+    if (a_zmask < b_val) {
+        bool inv = false;       /* the constant truth value of the compare */
+
+        switch (cond) {
+        case TCG_COND_NE:
+        case TCG_COND_LEU:
+        case TCG_COND_LTU:
+            inv = true;         /* A != B, A <= B, A < B always hold */
+            /* fall through */
+        case TCG_COND_GTU:      /* A > B, A >= B, A == B never hold */
+        case TCG_COND_GEU:
+        case TCG_COND_EQ:
+            return tcg_opt_gen_movi(ctx, op, op->args[0], neg ? -inv : inv);
+        default:                /* other conditions are not decided here */
+            break;
+        }
+    }
+
+    /*
+     * A with only lsb set is already boolean: reuse A or its inversion.
+     */
+    if (a_zmask <= 1) {
+        bool convert = false;   /* true once the compare reduces to A or !A */
+        bool inv = false;       /* true when the compare equals !A */
+
+        switch (cond) {
+        case TCG_COND_EQ:
+            inv = true;         /* A == 0 is !A */
+            /* fall through */
+        case TCG_COND_NE:
+            convert = (b_val == 0);     /* A != 0 is A */
+            break;
+        case TCG_COND_LTU:
+        case TCG_COND_TSTEQ:
+            inv = true;         /* A < 1 is !A; TSTEQ 1 presumably too */
+            /* fall through */
+        case TCG_COND_GEU:
+        case TCG_COND_TSTNE:    /* presumably (A & B) != 0 -- confirm */
+            convert = (b_val == 1);     /* A >= 1 is A */
+            break;
+        default:
+            break;
+        }
+        if (convert) {
+            TCGOpcode add_opc, xor_opc, neg_opc;
+
+            if (!inv && !neg) { /* plain setcond yielding A itself */
+                return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
+            }
+
+            switch (ctx->type) {    /* pick opcodes of the operand width */
+            case TCG_TYPE_I32:
+                add_opc = INDEX_op_add_i32;
+                neg_opc = INDEX_op_neg_i32;
+                xor_opc = INDEX_op_xor_i32;
+                break;
+            case TCG_TYPE_I64:
+                add_opc = INDEX_op_add_i64;
+                neg_opc = INDEX_op_neg_i64;
+                xor_opc = INDEX_op_xor_i64;
+                break;
+            default:
+                g_assert_not_reached();
+            }
+
+            if (!inv) {
+                op->opc = neg_opc;  /* neg is set here: -A maps 0/1 to 0/-1 */
+            } else if (neg) {
+                op->opc = add_opc;  /* A + -1 maps 0/1 to -1/0 */
+                op->args[2] = arg_new_constant(ctx, -1);
+            } else {
+                op->opc = xor_opc;  /* A ^ 1 maps 0/1 to 1/0 */
+                op->args[2] = arg_new_constant(ctx, 1);
+            }
+            return false;   /* NOTE(review): caller keeps folding this op */
+        }
+    }
+
+    return false;           /* nothing could be simplified */
+}
+
 static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
 {
     TCGOpcode and_opc, sub_opc, xor_opc, neg_opc, shr_opc;
@@ -2200,6 +2302,10 @@  static bool fold_setcond(OptContext *ctx, TCGOp *op)
     if (i >= 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
     }
+
+    if (fold_setcond_zmask(ctx, op, false)) {
+        return true;
+    }
     fold_setcond_tst_pow2(ctx, op, false);
 
     ctx->z_mask = 1;
@@ -2214,6 +2320,10 @@  static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
     if (i >= 0) {
         return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
     }
+
+    if (fold_setcond_zmask(ctx, op, true)) {
+        return true;
+    }
     fold_setcond_tst_pow2(ctx, op, true);
 
     /* Value is {0,-1} so all bits are repetitions of the sign. */