diff mbox series

[09/11] tcg/riscv: Improve setcond expansion

Message ID 20230503085657.1814850-10-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg/riscv: Support for Zba, Zbb, Zicond extensions | expand

Commit Message

Richard Henderson May 3, 2023, 8:56 a.m. UTC
Split out a helper function, tcg_out_setcond_int, which does not
always produce the complete boolean result, but returns a set of
flags describing how the caller can complete it.

Based on 21af16198425, the same improvement for loongarch64.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/riscv/tcg-target.c.inc | 164 +++++++++++++++++++++++++++----------
 1 file changed, 121 insertions(+), 43 deletions(-)

Comments

Daniel Henrique Barboza May 8, 2023, 12:46 p.m. UTC | #1
On 5/3/23 05:56, Richard Henderson wrote:
> Split out a helper function, tcg_out_setcond_int, which does not
> always produce the complete boolean result, but returns a set of
> flags to do so.
> 
> Based on 21af16198425, the same improvement for loongarch64.
> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---

Reviewed-by: Daniel Henrique Barboza <dbarboza@ventanamicro.com>

>   tcg/riscv/tcg-target.c.inc | 164 +++++++++++++++++++++++++++----------
>   1 file changed, 121 insertions(+), 43 deletions(-)
> 
> diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
> index 044ddfb160..84b646105c 100644
> --- a/tcg/riscv/tcg-target.c.inc
> +++ b/tcg/riscv/tcg-target.c.inc
> @@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
>       tcg_out_opc_branch(s, op, arg1, arg2, 0);
>   }
>   
> -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
> -                            TCGReg arg1, TCGReg arg2)
> +#define SETCOND_INV    TCG_TARGET_NB_REGS
> +#define SETCOND_NEZ    (SETCOND_INV << 1)
> +#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
> +
> +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
> +                               TCGReg arg1, tcg_target_long arg2, bool c2)
>   {
> +    int flags = 0;
> +
>       switch (cond) {
> -    case TCG_COND_EQ:
> -        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
> -        tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
> -        break;
> -    case TCG_COND_NE:
> -        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
> -        break;
> -    case TCG_COND_LT:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> -        break;
> -    case TCG_COND_GE:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_LE:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_GT:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
> -        break;
> -    case TCG_COND_LTU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> -        break;
> -    case TCG_COND_GEU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_LEU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_GTU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
> +    case TCG_COND_EQ:    /* -> NE  */
> +    case TCG_COND_GE:    /* -> LT  */
> +    case TCG_COND_GEU:   /* -> LTU */
> +    case TCG_COND_GT:    /* -> LE  */
> +    case TCG_COND_GTU:   /* -> LEU */
> +        cond = tcg_invert_cond(cond);
> +        flags ^= SETCOND_INV;
>           break;
>       default:
> -         g_assert_not_reached();
> -         break;
> -     }
> +        break;
> +    }
> +
> +    switch (cond) {
> +    case TCG_COND_LE:
> +    case TCG_COND_LEU:
> +        /*
> +         * If we have a constant input, the most efficient way to implement
> +         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
> +         * We don't need to care for this for LE because the constant input
> +         * is constrained to signed 12-bit, and 0x800 is representable in the
> +         * temporary register.
> +         */
> +        if (c2) {
> +            if (cond == TCG_COND_LEU) {
> +                /* unsigned <= -1 is true */
> +                if (arg2 == -1) {
> +                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
> +                    return ret;
> +                }
> +                cond = TCG_COND_LTU;
> +            } else {
> +                cond = TCG_COND_LT;
> +            }
> +            tcg_debug_assert(arg2 <= 0x7ff);
> +            if (++arg2 == 0x800) {
> +                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
> +                arg2 = TCG_REG_TMP0;
> +                c2 = false;
> +            }
> +        } else {
> +            TCGReg tmp = arg2;
> +            arg2 = arg1;
> +            arg1 = tmp;
> +            cond = tcg_swap_cond(cond);    /* LE -> GE */
> +            cond = tcg_invert_cond(cond);  /* GE -> LT */
> +            flags ^= SETCOND_INV;
> +        }
> +        break;
> +    default:
> +        break;
> +    }
> +
> +    switch (cond) {
> +    case TCG_COND_NE:
> +        flags |= SETCOND_NEZ;
> +        if (!c2) {
> +            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
> +        } else if (arg2 == 0) {
> +            ret = arg1;
> +        } else {
> +            tcg_out_opc_reg(s, OPC_XORI, ret, arg1, arg2);
> +        }
> +        break;
> +
> +    case TCG_COND_LT:
> +        if (c2) {
> +            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
> +        } else {
> +            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> +        }
> +        break;
> +
> +    case TCG_COND_LTU:
> +        if (c2) {
> +            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
> +        } else {
> +            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> +        }
> +        break;
> +
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    return ret | flags;
> +}
> +
> +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
> +                            TCGReg arg1, tcg_target_long arg2, bool c2)
> +{
> +    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
> +
> +    if (tmpflags != ret) {
> +        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
> +
> +        switch (tmpflags & SETCOND_FLAGS) {
> +        case SETCOND_INV:
> +            /* Intermediate result is boolean: simply invert. */
> +            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
> +            break;
> +        case SETCOND_NEZ:
> +            /* Intermediate result is zero/non-zero: test != 0. */
> +            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
> +            break;
> +        case SETCOND_NEZ | SETCOND_INV:
> +            /* Intermediate result is zero/non-zero: test == 0. */
> +            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
> +            break;
> +        default:
> +            g_assert_not_reached();
> +        }
> +    }
>   }
>   
>   static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
> @@ -1543,7 +1621,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>   
>       case INDEX_op_setcond_i32:
>       case INDEX_op_setcond_i64:
> -        tcg_out_setcond(s, args[3], a0, a1, a2);
> +        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
>           break;
>   
>       case INDEX_op_qemu_ld_i32:
> @@ -1662,6 +1740,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>       case INDEX_op_and_i64:
>       case INDEX_op_or_i64:
>       case INDEX_op_xor_i64:
> +    case INDEX_op_setcond_i32:
> +    case INDEX_op_setcond_i64:
>           return C_O1_I2(r, r, rI);
>   
>       case INDEX_op_andc_i32:
> @@ -1683,7 +1763,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>       case INDEX_op_divu_i32:
>       case INDEX_op_rem_i32:
>       case INDEX_op_remu_i32:
> -    case INDEX_op_setcond_i32:
>       case INDEX_op_mul_i64:
>       case INDEX_op_mulsh_i64:
>       case INDEX_op_muluh_i64:
> @@ -1691,7 +1770,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>       case INDEX_op_divu_i64:
>       case INDEX_op_rem_i64:
>       case INDEX_op_remu_i64:
> -    case INDEX_op_setcond_i64:
>           return C_O1_I2(r, rZ, rZ);
>   
>       case INDEX_op_shl_i32:
Alistair Francis May 17, 2023, 12:16 a.m. UTC | #2
On Wed, May 3, 2023 at 6:59 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Split out a helper function, tcg_out_setcond_int, which does not
> always produce the complete boolean result, but returns a set of
> flags to do so.
>
> Based on 21af16198425, the same improvement for loongarch64.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Acked-by: Alistair Francis <alistair.francis@wdc.com>

Alistair

> ---
>  tcg/riscv/tcg-target.c.inc | 164 +++++++++++++++++++++++++++----------
>  1 file changed, 121 insertions(+), 43 deletions(-)
>
> diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
> index 044ddfb160..84b646105c 100644
> --- a/tcg/riscv/tcg-target.c.inc
> +++ b/tcg/riscv/tcg-target.c.inc
> @@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
>      tcg_out_opc_branch(s, op, arg1, arg2, 0);
>  }
>
> -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
> -                            TCGReg arg1, TCGReg arg2)
> +#define SETCOND_INV    TCG_TARGET_NB_REGS
> +#define SETCOND_NEZ    (SETCOND_INV << 1)
> +#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
> +
> +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
> +                               TCGReg arg1, tcg_target_long arg2, bool c2)
>  {
> +    int flags = 0;
> +
>      switch (cond) {
> -    case TCG_COND_EQ:
> -        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
> -        tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
> -        break;
> -    case TCG_COND_NE:
> -        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
> -        break;
> -    case TCG_COND_LT:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> -        break;
> -    case TCG_COND_GE:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_LE:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_GT:
> -        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
> -        break;
> -    case TCG_COND_LTU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> -        break;
> -    case TCG_COND_GEU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_LEU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
> -        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
> -        break;
> -    case TCG_COND_GTU:
> -        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
> +    case TCG_COND_EQ:    /* -> NE  */
> +    case TCG_COND_GE:    /* -> LT  */
> +    case TCG_COND_GEU:   /* -> LTU */
> +    case TCG_COND_GT:    /* -> LE  */
> +    case TCG_COND_GTU:   /* -> LEU */
> +        cond = tcg_invert_cond(cond);
> +        flags ^= SETCOND_INV;
>          break;
>      default:
> -         g_assert_not_reached();
> -         break;
> -     }
> +        break;
> +    }
> +
> +    switch (cond) {
> +    case TCG_COND_LE:
> +    case TCG_COND_LEU:
> +        /*
> +         * If we have a constant input, the most efficient way to implement
> +         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
> +         * We don't need to care for this for LE because the constant input
> +         * is constrained to signed 12-bit, and 0x800 is representable in the
> +         * temporary register.
> +         */
> +        if (c2) {
> +            if (cond == TCG_COND_LEU) {
> +                /* unsigned <= -1 is true */
> +                if (arg2 == -1) {
> +                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
> +                    return ret;
> +                }
> +                cond = TCG_COND_LTU;
> +            } else {
> +                cond = TCG_COND_LT;
> +            }
> +            tcg_debug_assert(arg2 <= 0x7ff);
> +            if (++arg2 == 0x800) {
> +                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
> +                arg2 = TCG_REG_TMP0;
> +                c2 = false;
> +            }
> +        } else {
> +            TCGReg tmp = arg2;
> +            arg2 = arg1;
> +            arg1 = tmp;
> +            cond = tcg_swap_cond(cond);    /* LE -> GE */
> +            cond = tcg_invert_cond(cond);  /* GE -> LT */
> +            flags ^= SETCOND_INV;
> +        }
> +        break;
> +    default:
> +        break;
> +    }
> +
> +    switch (cond) {
> +    case TCG_COND_NE:
> +        flags |= SETCOND_NEZ;
> +        if (!c2) {
> +            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
> +        } else if (arg2 == 0) {
> +            ret = arg1;
> +        } else {
> +            tcg_out_opc_reg(s, OPC_XORI, ret, arg1, arg2);
> +        }
> +        break;
> +
> +    case TCG_COND_LT:
> +        if (c2) {
> +            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
> +        } else {
> +            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
> +        }
> +        break;
> +
> +    case TCG_COND_LTU:
> +        if (c2) {
> +            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
> +        } else {
> +            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
> +        }
> +        break;
> +
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    return ret | flags;
> +}
> +
> +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
> +                            TCGReg arg1, tcg_target_long arg2, bool c2)
> +{
> +    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
> +
> +    if (tmpflags != ret) {
> +        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
> +
> +        switch (tmpflags & SETCOND_FLAGS) {
> +        case SETCOND_INV:
> +            /* Intermediate result is boolean: simply invert. */
> +            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
> +            break;
> +        case SETCOND_NEZ:
> +            /* Intermediate result is zero/non-zero: test != 0. */
> +            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
> +            break;
> +        case SETCOND_NEZ | SETCOND_INV:
> +            /* Intermediate result is zero/non-zero: test == 0. */
> +            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
> +            break;
> +        default:
> +            g_assert_not_reached();
> +        }
> +    }
>  }
>
>  static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
> @@ -1543,7 +1621,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>
>      case INDEX_op_setcond_i32:
>      case INDEX_op_setcond_i64:
> -        tcg_out_setcond(s, args[3], a0, a1, a2);
> +        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
>          break;
>
>      case INDEX_op_qemu_ld_i32:
> @@ -1662,6 +1740,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>      case INDEX_op_and_i64:
>      case INDEX_op_or_i64:
>      case INDEX_op_xor_i64:
> +    case INDEX_op_setcond_i32:
> +    case INDEX_op_setcond_i64:
>          return C_O1_I2(r, r, rI);
>
>      case INDEX_op_andc_i32:
> @@ -1683,7 +1763,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>      case INDEX_op_divu_i32:
>      case INDEX_op_rem_i32:
>      case INDEX_op_remu_i32:
> -    case INDEX_op_setcond_i32:
>      case INDEX_op_mul_i64:
>      case INDEX_op_mulsh_i64:
>      case INDEX_op_muluh_i64:
> @@ -1691,7 +1770,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
>      case INDEX_op_divu_i64:
>      case INDEX_op_rem_i64:
>      case INDEX_op_remu_i64:
> -    case INDEX_op_setcond_i64:
>          return C_O1_I2(r, rZ, rZ);
>
>      case INDEX_op_shl_i32:
> --
> 2.34.1
>
>
diff mbox series

Patch

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 044ddfb160..84b646105c 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -812,50 +812,128 @@  static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
     tcg_out_opc_branch(s, op, arg1, arg2, 0);
 }
 
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
-                            TCGReg arg1, TCGReg arg2)
+#define SETCOND_INV    TCG_TARGET_NB_REGS
+#define SETCOND_NEZ    (SETCOND_INV << 1)
+#define SETCOND_FLAGS  (SETCOND_INV | SETCOND_NEZ)
+
+static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret,
+                               TCGReg arg1, tcg_target_long arg2, bool c2)
 {
+    int flags = 0;    /* SETCOND_* fixups the caller must still apply */
+
     switch (cond) {
-    case TCG_COND_EQ:
-        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-        tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
-        break;
-    case TCG_COND_NE:
-        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
-        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
-        break;
-    case TCG_COND_LT:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-        break;
-    case TCG_COND_GE:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_LE:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_GT:
-        tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1);
-        break;
-    case TCG_COND_LTU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-        break;
-    case TCG_COND_GEU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_LEU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
-        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
-        break;
-    case TCG_COND_GTU:
-        tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1);
+    case TCG_COND_EQ:    /* -> NE  */
+    case TCG_COND_GE:    /* -> LT  */
+    case TCG_COND_GEU:   /* -> LTU */
+    case TCG_COND_GT:    /* -> LE  */
+    case TCG_COND_GTU:   /* -> LEU */
+        cond = tcg_invert_cond(cond);
+        flags ^= SETCOND_INV;
         break;
     default:
-         g_assert_not_reached();
-         break;
-     }
+        break;
+    }
+
+    switch (cond) {
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+        /*
+         * If we have a constant input, the most efficient way to implement
+         * LE is by adding 1 and using LT.  Watch out for wrap around for LEU.
+         * Signed LE cannot wrap: the constant input is constrained to signed
+         * 12-bit, and the one out-of-range result, 0x800, is loaded into
+         * the temporary register instead.
+         */
+        if (c2) {
+            if (cond == TCG_COND_LEU) {
+                /* unsigned <= -1 is always true: emit the constant result */
+                if (arg2 == -1) {
+                    tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV));
+                    return ret;
+                }
+                cond = TCG_COND_LTU;
+            } else {
+                cond = TCG_COND_LT;
+            }
+            tcg_debug_assert(arg2 <= 0x7ff);
+            if (++arg2 == 0x800) {
+                tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2);
+                arg2 = TCG_REG_TMP0;
+                c2 = false;
+            }
+        } else {
+            TCGReg tmp = arg2;
+            arg2 = arg1;
+            arg1 = tmp;
+            cond = tcg_swap_cond(cond);    /* LE -> GE */
+            cond = tcg_invert_cond(cond);  /* GE -> LT */
+            flags ^= SETCOND_INV;
+        }
+        break;
+    default:
+        break;
+    }
+
+    switch (cond) {
+    case TCG_COND_NE:
+        flags |= SETCOND_NEZ;
+        if (!c2) {
+            tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2);
+        } else if (arg2 == 0) {
+            ret = arg1;
+        } else {
+            tcg_out_opc_imm(s, OPC_XORI, ret, arg1, arg2);
+        }
+        break;
+
+    case TCG_COND_LT:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2);
+        }
+        break;
+
+    case TCG_COND_LTU:
+        if (c2) {
+            tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2);
+        }
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    return ret | flags;    /* result register | pending SETCOND_* flags */
+}
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGReg arg1, tcg_target_long arg2, bool c2)
+{
+    int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2);
+
+    if (tmpflags != ret) {    /* a SETCOND_* fixup is still pending */
+        TCGReg tmp = tmpflags & ~SETCOND_FLAGS;
+
+        switch (tmpflags & SETCOND_FLAGS) {
+        case SETCOND_INV:
+            /* tmp is a 0/1 boolean of the inverted test: flip bit 0. */
+            tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1);
+            break;
+        case SETCOND_NEZ:
+            /* tmp is zero/non-zero: ret = (0 <u tmp), i.e. tmp != 0. */
+            tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp);
+            break;
+        case SETCOND_NEZ | SETCOND_INV:
+            /* tmp is zero/non-zero: ret = (tmp <u 1), i.e. tmp == 0. */
+            tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
 }
 
 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail)
@@ -1543,7 +1621,7 @@  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
     case INDEX_op_setcond_i32:
     case INDEX_op_setcond_i64:
-        tcg_out_setcond(s, args[3], a0, a1, a2);
+        tcg_out_setcond(s, args[3], a0, a1, a2, c2);
         break;
 
     case INDEX_op_qemu_ld_i32:
@@ -1662,6 +1740,8 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_and_i64:
     case INDEX_op_or_i64:
     case INDEX_op_xor_i64:
+    case INDEX_op_setcond_i32:
+    case INDEX_op_setcond_i64:
         return C_O1_I2(r, r, rI);
 
     case INDEX_op_andc_i32:
@@ -1683,7 +1763,6 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_divu_i32:
     case INDEX_op_rem_i32:
     case INDEX_op_remu_i32:
-    case INDEX_op_setcond_i32:
     case INDEX_op_mul_i64:
     case INDEX_op_mulsh_i64:
     case INDEX_op_muluh_i64:
@@ -1691,7 +1770,6 @@  static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_divu_i64:
     case INDEX_op_rem_i64:
     case INDEX_op_remu_i64:
-    case INDEX_op_setcond_i64:
         return C_O1_I2(r, rZ, rZ);
 
     case INDEX_op_shl_i32: