@@ -15,7 +15,6 @@
/* optional instructions */
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_extract2_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -28,7 +27,6 @@
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_extract2_i64 1
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
@@ -26,7 +26,6 @@ extern bool use_neon_instructions;
/* optional instructions */
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_extract2_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
@@ -28,7 +28,6 @@
/* optional instructions */
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 have_popcnt
#define TCG_TARGET_HAS_extract2_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -42,7 +41,6 @@
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 have_popcnt
#define TCG_TARGET_HAS_extract2_i64 1
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
@@ -18,7 +18,6 @@
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
/* 64-bit operations */
@@ -28,7 +27,6 @@
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_add2_i64 0
#define TCG_TARGET_HAS_sub2_i64 0
#define TCG_TARGET_HAS_mulu2_i64 0
@@ -60,7 +60,6 @@ extern bool use_mips32r2_instructions;
/* optional instructions detected at runtime */
#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_qemu_st8_i32 0
#if TCG_TARGET_REG_BITS == 64
@@ -68,7 +67,6 @@ extern bool use_mips32r2_instructions;
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 0
#endif
#define TCG_TARGET_HAS_qemu_ldst_i128 0
@@ -19,7 +19,6 @@
/* optional instructions */
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 have_isa_2_06
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 0
@@ -33,7 +32,6 @@
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 have_isa_2_06
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
@@ -18,7 +18,6 @@
#define TCG_TARGET_HAS_muls2_i32 0
#define TCG_TARGET_HAS_bswap16_i32 (cpuinfo & CPUINFO_ZBB)
#define TCG_TARGET_HAS_bswap32_i32 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctpop_i32 (cpuinfo & CPUINFO_ZBB)
#define TCG_TARGET_HAS_qemu_st8_i32 0
#define TCG_TARGET_HAS_negsetcond_i64 1
@@ -27,7 +26,6 @@
#define TCG_TARGET_HAS_bswap16_i64 (cpuinfo & CPUINFO_ZBB)
#define TCG_TARGET_HAS_bswap32_i64 (cpuinfo & CPUINFO_ZBB)
#define TCG_TARGET_HAS_bswap64_i64 (cpuinfo & CPUINFO_ZBB)
-#define TCG_TARGET_HAS_ctpop_i64 (cpuinfo & CPUINFO_ZBB)
#define TCG_TARGET_HAS_add2_i64 1
#define TCG_TARGET_HAS_sub2_i64 1
#define TCG_TARGET_HAS_mulu2_i64 0
@@ -31,7 +31,6 @@ extern uint64_t s390_facilities[3];
/* optional instructions */
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
-#define TCG_TARGET_HAS_ctpop_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -44,7 +43,6 @@ extern uint64_t s390_facilities[3];
#define TCG_TARGET_HAS_bswap16_i64 1
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
-#define TCG_TARGET_HAS_ctpop_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
@@ -16,7 +16,6 @@ extern bool use_vis3_instructions;
/* optional instructions */
#define TCG_TARGET_HAS_bswap16_i32 0
#define TCG_TARGET_HAS_bswap32_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_extract2_i32 0
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -29,7 +28,6 @@ extern bool use_vis3_instructions;
#define TCG_TARGET_HAS_bswap16_i64 0
#define TCG_TARGET_HAS_bswap32_i64 0
#define TCG_TARGET_HAS_bswap64_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 1
#define TCG_TARGET_HAS_add2_i64 1
@@ -15,7 +15,6 @@
#define TCG_TARGET_HAS_bswap16_i64 0
#define TCG_TARGET_HAS_bswap32_i64 0
#define TCG_TARGET_HAS_bswap64_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 0
#define TCG_TARGET_HAS_extract2_i64 0
#define TCG_TARGET_HAS_negsetcond_i64 0
#define TCG_TARGET_HAS_add2_i64 0
@@ -10,7 +10,6 @@
#define TCG_TARGET_HAS_bswap16_i32 1
#define TCG_TARGET_HAS_bswap32_i32 1
#define TCG_TARGET_HAS_extract2_i32 0
-#define TCG_TARGET_HAS_ctpop_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 0
#define TCG_TARGET_HAS_muls2_i32 1
#define TCG_TARGET_HAS_qemu_st8_i32 0
@@ -21,7 +20,6 @@
#define TCG_TARGET_HAS_bswap32_i64 1
#define TCG_TARGET_HAS_bswap64_i64 1
#define TCG_TARGET_HAS_extract2_i64 0
-#define TCG_TARGET_HAS_ctpop_i64 1
#define TCG_TARGET_HAS_negsetcond_i64 0
#define TCG_TARGET_HAS_muls2_i64 1
#define TCG_TARGET_HAS_add2_i32 1
@@ -765,7 +765,8 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
tcg_temp_free_i64(t2);
return;
}
- if (TCG_TARGET_HAS_ctpop_i32 || TCG_TARGET_HAS_ctpop_i64) {
+ if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0) ||
+ tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) {
t = tcg_temp_ebb_new_i32();
tcg_gen_subi_i32(t, arg1, 1);
tcg_gen_andc_i32(t, t, arg1);
@@ -788,8 +789,9 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
{
- if (!tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0)
- && TCG_TARGET_HAS_ctpop_i32 && arg2 == 32) {
+ if (arg2 == 32
+ && !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I32, 0)
+ && tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
/* This equivalence has the advantage of not requiring a fixup. */
TCGv_i32 t = tcg_temp_ebb_new_i32();
tcg_gen_subi_i32(t, arg1, 1);
@@ -817,9 +819,9 @@ void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
{
- if (TCG_TARGET_HAS_ctpop_i32) {
+ if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
- } else if (TCG_TARGET_HAS_ctpop_i64) {
+ } else if (tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) {
TCGv_i64 t = tcg_temp_ebb_new_i64();
tcg_gen_extu_i32_i64(t, arg1);
tcg_gen_ctpop_i64(t, t);
@@ -2370,7 +2372,7 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
tcg_gen_op3_i64(INDEX_op_ctz, ret, arg1, arg2);
return;
}
- if (TCG_TARGET_HAS_ctpop_i64) {
+ if (tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) {
t = tcg_temp_ebb_new_i64();
tcg_gen_subi_i64(t, arg1, 1);
tcg_gen_andc_i64(t, t, arg1);
@@ -2404,7 +2406,7 @@ void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
tcg_temp_free_i32(t32);
} else if (arg2 == 64
&& !tcg_op_supported(INDEX_op_ctz, TCG_TYPE_I64, 0)
- && TCG_TARGET_HAS_ctpop_i64) {
+ && tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) {
/* This equivalence has the advantage of not requiring a fixup. */
TCGv_i64 t = tcg_temp_ebb_new_i64();
tcg_gen_subi_i64(t, arg1, 1);
@@ -2432,16 +2434,21 @@ void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
{
- if (TCG_TARGET_HAS_ctpop_i64) {
- tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
- } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
- tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
- tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
- tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
- tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (tcg_op_supported(INDEX_op_ctpop_i64, TCG_TYPE_I64, 0)) {
+ tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
+ return;
+ }
} else {
- gen_helper_ctpop_i64(ret, arg1);
+ if (tcg_op_supported(INDEX_op_ctpop_i32, TCG_TYPE_I32, 0)) {
+ tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+ tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+ tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
+ tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+ return;
+ }
}
+ gen_helper_ctpop_i64(ret, arg1);
}
void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1027,6 +1027,8 @@ static const TCGOutOp * const all_outop[NB_OPS] = {
OUTOP(INDEX_op_and, TCGOutOpBinary, outop_and),
OUTOP(INDEX_op_andc, TCGOutOpBinary, outop_andc),
OUTOP(INDEX_op_clz, TCGOutOpBinary, outop_clz),
+ OUTOP(INDEX_op_ctpop_i32, TCGOutOpUnary, outop_ctpop),
+ OUTOP(INDEX_op_ctpop_i64, TCGOutOpUnary, outop_ctpop),
OUTOP(INDEX_op_ctz, TCGOutOpBinary, outop_ctz),
OUTOP(INDEX_op_divs, TCGOutOpBinary, outop_divs),
OUTOP(INDEX_op_divu, TCGOutOpBinary, outop_divu),
@@ -2290,8 +2292,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
return TCG_TARGET_HAS_bswap16_i32;
case INDEX_op_bswap32_i32:
return TCG_TARGET_HAS_bswap32_i32;
- case INDEX_op_ctpop_i32:
- return TCG_TARGET_HAS_ctpop_i32;
case INDEX_op_brcond2_i32:
case INDEX_op_setcond2_i32:
@@ -2331,8 +2331,6 @@ bool tcg_op_supported(TCGOpcode op, TCGType type, unsigned flags)
return TCG_TARGET_HAS_bswap32_i64;
case INDEX_op_bswap64_i64:
return TCG_TARGET_HAS_bswap64_i64;
- case INDEX_op_ctpop_i64:
- return TCG_TARGET_HAS_ctpop_i64;
case INDEX_op_add2_i64:
return TCG_TARGET_HAS_add2_i64;
case INDEX_op_sub2_i64:
@@ -5438,6 +5436,8 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
}
break;
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
case INDEX_op_neg:
case INDEX_op_not:
{
@@ -26,6 +26,8 @@
#include <ffi.h>
+#define ctpop_tr glue(ctpop, TCG_TARGET_REG_BITS)
+
/*
* Enable TCI assertions only when debugging TCG (and without NDEBUG defined).
* Without assertions, the interpreter runs much faster.
@@ -575,6 +577,11 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_args_rr(insn, &r0, &r1);
regs[r0] = ~regs[r1];
break;
+ case INDEX_op_ctpop_i32:
+ case INDEX_op_ctpop_i64:
+ tci_args_rr(insn, &r0, &r1);
+ regs[r0] = ctpop_tr(regs[r1]);
+ break;
/* Arithmetic operations (32 bit). */
@@ -604,12 +611,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tmp32 = regs[r1];
regs[r0] = tmp32 ? ctz32(tmp32) : regs[r2];
break;
-#if TCG_TARGET_HAS_ctpop_i32
- case INDEX_op_ctpop_i32:
- tci_args_rr(insn, &r0, &r1);
- regs[r0] = ctpop32(regs[r1]);
- break;
-#endif
/* Shift/rotate operations. */
@@ -739,12 +740,6 @@ uintptr_t QEMU_DISABLE_CFI tcg_qemu_tb_exec(CPUArchState *env,
tci_args_rrr(insn, &r0, &r1, &r2);
regs[r0] = regs[r1] ? ctz64(regs[r1]) : regs[r2];
break;
-#if TCG_TARGET_HAS_ctpop_i64
- case INDEX_op_ctpop_i64:
- tci_args_rr(insn, &r0, &r1);
- regs[r0] = ctpop64(regs[r1]);
- break;
-#endif
#if TCG_TARGET_HAS_mulu2_i64
case INDEX_op_mulu2_i64:
tci_args_rrrr(insn, &r0, &r1, &r2, &r3);
@@ -2129,6 +2129,10 @@ static const TCGOutOpBinary outop_clz = {
.out_rri = tgen_clzi,
};
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_NotImplemented,
+};
+
static void tgen_ctz(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
@@ -1894,6 +1894,10 @@ static const TCGOutOpBinary outop_clz = {
.out_rri = tgen_clzi,
};
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_NotImplemented,
+};
+
static void tgen_ctz(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
@@ -2628,6 +2628,23 @@ static const TCGOutOpBinary outop_clz = {
.out_rri = tgen_clzi,
};
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
+{
+ int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
+ tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
+}
+
+static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
+{
+ return have_popcnt ? C_O1_I1(r, r) : C_NotImplemented;
+}
+
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_Dynamic,
+ .base.dynamic_constraint = cset_ctpop,
+ .out_rr = tgen_ctpop,
+};
+
static void tgen_ctz(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
@@ -3046,10 +3063,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
}
break;
- OP_32_64(ctpop):
- tcg_out_modrm(s, OPC_POPCNT + rexw, a0, a1);
- break;
-
OP_32_64(brcond):
tcg_out_brcond(s, rexw, a2, a0, a1, const_args[1],
arg_label(args[3]), 0);
@@ -3893,8 +3906,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
case INDEX_op_extract_i64:
case INDEX_op_sextract_i32:
case INDEX_op_sextract_i64:
- case INDEX_op_ctpop_i32:
- case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_extract2_i32:
@@ -1321,6 +1321,10 @@ static const TCGOutOpBinary outop_clz = {
.out_rri = tgen_clzi,
};
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_NotImplemented,
+};
+
static void tgen_ctzi(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, tcg_target_long a2)
{
@@ -1734,6 +1734,10 @@ static const TCGOutOpBinary outop_clz = {
.out_rri = tgen_clzi,
};
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_NotImplemented,
+};
+
static const TCGOutOpBinary outop_ctz = {
.base.static_constraint = C_NotImplemented,
};
@@ -2974,6 +2974,23 @@ static const TCGOutOpBinary outop_clz = {
.out_rri = tgen_clzi,
};
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
+{
+ uint32_t insn = type == TCG_TYPE_I32 ? CNTPOPW : CNTPOPD;
+ tcg_out32(s, insn | SAB(a1, a0, 0));
+}
+
+static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
+{
+ return have_isa_2_06 ? C_O1_I1(r, r) : C_NotImplemented;
+}
+
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_Dynamic,
+ .base.dynamic_constraint = cset_ctpop,
+ .out_rr = tgen_ctpop,
+};
+
static void tgen_ctz(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
@@ -3396,13 +3413,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
tcg_out_mem_long(s, STD, STDX, args[0], args[1], args[2]);
break;
- case INDEX_op_ctpop_i32:
- tcg_out32(s, CNTPOPW | SAB(args[1], args[0], 0));
- break;
- case INDEX_op_ctpop_i64:
- tcg_out32(s, CNTPOPD | SAB(args[1], args[0], 0));
- break;
-
case INDEX_op_brcond_i32:
tcg_out_brcond(s, args[2], args[0], args[1], const_args[1],
arg_label(args[3]), TCG_TYPE_I32);
@@ -4226,7 +4236,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
case INDEX_op_ld16u_i32:
case INDEX_op_ld16s_i32:
case INDEX_op_ld_i32:
- case INDEX_op_ctpop_i32:
case INDEX_op_bswap16_i32:
case INDEX_op_bswap32_i32:
case INDEX_op_extract_i32:
@@ -4238,7 +4247,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
case INDEX_op_ld32u_i64:
case INDEX_op_ld32s_i64:
case INDEX_op_ld_i64:
- case INDEX_op_ctpop_i64:
case INDEX_op_ext_i32_i64:
case INDEX_op_extu_i32_i64:
case INDEX_op_bswap16_i64:
@@ -2023,6 +2023,23 @@ static const TCGOutOpBinary outop_clz = {
.out_rri = tgen_clzi,
};
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
+{
+ RISCVInsn insn = type == TCG_TYPE_I32 ? OPC_CPOPW : OPC_CPOP;
+ tcg_out_opc_imm(s, insn, a0, a1, 0);
+}
+
+static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
+{
+ return cpuinfo & CPUINFO_ZBB ? C_O1_I1(r, r) : C_NotImplemented;
+}
+
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_Dynamic,
+ .base.dynamic_constraint = cset_ctpop,
+ .out_rr = tgen_ctpop,
+};
+
static void tgen_ctz(TCGContext *s, TCGType type,
TCGReg a0, TCGReg a1, TCGReg a2)
{
@@ -2438,13 +2455,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
}
break;
- case INDEX_op_ctpop_i32:
- tcg_out_opc_imm(s, OPC_CPOPW, a0, a1, 0);
- break;
- case INDEX_op_ctpop_i64:
- tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0);
- break;
-
case INDEX_op_add2_i32:
tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5],
const_args[4], const_args[5], false, true);
@@ -2808,8 +2818,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
case INDEX_op_bswap16_i64:
case INDEX_op_bswap32_i64:
case INDEX_op_bswap64_i64:
- case INDEX_op_ctpop_i32:
- case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_st8_i32:
@@ -1514,32 +1514,6 @@ static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
tgen_movcond_int(s, type, dest, v3, v3const, v4, cc, inv_cc);
}
-static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
-{
- /* With MIE3, and bit 0 of m4 set, we get the complete result. */
- if (HAVE_FACILITY(MISC_INSN_EXT3)) {
- if (type == TCG_TYPE_I32) {
- tcg_out_ext32u(s, dest, src);
- src = dest;
- }
- tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
- return;
- }
-
- /* Without MIE3, each byte gets the count of bits for the byte. */
- tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
-
- /* Multiply to sum each byte at the top of the word. */
- if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
- tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
- } else {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
- tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
- tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
- }
-}
-
static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
int ofs, int len, int z)
{
@@ -2268,6 +2242,37 @@ static const TCGOutOpBinary outop_clz = {
.out_rri = tgen_clzi,
};
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+ /* With MIE3, and bit 0 of m4 set, we get the complete result. */
+ if (HAVE_FACILITY(MISC_INSN_EXT3)) {
+ if (type == TCG_TYPE_I32) {
+ tcg_out_ext32u(s, dest, src);
+ src = dest;
+ }
+ tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
+ return;
+ }
+
+ /* Without MIE3, each byte gets the count of bits for the byte. */
+ tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
+
+ /* Multiply to sum each byte at the top of the word. */
+ if (type == TCG_TYPE_I32) {
+ tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
+ tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 0x0101010101010101ull);
+ tcg_out_insn(s, RRE, MSGR, dest, TCG_TMP0);
+ tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
+ }
+}
+
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_O1_I1(r, r),
+ .out_rr = tgen_ctpop,
+};
+
static const TCGOutOpBinary outop_ctz = {
.base.static_constraint = C_NotImplemented,
};
@@ -2914,13 +2919,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
tgen_sextract(s, args[0], args[1], args[2], args[3]);
break;
- case INDEX_op_ctpop_i32:
- tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
- break;
- case INDEX_op_ctpop_i64:
- tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
- break;
-
case INDEX_op_mb:
/* The host memory model is quite strong, we simply need to
serialize the instruction stream. */
@@ -3429,8 +3427,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
case INDEX_op_extract_i64:
case INDEX_op_sextract_i32:
case INDEX_op_sextract_i64:
- case INDEX_op_ctpop_i32:
- case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_qemu_ld_i32:
@@ -1322,6 +1322,10 @@ static const TCGOutOpBinary outop_clz = {
.base.static_constraint = C_NotImplemented,
};
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_NotImplemented,
+};
+
static const TCGOutOpBinary outop_ctz = {
.base.static_constraint = C_NotImplemented,
};
@@ -66,8 +66,6 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
case INDEX_op_extract_i64:
case INDEX_op_sextract_i32:
case INDEX_op_sextract_i64:
- case INDEX_op_ctpop_i32:
- case INDEX_op_ctpop_i64:
return C_O1_I1(r, r);
case INDEX_op_st8_i32:
@@ -883,6 +881,22 @@ static const TCGOutOpBinary outop_xor = {
.out_rrr = tgen_xor,
};
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
+{
+ tcg_out_op_rr(s, glue(INDEX_op_ctpop_i,TCG_TARGET_REG_BITS), a0, a1);
+}
+
+static TCGConstraintSetIndex cset_ctpop(TCGType type, unsigned flags)
+{
+ return type == TCG_TYPE_REG ? C_O1_I1(r, r) : C_NotImplemented;
+}
+
+static const TCGOutOpUnary outop_ctpop = {
+ .base.static_constraint = C_Dynamic,
+ .base.dynamic_constraint = cset_ctpop,
+ .out_rr = tgen_ctpop,
+};
+
static void tgen_neg(TCGContext *s, TCGType type, TCGReg a0, TCGReg a1)
{
tcg_out_op_rr(s, INDEX_op_neg, a0, a1);
@@ -961,7 +975,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
tcg_out_op_rl(s, opc, TCG_REG_TMP, arg_label(args[3]));
break;
- CASE_32_64(ctpop) /* Optional (TCG_TARGET_HAS_ctpop_*). */
case INDEX_op_bswap32_i32: /* Optional (TCG_TARGET_HAS_bswap32_i32). */
case INDEX_op_bswap64_i64: /* Optional (TCG_TARGET_HAS_bswap64_i64). */
tcg_out_op_rr(s, opc, args[0], args[1]);
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- tcg/aarch64/tcg-target-has.h | 2 - tcg/arm/tcg-target-has.h | 1 - tcg/i386/tcg-target-has.h | 2 - tcg/loongarch64/tcg-target-has.h | 2 - tcg/mips/tcg-target-has.h | 2 - tcg/ppc/tcg-target-has.h | 2 - tcg/riscv/tcg-target-has.h | 2 - tcg/s390x/tcg-target-has.h | 2 - tcg/sparc64/tcg-target-has.h | 2 - tcg/tcg-has.h | 1 - tcg/tci/tcg-target-has.h | 2 - tcg/tcg-op.c | 37 ++++++++++-------- tcg/tcg.c | 8 ++-- tcg/tci.c | 19 ++++----- tcg/aarch64/tcg-target.c.inc | 4 ++ tcg/arm/tcg-target.c.inc | 4 ++ tcg/i386/tcg-target.c.inc | 23 ++++++++--- tcg/loongarch64/tcg-target.c.inc | 4 ++ tcg/mips/tcg-target.c.inc | 4 ++ tcg/ppc/tcg-target.c.inc | 26 ++++++++----- tcg/riscv/tcg-target.c.inc | 26 ++++++++----- tcg/s390x/tcg-target.c.inc | 66 +++++++++++++++----------------- tcg/sparc64/tcg-target.c.inc | 4 ++ tcg/tci/tcg-target.c.inc | 19 +++++++-- 24 files changed, 151 insertions(+), 113 deletions(-)