@@ -937,6 +937,7 @@ DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_2(sve2p1_cntp_c, TCG_CALL_NO_RWG_SE, i64, i32, i32) /* (png, desc) -> count */
DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
@@ -4184,6 +4184,65 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
return sum;
}
+uint64_t HELPER(sve2p1_cntp_c)(uint32_t png, uint32_t desc)
+{
+ int pl = FIELD_EX32(desc, PREDDESC, OPRSZ); /* translator stores pred_full_reg_size() here */
+ int vl = pl * 8;
+ unsigned v_esz = FIELD_EX32(desc, PREDDESC, ESZ); /* esz field of the instruction */
+ int lg2_width = FIELD_EX32(desc, PREDDESC, DATA) + 1; /* DATA is the insn's 1-bit vl field, so 1 or 2 */
+ unsigned p_esz;
+ int p_count, maxelem;
+ bool p_invert;
+
+ /* Decode png following Arm pseudocode CounterToPredicate; every return value lies in 0..maxelem. */
+ if ((png & 0xf) == 0) {
+ /* Canonical false predicate. */
+ return 0;
+ }
+ p_esz = ctz32(png); /* within 0..3, because one of bits 3:0 is set */
+
+ /*
+ * maxbit = log2(pl * 4)
+ * = log2(vl / 8 * 4)
+ * = log2(vl / 2)
+ * = log2(vl) - 1
+ * maxbit_mask = ones<maxbit:0>
+ * = (1 << (maxbit + 1)) - 1
+ * = (1 << (log2(vl) - 1 + 1)) - 1
+ * = (1 << log2(vl)) - 1
+ * = pow2ceil(vl) - 1
+ * Note that we keep count in bytes, not elements.
+ */
+ p_count = (png & (pow2ceil(vl) - 1)) >> 1; /* only bit 0 is shifted out, whatever p_esz is */
+ p_invert = (png >> 15) & 1; /* bit 15 of png is the invert flag */
+
+ /*
+ * If the esz encoded into the predicate is not larger than the
+ * vector operation esz, then the expanded predicate bit will
+ * be true for all vector elements. If the predicate esz is
+ * larger than the vector esz, then only even multiples can be
+ * true, and the rest will be false.
+ */
+ v_esz = MAX(v_esz, p_esz);
+ maxelem = (vl << lg2_width) >> v_esz; /* upper bound applied to every result below */
+
+ if (p_count == 0) {
+ if (p_invert) {
+ /* Canonical true predicate: invert count zero. */
+ return maxelem;
+ }
+ /* Non-canonical false predicate. */
+ return 0;
+ }
+ if (p_invert) {
+ p_count = DIV_ROUND_UP(p_count, 1 << v_esz); /* NOTE(review): if p_esz > 0 the size-marker bit is presumably still in p_count and forces rounding up - confirm */
+ p_count = maxelem - p_count;
+ return MAX(0, p_count); /* clamp: the inverted count may exceed maxelem */
+ }
+ p_count >>= v_esz; /* convert the byte count to elements of size 1 << v_esz */
+ return MIN(p_count, maxelem);
+}
+
/* C.f. Arm pseudocode EncodePredCount */
static uint64_t encode_pred_count(uint32_t elements, uint32_t count,
uint32_t esz, bool invert)
@@ -3035,6 +3035,36 @@ static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
return true;
}
+static bool trans_CNTP_c(DisasContext *s, arg_CNTP_c *a)
+{ /* Translate CNTP_c: run the feature/access checks, then call the sve2p1_cntp_c helper on 16 bits loaded from predicate register rn. */
+ TCGv_i32 t_png;
+ uint32_t desc = 0; /* PREDDESC fields handed to the helper */
+
+ if (dc_isar_feature(aa64_sve2p1, s)) {
+ if (!sve_access_check(s)) {
+ return true; /* presumably the failed check has already generated the exception - confirm */
+ }
+ } else if (dc_isar_feature(aa64_sme2, s)) { /* SME2 without SVE2p1 */
+ if (!sme_sm_enabled_check(s)) {
+ return true;
+ }
+ } else {
+ return false; /* neither feature is present; presumably rejects the encoding - confirm */
+ }
+
+ t_png = tcg_temp_new_i32();
+ tcg_gen_ld16u_i32(t_png, tcg_env, /* 16-bit load from predicate register rn */
+ pred_full_reg_offset(s, a->rn) ^
+ (HOST_BIG_ENDIAN ? 6 : 0)); /* offset is XORed with 6 on big-endian hosts */
+
+ desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
+ desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+ desc = FIELD_DP32(desc, PREDDESC, DATA, a->vl); /* the helper adds 1 to this to form lg2_width */
+
+ gen_helper_sve2p1_cntp_c(cpu_reg(s, a->rd), t_png, tcg_constant_i32(desc)); /* helper result is written to rd */
+ return true;
+}
+
static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
{
if (!dc_isar_feature(aa64_sve, s)) {
@@ -784,7 +784,8 @@ BRKN 00100101 0. 01100001 .... 0 .... 0 .... @pd_pg_pn_s
### SVE Predicate Count Group
# SVE predicate count
-CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn
+CNTP 00100101 .. 100 000 10 .... 0 .... ..... @rd_pg4_pn
+CNTP_c 00100101 esz:2 100 000 10 000 vl:1 1 rn:4 rd:5
# SVE inc/dec register by predicate count
INCDECP_r 00100101 .. 10110 d:1 10001 00 .... ..... @incdec_pred u=1
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/helper-sve.h | 1 + target/arm/tcg/sve_helper.c | 59 ++++++++++++++++++++++++++++++++++ target/arm/tcg/translate-sve.c | 30 +++++++++++++++++ target/arm/tcg/sve.decode | 3 +- 4 files changed, 92 insertions(+), 1 deletion(-)