diff mbox series

[v2,085/101] target/arm: Implement CNTP (predicate as counter) for SME2/SVE2p1

Message ID 20250621235037.74091-86-richard.henderson@linaro.org
State New
Headers show
Series target/arm: Implement FEAT_SME2p1 | expand

Commit Message

Richard Henderson June 21, 2025, 11:50 p.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/helper-sve.h    |  1 +
 target/arm/tcg/sve_helper.c    | 59 ++++++++++++++++++++++++++++++++++
 target/arm/tcg/translate-sve.c | 30 +++++++++++++++++
 target/arm/tcg/sve.decode      |  3 +-
 4 files changed, 92 insertions(+), 1 deletion(-)

Comments

Richard Henderson June 22, 2025, 9:39 p.m. UTC | #1
On 6/21/25 16:50, Richard Henderson wrote:
> +uint64_t HELPER(sve2p1_cntp_c)(uint32_t png, uint32_t desc)
> +{
> +    int pl = FIELD_EX32(desc, PREDDESC, OPRSZ);
> +    int vl = pl * 8;
> +    unsigned v_esz = FIELD_EX32(desc, PREDDESC, ESZ);
> +    int lg2_width = FIELD_EX32(desc, PREDDESC, DATA) + 1;
> +    unsigned p_esz;
> +    int p_count, maxelem;
> +    bool p_invert;
> +
> +    /* C.f. Arm pseudocode CounterToPredicate. */
> +    if ((png & 0xf) == 0) {
> +        /* Canonical false predicate. */
> +        return 0;
> +    }
> +    p_esz = ctz32(png);
> +
> +    /*
> +     * maxbit = log2(pl * 4)
> +     *        = log2(vl / 8 * 4)
> +     *        = log2(vl / 2)
> +     *        = log2(vl) - 1
> +     * maxbit_mask = ones<maxbit:0>
> +     *             = (1 << (maxbit + 1)) - 1
> +     *             = (1 << (log2(vl) - 1 + 1)) - 1
> +     *             = (1 << log2(vl)) - 1
> +     *             = pow2ceil(vl) - 1
> +     * Note that we keep count in bytes, not elements.
> +     */
> +    p_count = (png & (pow2ceil(vl) - 1)) >> 1;

This is too clever for its own good, and misses masking out the esz bit we located via
ctz above.  All of the predicate-as-counter insns suffer the same error.

I'll put all of the counter parsing into some helper functions.


r~
diff mbox series

Patch

diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
index 906da384dc..733828a880 100644
--- a/target/arm/tcg/helper-sve.h
+++ b/target/arm/tcg/helper-sve.h
@@ -937,6 +937,7 @@  DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
+DEF_HELPER_FLAGS_2(sve2p1_cntp_c, TCG_CALL_NO_RWG_SE, i64, i32, i32)
 
 DEF_HELPER_FLAGS_3(sve_whilel, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
 DEF_HELPER_FLAGS_3(sve_whileg, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index ac38d62f04..7d6f5fbb58 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -4184,6 +4184,65 @@  uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
     return sum;
 }
 
+uint64_t HELPER(sve2p1_cntp_c)(uint32_t png, uint32_t desc)
+{
+    int pl = FIELD_EX32(desc, PREDDESC, OPRSZ);
+    int vl = pl * 8;
+    unsigned v_esz = FIELD_EX32(desc, PREDDESC, ESZ);
+    int lg2_width = FIELD_EX32(desc, PREDDESC, DATA) + 1;
+    unsigned p_esz;
+    int p_count, maxelem;
+    bool p_invert;
+
+    /* C.f. Arm pseudocode CounterToPredicate. */
+    if ((png & 0xf) == 0) {
+        /* Canonical false predicate. */
+        return 0;
+    }
+    p_esz = ctz32(png);
+
+    /*
+     * maxbit = log2(pl * 4)
+     *        = log2(vl / 8 * 4)
+     *        = log2(vl / 2)
+     *        = log2(vl) - 1
+     * maxbit_mask = ones<maxbit:0>
+     *             = (1 << (maxbit + 1)) - 1
+     *             = (1 << (log2(vl) - 1 + 1)) - 1
+     *             = (1 << log2(vl)) - 1
+     *             = pow2ceil(vl) - 1
+     * Note that we keep count in bytes, not elements.
+     */
+    p_count = (png & (pow2ceil(vl) - 1)) >> 1;
+    p_invert = (png >> 15) & 1;
+
+    /*
+     * If the esz encoded into the predicate is not larger than the
+     * vector operation esz, then the expanded predicate bit will
+     * be true for all vector elements.  If the predicate esz is
+     * larger than the vector esz, then only even multiples can be
+     * true, and the rest will be false.
+     */
+    v_esz = MAX(v_esz, p_esz);
+    maxelem = (vl << lg2_width) >> v_esz;
+
+    if (p_count == 0) {
+        if (p_invert) {
+            /* Canonical true predicate: invert count zero. */
+            return maxelem;
+        }
+        /* Non-canonical false predicate. */
+        return 0;
+    }
+    if (p_invert) {
+        p_count = DIV_ROUND_UP(p_count, 1 << v_esz);
+        p_count = maxelem - p_count;
+        return MAX(0, p_count);
+    }
+    p_count >>= v_esz;
+    return MIN(p_count, maxelem);
+}
+
 /* C.f. Arm pseudocode EncodePredCount */
 static uint64_t encode_pred_count(uint32_t elements, uint32_t count,
                                   uint32_t esz, bool invert)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index f3ac0f6300..62ace5d300 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3035,6 +3035,36 @@  static bool trans_CNTP(DisasContext *s, arg_CNTP *a)
     return true;
 }
 
+static bool trans_CNTP_c(DisasContext *s, arg_CNTP_c *a)
+{
+    TCGv_i32 t_png;
+    uint32_t desc = 0;
+
+    if (dc_isar_feature(aa64_sve2p1, s)) {
+        if (!sve_access_check(s)) {
+            return true;
+        }
+    } else if (dc_isar_feature(aa64_sme2, s)) {
+        if (!sme_sm_enabled_check(s)) {
+            return true;
+        }
+    } else {
+        return false;
+    }
+
+    t_png = tcg_temp_new_i32();
+    tcg_gen_ld16u_i32(t_png, tcg_env,
+                      pred_full_reg_offset(s, a->rn) ^
+                      (HOST_BIG_ENDIAN ? 6 : 0));
+
+    desc = FIELD_DP32(desc, PREDDESC, OPRSZ, pred_full_reg_size(s));
+    desc = FIELD_DP32(desc, PREDDESC, ESZ, a->esz);
+    desc = FIELD_DP32(desc, PREDDESC, DATA, a->vl);
+
+    gen_helper_sve2p1_cntp_c(cpu_reg(s, a->rd), t_png, tcg_constant_i32(desc));
+    return true;
+}
+
 static bool trans_INCDECP_r(DisasContext *s, arg_incdec_pred *a)
 {
     if (!dc_isar_feature(aa64_sve, s)) {
diff --git a/target/arm/tcg/sve.decode b/target/arm/tcg/sve.decode
index 0eb4fd9667..f3db790460 100644
--- a/target/arm/tcg/sve.decode
+++ b/target/arm/tcg/sve.decode
@@ -784,7 +784,8 @@  BRKN            00100101 0. 01100001 .... 0 .... 0 ....         @pd_pg_pn_s
 ### SVE Predicate Count Group
 
 # SVE predicate count
-CNTP            00100101 .. 100 000 10 .... 0 .... .....        @rd_pg4_pn
+CNTP            00100101 ..    100 000 10 ....     0 .... ..... @rd_pg4_pn
+CNTP_c          00100101 esz:2 100 000 10 000 vl:1 1 rn:4 rd:5
 
 # SVE inc/dec register by predicate count
 INCDECP_r       00100101 .. 10110 d:1 10001 00 .... .....     @incdec_pred u=1