diff mbox series

[v3,34/51] target/arm: Implement PSEL

Message ID 20220620175235.60881-35-richard.henderson@linaro.org
State Superseded
Headers show
Series target/arm: Scalable Matrix Extension | expand

Commit Message

Richard Henderson June 20, 2022, 5:52 p.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/sve.decode      | 20 +++++++++++++
 target/arm/translate-sve.c | 57 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

Comments

Peter Maydell June 24, 2022, 12:51 p.m. UTC | #1
On Mon, 20 Jun 2022 at 19:14, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

Would be helpful to note in the commit message that this is an
SVE instruction that operates using the SVE vector length but that
it is present only if SME is implemented.

> +static bool trans_PSEL(DisasContext *s, arg_psel *a)
> +{
> +    int vl = vec_full_reg_size(s);
> +    int pl = pred_gvec_reg_size(s);
> +    int elements = vl >> a->esz;
> +    TCGv_i64 tmp, didx, dbit;
> +    TCGv_ptr ptr;
> +
> +    if (!dc_isar_feature(aa64_sme, s)) {
> +        return false;
> +    }
> +    if (!sve_access_check(s)) {
> +        return true;
> +    }
> +
> +    tmp = tcg_temp_new_i64();
> +    dbit = tcg_temp_new_i64();
> +    didx = tcg_temp_new_i64();
> +    ptr = tcg_temp_new_ptr();
> +
> +    /* Compute the predicate element. */
> +    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
> +    if (is_power_of_2(elements)) {
> +        tcg_gen_andi_i64(tmp, tmp, elements - 1);
> +    } else {
> +        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
> +    }
> +
> +    /* Extract the predicate byte and bit indices. */
> +    tcg_gen_shli_i64(tmp, tmp, a->esz);
> +    tcg_gen_andi_i64(dbit, tmp, 7);
> +    tcg_gen_shri_i64(didx, tmp, 3);
> +    if (HOST_BIG_ENDIAN) {
> +        tcg_gen_xori_i64(didx, didx, 7);
> +    }
> +
> +    /* Load the predicate word. */
> +    tcg_gen_trunc_i64_ptr(ptr, didx);
> +    tcg_gen_add_ptr(ptr, ptr, cpu_env);
> +    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
> +
> +    /* Extract the predicate bit and replicate to MO_64. */
> +    tcg_gen_shr_i64(tmp, tmp, dbit);
> +    tcg_gen_andi_i64(tmp, tmp, 1);
> +    tcg_gen_neg_i64(tmp, tmp);
> +
> +    /* Apply to either copy the source, or write zeros. */
> +    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
> +                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);
> +
> +    tcg_temp_free_i64(tmp);
> +    tcg_temp_free_i64(dbit);
> +    tcg_temp_free_i64(didx);
> +    tcg_temp_free_ptr(ptr);
> +    return true;
> +}

Suspect this would be clearer to read as a helper function, but
it's not that long as a series of TCG ops, I suppose.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

thanks
-- PMM
diff mbox series

Patch

diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index bbdaac6ac7..bf561c270a 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1674,3 +1674,23 @@  BFMLALT_zzxw    01100100 11 1 ..... 0100.1 ..... .....     @rrxr_3a esz=2
 
 ### SVE2 floating-point bfloat16 dot-product (indexed)
 BFDOT_zzxz      01100100 01 1 ..... 010000 ..... .....     @rrxr_2 esz=2
+
+### SVE broadcast predicate element (PSEL; only present with SME)
+
+&psel           esz pd pn pm rv imm
+%psel_rv        16:2 !function=plus_12
+%psel_imm_b     22:2 19:2
+%psel_imm_h     22:2 20:1
+%psel_imm_s     22:2
+%psel_imm_d     23:1
+@psel           ........ .. . ... .. .. pn:4 . pm:4 . pd:4  \
+                &psel rv=%psel_rv
+
+PSEL            00100101 .. 1 ..1 .. 01 .... 0 .... 0 ....  \
+                @psel esz=0 imm=%psel_imm_b
+PSEL            00100101 .. 1 .10 .. 01 .... 0 .... 0 ....  \
+                @psel esz=1 imm=%psel_imm_h
+PSEL            00100101 .. 1 100 .. 01 .... 0 .... 0 ....  \
+                @psel esz=2 imm=%psel_imm_s
+PSEL            00100101 .1 1 000 .. 01 .... 0 .... 0 ....  \
+                @psel esz=3 imm=%psel_imm_d
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index adf0cd3e68..58d0894e15 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7379,3 +7379,60 @@  static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
 
 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
+
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
+{
+    int vl = vec_full_reg_size(s);
+    int pl = pred_gvec_reg_size(s);
+    int elements = vl >> a->esz;
+    TCGv_i64 tmp, didx, dbit;
+    TCGv_ptr ptr;
+
+    if (!dc_isar_feature(aa64_sme, s)) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    tmp = tcg_temp_new_i64();
+    dbit = tcg_temp_new_i64();
+    didx = tcg_temp_new_i64();
+    ptr = tcg_temp_new_ptr();
+
+    /* Compute the predicate element: (cpu_reg(rv) + imm) mod elements. */
+    tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
+    if (is_power_of_2(elements)) {
+        tcg_gen_andi_i64(tmp, tmp, elements - 1);
+    } else {
+        tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
+    }
+
+    /* Scale by element size, then split into byte (didx) and bit (dbit). */
+    tcg_gen_shli_i64(tmp, tmp, a->esz);
+    tcg_gen_andi_i64(dbit, tmp, 7);
+    tcg_gen_shri_i64(didx, tmp, 3);
+    if (HOST_BIG_ENDIAN) {
+        tcg_gen_xori_i64(didx, didx, 7);
+    }
+
+    /* Load the single predicate byte of pm that holds the selected bit. */
+    tcg_gen_trunc_i64_ptr(ptr, didx);
+    tcg_gen_add_ptr(ptr, ptr, cpu_env);
+    tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
+
+    /* Isolate the bit; negate 0/1 into an all-zeros/all-ones mask. */
+    tcg_gen_shr_i64(tmp, tmp, dbit);
+    tcg_gen_andi_i64(tmp, tmp, 1);
+    tcg_gen_neg_i64(tmp, tmp);
+
+    /* AND pn with the mask into pd: copies pn, or writes zeros. */
+    tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
+                      pred_full_reg_offset(s, a->pn), tmp, pl, pl);
+
+    tcg_temp_free_i64(tmp);
+    tcg_temp_free_i64(dbit);
+    tcg_temp_free_i64(didx);
+    tcg_temp_free_ptr(ptr);
+    return true;
+}