diff mbox series

[v3,34/81] target/arm: Implement SVE2 WHILERW, WHILEWR

Message ID 20200918183751.2787647-35-richard.henderson@linaro.org
State Superseded
Headers show
Series target/arm: Implement SVE2 | expand

Commit Message

Richard Henderson Sept. 18, 2020, 6:37 p.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
v2: Fix decodetree typo
---
 target/arm/sve.decode      |  3 ++
 target/arm/translate-sve.c | 62 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

Comments

LIU Zhiwei Oct. 13, 2020, 2:33 a.m. UTC | #1
On 2020/9/19 2:37, Richard Henderson wrote:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

> v2: Fix decodetree typo

> ---

>   target/arm/sve.decode      |  3 ++

>   target/arm/translate-sve.c | 62 ++++++++++++++++++++++++++++++++++++++

>   2 files changed, 65 insertions(+)

>

> diff --git a/target/arm/sve.decode b/target/arm/sve.decode

> index b7038f9f57..19d503e2f4 100644

> --- a/target/arm/sve.decode

> +++ b/target/arm/sve.decode

> @@ -702,6 +702,9 @@ CTERM           00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000

>   # SVE integer compare scalar count and limit

>   WHILE           00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4

>   

> +# SVE2 pointer conflict compare

> +WHILE_ptr       00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4

> +

>   ### SVE Integer Wide Immediate - Unpredicated Group

>   

>   # SVE broadcast floating-point immediate (unpredicated)

> diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c

> index f1bc4c63e6..d3241ce167 100644

> --- a/target/arm/translate-sve.c

> +++ b/target/arm/translate-sve.c

> @@ -3227,6 +3227,68 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)

>       return true;

>   }

>   

> +static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)

> +{

> +    TCGv_i64 op0, op1, diff, t1, tmax;

> +    TCGv_i32 t2, t3;

> +    TCGv_ptr ptr;

> +    unsigned desc, vsz = vec_full_reg_size(s);

> +

> +    if (!dc_isar_feature(aa64_sve2, s)) {

> +        return false;

> +    }

> +    if (!sve_access_check(s)) {

> +        return true;

> +    }

> +

> +    op0 = read_cpu_reg(s, a->rn, 1);

> +    op1 = read_cpu_reg(s, a->rm, 1);

> +

> +    tmax = tcg_const_i64(vsz);

> +    diff = tcg_temp_new_i64();

> +

> +    if (a->rw) {

> +        /* WHILERW */

> +        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */

> +        t1 = tcg_temp_new_i64();

> +        tcg_gen_sub_i64(diff, op0, op1);

> +        tcg_gen_sub_i64(t1, op1, op0);

> +        tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1);

It should be:

tcg_gen_movcond_i64(TCG_COND_GTU, diff, op0, op1, diff, t1);

> +        tcg_temp_free_i64(t1);

> +        /* If op1 == op0, diff == 0, and the condition is always true. */

> +        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);

> +    } else {

> +        /* WHILEWR */

> +        tcg_gen_sub_i64(diff, op1, op0);

> +        /* If op0 >= op1, diff <= 0, the condition is always true. */

> +        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);

> +    }

> +

> +    /* Bound to the maximum.  */

> +    tcg_gen_umin_i64(diff, diff, tmax);

> +    tcg_temp_free_i64(tmax);

> +

> +    /* Since we're bounded, pass as a 32-bit type.  */

> +    t2 = tcg_temp_new_i32();

> +    tcg_gen_extrl_i64_i32(t2, diff);

We should align count down to (1 << esz),

tcg_gen_andi_i32(t2, t2, ~MAKE_64BIT_MASK(0, a->esz));

Best Regards,
Zhiwei
> +    tcg_temp_free_i64(diff);

> +

> +    desc = (vsz / 8) - 2;

> +    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);

> +    t3 = tcg_const_i32(desc);

> +

> +    ptr = tcg_temp_new_ptr();

> +    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));

> +

> +    gen_helper_sve_whilel(t2, ptr, t2, t3);

> +    do_pred_flags(t2);

> +

> +    tcg_temp_free_ptr(ptr);

> +    tcg_temp_free_i32(t2);

> +    tcg_temp_free_i32(t3);

> +    return true;

> +}

> +

>   /*

>    *** SVE Integer Wide Immediate - Unpredicated Group

>    */
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body>
    <br>
    <br>
    <div class="moz-cite-prefix">On 2020/9/19 2:37, Richard Henderson
      wrote:<br>
    </div>
    <blockquote type="cite"
      cite="mid:20200918183751.2787647-35-richard.henderson@linaro.org">
      <pre class="moz-quote-pre" wrap="">Signed-off-by: Richard Henderson <a class="moz-txt-link-rfc2396E" href="mailto:richard.henderson@linaro.org">&lt;richard.henderson@linaro.org&gt;</a>
---
v2: Fix decodetree typo
---
 target/arm/sve.decode      |  3 ++
 target/arm/translate-sve.c | 62 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index b7038f9f57..19d503e2f4 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -702,6 +702,9 @@ CTERM           00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
 # SVE integer compare scalar count and limit
 WHILE           00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4
 
+# SVE2 pointer conflict compare
+WHILE_ptr       00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4
+
 ### SVE Integer Wide Immediate - Unpredicated Group
 
 # SVE broadcast floating-point immediate (unpredicated)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index f1bc4c63e6..d3241ce167 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -3227,6 +3227,68 @@ static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
     return true;
 }
 
+static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
+{
+    TCGv_i64 op0, op1, diff, t1, tmax;
+    TCGv_i32 t2, t3;
+    TCGv_ptr ptr;
+    unsigned desc, vsz = vec_full_reg_size(s);
+
+    if (!dc_isar_feature(aa64_sve2, s)) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    op0 = read_cpu_reg(s, a-&gt;rn, 1);
+    op1 = read_cpu_reg(s, a-&gt;rm, 1);
+
+    tmax = tcg_const_i64(vsz);
+    diff = tcg_temp_new_i64();
+
+    if (a-&gt;rw) {
+        /* WHILERW */
+        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
+        t1 = tcg_temp_new_i64();
+        tcg_gen_sub_i64(diff, op0, op1);
+        tcg_gen_sub_i64(t1, op1, op0);
+        tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1);</pre>
    </blockquote>
    It should be:<br>
    <pre>tcg_gen_movcond_i64(TCG_COND_GTU, diff, op0, op1, diff, t1);</pre>
    <blockquote type="cite"
      cite="mid:20200918183751.2787647-35-richard.henderson@linaro.org">
      <pre class="moz-quote-pre" wrap="">
+        tcg_temp_free_i64(t1);
+        /* If op1 == op0, diff == 0, and the condition is always true. */
+        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
+    } else {
+        /* WHILEWR */
+        tcg_gen_sub_i64(diff, op1, op0);
+        /* If op0 &gt;= op1, diff &lt;= 0, the condition is always true. */
+        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
+    }
+
+    /* Bound to the maximum.  */
+    tcg_gen_umin_i64(diff, diff, tmax);
+    tcg_temp_free_i64(tmax);
+
+    /* Since we're bounded, pass as a 32-bit type.  */
+    t2 = tcg_temp_new_i32();
+    tcg_gen_extrl_i64_i32(t2, diff);</pre>
    </blockquote>
    We should align count down to (1 &lt;&lt; esz), <br>
    <pre>tcg_gen_andi_i32(t2, t2, ~MAKE_64BIT_MASK(0, a-&gt;esz));

</pre>
    Best Regards,<br>
    Zhiwei<br>
    <blockquote type="cite"
      cite="mid:20200918183751.2787647-35-richard.henderson@linaro.org">
      <pre class="moz-quote-pre" wrap="">
+    tcg_temp_free_i64(diff);
+
+    desc = (vsz / 8) - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a-&gt;esz);
+    t3 = tcg_const_i32(desc);
+
+    ptr = tcg_temp_new_ptr();
+    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a-&gt;rd));
+
+    gen_helper_sve_whilel(t2, ptr, t2, t3);</pre>
    </blockquote>
    <blockquote type="cite"
      cite="mid:20200918183751.2787647-35-richard.henderson@linaro.org">
      <pre class="moz-quote-pre" wrap="">
+    do_pred_flags(t2);
+
+    tcg_temp_free_ptr(ptr);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t3);
+    return true;
+}
+
 /*
  *** SVE Integer Wide Immediate - Unpredicated Group
  */
</pre>
    </blockquote>
    <br>
  </body>
</html>
Richard Henderson Oct. 19, 2020, 9:58 p.m. UTC | #2
On 10/12/20 7:33 PM, LIU Zhiwei wrote:
>> +    if (a->rw) {

>> +        /* WHILERW */

>> +        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */

>> +        t1 = tcg_temp_new_i64();

>> +        tcg_gen_sub_i64(diff, op0, op1);

>> +        tcg_gen_sub_i64(t1, op1, op0);

>> +        tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1);

> It should be:

> 

> tcg_gen_movcond_i64(TCG_COND_GTU, diff, op0, op1, diff, t1);


Yep.

> 

>> +        tcg_temp_free_i64(t1);

>> +        /* If op1 == op0, diff == 0, and the condition is always true. */

>> +        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);

>> +    } else {

>> +        /* WHILEWR */

>> +        tcg_gen_sub_i64(diff, op1, op0);

>> +        /* If op0 >= op1, diff <= 0, the condition is always true. */

>> +        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);

>> +    }

>> +

>> +    /* Bound to the maximum.  */

>> +    tcg_gen_umin_i64(diff, diff, tmax);

>> +    tcg_temp_free_i64(tmax);

>> +

>> +    /* Since we're bounded, pass as a 32-bit type.  */

>> +    t2 = tcg_temp_new_i32();

>> +    tcg_gen_extrl_i64_i32(t2, diff);

> We should align count down to (1 << esz),

> 

> tcg_gen_andi_i32(t2, t2, ~MAKE_64BIT_MASK(0, a->esz));


Yep, this corresponds to the "DIV (esize DIV 8)" portion of the pseudocode.
But it needs to go earlier, before we compare diff against 0 in the two movcond
above.

Will fix.  Thanks,


r~
diff mbox series

Patch

diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index b7038f9f57..19d503e2f4 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -702,6 +702,9 @@  CTERM           00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
 # SVE integer compare scalar count and limit
 WHILE           00100101 esz:2 1 rm:5 000 sf:1 u:1 lt:1 rn:5 eq:1 rd:4
 
+# SVE2 pointer conflict compare
+WHILE_ptr       00100101 esz:2 1 rm:5 001 100 rn:5 rw:1 rd:4
+
 ### SVE Integer Wide Immediate - Unpredicated Group
 
 # SVE broadcast floating-point immediate (unpredicated)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index f1bc4c63e6..d3241ce167 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -3227,6 +3227,68 @@  static bool trans_WHILE(DisasContext *s, arg_WHILE *a)
     return true;
 }
 
+static bool trans_WHILE_ptr(DisasContext *s, arg_WHILE_ptr *a)
+{
+    TCGv_i64 op0, op1, diff, t1, tmax;
+    TCGv_i32 t2, t3;
+    TCGv_ptr ptr;
+    unsigned desc, vsz = vec_full_reg_size(s);
+
+    if (!dc_isar_feature(aa64_sve2, s)) {
+        return false;
+    }
+    if (!sve_access_check(s)) {
+        return true;
+    }
+
+    op0 = read_cpu_reg(s, a->rn, 1);
+    op1 = read_cpu_reg(s, a->rm, 1);
+
+    tmax = tcg_const_i64(vsz);
+    diff = tcg_temp_new_i64();
+
+    if (a->rw) {
+        /* WHILERW */
+        /* diff = abs(op1 - op0), noting that op0/1 are unsigned. */
+        t1 = tcg_temp_new_i64();
+        tcg_gen_sub_i64(diff, op0, op1);
+        tcg_gen_sub_i64(t1, op1, op0);
+        tcg_gen_movcond_i64(TCG_COND_LTU, diff, op0, op1, diff, t1);
+        tcg_temp_free_i64(t1);
+        /* If op1 == op0, diff == 0, and the condition is always true. */
+        tcg_gen_movcond_i64(TCG_COND_EQ, diff, op0, op1, tmax, diff);
+    } else {
+        /* WHILEWR */
+        tcg_gen_sub_i64(diff, op1, op0);
+        /* If op0 >= op1, diff <= 0, the condition is always true. */
+        tcg_gen_movcond_i64(TCG_COND_GEU, diff, op0, op1, tmax, diff);
+    }
+
+    /* Bound to the maximum.  */
+    tcg_gen_umin_i64(diff, diff, tmax);
+    tcg_temp_free_i64(tmax);
+
+    /* Since we're bounded, pass as a 32-bit type.  */
+    t2 = tcg_temp_new_i32();
+    tcg_gen_extrl_i64_i32(t2, diff);
+    tcg_temp_free_i64(diff);
+
+    desc = (vsz / 8) - 2;
+    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
+    t3 = tcg_const_i32(desc);
+
+    ptr = tcg_temp_new_ptr();
+    tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
+
+    gen_helper_sve_whilel(t2, ptr, t2, t3);
+    do_pred_flags(t2);
+
+    tcg_temp_free_ptr(ptr);
+    tcg_temp_free_i32(t2);
+    tcg_temp_free_i32(t3);
+    return true;
+}
+
 /*
  *** SVE Integer Wide Immediate - Unpredicated Group
  */