diff mbox series

[v5,32/36] target/s390x: Use tcg_gen_atomic_cmpxchg_i128 for CDSG

Message ID 20230126043824.54819-33-richard.henderson@linaro.org
State New
Headers show
Series tcg: Support for Int128 with helpers | expand

Commit Message

Richard Henderson Jan. 26, 2023, 4:38 a.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
Cc: David Hildenbrand <david@redhat.com>
Cc: Ilya Leoshkevich <iii@linux.ibm.com>
---
 target/s390x/helper.h            |  2 --
 target/s390x/tcg/insn-data.h.inc |  2 +-
 target/s390x/tcg/mem_helper.c    | 52 ---------------------------
 target/s390x/tcg/translate.c     | 60 ++++++++++++++++++++------------
 4 files changed, 38 insertions(+), 78 deletions(-)

Comments

David Hildenbrand Jan. 26, 2023, 11:27 a.m. UTC | #1
>   static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
> @@ -5419,6 +5410,14 @@ static void prep_r1_P(DisasContext *s, DisasOps *o)
>   }
>   #define SPEC_prep_r1_P SPEC_r1_even
>   
> +static void prep_r1_D64(DisasContext *s, DisasOps *o)
> +{
> +    int r1 = get_field(s, r1);
> +    o->out_128 = tcg_temp_new_i128();
> +    tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);

I really wonder if we should simply move the tcg_gen_concat_i64_i128() 
into the op and use a generic "allocate out_128" instead.

At least that part here confused me heavily.

Apart from that: LGTM, although it's a bit like black magic :D
Richard Henderson Jan. 26, 2023, 9:01 p.m. UTC | #2
On 1/26/23 01:27, David Hildenbrand wrote:
>>   static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
>> @@ -5419,6 +5410,14 @@ static void prep_r1_P(DisasContext *s, DisasOps *o)
>>   }
>>   #define SPEC_prep_r1_P SPEC_r1_even
>> +static void prep_r1_D64(DisasContext *s, DisasOps *o)
>> +{
>> +    int r1 = get_field(s, r1);
>> +    o->out_128 = tcg_temp_new_i128();
>> +    tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);
> 
> I really wonder if we should simply move the tcg_gen_concat_i64_i128() into the op and use 
> a generic "allocate out_128" instead.
> 
> At least that part here confused me heavily.

Just the prep_r1_D64 bit of it?

Better for you as


     C(0xeb3e, CDSG,    RSY_a, Z,   la2, r3_D64, 0, r1_D64, cdsg, 0)


  static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
  {
      int r1 = get_field(s, r1);
+    o->out_128 = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);


The existing generic "allocate out_128" is named "new_x", which I thought was also 
confusing, since this isn't an "x" format operation.  Since there's only one use, I did it 
inline.


r~
David Hildenbrand Jan. 27, 2023, 4:09 p.m. UTC | #3
On 26.01.23 22:01, Richard Henderson wrote:
> On 1/26/23 01:27, David Hildenbrand wrote:
>>>    static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
>>> @@ -5419,6 +5410,14 @@ static void prep_r1_P(DisasContext *s, DisasOps *o)
>>>    }
>>>    #define SPEC_prep_r1_P SPEC_r1_even
>>> +static void prep_r1_D64(DisasContext *s, DisasOps *o)
>>> +{
>>> +    int r1 = get_field(s, r1);
>>> +    o->out_128 = tcg_temp_new_i128();
>>> +    tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);
>>
>> I really wonder if we should simply move the tcg_gen_concat_i64_i128() into the op and use
>> a generic "allocate out_128" instead.
>>
>> At least that part here confused me heavily.
> 
> Just the prep_r1_D64 bit of it?

Yes.

> 
> Better for you as
> 
> 
>       C(0xeb3e, CDSG,    RSY_a, Z,   la2, r3_D64, 0, r1_D64, cdsg, 0)
> 
> 
>    static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
>    {
>        int r1 = get_field(s, r1);
> +    o->out_128 = tcg_temp_new_i128();
> +    tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);
> 
> 
> The existing generic "allocate out_128" is named "new_x", which I thought was also
> confusing, since this isn't an "x" format operation.  Since there's only one use, I did it
> inline.

LGTM
diff mbox series

Patch

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index bccd3bfca6..341bc51ec2 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -35,8 +35,6 @@  DEF_HELPER_3(cxgb, i128, env, s64, i32)
 DEF_HELPER_3(celgb, i64, env, i64, i32)
 DEF_HELPER_3(cdlgb, i64, env, i64, i32)
 DEF_HELPER_3(cxlgb, i128, env, i64, i32)
-DEF_HELPER_4(cdsg, void, env, i64, i32, i32)
-DEF_HELPER_4(cdsg_parallel, void, env, i64, i32, i32)
 DEF_HELPER_4(csst, i32, env, i32, i64, i64)
 DEF_HELPER_4(csst_parallel, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_3(aeb, TCG_CALL_NO_WG, i64, env, i64, i64)
diff --git a/target/s390x/tcg/insn-data.h.inc b/target/s390x/tcg/insn-data.h.inc
index 893f4b48db..ea34b4a277 100644
--- a/target/s390x/tcg/insn-data.h.inc
+++ b/target/s390x/tcg/insn-data.h.inc
@@ -276,7 +276,7 @@ 
 /* COMPARE DOUBLE AND SWAP */
     D(0xbb00, CDS,     RS_a,  Z,   r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
     D(0xeb31, CDSY,    RSY_a, LD,  r3_D32, r1_D32, new, r1_D32, cs, 0, MO_TEUQ)
-    C(0xeb3e, CDSG,    RSY_a, Z,   0, 0, 0, 0, cdsg, 0)
+    C(0xeb3e, CDSG,    RSY_a, Z,   la2, r3_D64, r1_D64, r1_D64, cdsg, 0)
 /* COMPARE AND SWAP AND STORE */
     C(0xc802, CSST,    SSF,   CASS, la1, a2, 0, 0, csst, 0)
 
diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
index 49969abda7..d6725fd18c 100644
--- a/target/s390x/tcg/mem_helper.c
+++ b/target/s390x/tcg/mem_helper.c
@@ -1771,58 +1771,6 @@  uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
     return cc;
 }
 
-void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
-                  uint32_t r1, uint32_t r3)
-{
-    uintptr_t ra = GETPC();
-    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
-    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
-    Int128 oldv;
-    uint64_t oldh, oldl;
-    bool fail;
-
-    check_alignment(env, addr, 16, ra);
-
-    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
-    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
-
-    oldv = int128_make128(oldl, oldh);
-    fail = !int128_eq(oldv, cmpv);
-    if (fail) {
-        newv = oldv;
-    }
-
-    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
-    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
-
-    env->cc_op = fail;
-    env->regs[r1] = int128_gethi(oldv);
-    env->regs[r1 + 1] = int128_getlo(oldv);
-}
-
-void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
-                           uint32_t r1, uint32_t r3)
-{
-    uintptr_t ra = GETPC();
-    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
-    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
-    int mem_idx;
-    MemOpIdx oi;
-    Int128 oldv;
-    bool fail;
-
-    assert(HAVE_CMPXCHG128);
-
-    mem_idx = cpu_mmu_index(env, false);
-    oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
-    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
-    fail = !int128_eq(oldv, cmpv);
-
-    env->cc_op = fail;
-    env->regs[r1] = int128_gethi(oldv);
-    env->regs[r1 + 1] = int128_getlo(oldv);
-}
-
 static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                         uint64_t a2, bool parallel)
 {
diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
index d422a1e62b..0dafa27dab 100644
--- a/target/s390x/tcg/translate.c
+++ b/target/s390x/tcg/translate.c
@@ -2224,31 +2224,22 @@  static DisasJumpType op_cs(DisasContext *s, DisasOps *o)
 static DisasJumpType op_cdsg(DisasContext *s, DisasOps *o)
 {
     int r1 = get_field(s, r1);
-    int r3 = get_field(s, r3);
-    int d2 = get_field(s, d2);
-    int b2 = get_field(s, b2);
-    DisasJumpType ret = DISAS_NEXT;
-    TCGv_i64 addr;
-    TCGv_i32 t_r1, t_r3;
 
-    /* Note that R1:R1+1 = expected value and R3:R3+1 = new value.  */
-    addr = get_address(s, 0, b2, d2);
-    t_r1 = tcg_const_i32(r1);
-    t_r3 = tcg_const_i32(r3);
-    if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
-        gen_helper_cdsg(cpu_env, addr, t_r1, t_r3);
-    } else if (HAVE_CMPXCHG128) {
-        gen_helper_cdsg_parallel(cpu_env, addr, t_r1, t_r3);
-    } else {
-        gen_helper_exit_atomic(cpu_env);
-        ret = DISAS_NORETURN;
-    }
-    tcg_temp_free_i64(addr);
-    tcg_temp_free_i32(t_r1);
-    tcg_temp_free_i32(t_r3);
+    /* Note out (R1:R1+1) = expected value and in2 (R3:R3+1) = new value.  */
+    tcg_gen_atomic_cmpxchg_i128(o->out_128, o->addr1, o->out_128, o->in2_128,
+                                get_mem_index(s), MO_BE | MO_128 | MO_ALIGN);
 
-    set_cc_static(s);
-    return ret;
+    /*
+     * Extract result into cc_dst:cc_src, compare vs the expected value
+     * in the as yet unmodified input registers, then update CC_OP.
+     */
+    tcg_gen_extr_i128_i64(cc_src, cc_dst, o->out_128);
+    tcg_gen_xor_i64(cc_dst, cc_dst, regs[r1]);
+    tcg_gen_xor_i64(cc_src, cc_src, regs[r1 + 1]);
+    tcg_gen_or_i64(cc_dst, cc_dst, cc_src);
+    set_cc_nz_u64(s, cc_dst);
+
+    return DISAS_NEXT;
 }
 
 static DisasJumpType op_csst(DisasContext *s, DisasOps *o)
@@ -5419,6 +5410,14 @@  static void prep_r1_P(DisasContext *s, DisasOps *o)
 }
 #define SPEC_prep_r1_P SPEC_r1_even
 
+static void prep_r1_D64(DisasContext *s, DisasOps *o)
+{
+    int r1 = get_field(s, r1);
+    o->out_128 = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(o->out_128, regs[r1 + 1], regs[r1]);
+}
+#define SPEC_prep_r1_D64 SPEC_r1_even
+
 static void prep_x1(DisasContext *s, DisasOps *o)
 {
     o->out_128 = load_freg_128(get_field(s, r1));
@@ -5488,6 +5487,13 @@  static void wout_r1_D32(DisasContext *s, DisasOps *o)
 }
 #define SPEC_wout_r1_D32 SPEC_r1_even
 
+static void wout_r1_D64(DisasContext *s, DisasOps *o)
+{
+    int r1 = get_field(s, r1);
+    tcg_gen_extr_i128_i64(regs[r1 + 1], regs[r1], o->out_128);
+}
+#define SPEC_wout_r1_D64 SPEC_r1_even
+
 static void wout_r3_P32(DisasContext *s, DisasOps *o)
 {
     int r3 = get_field(s, r3);
@@ -5935,6 +5941,14 @@  static void in2_r3(DisasContext *s, DisasOps *o)
 }
 #define SPEC_in2_r3 0
 
+static void in2_r3_D64(DisasContext *s, DisasOps *o)
+{
+    int r3 = get_field(s, r3);
+    o->in2_128 = tcg_temp_new_i128();
+    tcg_gen_concat_i64_i128(o->in2_128, regs[r3 + 1], regs[r3]);
+}
+#define SPEC_in2_r3_D64 SPEC_r3_even
+
 static void in2_r3_sr32(DisasContext *s, DisasOps *o)
 {
     o->in2 = tcg_temp_new_i64();