Message ID | 20180427002651.28356-8-richard.henderson@linaro.org
State      | Superseded
Series     | target/arm: Implement v8.1-Atomics
On 27 April 2018 at 01:26, Richard Henderson <richard.henderson@linaro.org> wrote:
> This implements all of the v8.1-Atomics instructions except
> for compare-and-swap, which is decoded elsewhere.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/translate-a64.c | 38 ++++++++++++++++++++++++++++++++++++--
>  1 file changed, 36 insertions(+), 2 deletions(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 0706c8c394..6ed7627d79 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
>  typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
>  typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
>  typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
> +typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
>
>  /* Note that the gvec expanders operate on offsets + sizes. */
>  typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
> @@ -2772,6 +2773,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
>      int rn = extract32(insn, 5, 5);
>      int o3_opc = extract32(insn, 12, 4);
>      int feature = ARM_FEATURE_V8_ATOMICS;
> +    TCGv_i64 tcg_rn, tcg_rs;
> +    AtomicThreeOpFn *fn;
>
>      if (is_vector) {
>          unallocated_encoding(s);
> @@ -2779,14 +2782,32 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
>      }
>      switch (o3_opc) {
>      case 000: /* LDADD */
> +        fn = tcg_gen_atomic_fetch_add_i64;
> +        break;
>      case 001: /* LDCLR */
> +        fn = tcg_gen_atomic_fetch_and_i64;
> +        break;
>      case 002: /* LDEOR */
> +        fn = tcg_gen_atomic_fetch_xor_i64;
> +        break;
>      case 003: /* LDSET */
> +        fn = tcg_gen_atomic_fetch_or_i64;
> +        break;
>      case 004: /* LDSMAX */
> +        fn = tcg_gen_atomic_fetch_smax_i64;
> +        break;
>      case 005: /* LDSMIN */
> +        fn = tcg_gen_atomic_fetch_smin_i64;
> +        break;
>      case 006: /* LDUMAX */
> +        fn = tcg_gen_atomic_fetch_umax_i64;
> +        break;
>      case 007: /* LDUMIN */
> +        fn = tcg_gen_atomic_fetch_umin_i64;
> +        break;
>      case 010: /* SWP */
> +        fn = tcg_gen_atomic_xchg_i64;
> +        break;
>      default:
>          unallocated_encoding(s);
>          return;
> @@ -2796,8 +2817,21 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
>          return;
>      }
>
> -    (void)rs;
> -    (void)rn;
> +    if (rn == 31) {
> +        gen_check_sp_alignment(s);
> +    }
> +    tcg_rn = cpu_reg_sp(s, rn);
> +    tcg_rs = read_cpu_reg(s, rs, false);
> +
> +    if (o3_opc == 1) { /* LDCLR */
> +        tcg_gen_not_i64(tcg_rs, tcg_rs);
> +    }
> +
> +    /* The tcg atomic primitives are all full barriers. Therefore we
> +     * can ignore the Acquire and Release bits of this instruction.
> +     */
> +    fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
> +       s->be_data | size | MO_ALIGN);

Does this definitely do the arithmetic operation at the datatype
size and not the _i64 size ? (It makes a difference for example
with LDEORB if Rs has high bits set: the result should always
have [31:8] zero.)

Still missing LDAPR*, but otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

> }
>
>  /* Load/store register (all forms) */
> --
> 2.14.3

thanks
-- PMM
On 05/03/2018 07:14 AM, Peter Maydell wrote:
>> +    /* The tcg atomic primitives are all full barriers. Therefore we
>> +     * can ignore the Acquire and Release bits of this instruction.
>> +     */
>> +    fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
>> +       s->be_data | size | MO_ALIGN);
>
> Does this definitely do the arithmetic operation at the datatype
> size and not the _i64 size ? (It makes a difference for example
> with LDEORB if Rs has high bits set: the result should always
> have [31:8] zero.)

Yes.  Also recall that this returns the original data not the result
of the expression.

r~
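[A quick standalone illustration of the behaviour being confirmed here, in plain C rather than QEMU/TCG code; the function name and values are made up for the example. The point is that the operation happens at the datatype size and the register result is the old memory data, zero-extended, so high bits in Rs never reach bits [31:8] of the result.]

/* Illustration only -- not QEMU code.  Models the architectural
 * behaviour of LDEORB Ws, Wt, [Xn] under discussion: the EOR is done
 * at byte width, and the register result is the *old* memory byte,
 * zero-extended, regardless of any high bits set in Ws. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t ldeorb_model(uint8_t *mem, uint64_t rs)
{
    uint8_t old = *mem;            /* original data, returned to Rt */
    *mem = old ^ (uint8_t)rs;      /* EOR at the datatype (byte) size */
    return old;                    /* bits [63:8] of the result are zero */
}

int main(void)
{
    uint8_t mem = 0x0f;
    uint64_t rt = ldeorb_model(&mem, 0xffffff01);       /* Rs with high bits set */
    printf("mem=0x%02x rt=0x%" PRIx64 "\n", mem, rt);   /* mem=0x0e rt=0xf */
    return 0;
}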
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 0706c8c394..6ed7627d79 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -84,6 +84,7 @@ typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
 typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
 typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
 typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
+typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
 
 /* Note that the gvec expanders operate on offsets + sizes. */
 typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
@@ -2772,6 +2773,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     int rn = extract32(insn, 5, 5);
     int o3_opc = extract32(insn, 12, 4);
     int feature = ARM_FEATURE_V8_ATOMICS;
+    TCGv_i64 tcg_rn, tcg_rs;
+    AtomicThreeOpFn *fn;
 
     if (is_vector) {
         unallocated_encoding(s);
@@ -2779,14 +2782,32 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     }
     switch (o3_opc) {
     case 000: /* LDADD */
+        fn = tcg_gen_atomic_fetch_add_i64;
+        break;
     case 001: /* LDCLR */
+        fn = tcg_gen_atomic_fetch_and_i64;
+        break;
     case 002: /* LDEOR */
+        fn = tcg_gen_atomic_fetch_xor_i64;
+        break;
     case 003: /* LDSET */
+        fn = tcg_gen_atomic_fetch_or_i64;
+        break;
     case 004: /* LDSMAX */
+        fn = tcg_gen_atomic_fetch_smax_i64;
+        break;
     case 005: /* LDSMIN */
+        fn = tcg_gen_atomic_fetch_smin_i64;
+        break;
     case 006: /* LDUMAX */
+        fn = tcg_gen_atomic_fetch_umax_i64;
+        break;
     case 007: /* LDUMIN */
+        fn = tcg_gen_atomic_fetch_umin_i64;
+        break;
     case 010: /* SWP */
+        fn = tcg_gen_atomic_xchg_i64;
+        break;
     default:
         unallocated_encoding(s);
         return;
@@ -2796,8 +2817,21 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
         return;
     }
 
-    (void)rs;
-    (void)rn;
+    if (rn == 31) {
+        gen_check_sp_alignment(s);
+    }
+    tcg_rn = cpu_reg_sp(s, rn);
+    tcg_rs = read_cpu_reg(s, rs, false);
+
+    if (o3_opc == 1) { /* LDCLR */
+        tcg_gen_not_i64(tcg_rs, tcg_rs);
+    }
+
+    /* The tcg atomic primitives are all full barriers. Therefore we
+     * can ignore the Acquire and Release bits of this instruction.
+     */
+    fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
+       s->be_data | size | MO_ALIGN);
 }
 
 /* Load/store register (all forms) */
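[One detail worth calling out from the hunk above: there is no fetch-and-clear primitive, so LDCLR is mapped onto fetch-AND with a pre-inverted operand (the tcg_gen_not_i64 on tcg_rs), and the fetch ops hand back the original memory contents. A rough standalone sketch of the same idea using C11 atomics -- illustrative names only, not QEMU code:]

/* Illustration only -- not QEMU code.  Same trick as the LDCLR case
 * above: invert the operand, then use an atomic fetch-AND; the return
 * value is the old memory contents. */
#include <inttypes.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t ldclr_model(_Atomic uint64_t *mem, uint64_t rs)
{
    /* LDCLR: new = old & ~Rs, result = old */
    return atomic_fetch_and(mem, ~rs);
}

int main(void)
{
    _Atomic uint64_t mem = 0xff;
    uint64_t old = ldclr_model(&mem, 0x0f);
    printf("old=0x%" PRIx64 " mem=0x%" PRIx64 "\n",
           old, (uint64_t)atomic_load(&mem));   /* old=0xff mem=0xf0 */
    return 0;
}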
This implements all of the v8.1-Atomics instructions except
for compare-and-swap, which is decoded elsewhere.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/translate-a64.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

-- 
2.14.3