Message ID | 20241201150607.12812-24-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | target/arm: AArch64 decodetree conversion, final part | expand |
On Sun, 1 Dec 2024 at 15:16, Richard Henderson <richard.henderson@linaro.org> wrote: > > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > target/arm/tcg/translate-a64.c | 104 ++++++++++++++++++++++----------- > target/arm/tcg/a64.decode | 7 +++ > 2 files changed, 78 insertions(+), 33 deletions(-) > > diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c > index 2d3566e45c..87731e0be4 100644 > --- a/target/arm/tcg/translate-a64.c > +++ b/target/arm/tcg/translate-a64.c > @@ -8287,6 +8287,67 @@ static bool trans_CSEL(DisasContext *s, arg_CSEL *a) > return true; > } > > +typedef struct FPScalar1Int { > + void (*gen_h)(TCGv_i32, TCGv_i32); > + void (*gen_s)(TCGv_i32, TCGv_i32); > + void (*gen_d)(TCGv_i64, TCGv_i64); > +} FPScalar1Int; > + > +static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, > + const FPScalar1Int *f) > +{ > + switch (a->esz) { > + case MO_64: > + if (fp_access_check(s)) { > + TCGv_i64 t = read_fp_dreg(s, a->rn); > + f->gen_d(t, t); > + write_fp_dreg(s, a->rd, t); > + } > + break; > + case MO_32: > + if (fp_access_check(s)) { > + TCGv_i32 t = read_fp_sreg(s, a->rn); > + f->gen_s(t, t); > + write_fp_sreg(s, a->rd, t); > + } > + break; > + case MO_16: > + if (!dc_isar_feature(aa64_fp16, s)) { > + return false; > + } > + if (fp_access_check(s)) { > + TCGv_i32 t = read_fp_hreg(s, a->rn); > + f->gen_h(t, t); > + write_fp_sreg(s, a->rd, t); > + } > + break; > + default: > + return false; > + } > + return true; > +} > + > +static const FPScalar1Int f_scalar_fmov = { > + tcg_gen_mov_i32, > + tcg_gen_mov_i32, > + tcg_gen_mov_i64, > +}; > +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) > + > +static const FPScalar1Int f_scalar_fabs = { > + gen_vfp_absh, > + gen_vfp_abss, > + gen_vfp_absd, > +}; > +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) > + > +static const FPScalar1Int f_scalar_fneg = { > + gen_vfp_negh, > + gen_vfp_negs, > + gen_vfp_negd, > +}; > +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) > + > /* Floating-point data-processing (1 source) - half precision */ > static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) > { > @@ -8295,15 +8356,6 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) > TCGv_i32 tcg_res = tcg_temp_new_i32(); > > switch (opcode) { > - case 0x0: /* FMOV */ > - tcg_gen_mov_i32(tcg_res, tcg_op); > - break; > - case 0x1: /* FABS */ > - gen_vfp_absh(tcg_res, tcg_op); > - break; > - case 0x2: /* FNEG */ > - gen_vfp_negh(tcg_res, tcg_op); > - break; > case 0x3: /* FSQRT */ > fpst = fpstatus_ptr(FPST_FPCR_F16); > gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); > @@ -8331,6 +8383,9 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) > gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); > break; > default: > + case 0x0: /* FMOV */ > + case 0x1: /* FABS */ > + case 0x2: /* FNEG */ > g_assert_not_reached(); > } > In these changes to the "handle this op" functions we make the function assert if it's passed an op we've converted. But shouldn't there also be a change which makes the calling function disas_fp_1src() call unallocated_encoding() for the ops ? > @@ -8349,15 +8404,6 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) > tcg_res = tcg_temp_new_i32(); > > switch (opcode) { > - case 0x0: /* FMOV */ > - tcg_gen_mov_i32(tcg_res, tcg_op); > - goto done; > - case 0x1: /* FABS */ > - gen_vfp_abss(tcg_res, tcg_op); > - goto done; > - case 0x2: /* FNEG */ > - gen_vfp_negs(tcg_res, tcg_op); > - goto done; > case 0x3: /* FSQRT */ > gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); > goto done; > @@ -8393,6 +8439,9 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) > gen_fpst = gen_helper_frint64_s; > break; > default: > + case 0x0: /* FMOV */ > + case 0x1: /* FABS */ > + case 0x2: /* FNEG */ > g_assert_not_reached(); > } > > @@ -8417,22 +8466,10 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) > TCGv_ptr fpst; > int rmode = -1; > > - switch (opcode) { > - case 0x0: /* FMOV */ > - gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); > - return; > - } > - > tcg_op = read_fp_dreg(s, rn); > tcg_res = tcg_temp_new_i64(); > > switch (opcode) { > - case 0x1: /* FABS */ > - gen_vfp_absd(tcg_res, tcg_op); > - goto done; > - case 0x2: /* FNEG */ > - gen_vfp_negd(tcg_res, tcg_op); > - goto done; > case 0x3: /* FSQRT */ > gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); > goto done; > @@ -8465,6 +8502,9 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) > gen_fpst = gen_helper_frint64_d; > break; > default: > + case 0x0: /* FMOV */ > + case 0x1: /* FABS */ > + case 0x2: /* FNEG */ > g_assert_not_reached(); > } > > @@ -10881,13 +10921,11 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) > case 0x7b: /* FCVTZU */ > gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); > break; > - case 0x6f: /* FNEG */ > - tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); > - break; > case 0x7d: /* FRSQRTE */ > gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); > break; > default: > + case 0x6f: /* FNEG */ > g_assert_not_reached(); > } What's this change about? This bit of decode is not in the area that corresponds to the new patterns you add to a64.decode. Is this a bug in our existing decode where we decode something that should be undef? I think that 0x6f here corresponds to the decode table in section C4.1.96.6 ("Advanced SIMD scalar two-register miscellaneous FP16"), where it is U:a:opcode == 1 1 01111. But in that table that encoding is marked unallocated. A similar thing is true for case 0x2f FABS and case 07f FSQRT. All three of these should have set only_in_vector to true and not had the code handling in the is_scalar branch of the function. We should fix that bug in a separate patch before we do the decodetree conversion, I think. > diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode > index 928e69da69..fca64c63c3 100644 > --- a/target/arm/tcg/a64.decode > +++ b/target/arm/tcg/a64.decode > @@ -47,6 +47,7 @@ > @rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1 > @rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3 > @rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd > +@rr_hsd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_hsd > > @rrr_b ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=0 > @rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1 > @@ -1321,6 +1322,12 @@ FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2 > FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h > FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2 > > +# Floating-point data processing (1 source) > + > +FMOV_s 00011110 .. 1 000000 10000 ..... ..... @rr_hsd > +FABS_s 00011110 .. 1 000001 10000 ..... ..... @rr_hsd > +FNEG_s 00011110 .. 1 000010 10000 ..... ..... @rr_hsd > + > # Floating-point Immediate > > FMOVI_s 0001 1110 .. 1 imm:8 100 00000 rd:5 esz=%esz_hsd > -- > 2.43.0 thanks -- PMM
On 12/5/24 15:12, Peter Maydell wrote: >> @@ -8295,15 +8356,6 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) >> TCGv_i32 tcg_res = tcg_temp_new_i32(); >> >> switch (opcode) { >> - case 0x0: /* FMOV */ >> - tcg_gen_mov_i32(tcg_res, tcg_op); >> - break; >> - case 0x1: /* FABS */ >> - gen_vfp_absh(tcg_res, tcg_op); >> - break; >> - case 0x2: /* FNEG */ >> - gen_vfp_negh(tcg_res, tcg_op); >> - break; >> case 0x3: /* FSQRT */ >> fpst = fpstatus_ptr(FPST_FPCR_F16); >> gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); >> @@ -8331,6 +8383,9 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) >> gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); >> break; >> default: >> + case 0x0: /* FMOV */ >> + case 0x1: /* FABS */ >> + case 0x2: /* FNEG */ >> g_assert_not_reached(); >> } >> > > In these changes to the "handle this op" functions we make the > function assert if it's passed an op we've converted. But shouldn't > there also be a change which makes the calling function disas_fp_1src() > call unallocated_encoding() for the ops ? Yes. I missed that because the line is case 0x0 ... 0x3: without the usual set of comments. >> @@ -10881,13 +10921,11 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) >> case 0x7b: /* FCVTZU */ >> gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); >> break; >> - case 0x6f: /* FNEG */ >> - tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); >> - break; >> case 0x7d: /* FRSQRTE */ >> gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); >> break; >> default: >> + case 0x6f: /* FNEG */ >> g_assert_not_reached(); >> } > > What's this change about? This bit of decode is not in the area that > corresponds to the new patterns you add to a64.decode. > > Is this a bug in our existing decode where we decode something that > should be undef? I think that 0x6f here corresponds to the decode > table in section C4.1.96.6 ("Advanced SIMD scalar two-register > miscellaneous FP16"), where it is U:a:opcode == 1 1 01111. But > in that table that encoding is marked unallocated. A similar > thing is true for case 0x2f FABS and case 07f FSQRT. All three > of these should have set only_in_vector to true and not had the > code handling in the is_scalar branch of the function. > > We should fix that bug in a separate patch before we do the > decodetree conversion, I think. You're right. I had thought there was something weird here, in that FNEG was present but not FABS. The only scalar FNEG is in fp data-processing 1-src. r~
On 12/5/24 19:52, Richard Henderson wrote: > On 12/5/24 15:12, Peter Maydell wrote: >>> @@ -8295,15 +8356,6 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int >>> rd, int rn) >>> TCGv_i32 tcg_res = tcg_temp_new_i32(); >>> >>> switch (opcode) { >>> - case 0x0: /* FMOV */ >>> - tcg_gen_mov_i32(tcg_res, tcg_op); >>> - break; >>> - case 0x1: /* FABS */ >>> - gen_vfp_absh(tcg_res, tcg_op); >>> - break; >>> - case 0x2: /* FNEG */ >>> - gen_vfp_negh(tcg_res, tcg_op); >>> - break; >>> case 0x3: /* FSQRT */ >>> fpst = fpstatus_ptr(FPST_FPCR_F16); >>> gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); >>> @@ -8331,6 +8383,9 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int >>> rd, int rn) >>> gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); >>> break; >>> default: >>> + case 0x0: /* FMOV */ >>> + case 0x1: /* FABS */ >>> + case 0x2: /* FNEG */ >>> g_assert_not_reached(); >>> } >>> >> >> In these changes to the "handle this op" functions we make the >> function assert if it's passed an op we've converted. But shouldn't >> there also be a change which makes the calling function disas_fp_1src() >> call unallocated_encoding() for the ops ? > > Yes. I missed that because the line is > > case 0x0 ... 0x3: > > without the usual set of comments. The next two patches have the same problem. All fixed now. r~
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 2d3566e45c..87731e0be4 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -8287,6 +8287,67 @@ static bool trans_CSEL(DisasContext *s, arg_CSEL *a) return true; } +typedef struct FPScalar1Int { + void (*gen_h)(TCGv_i32, TCGv_i32); + void (*gen_s)(TCGv_i32, TCGv_i32); + void (*gen_d)(TCGv_i64, TCGv_i64); +} FPScalar1Int; + +static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a, + const FPScalar1Int *f) +{ + switch (a->esz) { + case MO_64: + if (fp_access_check(s)) { + TCGv_i64 t = read_fp_dreg(s, a->rn); + f->gen_d(t, t); + write_fp_dreg(s, a->rd, t); + } + break; + case MO_32: + if (fp_access_check(s)) { + TCGv_i32 t = read_fp_sreg(s, a->rn); + f->gen_s(t, t); + write_fp_sreg(s, a->rd, t); + } + break; + case MO_16: + if (!dc_isar_feature(aa64_fp16, s)) { + return false; + } + if (fp_access_check(s)) { + TCGv_i32 t = read_fp_hreg(s, a->rn); + f->gen_h(t, t); + write_fp_sreg(s, a->rd, t); + } + break; + default: + return false; + } + return true; +} + +static const FPScalar1Int f_scalar_fmov = { + tcg_gen_mov_i32, + tcg_gen_mov_i32, + tcg_gen_mov_i64, +}; +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov) + +static const FPScalar1Int f_scalar_fabs = { + gen_vfp_absh, + gen_vfp_abss, + gen_vfp_absd, +}; +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs) + +static const FPScalar1Int f_scalar_fneg = { + gen_vfp_negh, + gen_vfp_negs, + gen_vfp_negd, +}; +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg) + /* Floating-point data-processing (1 source) - half precision */ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) { @@ -8295,15 +8356,6 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) TCGv_i32 tcg_res = tcg_temp_new_i32(); switch (opcode) { - case 0x0: /* FMOV */ - tcg_gen_mov_i32(tcg_res, tcg_op); - break; - case 0x1: /* FABS */ - gen_vfp_absh(tcg_res, tcg_op); - break; - case 0x2: /* FNEG */ - gen_vfp_negh(tcg_res, tcg_op); - break; case 0x3: /* FSQRT */ fpst = fpstatus_ptr(FPST_FPCR_F16); gen_helper_sqrt_f16(tcg_res, tcg_op, fpst); @@ -8331,6 +8383,9 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn) gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst); break; default: + case 0x0: /* FMOV */ + case 0x1: /* FABS */ + case 0x2: /* FNEG */ g_assert_not_reached(); } @@ -8349,15 +8404,6 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) tcg_res = tcg_temp_new_i32(); switch (opcode) { - case 0x0: /* FMOV */ - tcg_gen_mov_i32(tcg_res, tcg_op); - goto done; - case 0x1: /* FABS */ - gen_vfp_abss(tcg_res, tcg_op); - goto done; - case 0x2: /* FNEG */ - gen_vfp_negs(tcg_res, tcg_op); - goto done; case 0x3: /* FSQRT */ gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env); goto done; @@ -8393,6 +8439,9 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn) gen_fpst = gen_helper_frint64_s; break; default: + case 0x0: /* FMOV */ + case 0x1: /* FABS */ + case 0x2: /* FNEG */ g_assert_not_reached(); } @@ -8417,22 +8466,10 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) TCGv_ptr fpst; int rmode = -1; - switch (opcode) { - case 0x0: /* FMOV */ - gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0); - return; - } - tcg_op = read_fp_dreg(s, rn); tcg_res = tcg_temp_new_i64(); switch (opcode) { - case 0x1: /* FABS */ - gen_vfp_absd(tcg_res, tcg_op); - goto done; - case 0x2: /* FNEG */ - gen_vfp_negd(tcg_res, tcg_op); - goto done; case 0x3: /* FSQRT */ gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env); goto done; @@ -8465,6 +8502,9 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn) gen_fpst = gen_helper_frint64_d; break; default: + case 0x0: /* FMOV */ + case 0x1: /* FABS */ + case 0x2: /* FNEG */ g_assert_not_reached(); } @@ -10881,13 +10921,11 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn) case 0x7b: /* FCVTZU */ gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus); break; - case 0x6f: /* FNEG */ - tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000); - break; case 0x7d: /* FRSQRTE */ gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus); break; default: + case 0x6f: /* FNEG */ g_assert_not_reached(); } diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index 928e69da69..fca64c63c3 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -47,6 +47,7 @@ @rr_h ........ ... ..... ...... rn:5 rd:5 &rr_e esz=1 @rr_d ........ ... ..... ...... rn:5 rd:5 &rr_e esz=3 @rr_sd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_sd +@rr_hsd ........ ... ..... ...... rn:5 rd:5 &rr_e esz=%esz_hsd @rrr_b ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=0 @rrr_h ........ ... rm:5 ...... rn:5 rd:5 &rrr_e esz=1 @@ -1321,6 +1322,12 @@ FMAXV_s 0110 1110 00 11000 01111 10 ..... ..... @rr_q1e2 FMINV_h 0.00 1110 10 11000 01111 10 ..... ..... @qrr_h FMINV_s 0110 1110 10 11000 01111 10 ..... ..... @rr_q1e2 +# Floating-point data processing (1 source) + +FMOV_s 00011110 .. 1 000000 10000 ..... ..... @rr_hsd +FABS_s 00011110 .. 1 000001 10000 ..... ..... @rr_hsd +FNEG_s 00011110 .. 1 000010 10000 ..... ..... @rr_hsd + # Floating-point Immediate FMOVI_s 0001 1110 .. 1 imm:8 100 00000 rd:5 esz=%esz_hsd
Signed-off-by: Richard Henderson <richard.henderson@linaro.org> --- target/arm/tcg/translate-a64.c | 104 ++++++++++++++++++++++----------- target/arm/tcg/a64.decode | 7 +++ 2 files changed, 78 insertions(+), 33 deletions(-)