diff mbox series

[23/67] target/arm: Convert FMOV, FABS, FNEG (scalar) to decodetree

Message ID 20241201150607.12812-24-richard.henderson@linaro.org
State Superseded
Headers show
Series target/arm: AArch64 decodetree conversion, final part | expand

Commit Message

Richard Henderson Dec. 1, 2024, 3:05 p.m. UTC
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-a64.c | 104 ++++++++++++++++++++++-----------
 target/arm/tcg/a64.decode      |   7 +++
 2 files changed, 78 insertions(+), 33 deletions(-)

Comments

Peter Maydell Dec. 5, 2024, 9:12 p.m. UTC | #1
On Sun, 1 Dec 2024 at 15:16, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  target/arm/tcg/translate-a64.c | 104 ++++++++++++++++++++++-----------
>  target/arm/tcg/a64.decode      |   7 +++
>  2 files changed, 78 insertions(+), 33 deletions(-)
>
> diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
> index 2d3566e45c..87731e0be4 100644
> --- a/target/arm/tcg/translate-a64.c
> +++ b/target/arm/tcg/translate-a64.c
> @@ -8287,6 +8287,67 @@ static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
>      return true;
>  }
>
> +typedef struct FPScalar1Int {
> +    void (*gen_h)(TCGv_i32, TCGv_i32);
> +    void (*gen_s)(TCGv_i32, TCGv_i32);
> +    void (*gen_d)(TCGv_i64, TCGv_i64);
> +} FPScalar1Int;
> +
> +static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
> +                              const FPScalar1Int *f)
> +{
> +    switch (a->esz) {
> +    case MO_64:
> +        if (fp_access_check(s)) {
> +            TCGv_i64 t = read_fp_dreg(s, a->rn);
> +            f->gen_d(t, t);
> +            write_fp_dreg(s, a->rd, t);
> +        }
> +        break;
> +    case MO_32:
> +        if (fp_access_check(s)) {
> +            TCGv_i32 t = read_fp_sreg(s, a->rn);
> +            f->gen_s(t, t);
> +            write_fp_sreg(s, a->rd, t);
> +        }
> +        break;
> +    case MO_16:
> +        if (!dc_isar_feature(aa64_fp16, s)) {
> +            return false;
> +        }
> +        if (fp_access_check(s)) {
> +            TCGv_i32 t = read_fp_hreg(s, a->rn);
> +            f->gen_h(t, t);
> +            write_fp_sreg(s, a->rd, t);
> +        }
> +        break;
> +    default:
> +        return false;
> +    }
> +    return true;
> +}
> +
> +static const FPScalar1Int f_scalar_fmov = {
> +    tcg_gen_mov_i32,
> +    tcg_gen_mov_i32,
> +    tcg_gen_mov_i64,
> +};
> +TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov)
> +
> +static const FPScalar1Int f_scalar_fabs = {
> +    gen_vfp_absh,
> +    gen_vfp_abss,
> +    gen_vfp_absd,
> +};
> +TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs)
> +
> +static const FPScalar1Int f_scalar_fneg = {
> +    gen_vfp_negh,
> +    gen_vfp_negs,
> +    gen_vfp_negd,
> +};
> +TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg)
> +
>  /* Floating-point data-processing (1 source) - half precision */
>  static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
>  {
> @@ -8295,15 +8356,6 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
>      TCGv_i32 tcg_res = tcg_temp_new_i32();
>
>      switch (opcode) {
> -    case 0x0: /* FMOV */
> -        tcg_gen_mov_i32(tcg_res, tcg_op);
> -        break;
> -    case 0x1: /* FABS */
> -        gen_vfp_absh(tcg_res, tcg_op);
> -        break;
> -    case 0x2: /* FNEG */
> -        gen_vfp_negh(tcg_res, tcg_op);
> -        break;
>      case 0x3: /* FSQRT */
>          fpst = fpstatus_ptr(FPST_FPCR_F16);
>          gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
> @@ -8331,6 +8383,9 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
>          gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
>          break;
>      default:
> +    case 0x0: /* FMOV */
> +    case 0x1: /* FABS */
> +    case 0x2: /* FNEG */
>          g_assert_not_reached();
>      }
>

In these changes to the "handle this op" functions we make the
function assert if it's passed an op we've converted. But shouldn't
there also be a change which makes the calling function disas_fp_1src()
call unallocated_encoding() for the ops ?

> @@ -8349,15 +8404,6 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
>      tcg_res = tcg_temp_new_i32();
>
>      switch (opcode) {
> -    case 0x0: /* FMOV */
> -        tcg_gen_mov_i32(tcg_res, tcg_op);
> -        goto done;
> -    case 0x1: /* FABS */
> -        gen_vfp_abss(tcg_res, tcg_op);
> -        goto done;
> -    case 0x2: /* FNEG */
> -        gen_vfp_negs(tcg_res, tcg_op);
> -        goto done;
>      case 0x3: /* FSQRT */
>          gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
>          goto done;
> @@ -8393,6 +8439,9 @@ static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
>          gen_fpst = gen_helper_frint64_s;
>          break;
>      default:
> +    case 0x0: /* FMOV */
> +    case 0x1: /* FABS */
> +    case 0x2: /* FNEG */
>          g_assert_not_reached();
>      }
>
> @@ -8417,22 +8466,10 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
>      TCGv_ptr fpst;
>      int rmode = -1;
>
> -    switch (opcode) {
> -    case 0x0: /* FMOV */
> -        gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
> -        return;
> -    }
> -
>      tcg_op = read_fp_dreg(s, rn);
>      tcg_res = tcg_temp_new_i64();
>
>      switch (opcode) {
> -    case 0x1: /* FABS */
> -        gen_vfp_absd(tcg_res, tcg_op);
> -        goto done;
> -    case 0x2: /* FNEG */
> -        gen_vfp_negd(tcg_res, tcg_op);
> -        goto done;
>      case 0x3: /* FSQRT */
>          gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
>          goto done;
> @@ -8465,6 +8502,9 @@ static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
>          gen_fpst = gen_helper_frint64_d;
>          break;
>      default:
> +    case 0x0: /* FMOV */
> +    case 0x1: /* FABS */
> +    case 0x2: /* FNEG */
>          g_assert_not_reached();
>      }
>
> @@ -10881,13 +10921,11 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
>          case 0x7b: /* FCVTZU */
>              gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
>              break;
> -        case 0x6f: /* FNEG */
> -            tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
> -            break;
>          case 0x7d: /* FRSQRTE */
>              gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
>              break;
>          default:
> +        case 0x6f: /* FNEG */
>              g_assert_not_reached();
>          }

What's this change about? This bit of decode is not in the area that
corresponds to the new patterns you add to a64.decode.

Is this a bug in our existing decode where we decode something that
should be undef? I think that 0x6f here corresponds to the decode
table in section C4.1.96.6 ("Advanced SIMD scalar two-register
miscellaneous FP16"), where it is U:a:opcode == 1 1 01111. But
in that table that encoding is marked unallocated. A similar
thing is true for case 0x2f FABS and case 07f FSQRT. All three
of these should have set only_in_vector to true and not had the
code handling in the is_scalar branch of the function.

We should fix that bug in a separate patch before we do the
decodetree conversion, I think.

> diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
> index 928e69da69..fca64c63c3 100644
> --- a/target/arm/tcg/a64.decode
> +++ b/target/arm/tcg/a64.decode
> @@ -47,6 +47,7 @@
>  @rr_h           ........ ... ..... ...... rn:5 rd:5     &rr_e esz=1
>  @rr_d           ........ ... ..... ...... rn:5 rd:5     &rr_e esz=3
>  @rr_sd          ........ ... ..... ...... rn:5 rd:5     &rr_e esz=%esz_sd
> +@rr_hsd         ........ ... ..... ...... rn:5 rd:5     &rr_e esz=%esz_hsd
>
>  @rrr_b          ........ ... rm:5 ...... rn:5 rd:5      &rrr_e esz=0
>  @rrr_h          ........ ... rm:5 ...... rn:5 rd:5      &rrr_e esz=1
> @@ -1321,6 +1322,12 @@ FMAXV_s         0110 1110 00 11000 01111 10 ..... .....     @rr_q1e2
>  FMINV_h         0.00 1110 10 11000 01111 10 ..... .....     @qrr_h
>  FMINV_s         0110 1110 10 11000 01111 10 ..... .....     @rr_q1e2
>
> +# Floating-point data processing (1 source)
> +
> +FMOV_s          00011110 .. 1 000000 10000 ..... .....      @rr_hsd
> +FABS_s          00011110 .. 1 000001 10000 ..... .....      @rr_hsd
> +FNEG_s          00011110 .. 1 000010 10000 ..... .....      @rr_hsd
> +
>  # Floating-point Immediate
>
>  FMOVI_s         0001 1110 .. 1 imm:8 100 00000 rd:5         esz=%esz_hsd
> --
> 2.43.0

thanks
-- PMM
Richard Henderson Dec. 6, 2024, 1:52 a.m. UTC | #2
On 12/5/24 15:12, Peter Maydell wrote:
>> @@ -8295,15 +8356,6 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
>>       TCGv_i32 tcg_res = tcg_temp_new_i32();
>>
>>       switch (opcode) {
>> -    case 0x0: /* FMOV */
>> -        tcg_gen_mov_i32(tcg_res, tcg_op);
>> -        break;
>> -    case 0x1: /* FABS */
>> -        gen_vfp_absh(tcg_res, tcg_op);
>> -        break;
>> -    case 0x2: /* FNEG */
>> -        gen_vfp_negh(tcg_res, tcg_op);
>> -        break;
>>       case 0x3: /* FSQRT */
>>           fpst = fpstatus_ptr(FPST_FPCR_F16);
>>           gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
>> @@ -8331,6 +8383,9 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
>>           gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
>>           break;
>>       default:
>> +    case 0x0: /* FMOV */
>> +    case 0x1: /* FABS */
>> +    case 0x2: /* FNEG */
>>           g_assert_not_reached();
>>       }
>>
> 
> In these changes to the "handle this op" functions we make the
> function assert if it's passed an op we've converted. But shouldn't
> there also be a change which makes the calling function disas_fp_1src()
> call unallocated_encoding() for the ops ?

Yes.  I missed that because the line is

     case 0x0 ... 0x3:

without the usual set of comments.

>> @@ -10881,13 +10921,11 @@ static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
>>           case 0x7b: /* FCVTZU */
>>               gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
>>               break;
>> -        case 0x6f: /* FNEG */
>> -            tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
>> -            break;
>>           case 0x7d: /* FRSQRTE */
>>               gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
>>               break;
>>           default:
>> +        case 0x6f: /* FNEG */
>>               g_assert_not_reached();
>>           }
> 
> What's this change about? This bit of decode is not in the area that
> corresponds to the new patterns you add to a64.decode.
> 
> Is this a bug in our existing decode where we decode something that
> should be undef? I think that 0x6f here corresponds to the decode
> table in section C4.1.96.6 ("Advanced SIMD scalar two-register
> miscellaneous FP16"), where it is U:a:opcode == 1 1 01111. But
> in that table that encoding is marked unallocated. A similar
> thing is true for case 0x2f FABS and case 07f FSQRT. All three
> of these should have set only_in_vector to true and not had the
> code handling in the is_scalar branch of the function.
> 
> We should fix that bug in a separate patch before we do the
> decodetree conversion, I think.

You're right.  I had thought there was something weird here, in that FNEG was present but 
not FABS.  The only scalar FNEG is in fp data-processing 1-src.


r~
Richard Henderson Dec. 6, 2024, 2:34 a.m. UTC | #3
On 12/5/24 19:52, Richard Henderson wrote:
> On 12/5/24 15:12, Peter Maydell wrote:
>>> @@ -8295,15 +8356,6 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int 
>>> rd, int rn)
>>>       TCGv_i32 tcg_res = tcg_temp_new_i32();
>>>
>>>       switch (opcode) {
>>> -    case 0x0: /* FMOV */
>>> -        tcg_gen_mov_i32(tcg_res, tcg_op);
>>> -        break;
>>> -    case 0x1: /* FABS */
>>> -        gen_vfp_absh(tcg_res, tcg_op);
>>> -        break;
>>> -    case 0x2: /* FNEG */
>>> -        gen_vfp_negh(tcg_res, tcg_op);
>>> -        break;
>>>       case 0x3: /* FSQRT */
>>>           fpst = fpstatus_ptr(FPST_FPCR_F16);
>>>           gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
>>> @@ -8331,6 +8383,9 @@ static void handle_fp_1src_half(DisasContext *s, int opcode, int 
>>> rd, int rn)
>>>           gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
>>>           break;
>>>       default:
>>> +    case 0x0: /* FMOV */
>>> +    case 0x1: /* FABS */
>>> +    case 0x2: /* FNEG */
>>>           g_assert_not_reached();
>>>       }
>>>
>>
>> In these changes to the "handle this op" functions we make the
>> function assert if it's passed an op we've converted. But shouldn't
>> there also be a change which makes the calling function disas_fp_1src()
>> call unallocated_encoding() for the ops ?
> 
> Yes.  I missed that because the line is
> 
>      case 0x0 ... 0x3:
> 
> without the usual set of comments.

The next two patches have the same problem.  All fixed now.

r~
diff mbox series

Patch

diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 2d3566e45c..87731e0be4 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -8287,6 +8287,67 @@  static bool trans_CSEL(DisasContext *s, arg_CSEL *a)
     return true;
 }
 
+typedef struct FPScalar1Int {
+    void (*gen_h)(TCGv_i32, TCGv_i32);
+    void (*gen_s)(TCGv_i32, TCGv_i32);
+    void (*gen_d)(TCGv_i64, TCGv_i64);
+} FPScalar1Int;
+
+static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
+                              const FPScalar1Int *f)
+{
+    switch (a->esz) {
+    case MO_64:
+        if (fp_access_check(s)) {
+            TCGv_i64 t = read_fp_dreg(s, a->rn);
+            f->gen_d(t, t);
+            write_fp_dreg(s, a->rd, t);
+        }
+        break;
+    case MO_32:
+        if (fp_access_check(s)) {
+            TCGv_i32 t = read_fp_sreg(s, a->rn);
+            f->gen_s(t, t);
+            write_fp_sreg(s, a->rd, t);
+        }
+        break;
+    case MO_16:
+        if (!dc_isar_feature(aa64_fp16, s)) {
+            return false;
+        }
+        if (fp_access_check(s)) {
+            TCGv_i32 t = read_fp_hreg(s, a->rn);
+            f->gen_h(t, t);
+            write_fp_sreg(s, a->rd, t);
+        }
+        break;
+    default:
+        return false;
+    }
+    return true;
+}
+
+static const FPScalar1Int f_scalar_fmov = {
+    tcg_gen_mov_i32,
+    tcg_gen_mov_i32,
+    tcg_gen_mov_i64,
+};
+TRANS(FMOV_s, do_fp1_scalar_int, a, &f_scalar_fmov)
+
+static const FPScalar1Int f_scalar_fabs = {
+    gen_vfp_absh,
+    gen_vfp_abss,
+    gen_vfp_absd,
+};
+TRANS(FABS_s, do_fp1_scalar_int, a, &f_scalar_fabs)
+
+static const FPScalar1Int f_scalar_fneg = {
+    gen_vfp_negh,
+    gen_vfp_negs,
+    gen_vfp_negd,
+};
+TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg)
+
 /* Floating-point data-processing (1 source) - half precision */
 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
 {
@@ -8295,15 +8356,6 @@  static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
     TCGv_i32 tcg_res = tcg_temp_new_i32();
 
     switch (opcode) {
-    case 0x0: /* FMOV */
-        tcg_gen_mov_i32(tcg_res, tcg_op);
-        break;
-    case 0x1: /* FABS */
-        gen_vfp_absh(tcg_res, tcg_op);
-        break;
-    case 0x2: /* FNEG */
-        gen_vfp_negh(tcg_res, tcg_op);
-        break;
     case 0x3: /* FSQRT */
         fpst = fpstatus_ptr(FPST_FPCR_F16);
         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
@@ -8331,6 +8383,9 @@  static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
         break;
     default:
+    case 0x0: /* FMOV */
+    case 0x1: /* FABS */
+    case 0x2: /* FNEG */
         g_assert_not_reached();
     }
 
@@ -8349,15 +8404,6 @@  static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
     tcg_res = tcg_temp_new_i32();
 
     switch (opcode) {
-    case 0x0: /* FMOV */
-        tcg_gen_mov_i32(tcg_res, tcg_op);
-        goto done;
-    case 0x1: /* FABS */
-        gen_vfp_abss(tcg_res, tcg_op);
-        goto done;
-    case 0x2: /* FNEG */
-        gen_vfp_negs(tcg_res, tcg_op);
-        goto done;
     case 0x3: /* FSQRT */
         gen_helper_vfp_sqrts(tcg_res, tcg_op, tcg_env);
         goto done;
@@ -8393,6 +8439,9 @@  static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
         gen_fpst = gen_helper_frint64_s;
         break;
     default:
+    case 0x0: /* FMOV */
+    case 0x1: /* FABS */
+    case 0x2: /* FNEG */
         g_assert_not_reached();
     }
 
@@ -8417,22 +8466,10 @@  static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
     TCGv_ptr fpst;
     int rmode = -1;
 
-    switch (opcode) {
-    case 0x0: /* FMOV */
-        gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
-        return;
-    }
-
     tcg_op = read_fp_dreg(s, rn);
     tcg_res = tcg_temp_new_i64();
 
     switch (opcode) {
-    case 0x1: /* FABS */
-        gen_vfp_absd(tcg_res, tcg_op);
-        goto done;
-    case 0x2: /* FNEG */
-        gen_vfp_negd(tcg_res, tcg_op);
-        goto done;
     case 0x3: /* FSQRT */
         gen_helper_vfp_sqrtd(tcg_res, tcg_op, tcg_env);
         goto done;
@@ -8465,6 +8502,9 @@  static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
         gen_fpst = gen_helper_frint64_d;
         break;
     default:
+    case 0x0: /* FMOV */
+    case 0x1: /* FABS */
+    case 0x2: /* FNEG */
         g_assert_not_reached();
     }
 
@@ -10881,13 +10921,11 @@  static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
         case 0x7b: /* FCVTZU */
             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
             break;
-        case 0x6f: /* FNEG */
-            tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
-            break;
         case 0x7d: /* FRSQRTE */
             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
             break;
         default:
+        case 0x6f: /* FNEG */
             g_assert_not_reached();
         }
 
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 928e69da69..fca64c63c3 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -47,6 +47,7 @@ 
 @rr_h           ........ ... ..... ...... rn:5 rd:5     &rr_e esz=1
 @rr_d           ........ ... ..... ...... rn:5 rd:5     &rr_e esz=3
 @rr_sd          ........ ... ..... ...... rn:5 rd:5     &rr_e esz=%esz_sd
+@rr_hsd         ........ ... ..... ...... rn:5 rd:5     &rr_e esz=%esz_hsd
 
 @rrr_b          ........ ... rm:5 ...... rn:5 rd:5      &rrr_e esz=0
 @rrr_h          ........ ... rm:5 ...... rn:5 rd:5      &rrr_e esz=1
@@ -1321,6 +1322,12 @@  FMAXV_s         0110 1110 00 11000 01111 10 ..... .....     @rr_q1e2
 FMINV_h         0.00 1110 10 11000 01111 10 ..... .....     @qrr_h
 FMINV_s         0110 1110 10 11000 01111 10 ..... .....     @rr_q1e2
 
+# Floating-point data processing (1 source)
+
+FMOV_s          00011110 .. 1 000000 10000 ..... .....      @rr_hsd
+FABS_s          00011110 .. 1 000001 10000 ..... .....      @rr_hsd
+FNEG_s          00011110 .. 1 000010 10000 ..... .....      @rr_hsd
+
 # Floating-point Immediate
 
 FMOVI_s         0001 1110 .. 1 imm:8 100 00000 rd:5         esz=%esz_hsd