[v4,14/31] arm/translate-a64: add FP16 FMULX/FMLS/FMLA to simd_indexed

Message ID 20180227143852.11175-15-alex.bennee@linaro.org
State Superseded
Headers show
Series
  • Add ARMv8.2 half-precision functions
Related show

Commit Message

Alex Bennée Feb. 27, 2018, 2:38 p.m.
The helpers use the newly refactored muladd support in SoftFloat to
implement the float16 operations.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>


---
v3
  - reworked the switch statement to fall through to the unallocated case
  - added is_fp16 bool for fpst
  - fixed up some long lines
v4
  - don't double-check for feature bit
---
 target/arm/translate-a64.c | 82 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 66 insertions(+), 16 deletions(-)

-- 
2.15.1

Comments

Richard Henderson Feb. 27, 2018, 5:09 p.m. | #1
On 02/27/2018 06:38 AM, Alex Bennée wrote:
> @@ -11244,7 +11245,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)

>          }

>          /* fall through */

>      case 0x9: /* FMUL, FMULX */

> -        if (!extract32(size, 1, 1)) {

> +        if (size == 1) {

>              unallocated_encoding(s);

>              return;

>          }


This is still redundant, since size == 1 is handled...

> @@ -11256,18 +11257,34 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)

>      }

>  

>      if (is_fp) {

> -        /* low bit of size indicates single/double */

> -        size = extract32(size, 0, 1) ? 3 : 2;

> -        if (size == 2) {

> +        /* convert insn encoded size to TCGMemOp size */

> +        switch (size) {

> +        case 2: /* single precision */

> +            size = MO_32;

>              index = h << 1 | l;

> -        } else {

> +            rm |= (m << 4);

> +            break;

> +        case 3: /* double precision */

> +            size = MO_64;

>              if (l || !is_q) {

>                  unallocated_encoding(s);

>                  return;

>              }

>              index = h;

> +            rm |= (m << 4);

> +            break;

> +        case 0: /* half precision */

> +            size = MO_16;

> +            index = h << 2 | l << 1 | m;

> +            is_fp16 = true;

> +            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {

> +                break;

> +            }

> +            /* fallthru */

> +        default: /* unallocated */

> +            unallocated_encoding(s);

> +            return;

>          }


... here.  But it's not wrong and I can clean this up along with the additional
changes I need to make to this function for fcmla support.  So,

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>



r~
Alex Bennée Feb. 27, 2018, 5:52 p.m. | #2
Richard Henderson <richard.henderson@linaro.org> writes:

> On 02/27/2018 06:38 AM, Alex Bennée wrote:

>> @@ -11244,7 +11245,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)

>>          }

>>          /* fall through */

>>      case 0x9: /* FMUL, FMULX */

>> -        if (!extract32(size, 1, 1)) {

>> +        if (size == 1) {

>>              unallocated_encoding(s);

>>              return;

>>          }

>

> This is still redundant, since size == 1 is handled...


doh!

will fix.

>

>> @@ -11256,18 +11257,34 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)

>>      }

>>

>>      if (is_fp) {

>> -        /* low bit of size indicates single/double */

>> -        size = extract32(size, 0, 1) ? 3 : 2;

>> -        if (size == 2) {

>> +        /* convert insn encoded size to TCGMemOp size */

>> +        switch (size) {

>> +        case 2: /* single precision */

>> +            size = MO_32;

>>              index = h << 1 | l;

>> -        } else {

>> +            rm |= (m << 4);

>> +            break;

>> +        case 3: /* double precision */

>> +            size = MO_64;

>>              if (l || !is_q) {

>>                  unallocated_encoding(s);

>>                  return;

>>              }

>>              index = h;

>> +            rm |= (m << 4);

>> +            break;

>> +        case 0: /* half precision */

>> +            size = MO_16;

>> +            index = h << 2 | l << 1 | m;

>> +            is_fp16 = true;

>> +            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {

>> +                break;

>> +            }

>> +            /* fallthru */

>> +        default: /* unallocated */

>> +            unallocated_encoding(s);

>> +            return;

>>          }

>

> ... here.  But it's not wrong and I can clean this up along with the additional

> changes I need to make to this function for fcmla support.  So,

>

> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

>

>

> r~



--
Alex Bennée
Peter Maydell Feb. 27, 2018, 5:57 p.m. | #3
On 27 February 2018 at 17:52, Alex Bennée <alex.bennee@linaro.org> wrote:
>

> Richard Henderson <richard.henderson@linaro.org> writes:

>

>> On 02/27/2018 06:38 AM, Alex Bennée wrote:

>>> @@ -11244,7 +11245,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)

>>>          }

>>>          /* fall through */

>>>      case 0x9: /* FMUL, FMULX */

>>> -        if (!extract32(size, 1, 1)) {

>>> +        if (size == 1) {

>>>              unallocated_encoding(s);

>>>              return;

>>>          }

>>

>> This is still redundant, since size == 1 is handled...

>

> doh!

>

> will fix.


I'd prefer it if you didn't, because I'm in the process of putting
this version of the patchset into target-arm.next...

thanks
-- PMM
Alex Bennée Feb. 27, 2018, 6:17 p.m. | #4
Peter Maydell <peter.maydell@linaro.org> writes:

> On 27 February 2018 at 17:52, Alex Bennée <alex.bennee@linaro.org> wrote:

>>

>> Richard Henderson <richard.henderson@linaro.org> writes:

>>

>>> On 02/27/2018 06:38 AM, Alex Bennée wrote:

>>>> @@ -11244,7 +11245,7 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)

>>>>          }

>>>>          /* fall through */

>>>>      case 0x9: /* FMUL, FMULX */

>>>> -        if (!extract32(size, 1, 1)) {

>>>> +        if (size == 1) {

>>>>              unallocated_encoding(s);

>>>>              return;

>>>>          }

>>>

>>> This is still redundant, since size == 1 is handled...

>>

>> doh!

>>

>> will fix.

>

> I'd prefer it if you didn't, because I'm in the process of putting

> this version of the patchset into target-arm.next...


Fair enough - you've picked up Richard's r-b?

--
Alex Bennée

Patch

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index e96e6cdd15..6a264bc134 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -11198,6 +11198,7 @@  static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     int rd = extract32(insn, 0, 5);
     bool is_long = false;
     bool is_fp = false;
+    bool is_fp16 = false;
     int index;
     TCGv_ptr fpst;
 
@@ -11244,7 +11245,7 @@  static void disas_simd_indexed(DisasContext *s, uint32_t insn)
         }
         /* fall through */
     case 0x9: /* FMUL, FMULX */
-        if (!extract32(size, 1, 1)) {
+        if (size == 1) {
             unallocated_encoding(s);
             return;
         }
@@ -11256,18 +11257,34 @@  static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 
     if (is_fp) {
-        /* low bit of size indicates single/double */
-        size = extract32(size, 0, 1) ? 3 : 2;
-        if (size == 2) {
+        /* convert insn encoded size to TCGMemOp size */
+        switch (size) {
+        case 2: /* single precision */
+            size = MO_32;
             index = h << 1 | l;
-        } else {
+            rm |= (m << 4);
+            break;
+        case 3: /* double precision */
+            size = MO_64;
             if (l || !is_q) {
                 unallocated_encoding(s);
                 return;
             }
             index = h;
+            rm |= (m << 4);
+            break;
+        case 0: /* half precision */
+            size = MO_16;
+            index = h << 2 | l << 1 | m;
+            is_fp16 = true;
+            if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+                break;
+            }
+            /* fallthru */
+        default: /* unallocated */
+            unallocated_encoding(s);
+            return;
         }
-        rm |= (m << 4);
     } else {
         switch (size) {
         case 1:
@@ -11288,7 +11305,7 @@  static void disas_simd_indexed(DisasContext *s, uint32_t insn)
     }
 
     if (is_fp) {
-        fpst = get_fpstatus_ptr(false);
+        fpst = get_fpstatus_ptr(is_fp16);
     } else {
         fpst = NULL;
     }
@@ -11390,18 +11407,51 @@  static void disas_simd_indexed(DisasContext *s, uint32_t insn)
                 break;
             }
             case 0x5: /* FMLS */
-                /* As usual for ARM, separate negation for fused multiply-add */
-                gen_helper_vfp_negs(tcg_op, tcg_op);
-                /* fall through */
             case 0x1: /* FMLA */
-                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
-                gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
+                read_vec_element_i32(s, tcg_res, rd, pass,
+                                     is_scalar ? size : MO_32);
+                switch (size) {
+                case 1:
+                    if (opcode == 0x5) {
+                        /* As usual for ARM, separate negation for fused
+                         * multiply-add */
+                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
+                    }
+                    gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
+                                               tcg_res, fpst);
+                    break;
+                case 2:
+                    if (opcode == 0x5) {
+                        /* As usual for ARM, separate negation for
+                         * fused multiply-add */
+                        tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
+                    }
+                    gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
+                                           tcg_res, fpst);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
                 break;
             case 0x9: /* FMUL, FMULX */
-                if (u) {
-                    gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
-                } else {
-                    gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+                switch (size) {
+                case 1:
+                    if (u) {
+                        gen_helper_advsimd_mulxh(tcg_res, tcg_op, tcg_idx,
+                                                 fpst);
+                    } else {
+                        g_assert_not_reached();
+                    }
+                    break;
+                case 2:
+                    if (u) {
+                        gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
+                    } else {
+                        gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
+                    }
+                    break;
+                default:
+                    g_assert_not_reached();
                 }
                 break;
             case 0xc: /* SQDMULH */