[v2,32/32] arm/translate-a64: add all single op FP16 to handle_fp_1src_half

Message ID 20180208173157.24705-33-alex.bennee@linaro.org
State Superseded
Headers show
Series
  • Add ARMv8.2 half-precision functions
Related show

Commit Message

Alex Bennée Feb. 8, 2018, 5:31 p.m.
This includes FMOV, FABS, FNEG, FSQRT, and FRINT[NPMZAXI]. We re-use
existing helpers to implement them.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
 target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

-- 
2.15.1

Comments

Richard Henderson Feb. 9, 2018, 6:37 p.m. | #1
On 02/08/2018 09:31 AM, Alex Bennée wrote:
> This includes FMOV, FABS, FNEG, FSQRT and  FRINT[NPMZAXI]. We re-use

> existing helpers to achieve this.

> 

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> ---

>  target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++

>  1 file changed, 72 insertions(+)

> 

> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c

> index 92adf43a89..265bfb14d0 100644

> --- a/target/arm/translate-a64.c

> +++ b/target/arm/translate-a64.c

> @@ -4508,6 +4508,66 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)

>      tcg_temp_free_i64(t_true);

>  }

>  

> +/* Floating-point data-processing (1 source) - half precision */

> +static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)

> +{

> +    TCGv_ptr fpst = NULL;

> +    TCGv_i32 tcg_op;

> +    TCGv_i32 tcg_res;

> +

> +    tcg_op = read_fp_sreg(s, rn);

> +    tcg_res = tcg_temp_new_i32();

> +

> +    switch (opcode) {

> +    case 0x0: /* FMOV */

> +        tcg_gen_mov_i32(tcg_res, tcg_op);

> +        break;

> +    case 0x1: /* FABS */

> +        gen_helper_advsimd_absh(tcg_res, tcg_op);

> +        break;

> +    case 0x2: /* FNEG */

> +        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);

> +        break;

> +    case 0x3: /* FSQRT */

> +        gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);

> +        break;

> +    case 0x8: /* FRINTN */

> +    case 0x9: /* FRINTP */

> +    case 0xa: /* FRINTM */

> +    case 0xb: /* FRINTZ */

> +    case 0xc: /* FRINTA */

> +    {

> +        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

> +        fpst = get_fpstatus_ptr(true);

> +

> +        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

> +        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);

> +

> +        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

> +        tcg_temp_free_i32(tcg_rmode);

> +        break;

> +    }

> +    case 0xe: /* FRINTX */

> +        fpst = get_fpstatus_ptr(true);

> +        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);

> +        break;

> +    case 0xf: /* FRINTI */

> +        fpst = get_fpstatus_ptr(true);

> +        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);

> +        break;

> +    default:

> +        abort();

> +    }

> +

> +    write_fp_sreg(s, rd, tcg_res);


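Some of these helpers will zero-extend their result from 16 bits, but at least
a few won't -- notably FMOV and FNEG, which operate on the full 32-bit value
returned by read_fp_sreg and pass its upper 16 bits straight through to
write_fp_sreg.  I wonder if it wouldn't be best to have a write_fp_hreg.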


r~
Alex Bennée Feb. 23, 2018, 9:45 a.m. | #2
Richard Henderson <richard.henderson@linaro.org> writes:

> On 02/08/2018 09:31 AM, Alex Bennée wrote:

>> This includes FMOV, FABS, FNEG, FSQRT and  FRINT[NPMZAXI]. We re-use

>> existing helpers to achieve this.

>>

>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>> ---

>>  target/arm/translate-a64.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++

>>  1 file changed, 72 insertions(+)

>>

>> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c

>> index 92adf43a89..265bfb14d0 100644

>> --- a/target/arm/translate-a64.c

>> +++ b/target/arm/translate-a64.c

>> @@ -4508,6 +4508,66 @@ static void disas_fp_csel(DisasContext *s, uint32_t insn)

>>      tcg_temp_free_i64(t_true);

>>  }

>>

>> +/* Floating-point data-processing (1 source) - half precision */

>> +static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)

>> +{

>> +    TCGv_ptr fpst = NULL;

>> +    TCGv_i32 tcg_op;

>> +    TCGv_i32 tcg_res;

>> +

>> +    tcg_op = read_fp_sreg(s, rn);

>> +    tcg_res = tcg_temp_new_i32();

>> +

>> +    switch (opcode) {

>> +    case 0x0: /* FMOV */

>> +        tcg_gen_mov_i32(tcg_res, tcg_op);

>> +        break;

>> +    case 0x1: /* FABS */

>> +        gen_helper_advsimd_absh(tcg_res, tcg_op);

>> +        break;

>> +    case 0x2: /* FNEG */

>> +        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);

>> +        break;

>> +    case 0x3: /* FSQRT */

>> +        gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);

>> +        break;

>> +    case 0x8: /* FRINTN */

>> +    case 0x9: /* FRINTP */

>> +    case 0xa: /* FRINTM */

>> +    case 0xb: /* FRINTZ */

>> +    case 0xc: /* FRINTA */

>> +    {

>> +        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));

>> +        fpst = get_fpstatus_ptr(true);

>> +

>> +        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

>> +        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);

>> +

>> +        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);

>> +        tcg_temp_free_i32(tcg_rmode);

>> +        break;

>> +    }

>> +    case 0xe: /* FRINTX */

>> +        fpst = get_fpstatus_ptr(true);

>> +        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);

>> +        break;

>> +    case 0xf: /* FRINTI */

>> +        fpst = get_fpstatus_ptr(true);

>> +        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);

>> +        break;

>> +    default:

>> +        abort();

>> +    }

>> +

>> +    write_fp_sreg(s, rd, tcg_res);

>

> Some of these helpers will zero-extend from 16 bits, but at least a few won't

> -- notably fmov and fneg.  I wonder if it wouldn't be best to have a

> write_fp_hreg.


I fixed this up by using read_vec_element to load the value.

--
Alex Bennée

Patch

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 92adf43a89..265bfb14d0 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -4508,6 +4508,66 @@  static void disas_fp_csel(DisasContext *s, uint32_t insn)
     tcg_temp_free_i64(t_true);
 }
 
+/* Floating-point data-processing (1 source) - half precision */
+static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
+{
+    TCGv_ptr fpst = NULL;
+    TCGv_i32 tcg_op;
+    TCGv_i32 tcg_res;
+
+    tcg_op = read_fp_sreg(s, rn);
+    tcg_res = tcg_temp_new_i32();
+
+    switch (opcode) {
+    case 0x0: /* FMOV */
+        tcg_gen_mov_i32(tcg_res, tcg_op);
+        break;
+    case 0x1: /* FABS */
+        gen_helper_advsimd_absh(tcg_res, tcg_op);
+        break;
+    case 0x2: /* FNEG */
+        tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
+        break;
+    case 0x3: /* FSQRT */
+        gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);
+        break;
+    case 0x8: /* FRINTN */
+    case 0x9: /* FRINTP */
+    case 0xa: /* FRINTM */
+    case 0xb: /* FRINTZ */
+    case 0xc: /* FRINTA */
+    {
+        TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
+        fpst = get_fpstatus_ptr(true);
+
+        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
+
+        gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
+        tcg_temp_free_i32(tcg_rmode);
+        break;
+    }
+    case 0xe: /* FRINTX */
+        fpst = get_fpstatus_ptr(true);
+        gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
+        break;
+    case 0xf: /* FRINTI */
+        fpst = get_fpstatus_ptr(true);
+        gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
+        break;
+    default:
+        abort();
+    }
+
+    write_fp_sreg(s, rd, tcg_res);
+
+    if (fpst) {
+        tcg_temp_free_ptr(fpst);
+    }
+    tcg_temp_free_i32(tcg_op);
+    tcg_temp_free_i32(tcg_res);
+}
+
 /* Floating-point data-processing (1 source) - single precision */
 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
 {
@@ -4734,6 +4794,18 @@  static void disas_fp_1src(DisasContext *s, uint32_t insn)
 
             handle_fp_1src_double(s, opcode, rd, rn);
             break;
+        case 2:
+            if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
+                unallocated_encoding(s);
+                return;
+            }
+
+            if (!fp_access_check(s)) {
+                return;
+            }
+
+            handle_fp_1src_half(s, opcode, rd, rn);
+            break;
         default:
             unallocated_encoding(s);
         }