diff mbox series

[v2,5/7] target/riscv: Check nanboxed inputs in trans_rvf.inc.c

Message ID 20200724002807.441147-6-richard.henderson@linaro.org
State Accepted
Commit ffe70e4dfc9cf2a6934e674b81b69c847b403c4b
Headers show
Series target/riscv: NaN-boxing for multiple precision | expand

Commit Message

Richard Henderson July 24, 2020, 12:28 a.m. UTC
If a 32-bit input is not properly nanboxed, then the input is replaced
with the default qnan.  The only inline expansion is for the sign-changing
set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------
 target/riscv/translate.c                | 18 +++++++
 2 files changed, 73 insertions(+), 16 deletions(-)

-- 
2.25.1

Comments

LIU Zhiwei July 24, 2020, 6:04 a.m. UTC | #1
On 2020/7/24 8:28, Richard Henderson wrote:
> If a 32-bit input is not properly nanboxed, then the input is replaced

> with the default qnan.  The only inline expansion is for the sign-changing

> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>   target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------

>   target/riscv/translate.c                | 18 +++++++

>   2 files changed, 73 insertions(+), 16 deletions(-)

>

> diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c

> index 264d3139f1..f9a9e0643a 100644

> --- a/target/riscv/insn_trans/trans_rvf.inc.c

> +++ b/target/riscv/insn_trans/trans_rvf.inc.c

> @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)

>   {

>       REQUIRE_FPU;

>       REQUIRE_EXT(ctx, RVF);

> +

>       if (a->rs1 == a->rs2) { /* FMOV */

> -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

> +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

>       } else { /* FSGNJ */

> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],

> -                            0, 31);

> +        TCGv_i64 rs1 = tcg_temp_new_i64();

> +        TCGv_i64 rs2 = tcg_temp_new_i64();

> +

> +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

> +

> +        /* This formulation retains the nanboxing of rs2. */

> +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);

> +        tcg_temp_free_i64(rs1);

> +        tcg_temp_free_i64(rs2);

>       }

> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

>       mark_fs_dirty(ctx);

>       return true;

>   }

>   

>   static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)

>   {

> +    TCGv_i64 rs1, rs2, mask;

> +

>       REQUIRE_FPU;

>       REQUIRE_EXT(ctx, RVF);

> +

> +    rs1 = tcg_temp_new_i64();

> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

> +

>       if (a->rs1 == a->rs2) { /* FNEG */

> -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);

> +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));

>       } else {

> -        TCGv_i64 t0 = tcg_temp_new_i64();

> -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);

> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);

> -        tcg_temp_free_i64(t0);

> +        rs2 = tcg_temp_new_i64();

> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

> +

> +        /*

> +         * Replace bit 31 in rs1 with inverse in rs2.

> +         * This formulation retains the nanboxing of rs1.

> +         */

> +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));

> +        tcg_gen_andc_i64(rs2, mask, rs2);

> +        tcg_gen_and_i64(rs1, mask, rs1);

> +        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);

> +

> +        tcg_temp_free_i64(mask);

> +        tcg_temp_free_i64(rs2);

>       }

> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

> +    tcg_temp_free_i64(rs1);

> +

>       mark_fs_dirty(ctx);

>       return true;

>   }

>   

>   static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)

>   {

> +    TCGv_i64 rs1, rs2;

> +

>       REQUIRE_FPU;

>       REQUIRE_EXT(ctx, RVF);

> +

> +    rs1 = tcg_temp_new_i64();

> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

> +

>       if (a->rs1 == a->rs2) { /* FABS */

> -        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);

> +        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));

>       } else {

> -        TCGv_i64 t0 = tcg_temp_new_i64();

> -        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);

> -        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);

> -        tcg_temp_free_i64(t0);

> +        rs2 = tcg_temp_new_i64();

> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

> +

> +        /*

> +         * Xor bit 31 in rs1 with that in rs2.

> +         * This formulation retains the nanboxing of rs1.

> +         */

> +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));

> +        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);

> +

> +        tcg_temp_free_i64(rs2);

>       }

> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

> +    tcg_temp_free_i64(rs1);

> +

>       mark_fs_dirty(ctx);

>       return true;

>   }

> diff --git a/target/riscv/translate.c b/target/riscv/translate.c

> index 12a746da97..bf35182776 100644

> --- a/target/riscv/translate.c

> +++ b/target/riscv/translate.c

> @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)

>       tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));

>   }

>   

> +/*

> + * A narrow n-bit operation, where n < FLEN, checks that input operands

> + * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.

> + * If so, the least-significant bits of the input are used, otherwise the

> + * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).

> + *

> + * Here, the result is always nan-boxed, even the canonical nan.

> + */

> +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)

> +{

> +    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);

> +    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);

> +

> +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);

> +    tcg_temp_free_i64(t_max);

> +    tcg_temp_free_i64(t_nan);

> +}

> +

Reviewed-by: LIU Zhiwei <zhiwei_liu@c-sky.com>


Zhiwei
>   static void generate_exception(DisasContext *ctx, int excp)

>   {

>       tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);
Chih-Min Chao Aug. 6, 2020, 6:27 a.m. UTC | #2
On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> If a 32-bit input is not properly nanboxed, then the input is replaced

> with the default qnan.  The only inline expansion is for the sign-changing

> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------

>  target/riscv/translate.c                | 18 +++++++

>  2 files changed, 73 insertions(+), 16 deletions(-)

>

> diff --git a/target/riscv/insn_trans/trans_rvf.inc.c

> b/target/riscv/insn_trans/trans_rvf.inc.c

> index 264d3139f1..f9a9e0643a 100644

> --- a/target/riscv/insn_trans/trans_rvf.inc.c

> +++ b/target/riscv/insn_trans/trans_rvf.inc.c

> @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx,

> arg_fsgnj_s *a)

>  {

>      REQUIRE_FPU;

>      REQUIRE_EXT(ctx, RVF);

> +

>      if (a->rs1 == a->rs2) { /* FMOV */

> -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

> +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

>      } else { /* FSGNJ */

> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2],

> cpu_fpr[a->rs1],

> -                            0, 31);

> +        TCGv_i64 rs1 = tcg_temp_new_i64();

> +        TCGv_i64 rs2 = tcg_temp_new_i64();

> +

> +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

> +

> +        /* This formulation retains the nanboxing of rs2. */

> +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);

> +        tcg_temp_free_i64(rs1);

> +        tcg_temp_free_i64(rs2);

>      }

> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

>      mark_fs_dirty(ctx);

>      return true;

>  }

>

>  static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)

>  {

> +    TCGv_i64 rs1, rs2, mask;

> +

>      REQUIRE_FPU;

>      REQUIRE_EXT(ctx, RVF);

> +

> +    rs1 = tcg_temp_new_i64();

> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

> +

>      if (a->rs1 == a->rs2) { /* FNEG */

> -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);

> +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));

>      } else {

> -        TCGv_i64 t0 = tcg_temp_new_i64();

> -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);

> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);

> -        tcg_temp_free_i64(t0);

> +        rs2 = tcg_temp_new_i64();

> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

> +

> +        /*

> +         * Replace bit 31 in rs1 with inverse in rs2.

> +         * This formulation retains the nanboxing of rs1.

> +         */

> +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));

> +        tcg_gen_andc_i64(rs2, mask, rs2);

> +        tcg_gen_and_i64(rs1, mask, rs1);

> +        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);

> +

> +        tcg_temp_free_i64(mask);

> +        tcg_temp_free_i64(rs2);

>      }

> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

> +    tcg_temp_free_i64(rs1);

> +

>      mark_fs_dirty(ctx);

>      return true;

>  }

>

>  static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)

>  {

> +    TCGv_i64 rs1, rs2;

> +

>      REQUIRE_FPU;

>      REQUIRE_EXT(ctx, RVF);

> +

> +    rs1 = tcg_temp_new_i64();

> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

> +

>      if (a->rs1 == a->rs2) { /* FABS */

> -        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);

> +        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));

>      } else {

> -        TCGv_i64 t0 = tcg_temp_new_i64();

> -        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);

> -        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);

> -        tcg_temp_free_i64(t0);

> +        rs2 = tcg_temp_new_i64();

> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

> +

> +        /*

> +         * Xor bit 31 in rs1 with that in rs2.

> +         * This formulation retains the nanboxing of rs1.

> +         */

> +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));

> +        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);

> +

> +        tcg_temp_free_i64(rs2);

>      }

> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

> +    tcg_temp_free_i64(rs1);

> +

>      mark_fs_dirty(ctx);

>      return true;

>  }

> diff --git a/target/riscv/translate.c b/target/riscv/translate.c

> index 12a746da97..bf35182776 100644

> --- a/target/riscv/translate.c

> +++ b/target/riscv/translate.c

> @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)

>      tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));

>  }

>

> +/*

> + * A narrow n-bit operation, where n < FLEN, checks that input operands

> + * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.

> + * If so, the least-significant bits of the input are used, otherwise the

> + * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).

> + *

> + * Here, the result is always nan-boxed, even the canonical nan.

> + */

> +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)

> +{

> +    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);

> +    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);

> +

> +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);

> +    tcg_temp_free_i64(t_max);

> +    tcg_temp_free_i64(t_nan);

> +}

> +

>  static void generate_exception(DisasContext *ctx, int excp)

>  {

>      tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);

> --

> 2.25.1

>

>

>

Reviewed-by: Chih-Min Chao <chihmin.chao@sifive.com>


Chih-Min Chao
<div dir="ltr"><div dir="ltr"><div><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr"><br></div></div></div></div><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson &lt;<a href="mailto:richard.henderson@linaro.org">richard.henderson@linaro.org</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">If a 32-bit input is not properly nanboxed, then the input is replaced<br>
with the default qnan.  The only inline expansion is for the sign-changing<br>
set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.<br>
<br>
Signed-off-by: Richard Henderson &lt;<a href="mailto:richard.henderson@linaro.org" target="_blank">richard.henderson@linaro.org</a>&gt;<br>

---<br>
 target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------<br>
 target/riscv/translate.c                | 18 +++++++<br>
 2 files changed, 73 insertions(+), 16 deletions(-)<br>
<br>
diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c<br>
index 264d3139f1..f9a9e0643a 100644<br>
--- a/target/riscv/insn_trans/trans_rvf.inc.c<br>
+++ b/target/riscv/insn_trans/trans_rvf.inc.c<br>
@@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)<br>
 {<br>
     REQUIRE_FPU;<br>
     REQUIRE_EXT(ctx, RVF);<br>
+<br>
     if (a-&gt;rs1 == a-&gt;rs2) { /* FMOV */<br>
-        tcg_gen_mov_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1]);<br>
+        gen_check_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1]);<br>
     } else { /* FSGNJ */<br>
-        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs2], cpu_fpr[a-&gt;rs1],<br>
-                            0, 31);<br>
+        TCGv_i64 rs1 = tcg_temp_new_i64();<br>
+        TCGv_i64 rs2 = tcg_temp_new_i64();<br>
+<br>
+        gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
+        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
+<br>
+        /* This formulation retains the nanboxing of rs2. */<br>
+        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], rs2, rs1, 0, 31);<br>
+        tcg_temp_free_i64(rs1);<br>
+        tcg_temp_free_i64(rs2);<br>
     }<br>
-    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
     mark_fs_dirty(ctx);<br>
     return true;<br>
 }<br>
<br>
 static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)<br>
 {<br>
+    TCGv_i64 rs1, rs2, mask;<br>
+<br>
     REQUIRE_FPU;<br>
     REQUIRE_EXT(ctx, RVF);<br>
+<br>
+    rs1 = tcg_temp_new_i64();<br>
+    gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
+<br>
     if (a-&gt;rs1 == a-&gt;rs2) { /* FNEG */<br>
-        tcg_gen_xori_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1], INT32_MIN);<br>
+        tcg_gen_xori_i64(cpu_fpr[a-&gt;rd], rs1, MAKE_64BIT_MASK(31, 1));<br>
     } else {<br>
-        TCGv_i64 t0 = tcg_temp_new_i64();<br>
-        tcg_gen_not_i64(t0, cpu_fpr[a-&gt;rs2]);<br>
-        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], t0, cpu_fpr[a-&gt;rs1], 0, 31);<br>
-        tcg_temp_free_i64(t0);<br>
+        rs2 = tcg_temp_new_i64();<br>
+        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
+<br>
+        /*<br>
+         * Replace bit 31 in rs1 with inverse in rs2.<br>
+         * This formulation retains the nanboxing of rs1.<br>
+         */<br>
+        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));<br>
+        tcg_gen_andc_i64(rs2, mask, rs2);<br>
+        tcg_gen_and_i64(rs1, mask, rs1);<br>
+        tcg_gen_or_i64(cpu_fpr[a-&gt;rd], rs1, rs2);<br>
+<br>
+        tcg_temp_free_i64(mask);<br>
+        tcg_temp_free_i64(rs2);<br>
     }<br>
-    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
+    tcg_temp_free_i64(rs1);<br>
+<br>
     mark_fs_dirty(ctx);<br>
     return true;<br>
 }<br>
<br>
 static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)<br>
 {<br>
+    TCGv_i64 rs1, rs2;<br>
+<br>
     REQUIRE_FPU;<br>
     REQUIRE_EXT(ctx, RVF);<br>
+<br>
+    rs1 = tcg_temp_new_i64();<br>
+    gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
+<br>
     if (a-&gt;rs1 == a-&gt;rs2) { /* FABS */<br>
-        tcg_gen_andi_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1], ~INT32_MIN);<br>
+        tcg_gen_andi_i64(cpu_fpr[a-&gt;rd], rs1, ~MAKE_64BIT_MASK(31, 1));<br>
     } else {<br>
-        TCGv_i64 t0 = tcg_temp_new_i64();<br>
-        tcg_gen_andi_i64(t0, cpu_fpr[a-&gt;rs2], INT32_MIN);<br>
-        tcg_gen_xor_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1], t0);<br>
-        tcg_temp_free_i64(t0);<br>
+        rs2 = tcg_temp_new_i64();<br>
+        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
+<br>
+        /*<br>
+         * Xor bit 31 in rs1 with that in rs2.<br>
+         * This formulation retains the nanboxing of rs1.<br>
+         */<br>
+        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));<br>
+        tcg_gen_xor_i64(cpu_fpr[a-&gt;rd], rs1, rs2);<br>
+<br>
+        tcg_temp_free_i64(rs2);<br>
     }<br>
-    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
+    tcg_temp_free_i64(rs1);<br>
+<br>
     mark_fs_dirty(ctx);<br>
     return true;<br>
 }<br>
diff --git a/target/riscv/translate.c b/target/riscv/translate.c<br>
index 12a746da97..bf35182776 100644<br>
--- a/target/riscv/translate.c<br>
+++ b/target/riscv/translate.c<br>
@@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)<br>
     tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));<br>
 }<br>
<br>
+/*<br>
+ * A narrow n-bit operation, where n &lt; FLEN, checks that input operands<br>
+ * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.<br>
+ * If so, the least-significant bits of the input are used, otherwise the<br>
+ * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).<br>
+ *<br>
+ * Here, the result is always nan-boxed, even the canonical nan.<br>
+ */<br>
+static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)<br>
+{<br>
+    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);<br>
+    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);<br>
+<br>
+    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);<br>
+    tcg_temp_free_i64(t_max);<br>
+    tcg_temp_free_i64(t_nan);<br>
+}<br>
+<br>
 static void generate_exception(DisasContext *ctx, int excp)<br>
 {<br>
     tcg_gen_movi_tl(cpu_pc, ctx-&gt;base.pc_next);<br>
-- <br>
2.25.1<br>
<br>
<br></blockquote><div><br></div><div>Reviewed-by: Chih-Min Chao &lt;<a href="mailto:chihmin.chao@sifive.com">chihmin.chao@sifive.com</a>&gt;</div><div><br></div><div><div dir="ltr" class="gmail_signature"><div dir="ltr"><span style="color:rgb(136,136,136)">Chih-Min Chao</span><div style="color:rgb(136,136,136)"></div></div></div></div><div> </div></div></div>
Chih-Min Chao Aug. 7, 2020, 8:24 p.m. UTC | #3
On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson <
richard.henderson@linaro.org> wrote:

> If a 32-bit input is not properly nanboxed, then the input is replaced

> with the default qnan.  The only inline expansion is for the sign-changing

> set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------

>  target/riscv/translate.c                | 18 +++++++

>  2 files changed, 73 insertions(+), 16 deletions(-)

>

> diff --git a/target/riscv/insn_trans/trans_rvf.inc.c

> b/target/riscv/insn_trans/trans_rvf.inc.c

> index 264d3139f1..f9a9e0643a 100644

> --- a/target/riscv/insn_trans/trans_rvf.inc.c

> +++ b/target/riscv/insn_trans/trans_rvf.inc.c

> @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx,

> arg_fsgnj_s *a)

>  {

>      REQUIRE_FPU;

>      REQUIRE_EXT(ctx, RVF);

> +

>      if (a->rs1 == a->rs2) { /* FMOV */

> -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

> +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

>      } else { /* FSGNJ */

> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2],

> cpu_fpr[a->rs1],

> -                            0, 31);

> +        TCGv_i64 rs1 = tcg_temp_new_i64();

> +        TCGv_i64 rs2 = tcg_temp_new_i64();

> +

> +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

> +

> +        /* This formulation retains the nanboxing of rs2. */

> +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);

> +        tcg_temp_free_i64(rs1);

> +        tcg_temp_free_i64(rs2);

>      }

> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

>      mark_fs_dirty(ctx);

>      return true;

>  }

>

>  static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)

>  {

> +    TCGv_i64 rs1, rs2, mask;

> +

>      REQUIRE_FPU;

>      REQUIRE_EXT(ctx, RVF);

> +

> +    rs1 = tcg_temp_new_i64();

> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

> +

>      if (a->rs1 == a->rs2) { /* FNEG */

> -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);

> +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));

>      } else {

> -        TCGv_i64 t0 = tcg_temp_new_i64();

> -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);

> -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);

> -        tcg_temp_free_i64(t0);

> +        rs2 = tcg_temp_new_i64();

> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

> +

> +        /*

> +         * Replace bit 31 in rs1 with inverse in rs2.

> +         * This formulation retains the nanboxing of rs1.

> +         */

> +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));

> +        tcg_gen_andc_i64(rs2, mask, rs2);

>


should be
              tcg_gen_not_i64(rs2, rs2);         // forgot to invert rs2
              tcg_gen_andc_i64(rs2, rs2, mask);  // mask needs to be inverted
                                                 // to keep only the sign bit

 Chih-Min Chao

> +        tcg_gen_and_i64(rs1, mask, rs1);

> +        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);

> +

> +        tcg_temp_free_i64(mask);

> +        tcg_temp_free_i64(rs2);

>      }

> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

> +    tcg_temp_free_i64(rs1);

> +

>      mark_fs_dirty(ctx);

>      return true;

>  }

>

>  static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)

>  {

> +    TCGv_i64 rs1, rs2;

> +

>      REQUIRE_FPU;

>      REQUIRE_EXT(ctx, RVF);

> +

> +    rs1 = tcg_temp_new_i64();

> +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

> +

>      if (a->rs1 == a->rs2) { /* FABS */

> -        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);

> +        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));

>      } else {

> -        TCGv_i64 t0 = tcg_temp_new_i64();

> -        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);

> -        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);

> -        tcg_temp_free_i64(t0);

> +        rs2 = tcg_temp_new_i64();

> +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

> +

> +        /*

> +         * Xor bit 31 in rs1 with that in rs2.

> +         * This formulation retains the nanboxing of rs1.

> +         */

> +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));

> +        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);

> +

> +        tcg_temp_free_i64(rs2);

>      }

> -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

> +    tcg_temp_free_i64(rs1);

> +

>      mark_fs_dirty(ctx);

>      return true;

>  }

> diff --git a/target/riscv/translate.c b/target/riscv/translate.c

> index 12a746da97..bf35182776 100644

> --- a/target/riscv/translate.c

> +++ b/target/riscv/translate.c

> @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)

>      tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));

>  }

>

> +/*

> + * A narrow n-bit operation, where n < FLEN, checks that input operands

> + * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.

> + * If so, the least-significant bits of the input are used, otherwise the

> + * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).

> + *

> + * Here, the result is always nan-boxed, even the canonical nan.

> + */

> +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)

> +{

> +    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);

> +    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);

> +

> +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);

> +    tcg_temp_free_i64(t_max);

> +    tcg_temp_free_i64(t_nan);

> +}

> +

>  static void generate_exception(DisasContext *ctx, int excp)

>  {

>      tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);

> --

> 2.25.1

>

>

>
<div dir="ltr"><div dir="ltr"><div><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><div dir="ltr">On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson &lt;<a href="mailto:richard.henderson@linaro.org">richard.henderson@linaro.org</a>&gt; wrote:<br></div></div></div></div><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">If a 32-bit input is not properly nanboxed, then the input is replaced<br>
with the default qnan.  The only inline expansion is for the sign-changing<br>
set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.<br>
<br>
Signed-off-by: Richard Henderson &lt;<a href="mailto:richard.henderson@linaro.org" target="_blank">richard.henderson@linaro.org</a>&gt;<br>

---<br>
 target/riscv/insn_trans/trans_rvf.inc.c | 71 +++++++++++++++++++------<br>
 target/riscv/translate.c                | 18 +++++++<br>
 2 files changed, 73 insertions(+), 16 deletions(-)<br>
<br>
diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c<br>
index 264d3139f1..f9a9e0643a 100644<br>
--- a/target/riscv/insn_trans/trans_rvf.inc.c<br>
+++ b/target/riscv/insn_trans/trans_rvf.inc.c<br>
@@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)<br>
 {<br>
     REQUIRE_FPU;<br>
     REQUIRE_EXT(ctx, RVF);<br>
+<br>
     if (a-&gt;rs1 == a-&gt;rs2) { /* FMOV */<br>
-        tcg_gen_mov_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1]);<br>
+        gen_check_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1]);<br>
     } else { /* FSGNJ */<br>
-        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs2], cpu_fpr[a-&gt;rs1],<br>
-                            0, 31);<br>
+        TCGv_i64 rs1 = tcg_temp_new_i64();<br>
+        TCGv_i64 rs2 = tcg_temp_new_i64();<br>
+<br>
+        gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
+        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
+<br>
+        /* This formulation retains the nanboxing of rs2. */<br>
+        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], rs2, rs1, 0, 31);<br>
+        tcg_temp_free_i64(rs1);<br>
+        tcg_temp_free_i64(rs2);<br>
     }<br>
-    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
     mark_fs_dirty(ctx);<br>
     return true;<br>
 }<br>
<br>
 static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)<br>
 {<br>
+    TCGv_i64 rs1, rs2, mask;<br>
+<br>
     REQUIRE_FPU;<br>
     REQUIRE_EXT(ctx, RVF);<br>
+<br>
+    rs1 = tcg_temp_new_i64();<br>
+    gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
+<br>
     if (a-&gt;rs1 == a-&gt;rs2) { /* FNEG */<br>
-        tcg_gen_xori_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1], INT32_MIN);<br>
+        tcg_gen_xori_i64(cpu_fpr[a-&gt;rd], rs1, MAKE_64BIT_MASK(31, 1));<br>
     } else {<br>
-        TCGv_i64 t0 = tcg_temp_new_i64();<br>
-        tcg_gen_not_i64(t0, cpu_fpr[a-&gt;rs2]);<br>
-        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], t0, cpu_fpr[a-&gt;rs1], 0, 31);<br>
-        tcg_temp_free_i64(t0);<br>
+        rs2 = tcg_temp_new_i64();<br>
+        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
+<br>
+        /*<br>
+         * Replace bit 31 in rs1 with inverse in rs2.<br>
+         * This formulation retains the nanboxing of rs1.<br>
+         */<br>
+        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));<br>
+        tcg_gen_andc_i64(rs2, mask, rs2);<br></blockquote><div><br></div><div>should be </div><div>              tcg_gen_not_i64(rs2, rs2);         // forget to inverse rs2</div><div>              tcg_gen_andc_i64(rs2, rs2, mask);  //mask needs to be inverted to get only sign</div><div><br></div><div> Chih-Min Chao<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+        tcg_gen_and_i64(rs1, mask, rs1);<br>
+        tcg_gen_or_i64(cpu_fpr[a-&gt;rd], rs1, rs2);<br>
+<br>
+        tcg_temp_free_i64(mask);<br>
+        tcg_temp_free_i64(rs2);<br>
     }<br>
-    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
+    tcg_temp_free_i64(rs1);<br>
+<br>
     mark_fs_dirty(ctx);<br>
     return true;<br>
 }<br>
<br>
 static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)<br>
 {<br>
+    TCGv_i64 rs1, rs2;<br>
+<br>
     REQUIRE_FPU;<br>
     REQUIRE_EXT(ctx, RVF);<br>
+<br>
+    rs1 = tcg_temp_new_i64();<br>
+    gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
+<br>
     if (a-&gt;rs1 == a-&gt;rs2) { /* FABS */<br>
-        tcg_gen_andi_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1], ~INT32_MIN);<br>
+        tcg_gen_andi_i64(cpu_fpr[a-&gt;rd], rs1, ~MAKE_64BIT_MASK(31, 1));<br>
     } else {<br>
-        TCGv_i64 t0 = tcg_temp_new_i64();<br>
-        tcg_gen_andi_i64(t0, cpu_fpr[a-&gt;rs2], INT32_MIN);<br>
-        tcg_gen_xor_i64(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rs1], t0);<br>
-        tcg_temp_free_i64(t0);<br>
+        rs2 = tcg_temp_new_i64();<br>
+        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
+<br>
+        /*<br>
+         * Xor bit 31 in rs1 with that in rs2.<br>
+         * This formulation retains the nanboxing of rs1.<br>
+         */<br>
+        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));<br>
+        tcg_gen_xor_i64(cpu_fpr[a-&gt;rd], rs1, rs2);<br>
+<br>
+        tcg_temp_free_i64(rs2);<br>
     }<br>
-    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
+    tcg_temp_free_i64(rs1);<br>
+<br>
     mark_fs_dirty(ctx);<br>
     return true;<br>
 }<br>
diff --git a/target/riscv/translate.c b/target/riscv/translate.c<br>
index 12a746da97..bf35182776 100644<br>
--- a/target/riscv/translate.c<br>
+++ b/target/riscv/translate.c<br>
@@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)<br>
     tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));<br>
 }<br>
<br>
+/*<br>
+ * A narrow n-bit operation, where n &lt; FLEN, checks that input operands<br>
+ * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.<br>
+ * If so, the least-significant bits of the input are used, otherwise the<br>
+ * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).<br>
+ *<br>
+ * Here, the result is always nan-boxed, even the canonical nan.<br>
+ */<br>
+static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)<br>
+{<br>
+    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);<br>
+    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);<br>
+<br>
+    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);<br>
+    tcg_temp_free_i64(t_max);<br>
+    tcg_temp_free_i64(t_nan);<br>
+}<br>
+<br>
 static void generate_exception(DisasContext *ctx, int excp)<br>
 {<br>
     tcg_gen_movi_tl(cpu_pc, ctx-&gt;base.pc_next);<br>
-- <br>
2.25.1<br>
<br>
<br>
</blockquote></div></div>
LIU Zhiwei Aug. 8, 2020, 2:18 p.m. UTC | #4
On 2020/8/8 4:24, Chih-Min Chao wrote:
> On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson 

> <richard.henderson@linaro.org <mailto:richard.henderson@linaro.org>> 

> wrote:

>

>     If a 32-bit input is not properly nanboxed, then the input is replaced

>     with the default qnan.  The only inline expansion is for the

>     sign-changing

>     set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.

>

>     Signed-off-by: Richard Henderson <richard.henderson@linaro.org

>     <mailto:richard.henderson@linaro.org>>

>     ---

>      target/riscv/insn_trans/trans_rvf.inc.c | 71

>     +++++++++++++++++++------

>      target/riscv/translate.c                | 18 +++++++

>      2 files changed, 73 insertions(+), 16 deletions(-)

>

>     diff --git a/target/riscv/insn_trans/trans_rvf.inc.c

>     b/target/riscv/insn_trans/trans_rvf.inc.c

>     index 264d3139f1..f9a9e0643a 100644

>     --- a/target/riscv/insn_trans/trans_rvf.inc.c

>     +++ b/target/riscv/insn_trans/trans_rvf.inc.c

>     @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext *ctx,

>     arg_fsgnj_s *a)

>      {

>          REQUIRE_FPU;

>          REQUIRE_EXT(ctx, RVF);

>     +

>          if (a->rs1 == a->rs2) { /* FMOV */

>     -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

>     +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

>          } else { /* FSGNJ */

>     -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2],

>     cpu_fpr[a->rs1],

>     -                            0, 31);

>     +        TCGv_i64 rs1 = tcg_temp_new_i64();

>     +        TCGv_i64 rs2 = tcg_temp_new_i64();

>     +

>     +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

>     +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

>     +

>     +        /* This formulation retains the nanboxing of rs2. */

>     +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);

>     +        tcg_temp_free_i64(rs1);

>     +        tcg_temp_free_i64(rs2);

>          }

>     -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

>          mark_fs_dirty(ctx);

>          return true;

>      }

>

>      static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)

>      {

>     +    TCGv_i64 rs1, rs2, mask;

>     +

>          REQUIRE_FPU;

>          REQUIRE_EXT(ctx, RVF);

>     +

>     +    rs1 = tcg_temp_new_i64();

>     +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

>     +

>          if (a->rs1 == a->rs2) { /* FNEG */

>     -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);

>     +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31,

>     1));

>          } else {

>     -        TCGv_i64 t0 = tcg_temp_new_i64();

>     -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);

>     -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1],

>     0, 31);

>     -        tcg_temp_free_i64(t0);

>     +        rs2 = tcg_temp_new_i64();

>     +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

>     +

>     +        /*

>     +         * Replace bit 31 in rs1 with inverse in rs2.

>     +         * This formulation retains the nanboxing of rs1.

>     +         */

>     +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));

>     +        tcg_gen_andc_i64(rs2, mask, rs2);

>

>

> should be

>               tcg_gen_not_i64(rs2, rs2);         // forgot to invert rs2

>               tcg_gen_andc_i64(rs2, rs2, mask);  // mask needs to be

> inverted to get only the sign

Hi Chih-Min,

Thanks for pointing it out. It's a bug here. However, I think it should be

tcg_gen_andc_i64(rs2, rs2, mask);  // keep only rs2 bit 31

tcg_gen_not_i64(rs2, rs2);  // invert rs2


Best Regards,
Zhiwei
>

>  Chih-Min Chao

>

>     +        tcg_gen_and_i64(rs1, mask, rs1);

>     +        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);

>     +

>     +        tcg_temp_free_i64(mask);

>     +        tcg_temp_free_i64(rs2);

>          }

>     -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

>     +    tcg_temp_free_i64(rs1);

>     +

>          mark_fs_dirty(ctx);

>          return true;

>      }

>

>      static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)

>      {

>     +    TCGv_i64 rs1, rs2;

>     +

>          REQUIRE_FPU;

>          REQUIRE_EXT(ctx, RVF);

>     +

>     +    rs1 = tcg_temp_new_i64();

>     +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

>     +

>          if (a->rs1 == a->rs2) { /* FABS */

>     -        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1],

>     ~INT32_MIN);

>     +        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1,

>     ~MAKE_64BIT_MASK(31, 1));

>          } else {

>     -        TCGv_i64 t0 = tcg_temp_new_i64();

>     -        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);

>     -        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);

>     -        tcg_temp_free_i64(t0);

>     +        rs2 = tcg_temp_new_i64();

>     +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

>     +

>     +        /*

>     +         * Xor bit 31 in rs1 with that in rs2.

>     +         * This formulation retains the nanboxing of rs1.

>     +         */

>     +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));

>     +        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);

>     +

>     +        tcg_temp_free_i64(rs2);

>          }

>     -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

>     +    tcg_temp_free_i64(rs1);

>     +

>          mark_fs_dirty(ctx);

>          return true;

>      }

>     diff --git a/target/riscv/translate.c b/target/riscv/translate.c

>     index 12a746da97..bf35182776 100644

>     --- a/target/riscv/translate.c

>     +++ b/target/riscv/translate.c

>     @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out,

>     TCGv_i64 in)

>          tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));

>      }

>

>     +/*

>     + * A narrow n-bit operation, where n < FLEN, checks that input

>     operands

>     + * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.

>     + * If so, the least-significant bits of the input are used,

>     otherwise the

>     + * input value is treated as an n-bit canonical NaN (v2.2 section

>     9.2).

>     + *

>     + * Here, the result is always nan-boxed, even the canonical nan.

>     + */

>     +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)

>     +{

>     +    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);

>     +    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);

>     +

>     +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);

>     +    tcg_temp_free_i64(t_max);

>     +    tcg_temp_free_i64(t_nan);

>     +}

>     +

>      static void generate_exception(DisasContext *ctx, int excp)

>      {

>          tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);

>     -- 

>     2.25.1

>

>
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body>
    <br>
    <br>
    <div class="moz-cite-prefix">On 2020/8/8 4:24, Chih-Min Chao wrote:<br>
    </div>
    <blockquote type="cite"
cite="mid:CAEiOBXVac0bFSZCrh_rhZbLVC7DGVwBe+D6YF90HQy1K-8wfYQ@mail.gmail.com">
      <meta http-equiv="content-type" content="text/html; charset=UTF-8">
      <div dir="ltr">
        <div dir="ltr">
          <div>
            <div dir="ltr" class="gmail_signature"
              data-smartmail="gmail_signature">
              <div dir="ltr">On Fri, Jul 24, 2020 at 8:28 AM Richard
                Henderson &lt;<a
                  href="mailto:richard.henderson@linaro.org"
                  moz-do-not-send="true">richard.henderson@linaro.org</a>&gt;
                wrote:<br>
              </div>
            </div>
          </div>
        </div>
        <div class="gmail_quote">
          <blockquote class="gmail_quote" style="margin:0px 0px 0px
            0.8ex;border-left:1px solid
            rgb(204,204,204);padding-left:1ex">If a 32-bit input is not
            properly nanboxed, then the input is replaced<br>
            with the default qnan.  The only inline expansion is for the
            sign-changing<br>
            set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.<br>
            <br>
            Signed-off-by: Richard Henderson &lt;<a

              href="mailto:richard.henderson@linaro.org" target="_blank"
              moz-do-not-send="true">richard.henderson@linaro.org</a>&gt;<br>
            ---<br>
             target/riscv/insn_trans/trans_rvf.inc.c | 71
            +++++++++++++++++++------<br>
             target/riscv/translate.c                | 18 +++++++<br>
             2 files changed, 73 insertions(+), 16 deletions(-)<br>
            <br>
            diff --git a/target/riscv/insn_trans/trans_rvf.inc.c
            b/target/riscv/insn_trans/trans_rvf.inc.c<br>
            index 264d3139f1..f9a9e0643a 100644<br>
            --- a/target/riscv/insn_trans/trans_rvf.inc.c<br>
            +++ b/target/riscv/insn_trans/trans_rvf.inc.c<br>
            @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext
            *ctx, arg_fsgnj_s *a)<br>
             {<br>
                 REQUIRE_FPU;<br>
                 REQUIRE_EXT(ctx, RVF);<br>
            +<br>
                 if (a-&gt;rs1 == a-&gt;rs2) { /* FMOV */<br>
            -        tcg_gen_mov_i64(cpu_fpr[a-&gt;rd],
            cpu_fpr[a-&gt;rs1]);<br>
            +        gen_check_nanbox_s(cpu_fpr[a-&gt;rd],
            cpu_fpr[a-&gt;rs1]);<br>
                 } else { /* FSGNJ */<br>
            -        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd],
            cpu_fpr[a-&gt;rs2], cpu_fpr[a-&gt;rs1],<br>
            -                            0, 31);<br>
            +        TCGv_i64 rs1 = tcg_temp_new_i64();<br>
            +        TCGv_i64 rs2 = tcg_temp_new_i64();<br>
            +<br>
            +        gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
            +        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
            +<br>
            +        /* This formulation retains the nanboxing of rs2.
            */<br>
            +        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], rs2, rs1, 0,
            31);<br>
            +        tcg_temp_free_i64(rs1);<br>
            +        tcg_temp_free_i64(rs2);<br>
                 }<br>
            -    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
                 mark_fs_dirty(ctx);<br>
                 return true;<br>
             }<br>
            <br>
             static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s
            *a)<br>
             {<br>
            +    TCGv_i64 rs1, rs2, mask;<br>
            +<br>
                 REQUIRE_FPU;<br>
                 REQUIRE_EXT(ctx, RVF);<br>
            +<br>
            +    rs1 = tcg_temp_new_i64();<br>
            +    gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
            +<br>
                 if (a-&gt;rs1 == a-&gt;rs2) { /* FNEG */<br>
            -        tcg_gen_xori_i64(cpu_fpr[a-&gt;rd],
            cpu_fpr[a-&gt;rs1], INT32_MIN);<br>
            +        tcg_gen_xori_i64(cpu_fpr[a-&gt;rd], rs1,
            MAKE_64BIT_MASK(31, 1));<br>
                 } else {<br>
            -        TCGv_i64 t0 = tcg_temp_new_i64();<br>
            -        tcg_gen_not_i64(t0, cpu_fpr[a-&gt;rs2]);<br>
            -        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], t0,
            cpu_fpr[a-&gt;rs1], 0, 31);<br>
            -        tcg_temp_free_i64(t0);<br>
            +        rs2 = tcg_temp_new_i64();<br>
            +        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
            +<br>
            +        /*<br>
            +         * Replace bit 31 in rs1 with inverse in rs2.<br>
            +         * This formulation retains the nanboxing of rs1.<br>
            +         */<br>
            +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));<br>
            +        tcg_gen_andc_i64(rs2, mask, rs2);<br>
          </blockquote>
          <div><br>
          </div>
          <div>should be </div>
          <div>              tcg_gen_not_i64(rs2, rs2);         //
            forgot to invert rs2</div>
          <div>              tcg_gen_andc_i64(rs2, rs2, mask);  // mask
            needs to be inverted to get only the sign</div>
        </div>
      </div>
    </blockquote>
    Hi Chih-Min,<br>
    <br>
    Thanks for pointing it out. It's a bug here. However, I think it
    should be <br>
    <br>
    <pre>tcg_gen_andc_i64(rs2, rs2, mask);  // keep only rs2 bit 31</pre>
    <pre>tcg_gen_not_i64(rs2, rs2);  // invert rs2</pre>
    <br>
    Best Regards,<br>
    Zhiwei<br>
    <blockquote type="cite"
cite="mid:CAEiOBXVac0bFSZCrh_rhZbLVC7DGVwBe+D6YF90HQy1K-8wfYQ@mail.gmail.com">
      <div dir="ltr">
        <div class="gmail_quote">
          <div><br>
          </div>
          <div> Chih-Min Chao<br>
          </div>
          <blockquote class="gmail_quote" style="margin:0px 0px 0px
            0.8ex;border-left:1px solid
            rgb(204,204,204);padding-left:1ex">
            +        tcg_gen_and_i64(rs1, mask, rs1);<br>
            +        tcg_gen_or_i64(cpu_fpr[a-&gt;rd], rs1, rs2);<br>
            +<br>
            +        tcg_temp_free_i64(mask);<br>
            +        tcg_temp_free_i64(rs2);<br>
                 }<br>
            -    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
            +    tcg_temp_free_i64(rs1);<br>
            +<br>
                 mark_fs_dirty(ctx);<br>
                 return true;<br>
             }<br>
            <br>
             static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s
            *a)<br>
             {<br>
            +    TCGv_i64 rs1, rs2;<br>
            +<br>
                 REQUIRE_FPU;<br>
                 REQUIRE_EXT(ctx, RVF);<br>
            +<br>
            +    rs1 = tcg_temp_new_i64();<br>
            +    gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
            +<br>
                 if (a-&gt;rs1 == a-&gt;rs2) { /* FABS */<br>
            -        tcg_gen_andi_i64(cpu_fpr[a-&gt;rd],
            cpu_fpr[a-&gt;rs1], ~INT32_MIN);<br>
            +        tcg_gen_andi_i64(cpu_fpr[a-&gt;rd], rs1,
            ~MAKE_64BIT_MASK(31, 1));<br>
                 } else {<br>
            -        TCGv_i64 t0 = tcg_temp_new_i64();<br>
            -        tcg_gen_andi_i64(t0, cpu_fpr[a-&gt;rs2],
            INT32_MIN);<br>
            -        tcg_gen_xor_i64(cpu_fpr[a-&gt;rd],
            cpu_fpr[a-&gt;rs1], t0);<br>
            -        tcg_temp_free_i64(t0);<br>
            +        rs2 = tcg_temp_new_i64();<br>
            +        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
            +<br>
            +        /*<br>
            +         * Xor bit 31 in rs1 with that in rs2.<br>
            +         * This formulation retains the nanboxing of rs1.<br>
            +         */<br>
            +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));<br>
            +        tcg_gen_xor_i64(cpu_fpr[a-&gt;rd], rs1, rs2);<br>
            +<br>
            +        tcg_temp_free_i64(rs2);<br>
                 }<br>
            -    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
            +    tcg_temp_free_i64(rs1);<br>
            +<br>
                 mark_fs_dirty(ctx);<br>
                 return true;<br>
             }<br>
            diff --git a/target/riscv/translate.c
            b/target/riscv/translate.c<br>
            index 12a746da97..bf35182776 100644<br>
            --- a/target/riscv/translate.c<br>
            +++ b/target/riscv/translate.c<br>
            @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out,
            TCGv_i64 in)<br>
                 tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));<br>
             }<br>
            <br>
            +/*<br>
            + * A narrow n-bit operation, where n &lt; FLEN, checks that
            input operands<br>
            + * are correctly Nan-boxed, i.e., all upper FLEN - n bits
            are 1.<br>
            + * If so, the least-significant bits of the input are used,
            otherwise the<br>
            + * input value is treated as an n-bit canonical NaN (v2.2
            section 9.2).<br>
            + *<br>
            + * Here, the result is always nan-boxed, even the canonical
            nan.<br>
            + */<br>
            +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)<br>
            +{<br>
            +    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);<br>
            +    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);<br>
            +<br>
            +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in,
            t_nan);<br>
            +    tcg_temp_free_i64(t_max);<br>
            +    tcg_temp_free_i64(t_nan);<br>
            +}<br>
            +<br>
             static void generate_exception(DisasContext *ctx, int excp)<br>
             {<br>
                 tcg_gen_movi_tl(cpu_pc, ctx-&gt;base.pc_next);<br>
            -- <br>
            2.25.1<br>
            <br>
            <br>
          </blockquote>
        </div>
      </div>
    </blockquote>
    <br>
  </body>
</html>
LIU Zhiwei Aug. 8, 2020, 11:06 p.m. UTC | #5
On 2020/8/8 22:18, LIU Zhiwei wrote:
>

>

> On 2020/8/8 4:24, Chih-Min Chao wrote:

>> On Fri, Jul 24, 2020 at 8:28 AM Richard Henderson 

>> <richard.henderson@linaro.org <mailto:richard.henderson@linaro.org>> 

>> wrote:

>>

>>     If a 32-bit input is not properly nanboxed, then the input is

>>     replaced

>>     with the default qnan.  The only inline expansion is for the

>>     sign-changing

>>     set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.

>>

>>     Signed-off-by: Richard Henderson <richard.henderson@linaro.org

>>     <mailto:richard.henderson@linaro.org>>

>>     ---

>>      target/riscv/insn_trans/trans_rvf.inc.c | 71

>>     +++++++++++++++++++------

>>      target/riscv/translate.c                | 18 +++++++

>>      2 files changed, 73 insertions(+), 16 deletions(-)

>>

>>     diff --git a/target/riscv/insn_trans/trans_rvf.inc.c

>>     b/target/riscv/insn_trans/trans_rvf.inc.c

>>     index 264d3139f1..f9a9e0643a 100644

>>     --- a/target/riscv/insn_trans/trans_rvf.inc.c

>>     +++ b/target/riscv/insn_trans/trans_rvf.inc.c

>>     @@ -161,47 +161,86 @@ static bool trans_fsgnj_s(DisasContext

>>     *ctx, arg_fsgnj_s *a)

>>      {

>>          REQUIRE_FPU;

>>          REQUIRE_EXT(ctx, RVF);

>>     +

>>          if (a->rs1 == a->rs2) { /* FMOV */

>>     -        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

>>     +        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);

>>          } else { /* FSGNJ */

>>     -        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2],

>>     cpu_fpr[a->rs1],

>>     -                            0, 31);

>>     +        TCGv_i64 rs1 = tcg_temp_new_i64();

>>     +        TCGv_i64 rs2 = tcg_temp_new_i64();

>>     +

>>     +        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

>>     +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

>>     +

>>     +        /* This formulation retains the nanboxing of rs2. */

>>     +        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);

>>     +        tcg_temp_free_i64(rs1);

>>     +        tcg_temp_free_i64(rs2);

>>          }

>>     -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

>>          mark_fs_dirty(ctx);

>>          return true;

>>      }

>>

>>      static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)

>>      {

>>     +    TCGv_i64 rs1, rs2, mask;

>>     +

>>          REQUIRE_FPU;

>>          REQUIRE_EXT(ctx, RVF);

>>     +

>>     +    rs1 = tcg_temp_new_i64();

>>     +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

>>     +

>>          if (a->rs1 == a->rs2) { /* FNEG */

>>     -        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1],

>>     INT32_MIN);

>>     +        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1,

>>     MAKE_64BIT_MASK(31, 1));

>>          } else {

>>     -        TCGv_i64 t0 = tcg_temp_new_i64();

>>     -        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);

>>     -        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1],

>>     0, 31);

>>     -        tcg_temp_free_i64(t0);

>>     +        rs2 = tcg_temp_new_i64();

>>     +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

>>     +

>>     +        /*

>>     +         * Replace bit 31 in rs1 with inverse in rs2.

>>     +         * This formulation retains the nanboxing of rs1.

>>     +         */

>>     +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));

>>     +        tcg_gen_andc_i64(rs2, mask, rs2);

>>

>>

>> should be

>>               tcg_gen_not_i64(rs2, rs2);         // forgot to invert rs2

>>               tcg_gen_andc_i64(rs2, rs2, mask);  // mask needs to be

>> inverted to get only the sign

> Hi Chih-Min,

>

> Thanks for pointing it out. It's a bug here. However, I think it 

> should be

>

> tcg_gen_andc_i64(rs2, rs2, mask);  // keep only rs2 bit 31

> tcg_gen_not_i64(rs2, rs2);  // invert rs2

>

Hi Chih-Min,

Sorry, your code is right.

Zhiwei
> Best Regards,

> Zhiwei

>>

>>  Chih-Min Chao

>>

>>     + tcg_gen_and_i64(rs1, mask, rs1);

>>     +        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);

>>     +

>>     +        tcg_temp_free_i64(mask);

>>     +        tcg_temp_free_i64(rs2);

>>          }

>>     -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

>>     +    tcg_temp_free_i64(rs1);

>>     +

>>          mark_fs_dirty(ctx);

>>          return true;

>>      }

>>

>>      static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)

>>      {

>>     +    TCGv_i64 rs1, rs2;

>>     +

>>          REQUIRE_FPU;

>>          REQUIRE_EXT(ctx, RVF);

>>     +

>>     +    rs1 = tcg_temp_new_i64();

>>     +    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);

>>     +

>>          if (a->rs1 == a->rs2) { /* FABS */

>>     -        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1],

>>     ~INT32_MIN);

>>     +        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1,

>>     ~MAKE_64BIT_MASK(31, 1));

>>          } else {

>>     -        TCGv_i64 t0 = tcg_temp_new_i64();

>>     -        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);

>>     -        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);

>>     -        tcg_temp_free_i64(t0);

>>     +        rs2 = tcg_temp_new_i64();

>>     +        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);

>>     +

>>     +        /*

>>     +         * Xor bit 31 in rs1 with that in rs2.

>>     +         * This formulation retains the nanboxing of rs1.

>>     +         */

>>     +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));

>>     +        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);

>>     +

>>     +        tcg_temp_free_i64(rs2);

>>          }

>>     -    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);

>>     +    tcg_temp_free_i64(rs1);

>>     +

>>          mark_fs_dirty(ctx);

>>          return true;

>>      }

>>     diff --git a/target/riscv/translate.c b/target/riscv/translate.c

>>     index 12a746da97..bf35182776 100644

>>     --- a/target/riscv/translate.c

>>     +++ b/target/riscv/translate.c

>>     @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64 out,

>>     TCGv_i64 in)

>>          tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));

>>      }

>>

>>     +/*

>>     + * A narrow n-bit operation, where n < FLEN, checks that input

>>     operands

>>     + * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.

>>     + * If so, the least-significant bits of the input are used,

>>     otherwise the

>>     + * input value is treated as an n-bit canonical NaN (v2.2

>>     section 9.2).

>>     + *

>>     + * Here, the result is always nan-boxed, even the canonical nan.

>>     + */

>>     +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)

>>     +{

>>     +    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);

>>     +    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);

>>     +

>>     +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);

>>     +    tcg_temp_free_i64(t_max);

>>     +    tcg_temp_free_i64(t_nan);

>>     +}

>>     +

>>      static void generate_exception(DisasContext *ctx, int excp)

>>      {

>>          tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);

>>     -- 

>>     2.25.1

>>

>>

>
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  </head>
  <body>
    <br>
    <br>
    <div class="moz-cite-prefix">On 2020/8/8 22:18, LIU Zhiwei wrote:<br>
    </div>
    <blockquote type="cite"
      cite="mid:560581be-926e-c303-85a6-b15d4c187ad6@c-sky.com">
      <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
      <br>
      <br>
      <div class="moz-cite-prefix">On 2020/8/8 4:24, Chih-Min Chao
        wrote:<br>
      </div>
      <blockquote type="cite"
cite="mid:CAEiOBXVac0bFSZCrh_rhZbLVC7DGVwBe+D6YF90HQy1K-8wfYQ@mail.gmail.com">
        <meta http-equiv="content-type" content="text/html;
          charset=UTF-8">
        <div dir="ltr">
          <div dir="ltr">
            <div>
              <div dir="ltr" class="gmail_signature"
                data-smartmail="gmail_signature">
                <div dir="ltr">On Fri, Jul 24, 2020 at 8:28 AM Richard
                  Henderson &lt;<a
                    href="mailto:richard.henderson@linaro.org"
                    moz-do-not-send="true">richard.henderson@linaro.org</a>&gt;
                  wrote:<br>
                </div>
              </div>
            </div>
          </div>
          <div class="gmail_quote">
            <blockquote class="gmail_quote" style="margin:0px 0px 0px
              0.8ex;border-left:1px solid
              rgb(204,204,204);padding-left:1ex">If a 32-bit input is
              not properly nanboxed, then the input is replaced<br>
              with the default qnan.  The only inline expansion is for
              the sign-changing<br>
              set of instructions: FSGNJ.S, FSGNJX.S, FSGNJN.S.<br>
              <br>
              Signed-off-by: Richard Henderson &lt;<a

                href="mailto:richard.henderson@linaro.org"
                target="_blank" moz-do-not-send="true">richard.henderson@linaro.org</a>&gt;<br>
              ---<br>
               target/riscv/insn_trans/trans_rvf.inc.c | 71
              +++++++++++++++++++------<br>
               target/riscv/translate.c                | 18 +++++++<br>
               2 files changed, 73 insertions(+), 16 deletions(-)<br>
              <br>
              diff --git a/target/riscv/insn_trans/trans_rvf.inc.c
              b/target/riscv/insn_trans/trans_rvf.inc.c<br>
              index 264d3139f1..f9a9e0643a 100644<br>
              --- a/target/riscv/insn_trans/trans_rvf.inc.c<br>
              +++ b/target/riscv/insn_trans/trans_rvf.inc.c<br>
              @@ -161,47 +161,86 @@ static bool
              trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)<br>
               {<br>
                   REQUIRE_FPU;<br>
                   REQUIRE_EXT(ctx, RVF);<br>
              +<br>
                   if (a-&gt;rs1 == a-&gt;rs2) { /* FMOV */<br>
              -        tcg_gen_mov_i64(cpu_fpr[a-&gt;rd],
              cpu_fpr[a-&gt;rs1]);<br>
              +        gen_check_nanbox_s(cpu_fpr[a-&gt;rd],
              cpu_fpr[a-&gt;rs1]);<br>
                   } else { /* FSGNJ */<br>
              -        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd],
              cpu_fpr[a-&gt;rs2], cpu_fpr[a-&gt;rs1],<br>
              -                            0, 31);<br>
              +        TCGv_i64 rs1 = tcg_temp_new_i64();<br>
              +        TCGv_i64 rs2 = tcg_temp_new_i64();<br>
              +<br>
              +        gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
              +        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
              +<br>
              +        /* This formulation retains the nanboxing of rs2.
              */<br>
              +        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], rs2, rs1,
              0, 31);<br>
              +        tcg_temp_free_i64(rs1);<br>
              +        tcg_temp_free_i64(rs2);<br>
                   }<br>
              -    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
                   mark_fs_dirty(ctx);<br>
                   return true;<br>
               }<br>
              <br>
               static bool trans_fsgnjn_s(DisasContext *ctx,
              arg_fsgnjn_s *a)<br>
               {<br>
              +    TCGv_i64 rs1, rs2, mask;<br>
              +<br>
                   REQUIRE_FPU;<br>
                   REQUIRE_EXT(ctx, RVF);<br>
              +<br>
              +    rs1 = tcg_temp_new_i64();<br>
              +    gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
              +<br>
                   if (a-&gt;rs1 == a-&gt;rs2) { /* FNEG */<br>
              -        tcg_gen_xori_i64(cpu_fpr[a-&gt;rd],
              cpu_fpr[a-&gt;rs1], INT32_MIN);<br>
              +        tcg_gen_xori_i64(cpu_fpr[a-&gt;rd], rs1,
              MAKE_64BIT_MASK(31, 1));<br>
                   } else {<br>
              -        TCGv_i64 t0 = tcg_temp_new_i64();<br>
              -        tcg_gen_not_i64(t0, cpu_fpr[a-&gt;rs2]);<br>
              -        tcg_gen_deposit_i64(cpu_fpr[a-&gt;rd], t0,
              cpu_fpr[a-&gt;rs1], 0, 31);<br>
              -        tcg_temp_free_i64(t0);<br>
              +        rs2 = tcg_temp_new_i64();<br>
              +        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
              +<br>
              +        /*<br>
              +         * Replace bit 31 in rs1 with inverse in rs2.<br>
              +         * This formulation retains the nanboxing of rs1.<br>
              +         */<br>
              +        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));<br>
              +        tcg_gen_andc_i64(rs2, mask, rs2);<br>
            </blockquote>
            <div><br>
            </div>
            <div>should be </div>
            <div>              tcg_gen_not_i64(rs2, rs2);         //
              forgot to invert rs2</div>
            <div>              tcg_gen_andc_i64(rs2, rs2, mask);  // mask
              needs to be inverted to get only the sign</div>
          </div>
        </div>
      </blockquote>
      Hi Chih-Min,<br>
      <br>
      Thanks for pointing it out. It's a bug here. However, I think it
      should be <br>
      <br>
      <pre>tcg_gen_andc_i64(rs2, rs2, mask);  // keep only rs2 bit 31</pre>
      <pre>tcg_gen_not_i64(rs2, rs2);  // invert rs2</pre>
      <br>
    </blockquote>
    Hi Chih-Min,<br>
    <br>
    Sorry, your code is right. <br>
    <br>
    Zhiwei<br>
    <blockquote type="cite"
      cite="mid:560581be-926e-c303-85a6-b15d4c187ad6@c-sky.com"> Best
      Regards,<br>
      Zhiwei<br>
      <blockquote type="cite"
cite="mid:CAEiOBXVac0bFSZCrh_rhZbLVC7DGVwBe+D6YF90HQy1K-8wfYQ@mail.gmail.com">
        <div dir="ltr">
          <div class="gmail_quote">
            <div><br>
            </div>
            <div> Chih-Min Chao<br>
            </div>
            <blockquote class="gmail_quote" style="margin:0px 0px 0px
              0.8ex;border-left:1px solid
              rgb(204,204,204);padding-left:1ex"> +       
              tcg_gen_and_i64(rs1, mask, rs1);<br>
              +        tcg_gen_or_i64(cpu_fpr[a-&gt;rd], rs1, rs2);<br>
              +<br>
              +        tcg_temp_free_i64(mask);<br>
              +        tcg_temp_free_i64(rs2);<br>
                   }<br>
              -    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
              +    tcg_temp_free_i64(rs1);<br>
              +<br>
                   mark_fs_dirty(ctx);<br>
                   return true;<br>
               }<br>
              <br>
               static bool trans_fsgnjx_s(DisasContext *ctx,
              arg_fsgnjx_s *a)<br>
               {<br>
              +    TCGv_i64 rs1, rs2;<br>
              +<br>
                   REQUIRE_FPU;<br>
                   REQUIRE_EXT(ctx, RVF);<br>
              +<br>
              +    rs1 = tcg_temp_new_i64();<br>
              +    gen_check_nanbox_s(rs1, cpu_fpr[a-&gt;rs1]);<br>
              +<br>
                   if (a-&gt;rs1 == a-&gt;rs2) { /* FABS */<br>
              -        tcg_gen_andi_i64(cpu_fpr[a-&gt;rd],
              cpu_fpr[a-&gt;rs1], ~INT32_MIN);<br>
              +        tcg_gen_andi_i64(cpu_fpr[a-&gt;rd], rs1,
              ~MAKE_64BIT_MASK(31, 1));<br>
                   } else {<br>
              -        TCGv_i64 t0 = tcg_temp_new_i64();<br>
              -        tcg_gen_andi_i64(t0, cpu_fpr[a-&gt;rs2],
              INT32_MIN);<br>
              -        tcg_gen_xor_i64(cpu_fpr[a-&gt;rd],
              cpu_fpr[a-&gt;rs1], t0);<br>
              -        tcg_temp_free_i64(t0);<br>
              +        rs2 = tcg_temp_new_i64();<br>
              +        gen_check_nanbox_s(rs2, cpu_fpr[a-&gt;rs2]);<br>
              +<br>
              +        /*<br>
              +         * Xor bit 31 in rs1 with that in rs2.<br>
              +         * This formulation retains the nanboxing of rs1.<br>
              +         */<br>
              +        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31,
              1));<br>
              +        tcg_gen_xor_i64(cpu_fpr[a-&gt;rd], rs1, rs2);<br>
              +<br>
              +        tcg_temp_free_i64(rs2);<br>
                   }<br>
              -    gen_nanbox_s(cpu_fpr[a-&gt;rd], cpu_fpr[a-&gt;rd]);<br>
              +    tcg_temp_free_i64(rs1);<br>
              +<br>
                   mark_fs_dirty(ctx);<br>
                   return true;<br>
               }<br>
              diff --git a/target/riscv/translate.c
              b/target/riscv/translate.c<br>
              index 12a746da97..bf35182776 100644<br>
              --- a/target/riscv/translate.c<br>
              +++ b/target/riscv/translate.c<br>
              @@ -101,6 +101,24 @@ static void gen_nanbox_s(TCGv_i64
              out, TCGv_i64 in)<br>
                   tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));<br>
               }<br>
              <br>
              +/*<br>
              + * A narrow n-bit operation, where n &lt; FLEN, checks
              that input operands<br>
              + * are correctly Nan-boxed, i.e., all upper FLEN - n bits
              are 1.<br>
              + * If so, the least-significant bits of the input are
              used, otherwise the<br>
              + * input value is treated as an n-bit canonical NaN (v2.2
              section 9.2).<br>
              + *<br>
              + * Here, the result is always nan-boxed, even the
              canonical nan.<br>
              + */<br>
              +static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)<br>
              +{<br>
              +    TCGv_i64 t_max =
              tcg_const_i64(0xffffffff00000000ull);<br>
              +    TCGv_i64 t_nan =
              tcg_const_i64(0xffffffff7fc00000ull);<br>
              +<br>
              +    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in,
              t_nan);<br>
              +    tcg_temp_free_i64(t_max);<br>
              +    tcg_temp_free_i64(t_nan);<br>
              +}<br>
              +<br>
               static void generate_exception(DisasContext *ctx, int
              excp)<br>
               {<br>
                   tcg_gen_movi_tl(cpu_pc, ctx-&gt;base.pc_next);<br>
              -- <br>
              2.25.1<br>
              <br>
              <br>
            </blockquote>
          </div>
        </div>
      </blockquote>
      <br>
    </blockquote>
    <br>
  </body>
</html>
diff mbox series

Patch

diff --git a/target/riscv/insn_trans/trans_rvf.inc.c b/target/riscv/insn_trans/trans_rvf.inc.c
index 264d3139f1..f9a9e0643a 100644
--- a/target/riscv/insn_trans/trans_rvf.inc.c
+++ b/target/riscv/insn_trans/trans_rvf.inc.c
@@ -161,47 +161,86 @@  static bool trans_fsgnj_s(DisasContext *ctx, arg_fsgnj_s *a)
 {
     REQUIRE_FPU;
     REQUIRE_EXT(ctx, RVF);
+
     if (a->rs1 == a->rs2) { /* FMOV */
-        tcg_gen_mov_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
+        gen_check_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rs1]);
     } else { /* FSGNJ */
-        tcg_gen_deposit_i64(cpu_fpr[a->rd], cpu_fpr[a->rs2], cpu_fpr[a->rs1],
-                            0, 31);
+        TCGv_i64 rs1 = tcg_temp_new_i64();
+        TCGv_i64 rs2 = tcg_temp_new_i64();
+
+        gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /* This formulation retains the nanboxing of rs2. */
+        tcg_gen_deposit_i64(cpu_fpr[a->rd], rs2, rs1, 0, 31);
+        tcg_temp_free_i64(rs1);
+        tcg_temp_free_i64(rs2);
     }
-    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
     mark_fs_dirty(ctx);
     return true;
 }
 
 static bool trans_fsgnjn_s(DisasContext *ctx, arg_fsgnjn_s *a)
 {
+    TCGv_i64 rs1, rs2, mask;
+
     REQUIRE_FPU;
     REQUIRE_EXT(ctx, RVF);
+
+    rs1 = tcg_temp_new_i64();
+    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
     if (a->rs1 == a->rs2) { /* FNEG */
-        tcg_gen_xori_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], INT32_MIN);
+        tcg_gen_xori_i64(cpu_fpr[a->rd], rs1, MAKE_64BIT_MASK(31, 1));
     } else {
-        TCGv_i64 t0 = tcg_temp_new_i64();
-        tcg_gen_not_i64(t0, cpu_fpr[a->rs2]);
-        tcg_gen_deposit_i64(cpu_fpr[a->rd], t0, cpu_fpr[a->rs1], 0, 31);
-        tcg_temp_free_i64(t0);
+        rs2 = tcg_temp_new_i64();
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /*
+         * Replace bit 31 in rs1 with inverse in rs2.
+         * This formulation retains the nanboxing of rs1.
+         */
+        mask = tcg_const_i64(~MAKE_64BIT_MASK(31, 1));
+        tcg_gen_andc_i64(rs2, mask, rs2);
+        tcg_gen_and_i64(rs1, mask, rs1);
+        tcg_gen_or_i64(cpu_fpr[a->rd], rs1, rs2);
+
+        tcg_temp_free_i64(mask);
+        tcg_temp_free_i64(rs2);
     }
-    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+    tcg_temp_free_i64(rs1);
+
     mark_fs_dirty(ctx);
     return true;
 }
 
 static bool trans_fsgnjx_s(DisasContext *ctx, arg_fsgnjx_s *a)
 {
+    TCGv_i64 rs1, rs2;
+
     REQUIRE_FPU;
     REQUIRE_EXT(ctx, RVF);
+
+    rs1 = tcg_temp_new_i64();
+    gen_check_nanbox_s(rs1, cpu_fpr[a->rs1]);
+
     if (a->rs1 == a->rs2) { /* FABS */
-        tcg_gen_andi_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], ~INT32_MIN);
+        tcg_gen_andi_i64(cpu_fpr[a->rd], rs1, ~MAKE_64BIT_MASK(31, 1));
     } else {
-        TCGv_i64 t0 = tcg_temp_new_i64();
-        tcg_gen_andi_i64(t0, cpu_fpr[a->rs2], INT32_MIN);
-        tcg_gen_xor_i64(cpu_fpr[a->rd], cpu_fpr[a->rs1], t0);
-        tcg_temp_free_i64(t0);
+        rs2 = tcg_temp_new_i64();
+        gen_check_nanbox_s(rs2, cpu_fpr[a->rs2]);
+
+        /*
+         * Xor bit 31 in rs1 with that in rs2.
+         * This formulation retains the nanboxing of rs1.
+         */
+        tcg_gen_andi_i64(rs2, rs2, MAKE_64BIT_MASK(31, 1));
+        tcg_gen_xor_i64(cpu_fpr[a->rd], rs1, rs2);
+
+        tcg_temp_free_i64(rs2);
     }
-    gen_nanbox_s(cpu_fpr[a->rd], cpu_fpr[a->rd]);
+    tcg_temp_free_i64(rs1);
+
     mark_fs_dirty(ctx);
     return true;
 }
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 12a746da97..bf35182776 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -101,6 +101,24 @@  static void gen_nanbox_s(TCGv_i64 out, TCGv_i64 in)
     tcg_gen_ori_i64(out, in, MAKE_64BIT_MASK(32, 32));
 }
 
+/*
+ * A narrow n-bit operation, where n < FLEN, checks that input operands
+ * are correctly Nan-boxed, i.e., all upper FLEN - n bits are 1.
+ * If so, the least-significant bits of the input are used, otherwise the
+ * input value is treated as an n-bit canonical NaN (v2.2 section 9.2).
+ *
+ * Here, the result is always nan-boxed, even the canonical nan.
+ */
+static void gen_check_nanbox_s(TCGv_i64 out, TCGv_i64 in)
+{
+    TCGv_i64 t_max = tcg_const_i64(0xffffffff00000000ull);
+    TCGv_i64 t_nan = tcg_const_i64(0xffffffff7fc00000ull);
+
+    tcg_gen_movcond_i64(TCG_COND_GEU, out, in, t_max, in, t_nan);
+    tcg_temp_free_i64(t_max);
+    tcg_temp_free_i64(t_nan);
+}
+
 static void generate_exception(DisasContext *ctx, int excp)
 {
     tcg_gen_movi_tl(cpu_pc, ctx->base.pc_next);