[v2,24/25] target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate)

Message ID	1394822294-14837-25-git-send-email-peter.maydell@linaro.org
State	Accepted
Headers	show Return-Path: <patchwork-forward+bncBC6Z756YVMIBBI4ZRWMQKGQEXQTONYQ@linaro.org> Received-SPF: neutral (google.com: 209.85.220.178 is neither permitted nor denied by best guess record for domain of patch+caf_=patchwork-forward=linaro.org@linaro.org) client-ip=209.85.220.178; Received-SPF: pass (google.com: best guess record for domain of pm215@archaic.org.uk designates 2001:8b0:1d0::1 as permitted sender) client-ip=2001:8b0:1d0::1; From: Peter Maydell <peter.maydell@linaro.org> To: qemu-devel@nongnu.org Cc: patches@linaro.org, Alexander Graf <agraf@suse.de>, Michael Matz <matz@suse.de>, Dirk Mueller <dmueller@suse.de>, Laurent Desnogues <laurent.desnogues@gmail.com>, kvmarm@lists.cs.columbia.edu, Richard Henderson <rth@twiddle.net>, =?UTF-8?q?Alex=20Benn=C3=A9e?= <alex.bennee@linaro.org>, Christoffer Dall <christoffer.dall@linaro.org>, Will Newton <will.newton@linaro.org>, Peter Crosthwaite <peter.crosthwaite@xilinx.com> Subject: [PATCH v2 24/25] target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate) Date: Fri, 14 Mar 2014 18:38:13 +0000 Message-Id: <1394822294-14837-25-git-send-email-peter.maydell@linaro.org> In-Reply-To: <1394822294-14837-1-git-send-email-peter.maydell@linaro.org> References: <1394822294-14837-1-git-send-email-peter.maydell@linaro.org> MIME-Version: 1.0 Precedence: list Mailing-list: list patchwork-forward@linaro.org; contact patchwork-forward+owners@linaro.org Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit

diff --git a/target-arm/helper.c b/target-arm/helper.c index 9059dea..64d982a 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -4720,12 +4720,12 @@ float64 HELPER(recpe_f64)(float64 input, void *fpstp) /* The algorithm that must be used to calculate the estimate * is specified by the ARM ARM. */ -static float64 recip_sqrt_estimate(float64 a, CPUARMState *env) +static float64 recip_sqrt_estimate(float64 a, float_status *real_fp_status) { /* These calculations mustn't set any fp exception flags, * so we use a local copy of the fp_status. */ - float_status dummy_status = env->vfp.standard_fp_status; + float_status dummy_status = *real_fp_status; float_status *s = &dummy_status; float64 q; int64_t q_int; @@ -4772,49 +4772,64 @@ static float64 recip_sqrt_estimate(float64 a, CPUARMState *env) return float64_div(int64_to_float64(q_int, s), float64_256, s); } -float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env) +float32 HELPER(rsqrte_f32)(float32 input, void *fpstp) { - float_status *s = &env->vfp.standard_fp_status; + float_status *s = fpstp; + float32 f32 = float32_squash_input_denormal(input, s); + uint32_t val = float32_val(f32); + uint32_t f32_sbit = 0x80000000 & val; + int32_t f32_exp = extract32(val, 23, 8); + uint32_t f32_frac = extract32(val, 0, 23); + uint64_t f64_frac; + uint64_t val64; int result_exp; float64 f64; - uint32_t val; - uint64_t val64; - val = float32_val(a); - - if (float32_is_any_nan(a)) { - if (float32_is_signaling_nan(a)) { + if (float32_is_any_nan(f32)) { + float32 nan = f32; + if (float32_is_signaling_nan(f32)) { float_raise(float_flag_invalid, s); + nan = float32_maybe_silence_nan(f32); } - return float32_default_nan; - } else if (float32_is_zero_or_denormal(a)) { - if (!float32_is_zero(a)) { - float_raise(float_flag_input_denormal, s); + if (s->default_nan_mode) { + nan = float32_default_nan; } + return nan; + } else if (float32_is_zero(f32)) { float_raise(float_flag_divbyzero, s); - return float32_set_sign(float32_infinity, float32_is_neg(a)); - } else if (float32_is_neg(a)) { + return float32_set_sign(float32_infinity, float32_is_neg(f32)); + } else if (float32_is_neg(f32)) { float_raise(float_flag_invalid, s); return float32_default_nan; - } else if (float32_is_infinity(a)) { + } else if (float32_is_infinity(f32)) { return float32_zero; } - /* Normalize to a double-precision value between 0.25 and 1.0, + /* Scale and normalize to a double-precision value between 0.25 and 1.0, * preserving the parity of the exponent. */ - if ((val & 0x800000) == 0) { - f64 = make_float64(((uint64_t)(val & 0x80000000) << 32) + + f64_frac = ((uint64_t) f32_frac) << 29; + if (f32_exp == 0) { + while (extract64(f64_frac, 51, 1) == 0) { + f64_frac = f64_frac << 1; + f32_exp = f32_exp-1; + } + f64_frac = extract64(f64_frac, 0, 51) << 1; + } + + if (extract64(f32_exp, 0, 1) == 0) { + f64 = make_float64(((uint64_t) f32_sbit) << 32 | (0x3feULL << 52) - | ((uint64_t)(val & 0x7fffff) << 29)); + | f64_frac); } else { - f64 = make_float64(((uint64_t)(val & 0x80000000) << 32) + f64 = make_float64(((uint64_t) f32_sbit) << 32 | (0x3fdULL << 52) - | ((uint64_t)(val & 0x7fffff) << 29)); + | f64_frac); } - result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2; + result_exp = (380 - f32_exp) / 2; - f64 = recip_sqrt_estimate(f64, env); + f64 = recip_sqrt_estimate(f64, s); val64 = float64_val(f64); @@ -4823,6 +4838,69 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUARMState *env) return make_float32(val); } +float64 HELPER(rsqrte_f64)(float64 input, void *fpstp) +{ + float_status *s = fpstp; + float64 f64 = float64_squash_input_denormal(input, s); + uint64_t val = float64_val(f64); + uint64_t f64_sbit = 0x8000000000000000ULL & val; + int64_t f64_exp = extract64(val, 52, 11); + uint64_t f64_frac = extract64(val, 0, 52); + int64_t result_exp; + uint64_t result_frac; + + if (float64_is_any_nan(f64)) { + float64 nan = f64; + if (float64_is_signaling_nan(f64)) { + float_raise(float_flag_invalid, s); + nan = float64_maybe_silence_nan(f64); + } + if (s->default_nan_mode) { + nan = float64_default_nan; + } + return nan; + } else if (float64_is_zero(f64)) { + float_raise(float_flag_divbyzero, s); + return float64_set_sign(float64_infinity, float64_is_neg(f64)); + } else if (float64_is_neg(f64)) { + float_raise(float_flag_invalid, s); + return float64_default_nan; + } else if (float64_is_infinity(f64)) { + return float64_zero; + } + + /* Scale and normalize to a double-precision value between 0.25 and 1.0, + * preserving the parity of the exponent. */ + + if (f64_exp == 0) { + while (extract64(f64_frac, 51, 1) == 0) { + f64_frac = f64_frac << 1; + f64_exp = f64_exp - 1; + } + f64_frac = extract64(f64_frac, 0, 51) << 1; + } + + if (extract64(f64_exp, 0, 1) == 0) { + f64 = make_float64(f64_sbit + | (0x3feULL << 52) + | f64_frac); + } else { + f64 = make_float64(f64_sbit + | (0x3fdULL << 52) + | f64_frac); + } + + result_exp = (3068 - f64_exp) / 2; + + f64 = recip_sqrt_estimate(f64, s); + + result_frac = extract64(float64_val(f64), 0, 52); + + return make_float64(f64_sbit | + ((result_exp & 0x7ff) << 52) | + result_frac); +} + uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) { float_status *s = fpstp; @@ -4840,8 +4918,9 @@ uint32_t HELPER(recpe_u32)(uint32_t a, void *fpstp) return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff); } -uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env) +uint32_t HELPER(rsqrte_u32)(uint32_t a, void *fpstp) { + float_status *fpst = fpstp; float64 f64; if ((a & 0xc0000000) == 0) { @@ -4856,7 +4935,7 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUARMState *env) | ((uint64_t)(a & 0x3fffffff) << 22)); } - f64 = recip_sqrt_estimate(f64, env); + f64 = recip_sqrt_estimate(f64, fpst); return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff); } diff --git a/target-arm/helper.h b/target-arm/helper.h index f96a824..a3d6f32 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -169,9 +169,10 @@ DEF_HELPER_3(recps_f32, f32, f32, f32, env) DEF_HELPER_3(rsqrts_f32, f32, f32, f32, env) DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, ptr) -DEF_HELPER_2(rsqrte_f32, f32, f32, env) +DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, ptr) +DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, ptr) DEF_HELPER_2(recpe_u32, i32, i32, ptr) -DEF_HELPER_2(rsqrte_u32, i32, i32, env) +DEF_HELPER_FLAGS_2(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32, ptr) DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32) DEF_HELPER_3(shl_cc, i32, env, i32, i32) diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 235f880..befffac 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -7146,6 +7146,9 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, case 0x3f: /* FRECPX */ gen_helper_frecpx_f64(tcg_res, tcg_op, fpst); break; + case 0x7d: /* FRSQRTE */ + gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst); + break; default: g_assert_not_reached(); } @@ -7181,6 +7184,9 @@ static void handle_2misc_reciprocal(DisasContext *s, int opcode, case 0x3f: /* FRECPX */ gen_helper_frecpx_f32(tcg_res, tcg_op, fpst); break; + case 0x7d: /* FRSQRTE */ + gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst); + break; default: g_assert_not_reached(); } @@ -7378,6 +7384,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } case 0x3d: /* FRECPE */ case 0x3f: /* FRECPX */ + case 0x7d: /* FRSQRTE */ handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd); return; case 0x1a: /* FCVTNS */ @@ -7404,9 +7411,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn) } handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd); return; - case 0x7d: /* FRSQRTE */ - unsupported_encoding(s, insn); - return; default: unallocated_encoding(s); return; @@ -9255,6 +9259,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } /* fall through */ case 0x3d: /* FRECPE */ + case 0x7d: /* FRSQRTE */ + if (size == 3 && !is_q) { + unallocated_encoding(s); + return; + } handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd); return; case 0x56: /* FCVTXN, FCVTXN2 */ @@ -9297,9 +9306,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) } break; case 0x7c: /* URSQRTE */ - case 0x7d: /* FRSQRTE */ - unsupported_encoding(s, insn); - return; + if (size == 3) { + unallocated_encoding(s); + return; + } + need_fpstatus = true; + break; default: unallocated_encoding(s); return; @@ -9432,6 +9444,9 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) case 0x59: /* FRINTX */ gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus); break; + case 0x7c: /* URSQRTE */ + gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus); + break; default: g_assert_not_reached(); } diff --git a/target-arm/translate.c b/target-arm/translate.c index 3771953..56e3b4b 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -6689,8 +6689,12 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins break; } case NEON_2RM_VRSQRTE: - gen_helper_rsqrte_u32(tmp, tmp, cpu_env); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_rsqrte_u32(tmp, tmp, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } case NEON_2RM_VRECPE_F: { TCGv_ptr fpstatus = get_fpstatus_ptr(1); @@ -6699,8 +6703,12 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins break; } case NEON_2RM_VRSQRTE_F: - gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env); + { + TCGv_ptr fpstatus = get_fpstatus_ptr(1); + gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, fpstatus); + tcg_temp_free_ptr(fpstatus); break; + } case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ gen_vfp_sito(0, 1); break;

[v2,24/25] target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate)

Commit Message

Patch