@@ -182,6 +182,11 @@ DEF_HELPER_FLAGS_4(mve_vcvt_rm_uh, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vcvt_rm_ss, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(mve_vcvt_rm_us, TCG_CALL_NO_WG, void, env, ptr, ptr, i32)

+DEF_HELPER_FLAGS_3(mve_vcvtb_sh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcvtt_sh, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcvtb_hs, TCG_CALL_NO_WG, void, env, ptr, ptr)
+DEF_HELPER_FLAGS_3(mve_vcvtt_hs, TCG_CALL_NO_WG, void, env, ptr, ptr)
+
DEF_HELPER_FLAGS_3(mve_vmovnbb, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vmovnbh, TCG_CALL_NO_WG, void, env, ptr, ptr)
DEF_HELPER_FLAGS_3(mve_vmovntb, TCG_CALL_NO_WG, void, env, ptr, ptr)
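For readers who don't have the helper machinery paged in: each new DEF_HELPER_FLAGS_3 line above expands, roughly, to a prototype like the one below (a sketch of the usual helper-gen expansion, not the literal preprocessor output):

/*
 * Approximate expansion of the mve_vcvtb_sh line above. vd and vm are
 * pointers to the destination and source vector registers inside
 * CPUARMState; TCG_CALL_NO_WG tells TCG that the call does not write
 * any TCG globals, so cached guest register values need not be
 * discarded across the call.
 */
void helper_mve_vcvtb_sh(CPUARMState *env, void *vd, void *vm);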
@@ -221,6 +221,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
# The VSHLL T2 encoding is not a @2op pattern, but is here because it
# overlaps what would be size=0b11 VMULH/VRMULH
{
+ VCVTB_SH 111 0 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz
+
VMAXNMA 111 0 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=2
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BS 111 0 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
@@ -235,6 +237,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
}

{
+ VCVTB_HS 111 1 1110 0 . 11 1111 ... 0 1110 0 0 . 0 ... 1 @1op_nosz
+
VMAXNMA 111 1 1110 0 . 11 1111 ... 0 1110 1 0 . 0 ... 1 @vmaxnma size=1
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_BU 111 1 1110 0 . 11 .. 01 ... 0 1110 0 0 . 0 ... 1 @2_shll_esize_h
@@ -247,6 +251,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
}

{
+ VCVTT_SH 111 0 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz
+
VMINNMA 111 0 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=2
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TS 111 0 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
@@ -260,6 +266,8 @@ VMUL 1110 1111 0 . .. ... 0 ... 0 1001 . 1 . 1 ... 0 @2op
}

{
+ VCVTT_HS 111 1 1110 0 . 11 1111 ... 1 1110 0 0 . 0 ... 1 @1op_nosz
+
VMINNMA 111 1 1110 0 . 11 1111 ... 1 1110 1 0 . 0 ... 1 @vmaxnma size=1
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_b
VSHLL_TU 111 1 1110 0 . 11 .. 01 ... 1 1110 0 0 . 0 ... 1 @2_shll_esize_h
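Before moving on to the helpers, note that the four new decode patterns differ in only two bits: insn bit 28 selects the direction (0 for f32->f16, 1 for f16->f32) and bit 12 is the T bit selecting bottom or top f16 lanes. A standalone sketch of that observation (illustration only, not run through decodetree):

#include <stdint.h>
#include <stdio.h>

/* Given an insn already known to be one of the four VCVT encodings
 * above, bits 28 and 12 are all that distinguish them. */
static const char *mve_vcvt_variant(uint32_t insn)
{
    static const char *const names[2][2] = {
        { "VCVTB_SH", "VCVTT_SH" },   /* bit 28 == 0: f32 -> f16 */
        { "VCVTB_HS", "VCVTT_HS" },   /* bit 28 == 1: f16 -> f32 */
    };
    return names[(insn >> 28) & 1][(insn >> 12) & 1];
}

int main(void)
{
    printf("%s\n", mve_vcvt_variant(0u << 28 | 1u << 12)); /* VCVTT_SH */
    return 0;
}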
@@ -3337,3 +3337,74 @@ DO_VCVT_RMODE(vcvt_rm_sh, 2, uint16_t, helper_vfp_toshh)
DO_VCVT_RMODE(vcvt_rm_uh, 2, uint16_t, helper_vfp_touhh)
DO_VCVT_RMODE(vcvt_rm_ss, 4, uint32_t, helper_vfp_tosls)
DO_VCVT_RMODE(vcvt_rm_us, 4, uint32_t, helper_vfp_touls)
+
+/*
+ * VCVT between halfprec and singleprec. As usual for halfprec
+ * conversions, FZ16 is ignored and AHP is observed.
+ */
+#define DO_VCVT_SH(OP, TOP) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vm) \
+ { \
+ uint16_t *d = vd; \
+ uint32_t *m = vm; \
+ uint16_t r; \
+ uint16_t mask = mve_element_mask(env); \
+ bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ float_status *base_fpst = &env->vfp.standard_fp_status; \
+ bool old_fz = get_flush_to_zero(base_fpst); \
+ set_flush_to_zero(false, base_fpst); \
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) { \
+ if ((mask & MAKE_64BIT_MASK(0, 4)) == 0) { \
+ continue; \
+ } \
+ fpst = base_fpst; \
+ if (!(mask & 1)) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = float32_to_float16(m[H4(e)], ieee, fpst); \
+ mergemask(&d[H2(e * 2 + TOP)], r, mask >> (TOP * 2)); \
+ } \
+ set_flush_to_zero(old_fz, base_fpst); \
+ mve_advance_vpt(env); \
+ }
+
+#define DO_VCVT_HS(OP, TOP) \
+ void HELPER(glue(mve_, OP))(CPUARMState *env, void *vd, void *vm) \
+ { \
+ uint32_t *d = vd; \
+ uint16_t *m = vm; \
+ uint32_t r; \
+ uint16_t mask = mve_element_mask(env); \
+ bool ieee = !(env->vfp.xregs[ARM_VFP_FPSCR] & FPCR_AHP); \
+ unsigned e; \
+ float_status *fpst; \
+ float_status scratch_fpst; \
+ float_status *base_fpst = &env->vfp.standard_fp_status; \
+ bool old_fiz = get_flush_inputs_to_zero(base_fpst); \
+ set_flush_inputs_to_zero(false, base_fpst); \
+ for (e = 0; e < 16 / 4; e++, mask >>= 4) { \
+ if ((mask & MAKE_64BIT_MASK(0, 4)) == 0) { \
+ continue; \
+ } \
+ fpst = base_fpst; \
+ if (!(mask & (1 << (TOP * 2)))) { \
+ /* We need the result but without updating flags */ \
+ scratch_fpst = *fpst; \
+ fpst = &scratch_fpst; \
+ } \
+ r = float16_to_float32(m[H2(e * 2 + TOP)], ieee, fpst); \
+ mergemask(&d[H4(e)], r, mask); \
+ } \
+ set_flush_inputs_to_zero(old_fiz, base_fpst); \
+ mve_advance_vpt(env); \
+ }
+
+DO_VCVT_SH(vcvtb_sh, 0)
+DO_VCVT_SH(vcvtt_sh, 1)
+DO_VCVT_HS(vcvtb_hs, 0)
+DO_VCVT_HS(vcvtt_hs, 1)
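As a sanity check on the d[H2(e * 2 + TOP)] indexing, here is a standalone sketch (not QEMU code; the H2()/H4() macros only compensate for host byte order and are omitted). Each of the four 32-bit elements pairs with one 16-bit lane: the even (bottom) lanes for VCVTB, the odd (top) lanes for VCVTT:

#include <stdio.h>

/* Prints which f16 lane each f32 element maps to. The helper gates the
 * write with the predicate bits covering those two bytes, which is why
 * it passes mask >> (TOP * 2) to mergemask(). */
int main(void)
{
    for (int top = 0; top <= 1; top++) {
        printf("%s:\n", top ? "VCVTT (top lanes)" : "VCVTB (bottom lanes)");
        for (int e = 0; e < 4; e++) {
            printf("  f32 element %d <-> f16 lane %d (bytes %d..%d)\n",
                   e, e * 2 + top, (e * 2 + top) * 2, (e * 2 + top) * 2 + 1);
        }
    }
    return 0;
}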
@@ -627,6 +627,20 @@ DO_VCVT_RMODE(VCVTPU, FPROUNDING_POSINF, true)
DO_VCVT_RMODE(VCVTMS, FPROUNDING_NEGINF, false)
DO_VCVT_RMODE(VCVTMU, FPROUNDING_NEGINF, true)

+#define DO_VCVT_SH(INSN, FN) \
+ static bool trans_##INSN(DisasContext *s, arg_1op *a) \
+ { \
+ if (!dc_isar_feature(aa32_mve_fp, s)) { \
+ return false; \
+ } \
+ return do_1op(s, a, gen_helper_mve_##FN); \
+ }
+
+DO_VCVT_SH(VCVTB_SH, vcvtb_sh)
+DO_VCVT_SH(VCVTT_SH, vcvtt_sh)
+DO_VCVT_SH(VCVTB_HS, vcvtb_hs)
+DO_VCVT_SH(VCVTT_HS, vcvtt_hs)
+
/* Narrowing moves: only size 0 and 1 are valid */
#define DO_VMOVN(INSN, FN) \
static bool trans_##INSN(DisasContext *s, arg_1op *a) \
Implement the MVE VCVT instruction which converts between single
and half precision floating point.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target/arm/helper-mve.h    |  5 +++
 target/arm/mve.decode      |  8 +++++
 target/arm/mve_helper.c    | 71 ++++++++++++++++++++++++++++++++++++++
 target/arm/translate-mve.c | 14 ++++++++
 4 files changed, 98 insertions(+)

-- 
2.20.1
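P.S. The reason the helpers compute ieee = !(FPSCR & FPCR_AHP) may deserve a standalone illustration (a sketch of the architectural behaviour, not QEMU code): with AHP set, half precision has no Inf or NaN, and the all-ones exponent instead encodes large normal numbers.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* IEEE f16 treats exponent 0x1f as Inf/NaN; the Arm alternative format
 * (AHP == 1) reuses that exponent for normal numbers up to 131008. */
static bool f16_is_inf_or_nan(uint16_t h, bool ieee)
{
    return ieee && ((h >> 10) & 0x1f) == 0x1f;
}

int main(void)
{
    uint16_t h = 0x7c00; /* +Inf as IEEE f16 */
    printf("ieee: %d  ahp: %d\n",
           f16_is_inf_or_nan(h, true),   /* 1: infinity */
           f16_is_inf_or_nan(h, false)); /* 0: 65536.0 in AHP */
    return 0;
}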