[v4,18/22] fpu/softfloat: re-factor int/uint to float

Message ID	20180206164815.10084-19-alex.bennee@linaro.org
State	Superseded
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of qemu-devel-bounces+patch=linaro.org@nongnu.org designates 2001:4830:134:3::11 as permitted sender) client-ip=2001:4830:134:3::11; From: =?utf-8?q?Alex_Benn=C3=A9e?= <alex.bennee@linaro.org> To: richard.henderson@linaro.org, peter.maydell@linaro.org, laurent@vivier.eu, bharata@linux.vnet.ibm.com, andrew@andrewdutcher.com Date: Tue, 6 Feb 2018 16:48:11 +0000 Message-Id: <20180206164815.10084-19-alex.bennee@linaro.org> In-Reply-To: <20180206164815.10084-1-alex.bennee@linaro.org> References: <20180206164815.10084-1-alex.bennee@linaro.org> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Subject: [Qemu-devel] [PATCH v4 18/22] fpu/softfloat: re-factor int/uint to float Precedence: list Cc: =?utf-8?q?Alex_Benn=C3=A9e?= <alex.bennee@linaro.org>, qemu-devel@nongnu.org, Aurelien Jarno <aurelien@aurel32.net> Errors-To: qemu-devel-bounces+patch=linaro.org@nongnu.org Sender: "Qemu-devel" <qemu-devel-bounces+patch=linaro.org@nongnu.org>
Series	re-factor softfloat and add fp16 functions \| expand [v4,00/22] re-factor softfloat and add fp16 functions [v4,01/22] fpu/softfloat: implement float16_squash_input_denormal [v4,02/22] include/fpu/softfloat: remove USE_SOFTFLOAT_STRUCT_TYPES [v4,03/22] fpu/softfloat-types: new header to prevent excessive re-builds [v4,04/22] target/*/cpu.h: remove softfloat.h [v4,05/22] include/fpu/softfloat: implement float16_abs helper [v4,06/22] include/fpu/softfloat: implement float16_chs helper [v4,07/22] include/fpu/softfloat: implement float16_set_sign helper [v4,08/22] include/fpu/softfloat: add some float16 constants [v4,09/22] fpu/softfloat: improve comments on ARM NaN propagation [v4,10/22] fpu/softfloat: move the extract functions to the top of the file [v4,11/22] fpu/softfloat: define decompose structures [v4,12/22] fpu/softfloat: re-factor add/sub [v4,13/22] fpu/softfloat: re-factor mul [v4,14/22] fpu/softfloat: re-factor div [v4,15/22] fpu/softfloat: re-factor muladd [v4,16/22] fpu/softfloat: re-factor round_to_int [v4,17/22] fpu/softfloat: re-factor float to int/uint [v4,18/22] fpu/softfloat: re-factor int/uint to float [v4,19/22] fpu/softfloat: re-factor scalbn [v4,20/22] fpu/softfloat: re-factor minmax [v4,21/22] fpu/softfloat: re-factor compare [v4,22/22] fpu/softfloat: re-factor sqrt

diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 216d60df6e..9f9f101d35 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1501,6 +1501,169 @@ FLOAT_TO_UINT(64, 64) #undef FLOAT_TO_UINT +/* + * Integer to float conversions + * + * Returns the result of converting the two's complement integer `a' + * to the floating-point format. The conversion is performed according + * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. + */ + +static FloatParts int_to_float(int64_t a, float_status *status) +{ + FloatParts r; + if (a == 0) { + r.cls = float_class_zero; + r.sign = false; + } else if (a == (1ULL << 63)) { + r.cls = float_class_normal; + r.sign = true; + r.frac = DECOMPOSED_IMPLICIT_BIT; + r.exp = 63; + } else { + uint64_t f; + if (a < 0) { + f = -a; + r.sign = true; + } else { + f = a; + r.sign = false; + } + int shift = clz64(f) - 1; + r.cls = float_class_normal; + r.exp = (DECOMPOSED_BINARY_POINT - shift); + r.frac = f << shift; + } + + return r; +} + +float16 int64_to_float16(int64_t a, float_status *status) +{ + FloatParts pa = int_to_float(a, status); + return float16_round_pack_canonical(pa, status); +} + +float16 int32_to_float16(int32_t a, float_status *status) +{ + return int64_to_float16(a, status); +} + +float16 int16_to_float16(int16_t a, float_status *status) +{ + return int64_to_float16(a, status); +} + +float32 int64_to_float32(int64_t a, float_status *status) +{ + FloatParts pa = int_to_float(a, status); + return float32_round_pack_canonical(pa, status); +} + +float32 int32_to_float32(int32_t a, float_status *status) +{ + return int64_to_float32(a, status); +} + +float32 int16_to_float32(int16_t a, float_status *status) +{ + return int64_to_float32(a, status); +} + +float64 int64_to_float64(int64_t a, float_status *status) +{ + FloatParts pa = int_to_float(a, status); + return float64_round_pack_canonical(pa, status); +} + +float64 int32_to_float64(int32_t a, float_status *status) +{ + return int64_to_float64(a, status); +} + +float64 int16_to_float64(int16_t a, float_status *status) +{ + return int64_to_float64(a, status); +} + + +/* + * Unsigned Integer to float conversions + * + * Returns the result of converting the unsigned integer `a' to the + * floating-point format. The conversion is performed according to the + * IEC/IEEE Standard for Binary Floating-Point Arithmetic. + */ + +static FloatParts uint_to_float(uint64_t a, float_status *status) +{ + FloatParts r = { .sign = false}; + + if (a == 0) { + r.cls = float_class_zero; + } else { + int spare_bits = clz64(a) - 1; + r.cls = float_class_normal; + r.exp = DECOMPOSED_BINARY_POINT - spare_bits; + if (spare_bits < 0) { + shift64RightJamming(a, -spare_bits, &a); + r.frac = a; + } else { + r.frac = a << spare_bits; + } + } + + return r; +} + +float16 uint64_to_float16(uint64_t a, float_status *status) +{ + FloatParts pa = uint_to_float(a, status); + return float16_round_pack_canonical(pa, status); +} + +float16 uint32_to_float16(uint32_t a, float_status *status) +{ + return uint64_to_float16(a, status); +} + +float16 uint16_to_float16(uint16_t a, float_status *status) +{ + return uint64_to_float16(a, status); +} + +float32 uint64_to_float32(uint64_t a, float_status *status) +{ + FloatParts pa = uint_to_float(a, status); + return float32_round_pack_canonical(pa, status); +} + +float32 uint32_to_float32(uint32_t a, float_status *status) +{ + return uint64_to_float32(a, status); +} + +float32 uint16_to_float32(uint16_t a, float_status *status) +{ + return uint64_to_float32(a, status); +} + +float64 uint64_to_float64(uint64_t a, float_status *status) +{ + FloatParts pa = uint_to_float(a, status); + return float64_round_pack_canonical(pa, status); +} + +float64 uint32_to_float64(uint32_t a, float_status *status) +{ + return uint64_to_float64(a, status); +} + +float64 uint16_to_float64(uint16_t a, float_status *status) +{ + return uint64_to_float64(a, status); +} + /*---------------------------------------------------------------------------- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 | and 7, and returns the properly rounded 32-bit integer corresponding to the @@ -2592,43 +2755,6 @@ static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp, } -/*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the single-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 int32_to_float32(int32_t a, float_status *status) -{ - flag zSign; - - if ( a == 0 ) return float32_zero; - if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); - zSign = ( a < 0 ); - return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status); -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the double-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 int32_to_float64(int32_t a, float_status *status) -{ - flag zSign; - uint32_t absA; - int8_t shiftCount; - uint64_t zSig; - - if ( a == 0 ) return float64_zero; - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = countLeadingZeros32( absA ) + 21; - zSig = absA; - return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount ); - -} /*---------------------------------------------------------------------------- | Returns the result of converting the 32-bit two's complement integer `a' @@ -2675,56 +2801,6 @@ float128 int32_to_float128(int32_t a, float_status *status) } -/*---------------------------------------------------------------------------- -| Returns the result of converting the 64-bit two's complement integer `a' -| to the single-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 int64_to_float32(int64_t a, float_status *status) -{ - flag zSign; - uint64_t absA; - int8_t shiftCount; - - if ( a == 0 ) return float32_zero; - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = countLeadingZeros64( absA ) - 40; - if ( 0 <= shiftCount ) { - return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount ); - } - else { - shiftCount += 7; - if ( shiftCount < 0 ) { - shift64RightJamming( absA, - shiftCount, &absA ); - } - else { - absA <<= shiftCount; - } - return roundAndPackFloat32(zSign, 0x9C - shiftCount, absA, status); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the 64-bit two's complement integer `a' -| to the double-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 int64_to_float64(int64_t a, float_status *status) -{ - flag zSign; - - if ( a == 0 ) return float64_zero; - if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) { - return packFloat64( 1, 0x43E, 0 ); - } - zSign = ( a < 0 ); - return normalizeRoundAndPackFloat64(zSign, 0x43C, zSign ? -a : a, status); -} - /*---------------------------------------------------------------------------- | Returns the result of converting the 64-bit two's complement integer `a' | to the extended double-precision floating-point format. The conversion @@ -2779,65 +2855,6 @@ float128 int64_to_float128(int64_t a, float_status *status) } -/*---------------------------------------------------------------------------- -| Returns the result of converting the 64-bit unsigned integer `a' -| to the single-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 uint64_to_float32(uint64_t a, float_status *status) -{ - int shiftcount; - - if (a == 0) { - return float32_zero; - } - - /* Determine (left) shift needed to put first set bit into bit posn 23 - * (since packFloat32() expects the binary point between bits 23 and 22); - * this is the fast case for smallish numbers. - */ - shiftcount = countLeadingZeros64(a) - 40; - if (shiftcount >= 0) { - return packFloat32(0, 0x95 - shiftcount, a << shiftcount); - } - /* Otherwise we need to do a round-and-pack. roundAndPackFloat32() - * expects the binary point between bits 30 and 29, hence the + 7. - */ - shiftcount += 7; - if (shiftcount < 0) { - shift64RightJamming(a, -shiftcount, &a); - } else { - a <<= shiftcount; - } - - return roundAndPackFloat32(0, 0x9c - shiftcount, a, status); -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the 64-bit unsigned integer `a' -| to the double-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 uint64_to_float64(uint64_t a, float_status *status) -{ - int exp = 0x43C; - int shiftcount; - - if (a == 0) { - return float64_zero; - } - - shiftcount = countLeadingZeros64(a) - 1; - if (shiftcount < 0) { - shift64RightJamming(a, -shiftcount, &a); - } else { - a <<= shiftcount; - } - return roundAndPackFloat64(0, exp - shiftcount, a, status); -} - /*---------------------------------------------------------------------------- | Returns the result of converting the 64-bit unsigned integer `a' | to the quadruple-precision floating-point format. The conversion is performed @@ -6715,19 +6732,6 @@ int float128_unordered_quiet(float128 a, float128 b, float_status *status) return 0; } -/* misc functions */ -float32 uint32_to_float32(uint32_t a, float_status *status) -{ - return int64_to_float32(a, status); -} - -float64 uint32_to_float64(uint32_t a, float_status *status) -{ - return int64_to_float64(a, status); -} - - - #define COMPARE(s, nan_exp) \ static inline int float ## s ## _compare_internal(float ## s a, float ## s b,\ int is_quiet, float_status *status) \ diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h index ec1e701c26..3e6fdd756a 100644 --- a/include/fpu/softfloat.h +++ b/include/fpu/softfloat.h @@ -190,9 +190,13 @@ enum { /*---------------------------------------------------------------------------- | Software IEC/IEEE integer-to-floating-point conversion routines. *----------------------------------------------------------------------------*/ +float32 int16_to_float32(int16_t, float_status *status); float32 int32_to_float32(int32_t, float_status *status); +float64 int16_to_float64(int16_t, float_status *status); float64 int32_to_float64(int32_t, float_status *status); +float32 uint16_to_float32(uint16_t, float_status *status); float32 uint32_to_float32(uint32_t, float_status *status); +float64 uint16_to_float64(uint16_t, float_status *status); float64 uint32_to_float64(uint32_t, float_status *status); floatx80 int32_to_floatx80(int32_t, float_status *status); float128 int32_to_float128(int32_t, float_status *status); @@ -204,27 +208,6 @@ float32 uint64_to_float32(uint64_t, float_status *status); float64 uint64_to_float64(uint64_t, float_status *status); float128 uint64_to_float128(uint64_t, float_status *status); -/* We provide the int16 versions for symmetry of API with float-to-int */ -static inline float32 int16_to_float32(int16_t v, float_status *status) -{ - return int32_to_float32(v, status); -} - -static inline float32 uint16_to_float32(uint16_t v, float_status *status) -{ - return uint32_to_float32(v, status); -} - -static inline float64 int16_to_float64(int16_t v, float_status *status) -{ - return int32_to_float64(v, status); -} - -static inline float64 uint16_to_float64(uint16_t v, float_status *status) -{ - return uint32_to_float64(v, status); -} - /*---------------------------------------------------------------------------- | Software half-precision conversion routines. *----------------------------------------------------------------------------*/ @@ -245,6 +228,11 @@ uint64_t float16_to_uint64(float16 a, float_status *status); int64_t float16_to_int64_round_to_zero(float16, float_status *status); uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *status); float16 int16_to_float16(int16_t a, float_status *status); +float16 int32_to_float16(int32_t a, float_status *status); +float16 int64_to_float16(int64_t a, float_status *status); +float16 uint16_to_float16(uint16_t a, float_status *status); +float16 uint32_to_float16(uint32_t a, float_status *status); +float16 uint64_to_float16(uint64_t a, float_status *status); /*---------------------------------------------------------------------------- | Software half-precision operations.

[v4,18/22] fpu/softfloat: re-factor int/uint to float

Commit Message

Patch