Message ID | 20180109122252.17670-11-alex.bennee@linaro.org |
---|---|
State | New |
Headers | show |
Series | re-factor softfloat and add fp16 functions | expand |
On 01/09/2018 04:22 AM, Alex Bennée wrote: > + float_class_qnan, > + float_class_snan, > + float_class_dnan, A /* default nan */ comment here wouldn't go amiss. > + float_class_msnan, /* maybe silenced */ r~
On 9 January 2018 at 12:22, Alex Bennée <alex.bennee@linaro.org> wrote: > These structures pave the way for generic softfloat helper routines > that will operate on fully decomposed numbers. > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > fpu/softfloat.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 69 insertions(+), 1 deletion(-) > > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index 59afe81d06..fcba28d3f8 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -83,7 +83,7 @@ this code that are retained. > * target-dependent and needs the TARGET_* macros. > */ > #include "qemu/osdep.h" > - > +#include "qemu/bitops.h" > #include "fpu/softfloat.h" > > /* We only need stdlib for abort() */ > @@ -186,6 +186,74 @@ static inline flag extractFloat64Sign(float64 a) > return float64_val(a) >> 63; > } > > +/*---------------------------------------------------------------------------- > +| Classify a floating point number. > +*----------------------------------------------------------------------------*/ > + > +typedef enum { > + float_class_unclassified, > + float_class_zero, > + float_class_normal, > + float_class_inf, > + float_class_qnan, > + float_class_snan, > + float_class_dnan, > + float_class_msnan, /* maybe silenced */ > +} float_class; > + > +/*---------------------------------------------------------------------------- > +| Structure holding all of the decomposed parts of a float. > +| The exponent is unbiased and the fraction is normalized. > +*----------------------------------------------------------------------------*/ > + > +typedef struct { > + uint64_t frac : 64; > + int exp : 32; > + float_class cls : 8; > + int : 23; What is this unnamed 23 bit field for? > + bool sign : 1; Why are we using a bitfield struct here anyway? 
uint64_t is 64 bits, int is 32 bits, we don't care how big the float_class enum is represented as, and we're not trying to pack together lots of bools so it doesn't matter much if we have a whole byte for the sign. > +} decomposed_parts; > + > +#define DECOMPOSED_BINARY_POINT (64 - 2) > +#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT) > +#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1) > + > +/* Structure holding all of the relevant parameters for a format. */ > +typedef struct { > + int exp_bias; > + int exp_max; > + int frac_shift; > + uint64_t frac_lsb; > + uint64_t frac_lsbm1; Why the '1' in the field name? Overall I think some brief comments about what the fields mean would be helpful. > + uint64_t round_mask; > + uint64_t roundeven_mask; > +} decomposed_params; > + > +#define FRAC_PARAMS(F) \ > + .frac_shift = F, \ > + .frac_lsb = 1ull << (F), \ > + .frac_lsbm1 = 1ull << ((F) - 1), \ > + .round_mask = (1ull << (F)) - 1, \ > + .roundeven_mask = (2ull << (F)) - 1 > + > +static const decomposed_params float16_params = { > + .exp_bias = 0x0f, > + .exp_max = 0x1f, > + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 10) > +}; > + > +static const decomposed_params float32_params = { > + .exp_bias = 0x7f, > + .exp_max = 0xff, > + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 23) > +}; > + > +static const decomposed_params float64_params = { > + .exp_bias = 0x3ff, > + .exp_max = 0x7ff, > + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 52) Maybe we should hide the DECOMPOSED_BINARY_POINT bit inside the macro? Then the 10/23/52 are just the number of fraction bits in the format. thanks -- PMM
Hi Alex, Richard, On 01/09/2018 09:22 AM, Alex Bennée wrote: > These structures pave the way for generic softfloat helper routines > that will operate on fully decomposed numbers. > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > Signed-off-by: Richard Henderson <richard.henderson@linaro.org> > --- > fpu/softfloat.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 69 insertions(+), 1 deletion(-) > > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index 59afe81d06..fcba28d3f8 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -83,7 +83,7 @@ this code that are retained. > * target-dependent and needs the TARGET_* macros. > */ > #include "qemu/osdep.h" > - > +#include "qemu/bitops.h" > #include "fpu/softfloat.h" > > /* We only need stdlib for abort() */ > @@ -186,6 +186,74 @@ static inline flag extractFloat64Sign(float64 a) > return float64_val(a) >> 63; > } > > +/*---------------------------------------------------------------------------- > +| Classify a floating point number. > +*----------------------------------------------------------------------------*/ > + > +typedef enum { > + float_class_unclassified, > + float_class_zero, > + float_class_normal, > + float_class_inf, > + float_class_qnan, > + float_class_snan, > + float_class_dnan, > + float_class_msnan, /* maybe silenced */ > +} float_class; > + > +/*---------------------------------------------------------------------------- > +| Structure holding all of the decomposed parts of a float. > +| The exponent is unbiased and the fraction is normalized. > +*----------------------------------------------------------------------------*/ > + > +typedef struct { > + uint64_t frac : 64; I think this does not work on LLP64/IL32P64 model. Should we add a check in ./configure and refuse to build on IL32P64 model? This would be safer IMHO. 
> + int exp : 32; > + float_class cls : 8; > + int : 23; > + bool sign : 1; checking on "ISO/IEC 14882:1998" 9.6 Bit-fields: Alignment of bit-fields is implementation-defined. Bit-fields are packed into some addressable allocation unit. [Note: bit-fields straddle allocation units on some machines and not on others. Bit-fields are assigned right-to-left on some machines, left-to-right on others. ] I'd still write it: int :23, sign :1; > +} decomposed_parts; > + > +#define DECOMPOSED_BINARY_POINT (64 - 2) > +#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT) > +#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1) > + > +/* Structure holding all of the relevant parameters for a format. */ > +typedef struct { > + int exp_bias; > + int exp_max; > + int frac_shift; > + uint64_t frac_lsb; > + uint64_t frac_lsbm1; > + uint64_t round_mask; > + uint64_t roundeven_mask; > +} decomposed_params; > + > +#define FRAC_PARAMS(F) \ > + .frac_shift = F, \ > + .frac_lsb = 1ull << (F), \ > + .frac_lsbm1 = 1ull << ((F) - 1), \ > + .round_mask = (1ull << (F)) - 1, \ > + .roundeven_mask = (2ull << (F)) - 1 > + > +static const decomposed_params float16_params = { > + .exp_bias = 0x0f, > + .exp_max = 0x1f, > + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 10) > +}; > + > +static const decomposed_params float32_params = { > + .exp_bias = 0x7f, > + .exp_max = 0xff, > + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 23) > +}; > + > +static const decomposed_params float64_params = { > + .exp_bias = 0x3ff, > + .exp_max = 0x7ff, > + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 52) > +}; > + > /*---------------------------------------------------------------------------- > | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 > | and 7, and returns the properly rounded 32-bit integer corresponding to the >
Philippe Mathieu-Daudé <f4bug@amsat.org> writes: > Hi Alex, Richard, > > On 01/09/2018 09:22 AM, Alex Bennée wrote: >> These structures pave the way for generic softfloat helper routines >> that will operate on fully decomposed numbers. >> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> fpu/softfloat.c | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- >> 1 file changed, 69 insertions(+), 1 deletion(-) >> >> diff --git a/fpu/softfloat.c b/fpu/softfloat.c >> index 59afe81d06..fcba28d3f8 100644 >> --- a/fpu/softfloat.c >> +++ b/fpu/softfloat.c >> @@ -83,7 +83,7 @@ this code that are retained. >> * target-dependent and needs the TARGET_* macros. >> */ >> #include "qemu/osdep.h" >> - >> +#include "qemu/bitops.h" >> #include "fpu/softfloat.h" >> >> /* We only need stdlib for abort() */ >> @@ -186,6 +186,74 @@ static inline flag extractFloat64Sign(float64 a) >> return float64_val(a) >> 63; >> } >> >> +/*---------------------------------------------------------------------------- >> +| Classify a floating point number. >> +*----------------------------------------------------------------------------*/ >> + >> +typedef enum { >> + float_class_unclassified, >> + float_class_zero, >> + float_class_normal, >> + float_class_inf, >> + float_class_qnan, >> + float_class_snan, >> + float_class_dnan, >> + float_class_msnan, /* maybe silenced */ >> +} float_class; >> + >> +/*---------------------------------------------------------------------------- >> +| Structure holding all of the decomposed parts of a float. >> +| The exponent is unbiased and the fraction is normalized. >> +*----------------------------------------------------------------------------*/ >> + >> +typedef struct { >> + uint64_t frac : 64; > > I think this does not work on LLP64/IL32P64 model. > > Should we add a check in ./configure and refuse to build on IL32P64 > model? This would be safer IMHO. 
> >> + int exp : 32; >> + float_class cls : 8; >> + int : 23; >> + bool sign : 1; > > checking on "ISO/IEC 14882:1998" 9.6 Bit-fields: > > Alignment of bit-fields is implementation-defined. Bit-fields are packed > into some addressable allocation unit. [Note: bit-fields straddle > allocation units on some machines and not on others. Bit-fields are > assigned right-to-left on some machines, left-to-right on others. ] > > I'd still write it: > > int :23, sign :1; > >> +} decomposed_parts; I think rather than stuff it into bit fields we can just leave it up to the compiler? >> + >> +#define DECOMPOSED_BINARY_POINT (64 - 2) >> +#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT) >> +#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1) >> + >> +/* Structure holding all of the relevant parameters for a format. */ >> +typedef struct { >> + int exp_bias; >> + int exp_max; >> + int frac_shift; >> + uint64_t frac_lsb; >> + uint64_t frac_lsbm1; >> + uint64_t round_mask; >> + uint64_t roundeven_mask; >> +} decomposed_params; >> + >> +#define FRAC_PARAMS(F) \ >> + .frac_shift = F, \ >> + .frac_lsb = 1ull << (F), \ >> + .frac_lsbm1 = 1ull << ((F) - 1), \ >> + .round_mask = (1ull << (F)) - 1, \ >> + .roundeven_mask = (2ull << (F)) - 1 >> + >> +static const decomposed_params float16_params = { >> + .exp_bias = 0x0f, >> + .exp_max = 0x1f, >> + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 10) >> +}; >> + >> +static const decomposed_params float32_params = { >> + .exp_bias = 0x7f, >> + .exp_max = 0xff, >> + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 23) >> +}; >> + >> +static const decomposed_params float64_params = { >> + .exp_bias = 0x3ff, >> + .exp_max = 0x7ff, >> + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 52) >> +}; >> + >> /*---------------------------------------------------------------------------- >> | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 >> | and 7, and returns the properly rounded 32-bit integer corresponding to the >> -- Alex 
Bennée
Le 18 janv. 2018 10:09 AM, "Alex Bennée" <alex.bennee@linaro.org> a écrit : Philippe Mathieu-Daudé <f4bug@amsat.org> writes: > Hi Alex, Richard, > > On 01/09/2018 09:22 AM, Alex Bennée wrote: >> These structures pave the way for generic softfloat helper routines >> that will operate on fully decomposed numbers. >> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org> >> Signed-off-by: Richard Henderson <richard.henderson@linaro.org> >> --- >> fpu/softfloat.c | 70 ++++++++++++++++++++++++++++++ ++++++++++++++++++++++++++- >> 1 file changed, 69 insertions(+), 1 deletion(-) >> >> diff --git a/fpu/softfloat.c b/fpu/softfloat.c >> index 59afe81d06..fcba28d3f8 100644 >> --- a/fpu/softfloat.c >> +++ b/fpu/softfloat.c >> @@ -83,7 +83,7 @@ this code that are retained. >> * target-dependent and needs the TARGET_* macros. >> */ >> #include "qemu/osdep.h" >> - >> +#include "qemu/bitops.h" >> #include "fpu/softfloat.h" >> >> /* We only need stdlib for abort() */ >> @@ -186,6 +186,74 @@ static inline flag extractFloat64Sign(float64 a) >> return float64_val(a) >> 63; >> } >> >> +/*--------------------------------------------------------- ------------------- >> +| Classify a floating point number. >> +*---------------------------------------------------------- ------------------*/ >> + >> +typedef enum { >> + float_class_unclassified, >> + float_class_zero, >> + float_class_normal, >> + float_class_inf, >> + float_class_qnan, >> + float_class_snan, >> + float_class_dnan, >> + float_class_msnan, /* maybe silenced */ >> +} float_class; >> + >> +/*--------------------------------------------------------- ------------------- >> +| Structure holding all of the decomposed parts of a float. >> +| The exponent is unbiased and the fraction is normalized. >> +*---------------------------------------------------------- ------------------*/ >> + >> +typedef struct { >> + uint64_t frac : 64; > > I think this does not work on LLP64/IL32P64 model. 
> > Should we add a check in ./configure and refuse to build on IL32P64 > model? This would be safer IMHO. > >> + int exp : 32; >> + float_class cls : 8; >> + int : 23; >> + bool sign : 1; > > checking on "ISO/IEC 14882:1998" 9.6 Bit-fields: > > Alignment of bit-fields is implementation-defined. Bit-fields are packed > into some addressable allocation unit. [Note: bit-fields straddle > allocation units on some machines and not on others. Bit-fields are > assigned right-to-left on some machines, left-to-right on others. ] > > I'd still write it: > > int :23, sign :1; > >> +} decomposed_parts; I think rather than stuff it into bit fields we can just leave it up to the compiler? Yep, my only worry here is the IL32P64 model, if we care. >> + >> +#define DECOMPOSED_BINARY_POINT (64 - 2) >> +#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT) >> +#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1) >> + >> +/* Structure holding all of the relevant parameters for a format. 
*/ >> +typedef struct { >> + int exp_bias; >> + int exp_max; >> + int frac_shift; >> + uint64_t frac_lsb; >> + uint64_t frac_lsbm1; >> + uint64_t round_mask; >> + uint64_t roundeven_mask; >> +} decomposed_params; >> + >> +#define FRAC_PARAMS(F) \ >> + .frac_shift = F, \ >> + .frac_lsb = 1ull << (F), \ >> + .frac_lsbm1 = 1ull << ((F) - 1), \ >> + .round_mask = (1ull << (F)) - 1, \ >> + .roundeven_mask = (2ull << (F)) - 1 >> + >> +static const decomposed_params float16_params = { >> + .exp_bias = 0x0f, >> + .exp_max = 0x1f, >> + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 10) >> +}; >> + >> +static const decomposed_params float32_params = { >> + .exp_bias = 0x7f, >> + .exp_max = 0xff, >> + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 23) >> +}; >> + >> +static const decomposed_params float64_params = { >> + .exp_bias = 0x3ff, >> + .exp_max = 0x7ff, >> + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 52) >> +}; >> + >> /*---------------------------------------------------------- ------------------ >> | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 >> | and 7, and returns the properly rounded 32-bit integer corresponding to the >> -- Alex Bennée
On 18 January 2018 at 14:26, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > > > Le 18 janv. 2018 10:09 AM, "Alex Bennée" <alex.bennee@linaro.org> a écrit : > > > Philippe Mathieu-Daudé <f4bug@amsat.org> writes: >>> +typedef struct { >>> + uint64_t frac : 64; >> >> I think this does not work on LLP64/IL32P64 model. >> >> Should we add a check in ./configure and refuse to build on IL32P64 >> model? This would be safer IMHO. >> >>> + int exp : 32; >>> + float_class cls : 8; >>> + int : 23; >>> + bool sign : 1; >> >> checking on "ISO/IEC 14882:1998" 9.6 Bit-fields: >> >> Alignment of bit-fields is implementation-defined. Bit-fields are packed >> into some addressable allocation unit. [Note: bit-fields straddle >> allocation units on some machines and not on others. Bit-fields are >> assigned right-to-left on some machines, left-to-right on others. ] >> >> I'd still write it: >> >> int :23, sign :1; >> >>> +} decomposed_parts; > > I think rather than stuff it into bit fields we can just leave it up to > the compiler? > > > Yep, my only worry here is the IL32P64 model, if we care. I don't think we care much about IL32P64, but the code should still work there, right? It doesn't actually make any assumptions about bitfield layout. I think I agree that we shouldn't use bitfields here if we don't need to, though. thanks -- PMM
On 01/18/2018 11:31 AM, Peter Maydell wrote: > On 18 January 2018 at 14:26, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: >> Le 18 janv. 2018 10:09 AM, "Alex Bennée" <alex.bennee@linaro.org> a écrit : >> Philippe Mathieu-Daudé <f4bug@amsat.org> writes: >>>> +typedef struct { >>>> + uint64_t frac : 64; >>> >>> I think this does not work on LLP64/IL32P64 model. >>> >>> Should we add a check in ./configure and refuse to build on IL32P64 >>> model? This would be safer IMHO. >>> >>>> + int exp : 32; >>>> + float_class cls : 8; >>>> + int : 23; >>>> + bool sign : 1; >>> >>> checking on "ISO/IEC 14882:1998" 9.6 Bit-fields: >>> >>> Alignment of bit-fields is implementation-defined. Bit-fields are packed >>> into some addressable allocation unit. [Note: bit-fields straddle >>> allocation units on some machines and not on others. Bit-fields are >>> assigned right-to-left on some machines, left-to-right on others. ] >>> >>> I'd still write it: >>> >>> int :23, sign :1; >>> >>>> +} decomposed_parts; >> >> I think rather than stuff it into bit fields we can just leave it up to >> the compiler? >> >> >> Yep, my only worry here is the IL32P64 model, if we care. > > I don't think we care much about IL32P64, but the code should > still work there, right? It doesn't actually make any assumptions > about bitfield layout. My comment was for a previous line: uint64_t frac : 64; I don't have enough compiler knowledge to be sure how this bitfield is interpreted by the compiler. I understood the standard as bitfields are for 'unsigned', and for IL32 we have sizeof(unsigned) = 32, so I wonder how a :64 bitfield ends (bits >= 32 silently truncated?). Richard do you have an idea? > > I think I agree that we shouldn't use bitfields here if we don't > need to, though. > > thanks > -- PMM >
On 18 January 2018 at 14:59, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: > My comment was for a previous line: > > uint64_t frac : 64; > > I don't have enough compiler knowledge to be sure how this bitfield is > interpreted by the compiler. I understood the standard as bitfields are > for 'unsigned', and for IL32 we have sizeof(unsigned) = 32, so I wonder > how a :64 bitfield ends (bits >= 32 silently truncated?). Defining a 64-bit bitfield is a bit pointless (why not just use uint64_t?) but there's nothing particularly different for IL32P64 here. The spec says the underlying type is _Bool, signed int, unsigned int, or an implementation defined type. For QEMU's hosts 'int' is always 32 bits, so if gcc and clang allow bitfields on a 64-bit type like uint64_t (as an impdef extension) then they should work on all hosts. (In any case it needs to either work or give a compiler error, silent truncation isn't an option.) thanks -- PMM
Peter Maydell <peter.maydell@linaro.org> writes: > On 18 January 2018 at 14:59, Philippe Mathieu-Daudé <f4bug@amsat.org> wrote: >> My comment was for a previous line: >> >> uint64_t frac : 64; >> >> I don't have enough compiler knowledge to be sure how this bitfield is >> interpreted by the compiler. I understood the standard as bitfields are >> for 'unsigned', and for IL32 we have sizeof(unsigned) = 32, so I wonder >> how a :64 bitfield ends (bits >= 32 silently truncated?). > > Defining a 64-bit bitfield is a bit pointless (why not just use > uint64_t?) but there's nothing particularly different for IL32P64 here. > The spec says the underlying type is _Bool, signed int, unsigned > into, or an implementation defined type. For QEMU's hosts 'int' > is always 32 bits, so if gcc and clang allow bitfields on a > 64-bit type like uint64_t (as an impdef extension) then they > should work on all hosts. (In any case it needs to either work > or give a compiler error, silent truncation isn't an option.) Using explicit size types and an attribute on FloatClass seemed to be enough: /* * Classify a floating point number. Everything above float_class_qnan * is a NaN so cls >= float_class_qnan is any NaN. */ typedef enum __attribute__ ((__packed__)) { float_class_unclassified, float_class_zero, float_class_normal, float_class_inf, float_class_qnan, /* all NaNs from here */ float_class_snan, float_class_dnan, float_class_msnan, /* maybe silenced */ } FloatClass; /* * Structure holding all of the decomposed parts of a float. The * exponent is unbiased and the fraction is normalized. All * calculations are done with a 64 bit fraction and then rounded as * appropriate for the final format. * * Thanks to the packed FloatClass a decent compiler should be able to * fit the whole structure into registers and avoid using the stack * for parameter passing. */ typedef struct { uint64_t frac; int32_t exp; FloatClass cls; bool sign; } FloatParts; -- Alex Bennée
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 59afe81d06..fcba28d3f8 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -83,7 +83,7 @@ this code that are retained. * target-dependent and needs the TARGET_* macros. */ #include "qemu/osdep.h" - +#include "qemu/bitops.h" #include "fpu/softfloat.h" /* We only need stdlib for abort() */ @@ -186,6 +186,74 @@ static inline flag extractFloat64Sign(float64 a) return float64_val(a) >> 63; } +/*---------------------------------------------------------------------------- +| Classify a floating point number. +*----------------------------------------------------------------------------*/ + +typedef enum { + float_class_unclassified, + float_class_zero, + float_class_normal, + float_class_inf, + float_class_qnan, + float_class_snan, + float_class_dnan, + float_class_msnan, /* maybe silenced */ +} float_class; + +/*---------------------------------------------------------------------------- +| Structure holding all of the decomposed parts of a float. +| The exponent is unbiased and the fraction is normalized. +*----------------------------------------------------------------------------*/ + +typedef struct { + uint64_t frac : 64; + int exp : 32; + float_class cls : 8; + int : 23; + bool sign : 1; +} decomposed_parts; + +#define DECOMPOSED_BINARY_POINT (64 - 2) +#define DECOMPOSED_IMPLICIT_BIT (1ull << DECOMPOSED_BINARY_POINT) +#define DECOMPOSED_OVERFLOW_BIT (DECOMPOSED_IMPLICIT_BIT << 1) + +/* Structure holding all of the relevant parameters for a format. 
*/ +typedef struct { + int exp_bias; + int exp_max; + int frac_shift; + uint64_t frac_lsb; + uint64_t frac_lsbm1; + uint64_t round_mask; + uint64_t roundeven_mask; +} decomposed_params; + +#define FRAC_PARAMS(F) \ + .frac_shift = F, \ + .frac_lsb = 1ull << (F), \ + .frac_lsbm1 = 1ull << ((F) - 1), \ + .round_mask = (1ull << (F)) - 1, \ + .roundeven_mask = (2ull << (F)) - 1 + +static const decomposed_params float16_params = { + .exp_bias = 0x0f, + .exp_max = 0x1f, + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 10) +}; + +static const decomposed_params float32_params = { + .exp_bias = 0x7f, + .exp_max = 0xff, + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 23) +}; + +static const decomposed_params float64_params = { + .exp_bias = 0x3ff, + .exp_max = 0x7ff, + FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 52) +}; + /*---------------------------------------------------------------------------- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 | and 7, and returns the properly rounded 32-bit integer corresponding to the