diff mbox series

[v2,11/20] fpu/softfloat: re-factor add/sub

Message ID 20180109122252.17670-12-alex.bennee@linaro.org
State New
Headers show
Series re-factor softfloat and add fp16 functions | expand

Commit Message

Alex Bennée Jan. 9, 2018, 12:22 p.m. UTC
We can now add float16_add/sub and use the common decompose and
canonicalize functions to have a single implementation for
float16/32/64 add and sub functions.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 fpu/softfloat.c         | 904 +++++++++++++++++++++++++-----------------------
 include/fpu/softfloat.h |   4 +
 2 files changed, 481 insertions(+), 427 deletions(-)

-- 
2.15.1

Comments

Peter Maydell Jan. 12, 2018, 3:57 p.m. UTC | #1
On 9 January 2018 at 12:22, Alex Bennée <alex.bennee@linaro.org> wrote:
> We can now add float16_add/sub and use the common decompose and

> canonicalize functions to have a single implementation for

> float16/32/64 add and sub functions.

>

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  fpu/softfloat.c         | 904 +++++++++++++++++++++++++-----------------------

>  include/fpu/softfloat.h |   4 +

>  2 files changed, 481 insertions(+), 427 deletions(-)

>

> diff --git a/fpu/softfloat.c b/fpu/softfloat.c

> index fcba28d3f8..f89e47e3ef 100644

> --- a/fpu/softfloat.c

> +++ b/fpu/softfloat.c

> @@ -195,7 +195,7 @@ typedef enum {

>      float_class_zero,

>      float_class_normal,

>      float_class_inf,

> -    float_class_qnan,

> +    float_class_qnan,  /* all NaNs from here */


This comment change should be squashed into the previous patch.

>      float_class_snan,

>      float_class_dnan,

>      float_class_msnan, /* maybe silenced */

> @@ -254,6 +254,482 @@ static const decomposed_params float64_params = {

>      FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 52)

>  };

>

> +/* Unpack a float16 to parts, but do not canonicalize.  */

> +static inline decomposed_parts float16_unpack_raw(float16 f)

> +{

> +    return (decomposed_parts){

> +        .cls = float_class_unclassified,

> +        .sign = extract32(f, 15, 1),

> +        .exp = extract32(f, 10, 5),

> +        .frac = extract32(f, 0, 10)


In the previous patch we defined a bunch of structs that
give information about each float format, so it seems a bit
odd to be hardcoding bit numbers here.

> +    };

> +}

> +

> +/* Unpack a float32 to parts, but do not canonicalize.  */

> +static inline decomposed_parts float32_unpack_raw(float32 f)

> +{

> +    return (decomposed_parts){

> +        .cls = float_class_unclassified,

> +        .sign = extract32(f, 31, 1),

> +        .exp = extract32(f, 23, 8),

> +        .frac = extract32(f, 0, 23)

> +    };

> +}

> +

> +/* Unpack a float64 to parts, but do not canonicalize.  */

> +static inline decomposed_parts float64_unpack_raw(float64 f)

> +{

> +    return (decomposed_parts){

> +        .cls = float_class_unclassified,

> +        .sign = extract64(f, 63, 1),

> +        .exp = extract64(f, 52, 11),

> +        .frac = extract64(f, 0, 52),

> +    };

> +}

> +

> +/* Pack a float32 from parts, but do not canonicalize.  */

> +static inline float16 float16_pack_raw(decomposed_parts p)

> +{

> +    uint32_t ret = p.frac;

> +    ret = deposit32(ret, 10, 5, p.exp);

> +    ret = deposit32(ret, 15, 1, p.sign);

> +    return make_float16(ret);

> +}

> +

> +/* Pack a float32 from parts, but do not canonicalize.  */

> +static inline float32 float32_pack_raw(decomposed_parts p)

> +{

> +    uint32_t ret = p.frac;

> +    ret = deposit32(ret, 23, 8, p.exp);

> +    ret = deposit32(ret, 31, 1, p.sign);

> +    return make_float32(ret);

> +}

> +

> +/* Pack a float64 from parts, but do not canonicalize.  */

> +static inline float64 float64_pack_raw(decomposed_parts p)

> +{

> +    uint64_t ret = p.frac;

> +    ret = deposit64(ret, 52, 11, p.exp);

> +    ret = deposit64(ret, 63, 1, p.sign);

> +    return make_float64(ret);

> +}

> +

> +/* Canonicalize EXP and FRAC, setting CLS.  */

> +static decomposed_parts decomposed_canonicalize(decomposed_parts part,

> +                                        const decomposed_params *parm,


If you pick more compact names for your decomposed_params and
decomposed_parts structs, you won't have such awkwardness trying
to format function prototypes. (checkpatch complains that you have
an overlong line somewhere in this patch for this reason.)

In particular "decomposed_params" I think should change -- it's
confusingly similar to decomposed_parts, and it isn't really
a decomposed anything. It's just a collection of useful information
describing the float format. Try 'fmtinfo', maybe?

I see we're passing and returning decomposed_parts structs everywhere
rather than pointers to them. How well does that compile? (I guess
everything ends up inlining...)

> +                                        float_status *status)

> +{

> +    if (part.exp == parm->exp_max) {

> +        if (part.frac == 0) {

> +            part.cls = float_class_inf;

> +        } else {

> +#ifdef NO_SIGNALING_NANS


The old code didn't seem to need to ifdef this; why's the new
code different? (at some point we'll want to make this a runtime
setting so we can support one binary handling CPUs with it both
set and unset, but that is a far future thing we can ignore for now)

> +            part.cls = float_class_qnan;

> +#else

> +            int64_t msb = part.frac << (parm->frac_shift + 2);

> +            if ((msb < 0) == status->snan_bit_is_one) {

> +                part.cls = float_class_snan;

> +            } else {

> +                part.cls = float_class_qnan;

> +            }

> +#endif

> +        }

> +    } else if (part.exp == 0) {

> +        if (likely(part.frac == 0)) {

> +            part.cls = float_class_zero;

> +        } else if (status->flush_inputs_to_zero) {

> +            float_raise(float_flag_input_denormal, status);

> +            part.cls = float_class_zero;

> +            part.frac = 0;

> +        } else {

> +            int shift = clz64(part.frac) - 1;

> +            part.cls = float_class_normal;


This is really confusing. This is a *denormal*, but we're setting
the classification to "normal" ? (It's particularly confusing in
the code that uses the decomposed numbers, because it looks like
"if (a.cls == float_class_normal...)" is handling the normal-number
case and denormals are going to be in a later if branch, but actually
it's dealing with both.)

> +            part.exp = parm->frac_shift - parm->exp_bias - shift + 1;

> +            part.frac <<= shift;

> +        }

> +    } else {

> +        part.cls = float_class_normal;

> +        part.exp -= parm->exp_bias;

> +        part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);

> +    }

> +    return part;

> +}

> +

> +/* Round and uncanonicalize a floating-point number by parts.

> +   There are FRAC_SHIFT bits that may require rounding at the bottom

> +   of the fraction; these bits will be removed.  The exponent will be

> +   biased by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].  */


This is an inconsistent multiline comment style to what you use
elsewhere in this patch...

> +static decomposed_parts decomposed_round_canonical(decomposed_parts p,

> +                                                   float_status *s,

> +                                                   const decomposed_params *parm)

> +{

> +    const uint64_t frac_lsbm1 = parm->frac_lsbm1;

> +    const uint64_t round_mask = parm->round_mask;

> +    const uint64_t roundeven_mask = parm->roundeven_mask;

> +    const int exp_max = parm->exp_max;

> +    const int frac_shift = parm->frac_shift;

> +    uint64_t frac, inc;

> +    int exp, flags = 0;

> +    bool overflow_norm;

> +

> +    frac = p.frac;

> +    exp = p.exp;

> +

> +    switch (p.cls) {

> +    case float_class_normal:

> +        switch (s->float_rounding_mode) {

> +        case float_round_nearest_even:

> +            overflow_norm = false;

> +            inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);

> +            break;

> +        case float_round_ties_away:

> +            overflow_norm = false;

> +            inc = frac_lsbm1;

> +            break;

> +        case float_round_to_zero:

> +            overflow_norm = true;

> +            inc = 0;

> +            break;

> +        case float_round_up:

> +            inc = p.sign ? 0 : round_mask;

> +            overflow_norm = p.sign;

> +            break;

> +        case float_round_down:

> +            inc = p.sign ? round_mask : 0;

> +            overflow_norm = !p.sign;

> +            break;

> +        default:

> +            g_assert_not_reached();

> +        }

> +

> +        exp += parm->exp_bias;

> +        if (likely(exp > 0)) {

> +            if (frac & round_mask) {

> +                flags |= float_flag_inexact;

> +                frac += inc;

> +                if (frac & DECOMPOSED_OVERFLOW_BIT) {

> +                    frac >>= 1;

> +                    exp++;

> +                }

> +            }

> +            frac >>= frac_shift;

> +

> +            if (unlikely(exp >= exp_max)) {

> +                flags |= float_flag_overflow | float_flag_inexact;

> +                if (overflow_norm) {

> +                    exp = exp_max - 1;

> +                    frac = -1;

> +                } else {

> +                    p.cls = float_class_inf;

> +                    goto do_inf;

> +                }

> +            }

> +        } else if (s->flush_to_zero) {

> +            flags |= float_flag_output_denormal;

> +            p.cls = float_class_zero;

> +            goto do_zero;

> +        } else {

> +            bool is_tiny = (s->float_detect_tininess

> +                            == float_tininess_before_rounding)

> +                        || (exp < 0)

> +                        || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);

> +

> +            shift64RightJamming(frac, 1 - exp, &frac);

> +            if (frac & round_mask) {

> +                /* Need to recompute round-to-even.  */

> +                if (s->float_rounding_mode == float_round_nearest_even) {

> +                    inc = ((frac & roundeven_mask) != frac_lsbm1

> +                           ? frac_lsbm1 : 0);

> +                }

> +                flags |= float_flag_inexact;

> +                frac += inc;

> +            }

> +

> +            exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);

> +            frac >>= frac_shift;

> +

> +            if (is_tiny && (flags & float_flag_inexact)) {

> +                flags |= float_flag_underflow;

> +            }

> +            if (exp == 0 && frac == 0) {

> +                p.cls = float_class_zero;

> +            }

> +        }

> +        break;

> +

> +    case float_class_zero:

> +    do_zero:

> +        exp = 0;

> +        frac = 0;

> +        break;

> +

> +    case float_class_inf:

> +    do_inf:

> +        exp = exp_max;

> +        frac = 0;

> +        break;

> +

> +    case float_class_qnan:

> +    case float_class_snan:

> +        exp = exp_max;

> +        break;

> +

> +    default:

> +        g_assert_not_reached();

> +    }

> +

> +    float_raise(flags, s);

> +    p.exp = exp;

> +    p.frac = frac;

> +    return p;

> +}

> +

> +static decomposed_parts float16_unpack_canonical(float16 f, float_status *s)

> +{

> +    return decomposed_canonicalize(float16_unpack_raw(f), &float16_params, s);

> +}

> +

> +static float16 float16_round_pack_canonical(decomposed_parts p, float_status *s)

> +{

> +    switch (p.cls) {

> +    case float_class_dnan:

> +        return float16_default_nan(s);

> +    case float_class_msnan:

> +        return float16_maybe_silence_nan(float16_pack_raw(p), s);


I think you will find that doing the silencing of the NaNs like this
isn't quite the right approach. Specifically, for Arm targets we
currently have a bug in float-to-float conversion from a wider
format to a narrower one when the input is a signaling NaN that we
want to silence, and its non-zero mantissa bits are all at the
less-significant end of the mantissa such that they don't fit into
the narrower format. If you pack the float into a float16 first and
then call maybe_silence_nan() on it you've lost the info about those
low bits which the silence function needs to know to return the
right answer. What you want to do instead is pass the silence_nan
function the decomposed value.

(The effect of this bug is that we return a default NaN, with the
sign bit clear, but the Arm FPConvertNaN pseudocode says that we
should effectively get the default NaN but with the same sign bit
as the input SNaN.)

Given that this is a bug currently in the version we have, we don't
necessarily need to fix it now, but I thought I'd mention it since
the redesign has almost but not quite managed to deliver the right
information to the silencing code to allow us to fix it soon :-)

> +    default:

> +        p = decomposed_round_canonical(p, s, &float16_params);

> +        return float16_pack_raw(p);

> +    }

> +}

> +

> +static decomposed_parts float32_unpack_canonical(float32 f, float_status *s)

> +{

> +    return decomposed_canonicalize(float32_unpack_raw(f), &float32_params, s);

> +}

> +

> +static float32 float32_round_pack_canonical(decomposed_parts p, float_status *s)

> +{

> +    switch (p.cls) {

> +    case float_class_dnan:

> +        return float32_default_nan(s);

> +    case float_class_msnan:

> +        return float32_maybe_silence_nan(float32_pack_raw(p), s);

> +    default:

> +        p = decomposed_round_canonical(p, s, &float32_params);

> +        return float32_pack_raw(p);

> +    }

> +}

> +

> +static decomposed_parts float64_unpack_canonical(float64 f, float_status *s)

> +{

> +    return decomposed_canonicalize(float64_unpack_raw(f), &float64_params, s);

> +}

> +

> +static float64 float64_round_pack_canonical(decomposed_parts p, float_status *s)

> +{

> +    switch (p.cls) {

> +    case float_class_dnan:

> +        return float64_default_nan(s);

> +    case float_class_msnan:

> +        return float64_maybe_silence_nan(float64_pack_raw(p), s);

> +    default:

> +        p = decomposed_round_canonical(p, s, &float64_params);

> +        return float64_pack_raw(p);

> +    }

> +}

> +

> +static decomposed_parts pick_nan_parts(decomposed_parts a, decomposed_parts b,

> +                                       float_status *s)

> +{

> +    if (a.cls == float_class_snan || b.cls == float_class_snan) {

> +        s->float_exception_flags |= float_flag_invalid;

> +    }

> +

> +    if (s->default_nan_mode) {

> +        a.cls = float_class_dnan;

> +    } else {

> +        if (pickNaN(a.cls == float_class_qnan,

> +                    a.cls == float_class_snan,

> +                    b.cls == float_class_qnan,

> +                    b.cls == float_class_snan,

> +                    a.frac > b.frac

> +                    || (a.frac == b.frac && a.sign < b.sign))) {

> +            a = b;

> +        }

> +        a.cls = float_class_msnan;

> +    }

> +    return a;

> +}

> +

> +

> +/*

> + * Returns the result of adding the absolute values of the

> + * floating-point values `a' and `b'. If `subtract' is set, the sum is

> + * negated before being returned. `subtract' is ignored if the result

> + * is a NaN. The addition is performed according to the IEC/IEEE

> + * Standard for Binary Floating-Point Arithmetic.

> + */


This comment doesn't seem to match what the code is doing,
because it says it adds the absolute values of 'a' and 'b',
but the code looks at a_sign and b_sign to decide whether it's
doing an addition or subtraction rather than ignoring the signs
(as you would for absolute arithmetic).

Put another way, this comment has been copied from the old addFloat64Sigs()
and not updated to account for the way the new function includes handling
of subFloat64Sigs().

> +

> +static decomposed_parts add_decomposed(decomposed_parts a, decomposed_parts b,

> +                                       bool subtract, float_status *s)

> +{

> +    bool a_sign = a.sign;

> +    bool b_sign = b.sign ^ subtract;

> +

> +    if (a_sign != b_sign) {

> +        /* Subtraction */

> +

> +        if (a.cls == float_class_normal && b.cls == float_class_normal) {

> +            int a_exp = a.exp;

> +            int b_exp = b.exp;

> +            uint64_t a_frac = a.frac;

> +            uint64_t b_frac = b.frac;


Do we really have to use locals here rather than just using a.frac,
b.frac etc in place ? If we trust the compiler enough to throw
structs in and out of functions and let everything inline, it
ought to be able to handle a uint64_t in a struct local variable.

> +

> +            if (a_exp > b_exp || (a_exp == b_exp && a_frac >= b_frac)) {

> +                shift64RightJamming(b_frac, a_exp - b_exp, &b_frac);

> +                a_frac = a_frac - b_frac;

> +            } else {

> +                shift64RightJamming(a_frac, b_exp - a_exp, &a_frac);

> +                a_frac = b_frac - a_frac;

> +                a_exp = b_exp;

> +                a_sign ^= 1;

> +            }

> +

> +            if (a_frac == 0) {

> +                a.cls = float_class_zero;

> +                a.sign = s->float_rounding_mode == float_round_down;

> +            } else {

> +                int shift = clz64(a_frac) - 1;

> +                a.frac = a_frac << shift;

> +                a.exp = a_exp - shift;

> +                a.sign = a_sign;

> +            }

> +            return a;

> +        }

> +        if (a.cls >= float_class_qnan

> +            ||

> +            b.cls >= float_class_qnan)

> +        {

> +            return pick_nan_parts(a, b, s);

> +        }

> +        if (a.cls == float_class_inf) {

> +            if (b.cls == float_class_inf) {

> +                float_raise(float_flag_invalid, s);

> +                a.cls = float_class_dnan;

> +            }

> +            return a;

> +        }

> +        if (a.cls == float_class_zero && b.cls == float_class_zero) {

> +            a.sign = s->float_rounding_mode == float_round_down;

> +            return a;

> +        }

> +        if (a.cls == float_class_zero || b.cls == float_class_inf) {

> +            b.sign = a_sign ^ 1;

> +            return b;

> +        }

> +        if (b.cls == float_class_zero) {

> +            return a;

> +        }

> +    } else {

> +        /* Addition */

> +        if (a.cls == float_class_normal && b.cls == float_class_normal) {

> +            int a_exp = a.exp;

> +            int b_exp = b.exp;

> +            uint64_t a_frac = a.frac;

> +            uint64_t b_frac = b.frac;

> +

> +            if (a_exp > b_exp) {

> +                shift64RightJamming(b_frac, a_exp - b_exp, &b_frac);

> +            } else if (a_exp < b_exp) {

> +                shift64RightJamming(a_frac, b_exp - a_exp, &a_frac);

> +                a_exp = b_exp;

> +            }

> +            a_frac += b_frac;

> +            if (a_frac & DECOMPOSED_OVERFLOW_BIT) {

> +                a_frac >>= 1;

> +                a_exp += 1;

> +            }

> +

> +            a.exp = a_exp;

> +            a.frac = a_frac;

> +            return a;

> +        }

> +        if (a.cls >= float_class_qnan

> +            ||

> +            b.cls >= float_class_qnan) {


We should have helper functions for "is some kind of NaN" rather than
baking the assumption about order of the enum values directly
into every function. (Also "float_is_any_nan(a)" is easier to read.)

> +            return pick_nan_parts(a, b, s);

> +        }

> +        if (a.cls == float_class_inf || b.cls == float_class_zero) {

> +            return a;

> +        }

> +        if (b.cls == float_class_inf || a.cls == float_class_zero) {

> +            b.sign = b_sign;

> +            return b;

> +        }

> +    }

> +    g_assert_not_reached();

> +}

> +

> +/*

> + * Returns the result of adding or subtracting the floating-point

> + * values `a' and `b'. The operation is performed according to the

> + * IEC/IEEE Standard for Binary Floating-Point Arithmetic.

> + */

> +

> +float16 float16_add(float16 a, float16 b, float_status *status)

> +{

> +    decomposed_parts pa = float16_unpack_canonical(a, status);

> +    decomposed_parts pb = float16_unpack_canonical(b, status);

> +    decomposed_parts pr = add_decomposed(pa, pb, false, status);

> +

> +    return float16_round_pack_canonical(pr, status);

> +}

> +

> +float32 float32_add(float32 a, float32 b, float_status *status)

> +{

> +    decomposed_parts pa = float32_unpack_canonical(a, status);

> +    decomposed_parts pb = float32_unpack_canonical(b, status);

> +    decomposed_parts pr = add_decomposed(pa, pb, false, status);

> +

> +    return float32_round_pack_canonical(pr, status);

> +}

> +

> +float64 float64_add(float64 a, float64 b, float_status *status)

> +{

> +    decomposed_parts pa = float64_unpack_canonical(a, status);

> +    decomposed_parts pb = float64_unpack_canonical(b, status);

> +    decomposed_parts pr = add_decomposed(pa, pb, false, status);

> +

> +    return float64_round_pack_canonical(pr, status);

> +}

> +

> +float16 float16_sub(float16 a, float16 b, float_status *status)

> +{

> +    decomposed_parts pa = float16_unpack_canonical(a, status);

> +    decomposed_parts pb = float16_unpack_canonical(b, status);

> +    decomposed_parts pr = add_decomposed(pa, pb, true, status);

> +

> +    return float16_round_pack_canonical(pr, status);

> +}

> +

> +float32 float32_sub(float32 a, float32 b, float_status *status)

> +{

> +    decomposed_parts pa = float32_unpack_canonical(a, status);

> +    decomposed_parts pb = float32_unpack_canonical(b, status);

> +    decomposed_parts pr = add_decomposed(pa, pb, true, status);

> +

> +    return float32_round_pack_canonical(pr, status);

> +}

> +

> +float64 float64_sub(float64 a, float64 b, float_status *status)

> +{

> +    decomposed_parts pa = float64_unpack_canonical(a, status);

> +    decomposed_parts pb = float64_unpack_canonical(b, status);

> +    decomposed_parts pr = add_decomposed(pa, pb, true, status);

> +

> +    return float64_round_pack_canonical(pr, status);

> +}


This part is a pretty good advert for the benefits of the refactoring...

I'm not particularly worried about the performance of softfloat,
but out of curiosity have you benchmarked the old vs new?

thanks
-- PMM
Richard Henderson Jan. 12, 2018, 6:30 p.m. UTC | #2
On 01/12/2018 07:57 AM, Peter Maydell wrote:
> I see we're passing and returning decomposed_parts structs everywhere

> rather than pointers to them. How well does that compile? (I guess

> everything ends up inlining...)


For the x86_64 abi at least, the structure (as defined with bitfields) is 128
bits and is passed and returned in registers.

I believe the same to be true for the aarch64 abi.  I'd have to do some
research to determine what happens for the other 64-bit hosts.


r~
Alex Bennée Jan. 18, 2018, 4:43 p.m. UTC | #3
Peter Maydell <peter.maydell@linaro.org> writes:

> On 9 January 2018 at 12:22, Alex Bennée <alex.bennee@linaro.org> wrote:

>> We can now add float16_add/sub and use the common decompose and

>> canonicalize functions to have a single implementation for

>> float16/32/64 add and sub functions.

>>

>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

>> ---

>>  fpu/softfloat.c         | 904 +++++++++++++++++++++++++-----------------------

>>  include/fpu/softfloat.h |   4 +

>>  2 files changed, 481 insertions(+), 427 deletions(-)

>>

>> diff --git a/fpu/softfloat.c b/fpu/softfloat.c

>> index fcba28d3f8..f89e47e3ef 100644

>> --- a/fpu/softfloat.c

>> +++ b/fpu/softfloat.c

>> @@ -195,7 +195,7 @@ typedef enum {

>>      float_class_zero,

>>      float_class_normal,

>>      float_class_inf,

>> -    float_class_qnan,

>> +    float_class_qnan,  /* all NaNs from here */

>

> This comment change should be squashed into the previous patch.

>

>>      float_class_snan,

>>      float_class_dnan,

>>      float_class_msnan, /* maybe silenced */

>> @@ -254,6 +254,482 @@ static const decomposed_params float64_params = {

>>      FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 52)

>>  };

>>

>> +/* Unpack a float16 to parts, but do not canonicalize.  */

>> +static inline decomposed_parts float16_unpack_raw(float16 f)

>> +{

>> +    return (decomposed_parts){

>> +        .cls = float_class_unclassified,

>> +        .sign = extract32(f, 15, 1),

>> +        .exp = extract32(f, 10, 5),

>> +        .frac = extract32(f, 0, 10)

>

> In the previous patch we defined a bunch of structs that

> give information about each float format, so it seems a bit

> odd to be hardcoding bit numbers here.


So something like this:

  /* Structure holding all of the relevant parameters for a format.
   *   exp_bias: the offset applied to the exponent field
   *   exp_max: the maximum normalised exponent
   * The following are computed based on the size of the fraction
   *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
   *   frac_lsb: least significant bit of fraction
   *   frac_lsbm1: the bit below the least significant bit (for rounding)
   *   round_mask/roundeven_mask: masks used for rounding
   */
  typedef struct {
      int exp_bias;
      int exp_max;
      int exp_size;
      int frac_size;
      int frac_shift;
      uint64_t frac_lsb;
      uint64_t frac_lsbm1;
      uint64_t round_mask;
      uint64_t roundeven_mask;
  } FloatFmt;

  /* Expand fields based on the size of exponent and fraction */
  #define FRAC_PARAMS(E, F)                                            \
      .exp_size       = E,                                             \
      .frac_size      = F,                                             \
      .frac_shift     = DECOMPOSED_BINARY_POINT - F,                   \
      .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - F),         \
      .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1),   \
      .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1,   \
      .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1

  static const FloatFmt float16_params = {
      .exp_bias       = 0x0f,
      .exp_max        = 0x1f,
      FRAC_PARAMS(5, 10)
  };

  static const FloatFmt float32_params = {
      .exp_bias       = 0x7f,
      .exp_max        = 0xff,
      FRAC_PARAMS(8, 23)
  };

  static const FloatFmt float64_params = {
      .exp_bias       = 0x3ff,
      .exp_max        = 0x7ff,
      FRAC_PARAMS(11, 52)
  };

  /* Unpack a float to parts, but do not canonicalize.  */
  static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
  {
      return (FloatParts){
          .cls = float_class_unclassified,
          .sign = extract64(raw, fmt.frac_size + fmt.exp_size, 1),
          .exp = extract64(raw, fmt.frac_size, fmt.exp_size),
          .frac = extract64(raw, 0, fmt.frac_size),
      };
  }

  static inline FloatParts float16_unpack_raw(float16 f)
  {
      return unpack_raw(float16_params, f);
  }

  static inline FloatParts float32_unpack_raw(float32 f)
  {
      return unpack_raw(float32_params, f);
  }

  static inline FloatParts float64_unpack_raw(float64 f)
  {
      return unpack_raw(float64_params, f);
  }

  /* Pack a float from parts, but do not canonicalize.  */
  static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
  {
      uint64_t ret = p.frac;
      ret = deposit64(ret, fmt.frac_size, fmt.exp_size, p.exp);
      ret = deposit32(ret, fmt.frac_size + fmt.exp_size, 1, p.sign);
      return make_float16(ret);
  }

  static inline float16 float16_pack_raw(FloatParts p)
  {
      return make_float16(pack_raw(float16_params, p));
  }

  static inline float32 float32_pack_raw(FloatParts p)
  {
      return make_float32(pack_raw(float32_params, p));
  }

  static inline float64 float64_pack_raw(FloatParts p)
  {
      return make_float64(pack_raw(float64_params, p));
  }


>

>> +    };

>> +}

>> +

>> +/* Unpack a float32 to parts, but do not canonicalize.  */

>> +static inline decomposed_parts float32_unpack_raw(float32 f)

>> +{

>> +    return (decomposed_parts){

>> +        .cls = float_class_unclassified,

>> +        .sign = extract32(f, 31, 1),

>> +        .exp = extract32(f, 23, 8),

>> +        .frac = extract32(f, 0, 23)

>> +    };

>> +}

>> +

>> +/* Unpack a float64 to parts, but do not canonicalize.  */

>> +static inline decomposed_parts float64_unpack_raw(float64 f)

>> +{

>> +    return (decomposed_parts){

>> +        .cls = float_class_unclassified,

>> +        .sign = extract64(f, 63, 1),

>> +        .exp = extract64(f, 52, 11),

>> +        .frac = extract64(f, 0, 52),

>> +    };

>> +}

>> +

>> +/* Pack a float32 from parts, but do not canonicalize.  */

>> +static inline float16 float16_pack_raw(decomposed_parts p)

>> +{

>> +    uint32_t ret = p.frac;

>> +    ret = deposit32(ret, 10, 5, p.exp);

>> +    ret = deposit32(ret, 15, 1, p.sign);

>> +    return make_float16(ret);

>> +}

>> +

>> +/* Pack a float32 from parts, but do not canonicalize.  */

>> +static inline float32 float32_pack_raw(decomposed_parts p)

>> +{

>> +    uint32_t ret = p.frac;

>> +    ret = deposit32(ret, 23, 8, p.exp);

>> +    ret = deposit32(ret, 31, 1, p.sign);

>> +    return make_float32(ret);

>> +}

>> +

>> +/* Pack a float64 from parts, but do not canonicalize.  */

>> +static inline float64 float64_pack_raw(decomposed_parts p)

>> +{

>> +    uint64_t ret = p.frac;

>> +    ret = deposit64(ret, 52, 11, p.exp);

>> +    ret = deposit64(ret, 63, 1, p.sign);

>> +    return make_float64(ret);

>> +}

>> +

>> +/* Canonicalize EXP and FRAC, setting CLS.  */

>> +static decomposed_parts decomposed_canonicalize(decomposed_parts part,

>> +                                        const decomposed_params *parm,

>

> If you pick more compact names for your decomposed_params and

> decomposed_parts structs, you won't have such awkwardness trying

> to format function prototypes. (checkpatch complains that you have

> an overlong line somewhere in this patch for this reason.)

>

> In particular "decomposed_params" I think should change -- it's

> confusingly similar to decomposed_parts, and it isn't really

> a decomposed anything. It's just a collection of useful information

> describing the float format. Try 'fmtinfo', maybe?


I've gone for FloatParts and FloatParams

>

> I see we're passing and returning decomposed_parts structs everywhere

> rather than pointers to them. How well does that compile? (I guess

> everything ends up inlining...)


Yes - if you use the bitfield struct. Without it you end up with quite a
messy preamble.


--
Alex Bennée
Richard Henderson Jan. 18, 2018, 4:47 p.m. UTC | #4
On 01/18/2018 08:43 AM, Alex Bennée wrote:
>   /* Expand fields based on the size of exponent and fraction */

>   #define FRAC_PARAMS(E, F)                                            \

>       .exp_size       = E,                                             \

>       .frac_size      = F,                                             \

>       .frac_shift     = DECOMPOSED_BINARY_POINT - F,                   \

>       .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - F),         \

>       .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - F) - 1),   \

>       .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - F)) - 1,   \

>       .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - F)) - 1

> 

>   static const FloatFmt float16_params = {

>       .exp_bias       = 0x0f,

>       .exp_max        = 0x1f,

>       FRAC_PARAMS(5, 10)

>   };


You can compute exp_bias and exp_max from E as well.


r~
Alex Bennée Jan. 23, 2018, 8:05 p.m. UTC | #5
Peter Maydell <peter.maydell@linaro.org> writes:

<snip>
>

>> +                                        float_status *status)

>> +{

>> +    if (part.exp == parm->exp_max) {

>> +        if (part.frac == 0) {

>> +            part.cls = float_class_inf;

>> +        } else {

>> +#ifdef NO_SIGNALING_NANS

>

> The old code didn't seem to need to ifdef this; why's the new

> code different? (at some point we'll want to make this a runtime

> setting so we can support one binary handling CPUs with it both

> set and unset, but that is a far future thing we can ignore for now)


It does, but it's hidden behind propagateFloatXXNaN which in turn calls
floatXX_is_quiet/signalling_nan which are altered by the #ifdefs.

>

>> +            part.cls = float_class_qnan;

>> +#else

>> +            int64_t msb = part.frac << (parm->frac_shift + 2);

>> +            if ((msb < 0) == status->snan_bit_is_one) {

>> +                part.cls = float_class_snan;

>> +            } else {

>> +                part.cls = float_class_qnan;

>> +            }

>> +#endif

>> +        }

>> +    } else if (part.exp == 0) {

>> +        if (likely(part.frac == 0)) {

>> +            part.cls = float_class_zero;

>> +        } else if (status->flush_inputs_to_zero) {

>> +            float_raise(float_flag_input_denormal, status);

>> +            part.cls = float_class_zero;

>> +            part.frac = 0;

>> +        } else {

>> +            int shift = clz64(part.frac) - 1;

>> +            part.cls = float_class_normal;

>

> This is really confusing. This is a *denormal*, but we're setting

> the classification to "normal" ? (It's particularly confusing in

> the code that uses the decomposed numbers, because it looks like

> "if (a.cls == float_class_normal...)" is handling the normal-number

> case and denormals are going to be in a later if branch, but actually

> it's dealing with both.)


The code deals with canonicalized numbers - so unless we explicitly
flush denormals to zero they can be treated like any other for the rest
of the code.

What would you prefer? A comment in FloatClass?

<snip>
>> +

>> +static float16 float16_round_pack_canonical(decomposed_parts p, float_status *s)

>> +{

>> +    switch (p.cls) {

>> +    case float_class_dnan:

>> +        return float16_default_nan(s);

>> +    case float_class_msnan:

>> +        return float16_maybe_silence_nan(float16_pack_raw(p), s);

>

> I think you will find that doing the silencing of the NaNs like this

> isn't quite the right approach. Specifically, for Arm targets we

> currently have a bug in float-to-float conversion from a wider

> format to a narrower one when the input is a signaling NaN that we

> want to silence, and its non-zero mantissa bits are all at the

> less-significant end of the mantissa such that they don't fit into

> the narrower format. If you pack the float into a float16 first and

> then call maybe_silence_nan() on it you've lost the info about those

> low bits which the silence function needs to know to return the

> right answer. What you want to do instead is pass the silence_nan

> function the decomposed value.


So this is an inherited bug from softfloat-specialize.h? I guess we need
a common specific decomposed specialisation we can use for all the sizes.

>

> (The effect of this bug is that we return a default NaN, with the

> sign bit clear, but the Arm FPConvertNaN pseudocode says that we

> should effectively get the default NaN but with the same sign bit

> as the input SNaN.)

>

> Given that this is a bug currently in the version we have, we don't

> necessarily need to fix it now, but I thought I'd mention it since

> the redesign has almost but not quite managed to deliver the right

> information to the silencing code to allow us to fix it soon :-)


So comment for now? Currently all the information for decomposed is kept
internal to softfloat.c - I'm not sure we want to expose the internals
to a wider audience? Especially as these inline helpers in specialize.h
are also used by helpers.

<snip>
>> +

>> +

>> +/*

>> + * Returns the result of adding the absolute values of the

>> + * floating-point values `a' and `b'. If `subtract' is set, the sum is

>> + * negated before being returned. `subtract' is ignored if the result

>> + * is a NaN. The addition is performed according to the IEC/IEEE

>> + * Standard for Binary Floating-Point Arithmetic.

>> + */

>

> This comment doesn't seem to match what the code is doing,

> because it says it adds the absolute values of 'a' and 'b',

> but the code looks at a_sign and b_sign to decide whether it's

> doing an addition or subtraction rather than ignoring the signs

> (as you would for absolute arithmetic).

>

> Put another way, this comment has been copied from the old addFloat64Sigs()

> and not updated to account for the way the new function includes handling

> of subFloat64Sigs().

>

>> +

>> +static decomposed_parts add_decomposed(decomposed_parts a, decomposed_parts b,

>> +                                       bool subtract, float_status *s)

>> +{

>> +    bool a_sign = a.sign;

>> +    bool b_sign = b.sign ^ subtract;

>> +

>> +    if (a_sign != b_sign) {

>> +        /* Subtraction */

>> +

>> +        if (a.cls == float_class_normal && b.cls == float_class_normal) {

>> +            int a_exp = a.exp;

>> +            int b_exp = b.exp;

>> +            uint64_t a_frac = a.frac;

>> +            uint64_t b_frac = b.frac;

>

> Do we really have to use locals here rather than just using a.frac,

> b.frac etc in place ? If we trust the compiler enough to throw

> structs in and out of functions and let everything inline, it

> ought to be able to handle a uint64_t in a struct local variable.


Fixed.

>> +        if (a.cls >= float_class_qnan

>> +            ||

>> +            b.cls >= float_class_qnan) {

>

> We should helper functions for "is some kind of NaN" rather than

> baking the assumption about order of the enum values directly

> into every function. (Also "float_is_any_nan(a)" is easier to read.)


if (is_nan(a.cls) || is_nan(b.cls))
>> +float64 float64_sub(float64 a, float64 b, float_status *status)

>> +{

>> +    decomposed_parts pa = float64_unpack_canonical(a, status);

>> +    decomposed_parts pb = float64_unpack_canonical(b, status);

>> +    decomposed_parts pr = add_decomposed(pa, pb, true, status);

>> +

>> +    return float64_round_pack_canonical(pr, status);

>> +}

>

> This part is a pretty good advert for the benefits of the refactoring...

>

> I'm not particularly worried about the performance of softfloat,

> but out of curiosity have you benchmarked the old vs new?


Not yet but I can run some with my vector kernel benchmark.

>

> thanks

> -- PMM



--
Alex Bennée
diff mbox series

Patch

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index fcba28d3f8..f89e47e3ef 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -195,7 +195,7 @@  typedef enum {
     float_class_zero,
     float_class_normal,
     float_class_inf,
-    float_class_qnan,
+    float_class_qnan,  /* all NaNs from here */
     float_class_snan,
     float_class_dnan,
     float_class_msnan, /* maybe silenced */
@@ -254,6 +254,482 @@  static const decomposed_params float64_params = {
     FRAC_PARAMS(DECOMPOSED_BINARY_POINT - 52)
 };
 
+/* Unpack a float16 to parts, but do not canonicalize.  */
+static inline decomposed_parts float16_unpack_raw(float16 f)
+{
+    return (decomposed_parts){
+        .cls = float_class_unclassified,
+        .sign = extract32(f, 15, 1),
+        .exp = extract32(f, 10, 5),
+        .frac = extract32(f, 0, 10)
+    };
+}
+
+/* Unpack a float32 to parts, but do not canonicalize.  */
+static inline decomposed_parts float32_unpack_raw(float32 f)
+{
+    return (decomposed_parts){
+        .cls = float_class_unclassified,
+        .sign = extract32(f, 31, 1),
+        .exp = extract32(f, 23, 8),
+        .frac = extract32(f, 0, 23)
+    };
+}
+
+/* Unpack a float64 to parts, but do not canonicalize.  */
+static inline decomposed_parts float64_unpack_raw(float64 f)
+{
+    return (decomposed_parts){
+        .cls = float_class_unclassified,
+        .sign = extract64(f, 63, 1),
+        .exp = extract64(f, 52, 11),
+        .frac = extract64(f, 0, 52),
+    };
+}
+
+/* Pack a float16 from parts, but do not canonicalize.  */
+static inline float16 float16_pack_raw(decomposed_parts p)
+{
+    uint32_t ret = p.frac;
+    ret = deposit32(ret, 10, 5, p.exp);
+    ret = deposit32(ret, 15, 1, p.sign);
+    return make_float16(ret);
+}
+
+/* Pack a float32 from parts, but do not canonicalize.  */
+static inline float32 float32_pack_raw(decomposed_parts p)
+{
+    uint32_t ret = p.frac;
+    ret = deposit32(ret, 23, 8, p.exp);
+    ret = deposit32(ret, 31, 1, p.sign);
+    return make_float32(ret);
+}
+
+/* Pack a float64 from parts, but do not canonicalize.  */
+static inline float64 float64_pack_raw(decomposed_parts p)
+{
+    uint64_t ret = p.frac;
+    ret = deposit64(ret, 52, 11, p.exp);
+    ret = deposit64(ret, 63, 1, p.sign);
+    return make_float64(ret);
+}
+
+/* Canonicalize EXP and FRAC, setting CLS.  */
+static decomposed_parts decomposed_canonicalize(decomposed_parts part,
+                                        const decomposed_params *parm,
+                                        float_status *status)
+{
+    if (part.exp == parm->exp_max) {
+        if (part.frac == 0) {
+            part.cls = float_class_inf;
+        } else {
+#ifdef NO_SIGNALING_NANS
+            part.cls = float_class_qnan;
+#else
+            int64_t msb = part.frac << (parm->frac_shift + 2);
+            if ((msb < 0) == status->snan_bit_is_one) {
+                part.cls = float_class_snan;
+            } else {
+                part.cls = float_class_qnan;
+            }
+#endif
+        }
+    } else if (part.exp == 0) {
+        if (likely(part.frac == 0)) {
+            part.cls = float_class_zero;
+        } else if (status->flush_inputs_to_zero) {
+            float_raise(float_flag_input_denormal, status);
+            part.cls = float_class_zero;
+            part.frac = 0;
+        } else {
+            int shift = clz64(part.frac) - 1;
+            part.cls = float_class_normal;
+            part.exp = parm->frac_shift - parm->exp_bias - shift + 1;
+            part.frac <<= shift;
+        }
+    } else {
+        part.cls = float_class_normal;
+        part.exp -= parm->exp_bias;
+        part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
+    }
+    return part;
+}
+
+/* Round and uncanonicalize a floating-point number by parts.
+   There are FRAC_SHIFT bits that may require rounding at the bottom
+   of the fraction; these bits will be removed.  The exponent will be
+   biased by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].  */
+static decomposed_parts decomposed_round_canonical(decomposed_parts p,
+                                                   float_status *s,
+                                                   const decomposed_params *parm)
+{
+    const uint64_t frac_lsbm1 = parm->frac_lsbm1;
+    const uint64_t round_mask = parm->round_mask;
+    const uint64_t roundeven_mask = parm->roundeven_mask;
+    const int exp_max = parm->exp_max;
+    const int frac_shift = parm->frac_shift;
+    uint64_t frac, inc;
+    int exp, flags = 0;
+    bool overflow_norm;
+
+    frac = p.frac;
+    exp = p.exp;
+
+    switch (p.cls) {
+    case float_class_normal:
+        switch (s->float_rounding_mode) {
+        case float_round_nearest_even:
+            overflow_norm = false;
+            inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : 0);
+            break;
+        case float_round_ties_away:
+            overflow_norm = false;
+            inc = frac_lsbm1;
+            break;
+        case float_round_to_zero:
+            overflow_norm = true;
+            inc = 0;
+            break;
+        case float_round_up:
+            inc = p.sign ? 0 : round_mask;
+            overflow_norm = p.sign;
+            break;
+        case float_round_down:
+            inc = p.sign ? round_mask : 0;
+            overflow_norm = !p.sign;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+
+        exp += parm->exp_bias;
+        if (likely(exp > 0)) {
+            if (frac & round_mask) {
+                flags |= float_flag_inexact;
+                frac += inc;
+                if (frac & DECOMPOSED_OVERFLOW_BIT) {
+                    frac >>= 1;
+                    exp++;
+                }
+            }
+            frac >>= frac_shift;
+
+            if (unlikely(exp >= exp_max)) {
+                flags |= float_flag_overflow | float_flag_inexact;
+                if (overflow_norm) {
+                    exp = exp_max - 1;
+                    frac = -1;
+                } else {
+                    p.cls = float_class_inf;
+                    goto do_inf;
+                }
+            }
+        } else if (s->flush_to_zero) {
+            flags |= float_flag_output_denormal;
+            p.cls = float_class_zero;
+            goto do_zero;
+        } else {
+            bool is_tiny = (s->float_detect_tininess
+                            == float_tininess_before_rounding)
+                        || (exp < 0)
+                        || !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
+
+            shift64RightJamming(frac, 1 - exp, &frac);
+            if (frac & round_mask) {
+                /* Need to recompute round-to-even.  */
+                if (s->float_rounding_mode == float_round_nearest_even) {
+                    inc = ((frac & roundeven_mask) != frac_lsbm1
+                           ? frac_lsbm1 : 0);
+                }
+                flags |= float_flag_inexact;
+                frac += inc;
+            }
+
+            exp = (frac & DECOMPOSED_IMPLICIT_BIT ? 1 : 0);
+            frac >>= frac_shift;
+
+            if (is_tiny && (flags & float_flag_inexact)) {
+                flags |= float_flag_underflow;
+            }
+            if (exp == 0 && frac == 0) {
+                p.cls = float_class_zero;
+            }
+        }
+        break;
+
+    case float_class_zero:
+    do_zero:
+        exp = 0;
+        frac = 0;
+        break;
+
+    case float_class_inf:
+    do_inf:
+        exp = exp_max;
+        frac = 0;
+        break;
+
+    case float_class_qnan:
+    case float_class_snan:
+        exp = exp_max;
+        break;
+
+    default:
+        g_assert_not_reached();
+    }
+
+    float_raise(flags, s);
+    p.exp = exp;
+    p.frac = frac;
+    return p;
+}
+
+static decomposed_parts float16_unpack_canonical(float16 f, float_status *s)
+{
+    return decomposed_canonicalize(float16_unpack_raw(f), &float16_params, s);
+}
+
+static float16 float16_round_pack_canonical(decomposed_parts p, float_status *s)
+{
+    switch (p.cls) {
+    case float_class_dnan:
+        return float16_default_nan(s);
+    case float_class_msnan:
+        return float16_maybe_silence_nan(float16_pack_raw(p), s);
+    default:
+        p = decomposed_round_canonical(p, s, &float16_params);
+        return float16_pack_raw(p);
+    }
+}
+
+static decomposed_parts float32_unpack_canonical(float32 f, float_status *s)
+{
+    return decomposed_canonicalize(float32_unpack_raw(f), &float32_params, s);
+}
+
+static float32 float32_round_pack_canonical(decomposed_parts p, float_status *s)
+{
+    switch (p.cls) {
+    case float_class_dnan:
+        return float32_default_nan(s);
+    case float_class_msnan:
+        return float32_maybe_silence_nan(float32_pack_raw(p), s);
+    default:
+        p = decomposed_round_canonical(p, s, &float32_params);
+        return float32_pack_raw(p);
+    }
+}
+
+static decomposed_parts float64_unpack_canonical(float64 f, float_status *s)
+{
+    return decomposed_canonicalize(float64_unpack_raw(f), &float64_params, s);
+}
+
+static float64 float64_round_pack_canonical(decomposed_parts p, float_status *s)
+{
+    switch (p.cls) {
+    case float_class_dnan:
+        return float64_default_nan(s);
+    case float_class_msnan:
+        return float64_maybe_silence_nan(float64_pack_raw(p), s);
+    default:
+        p = decomposed_round_canonical(p, s, &float64_params);
+        return float64_pack_raw(p);
+    }
+}
+
+static decomposed_parts pick_nan_parts(decomposed_parts a, decomposed_parts b,
+                                       float_status *s)
+{
+    if (a.cls == float_class_snan || b.cls == float_class_snan) {
+        s->float_exception_flags |= float_flag_invalid;
+    }
+
+    if (s->default_nan_mode) {
+        a.cls = float_class_dnan;
+    } else {
+        if (pickNaN(a.cls == float_class_qnan,
+                    a.cls == float_class_snan,
+                    b.cls == float_class_qnan,
+                    b.cls == float_class_snan,
+                    a.frac > b.frac
+                    || (a.frac == b.frac && a.sign < b.sign))) {
+            a = b;
+        }
+        a.cls = float_class_msnan;
+    }
+    return a;
+}
+
+
+/*
+ * Returns the result of adding or subtracting the floating-point
+ * values `a' and `b'. If `subtract' is set, the sign of `b' is
+ * inverted before the operation, which otherwise honours the signs
+ * of both operands. The operation is performed according to the
+ * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+ */
+
+static decomposed_parts add_decomposed(decomposed_parts a, decomposed_parts b,
+                                       bool subtract, float_status *s)
+{
+    bool a_sign = a.sign;
+    bool b_sign = b.sign ^ subtract;
+
+    if (a_sign != b_sign) {
+        /* Subtraction */
+
+        if (a.cls == float_class_normal && b.cls == float_class_normal) {
+            int a_exp = a.exp;
+            int b_exp = b.exp;
+            uint64_t a_frac = a.frac;
+            uint64_t b_frac = b.frac;
+
+            if (a_exp > b_exp || (a_exp == b_exp && a_frac >= b_frac)) {
+                shift64RightJamming(b_frac, a_exp - b_exp, &b_frac);
+                a_frac = a_frac - b_frac;
+            } else {
+                shift64RightJamming(a_frac, b_exp - a_exp, &a_frac);
+                a_frac = b_frac - a_frac;
+                a_exp = b_exp;
+                a_sign ^= 1;
+            }
+
+            if (a_frac == 0) {
+                a.cls = float_class_zero;
+                a.sign = s->float_rounding_mode == float_round_down;
+            } else {
+                int shift = clz64(a_frac) - 1;
+                a.frac = a_frac << shift;
+                a.exp = a_exp - shift;
+                a.sign = a_sign;
+            }
+            return a;
+        }
+        if (a.cls >= float_class_qnan
+            ||
+            b.cls >= float_class_qnan)
+        {
+            return pick_nan_parts(a, b, s);
+        }
+        if (a.cls == float_class_inf) {
+            if (b.cls == float_class_inf) {
+                float_raise(float_flag_invalid, s);
+                a.cls = float_class_dnan;
+            }
+            return a;
+        }
+        if (a.cls == float_class_zero && b.cls == float_class_zero) {
+            a.sign = s->float_rounding_mode == float_round_down;
+            return a;
+        }
+        if (a.cls == float_class_zero || b.cls == float_class_inf) {
+            b.sign = a_sign ^ 1;
+            return b;
+        }
+        if (b.cls == float_class_zero) {
+            return a;
+        }
+    } else {
+        /* Addition */
+        if (a.cls == float_class_normal && b.cls == float_class_normal) {
+            int a_exp = a.exp;
+            int b_exp = b.exp;
+            uint64_t a_frac = a.frac;
+            uint64_t b_frac = b.frac;
+
+            if (a_exp > b_exp) {
+                shift64RightJamming(b_frac, a_exp - b_exp, &b_frac);
+            } else if (a_exp < b_exp) {
+                shift64RightJamming(a_frac, b_exp - a_exp, &a_frac);
+                a_exp = b_exp;
+            }
+            a_frac += b_frac;
+            if (a_frac & DECOMPOSED_OVERFLOW_BIT) {
+                a_frac >>= 1;
+                a_exp += 1;
+            }
+
+            a.exp = a_exp;
+            a.frac = a_frac;
+            return a;
+        }
+        if (a.cls >= float_class_qnan
+            ||
+            b.cls >= float_class_qnan) {
+            return pick_nan_parts(a, b, s);
+        }
+        if (a.cls == float_class_inf || b.cls == float_class_zero) {
+            return a;
+        }
+        if (b.cls == float_class_inf || a.cls == float_class_zero) {
+            b.sign = b_sign;
+            return b;
+        }
+    }
+    g_assert_not_reached();
+}
+
+/*
+ * Returns the result of adding or subtracting the floating-point
+ * values `a' and `b'. The operation is performed according to the
+ * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+ */
+
+float16 float16_add(float16 a, float16 b, float_status *status)
+{
+    decomposed_parts pa = float16_unpack_canonical(a, status);
+    decomposed_parts pb = float16_unpack_canonical(b, status);
+    decomposed_parts pr = add_decomposed(pa, pb, false, status);
+
+    return float16_round_pack_canonical(pr, status);
+}
+
+float32 float32_add(float32 a, float32 b, float_status *status)
+{
+    decomposed_parts pa = float32_unpack_canonical(a, status);
+    decomposed_parts pb = float32_unpack_canonical(b, status);
+    decomposed_parts pr = add_decomposed(pa, pb, false, status);
+
+    return float32_round_pack_canonical(pr, status);
+}
+
+float64 float64_add(float64 a, float64 b, float_status *status)
+{
+    decomposed_parts pa = float64_unpack_canonical(a, status);
+    decomposed_parts pb = float64_unpack_canonical(b, status);
+    decomposed_parts pr = add_decomposed(pa, pb, false, status);
+
+    return float64_round_pack_canonical(pr, status);
+}
+
+float16 float16_sub(float16 a, float16 b, float_status *status)
+{
+    decomposed_parts pa = float16_unpack_canonical(a, status);
+    decomposed_parts pb = float16_unpack_canonical(b, status);
+    decomposed_parts pr = add_decomposed(pa, pb, true, status);
+
+    return float16_round_pack_canonical(pr, status);
+}
+
+float32 float32_sub(float32 a, float32 b, float_status *status)
+{
+    decomposed_parts pa = float32_unpack_canonical(a, status);
+    decomposed_parts pb = float32_unpack_canonical(b, status);
+    decomposed_parts pr = add_decomposed(pa, pb, true, status);
+
+    return float32_round_pack_canonical(pr, status);
+}
+
+float64 float64_sub(float64 a, float64 b, float_status *status)
+{
+    decomposed_parts pa = float64_unpack_canonical(a, status);
+    decomposed_parts pb = float64_unpack_canonical(b, status);
+    decomposed_parts pr = add_decomposed(pa, pb, true, status);
+
+    return float64_round_pack_canonical(pr, status);
+}
+
 /*----------------------------------------------------------------------------
 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
 | and 7, and returns the properly rounded 32-bit integer corresponding to the
@@ -2065,219 +2541,6 @@  float32 float32_round_to_int(float32 a, float_status *status)
 
 }
 
-/*----------------------------------------------------------------------------
-| Returns the result of adding the absolute values of the single-precision
-| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
-| before being returned.  `zSign' is ignored if the result is a NaN.
-| The addition is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float32 addFloat32Sigs(float32 a, float32 b, flag zSign,
-                              float_status *status)
-{
-    int aExp, bExp, zExp;
-    uint32_t aSig, bSig, zSig;
-    int expDiff;
-
-    aSig = extractFloat32Frac( a );
-    aExp = extractFloat32Exp( a );
-    bSig = extractFloat32Frac( b );
-    bExp = extractFloat32Exp( b );
-    expDiff = aExp - bExp;
-    aSig <<= 6;
-    bSig <<= 6;
-    if ( 0 < expDiff ) {
-        if ( aExp == 0xFF ) {
-            if (aSig) {
-                return propagateFloat32NaN(a, b, status);
-            }
-            return a;
-        }
-        if ( bExp == 0 ) {
-            --expDiff;
-        }
-        else {
-            bSig |= 0x20000000;
-        }
-        shift32RightJamming( bSig, expDiff, &bSig );
-        zExp = aExp;
-    }
-    else if ( expDiff < 0 ) {
-        if ( bExp == 0xFF ) {
-            if (bSig) {
-                return propagateFloat32NaN(a, b, status);
-            }
-            return packFloat32( zSign, 0xFF, 0 );
-        }
-        if ( aExp == 0 ) {
-            ++expDiff;
-        }
-        else {
-            aSig |= 0x20000000;
-        }
-        shift32RightJamming( aSig, - expDiff, &aSig );
-        zExp = bExp;
-    }
-    else {
-        if ( aExp == 0xFF ) {
-            if (aSig | bSig) {
-                return propagateFloat32NaN(a, b, status);
-            }
-            return a;
-        }
-        if ( aExp == 0 ) {
-            if (status->flush_to_zero) {
-                if (aSig | bSig) {
-                    float_raise(float_flag_output_denormal, status);
-                }
-                return packFloat32(zSign, 0, 0);
-            }
-            return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
-        }
-        zSig = 0x40000000 + aSig + bSig;
-        zExp = aExp;
-        goto roundAndPack;
-    }
-    aSig |= 0x20000000;
-    zSig = ( aSig + bSig )<<1;
-    --zExp;
-    if ( (int32_t) zSig < 0 ) {
-        zSig = aSig + bSig;
-        ++zExp;
-    }
- roundAndPack:
-    return roundAndPackFloat32(zSign, zExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the absolute values of the single-
-| precision floating-point values `a' and `b'.  If `zSign' is 1, the
-| difference is negated before being returned.  `zSign' is ignored if the
-| result is a NaN.  The subtraction is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float32 subFloat32Sigs(float32 a, float32 b, flag zSign,
-                              float_status *status)
-{
-    int aExp, bExp, zExp;
-    uint32_t aSig, bSig, zSig;
-    int expDiff;
-
-    aSig = extractFloat32Frac( a );
-    aExp = extractFloat32Exp( a );
-    bSig = extractFloat32Frac( b );
-    bExp = extractFloat32Exp( b );
-    expDiff = aExp - bExp;
-    aSig <<= 7;
-    bSig <<= 7;
-    if ( 0 < expDiff ) goto aExpBigger;
-    if ( expDiff < 0 ) goto bExpBigger;
-    if ( aExp == 0xFF ) {
-        if (aSig | bSig) {
-            return propagateFloat32NaN(a, b, status);
-        }
-        float_raise(float_flag_invalid, status);
-        return float32_default_nan(status);
-    }
-    if ( aExp == 0 ) {
-        aExp = 1;
-        bExp = 1;
-    }
-    if ( bSig < aSig ) goto aBigger;
-    if ( aSig < bSig ) goto bBigger;
-    return packFloat32(status->float_rounding_mode == float_round_down, 0, 0);
- bExpBigger:
-    if ( bExp == 0xFF ) {
-        if (bSig) {
-            return propagateFloat32NaN(a, b, status);
-        }
-        return packFloat32( zSign ^ 1, 0xFF, 0 );
-    }
-    if ( aExp == 0 ) {
-        ++expDiff;
-    }
-    else {
-        aSig |= 0x40000000;
-    }
-    shift32RightJamming( aSig, - expDiff, &aSig );
-    bSig |= 0x40000000;
- bBigger:
-    zSig = bSig - aSig;
-    zExp = bExp;
-    zSign ^= 1;
-    goto normalizeRoundAndPack;
- aExpBigger:
-    if ( aExp == 0xFF ) {
-        if (aSig) {
-            return propagateFloat32NaN(a, b, status);
-        }
-        return a;
-    }
-    if ( bExp == 0 ) {
-        --expDiff;
-    }
-    else {
-        bSig |= 0x40000000;
-    }
-    shift32RightJamming( bSig, expDiff, &bSig );
-    aSig |= 0x40000000;
- aBigger:
-    zSig = aSig - bSig;
-    zExp = aExp;
- normalizeRoundAndPack:
-    --zExp;
-    return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of adding the single-precision floating-point values `a'
-| and `b'.  The operation is performed according to the IEC/IEEE Standard for
-| Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 float32_add(float32 a, float32 b, float_status *status)
-{
-    flag aSign, bSign;
-    a = float32_squash_input_denormal(a, status);
-    b = float32_squash_input_denormal(b, status);
-
-    aSign = extractFloat32Sign( a );
-    bSign = extractFloat32Sign( b );
-    if ( aSign == bSign ) {
-        return addFloat32Sigs(a, b, aSign, status);
-    }
-    else {
-        return subFloat32Sigs(a, b, aSign, status);
-    }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the single-precision floating-point values
-| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 float32_sub(float32 a, float32 b, float_status *status)
-{
-    flag aSign, bSign;
-    a = float32_squash_input_denormal(a, status);
-    b = float32_squash_input_denormal(b, status);
-
-    aSign = extractFloat32Sign( a );
-    bSign = extractFloat32Sign( b );
-    if ( aSign == bSign ) {
-        return subFloat32Sigs(a, b, aSign, status);
-    }
-    else {
-        return addFloat32Sigs(a, b, aSign, status);
-    }
-
-}
 
 /*----------------------------------------------------------------------------
 | Returns the result of multiplying the single-precision floating-point values
@@ -3875,219 +4138,6 @@  float64 float64_trunc_to_int(float64 a, float_status *status)
     return res;
 }
 
-/*----------------------------------------------------------------------------
-| Returns the result of adding the absolute values of the double-precision
-| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
-| before being returned.  `zSign' is ignored if the result is a NaN.
-| The addition is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float64 addFloat64Sigs(float64 a, float64 b, flag zSign,
-                              float_status *status)
-{
-    int aExp, bExp, zExp;
-    uint64_t aSig, bSig, zSig;
-    int expDiff;
-
-    aSig = extractFloat64Frac( a );
-    aExp = extractFloat64Exp( a );
-    bSig = extractFloat64Frac( b );
-    bExp = extractFloat64Exp( b );
-    expDiff = aExp - bExp;
-    aSig <<= 9;
-    bSig <<= 9;
-    if ( 0 < expDiff ) {
-        if ( aExp == 0x7FF ) {
-            if (aSig) {
-                return propagateFloat64NaN(a, b, status);
-            }
-            return a;
-        }
-        if ( bExp == 0 ) {
-            --expDiff;
-        }
-        else {
-            bSig |= LIT64( 0x2000000000000000 );
-        }
-        shift64RightJamming( bSig, expDiff, &bSig );
-        zExp = aExp;
-    }
-    else if ( expDiff < 0 ) {
-        if ( bExp == 0x7FF ) {
-            if (bSig) {
-                return propagateFloat64NaN(a, b, status);
-            }
-            return packFloat64( zSign, 0x7FF, 0 );
-        }
-        if ( aExp == 0 ) {
-            ++expDiff;
-        }
-        else {
-            aSig |= LIT64( 0x2000000000000000 );
-        }
-        shift64RightJamming( aSig, - expDiff, &aSig );
-        zExp = bExp;
-    }
-    else {
-        if ( aExp == 0x7FF ) {
-            if (aSig | bSig) {
-                return propagateFloat64NaN(a, b, status);
-            }
-            return a;
-        }
-        if ( aExp == 0 ) {
-            if (status->flush_to_zero) {
-                if (aSig | bSig) {
-                    float_raise(float_flag_output_denormal, status);
-                }
-                return packFloat64(zSign, 0, 0);
-            }
-            return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
-        }
-        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
-        zExp = aExp;
-        goto roundAndPack;
-    }
-    aSig |= LIT64( 0x2000000000000000 );
-    zSig = ( aSig + bSig )<<1;
-    --zExp;
-    if ( (int64_t) zSig < 0 ) {
-        zSig = aSig + bSig;
-        ++zExp;
-    }
- roundAndPack:
-    return roundAndPackFloat64(zSign, zExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the absolute values of the double-
-| precision floating-point values `a' and `b'.  If `zSign' is 1, the
-| difference is negated before being returned.  `zSign' is ignored if the
-| result is a NaN.  The subtraction is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float64 subFloat64Sigs(float64 a, float64 b, flag zSign,
-                              float_status *status)
-{
-    int aExp, bExp, zExp;
-    uint64_t aSig, bSig, zSig;
-    int expDiff;
-
-    aSig = extractFloat64Frac( a );
-    aExp = extractFloat64Exp( a );
-    bSig = extractFloat64Frac( b );
-    bExp = extractFloat64Exp( b );
-    expDiff = aExp - bExp;
-    aSig <<= 10;
-    bSig <<= 10;
-    if ( 0 < expDiff ) goto aExpBigger;
-    if ( expDiff < 0 ) goto bExpBigger;
-    if ( aExp == 0x7FF ) {
-        if (aSig | bSig) {
-            return propagateFloat64NaN(a, b, status);
-        }
-        float_raise(float_flag_invalid, status);
-        return float64_default_nan(status);
-    }
-    if ( aExp == 0 ) {
-        aExp = 1;
-        bExp = 1;
-    }
-    if ( bSig < aSig ) goto aBigger;
-    if ( aSig < bSig ) goto bBigger;
-    return packFloat64(status->float_rounding_mode == float_round_down, 0, 0);
- bExpBigger:
-    if ( bExp == 0x7FF ) {
-        if (bSig) {
-            return propagateFloat64NaN(a, b, status);
-        }
-        return packFloat64( zSign ^ 1, 0x7FF, 0 );
-    }
-    if ( aExp == 0 ) {
-        ++expDiff;
-    }
-    else {
-        aSig |= LIT64( 0x4000000000000000 );
-    }
-    shift64RightJamming( aSig, - expDiff, &aSig );
-    bSig |= LIT64( 0x4000000000000000 );
- bBigger:
-    zSig = bSig - aSig;
-    zExp = bExp;
-    zSign ^= 1;
-    goto normalizeRoundAndPack;
- aExpBigger:
-    if ( aExp == 0x7FF ) {
-        if (aSig) {
-            return propagateFloat64NaN(a, b, status);
-        }
-        return a;
-    }
-    if ( bExp == 0 ) {
-        --expDiff;
-    }
-    else {
-        bSig |= LIT64( 0x4000000000000000 );
-    }
-    shift64RightJamming( bSig, expDiff, &bSig );
-    aSig |= LIT64( 0x4000000000000000 );
- aBigger:
-    zSig = aSig - bSig;
-    zExp = aExp;
- normalizeRoundAndPack:
-    --zExp;
-    return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of adding the double-precision floating-point values `a'
-| and `b'.  The operation is performed according to the IEC/IEEE Standard for
-| Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 float64_add(float64 a, float64 b, float_status *status)
-{
-    flag aSign, bSign;
-    a = float64_squash_input_denormal(a, status);
-    b = float64_squash_input_denormal(b, status);
-
-    aSign = extractFloat64Sign( a );
-    bSign = extractFloat64Sign( b );
-    if ( aSign == bSign ) {
-        return addFloat64Sigs(a, b, aSign, status);
-    }
-    else {
-        return subFloat64Sigs(a, b, aSign, status);
-    }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the double-precision floating-point values
-| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 float64_sub(float64 a, float64 b, float_status *status)
-{
-    flag aSign, bSign;
-    a = float64_squash_input_denormal(a, status);
-    b = float64_squash_input_denormal(b, status);
-
-    aSign = extractFloat64Sign( a );
-    bSign = extractFloat64Sign( b );
-    if ( aSign == bSign ) {
-        return subFloat64Sigs(a, b, aSign, status);
-    }
-    else {
-        return addFloat64Sigs(a, b, aSign, status);
-    }
-
-}
 
 /*----------------------------------------------------------------------------
 | Returns the result of multiplying the double-precision floating-point values
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index e64bf62f3d..3a21a2bcef 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -318,6 +318,10 @@  float64 float16_to_float64(float16 a, flag ieee, float_status *status);
 /*----------------------------------------------------------------------------
 | Software half-precision operations.
 *----------------------------------------------------------------------------*/
+
+float16 float16_add(float16, float16, float_status *status);
+float16 float16_sub(float16, float16, float_status *status);
+
 int float16_is_quiet_nan(float16, float_status *status);
 int float16_is_signaling_nan(float16, float_status *status);
 float16 float16_maybe_silence_nan(float16, float_status *status);