diff mbox

[1/4,libgcc,ARM] Generalise float-to-half conversion function.

Message ID 1477316680-5094-2-git-send-email-james.greenhalgh@arm.com
State Superseded
Headers show

Commit Message

James Greenhalgh Oct. 24, 2016, 1:44 p.m. UTC
Hi,

I'm adapting this patch from work started by Matthew Wahab.

Conversions from double precision floats to the ARM __fp16 are required
to round only once. A conversion function for double to __fp16 to
support this on soft-fp targets. This and the following patch add this
conversion function by reusing the exising float to __fp16 function
config/arm/fp16.c:__gnu_f2h_internal.

This patch generalizes __gnu_f2h_internal by adding a specification of
the source format and reworking the code to make use of it. Initially,
only the binary32 format is supported.

Bootstrapped on an ARMv8-A machine with no issues, and cross-tested with
a reasonable multi-lib range.

OK?

Thanks,
James

---
libgcc/

2016-10-24  James Greenhalgh  <james.greenhalgh@arm.com>
	    Matthew Wahab  <matthew.wahab@arm.com>

	* config/arm/fp16.c (struct format): New.
	(binary32): New.
	(__gnu_float2h_internal): New.  Body moved from
	__gnu_f2h_internal and generalize.
	(_gnu_f2h_internal): Move body to function __gnu_float2h_internal.
	Call it with binary32.

Comments

James Greenhalgh Nov. 8, 2016, 11:57 a.m. UTC | #1
On Mon, Oct 24, 2016 at 02:44:37PM +0100, James Greenhalgh wrote:
> 

> Hi,

> 

> I'm adapting this patch from work started by Matthew Wahab.

> 

> Conversions from double precision floats to the ARM __fp16 are required

> to round only once. A conversion function for double to __fp16 to

> support this on soft-fp targets. This and the following patch add this

> conversion function by reusing the exising float to __fp16 function

> config/arm/fp16.c:__gnu_f2h_internal.

> 

> This patch generalizes __gnu_f2h_internal by adding a specification of

> the source format and reworking the code to make use of it. Initially,

> only the binary32 format is supported.

> 

> Bootstrapped on an ARMv8-A machine with no issues, and cross-tested with

> a reasonable multi-lib range.

> 

> 2016-10-24  James Greenhalgh  <james.greenhalgh@arm.com>

> 	    Matthew Wahab  <matthew.wahab@arm.com>

> 

> 	* config/arm/fp16.c (struct format): New.

> 	(binary32): New.

> 	(__gnu_float2h_internal): New.  Body moved from

> 	__gnu_f2h_internal and generalize.

> 	(_gnu_f2h_internal): Move body to function __gnu_float2h_internal.

> 	Call it with binary32.


Looking at it carefully, this patch has a bug in the way it handles
rounding for doubles. The relevant code looks like this:

  if (aexp < -14)
    {
      mask = point | (point - 1);
      /* Minimum exponent for half-precision is 2^-24.  */
      if (aexp >= -25)
	mask >>= 25 + aexp;
    }
  else
    mask = 0x00001fff;

  /* Round.  */
  if (mantissa & mask)
    {
      increment = (mask + 1) >> 1;
      if ((mantissa & mask) == increment)
	increment = mantissa & (increment << 1);
      mantissa += increment;
      if (mantissa >= (point << 1))
	{
	  mantissa >>= 1;
	  aexp++;
	}
    }

But clearly that mask is not going to be sufficient when we are deciding
whether we ought to round a double or not as there are many more bits for
us to look at.

I'll work on a new spin of this patch.

I'm still hopeful that both this and the AArch64 (and generic bits and
pieces) support for _Float16 will make it for GCC 7.

Thanks,
James
diff mbox

Patch

diff --git a/libgcc/config/arm/fp16.c b/libgcc/config/arm/fp16.c
index 39c863c..8d02a24 100644
--- a/libgcc/config/arm/fp16.c
+++ b/libgcc/config/arm/fp16.c
@@ -22,35 +22,69 @@ 
    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
    <http://www.gnu.org/licenses/>.  */
 
+struct format
+{
+  /* Number of bits.  */
+  unsigned long long size;
+  /* Exponent bias.  */
+  unsigned long long bias;
+  /* Exponent width in bits.  */
+  unsigned long long exponent;
+  /* Significand precision in explicitly stored bits.  */
+  unsigned long long significand;
+};
+
+static const struct format
+binary32 =
+{
+  32,   /* size.  */
+  127,  /* bias.  */
+  8,    /* exponent.  */
+  23    /* significand.  */
+};
+
 static inline unsigned short
-__gnu_f2h_internal(unsigned int a, int ieee)
+__gnu_float2h_internal (const struct format* fmt,
+			unsigned long long a, int ieee)
 {
-  unsigned short sign = (a >> 16) & 0x8000;
-  int aexp = (a >> 23) & 0xff;
-  unsigned int mantissa = a & 0x007fffff;
-  unsigned int mask;
-  unsigned int increment;
+  unsigned long long point = 1ULL << fmt->significand;;
+  unsigned short sign = (a >> (fmt->size - 16)) & 0x8000;
+  int aexp;
+  unsigned long long mantissa;
+  unsigned long long mask;
+  unsigned long long increment;
+
+  /* Get the exponent and mantissa encodings.  */
+  mantissa = a & (point - 1);
+
+  mask = (1 << fmt->exponent) - 1;
+  aexp = (a >> fmt->significand) & mask;
 
-  if (aexp == 0xff)
+  /* Infinity, NaN and alternative format special case.  */
+  if (((unsigned int) aexp) == mask)
     {
       if (!ieee)
 	return sign;
       if (mantissa == 0)
 	return sign | 0x7c00;	/* Infinity.  */
       /* Remaining cases are NaNs.  Convert SNaN to QNaN.  */
-      return sign | 0x7e00 | (mantissa >> 13);
+      return sign | 0x7e00 | (mantissa >> (fmt->significand - 10));
     }
 
+  /* Zero.  */
   if (aexp == 0 && mantissa == 0)
     return sign;
 
-  aexp -= 127;
+  /* Construct the exponent and mantissa.  */
+  aexp -= fmt->bias;
+
+  /* Decimal point is immediately after the significand.  */
+  mantissa |= point;
 
-  /* Decimal point between bits 22 and 23.  */
-  mantissa |= 0x00800000;
   if (aexp < -14)
     {
-      mask = 0x00ffffff;
+      mask = point | (point - 1);
+      /* Minimum exponent for half-precision is 2^-24.  */
       if (aexp >= -25)
 	mask >>= 25 + aexp;
     }
@@ -64,8 +98,8 @@  __gnu_f2h_internal(unsigned int a, int ieee)
       if ((mantissa & mask) == increment)
 	increment = mantissa & (increment << 1);
       mantissa += increment;
-      if (mantissa >= 0x01000000)
-       	{
+      if (mantissa >= (point << 1))
+	{
 	  mantissa >>= 1;
 	  aexp++;
 	}
@@ -93,7 +127,13 @@  __gnu_f2h_internal(unsigned int a, int ieee)
 
   /* We leave the leading 1 in the mantissa, and subtract one
      from the exponent bias to compensate.  */
-  return sign | (((aexp + 14) << 10) + (mantissa >> 13));
+  return sign | (((aexp + 14) << 10) + (mantissa >> (fmt->significand - 10)));
+}
+
+static inline unsigned short
+__gnu_f2h_internal (unsigned int a, int ieee)
+{
+  return __gnu_float2h_internal (&binary32, (unsigned long long) a, ieee);
 }
 
 unsigned int