diff mbox

[RFC] Modify excess precision logic to permit FLT_EVAL_METHOD=16

Message ID 1473153792-30946-1-git-send-email-james.greenhalgh@arm.com
State New
Headers show

Commit Message

James Greenhalgh Sept. 6, 2016, 9:23 a.m. UTC
Hi,

ISO/IEC TS 18661-3 redefines FLT_EVAL_METHOD. In this patch I'm interested
in updating the code in GCC to handle two of the changes: the redefinition
of the meanings of {0, 1, 2}, and the meaning of "N" for the specific case
of N=16.

In ISO/IEC TS 18661-3, these are defined as follows:

  0

    Evaluate all operations and constants, whose semantic type has at most
    the range and precision of float, to the range and precision of
    float; evaluate all other operations and constants to the range and
    precision of the semantic type.

  1

    Evaluate all operations and constants, whose semantic type has at most
    the range and precision of double, to the range and precision of
    double; evaluate all other operations and constants to the range and
    precision of the semantic type.

  2

    Evaluate all operations and constants, whose semantic type has at most
    the range and precision of long double, to the range and precision of
    long double; evaluate all other operations and constants to the range
    and precision of the semantic type.

  N, where _FloatN is a supported interchange floating type

    Evaluate operations and constants whose semantic type has at most
    the range and precision of the _FloatN type, to the range and precision
    of the _FloatN type; evaluate all other operations and constants to the
    range and precision of the semantic type;

When we enable support for _Float16 in AArch64 we would like, where
we have the ARMv8.2-A 16-bit floating point extensions available, to
evaluate operations to the range and precision of that type. Where we
do not have the extensions available, we'd like to evaluate operations
to the range and precision of float.

This will require adding support for FLT_EVAL_METHOD=16. In most cases
this simply means duplicating the exceptions that exist for
FLT_EVAL_METHOD=0, and teaching some of the excess-precision logic that it
may need to handle _Float16 types when FLT_EVAL_METHOD=0.

In this patch, I first add a new enum for the five FLT_EVAL_METHOD values,
just to avoid repeating magic numbers now that we are up to five of them.
I've added this to flag-types.h, but there may be a better home for it.

In c-family/c-cppbuiltin.c I've updated cpp_iec_559_value so that it also
allows setting __GCC_IEC_559 if FLT_EVAL_METHOD=16, and I've updated
c_cpp_builtins to handle the new value and to use the new enum names.

Then I've updated init_excess_precision in toplev.c with the new enum
names, and with logic to understand that for targets that provide _Float16,
we can't set flag_excess_precision to "fast" when FLT_EVAL_METHOD=0. This is
because FLT_EVAL_METHOD=0 requires predictably using the precision of float
for _Float16 types.

In tree.c, I've modified excess_precision_type with the logic discussed
above, promoting _Float16 to the appropriate type if we are required to.

I've also added a special case that allows promoting _Float16 types to
"float" when -fexcess-precision=fast is in effect and FLT_EVAL_METHOD=0.
If we don't do this, then the "fast" case will spend more time promoting
and demoting between HFmode and SFmode, and the consequence will be slower
code.

Bootstrapped on AArch64 and x86_64.

OK?

Thanks,
James

---
gcc/

2016-09-06  James Greenhalgh  <james.greenhalgh@arm.com>

	* flag-types.h (flt_eval_method): New.
	* toplev.c (init_excess_precision): Handle
	FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16.
	* tree.c (excess_precision_type): Likewise.

gcc/c-family/

2016-09-06  James Greenhalgh  <james.greenhalgh@arm.com>

	* c-cppbuiltin.c (cpp_iec_559_value): Support
	FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16.
	(c_cpp_builtins): Likewise.
diff mbox

Patch

diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index ee4d233..f278aff 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -735,10 +735,13 @@  cpp_iec_559_value (void)
      excess precision to mean lack of IEEE 754 support.  The same
      applies to unpredictable contraction.  For C++, and outside
      strict conformance mode, do not consider these options to mean
-     lack of IEEE 754 support.  */
+     lack of IEEE 754 support.  FLT_EVAL_METHOD_PROMOTE_TO_FLOAT and
+     FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 both give predictable excess
+     precision.  */
   if (flag_iso
       && !c_dialect_cxx ()
-      && TARGET_FLT_EVAL_METHOD != 0
+      && TARGET_FLT_EVAL_METHOD != FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
+      && TARGET_FLT_EVAL_METHOD != FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
       && flag_excess_precision_cmdline != EXCESS_PRECISION_STANDARD)
     ret = 0;
   if (flag_iso
@@ -1118,24 +1121,28 @@  c_cpp_builtins (cpp_reader *pfile)
 	    }
 	  builtin_define_with_value (macro_name, suffix, 0);
 	  bool excess_precision = false;
-	  if (TARGET_FLT_EVAL_METHOD != 0
-	      && mode != TYPE_MODE (long_double_type_node)
-	      && (mode == TYPE_MODE (float_type_node)
-		  || mode == TYPE_MODE (double_type_node)))
-	    switch (TARGET_FLT_EVAL_METHOD)
-	      {
-	      case -1:
-	      case 2:
-		excess_precision = true;
-		break;
-
-	      case 1:
-		excess_precision = mode == TYPE_MODE (float_type_node);
-		break;
-
-	      default:
-		gcc_unreachable ();
-	      }
+	  machine_mode float16_type_mode = (FLOATN_TYPE_NODE (0)
+					    ? TYPE_MODE (FLOATN_TYPE_NODE (0))
+					    : VOIDmode);
+	  switch (TARGET_FLT_EVAL_METHOD)
+	    {
+	    case FLT_EVAL_METHOD_UNPREDICTABLE:
+	    case FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE:
+	      excess_precision = (mode == float16_type_mode
+				  || mode == TYPE_MODE (float_type_node)
+				  || mode == TYPE_MODE (double_type_node));
+	      break;
+
+	    case FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE:
+	      excess_precision = (mode == float16_type_mode
+				  ||mode == TYPE_MODE (float_type_node));
+	      break;
+	    case FLT_EVAL_METHOD_PROMOTE_TO_FLOAT:
+	      excess_precision = mode == float16_type_mode;
+	      break;
+	    default:
+	      gcc_unreachable ();
+	    }
 	  macro_name = (char *) alloca (strlen (name)
 					+ sizeof ("__LIBGCC__EXCESS_"
 						  "PRECISION__"));
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index dd57e16..adfe074 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -158,6 +158,16 @@  enum excess_precision
   EXCESS_PRECISION_STANDARD
 };
 
+/* The possible values for FLT_EVAL_METHOD.  */
+enum flt_eval_method
+{
+  FLT_EVAL_METHOD_UNPREDICTABLE = -1,
+  FLT_EVAL_METHOD_PROMOTE_TO_FLOAT = 0,
+  FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE = 1,
+  FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE = 2,
+  FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16 = 16
+};
+
 /* Type of stack check.  */
 enum stack_check_type
 {
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 66099ec..052f414 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1700,15 +1700,23 @@  init_excess_precision (void)
       int flt_eval_method = TARGET_FLT_EVAL_METHOD;
       switch (flt_eval_method)
 	{
-	case -1:
-	case 0:
-	  /* Either the target acts unpredictably (-1) or has all the
-	     operations required not to have excess precision (0).  */
+	case FLT_EVAL_METHOD_UNPREDICTABLE:
+	case FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16:
+	  /* Either the target acts unpredictably (-1) or has the required
+	     operations for any type we support.  */
 	  flag_excess_precision = EXCESS_PRECISION_FAST;
 	  break;
-	case 1:
-	case 2:
-	  /* In these cases, predictable excess precision makes
+	case FLT_EVAL_METHOD_PROMOTE_TO_FLOAT:
+	  /* Most targets will be in this case (FLT_EVAL_METHOD = 0).
+	     If the target supports _Float16, we need to predictably
+	     calculate it in the precision of float, otherwise, we can
+	     use EXCESS_PRECISION_FAST.  */
+	  if (targetm.floatn_mode (16, false) == VOIDmode)
+	    flag_excess_precision = EXCESS_PRECISION_FAST;
+	  break;
+	case FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE:
+	case FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE:
+	  /* In these cases, predictable excess precision always makes
 	     sense.  */
 	  break;
 	default:
diff --git a/gcc/tree.c b/gcc/tree.c
index 33e6f97..cbab851 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -8836,50 +8836,93 @@  build_complex_type (tree component_type)
 tree
 excess_precision_type (tree type)
 {
+  int flt_eval_method = TARGET_FLT_EVAL_METHOD;
+  machine_mode float16_type_mode = (float16_type_node
+				    ? TYPE_MODE (float16_type_node)
+				    : VOIDmode);
+  machine_mode float_type_mode = TYPE_MODE (float_type_node);
+  machine_mode double_type_mode = TYPE_MODE (float_type_node);
   if (flag_excess_precision != EXCESS_PRECISION_FAST)
     {
-      int flt_eval_method = TARGET_FLT_EVAL_METHOD;
       switch (TREE_CODE (type))
 	{
 	case REAL_TYPE:
-	  switch (flt_eval_method)
 	    {
-	    case 1:
-	      if (TYPE_MODE (type) == TYPE_MODE (float_type_node))
-		return double_type_node;
-	      break;
-	    case 2:
-	      if (TYPE_MODE (type) == TYPE_MODE (float_type_node)
-		  || TYPE_MODE (type) == TYPE_MODE (double_type_node))
-		return long_double_type_node;
+	      machine_mode type_mode = TYPE_MODE (type);
+	      switch (flt_eval_method)
+		{
+		case FLT_EVAL_METHOD_PROMOTE_TO_FLOAT:
+		  if (type_mode == float16_type_mode)
+		    return float_type_node;
+		  break;
+		case FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE:
+		  if (type_mode == float16_type_mode
+		      || type_mode == float_type_mode)
+		    return double_type_node;
+		  break;
+		case FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE:
+		  if (type_mode == float16_type_mode
+		      || type_mode == float_type_mode
+		      || type_mode == double_type_mode)
+		    return long_double_type_node;
+		  break;
+		default:
+		  gcc_unreachable ();
+		}
 	      break;
-	    default:
-	      gcc_unreachable ();
 	    }
-	  break;
 	case COMPLEX_TYPE:
-	  if (TREE_CODE (TREE_TYPE (type)) != REAL_TYPE)
-	    return NULL_TREE;
-	  switch (flt_eval_method)
 	    {
-	    case 1:
-	      if (TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (float_type_node))
-		return complex_double_type_node;
-	      break;
-	    case 2:
-	      if (TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (float_type_node)
-		  || (TYPE_MODE (TREE_TYPE (type))
-		      == TYPE_MODE (double_type_node)))
-		return complex_long_double_type_node;
+	      if (TREE_CODE (TREE_TYPE (type)) != REAL_TYPE)
+		return NULL_TREE;
+	      machine_mode type_mode = TYPE_MODE (TREE_TYPE (type));
+	      switch (flt_eval_method)
+		{
+		case FLT_EVAL_METHOD_PROMOTE_TO_FLOAT:
+		  if (type_mode == float16_type_mode)
+		    return complex_float_type_node;
+		  break;
+		case FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE:
+		  if (type_mode == float16_type_mode
+		      || type_mode == float_type_mode)
+		    return complex_double_type_node;
+		  break;
+		case FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE:
+		  if (type_mode == float16_type_mode
+		      || type_mode == float_type_mode
+		      || type_mode == double_type_mode)
+		    return complex_long_double_type_node;
+		  break;
+		default:
+		  gcc_unreachable ();
+		}
 	      break;
-	    default:
-	      gcc_unreachable ();
 	    }
-	  break;
 	default:
 	  break;
 	}
     }
+  else
+    {
+      /* A special case for EXCESS_PRECISION_FAST if we need to calculate in
+	 at-least-float precision.  If this is a _Float16
+	 type, and we would calculate it in float precision
+	 (FLT_EVAL_METHOD == FLT_EVAL_METHOD_PROMOTE_TO_FLOAT), then the target
+	 probably doesn't have HFmode operations.  We are therefore
+	 likely to generate better code by promoting once here.  If we
+	 don't, we'll promote before arithmetic operations so we can use
+	 their SFmode counterparts.  */
+      if (flt_eval_method == FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
+	  && float16_type_node)
+	{
+	  if (TREE_CODE (type) == REAL_TYPE
+	      && TYPE_MODE (type) == float16_type_mode)
+	    return float_type_node;
+	  if (TREE_CODE (type) == COMPLEX_TYPE
+	      && TYPE_MODE (TREE_TYPE (type)) == float16_type_mode)
+	    return complex_float_type_node;
+	}
+    }
   return NULL_TREE;
 }