diff mbox

[1/3,ARM] PR63870 Add qualifiers for NEON builtins

Message ID 1444175989-24944-2-git-send-email-charles.baylis@linaro.org
State Superseded
Headers show

Commit Message

Charles Baylis Oct. 6, 2015, 11:59 p.m. UTC
From: Charles Baylis <charles.baylis@linaro.org>

gcc/ChangeLog:

<DATE>  Charles Baylis  <charles.baylis@linaro.org>

	PR target/63870
	* config/arm/arm-builtins.c (enum arm_type_qualifiers): New enumerator
	qualifier_struct_load_store_lane_index.
	(builtin_arg): New enumerator NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX.
	(arm_expand_neon_args): New parameter. Remove ellipsis. Handle NEON
	argument qualifiers.
	(arm_expand_neon_builtin): Handle new NEON argument qualifier.
	* config/arm/arm.h (ENDIAN_LANE_N): New macro.

Change-Id: Iaa14d8736879fa53776319977eda2089f0a26647
---
 gcc/config/arm/arm-builtins.c | 46 ++++++++++++++++++++++++++++---------------
 gcc/config/arm/arm.c          |  1 +
 gcc/config/arm/arm.h          |  3 +++
 3 files changed, 34 insertions(+), 16 deletions(-)

Comments

Alan Lawrence Oct. 12, 2015, 10:58 a.m. UTC | #1
On 07/10/15 00:59, charles.baylis@linaro.org wrote:
> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
...
>               case NEON_ARG_MEMORY:
>   	      /* Check if expand failed.  */
>   	      if (op[argc] == const0_rtx)
>   	      {
> -		va_end (ap);
>   		return 0;
>   	      }

...and drop the braces?

> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 02f5dc3..448cde3 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -30117,4 +30117,5 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
>     *pri = tmp;
>     return;
>   }
> +
>   #include "gt-arm.h"

This looks unrelated (and is the only change to arm.c) - perhaps commit 
separately? (Note I am not a maintainer! But this looks "obvious"...)

> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> index 87c9f90..27ac4dc 100644
> --- a/gcc/config/arm/arm.h
> +++ b/gcc/config/arm/arm.h
> @@ -288,6 +288,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
>   #define TARGET_BPABI false
>   #endif
>
> +#define ENDIAN_LANE_N(mode, n)  \
> +  (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n)
> +

Given we are making changes here to how this all works on bigendian, have you 
tested armeb at all?

Generally I would say this all looks sensible :)

Cheers, Alan
Charles Baylis Oct. 14, 2015, 10:02 p.m. UTC | #2
On 12 October 2015 at 11:58, Alan Lawrence <alan.lawrence@arm.com> wrote:
> On 07/10/15 00:59, charles.baylis@linaro.org wrote:
>>
>> diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
>
> ...
>>
>>               case NEON_ARG_MEMORY:
>>               /* Check if expand failed.  */
>>               if (op[argc] == const0_rtx)
>>               {
>> -               va_end (ap);
>>                 return 0;
>>               }
>
>
> ...and drop the braces?

Will do.

>> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>> index 02f5dc3..448cde3 100644
>> --- a/gcc/config/arm/arm.c
>> +++ b/gcc/config/arm/arm.c
>> @@ -30117,4 +30117,5 @@ arm_sched_fusion_priority (rtx_insn *insn, int
>> max_pri,
>>     *pri = tmp;
>>     return;
>>   }
>> +
>>   #include "gt-arm.h"
>
>
> This looks unrelated (and is the only change to arm.c) - perhaps commit
> separately? (Note I am not a maintainer! But this looks "obvious"...)

It doesn't seem very useful. I'll drop it.

>> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
>> index 87c9f90..27ac4dc 100644
>> --- a/gcc/config/arm/arm.h
>> +++ b/gcc/config/arm/arm.h
>> @@ -288,6 +288,9 @@ extern void
>> (*arm_lang_output_object_attributes_hook)(void);
>>   #define TARGET_BPABI false
>>   #endif
>>
>> +#define ENDIAN_LANE_N(mode, n)  \
>> +  (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n)
>> +
>
>
> Given we are making changes here to how this all works on bigendian, have
> you tested armeb at all?

I tested on big endian, and it passes, except for a testsuite issue
with the *_f16 tests, which fail because they are built without the
fp16 options on big endian. This is because
check_effective_target_arm_neon_fp16_ok_nocache gets an ICE when it
attempts to compile the test program. I think those fp16 intrinsics
are in your area, do you want to take a look? :)

Thanks for the review

Charles
Alan Lawrence Oct. 19, 2015, 4:56 p.m. UTC | #3
On 14/10/15 23:02, Charles Baylis wrote:
> On 12 October 2015 at 11:58, Alan Lawrence <alan.lawrence@arm.com> wrote:
 >
>> Given we are making changes here to how this all works on bigendian, have
>> you tested armeb at all?
>
> I tested on big endian, and it passes, except....

Well, I asked because it seemed good to make sure that the changes/improvements 
to how lane-swapping was done, wasn't breaking anything on armeb by the back 
door, and so thank you, I'm happy with that as far as your patch is concerned ;).

> for a testsuite issue
> with the *_f16 tests, which fail because they are built without the
> fp16 options on big endian. This is because
> check_effective_target_arm_neon_fp16_ok_nocache gets an ICE when it
> attempts to compile the test program. I think those fp16 intrinsics
> are in your area, do you want to take a look? :)

Heh, yes, I see ;). So I've dug into this a bit, and the problem seems to be 
that we don't define a movv4hf pattern, and hence, we fall back to 
emit_multi_word_move. This uses subregs, and in simplify_subreg_regno, 
REG_CANNOT_CHANGE_MODE_P is true on bigendian (but false on little-endian).

That is, I *think* the right thing to do is just to add a movv4hf (and v8hf) 
pattern, I'm testing this now....

Cheers, Alan
diff mbox

Patch

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 0f5a1f1..a29f8d6 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -77,7 +77,9 @@  enum arm_type_qualifiers
   /* Polynomial types.  */
   qualifier_poly = 0x100,
   /* Lane indices - must be within range of previous argument = a vector.  */
-  qualifier_lane_index = 0x200
+  qualifier_lane_index = 0x200,
+  /* Lane indices for single lane structure loads and stores.  */
+  qualifier_struct_load_store_lane_index = 0x400
 };
 
 /*  The qualifier_internal allows generation of a unary builtin from
@@ -1973,6 +1975,7 @@  typedef enum {
   NEON_ARG_COPY_TO_REG,
   NEON_ARG_CONSTANT,
   NEON_ARG_LANE_INDEX,
+  NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
   NEON_ARG_MEMORY,
   NEON_ARG_STOP
 } builtin_arg;
@@ -2030,9 +2033,9 @@  neon_dereference_pointer (tree exp, tree type, machine_mode mem_mode,
 /* Expand a Neon builtin.  */
 static rtx
 arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
-		      int icode, int have_retval, tree exp, ...)
+		      int icode, int have_retval, tree exp,
+		      builtin_arg *args)
 {
-  va_list ap;
   rtx pat;
   tree arg[SIMD_MAX_BUILTIN_ARGS];
   rtx op[SIMD_MAX_BUILTIN_ARGS];
@@ -2047,13 +2050,11 @@  arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
 	  || !(*insn_data[icode].operand[0].predicate) (target, tmode)))
     target = gen_reg_rtx (tmode);
 
-  va_start (ap, exp);
-
   formals = TYPE_ARG_TYPES (TREE_TYPE (arm_builtin_decls[fcode]));
 
   for (;;)
     {
-      builtin_arg thisarg = (builtin_arg) va_arg (ap, int);
+      builtin_arg thisarg = args[argc];
 
       if (thisarg == NEON_ARG_STOP)
 	break;
@@ -2089,6 +2090,18 @@  arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
 		op[argc] = copy_to_mode_reg (mode[argc], op[argc]);
 	      break;
 
+	    case NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
+	      gcc_assert (argc > 1);
+	      if (CONST_INT_P (op[argc]))
+		{
+		  neon_lane_bounds (op[argc], 0,
+				    GET_MODE_NUNITS (map_mode), exp);
+		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
+		  op[argc] =
+		    GEN_INT (ENDIAN_LANE_N (map_mode, INTVAL (op[argc])));
+		}
+	      goto constant_arg;
+
 	    case NEON_ARG_LANE_INDEX:
 	      /* Previous argument must be a vector, which this indexes.  */
 	      gcc_assert (argc > 0);
@@ -2099,17 +2112,22 @@  arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
 		}
 	      /* Fall through - if the lane index isn't a constant then
 		 the next case will error.  */
+
 	    case NEON_ARG_CONSTANT:
+constant_arg:
 	      if (!(*insn_data[icode].operand[opno].predicate)
 		  (op[argc], mode[argc]))
-		error_at (EXPR_LOCATION (exp), "incompatible type for argument %d, "
-		       "expected %<const int%>", argc + 1);
+		{
+		  error ("%Kargument %d must be a constant immediate",
+			 exp, argc + 1);
+		  return const0_rtx;
+		}
 	      break;
+
             case NEON_ARG_MEMORY:
 	      /* Check if expand failed.  */
 	      if (op[argc] == const0_rtx)
 	      {
-		va_end (ap);
 		return 0;
 	      }
 	      gcc_assert (MEM_P (op[argc]));
@@ -2132,8 +2150,6 @@  arm_expand_neon_args (rtx target, machine_mode map_mode, int fcode,
 	}
     }
 
-  va_end (ap);
-
   if (have_retval)
     switch (argc)
       {
@@ -2245,6 +2261,8 @@  arm_expand_neon_builtin (int fcode, tree exp, rtx target)
 
       if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
 	args[k] = NEON_ARG_LANE_INDEX;
+      else if (d->qualifiers[qualifiers_k] & qualifier_struct_load_store_lane_index)
+	args[k] = NEON_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
       else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
 	args[k] = NEON_ARG_CONSTANT;
       else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
@@ -2270,11 +2288,7 @@  arm_expand_neon_builtin (int fcode, tree exp, rtx target)
      the function is void, and a 1 if it is not.  */
   return arm_expand_neon_args
 	  (target, d->mode, fcode, icode, !is_void, exp,
-	   args[1],
-	   args[2],
-	   args[3],
-	   args[4],
-	   NEON_ARG_STOP);
+	   &args[1]);
 }
 
 /* Expand an expression EXP that calls a built-in function,
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 02f5dc3..448cde3 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -30117,4 +30117,5 @@  arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
   *pri = tmp;
   return;
 }
+
 #include "gt-arm.h"
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 87c9f90..27ac4dc 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -288,6 +288,9 @@  extern void (*arm_lang_output_object_attributes_hook)(void);
 #define TARGET_BPABI false
 #endif
 
+#define ENDIAN_LANE_N(mode, n)  \
+  (BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 - n : n)
+
 /* Support for a compile-time default CPU, et cetera.  The rules are:
    --with-arch is ignored if -march or -mcpu are specified.
    --with-cpu is ignored if -march or -mcpu are specified, and is overridden