[ARM] Make 128 bits the default vector size for NEON

Message ID AANLkTinazE6UQu_WE_iwCFscjasV-CHNqNHZ5bO2zhdT@mail.gmail.com
State New
Headers show

Commit Message

Ira Rosen March 31, 2011, 11:39 a.m.
Hi,

This patch changes NEON's default vector size from 64 to 128 bits.

The patch doesn't touch mvectorize-with-neon-quad, but removes the
uses of TARGET_NEON_VECTORIZE_QUAD.
Following Julian's suggestion I added a param preferred-vector-size
for testing and debugging purposes.

I tested a slightly different version of the patch on
arm-linux-gnueabi with --with-arch=armv7-a --with-float=softfp
--with-fpu=neon. I am now retesting the final version. Also
bootstrapped and tested testsuite changes on powerpc64-suse-linux.

OK for trunk once the testing completes?

Thanks,
Ira

ChangeLog:

      * doc/invoke.texi (preferred-vector-size): Document.
      * params.h (PREFERRED_VECTOR_SIZE): Define.
      * config/arm/arm.c (arm_preferred_simd_mode): Use param
      PREFERRED_VECTOR_SIZE instead of
      TARGET_NEON_VECTORIZE_QUAD. Make 128 bits the default.
      (arm_autovectorize_vector_sizes): Likewise.
      * params.def (PARAM_PREFERRED_VECTOR_SIZE): Define.

testsuite/ChangeLog:

      * lib/target-supports.exp (check_effective_target_vect_multiple_sizes):
      New procedure.
      (add_options_for_quad_vectors): Replace with ...
      (add_options_for_double_vectors): ... this.
      * gfortran.dg/vect/pr19049.f90: Expect more dump messages on targets
      that support multiple vector sizes, since the vectorizer attempts to
      vectorize with both vector sizes.
      * gcc.dg/vect/slp-reduc-6.c, gcc.dg/vect/no-vfa-vect-79.c,
      gcc.dg/vect/no-vfa-vect-102a.c, gcc.dg/vect/vect-outer-1a.c,
      gcc.dg/vect/vect-outer-1b.c, gcc.dg/vect/vect-outer-2b.c,
      gcc.dg/vect/vect-outer-3a.c, gcc.dg/vect/no-vfa-vect-37.c,
      gcc.dg/vect/vect-outer-3b.c, gcc.dg/vect/no-vfa-vect-101.c,
      gcc.dg/vect/no-vfa-vect-102.c, gcc.dg/vect/vect-reduc-dot-s8b.c,
      gcc.dg/vect/vect-outer-1.c, gcc.dg/vect/vect-104.c: Likewise.
      * gcc.dg/vect/vect-16.c: Rename to...
      * gcc.dg/vect/no-fast-math-vect-16.c: ... this to ensure that it runs
      without -ffast-math.
      * gcc.dg/vect/vect-42.c: Run with 64 bit vectors if applicable.
      * gcc.dg/vect/vect-multitypes-6.c, gcc.dg/vect/vect-52.c,
      gcc.dg/vect/vect-54.c, gcc.dg/vect/vect-46.c, gcc.dg/vect/vect-48.c,
      gcc.dg/vect/vect-96.c, gcc.dg/vect/vect-multitypes-3.c,
      gcc.dg/vect/vect-40.c: Likewise.
      * gcc.dg/vect/vect-outer-5.c: Remove quad-vectors option as
      redundant.
      * gcc.dg/vect/vect-109.c, gcc.dg/vect/vect-peel-1.c,
      gcc.dg/vect/vect-peel-2.c, gcc.dg/vect/slp-25.c,
      gcc.dg/vect/vect-multitypes-1.c, gcc.dg/vect/slp-3.c,
      gcc.dg/vect/no-vfa-pr29145.c, gcc.dg/vect/vect-multitypes-4.c:
      Likewise.
      * gcc.dg/vect/vect.exp: Run no-fast-math-vect*.c tests with
      -fno-fast-math.
Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp	(revision 171723)
+++ testsuite/lib/target-supports.exp	(working copy)
@@ -3203,6 +3203,24 @@ proc check_effective_target_vect_strided_wide { }
     return $et_vect_strided_wide_saved
 }
 
+# Return 1 if the target supports multiple vector sizes
+
+proc check_effective_target_vect_multiple_sizes { } {
+    # The result is cached in this global; it must be the variable used below.
+    global et_vect_multiple_sizes_saved
+    if [info exists et_vect_multiple_sizes_saved] {
+        verbose "check_effective_target_vect_multiple_sizes: using cached result" 2
+    } else {
+        set et_vect_multiple_sizes_saved 0
+        if { ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
+            set et_vect_multiple_sizes_saved 1
+        }
+    }
+
+    verbose "check_effective_target_vect_multiple_sizes: returning $et_vect_multiple_sizes_saved" 2
+    return $et_vect_multiple_sizes_saved
+}
+
 # Return 1 if the target supports section-anchors
 
 proc check_effective_target_section_anchors { } {
@@ -3585,9 +3603,9 @@ proc add_options_for_bind_pic_locally { flags } {
 
-# Add to FLAGS the flags needed to enable 128-bit vectors.
+# Add to FLAGS the flags needed to enable 64-bit vectors.
 
-proc add_options_for_quad_vectors { flags } {
+proc add_options_for_double_vectors { flags } {
     if [is-effective-target arm_neon_ok] {
-	return "$flags -mvectorize-with-neon-quad"
+	return "$flags --param preferred-vector-size=64"
     }
 
     return $flags
Index: testsuite/gfortran.dg/vect/pr19049.f90
===================================================================
--- testsuite/gfortran.dg/vect/pr19049.f90	(revision 171723)
+++ testsuite/gfortran.dg/vect/pr19049.f90	(working copy)
@@ -19,6 +19,7 @@ subroutine s111 (ntimes,ld,n,ctime,dtime,a,b,c,d,e
       end
 
 ! { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } }
-! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 1 "vect" { xfail vect_multiple_sizes } } }
+! { dg-final { scan-tree-dump-times "complicated access pattern" 2 "vect" { target vect_multiple_sizes } } }
 ! { dg-final { cleanup-tree-dump "vect" } }
 
Index: testsuite/gcc.dg/vect/vect-16.c
===================================================================
--- testsuite/gcc.dg/vect/vect-16.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-16.c	(working copy)
@@ -1,38 +0,0 @@
-/* { dg-require-effective-target vect_float } */
-
-#include <stdarg.h>
-#include "tree-vect.h"
-
-#define N 16
-#define DIFF 240
-
-__attribute__ ((noinline))
-int main1 ()
-{
-  int i;
-  float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
-  float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-  float diff;
-
-  diff = 0;
-  for (i = 0; i < N; i++) {
-    diff += (b[i] - c[i]);
-  }
-
-  /* check results:  */
-  if (diff != DIFF)
-    abort ();
-
-  return 0;
-}
-
-int main (void)
-{ 
-  check_vect ();
-  
-  return main1 ();
-}
-
-/* Requires fast-math.  */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-42.c
===================================================================
--- testsuite/gcc.dg/vect/vect-42.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-42.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/slp-reduc-6.c
===================================================================
--- testsuite/gcc.dg/vect/slp-reduc-6.c	(revision 171723)
+++ testsuite/gcc.dg/vect/slp-reduc-6.c	(working copy)
@@ -44,6 +44,7 @@ int main (void)
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail { vect_no_int_add || { ! vect_unpack } } } } } */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { target { ! vect_no_int_add } } } } */
+/* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 1 "vect" { xfail { vect_multiple_sizes || vect_no_int_add } } } } */
+/* { dg-final { scan-tree-dump-times "different interleaving chains in one node" 2 "vect" { target { {! vect_no_int_add} && vect_multiple_sizes } } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
Index: testsuite/gcc.dg/vect/no-vfa-vect-79.c
===================================================================
--- testsuite/gcc.dg/vect/no-vfa-vect-79.c	(revision 171723)
+++ testsuite/gcc.dg/vect/no-vfa-vect-79.c	(working copy)
@@ -46,5 +46,6 @@ int main (void)
   If/when the aliasing problems are resolved, unalignment may
   prevent vectorization on some targets.  */
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/no-vfa-vect-102a.c
===================================================================
--- testsuite/gcc.dg/vect/no-vfa-vect-102a.c	(revision 171723)
+++ testsuite/gcc.dg/vect/no-vfa-vect-102a.c	(working copy)
@@ -53,6 +53,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
Index: testsuite/gcc.dg/vect/vect-outer-5.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-5.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-outer-5.c	(working copy)
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_float } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include <signal.h>
Index: testsuite/gcc.dg/vect/vect-multitypes-6.c
===================================================================
--- testsuite/gcc.dg/vect/vect-multitypes-6.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-multitypes-6.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-52.c
===================================================================
--- testsuite/gcc.dg/vect/vect-52.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-52.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-109.c
===================================================================
--- testsuite/gcc.dg/vect/vect-109.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-109.c	(working copy)
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-54.c
===================================================================
--- testsuite/gcc.dg/vect/vect-54.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-54.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-46.c
===================================================================
--- testsuite/gcc.dg/vect/vect-46.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-46.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-peel-1.c
===================================================================
--- testsuite/gcc.dg/vect/vect-peel-1.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-peel-1.c	(working copy)
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-outer-1a.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-1a.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-outer-1a.c	(working copy)
@@ -20,5 +20,6 @@ foo (){
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-peel-2.c
===================================================================
--- testsuite/gcc.dg/vect/vect-peel-2.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-peel-2.c	(working copy)
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-48.c
===================================================================
--- testsuite/gcc.dg/vect/vect-48.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-48.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-outer-1b.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-1b.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-outer-1b.c	(working copy)
@@ -22,5 +22,6 @@ foo (){
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-outer-2b.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-2b.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-outer-2b.c	(working copy)
@@ -37,5 +37,6 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "strided access in outer loop." 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-outer-3a.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-3a.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-outer-3a.c	(working copy)
@@ -49,5 +49,6 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail vect_no_align } } } */
-/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "step doesn't divide the vector-size" 4 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/no-vfa-vect-37.c
===================================================================
--- testsuite/gcc.dg/vect/no-vfa-vect-37.c	(revision 171723)
+++ testsuite/gcc.dg/vect/no-vfa-vect-37.c	(working copy)
@@ -58,5 +58,6 @@ int main (void)
    If/when the aliasing problems are resolved, unalignment may
    prevent vectorization on some targets.  */
 /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence between" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 4 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/slp-25.c
===================================================================
--- testsuite/gcc.dg/vect/slp-25.c	(revision 171723)
+++ testsuite/gcc.dg/vect/slp-25.c	(working copy)
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-outer-3b.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-3b.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-outer-3b.c	(working copy)
@@ -49,5 +49,6 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 4 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/no-fast-math-vect-16.c
===================================================================
--- testsuite/gcc.dg/vect/no-fast-math-vect-16.c	(revision 0)
+++ testsuite/gcc.dg/vect/no-fast-math-vect-16.c	(revision 0)
@@ -0,0 +1,38 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 16
+#define DIFF 240
+
+__attribute__ ((noinline))
+int main1 ()
+{
+  int i;
+  float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
+  float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+  float diff;
+
+  diff = 0;
+  for (i = 0; i < N; i++) {
+    diff += (b[i] - c[i]);
+  }
+
+  /* check results:  */
+  if (diff != DIFF)
+    abort ();
+
+  return 0;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  return main1 ();
+}
+
+/* Requires fast-math.  */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/no-vfa-vect-101.c
===================================================================
--- testsuite/gcc.dg/vect/no-vfa-vect-101.c	(revision 171723)
+++ testsuite/gcc.dg/vect/no-vfa-vect-101.c	(working copy)
@@ -45,6 +45,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "can't determine dependence" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
Index: testsuite/gcc.dg/vect/no-vfa-vect-102.c
===================================================================
--- testsuite/gcc.dg/vect/no-vfa-vect-102.c	(revision 171723)
+++ testsuite/gcc.dg/vect/no-vfa-vect-102.c	(working copy)
@@ -53,6 +53,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
Index: testsuite/gcc.dg/vect/vect-96.c
===================================================================
--- testsuite/gcc.dg/vect/vect-96.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-96.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-multitypes-1.c
===================================================================
--- testsuite/gcc.dg/vect/vect-multitypes-1.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-multitypes-1.c	(working copy)
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c
===================================================================
--- testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c	(working copy)
@@ -58,7 +58,8 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_multiple_sizes } } } */
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
 
Index: testsuite/gcc.dg/vect/vect-outer-1.c
===================================================================
--- testsuite/gcc.dg/vect/vect-outer-1.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-outer-1.c	(working copy)
@@ -22,5 +22,6 @@ foo (){
 }
 
 /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "strided access in outer loop" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/slp-3.c
===================================================================
--- testsuite/gcc.dg/vect/slp-3.c	(revision 171723)
+++ testsuite/gcc.dg/vect/slp-3.c	(working copy)
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
@@ -79,7 +78,7 @@ main1 ()
     }
 
   /* check results:  */
-  for (i = 0; i < N/2; i++)
+  for (i = 0; i < N/4; i++)
     {
       if (out[i*16] !=  in[i*16]
          || out[i*16 + 1] != in[i*16 + 1]
Index: testsuite/gcc.dg/vect/no-vfa-pr29145.c
===================================================================
--- testsuite/gcc.dg/vect/no-vfa-pr29145.c	(revision 171723)
+++ testsuite/gcc.dg/vect/no-vfa-pr29145.c	(working copy)
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect.exp
===================================================================
--- testsuite/gcc.dg/vect/vect.exp	(revision 171723)
+++ testsuite/gcc.dg/vect/vect.exp	(working copy)
@@ -102,6 +102,12 @@ lappend DEFAULT_VECTCFLAGS "-ffast-math"
 dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-*.\[cS\]]]  \
 	"" $DEFAULT_VECTCFLAGS
 
+# -fno-fast-math tests
+set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+lappend DEFAULT_VECTCFLAGS "-fno-fast-math"
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/no-fast-math-*.\[cS\]]]  \
+        "" $DEFAULT_VECTCFLAGS
+
 # -fno-math-errno tests
 set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
 lappend DEFAULT_VECTCFLAGS "-fno-math-errno"
Index: testsuite/gcc.dg/vect/vect-multitypes-3.c
===================================================================
--- testsuite/gcc.dg/vect/vect-multitypes-3.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-multitypes-3.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-40.c
===================================================================
--- testsuite/gcc.dg/vect/vect-40.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-40.c	(working copy)
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_float } */
+/* { dg-add-options double_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"
Index: testsuite/gcc.dg/vect/vect-104.c
===================================================================
--- testsuite/gcc.dg/vect/vect-104.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-104.c	(working copy)
@@ -64,6 +64,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
-/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 1 "vect" { xfail vect_multiple_sizes } } } */
+/* { dg-final { scan-tree-dump-times "possible dependence between data-refs" 2 "vect" { target vect_multiple_sizes } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
Index: testsuite/gcc.dg/vect/vect-multitypes-4.c
===================================================================
--- testsuite/gcc.dg/vect/vect-multitypes-4.c	(revision 171723)
+++ testsuite/gcc.dg/vect/vect-multitypes-4.c	(working copy)
@@ -1,5 +1,4 @@
 /* { dg-require-effective-target vect_int } */
-/* { dg-add-options quad_vectors } */
 
 #include <stdarg.h>
 #include "tree-vect.h"

Comments

Ira Rosen March 31, 2011, 1:08 p.m. | #1
On 31 March 2011 14:28, Joseph S. Myers <joseph@codesourcery.com> wrote:
> On Thu, 31 Mar 2011, Ira Rosen wrote:
>
>> +Illegal values are ignored.  The default is 128.
>
> See the GNU Coding Standards
> <http://www.gnu.org/prep/standards/html_node/GNU-Manuals.html>:
>
>   Please do not use the term "illegal" to refer to erroneous input to a
>   computer program. Please use "invalid" for this, and reserve the term
>   "illegal" for activities prohibited by law.

I'll fix this.

Thanks,
Ira

>
> --
> Joseph S. Myers
> joseph@codesourcery.com
>
Nathan Froyd March 31, 2011, 1:11 p.m. | #2
On Thu, Mar 31, 2011 at 01:39:05PM +0200, Ira Rosen wrote:
> This patch changes NEON's default vector size from 64 to 128 bits.

No comments about the patch itself, but this change should be noted in
changes.html.

-Nathan
Ira Rosen March 31, 2011, 1:20 p.m. | #3
On 31 March 2011 15:11, Nathan Froyd <froydnj@codesourcery.com> wrote:
> On Thu, Mar 31, 2011 at 01:39:05PM +0200, Ira Rosen wrote:
>> This patch changes NEON's default vector size from 64 to 128 bits.
>
> No comments about the patch itself, but this change should be noted in
> changes.html.

I'll do that.

Thanks,
Ira

>
> -Nathan
>
Ramana Radhakrishnan April 5, 2011, 1:30 p.m. | #4
On 31/03/11 12:39, Ira Rosen wrote:
> Hi,
>
> This patch changes NEON's default vector size from 64 to 128 bits.
>
> The patch doesn't touch mvectorize-with-neon-quad, but removes the
> uses of TARGET_NEON_VECTORIZE_QUAD.
> Following Julian's suggestion I added a param preferred-vector-size
> for testing and debugging purposes.

IIUC, this patch makes mvectorize-with-neon-quad effectively a no-op 
which is ok with me.

If this is now the default, what is the behaviour of 
-mno-vectorize-with-neon-quad now ? Should that set the value of the 
parameter to 64 or should we be rejecting the negative of that option ?

If not, we should then consider adding a RejectNegative to that option.


cheers
Ramana


--
Ramana Radhakrishnan
Ira Rosen April 7, 2011, 6:34 a.m. | #5
On 6 April 2011 16:07, Hans-Peter Nilsson <hans-peter.nilsson@axis.com> wrote:
>> Date: Thu, 31 Mar 2011 13:39:05 +0200
>> From: Ira Rosen <ira.rosen@linaro.org>
>
>> This patch changes NEON's default vector size from 64 to 128 bits.
>
> I'm wondering, are there NEON-specific measurements to support
> this change?
>
> A colleague of mine implemented support for 64- and 128-bit NEON
> for RAPP <http://savannah.nongnu.org/projects/rapp/>, but found
> that the 128-bit version was slower.

The best vector size depends on the particular target (a version of
ARM) and the benchmark. The vectorizer has a cost model that allows it
to fall back to 64-bit vectors if 128-bit version is not profitable
(-fvect-cost-model). We plan to enhance the cost model to estimate
both versions and vectorize with the most profitable vector size. For
now, you can use preferred-vector-size param if needed. Having 64 as a
default doesn't allow us to use different NEON instructions that use
both types of vectors.

Ira

>
> brgds, H-P
>

Patch

Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 171723)
+++ doc/invoke.texi	(working copy)
@@ -8874,6 +8874,10 @@  The maximum number of conditional stores paires th
 if either vectorization (@option{-ftree-vectorize}) or if-conversion
 (@option{-ftree-loop-if-convert}) is disabled.  The default is 2.
 
+@item preferred-vector-size
+Preferred vector size in bits for targets that support multiple vector sizes.
+Illegal values are ignored.  The default is 128.
+
 @end table
 @end table
 
Index: params.h
===================================================================
--- params.h	(revision 171723)
+++ params.h	(working copy)
@@ -204,6 +204,8 @@  extern void init_param_values (int *params);
   PARAM_VALUE (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO)
 #define MIN_NONDEBUG_INSN_UID \
   PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID)
+#define PREFERRED_VECTOR_SIZE \
+  PARAM_VALUE (PARAM_PREFERRED_VECTOR_SIZE)
 #define MAX_STORES_TO_SINK \
   PARAM_VALUE (PARAM_MAX_STORES_TO_SINK)
 #endif /* ! GCC_PARAMS_H */
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 171723)
+++ config/arm/arm.c	(working copy)
@@ -22297,17 +22297,16 @@  arm_preferred_simd_mode (enum machine_mode mode)
     switch (mode)
       {
       case SFmode:
-	return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
+        return (PREFERRED_VECTOR_SIZE == 64) ? V2SFmode : V4SFmode;
       case SImode:
-	return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
+        return (PREFERRED_VECTOR_SIZE == 64) ? V2SImode : V4SImode;
       case HImode:
-	return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
+        return (PREFERRED_VECTOR_SIZE == 64) ? V4HImode : V8HImode;
       case QImode:
-	return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
+        return (PREFERRED_VECTOR_SIZE == 64)? V8QImode : V16QImode;
       case DImode:
-	if (TARGET_NEON_VECTORIZE_QUAD)
-	  return V2DImode;
-	break;
+        if (PREFERRED_VECTOR_SIZE != 64)
+          return V2DImode;
 
       default:;
       }
@@ -23535,7 +23534,7 @@  arm_expand_sync (enum machine_mode mode,
 static unsigned int
 arm_autovectorize_vector_sizes (void)
 {
-  return TARGET_NEON_VECTORIZE_QUAD ? 16 | 8 : 0;
+  return (PREFERRED_VECTOR_SIZE != 64) ? 16 | 8 : 0;
 }
 
 static bool
Index: params.def
===================================================================
--- params.def	(revision 171723)
+++ params.def	(working copy)
@@ -880,6 +880,12 @@  DEFPARAM (PARAM_MAX_STORES_TO_SINK,
           2, 0, 0)
 
 
+/* Preferred vector size in bits.  Illegal values are ignored.  */
+DEFPARAM (PARAM_PREFERRED_VECTOR_SIZE,
+          "preferred-vector-size",
+          "Preferred vector size in bits",
+          128, 0, 0)
+
 /*
 Local variables:
 mode:c