===================================================================
@@ -896,7 +896,7 @@ extern void vect_slp_transform_bb (basic_block);
/* Pattern recognition functions.
Additional pattern recognition functions can (and will) be added
in the future. */
-typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
+typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
#define NUM_PATTERNS 4
void vect_pattern_recog (loop_vec_info);
===================================================================
@@ -38,16 +38,11 @@ along with GCC; see the file COPYING3. If not see
#include "recog.h"
#include "diagnostic-core.h"
-/* Function prototypes */
-static void vect_pattern_recog_1
- (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator);
-static bool widened_name_p (tree, gimple, tree *, gimple *);
-
/* Pattern recognition functions */
-static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *);
-static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *);
-static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *);
-static gimple vect_recog_pow_pattern (gimple, tree *, tree *);
+static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *);
+static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *);
+static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *);
+static gimple vect_recog_pow_pattern (gimple *, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
vect_recog_widen_mult_pattern,
vect_recog_widen_sum_pattern,
@@ -61,10 +56,12 @@ static vect_recog_func_ptr vect_vect_recog_func_pt
is a result of a type-promotion, such that:
DEF_STMT: NAME = NOP (name0)
where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
-*/
+ If CHECK_SIGN is TRUE, check that either both types are signed or both are
+ unsigned. */
static bool
-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt)
+widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
+ bool check_sign)
{
tree dummy;
gimple dummy_gimple;
@@ -98,7 +95,7 @@ static bool
*half_type = TREE_TYPE (oprnd0);
if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
- || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type))
+ || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
|| (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
return false;
@@ -168,12 +165,12 @@ vect_recog_temp_ssa_var (tree type, gimple stmt)
inner-loop nested in an outer-loop that us being vectorized). */
static gimple
-vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
+vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
{
gimple stmt;
tree oprnd0, oprnd1;
tree oprnd00, oprnd01;
- stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
tree type, half_type;
gimple pattern_stmt;
tree prod_type;
@@ -181,10 +178,10 @@ static gimple
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
tree var;
- if (!is_gimple_assign (last_stmt))
+ if (!is_gimple_assign (*last_stmt))
return NULL;
- type = gimple_expr_type (last_stmt);
+ type = gimple_expr_type (*last_stmt);
/* Look for the following pattern
DX = (TYPE1) X;
@@ -210,7 +207,7 @@ static gimple
/* Starting from LAST_STMT, follow the defs of its uses in search
of the above pattern. */
- if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
+ if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
return NULL;
if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
@@ -231,14 +228,14 @@ static gimple
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
return NULL;
- oprnd0 = gimple_assign_rhs1 (last_stmt);
- oprnd1 = gimple_assign_rhs2 (last_stmt);
+ oprnd0 = gimple_assign_rhs1 (*last_stmt);
+ oprnd1 = gimple_assign_rhs2 (*last_stmt);
if (!types_compatible_p (TREE_TYPE (oprnd0), type)
|| !types_compatible_p (TREE_TYPE (oprnd1), type))
return NULL;
- stmt = last_stmt;
+ stmt = *last_stmt;
- if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt))
+ if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
{
stmt = def_stmt;
oprnd0 = gimple_assign_rhs1 (stmt);
@@ -247,7 +244,7 @@ static gimple
half_type = type;
}
- /* So far so good. Since last_stmt was detected as a (summation) reduction,
+ /* So far so good. Since *last_stmt was detected as a (summation) reduction,
we know that oprnd1 is the reduction variable (defined by a loop-header
phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
Left to check that oprnd0 is defined by a (widen_)mult_expr */
@@ -293,10 +290,10 @@ static gimple
if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
|| !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
return NULL;
- if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt))
+ if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
return NULL;
oprnd00 = gimple_assign_rhs1 (def_stmt);
- if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt))
+ if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
return NULL;
oprnd01 = gimple_assign_rhs1 (def_stmt);
if (!types_compatible_p (half_type0, half_type1))
@@ -322,7 +319,7 @@ static gimple
/* We don't allow changing the order of the computation in the inner-loop
when doing outer-loop vectorization. */
- gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
+ gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
return pattern_stmt;
}
@@ -342,24 +339,47 @@ static gimple
where type 'TYPE' is at least double the size of type 'type'.
+ Also detect unsigned cases:
+
+ unsigned type a_t, b_t;
+ unsigned TYPE u_prod_T;
+ TYPE a_T, b_T, prod_T;
+
+ S1 a_t = ;
+ S2 b_t = ;
+ S3 a_T = (TYPE) a_t;
+ S4 b_T = (TYPE) b_t;
+ S5 prod_T = a_T * b_T;
+ S6 u_prod_T = (unsigned TYPE) prod_T;
+
+ and multiplication by constants:
+
+ type a_t;
+ TYPE a_T, prod_T;
+
+ S1 a_t = ;
+ S3 a_T = (TYPE) a_t;
+ S5 prod_T = a_T * CONST;
+
Input:
- * LAST_STMT: A stmt from which the pattern search begins. In the example,
- when this function is called with S5, the pattern {S3,S4,S5} is be detected.
+ * LAST_STMT: A stmt from which the pattern search begins. In the example,
+ when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
+ detected.
Output:
* TYPE_IN: The type of the input arguments to the pattern.
- * TYPE_OUT: The type of the output of this pattern.
+ * TYPE_OUT: The type of the output of this pattern.
* Return value: A new stmt that will be used to replace the sequence of
- stmts that constitute the pattern. In this case it will be:
+ stmts that constitute the pattern. In this case it will be:
WIDEN_MULT <a_t, b_t>
*/
static gimple
-vect_recog_widen_mult_pattern (gimple last_stmt,
+vect_recog_widen_mult_pattern (gimple *last_stmt,
tree *type_in,
tree *type_out)
{
@@ -367,40 +387,111 @@ static gimple
tree oprnd0, oprnd1;
tree type, half_type0, half_type1;
gimple pattern_stmt;
- tree vectype, vectype_out;
+ tree vectype, vectype_out = NULL_TREE;
tree dummy;
tree var;
enum tree_code dummy_code;
int dummy_int;
VEC (tree, heap) *dummy_vec;
+ bool op0_ok, op1_ok;
- if (!is_gimple_assign (last_stmt))
+ if (!is_gimple_assign (*last_stmt))
return NULL;
- type = gimple_expr_type (last_stmt);
+ type = gimple_expr_type (*last_stmt);
/* Starting from LAST_STMT, follow the defs of its uses in search
of the above pattern. */
- if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
+ if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR)
return NULL;
- oprnd0 = gimple_assign_rhs1 (last_stmt);
- oprnd1 = gimple_assign_rhs2 (last_stmt);
+ oprnd0 = gimple_assign_rhs1 (*last_stmt);
+ oprnd1 = gimple_assign_rhs2 (*last_stmt);
if (!types_compatible_p (TREE_TYPE (oprnd0), type)
|| !types_compatible_p (TREE_TYPE (oprnd1), type))
return NULL;
- /* Check argument 0 */
- if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0))
- return NULL;
- oprnd0 = gimple_assign_rhs1 (def_stmt0);
+ /* Check argument 0. */
+ op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false);
+ /* Check argument 1. */
+ op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false);
- /* Check argument 1 */
- if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1))
+ /* In case of multiplication by a constant one of the operands may not match
+ the pattern, but not both. */
+ if (!op0_ok && !op1_ok)
return NULL;
- oprnd1 = gimple_assign_rhs1 (def_stmt1);
+ if (op0_ok && op1_ok)
+ {
+ oprnd0 = gimple_assign_rhs1 (def_stmt0);
+ oprnd1 = gimple_assign_rhs1 (def_stmt1);
+ }
+ else if (!op0_ok)
+ {
+ if (CONSTANT_CLASS_P (oprnd0)
+ && TREE_CODE (half_type1) == INTEGER_TYPE
+ && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1))
+ && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0))
+ {
+ /* OPRND0 is a constant of HALF_TYPE1. */
+ half_type0 = half_type1;
+ oprnd1 = gimple_assign_rhs1 (def_stmt1);
+ }
+ else
+ return NULL;
+ }
+ else if (!op1_ok)
+ {
+ if (CONSTANT_CLASS_P (oprnd1)
+ && TREE_CODE (half_type0) == INTEGER_TYPE
+ && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0))
+ && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1))
+ {
+ /* OPRND1 is a constant of HALF_TYPE0. */
+ half_type1 = half_type0;
+ oprnd0 = gimple_assign_rhs1 (def_stmt0);
+ }
+ else
+ return NULL;
+ }
+
+ /* Handle unsigned case. Look for
+ S6 u_prod_T = (unsigned TYPE) prod_T;
+ Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
+ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
+ {
+ tree lhs = gimple_assign_lhs (*last_stmt), use_lhs;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ int nuses = 0;
+ gimple use_stmt = NULL;
+ tree use_type;
+
+ if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
+ return NULL;
+
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+ {
+ use_stmt = USE_STMT (use_p);
+ nuses++;
+ }
+
+ if (nuses != 1 || !is_gimple_assign (use_stmt)
+ || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
+ return NULL;
+
+ use_lhs = gimple_assign_lhs (use_stmt);
+ use_type = TREE_TYPE (use_lhs);
+ if (!INTEGRAL_TYPE_P (use_type)
+ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
+ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
+ return NULL;
+
+ type = use_type;
+ *last_stmt = use_stmt;
+ }
+
if (!types_compatible_p (half_type0, half_type1))
return NULL;
@@ -413,7 +504,7 @@ static gimple
vectype_out = get_vectype_for_scalar_type (type);
if (!vectype
|| !vectype_out
- || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
+ || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt,
vectype_out, vectype,
&dummy, &dummy, &dummy_code,
&dummy_code, &dummy_int, &dummy_vec))
@@ -462,16 +553,16 @@ static gimple
*/
static gimple
-vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out)
+vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
{
tree fn, base, exp = NULL;
gimple stmt;
tree var;
- if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
+ if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL)
return NULL;
- fn = gimple_call_fndecl (last_stmt);
+ fn = gimple_call_fndecl (*last_stmt);
if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
return NULL;
@@ -481,8 +572,8 @@ static gimple
case BUILT_IN_POWI:
case BUILT_IN_POWF:
case BUILT_IN_POW:
- base = gimple_call_arg (last_stmt, 0);
- exp = gimple_call_arg (last_stmt, 1);
+ base = gimple_call_arg (*last_stmt, 0);
+ exp = gimple_call_arg (*last_stmt, 1);
if (TREE_CODE (exp) != REAL_CST
&& TREE_CODE (exp) != INTEGER_CST)
return NULL;
@@ -574,21 +665,21 @@ static gimple
inner-loop nested in an outer-loop that us being vectorized). */
static gimple
-vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
+vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
{
gimple stmt;
tree oprnd0, oprnd1;
- stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
tree type, half_type;
gimple pattern_stmt;
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
tree var;
- if (!is_gimple_assign (last_stmt))
+ if (!is_gimple_assign (*last_stmt))
return NULL;
- type = gimple_expr_type (last_stmt);
+ type = gimple_expr_type (*last_stmt);
/* Look for the following pattern
DX = (TYPE) X;
@@ -600,25 +691,25 @@ static gimple
/* Starting from LAST_STMT, follow the defs of its uses in search
of the above pattern. */
- if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
+ if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
return NULL;
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
return NULL;
- oprnd0 = gimple_assign_rhs1 (last_stmt);
- oprnd1 = gimple_assign_rhs2 (last_stmt);
+ oprnd0 = gimple_assign_rhs1 (*last_stmt);
+ oprnd1 = gimple_assign_rhs2 (*last_stmt);
if (!types_compatible_p (TREE_TYPE (oprnd0), type)
|| !types_compatible_p (TREE_TYPE (oprnd1), type))
return NULL;
- /* So far so good. Since last_stmt was detected as a (summation) reduction,
+ /* So far so good. Since *last_stmt was detected as a (summation) reduction,
we know that oprnd1 is the reduction variable (defined by a loop-header
phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
Left to check that oprnd0 is defined by a cast from type 'type' to type
'TYPE'. */
- if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt))
+ if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true))
return NULL;
oprnd0 = gimple_assign_rhs1 (stmt);
@@ -639,7 +730,7 @@ static gimple
/* We don't allow changing the order of the computation in the inner-loop
when doing outer-loop vectorization. */
- gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
+ gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
return pattern_stmt;
}
@@ -669,23 +760,27 @@ static gimple
static void
vect_pattern_recog_1 (
- gimple (* vect_recog_func) (gimple, tree *, tree *),
+ gimple (* vect_recog_func) (gimple *, tree *, tree *),
gimple_stmt_iterator si)
{
gimple stmt = gsi_stmt (si), pattern_stmt;
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ stmt_vec_info stmt_info;
stmt_vec_info pattern_stmt_info;
- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ loop_vec_info loop_vinfo;
tree pattern_vectype;
tree type_in, type_out;
enum tree_code code;
int i;
gimple next;
- pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
+ pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out);
if (!pattern_stmt)
return;
+ si = gsi_for_stmt (stmt);
+ stmt_info = vinfo_for_stmt (stmt);
+ loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+
if (VECTOR_MODE_P (TYPE_MODE (type_in)))
{
/* No need to check target support (already checked by the pattern
@@ -832,7 +927,7 @@ vect_pattern_recog (loop_vec_info loop_vinfo)
unsigned int nbbs = loop->num_nodes;
gimple_stmt_iterator si;
unsigned int i, j;
- gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
+ gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_pattern_recog ===");
===================================================================
@@ -3232,6 +3232,33 @@ vectorizable_type_promotion (gimple stmt, gimple_s
fprintf (vect_dump, "use not simple.");
return false;
}
+
+ op_type = TREE_CODE_LENGTH (code);
+ if (op_type == binary_op)
+ {
+ bool ok;
+
+ op1 = gimple_assign_rhs2 (stmt);
+ if (code == WIDEN_MULT_EXPR)
+ {
+ /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
+ OP1. */
+ if (CONSTANT_CLASS_P (op0))
+ ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
+ &def_stmt, &def, &dt[1], &vectype_in);
+ else
+ ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
+ &dt[1]);
+
+ if (!ok)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "use not simple.");
+ return false;
+ }
+ }
+ }
+
/* If op0 is an external or constant def use a vector type with
the same size as the output vector type. */
if (!vectype_in)
@@ -3264,18 +3291,6 @@ vectorizable_type_promotion (gimple stmt, gimple_s
gcc_assert (ncopies >= 1);
- op_type = TREE_CODE_LENGTH (code);
- if (op_type == binary_op)
- {
- op1 = gimple_assign_rhs2 (stmt);
- if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "use not simple.");
- return false;
- }
- }
-
/* Supportable by target? */
if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
&decl1, &decl2, &code1, &code2,
@@ -3301,6 +3316,14 @@ vectorizable_type_promotion (gimple stmt, gimple_s
fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
ncopies);
+ if (code == WIDEN_MULT_EXPR)
+ {
+ if (CONSTANT_CLASS_P (op0))
+ op0 = fold_convert (TREE_TYPE (op1), op0);
+ else if (CONSTANT_CLASS_P (op1))
+ op1 = fold_convert (TREE_TYPE (op0), op1);
+ }
+
/* Handle def. */
/* In case of multi-step promotion, we first generate promotion operations
to the intermediate types, and then from that types to the final one.