===================================================================
@@ -4238,6 +4238,17 @@ are vectors with N signed/unsigned elements of siz
elements of the two vectors, and put the N/2 products of size 2*S in the
output vector (operand 0).
+@cindex @code{vec_widen_ushiftl_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_ushiftl_lo_@var{m}} instruction pattern
+@cindex @code{vec_widen_sshiftl_hi_@var{m}} instruction pattern
+@cindex @code{vec_widen_sshiftl_lo_@var{m}} instruction pattern
+@item @samp{vec_widen_ushiftl_hi_@var{m}}, @samp{vec_widen_ushiftl_lo_@var{m}}
+@itemx @samp{vec_widen_sshiftl_hi_@var{m}}, @samp{vec_widen_sshiftl_lo_@var{m}}
+Signed/Unsigned widening shift left. The first input (operand 1) is a vector
+with N signed/unsigned elements of size S@. Operand 2 is a constant shift
+amount in bits. Shift the high/low elements of operand 1, and put the N/2
+results of size 2*S in the output vector (operand 0).
+
@cindex @code{mulhisi3} instruction pattern
@item @samp{mulhisi3}
Multiply operands 1 and 2, which have mode @code{HImode}, and store
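As a rough reference for the semantics described above (a sketch, not code from the
patch), the vec_widen_ushiftl_lo/hi patterns behave like the following scalar loops,
assuming a vector of N unsigned char elements widened to unsigned short; N and the
ref_* function names are illustrative only:

#define N 8	/* assumed number of narrow elements per input vector */

/* 'lo' variant: widen the low N/2 elements of operand 1, then shift.  */
void
ref_widen_ushiftl_lo (unsigned short dst[N / 2], const unsigned char src[N],
		      int shift)
{
  int i;
  for (i = 0; i < N / 2; i++)
    dst[i] = (unsigned short) src[i] << shift;
}

/* 'hi' variant: same, but for the high N/2 elements.  */
void
ref_widen_ushiftl_hi (unsigned short dst[N / 2], const unsigned char src[N],
		      int shift)
{
  int i;
  for (i = 0; i < N / 2; i++)
    dst[i] = (unsigned short) src[N / 2 + i] << shift;
}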
===================================================================
@@ -1599,6 +1599,7 @@ dump_generic_node (pretty_printer *buffer, tree no
case RROTATE_EXPR:
case VEC_LSHIFT_EXPR:
case VEC_RSHIFT_EXPR:
+ case WIDEN_SHIFT_LEFT_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
@@ -2287,6 +2288,22 @@ dump_generic_node (pretty_printer *buffer, tree no
pp_string (buffer, " > ");
break;
+ case VEC_WIDEN_SHIFT_LEFT_HI_EXPR:
+ pp_string (buffer, " VEC_WIDEN_SHIFT_LEFT_HI_EXPR < ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_string (buffer, " > ");
+ break;
+
+ case VEC_WIDEN_SHIFT_LEFT_LO_EXPR:
+ pp_string (buffer, " VEC_WIDEN_SHIFT_LEFT_LO_EXPR < ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
+ pp_string (buffer, " > ");
+ break;
+
case VEC_UNPACK_HI_EXPR:
pp_string (buffer, " VEC_UNPACK_HI_EXPR < ");
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
@@ -2609,6 +2626,9 @@ op_code_prio (enum tree_code code)
case RSHIFT_EXPR:
case LROTATE_EXPR:
case RROTATE_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_HI_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_LO_EXPR:
+ case WIDEN_SHIFT_LEFT_EXPR:
return 11;
case WIDEN_SUM_EXPR:
@@ -2784,6 +2804,9 @@ op_symbol_code (enum tree_code code)
case VEC_RSHIFT_EXPR:
return "v>>";
+ case WIDEN_SHIFT_LEFT_EXPR:
+ return "w<<";
+
case POINTER_PLUS_EXPR:
return "+";
===================================================================
@@ -479,6 +479,14 @@ optab_for_tree_code (enum tree_code code, const_tr
return TYPE_UNSIGNED (type) ?
vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
+ case VEC_WIDEN_SHIFT_LEFT_HI_EXPR:
+ return TYPE_UNSIGNED (type) ?
+ vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;
+
+ case VEC_WIDEN_SHIFT_LEFT_LO_EXPR:
+ return TYPE_UNSIGNED (type) ?
+ vec_widen_ushiftl_lo_optab : vec_widen_sshiftl_lo_optab;
+
case VEC_UNPACK_HI_EXPR:
return TYPE_UNSIGNED (type) ?
vec_unpacku_hi_optab : vec_unpacks_hi_optab;
@@ -6132,6 +6140,10 @@ init_optabs (void)
init_optab (vec_widen_umult_lo_optab, UNKNOWN);
init_optab (vec_widen_smult_hi_optab, UNKNOWN);
init_optab (vec_widen_smult_lo_optab, UNKNOWN);
+ init_optab (vec_widen_ushiftl_hi_optab, UNKNOWN);
+ init_optab (vec_widen_ushiftl_lo_optab, UNKNOWN);
+ init_optab (vec_widen_sshiftl_hi_optab, UNKNOWN);
+ init_optab (vec_widen_sshiftl_lo_optab, UNKNOWN);
init_optab (vec_unpacks_hi_optab, UNKNOWN);
init_optab (vec_unpacks_lo_optab, UNKNOWN);
init_optab (vec_unpacku_hi_optab, UNKNOWN);
===================================================================
@@ -351,6 +351,12 @@ enum optab_index
OTI_vec_widen_umult_lo,
OTI_vec_widen_smult_hi,
OTI_vec_widen_smult_lo,
+ /* Widening shift left.
+ The high/low part of the resulting vector is returned. */
+ OTI_vec_widen_ushiftl_hi,
+ OTI_vec_widen_ushiftl_lo,
+ OTI_vec_widen_sshiftl_hi,
+ OTI_vec_widen_sshiftl_lo,
/* Extract and widen the high/low part of a vector of signed or
floating point elements. */
OTI_vec_unpacks_hi,
@@ -544,6 +550,10 @@ enum optab_index
#define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo])
#define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi])
#define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo])
+#define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi])
+#define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo])
+#define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi])
+#define vec_widen_sshiftl_lo_optab (&optab_table[OTI_vec_widen_sshiftl_lo])
#define vec_unpacks_hi_optab (&optab_table[OTI_vec_unpacks_hi])
#define vec_unpacks_lo_optab (&optab_table[OTI_vec_unpacks_lo])
#define vec_unpacku_hi_optab (&optab_table[OTI_vec_unpacku_hi])
===================================================================
@@ -269,6 +269,10 @@ static const char * const optabs[] =
"set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))",
"set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))",
"set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))",
+ "set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))",
+ "set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))",
+ "set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))",
+ "set_optab_handler (vec_widen_sshiftl_lo_optab, $A, CODE_FOR_$(vec_widen_sshiftl_lo_$a$))",
"set_optab_handler (vec_unpacks_hi_optab, $A, CODE_FOR_$(vec_unpacks_hi_$a$))",
"set_optab_handler (vec_unpacks_lo_optab, $A, CODE_FOR_$(vec_unpacks_lo_$a$))",
"set_optab_handler (vec_unpacku_hi_optab, $A, CODE_FOR_$(vec_unpacku_hi_$a$))",
===================================================================
@@ -2890,6 +2890,26 @@ proc check_effective_target_vect_widen_mult_hi_to_
}
# Return 1 if the target plus current options supports a vector
+# widening shift, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_widen_shift { } {
+ global et_vect_widen_shift_saved
+
+ if [info exists et_vect_widen_shift_saved] {
+ verbose "check_effective_target_vect_widen_shift: using cached result" 2
+ } else {
+ set et_vect_widen_shift_saved 0
+ if { ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
+ set et_vect_widen_shift_saved 1
+ }
+ }
+ verbose "check_effective_target_vect_widen_shift: returning $et_vect_widen_shift_saved" 2
+ return $et_vect_widen_shift_saved
+}
+
+# Return 1 if the target plus current options supports a vector
# dot-product of signed chars, 0 otherwise.
#
# This won't change for different subtargets so cache the result.
===================================================================
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define C 7
+
+__attribute__ ((noinline)) void
+foo (unsigned short *src, unsigned int *dst)
+{
+ int i;
+ unsigned short b, *s = src;
+ unsigned int *d = dst;
+
+ for (i = 0; i < N; i++)
+ {
+ b = *s++;
+ *d = b<<C;
+ d++;
+ }
+
+ s = src;
+ d = dst;
+ for (i = 0; i < N; i++)
+ {
+ b = *s++;
+ if (*d != b<<C)
+ abort ();
+ d++;
+ }
+}
+
+int main (void)
+{
+ int i;
+ unsigned short in[N];
+ unsigned int out[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in[i] = i;
+ out[i] = 255;
+ __asm__ volatile ("");
+ }
+
+ foo (in, out);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
===================================================================
@@ -4,7 +4,8 @@
#include <stdlib.h>
#define N 32
-#define COEF 32470
+#define COEF1 32470
+#define COEF2 32
unsigned char in[N];
int out[N];
@@ -15,7 +16,7 @@ foo ()
int i;
for (i = 0; i < N; i++)
- out[i] = in[i] * COEF;
+ out[i] = in[i] * COEF1;
}
__attribute__ ((noinline)) void
@@ -24,7 +25,7 @@ bar ()
int i;
for (i = 0; i < N; i++)
- out[i] = COEF * in[i];
+ out[i] = COEF2 * in[i];
}
int main (void)
@@ -40,13 +41,13 @@ int main (void)
foo ();
for (i = 0; i < N; i++)
- if (out[i] != in[i] * COEF)
+ if (out[i] != in[i] * COEF1)
abort ();
bar ();
for (i = 0; i < N; i++)
- if (out[i] != in[i] * COEF)
+ if (out[i] != in[i] * COEF2)
abort ();
return 0;
===================================================================
@@ -0,0 +1,58 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define C 7
+
+__attribute__ ((noinline)) void
+foo (char *src, int *dst)
+{
+ int i;
+ char b, *s = src;
+ int *d = dst;
+
+ for (i = 0; i < N; i++)
+ {
+ b = *s++;
+ *d = b << C;
+ d++;
+ }
+
+ s = src;
+ d = dst;
+ for (i = 0; i < N; i++)
+ {
+ b = *s++;
+ if (*d != b << C)
+ abort ();
+ d++;
+ }
+}
+
+int main (void)
+{
+ int i;
+ char in[N];
+ int out[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in[i] = i;
+ out[i] = 255;
+ __asm__ volatile ("");
+ }
+
+ foo (in, out);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
===================================================================
@@ -0,0 +1,65 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define C1 10
+#define C2 5
+
+__attribute__ ((noinline)) void
+foo (unsigned char *src, unsigned int *dst1, unsigned int *dst2)
+{
+ int i;
+ unsigned char b, *s = src;
+ unsigned int *d1 = dst1, *d2 = dst2;
+
+ for (i = 0; i < N; i++)
+ {
+ b = *s++;
+ *d1 = b << C1;
+ d1++;
+ *d2 = b << C2;
+ d2++;
+ }
+
+ s = src;
+ d1 = dst1;
+ d2 = dst2;
+ for (i = 0; i < N; i++)
+ {
+ b = *s++;
+ if (*d1 != b << C1 || *d2 != b << C2)
+ abort ();
+ d1++;
+ d2++;
+ }
+}
+
+int main (void)
+{
+ int i;
+ unsigned char in[N];
+ unsigned int out1[N];
+ unsigned int out2[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in[i] = i;
+ out1[i] = 255;
+ out2[i] = 255;
+ __asm__ volatile ("");
+ }
+
+ foo (in, out1, out2);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 1 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
===================================================================
@@ -0,0 +1,107 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define C 16
+
+__attribute__ ((noinline)) void
+foo (short *src, int *dst)
+{
+ int i;
+ short b, b0, b1, b2, b3, *s = src;
+ int *d = dst;
+
+ for (i = 0; i < N/4; i++)
+ {
+ b0 = *s++;
+ b1 = *s++;
+ b2 = *s++;
+ b3 = *s++;
+ *d = b0 << C;
+ d++;
+ *d = b1 << C;
+ d++;
+ *d = b2 << C;
+ d++;
+ *d = b3 << C;
+ d++;
+ }
+
+ s = src;
+ d = dst;
+ for (i = 0; i < N; i++)
+ {
+ b = *s++;
+ if (*d != b << C)
+ abort ();
+ d++;
+ }
+
+ s = src;
+ d = dst;
+ for (i = 0; i < N/4; i++)
+ {
+ b0 = *s++;
+ b1 = *s++;
+ b2 = *s++;
+ b3 = *s++;
+ *d = b0 << C;
+ d++;
+ *d = b1 << C;
+ d++;
+ *d = b2 << C;
+ d++;
+ *d = b3 << 6;
+ d++;
+ }
+
+ s = src;
+ d = dst;
+ for (i = 0; i < N/4; i++)
+ {
+ b = *s++;
+ if (*d != b << C)
+ abort ();
+ d++;
+ b = *s++;
+ if (*d != b << C)
+ abort ();
+ d++;
+ b = *s++;
+ if (*d != b << C)
+ abort ();
+ d++;
+ b = *s++;
+ if (*d != b << 6)
+ abort ();
+ d++;
+ }
+}
+
+int main (void)
+{
+ int i;
+ short in[N];
+ int out[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ in[i] = i;
+ out[i] = 255;
+ __asm__ volatile ("");
+ }
+
+ foo (in, out);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_shift_pattern: detected" 8 "vect" { target vect_widen_shift } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
===================================================================
@@ -8600,6 +8600,19 @@ expand_expr_real_2 (sepops ops, rtx target, enum m
return target;
}
+ case VEC_WIDEN_SHIFT_LEFT_HI_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_LO_EXPR:
+ {
+ tree oprnd0 = treeop0;
+ tree oprnd1 = treeop1;
+
+ expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+ target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX,
+ target, unsignedp);
+ gcc_assert (target);
+ return target;
+ }
+
case VEC_PACK_TRUNC_EXPR:
case VEC_PACK_SAT_EXPR:
case VEC_PACK_FIX_TRUNC_EXPR:
===================================================================
@@ -343,6 +343,8 @@ dump_binary_rhs (pretty_printer *buffer, gimple gs
case VEC_EXTRACT_ODD_EXPR:
case VEC_INTERLEAVE_HIGH_EXPR:
case VEC_INTERLEAVE_LOW_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_HI_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_LO_EXPR:
for (p = tree_code_name [(int) code]; *p; p++)
pp_character (buffer, TOUPPER (*p));
pp_string (buffer, " <");
===================================================================
@@ -902,7 +902,7 @@ extern void vect_slp_transform_bb (basic_block);
Additional pattern recognition functions can (and will) be added
in the future. */
typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
-#define NUM_PATTERNS 5
+#define NUM_PATTERNS 6
void vect_pattern_recog (loop_vec_info);
/* In tree-vectorizer.c. */
===================================================================
@@ -1111,6 +1111,19 @@ DEFTREECODE (WIDEN_MULT_PLUS_EXPR, "widen_mult_plu
is subtracted from t3. */
DEFTREECODE (WIDEN_MULT_MINUS_EXPR, "widen_mult_minus_expr", tcc_expression, 3)
+/* Widening shift left.
+ The first operand is of type t1.
+ The second operand is the number of bits to shift by; it need not be the
+ same type as the first operand and result.
+ Note that the result is undefined if the second operand is larger
+ than or equal to the first operand's type size.
+ The type of the entire expression is t2, such that t2 is at least twice
+ the size of t1.
+ WIDEN_SHIFT_LEFT_EXPR is equivalent to first widening (promoting)
+ the first argument from type t1 to type t2, and then shifting it
+ by the second argument. */
+DEFTREECODE (WIDEN_SHIFT_LEFT_EXPR, "widen_shift_left_expr", tcc_binary, 2)
+
/* Fused multiply-add.
All operands and the result are of the same type. No intermediate
rounding is performed after multiplying operand one with operand two
@@ -1166,6 +1179,16 @@ DEFTREECODE (VEC_EXTRACT_ODD_EXPR, "vec_extractodd
DEFTREECODE (VEC_INTERLEAVE_HIGH_EXPR, "vec_interleavehigh_expr", tcc_binary, 2)
DEFTREECODE (VEC_INTERLEAVE_LOW_EXPR, "vec_interleavelow_expr", tcc_binary, 2)
+/* Widening vector shift left in bits.
+ Operand 0 is a vector to be shifted with N elements of size S.
+ Operand 1 is an integer shift amount in bits.
+ Shifting the elements of the input vector produces N results of size 2*S.
+ VEC_WIDEN_SHIFT_LEFT_HI_EXPR computes the N/2 high results.
+ VEC_WIDEN_SHIFT_LEFT_LO_EXPR computes the N/2 low results.
+ */
+DEFTREECODE (VEC_WIDEN_SHIFT_LEFT_HI_EXPR, "widen_shift_left_hi_expr", tcc_binary, 2)
+DEFTREECODE (VEC_WIDEN_SHIFT_LEFT_LO_EXPR, "widen_shift_left_lo_expr", tcc_binary, 2)
+
/* PREDICT_EXPR. Specify hint for branch prediction. The
PREDICT_EXPR_PREDICTOR specify predictor and PREDICT_EXPR_OUTCOME the
outcome (0 for not taken and 1 for taken). Once the profile is guessed
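A small scalar illustration of the WIDEN_SHIFT_LEFT_EXPR semantics documented above
(a sketch assuming t1 = unsigned short and t2 = unsigned int; not code from the
patch): the operand is promoted to the wider type before shifting, so bits that a
shift in the narrow type would drop are preserved:

unsigned int
widen_shift_left_example (unsigned short a_t)
{
  /* Equivalent of WIDEN_SHIFT_LEFT_EXPR <a_t, 12>: widen first, then shift.
     For a_t == 0x00ff the result is 0x000ff000.  */
  unsigned int res_T = (unsigned int) a_t << 12;

  /* Shifting in the narrow type first would truncate to 16 bits instead:
     (unsigned short) (a_t << 12) == 0xf000.  */
  return res_T;
}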
===================================================================
@@ -3264,6 +3264,8 @@ expand_debug_expr (tree exp)
case VEC_UNPACK_LO_EXPR:
case VEC_WIDEN_MULT_HI_EXPR:
case VEC_WIDEN_MULT_LO_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_HI_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_LO_EXPR:
return NULL;
/* Misc codes. */
===================================================================
@@ -49,12 +49,15 @@ static gimple vect_recog_dot_prod_pattern (VEC (gi
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
tree *);
+static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
+ tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
vect_recog_widen_mult_pattern,
vect_recog_widen_sum_pattern,
vect_recog_dot_prod_pattern,
vect_recog_pow_pattern,
- vect_recog_over_widening_pattern};
+ vect_recog_over_widening_pattern,
+ vect_recog_widen_shift_pattern};
/* Function widened_name_p
@@ -335,27 +338,36 @@ vect_recog_dot_prod_pattern (VEC (gimple, heap) **
}
-/* Handle two cases of multiplication by a constant. The first one is when
- the constant, CONST_OPRND, fits the type (HALF_TYPE) of the second
- operand (OPRND). In that case, we can peform widen-mult from HALF_TYPE to
- TYPE.
+/* Handle widening operation by a constant. At the moment we support MULT_EXPR
+ and LSHIFT_EXPR.
+ For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
+ we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.
+
Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
- HALF_TYPE, and CONST_OPRND fits an intermediate type (2 times smaller than
- TYPE), we can perform widen-mult from the intermediate type to TYPE and
- replace a_T = (TYPE) a_t; with a_it - (interm_type) a_t; */
+ HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
+ that satisfies the above restrictions, we can perform a widening operation
+ from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
+ with a_it = (interm_type) a_t; */
static bool
-vect_handle_widen_mult_by_const (gimple stmt, tree const_oprnd, tree *oprnd,
- VEC (gimple, heap) **stmts, tree type,
- tree *half_type, gimple def_stmt)
+vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
+ tree const_oprnd, tree *oprnd,
+ VEC (gimple, heap) **stmts, tree type,
+ tree *half_type, gimple def_stmt)
{
tree new_type, new_oprnd, tmp;
gimple new_stmt;
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
struct loop *loop = LOOP_VINFO_LOOP (loop_info);
- if (int_fits_type_p (const_oprnd, *half_type))
+ if (code != MULT_EXPR && code != LSHIFT_EXPR)
+ return false;
+
+ if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
+ || (code == LSHIFT_EXPR
+ && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type)) != 1))
+ && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
{
/* CONST_OPRND is a constant of HALF_TYPE. */
*oprnd = gimple_assign_rhs1 (def_stmt);
@@ -368,14 +380,16 @@ static bool
|| !vinfo_for_stmt (def_stmt))
return false;
- /* TYPE is 4 times bigger than HALF_TYPE, try widen-mult for
+ /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
a type 2 times bigger than HALF_TYPE. */
new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
TYPE_UNSIGNED (type));
- if (!int_fits_type_p (const_oprnd, new_type))
+ if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
+ || (code == LSHIFT_EXPR
+ && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
return false;
- /* Use NEW_TYPE for widen_mult. */
+ /* Use NEW_TYPE for widening operation. */
if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
{
new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
@@ -385,6 +399,7 @@ static bool
|| TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
return false;
+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
*oprnd = gimple_assign_lhs (new_stmt);
}
else
@@ -495,7 +510,7 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap)
enum tree_code dummy_code;
int dummy_int;
VEC (tree, heap) *dummy_vec;
- bool op0_ok, op1_ok;
+ bool op1_ok;
if (!is_gimple_assign (last_stmt))
return NULL;
@@ -515,38 +530,23 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap)
return NULL;
/* Check argument 0. */
- op0_ok = widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false);
+ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
+ return NULL;
/* Check argument 1. */
op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);
- /* In case of multiplication by a constant one of the operands may not match
- the pattern, but not both. */
- if (!op0_ok && !op1_ok)
- return NULL;
-
- if (op0_ok && op1_ok)
+ if (op1_ok)
{
oprnd0 = gimple_assign_rhs1 (def_stmt0);
oprnd1 = gimple_assign_rhs1 (def_stmt1);
}
- else if (!op0_ok)
+ else
{
- if (TREE_CODE (oprnd0) == INTEGER_CST
- && TREE_CODE (half_type1) == INTEGER_TYPE
- && vect_handle_widen_mult_by_const (last_stmt, oprnd0, &oprnd1,
- stmts, type,
- &half_type1, def_stmt1))
- half_type0 = half_type1;
- else
- return NULL;
- }
- else if (!op1_ok)
- {
if (TREE_CODE (oprnd1) == INTEGER_CST
&& TREE_CODE (half_type0) == INTEGER_TYPE
- && vect_handle_widen_mult_by_const (last_stmt, oprnd1, &oprnd0,
- stmts, type,
- &half_type0, def_stmt0))
+ && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
+ &oprnd0, stmts, type,
+ &half_type0, def_stmt0))
half_type1 = half_type0;
else
return NULL;
@@ -1001,6 +1001,7 @@ vect_operation_fits_smaller_type (gimple stmt, tre
|| TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
return false;
+ VEC_safe_push (gimple, heap, *stmts, def_stmt);
oprnd = gimple_assign_lhs (new_stmt);
}
else
@@ -1070,10 +1071,8 @@ vect_operation_fits_smaller_type (gimple stmt, tre
constants.
Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
be 'type' or some intermediate type. For now, we expect S5 to be a type
- demotion operation. We also check that S3 and S4 have only one use.
-.
+ demotion operation. We also check that S3 and S4 have only one use. */
-*/
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
tree *type_in, tree *type_out)
@@ -1217,12 +1216,184 @@ vect_recog_over_widening_pattern (VEC (gimple, hea
return pattern_stmt;
}
+/* Detect widening shift pattern:
+
+ type a_t;
+ TYPE a_T, res_T;
+
+ S1 a_t = ;
+ S2 a_T = (TYPE) a_t;
+ S3 res_T = a_T << CONST;
+ where type 'TYPE' is at least double the size of type 'type'.
+
+ Also detect unsigned cases:
+
+ unsigned type a_t;
+ unsigned TYPE u_res_T;
+ TYPE a_T, res_T;
+
+ S1 a_t = ;
+ S2 a_T = (TYPE) a_t;
+ S3 res_T = a_T << CONST;
+ S4 u_res_T = (unsigned TYPE) res_T;
+
+ And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
+ create an additional pattern stmt for S2 to create a variable of an
+ intermediate type, and perform widen-shift on the intermediate type:
+
+ type a_t;
+ interm_type a_it;
+ TYPE a_T, res_T, res_T';
+
+ S1 a_t = ;
+ S2 a_T = (TYPE) a_t;
+ '--> a_it = (interm_type) a_t;
+ S3 res_T = a_T << CONST;
+ '--> res_T' = a_it <<* CONST;
+
+ Input/Output:
+
+ * STMTS: Contains a stmt from which the pattern search begins.
+ In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
+ in STMTS. When an intermediate type is used and a pattern statement is
+ created for S2, we also put S2 here (before S3).
+
+ Output:
+
+ * TYPE_IN: The type of the input arguments to the pattern.
+
+ * TYPE_OUT: The type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. In this case it will be:
+ WIDEN_SHIFT_LEFT_EXPR <a_t, CONST>. */
+
+static gimple
+vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
+ tree *type_in, tree *type_out)
+{
+ gimple last_stmt = VEC_pop (gimple, *stmts);
+ gimple def_stmt0;
+ tree oprnd0, oprnd1;
+ tree type, half_type0;
+ gimple pattern_stmt;
+ tree vectype, vectype_out = NULL_TREE;
+ tree dummy;
+ tree var;
+ enum tree_code dummy_code;
+ int dummy_int;
+ VEC (tree, heap) * dummy_vec;
+
+ if (!is_gimple_assign (last_stmt))
+ return NULL;
+
+ if (!vinfo_for_stmt (last_stmt)
+ || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
+ return NULL;
+
+ type = gimple_expr_type (last_stmt);
+
+ /* Starting from LAST_STMT, follow the defs of its uses in search
+ of the above pattern. */
+ if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
+ return NULL;
+
+ oprnd0 = gimple_assign_rhs1 (last_stmt);
+ oprnd1 = gimple_assign_rhs2 (last_stmt);
+ if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
+ return NULL;
+
+ /* Check operand 0: it has to be defined by a type promotion. */
+ if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
+ return NULL;
+
+ /* Check operand 1: has to be positive. We check that it fits the type
+ in vect_handle_widen_op_by_const(). */
+ if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
+ return NULL;
+
+ oprnd0 = gimple_assign_rhs1 (def_stmt0);
+
+ /* Check if this is a widening operation. */
+ if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
+ &oprnd0, stmts,
+ type, &half_type0, def_stmt0))
+ return NULL;
+
+ /* Handle unsigned case. Look for
+ S4 u_res_T = (unsigned TYPE) res_T;
+ Use unsigned TYPE as the type for WIDEN_SHIFT_LEFT_EXPR. */
+ if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
+ {
+ tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
+ int nuses = 0;
+ gimple use_stmt = NULL;
+ tree use_type;
+
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+ {
+ if (is_gimple_debug (USE_STMT (use_p)))
+ continue;
+ use_stmt = USE_STMT (use_p);
+ nuses++;
+ }
+
+ if (nuses != 1 || !is_gimple_assign (use_stmt)
+ || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
+ return NULL;
+
+ use_lhs = gimple_assign_lhs (use_stmt);
+ use_type = TREE_TYPE (use_lhs);
+ if (!INTEGRAL_TYPE_P (use_type)
+ || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
+ || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
+ return NULL;
+
+ type = use_type;
+ last_stmt = use_stmt;
+ }
+
+ /* Pattern detected. */
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
+
+ /* Check target support. */
+ vectype = get_vectype_for_scalar_type (half_type0);
+ vectype_out = get_vectype_for_scalar_type (type);
+
+ if (!vectype
+ || !vectype_out
+ || !supportable_widening_operation (WIDEN_SHIFT_LEFT_EXPR, last_stmt,
+ vectype_out, vectype,
+ &dummy, &dummy, &dummy_code,
+ &dummy_code, &dummy_int,
+ &dummy_vec))
+ return NULL;
+
+ *type_in = vectype;
+ *type_out = vectype_out;
+
+ /* Pattern supported. Create a stmt to be used to replace the pattern. */
+ var = vect_recog_temp_ssa_var (type, NULL);
+ pattern_stmt =
+ gimple_build_assign_with_ops (WIDEN_SHIFT_LEFT_EXPR, var, oprnd0, oprnd1);
+ SSA_NAME_DEF_STMT (var) = pattern_stmt;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
+
+ VEC_safe_push (gimple, heap, *stmts, last_stmt);
+ return pattern_stmt;
+}
+
/* Mark statements that are involved in a pattern. */
static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
- tree pattern_vectype)
+ tree pattern_vectype)
{
stmt_vec_info pattern_stmt_info, def_stmt_info;
stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
@@ -1239,6 +1410,7 @@ vect_mark_pattern_stmts (gimple orig_stmt, gimple
= STMT_VINFO_DEF_TYPE (orig_stmt_info);
STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
+
STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
= STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
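For orientation (a sketch, not part of the patch), these are the kinds of source
loops vect_recog_widen_shift_pattern is meant to detect, following the S1/S2/S3
shapes in its comment; the function names and the bound N are illustrative:

#define N 64

/* TYPE is exactly twice 'type': the promotion plus shift becomes
   WIDEN_SHIFT_LEFT_EXPR <a_t, 7>.  */
void
widen_u16_to_u32 (unsigned short *src, unsigned int *dst)
{
  int i;
  for (i = 0; i < N; i++)
    dst[i] = (unsigned int) src[i] << 7;	/* S2: promote, S3: shift */
}

/* TYPE is 4 times 'type': vect_handle_widen_op_by_const introduces an
   intermediate unsigned short promotion, and the widening shift is done
   from the intermediate type to TYPE, provided the shift amount also
   fits the intermediate type.  */
void
widen_u8_to_u32 (unsigned char *src, unsigned int *dst)
{
  int i;
  for (i = 0; i < N; i++)
    dst[i] = (unsigned int) src[i] << 5;
}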
===================================================================
@@ -3318,6 +3318,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
int multi_step_cvt = 0;
VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
VEC (tree, heap) *vec_dsts = NULL, *interm_types = NULL, *tmp_vec_dsts = NULL;
+ unsigned int k;
/* FORNOW: not supported by basic block SLP vectorization. */
gcc_assert (loop_vinfo);
@@ -3337,7 +3338,8 @@ vectorizable_type_promotion (gimple stmt, gimple_s
code = gimple_assign_rhs_code (stmt);
if (!CONVERT_EXPR_CODE_P (code)
- && code != WIDEN_MULT_EXPR)
+ && code != WIDEN_MULT_EXPR
+ && code != WIDEN_SHIFT_LEFT_EXPR)
return false;
scalar_dest = gimple_assign_lhs (stmt);
@@ -3365,7 +3367,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
bool ok;
op1 = gimple_assign_rhs2 (stmt);
- if (code == WIDEN_MULT_EXPR)
+ if (code == WIDEN_MULT_EXPR || code == WIDEN_SHIFT_LEFT_EXPR)
{
/* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
OP1. */
@@ -3442,7 +3444,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
ncopies);
- if (code == WIDEN_MULT_EXPR)
+ if (code == WIDEN_MULT_EXPR || code == WIDEN_SHIFT_LEFT_EXPR)
{
if (CONSTANT_CLASS_P (op0))
op0 = fold_convert (TREE_TYPE (op1), op0);
@@ -3483,6 +3485,8 @@ vectorizable_type_promotion (gimple stmt, gimple_s
if (op_type == binary_op)
vec_oprnds1 = VEC_alloc (tree, heap, 1);
}
+ else if (code == WIDEN_SHIFT_LEFT_EXPR)
+ vec_oprnds1 = VEC_alloc (tree, heap, slp_node->vec_stmts_size);
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
@@ -3496,15 +3500,33 @@ vectorizable_type_promotion (gimple stmt, gimple_s
if (j == 0)
{
if (slp_node)
- vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
- &vec_oprnds1, -1);
- else
+ {
+ if (code == WIDEN_SHIFT_LEFT_EXPR)
+ {
+ vec_oprnd1 = op1;
+ /* Store vec_oprnd1 for every vector stmt to be created
+ for SLP_NODE. We check during the analysis that all
+ the shift arguments are the same. */
+ for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
+ VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
+
+ vect_get_slp_defs (op0, NULL_TREE, slp_node, &vec_oprnds0, NULL,
+ -1);
+ }
+ else
+ vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0,
+ &vec_oprnds1, -1);
+ }
+ else
{
vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
VEC_quick_push (tree, vec_oprnds0, vec_oprnd0);
if (op_type == binary_op)
{
- vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
+ if (code == WIDEN_SHIFT_LEFT_EXPR)
+ vec_oprnd1 = op1;
+ else
+ vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
}
}
@@ -3515,7 +3537,10 @@ vectorizable_type_promotion (gimple stmt, gimple_s
VEC_replace (tree, vec_oprnds0, 0, vec_oprnd0);
if (op_type == binary_op)
{
- vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
+ if (code == WIDEN_SHIFT_LEFT_EXPR)
+ vec_oprnd1 = op1;
+ else
+ vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
VEC_replace (tree, vec_oprnds1, 0, vec_oprnd1);
}
}
@@ -5785,6 +5810,19 @@ supportable_widening_operation (enum tree_code cod
}
break;
+ case WIDEN_SHIFT_LEFT_EXPR:
+ if (BYTES_BIG_ENDIAN)
+ {
+ c1 = VEC_WIDEN_SHIFT_LEFT_HI_EXPR;
+ c2 = VEC_WIDEN_SHIFT_LEFT_LO_EXPR;
+ }
+ else
+ {
+ c2 = VEC_WIDEN_SHIFT_LEFT_HI_EXPR;
+ c1 = VEC_WIDEN_SHIFT_LEFT_LO_EXPR;
+ }
+ break;
+
CASE_CONVERT:
if (BYTES_BIG_ENDIAN)
{
===================================================================
@@ -3354,6 +3354,7 @@ estimate_operator_cost (enum tree_code code, eni_w
case DOT_PROD_EXPR:
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
+ case WIDEN_SHIFT_LEFT_EXPR:
case VEC_WIDEN_MULT_HI_EXPR:
case VEC_WIDEN_MULT_LO_EXPR:
@@ -3368,6 +3369,8 @@ estimate_operator_cost (enum tree_code code, eni_w
case VEC_EXTRACT_ODD_EXPR:
case VEC_INTERLEAVE_HIGH_EXPR:
case VEC_INTERLEAVE_LOW_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_HI_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_LO_EXPR:
return 1;
===================================================================
@@ -552,7 +552,9 @@ expand_vector_operations_1 (gimple_stmt_iterator *
|| code == VEC_UNPACK_LO_EXPR
|| code == VEC_PACK_TRUNC_EXPR
|| code == VEC_PACK_SAT_EXPR
- || code == VEC_PACK_FIX_TRUNC_EXPR)
+ || code == VEC_PACK_FIX_TRUNC_EXPR
+ || code == VEC_WIDEN_SHIFT_LEFT_HI_EXPR
+ || code == VEC_WIDEN_SHIFT_LEFT_LO_EXPR)
type = TREE_TYPE (rhs1);
/* Optabs will try converting a negation into a subtraction, so
===================================================================
@@ -3609,6 +3609,9 @@ do_pointer_plus_expr_check:
case VEC_EXTRACT_ODD_EXPR:
case VEC_INTERLEAVE_HIGH_EXPR:
case VEC_INTERLEAVE_LOW_EXPR:
+ case WIDEN_SHIFT_LEFT_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_HI_EXPR:
+ case VEC_WIDEN_SHIFT_LEFT_LO_EXPR:
/* FIXME. */
return false;
===================================================================
@@ -144,6 +144,8 @@
UNSPEC_MISALIGNED_ACCESS
UNSPEC_VCLE
UNSPEC_VCLT
+ UNSPEC_VSHLL_LO
+ UNSPEC_VSHLL_HI
])
@@ -5550,6 +5552,80 @@
}
)
+(define_insn "neon_vec_<US>shiftl_lo_<mode>"
+ [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
+ (unspec:<V_unpack> [(SE:<V_unpack> (vec_select:<V_HALF>
+ (match_operand:VU 1 "register_operand" "w")
+ (match_operand:VU 2 "vect_par_constant_low" "")))
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSHLL_LO))]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+{
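+ /* The boundaries are: 0 < imm <= size. */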
+ neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode) + 1);
+ return "vshll.<US><V_sz_elem> %q0, %e1, %3";
+}
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_<US>shiftl_lo_<mode>"
+ [(match_operand:<V_unpack> 0 "register_operand" "")
+ (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
+ (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ {
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
+ for (i = 0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT (i);
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+
+ emit_insn (gen_neon_vec_<US>shiftl_lo_<mode> (operands[0],
+ operands[1],
+ t1,
+ operands[2]));
+ DONE;
+ }
+)
+
+(define_insn "neon_vec_<US>shiftl_hi_<mode>"
+ [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
+ (unspec:<V_unpack> [(SE:<V_unpack> (vec_select:<V_HALF>
+ (match_operand:VU 1 "register_operand" "w")
+ (match_operand:VU 2 "vect_par_constant_high" "")))
+ (match_operand:SI 3 "immediate_operand" "i")]
+ UNSPEC_VSHLL_HI))]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+{
+ /* The boundaries are: 0 < imm <= size. */
+ neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode) + 1);
+ return "vshll.<US><V_sz_elem> %q0, %f1, %3";
+}
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_<US>shiftl_hi_<mode>"
+ [(match_operand:<V_unpack> 0 "register_operand" "")
+ (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
+ (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON && !BYTES_BIG_ENDIAN"
+ {
+ rtvec v = rtvec_alloc (<V_mode_nunits>/2);
+ rtx t1;
+ int i;
+ for (i = 0; i < (<V_mode_nunits>/2); i++)
+ RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
+ t1 = gen_rtx_PARALLEL (<MODE>mode, v);
+
+ emit_insn (gen_neon_vec_<US>shiftl_hi_<mode> (operands[0],
+ operands[1],
+ t1,
+ operands[2]));
+ DONE;
+ }
+)
+
;; Vectorize for non-neon-quad case
(define_insn "neon_unpack<US>_<mode>"
[(set (match_operand:<V_widen> 0 "register_operand" "=w")
@@ -5626,6 +5702,51 @@
}
)
+(define_insn "neon_vec_<US>shift_left_<mode>"
+ [(set (match_operand:<V_widen> 0 "register_operand" "=w")
+ (unspec:<V_widen> [(SE:<V_widen>
+ (match_operand:VDI 1 "register_operand" "w"))
+ (match_operand:SI 2 "immediate_operand" "i")]
+ UNSPEC_VSHLL_N))]
+ "TARGET_NEON"
+{
+ /* The boundaries are: 0 < imm <= size. */
+ neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
+ return "vshll.<US><V_sz_elem> %q0, %P1, %2";
+}
+ [(set_attr "neon_type" "neon_shift_1")]
+)
+
+(define_expand "vec_widen_<US>shiftl_hi_<mode>"
+ [(match_operand:<V_double_width> 0 "register_operand" "")
+ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
+ (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+ {
+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
+ emit_insn (gen_neon_vec_<US>shift_left_<mode> (tmpreg, operands[1], operands[2]));
+ emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
+
+ DONE;
+
+ }
+)
+
+(define_expand "vec_widen_<US>shiftl_lo_<mode>"
+ [(match_operand:<V_double_width> 0 "register_operand" "")
+ (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
+ (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+ {
+ rtx tmpreg = gen_reg_rtx (<V_widen>mode);
+ emit_insn (gen_neon_vec_<US>shift_left_<mode> (tmpreg, operands[1], operands[2]));
+ emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
+
+ DONE;
+
+ }
+)
+
; FIXME: These instruction patterns can't be used safely in big-endian mode
; because the ordering of vector elements in Q registers is different from what
; the semantics of the instructions require.
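The lane selection built by the vec_widen_<US>shiftl_lo/hi_<mode> expanders above
can be read as the following sketch (not part of the patch; the helper name is
illustrative and 'nunits' stands in for <V_mode_nunits>): the 'lo' expander collects
lane indices 0 .. N/2-1 and the 'hi' expander N/2 .. N-1 into the PARALLEL used by
vec_select, and vshll then widens and shifts exactly those lanes. The second pair of
expanders, for D-register inputs (VDI), instead performs a full vshll into a Q
register and extracts the wanted half with vget_high/vget_low.

/* Mirror of the GEN_INT loops in the define_expands above.  */
static void
build_half_lane_map (int *lanes, int nunits, int high)
{
  int i;
  for (i = 0; i < nunits / 2; i++)
    lanes[i] = (high ? nunits / 2 : 0) + i;
}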
===================================================================
@@ -459,6 +459,11 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_
}
}
}
+ else if (rhs_code == WIDEN_SHIFT_LEFT_EXPR)
+ {
+ need_same_oprnds = true;
+ first_op1 = gimple_assign_rhs2 (stmt);
+ }
}
else
{