===================================================================
@@ -1,3 +1,25 @@
+2011-04-28 Ira Rosen <ira.rosen@linaro.org>
+
+ PR tree-optimization/48765
+ * tree-vectorizer.h (vect_make_slp_decision): Return bool.
+ * tree-vect-loop.c (vect_analyze_loop_operations): Add new argument
+ to indicate if loop aware SLP is being used. Scan the statements
+ and update the vectorization factor according to the type of
+ vectorization before statement analysis.
+ (vect_analyze_loop_2): Get a return value from vect_make_slp_decision,
+ pass it to vect_analyze_loop_operations.
+ (vectorizable_reduction): Set number of copies to 1 in case of pure
+ SLP statement.
+ * tree-vect-stmts.c (vectorizable_conversion,
+ vectorizable_assignment, vectorizable_shift,
+ vectorizable_operation, vectorizable_type_demotion,
+ vectorizable_type_promotion, vectorizable_store, vectorizable_load):
+ Likewise.
+ (vectorizable_condition): Move the check that it is not SLP
+ vectorization before the number of copies check.
+ * tree-vect-slp.c (vect_make_slp_decision): Return TRUE if decided
+ to vectorize the loop using SLP.
+
2011-04-28 Jakub Jelinek <jakub@redhat.com>
PR middle-end/48597
===================================================================
@@ -0,0 +1,82 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-options "-m64 -O3 -mcpu=power6" } */
+
+enum reg_class /* Register class tags; only LIM_REG_CLASSES is referenced by the code below. */
+{
+ NO_REGS, AP_REG, XRF_REGS, GENERAL_REGS, AGRF_REGS, XGRF_REGS, ALL_REGS,
+ LIM_REG_CLASSES
+};
+enum machine_mode /* Mode tags; used here only as a parameter type of stupid_find_reg. */
+{
+ VOIDmode, QImode, HImode, PSImode, SImode, PDImode, DImode, TImode, OImode,
+ QFmode, HFmode, TQFmode, SFmode, DFmode, XFmode, TFmode, SCmode, DCmode,
+ XCmode, TCmode, CQImode, CHImode, CSImode, CDImode, CTImode, COImode,
+ BLKmode, CCmode, CCEVENmode, MAX_MACHINE_MODE
+};
+typedef struct rtx_def /* rtx is a pointer to this struct. */
+{
+ int mode:8; /* 8-bit bit-field; read via regno_reg_rtx[r]->mode below. */
+}
+ *rtx;
+extern rtx *regno_reg_rtx;
+typedef unsigned int HARD_REG_ELT_TYPE; /* One word of a HARD_REG_SET. */
+typedef HARD_REG_ELT_TYPE HARD_REG_SET[((64 + 32 - 1) / 32)]; /* (64 + 32 - 1) / 32 == 2 words. */
+extern int reg_alloc_order[64];
+extern int max_regno;
+extern int *reg_n_calls_crossed;
+extern short *reg_renumber; /* Written by stupid_life_analysis. */
+static int *reg_where_dead; /* The static pointers below are never assigned in this file. */
+static int *reg_where_born;
+static int *reg_order;
+static char *regs_change_size;
+static HARD_REG_SET *after_insn_hard_regs;
+static int stupid_find_reg (int, enum reg_class, enum machine_mode, int, int, /* Prototype for the K&R-style definition further down. */
+ int);
+void /* Calls stupid_find_reg for each reg_order entry and stores the result in reg_renumber. */
+stupid_life_analysis (f, nregs, file) /* K&R-style definition: nregs and file have no declaration and default to int. */
+ rtx f; /* f, nregs and file are not used in the body. */
+{
+ register int i;
+ for (i = (((64)) + 3) + 1; i < max_regno; i++) /* i runs from 68 up to, but not including, max_regno. */
+ {
+ register int r = reg_order[i];
+ if ((int) LIM_REG_CLASSES > 1) /* Compile-time constant: LIM_REG_CLASSES is 7, so this is always true. */
+ reg_renumber[r] = /* Result is stored into a short element. */
+ stupid_find_reg (reg_n_calls_crossed[r], reg_preferred_class (r), /* reg_preferred_class has no visible declaration (implicit declaration). */
+ ((regno_reg_rtx[r])->mode), reg_where_born[r],
+ reg_where_dead[r], regs_change_size[r]);
+ }
+}
+
+static int /* NOTE(review): presumably a deliberately reduced testcase body - do not "fix" the oddities noted below. */
+stupid_find_reg (call_preserved, class, mode, born_insn, dead_insn,
+ changes_size) /* K&R-style definition: born_insn, dead_insn and changes_size have no declaration and default to int. */
+ int call_preserved; /* call_preserved, class, mode and changes_size are not used in the body. */
+ enum reg_class class;
+ enum machine_mode mode;
+{
+ register int i, ins;
+ HARD_REG_SET used, this_reg; /* this_reg is never used; used is never initialized before the |= below. */
+ for (ins = born_insn; ins < dead_insn; ins++) /* For each ins in [born_insn, dead_insn) ... */
+ do /* ... OR after_insn_hard_regs[ins] into used, word by word. */
+ {
+ register HARD_REG_ELT_TYPE *scan_tp_ = (used), *scan_fp_ = /* scan_tp_ is the destination, scan_fp_ the source. */
+ (after_insn_hard_regs[ins]);
+ for (i = 0; i < ((64 + 32 - 1) / 32); i++) /* Two words per set. */
+ *scan_tp_++ |= *scan_fp_++;
+ }
+ while (0);
+ for (i = 0; i < 64; i++) /* Visit registers in reg_alloc_order order. */
+ {
+ int regno = reg_alloc_order[i];
+ if (((used)[(regno) / ((unsigned) 32)] & /* True when bit regno is set in used. */
+ (((HARD_REG_ELT_TYPE) (1)) << ((regno) % ((unsigned) 32)))))
+ {
+ register int j; /* NOTE(review): j is read below without ever being assigned. */
+ if (j == regno)
+ return regno;
+ }
+ }
+} /* NOTE(review): falls off the end without a return value when no register matched. */
+
+/* { dg-final { cleanup-tree-dump "vect" } } */
===================================================================
@@ -1,3 +1,8 @@
+2011-04-28 Ira Rosen <ira.rosen@linaro.org>
+
+ PR tree-optimization/48765
+ * gcc.dg/vect/pr48765.c: New.
+
2011-04-28 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
PR tree-optimization/48775
===================================================================
@@ -870,7 +870,7 @@ extern bool vect_transform_slp_perm_load (gimple,
extern bool vect_schedule_slp (loop_vec_info, bb_vec_info);
extern void vect_update_slp_costs_according_to_vf (loop_vec_info);
extern bool vect_analyze_slp (loop_vec_info, bb_vec_info);
-extern void vect_make_slp_decision (loop_vec_info);
+extern bool vect_make_slp_decision (loop_vec_info);
extern void vect_detect_hybrid_slp (loop_vec_info);
extern void vect_get_slp_defs (tree, tree, slp_tree, VEC (tree,heap) **,
VEC (tree,heap) **, int);
===================================================================
@@ -1146,7 +1146,7 @@ vect_get_cost (enum vect_cost_for_stmt type_of_cos
Scan the loop stmts and make sure they are all vectorizable. */
static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
{
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@@ -1167,7 +1167,41 @@ static bool
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ if (slp)
+ {
+ /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+ the vectorization factor of the loop is the unrolling factor required
+ by the SLP instances. If that unrolling factor is 1, we say that we
+ perform pure SLP on the loop - cross iteration parallelism is not
+ exploited. */
+ for (i = 0; i < nbbs; i++)
+ {
+ basic_block bb = bbs[i];
+ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+ {
+ gimple stmt = gsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ gcc_assert (stmt_info);
+ if ((STMT_VINFO_RELEVANT_P (stmt_info)
+ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+ && !PURE_SLP_STMT (stmt_info))
+ /* STMT needs both SLP and loop-based vectorization. */
+ only_slp_in_loop = false;
+ }
+ }
+ if (only_slp_in_loop)
+ vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+ else
+ vectorization_factor = least_common_multiple (vectorization_factor,
+ LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "Updating vectorization factor to %d ",
+ vectorization_factor);
+ }
+
for (i = 0; i < nbbs; i++)
{
basic_block bb = bbs[i];
@@ -1272,18 +1306,8 @@ static bool
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
{
gimple stmt = gsi_stmt (si);
- stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
- gcc_assert (stmt_info);
-
if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
return false;
-
- if ((STMT_VINFO_RELEVANT_P (stmt_info)
- || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
- && !PURE_SLP_STMT (stmt_info))
- /* STMT needs both SLP and loop-based vectorization. */
- only_slp_in_loop = false;
}
} /* bbs */
@@ -1303,18 +1327,6 @@ static bool
return false;
}
- /* If all the stmts in the loop can be SLPed, we perform only SLP, and
- vectorization factor of the loop is the unrolling factor required by the
- SLP instances. If that unrolling factor is 1, we say, that we perform
- pure SLP on loop - cross iteration parallelism is not exploited. */
- if (only_slp_in_loop)
- vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
- else
- vectorization_factor = least_common_multiple (vectorization_factor,
- LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
- LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump,
@@ -1410,7 +1422,7 @@ static bool
static bool
vect_analyze_loop_2 (loop_vec_info loop_vinfo)
{
- bool ok, dummy;
+ bool ok, dummy, slp = false;
int max_vf = MAX_VECTORIZATION_FACTOR;
int min_vf = 2;
@@ -1524,7 +1536,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
if (ok)
{
/* Decide which possible SLP instances to SLP. */
- vect_make_slp_decision (loop_vinfo);
+ slp = vect_make_slp_decision (loop_vinfo);
/* Find stmts that need to be both vectorized and SLPed. */
vect_detect_hybrid_slp (loop_vinfo);
@@ -1533,7 +1545,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
/* Scan all the operations in the loop and make sure they are
vectorizable. */
- ok = vect_analyze_loop_operations (loop_vinfo);
+ ok = vect_analyze_loop_operations (loop_vinfo, slp);
if (!ok)
{
if (vect_print_dump_info (REPORT_DETAILS))
@@ -4136,7 +4148,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_i
if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
return false;
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
===================================================================
@@ -1747,7 +1747,7 @@ vectorizable_conversion (gimple stmt, gimple_stmt_
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
/* Sanity check: make sure that at least one copy of the vectorized stmt
@@ -1940,7 +1940,7 @@ vectorizable_assignment (gimple stmt, gimple_stmt_
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -2149,7 +2149,7 @@ vectorizable_shift (gimple stmt, gimple_stmt_itera
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2497,7 +2497,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_i
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -2895,7 +2895,7 @@ vectorizable_type_demotion (gimple stmt, gimple_st
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
@@ -3175,7 +3175,7 @@ vectorizable_type_promotion (gimple stmt, gimple_s
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp_node)
+ if (slp_node || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
@@ -3358,7 +3358,7 @@ vectorizable_store (gimple stmt, gimple_stmt_itera
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp)
+ if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -3851,7 +3851,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterat
/* Multiple types in SLP are handled by creating the appropriate number of
vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
case of SLP. */
- if (slp)
+ if (slp || PURE_SLP_STMT (stmt_info))
ncopies = 1;
else
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
@@ -4457,6 +4457,10 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
/* FORNOW: unsupported in basic block SLP. */
gcc_assert (loop_vinfo);
+ /* FORNOW: SLP not supported. */
+ if (STMT_SLP_TYPE (stmt_info))
+ return false;
+
gcc_assert (ncopies >= 1);
if (reduc_index && ncopies > 1)
return false; /* FORNOW */
@@ -4469,10 +4473,6 @@ vectorizable_condition (gimple stmt, gimple_stmt_i
&& reduc_def))
return false;
- /* FORNOW: SLP not supported. */
- if (STMT_SLP_TYPE (stmt_info))
- return false;
-
/* FORNOW: not yet supported. */
if (STMT_VINFO_LIVE_P (stmt_info))
{
===================================================================
@@ -1351,9 +1351,10 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec
/* For each possible SLP instance decide whether to SLP it and calculate overall
- unrolling factor needed to SLP the loop. */
+ unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
+ least one instance. */
-void
+bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
unsigned int i, unrolling_factor = 1;
@@ -1382,6 +1383,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
decided_to_slp, unrolling_factor);
+
+ return (decided_to_slp > 0);
}