diff mbox

Use base inequality for some vector alias checks

Message ID 87pofqs70q.fsf@linaro.org
State New
Headers show

Commit Message

Richard Sandiford May 3, 2017, 8:01 a.m. UTC
This patch checks whether two data references x and y cannot
partially overlap and so are independent whenever &x != &y.
We can then use this in the vectoriser to optimise alias checks.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Thanks,
Richard


gcc/
2016-05-03  Richard Sandiford  <richard.sandiford@linaro.org>

	* hash-traits.h (pair_hash): New struct.
	* tree-data-ref.h (data_dependence_relation): Add object_a and
	object_b fields.
	(DDR_OBJECT_A, DDR_OBJECT_B): New macros.
	* tree-data-ref.c (initialize_data_dependence_relation): Initialize
	DDR_OBJECT_A and DDR_OBJECT_B.
	* tree-vectorizer.h (vec_object_pair): New type.
	(_loop_vec_info): Add a check_unequal_addrs field.
	(LOOP_VINFO_CHECK_UNEQUAL_ADDRS): New macro.
	(LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Return true if there is an
	entry in check_unequal_addrs.  Check comp_alias_ddrs instead of
	may_alias_ddrs.
	* tree-vect-loop.c (destroy_loop_vec_info): Release
	LOOP_VINFO_CHECK_UNEQUAL_ADDRS.
	(vect_analyze_loop_2): Likewise, when restarting.
	(vect_estimate_min_profitable_iters): Estimate the cost of
	LOOP_VINFO_CHECK_UNEQUAL_ADDRS.
	* tree-vect-data-refs.c: Include tree-hash-traits.h.
	(vect_prune_runtime_alias_test_list): Try to handle conflicts
	using LOOP_VINFO_CHECK_UNEQUAL_ADDRS, if the data dependence allows.
	Count such tests in the final summary.
	* tree-vect-loop-manip.c (chain_cond_expr): New function.
	(vect_create_cond_for_align_checks): Use it.
	(vect_create_cond_for_alias_checks): Likewise.
	(vect_create_cond_for_unequal_addrs): New function.
	(vect_loop_versioning): Call it.

gcc/testsuite/
	* gcc.dg/vect/vect-alias-check-6.c: New test.

Comments

Richard Sandiford May 31, 2017, 6:56 a.m. UTC | #1
Ping

Richard Sandiford <richard.sandiford@linaro.org> writes:
> This patch checks whether two data references x and y cannot

> partially overlap and so are independent whenever &x != &y.

> We can then use this in the vectoriser to optimise alias checks.

>

> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

>

> Thanks,

> Richard

>

>

> gcc/

> 2016-05-03  Richard Sandiford  <richard.sandiford@linaro.org>

>

> 	* hash-traits.h (pair_hash): New struct.

> 	* tree-data-ref.h (data_dependence_relation): Add object_a and

> 	object_b fields.

> 	(DDR_OBJECT_A, DDR_OBJECT_B): New macros.

> 	* tree-data-ref.c (initialize_data_dependence_relation): Initialize

> 	DDR_OBJECT_A and DDR_OBJECT_B.

> 	* tree-vectorizer.h (vec_object_pair): New type.

> 	(_loop_vec_info): Add a check_unequal_addrs field.

> 	(LOOP_VINFO_CHECK_UNEQUAL_ADDRS): New macro.

> 	(LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Return true if there is an

> 	entry in check_unequal_addrs.  Check comp_alias_ddrs instead of

> 	may_alias_ddrs.

> 	* tree-vect-loop.c (destroy_loop_vec_info): Release

> 	LOOP_VINFO_CHECK_UNEQUAL_ADDRS.

> 	(vect_analyze_loop_2): Likewise, when restarting.

> 	(vect_estimate_min_profitable_iters): Estimate the cost of

> 	LOOP_VINFO_CHECK_UNEQUAL_ADDRS.

> 	* tree-vect-data-refs.c: Include tree-hash-traits.h.

> 	(vect_prune_runtime_alias_test_list): Try to handle conflicts

> 	using LOOP_VINFO_CHECK_UNEQUAL_ADDRS, if the data dependence allows.

> 	Count such tests in the final summary.

> 	* tree-vect-loop-manip.c (chain_cond_expr): New function.

> 	(vect_create_cond_for_align_checks): Use it.

> 	(vect_create_cond_for_alias_checks): Likewise.

> 	(vect_create_cond_for_unequal_addrs): New function.

> 	(vect_loop_versioning): Call it.

>

> gcc/testsuite/

> 	* gcc.dg/vect/vect-alias-check-6.c: New test.

>

> Index: gcc/hash-traits.h

> ===================================================================

> --- gcc/hash-traits.h	2017-02-23 19:54:15.000000000 +0000

> +++ gcc/hash-traits.h	2017-05-03 08:48:53.312035228 +0100

> @@ -301,6 +301,76 @@ struct ggc_cache_ptr_hash : pointer_hash

>  

>  struct nofree_string_hash : string_hash, typed_noop_remove <const char *> {};

>  

> +/* Traits for pairs of values, using the first to record empty and

> +   deleted slots.  */

> +

> +template <typename T1, typename T2>

> +struct pair_hash

> +{

> +  typedef std::pair <typename T1::value_type,

> +		     typename T2::value_type> value_type;

> +  typedef std::pair <typename T1::compare_type,

> +		     typename T2::compare_type> compare_type;

> +

> +  static inline hashval_t hash (const value_type &);

> +  static inline bool equal (const value_type &, const compare_type &);

> +  static inline void remove (value_type &);

> +  static inline void mark_deleted (value_type &);

> +  static inline void mark_empty (value_type &);

> +  static inline bool is_deleted (const value_type &);

> +  static inline bool is_empty (const value_type &);

> +};

> +

> +template <typename T1, typename T2>

> +inline hashval_t

> +pair_hash <T1, T2>::hash (const value_type &x)

> +{

> +  return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second));

> +}

> +

> +template <typename T1, typename T2>

> +inline bool

> +pair_hash <T1, T2>::equal (const value_type &x, const compare_type &y)

> +{

> +  return T1::equal (x.first, y.first) && T2::equal (x.second, y.second);

> +}

> +

> +template <typename T1, typename T2>

> +inline void

> +pair_hash <T1, T2>::remove (value_type &x)

> +{

> +  T1::remove (x.first);

> +  T2::remove (x.second);

> +}

> +

> +template <typename T1, typename T2>

> +inline void

> +pair_hash <T1, T2>::mark_deleted (value_type &x)

> +{

> +  T1::mark_deleted (x.first);

> +}

> +

> +template <typename T1, typename T2>

> +inline void

> +pair_hash <T1, T2>::mark_empty (value_type &x)

> +{

> +  T1::mark_empty (x.first);

> +}

> +

> +template <typename T1, typename T2>

> +inline bool

> +pair_hash <T1, T2>::is_deleted (const value_type &x)

> +{

> +  return T1::is_deleted (x.first);

> +}

> +

> +template <typename T1, typename T2>

> +inline bool

> +pair_hash <T1, T2>::is_empty (const value_type &x)

> +{

> +  return T1::is_empty (x.first);

> +}

> +

>  template <typename T> struct default_hash_traits : T {};

>  

>  template <typename T>

> Index: gcc/tree-data-ref.h

> ===================================================================

> --- gcc/tree-data-ref.h	2017-05-03 08:48:48.737038502 +0100

> +++ gcc/tree-data-ref.h	2017-05-03 08:48:53.313041828 +0100

> @@ -240,6 +240,13 @@ struct data_dependence_relation

>         but the analyzer cannot be more specific.  */

>    tree are_dependent;

>  

> +  /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are

> +     independent when the runtime addresses of OBJECT_A and OBJECT_B

> +     are different.  The addresses of both objects are invariant in the

> +     loop nest.  */

> +  tree object_a;

> +  tree object_b;

> +

>    /* For each subscript in the dependence test, there is an element in

>       this array.  This is the attribute that labels the edge A->B of

>       the data_dependence_relation.  */

> @@ -303,6 +310,8 @@ #define DDR_A(DDR) (DDR)->a

>  #define DDR_B(DDR) (DDR)->b

>  #define DDR_AFFINE_P(DDR) (DDR)->affine_p

>  #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent

> +#define DDR_OBJECT_A(DDR) (DDR)->object_a

> +#define DDR_OBJECT_B(DDR) (DDR)->object_b

>  #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts

>  #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I]

>  #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length ()

> Index: gcc/tree-data-ref.c

> ===================================================================

> --- gcc/tree-data-ref.c	2017-05-03 08:48:48.737038502 +0100

> +++ gcc/tree-data-ref.c	2017-05-03 08:48:53.313041828 +0100

> @@ -1715,6 +1715,15 @@ initialize_data_dependence_relation (str

>  	}

>  

>        DDR_COULD_BE_INDEPENDENT_P (res) = true;

> +      if (!loop_nest.exists ()

> +	  || (object_address_invariant_in_loop_p (loop_nest[0],

> +						  struct_ref_a)

> +	      && object_address_invariant_in_loop_p (loop_nest[0],

> +						     struct_ref_b)))

> +	{

> +	  DDR_OBJECT_A (res) = struct_ref_a;

> +	  DDR_OBJECT_B (res) = struct_ref_b;

> +	}

>      }

>  

>    DDR_AFFINE_P (res) = true;

> Index: gcc/tree-vectorizer.h

> ===================================================================

> --- gcc/tree-vectorizer.h	2017-05-03 08:48:48.738045102 +0100

> +++ gcc/tree-vectorizer.h	2017-05-03 08:48:53.315055028 +0100

> @@ -174,6 +174,10 @@ struct dr_with_seg_len_pair_t

>  

>  

>  

> +/* Describes two objects whose addresses must be unequal for the vectorized

> +   loop to be valid.  */

> +typedef std::pair<tree, tree> vec_object_pair;

> +

>  /* Vectorizer state common between loop and basic-block vectorization.  */

>  struct vec_info {

>    enum { bb, loop } kind;

> @@ -270,6 +274,9 @@ typedef struct _loop_vec_info : public v

>       lengths from which the run-time aliasing check is built.  */

>    vec<dr_with_seg_len_pair_t> comp_alias_ddrs;

>  

> +  /* Check that the addresses of each pair of objects is unequal.  */

> +  vec<vec_object_pair> check_unequal_addrs;

> +

>    /* Statements in the loop that have data references that are candidates for a

>       runtime (loop versioning) misalignment check.  */

>    vec<gimple *> may_misalign_stmts;

> @@ -364,6 +371,7 @@ #define LOOP_VINFO_UNALIGNED_DR(L)

>  #define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts

>  #define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs

>  #define LOOP_VINFO_COMP_ALIAS_DDRS(L)      (L)->comp_alias_ddrs

> +#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L)  (L)->check_unequal_addrs

>  #define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores

>  #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances

>  #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor

> @@ -383,7 +391,8 @@ #define LOOP_VINFO_ORIG_LOOP_INFO(L)

>  #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)	\

>    ((L)->may_misalign_stmts.length () > 0)

>  #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)		\

> -  ((L)->comp_alias_ddrs.length () > 0)

> +  ((L)->comp_alias_ddrs.length () > 0 \

> +   || (L)->check_unequal_addrs.length () > 0)

>  #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L)		\

>    (LOOP_VINFO_NITERS_ASSUMPTIONS (L))

>  #define LOOP_REQUIRES_VERSIONING(L)			\

> Index: gcc/tree-vect-loop.c

> ===================================================================

> --- gcc/tree-vect-loop.c	2017-04-18 19:52:35.059158859 +0100

> +++ gcc/tree-vect-loop.c	2017-05-03 08:48:53.315055028 +0100

> @@ -1275,6 +1275,8 @@ destroy_loop_vec_info (loop_vec_info loo

>    destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));

>    loop_vinfo->scalar_cost_vec.release ();

>  

> +  LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();

> +

>    free (loop_vinfo);

>    loop->aux = NULL;

>  }

> @@ -2299,6 +2301,7 @@ vect_analyze_loop_2 (loop_vec_info loop_

>      }

>    /* Free optimized alias test DDRS.  */

>    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();

> +  LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();

>    /* Reset target cost data.  */

>    destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));

>    LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)

> @@ -3261,6 +3264,11 @@ vect_estimate_min_profitable_iters (loop

>        unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();

>        (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,

>  			    vect_prologue);

> +      len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();

> +      if (len)

> +	/* Count LEN - 1 ANDs and LEN comparisons.  */

> +	(void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt,

> +			      NULL, 0, vect_prologue);

>        dump_printf (MSG_NOTE,

>                     "cost model: Adding cost of checks for loop "

>                     "versioning aliasing.\n");

> Index: gcc/tree-vect-data-refs.c

> ===================================================================

> --- gcc/tree-vect-data-refs.c	2017-05-03 08:48:48.738045102 +0100

> +++ gcc/tree-vect-data-refs.c	2017-05-03 08:48:53.314048428 +0100

> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o

>  #include "expr.h"

>  #include "builtins.h"

>  #include "params.h"

> +#include "tree-hash-traits.h"

>  

>  /* Return true if load- or store-lanes optab OPTAB is implemented for

>     COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

> @@ -3085,10 +3086,14 @@ dependence_distance_ge_vf (data_dependen

>  bool

>  vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)

>  {

> -  vec<ddr_p> may_alias_ddrs =

> -    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);

> -  vec<dr_with_seg_len_pair_t>& comp_alias_ddrs =

> -    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);

> +  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;

> +  hash_set <tree_pair_hash> compared_objects;

> +

> +  vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);

> +  vec<dr_with_seg_len_pair_t> &comp_alias_ddrs

> +    = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);

> +  vec<vec_object_pair> &check_unequal_addrs

> +    = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);

>    int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

>    tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

>  

> @@ -3151,6 +3156,24 @@ vect_prune_runtime_alias_test_list (loop

>        if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))

>  	continue;

>  

> +      if (DDR_OBJECT_A (ddr))

> +	{

> +	  vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));

> +	  if (!compared_objects.add (new_pair))

> +	    {

> +	      if (dump_enabled_p ())

> +		{

> +		  dump_printf_loc (MSG_NOTE, vect_location, "checking that ");

> +		  dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first);

> +		  dump_printf (MSG_NOTE, " and ");

> +		  dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second);

> +		  dump_printf (MSG_NOTE, " have different addresses\n");

> +		}

> +	      LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);

> +	    }

> +	  continue;

> +	}

> +

>        dr_a = DDR_A (ddr);

>        stmt_a = DR_STMT (DDR_A (ddr));

>        dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));

> @@ -3346,11 +3369,12 @@ vect_prune_runtime_alias_test_list (loop

>  	}

>      }

>  

> +  unsigned int count = (comp_alias_ddrs.length ()

> +			+ check_unequal_addrs.length ());

>    dump_printf_loc (MSG_NOTE, vect_location,

>  		   "improved number of alias checks from %d to %d\n",

> -		   may_alias_ddrs.length (), comp_alias_ddrs.length ());

> -  if ((int) comp_alias_ddrs.length () >

> -      PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))

> +		   may_alias_ddrs.length (), count);

> +  if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))

>      {

>        if (dump_enabled_p ())

>  	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> Index: gcc/tree-vect-loop-manip.c

> ===================================================================

> --- gcc/tree-vect-loop-manip.c	2017-04-18 19:52:34.027608884 +0100

> +++ gcc/tree-vect-loop-manip.c	2017-05-03 08:48:53.314048428 +0100

> @@ -1926,6 +1926,20 @@ vect_create_cond_for_niters_checks (loop

>      *cond_expr = part_cond_expr;

>  }

>  

> +/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR

> +   and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */

> +

> +static void

> +chain_cond_expr (tree *cond_expr, tree part_cond_expr)

> +{

> +  if (*cond_expr)

> +    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,

> +			      *cond_expr, part_cond_expr);

> +  else

> +    *cond_expr = part_cond_expr;

> +}

> +

> +

>  /* Function vect_create_cond_for_align_checks.

>  

>     Create a conditional expression that represents the alignment checks for

> @@ -2037,13 +2051,32 @@ vect_create_cond_for_align_checks (loop_

>    ptrsize_zero = build_int_cst (int_ptrsize_type, 0);

>    part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,

>  				and_tmp_name, ptrsize_zero);

> -  if (*cond_expr)

> -    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,

> -			      *cond_expr, part_cond_expr);

> -  else

> -    *cond_expr = part_cond_expr;

> +  chain_cond_expr (cond_expr, part_cond_expr);

>  }

>  

> +

> +/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,

> +   create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).

> +   Set *COND_EXPR to a tree that is true when both the original *COND_EXPR

> +   and this new condition are true.  Treat a null *COND_EXPR as "true".  */

> +

> +static void

> +vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)

> +{

> +  vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);

> +  unsigned int i;

> +  vec_object_pair *pair;

> +  FOR_EACH_VEC_ELT (pairs, i, pair)

> +    {

> +      tree addr1 = build_fold_addr_expr (pair->first);

> +      tree addr2 = build_fold_addr_expr (pair->second);

> +      tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,

> +					 addr1, addr2);

> +      chain_cond_expr (cond_expr, part_cond_expr);

> +    }

> +}

> +

> +

>  /* Given two data references and segment lengths described by DR_A and DR_B,

>     create expression checking if the two addresses ranges intersect with

>     each other based on index of the two addresses.  This can only be done

> @@ -2280,11 +2313,7 @@ vect_create_cond_for_alias_checks (loop_

>  

>        /* Create condition expression for each pair data references.  */

>        create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);

> -      if (*cond_expr)

> -	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,

> -				  *cond_expr, part_cond_expr);

> -      else

> -	*cond_expr = part_cond_expr;

> +      chain_cond_expr (cond_expr, part_cond_expr);

>      }

>  

>    if (dump_enabled_p ())

> @@ -2353,7 +2382,10 @@ vect_loop_versioning (loop_vec_info loop

>  				       &cond_expr_stmt_list);

>  

>    if (version_alias)

> -    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);

> +    {

> +      vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);

> +      vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);

> +    }

>  

>    cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,

>  				      is_gimple_condexpr, NULL_TREE);

> Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c

> ===================================================================

> --- /dev/null	2017-05-03 08:16:26.972699664 +0100

> +++ gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c	2017-05-03 08:48:53.312035228 +0100

> @@ -0,0 +1,23 @@

> +/* { dg-do compile } */

> +/* { dg-require-effective-target vect_int } */

> +

> +#define N 16

> +

> +struct s { int x[N]; };

> +

> +void

> +f1 (struct s *a, struct s *b)

> +{

> +  for (int i = 0; i < N - 1; ++i)

> +    a->x[i + 1] += b->x[i];

> +}

> +

> +void

> +f2 (struct s *a, struct s *b)

> +{

> +  for (int i = 0; i < N; ++i)

> +    a->x[i] += b->x[N - i - 1];

> +}

> +

> +/* { dg-final { scan-tree-dump-times {checking that [^\n]* and [^\n]* have different addresses} 2 "vect" } } */

> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
Richard Biener June 7, 2017, 10:10 a.m. UTC | #2
On Wed, May 31, 2017 at 8:56 AM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> Ping

>

> Richard Sandiford <richard.sandiford@linaro.org> writes:

>> This patch checks whether two data references x and y cannot

>> partially overlap and so are independent whenever &x != &y.

>> We can then use this in the vectoriser to optimise alias checks.

>>

>> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?


Looks good to me.  Probably needs refactoring now if Bin was faster
with factoring out the machinery to elsewhere.

Thanks,
Richard.

>> Thanks,

>> Richard

>>

>>

>> gcc/

>> 2016-05-03  Richard Sandiford  <richard.sandiford@linaro.org>

>>

>>       * hash-traits.h (pair_hash): New struct.

>>       * tree-data-ref.h (data_dependence_relation): Add object_a and

>>       object_b fields.

>>       (DDR_OBJECT_A, DDR_OBJECT_B): New macros.

>>       * tree-data-ref.c (initialize_data_dependence_relation): Initialize

>>       DDR_OBJECT_A and DDR_OBJECT_B.

>>       * tree-vectorizer.h (vec_object_pair): New type.

>>       (_loop_vec_info): Add a check_unequal_addrs field.

>>       (LOOP_VINFO_CHECK_UNEQUAL_ADDRS): New macro.

>>       (LOOP_REQUIRES_VERSIONING_FOR_ALIAS): Return true if there is an

>>       entry in check_unequal_addrs.  Check comp_alias_ddrs instead of

>>       may_alias_ddrs.

>>       * tree-vect-loop.c (destroy_loop_vec_info): Release

>>       LOOP_VINFO_CHECK_UNEQUAL_ADDRS.

>>       (vect_analyze_loop_2): Likewise, when restarting.

>>       (vect_estimate_min_profitable_iters): Estimate the cost of

>>       LOOP_VINFO_CHECK_UNEQUAL_ADDRS.

>>       * tree-vect-data-refs.c: Include tree-hash-traits.h.

>>       (vect_prune_runtime_alias_test_list): Try to handle conflicts

>>       using LOOP_VINFO_CHECK_UNEQUAL_ADDRS, if the data dependence allows.

>>       Count such tests in the final summary.

>>       * tree-vect-loop-manip.c (chain_cond_expr): New function.

>>       (vect_create_cond_for_align_checks): Use it.

>>       (vect_create_cond_for_alias_checks): Likewise.

>>       (vect_create_cond_for_unequal_addrs): New function.

>>       (vect_loop_versioning): Call it.

>>

>> gcc/testsuite/

>>       * gcc.dg/vect/vect-alias-check-6.c: New test.

>>

>> Index: gcc/hash-traits.h

>> ===================================================================

>> --- gcc/hash-traits.h 2017-02-23 19:54:15.000000000 +0000

>> +++ gcc/hash-traits.h 2017-05-03 08:48:53.312035228 +0100

>> @@ -301,6 +301,76 @@ struct ggc_cache_ptr_hash : pointer_hash

>>

>>  struct nofree_string_hash : string_hash, typed_noop_remove <const char *> {};

>>

>> +/* Traits for pairs of values, using the first to record empty and

>> +   deleted slots.  */

>> +

>> +template <typename T1, typename T2>

>> +struct pair_hash

>> +{

>> +  typedef std::pair <typename T1::value_type,

>> +                  typename T2::value_type> value_type;

>> +  typedef std::pair <typename T1::compare_type,

>> +                  typename T2::compare_type> compare_type;

>> +

>> +  static inline hashval_t hash (const value_type &);

>> +  static inline bool equal (const value_type &, const compare_type &);

>> +  static inline void remove (value_type &);

>> +  static inline void mark_deleted (value_type &);

>> +  static inline void mark_empty (value_type &);

>> +  static inline bool is_deleted (const value_type &);

>> +  static inline bool is_empty (const value_type &);

>> +};

>> +

>> +template <typename T1, typename T2>

>> +inline hashval_t

>> +pair_hash <T1, T2>::hash (const value_type &x)

>> +{

>> +  return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second));

>> +}

>> +

>> +template <typename T1, typename T2>

>> +inline bool

>> +pair_hash <T1, T2>::equal (const value_type &x, const compare_type &y)

>> +{

>> +  return T1::equal (x.first, y.first) && T2::equal (x.second, y.second);

>> +}

>> +

>> +template <typename T1, typename T2>

>> +inline void

>> +pair_hash <T1, T2>::remove (value_type &x)

>> +{

>> +  T1::remove (x.first);

>> +  T2::remove (x.second);

>> +}

>> +

>> +template <typename T1, typename T2>

>> +inline void

>> +pair_hash <T1, T2>::mark_deleted (value_type &x)

>> +{

>> +  T1::mark_deleted (x.first);

>> +}

>> +

>> +template <typename T1, typename T2>

>> +inline void

>> +pair_hash <T1, T2>::mark_empty (value_type &x)

>> +{

>> +  T1::mark_empty (x.first);

>> +}

>> +

>> +template <typename T1, typename T2>

>> +inline bool

>> +pair_hash <T1, T2>::is_deleted (const value_type &x)

>> +{

>> +  return T1::is_deleted (x.first);

>> +}

>> +

>> +template <typename T1, typename T2>

>> +inline bool

>> +pair_hash <T1, T2>::is_empty (const value_type &x)

>> +{

>> +  return T1::is_empty (x.first);

>> +}

>> +

>>  template <typename T> struct default_hash_traits : T {};

>>

>>  template <typename T>

>> Index: gcc/tree-data-ref.h

>> ===================================================================

>> --- gcc/tree-data-ref.h       2017-05-03 08:48:48.737038502 +0100

>> +++ gcc/tree-data-ref.h       2017-05-03 08:48:53.313041828 +0100

>> @@ -240,6 +240,13 @@ struct data_dependence_relation

>>         but the analyzer cannot be more specific.  */

>>    tree are_dependent;

>>

>> +  /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are

>> +     independent when the runtime addresses of OBJECT_A and OBJECT_B

>> +     are different.  The addresses of both objects are invariant in the

>> +     loop nest.  */

>> +  tree object_a;

>> +  tree object_b;

>> +

>>    /* For each subscript in the dependence test, there is an element in

>>       this array.  This is the attribute that labels the edge A->B of

>>       the data_dependence_relation.  */

>> @@ -303,6 +310,8 @@ #define DDR_A(DDR) (DDR)->a

>>  #define DDR_B(DDR) (DDR)->b

>>  #define DDR_AFFINE_P(DDR) (DDR)->affine_p

>>  #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent

>> +#define DDR_OBJECT_A(DDR) (DDR)->object_a

>> +#define DDR_OBJECT_B(DDR) (DDR)->object_b

>>  #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts

>>  #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I]

>>  #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length ()

>> Index: gcc/tree-data-ref.c

>> ===================================================================

>> --- gcc/tree-data-ref.c       2017-05-03 08:48:48.737038502 +0100

>> +++ gcc/tree-data-ref.c       2017-05-03 08:48:53.313041828 +0100

>> @@ -1715,6 +1715,15 @@ initialize_data_dependence_relation (str

>>       }

>>

>>        DDR_COULD_BE_INDEPENDENT_P (res) = true;

>> +      if (!loop_nest.exists ()

>> +       || (object_address_invariant_in_loop_p (loop_nest[0],

>> +                                               struct_ref_a)

>> +           && object_address_invariant_in_loop_p (loop_nest[0],

>> +                                                  struct_ref_b)))

>> +     {

>> +       DDR_OBJECT_A (res) = struct_ref_a;

>> +       DDR_OBJECT_B (res) = struct_ref_b;

>> +     }

>>      }

>>

>>    DDR_AFFINE_P (res) = true;

>> Index: gcc/tree-vectorizer.h

>> ===================================================================

>> --- gcc/tree-vectorizer.h     2017-05-03 08:48:48.738045102 +0100

>> +++ gcc/tree-vectorizer.h     2017-05-03 08:48:53.315055028 +0100

>> @@ -174,6 +174,10 @@ struct dr_with_seg_len_pair_t

>>

>>

>>

>> +/* Describes two objects whose addresses must be unequal for the vectorized

>> +   loop to be valid.  */

>> +typedef std::pair<tree, tree> vec_object_pair;

>> +

>>  /* Vectorizer state common between loop and basic-block vectorization.  */

>>  struct vec_info {

>>    enum { bb, loop } kind;

>> @@ -270,6 +274,9 @@ typedef struct _loop_vec_info : public v

>>       lengths from which the run-time aliasing check is built.  */

>>    vec<dr_with_seg_len_pair_t> comp_alias_ddrs;

>>

>> +  /* Check that the addresses of each pair of objects is unequal.  */

>> +  vec<vec_object_pair> check_unequal_addrs;

>> +

>>    /* Statements in the loop that have data references that are candidates for a

>>       runtime (loop versioning) misalignment check.  */

>>    vec<gimple *> may_misalign_stmts;

>> @@ -364,6 +371,7 @@ #define LOOP_VINFO_UNALIGNED_DR(L)

>>  #define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts

>>  #define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs

>>  #define LOOP_VINFO_COMP_ALIAS_DDRS(L)      (L)->comp_alias_ddrs

>> +#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L)  (L)->check_unequal_addrs

>>  #define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores

>>  #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances

>>  #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor

>> @@ -383,7 +391,8 @@ #define LOOP_VINFO_ORIG_LOOP_INFO(L)

>>  #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)    \

>>    ((L)->may_misalign_stmts.length () > 0)

>>  #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)                \

>> -  ((L)->comp_alias_ddrs.length () > 0)

>> +  ((L)->comp_alias_ddrs.length () > 0 \

>> +   || (L)->check_unequal_addrs.length () > 0)

>>  #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L)               \

>>    (LOOP_VINFO_NITERS_ASSUMPTIONS (L))

>>  #define LOOP_REQUIRES_VERSIONING(L)                  \

>> Index: gcc/tree-vect-loop.c

>> ===================================================================

>> --- gcc/tree-vect-loop.c      2017-04-18 19:52:35.059158859 +0100

>> +++ gcc/tree-vect-loop.c      2017-05-03 08:48:53.315055028 +0100

>> @@ -1275,6 +1275,8 @@ destroy_loop_vec_info (loop_vec_info loo

>>    destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));

>>    loop_vinfo->scalar_cost_vec.release ();

>>

>> +  LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();

>> +

>>    free (loop_vinfo);

>>    loop->aux = NULL;

>>  }

>> @@ -2299,6 +2301,7 @@ vect_analyze_loop_2 (loop_vec_info loop_

>>      }

>>    /* Free optimized alias test DDRS.  */

>>    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();

>> +  LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();

>>    /* Reset target cost data.  */

>>    destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));

>>    LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)

>> @@ -3261,6 +3264,11 @@ vect_estimate_min_profitable_iters (loop

>>        unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();

>>        (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,

>>                           vect_prologue);

>> +      len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();

>> +      if (len)

>> +     /* Count LEN - 1 ANDs and LEN comparisons.  */

>> +     (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt,

>> +                           NULL, 0, vect_prologue);

>>        dump_printf (MSG_NOTE,

>>                     "cost model: Adding cost of checks for loop "

>>                     "versioning aliasing.\n");

>> Index: gcc/tree-vect-data-refs.c

>> ===================================================================

>> --- gcc/tree-vect-data-refs.c 2017-05-03 08:48:48.738045102 +0100

>> +++ gcc/tree-vect-data-refs.c 2017-05-03 08:48:53.314048428 +0100

>> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o

>>  #include "expr.h"

>>  #include "builtins.h"

>>  #include "params.h"

>> +#include "tree-hash-traits.h"

>>

>>  /* Return true if load- or store-lanes optab OPTAB is implemented for

>>     COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

>> @@ -3085,10 +3086,14 @@ dependence_distance_ge_vf (data_dependen

>>  bool

>>  vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)

>>  {

>> -  vec<ddr_p> may_alias_ddrs =

>> -    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);

>> -  vec<dr_with_seg_len_pair_t>& comp_alias_ddrs =

>> -    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);

>> +  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;

>> +  hash_set <tree_pair_hash> compared_objects;

>> +

>> +  vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);

>> +  vec<dr_with_seg_len_pair_t> &comp_alias_ddrs

>> +    = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);

>> +  vec<vec_object_pair> &check_unequal_addrs

>> +    = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);

>>    int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

>>    tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

>>

>> @@ -3151,6 +3156,24 @@ vect_prune_runtime_alias_test_list (loop

>>        if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))

>>       continue;

>>

>> +      if (DDR_OBJECT_A (ddr))

>> +     {

>> +       vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));

>> +       if (!compared_objects.add (new_pair))

>> +         {

>> +           if (dump_enabled_p ())

>> +             {

>> +               dump_printf_loc (MSG_NOTE, vect_location, "checking that ");

>> +               dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first);

>> +               dump_printf (MSG_NOTE, " and ");

>> +               dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second);

>> +               dump_printf (MSG_NOTE, " have different addresses\n");

>> +             }

>> +           LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);

>> +         }

>> +       continue;

>> +     }

>> +

>>        dr_a = DDR_A (ddr);

>>        stmt_a = DR_STMT (DDR_A (ddr));

>>        dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));

>> @@ -3346,11 +3369,12 @@ vect_prune_runtime_alias_test_list (loop

>>       }

>>      }

>>

>> +  unsigned int count = (comp_alias_ddrs.length ()

>> +                     + check_unequal_addrs.length ());

>>    dump_printf_loc (MSG_NOTE, vect_location,

>>                  "improved number of alias checks from %d to %d\n",

>> -                may_alias_ddrs.length (), comp_alias_ddrs.length ());

>> -  if ((int) comp_alias_ddrs.length () >

>> -      PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))

>> +                may_alias_ddrs.length (), count);

>> +  if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))

>>      {

>>        if (dump_enabled_p ())

>>       dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

>> Index: gcc/tree-vect-loop-manip.c

>> ===================================================================

>> --- gcc/tree-vect-loop-manip.c        2017-04-18 19:52:34.027608884 +0100

>> +++ gcc/tree-vect-loop-manip.c        2017-05-03 08:48:53.314048428 +0100

>> @@ -1926,6 +1926,20 @@ vect_create_cond_for_niters_checks (loop

>>      *cond_expr = part_cond_expr;

>>  }

>>

>> +/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR

>> +   and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */

>> +

>> +static void

>> +chain_cond_expr (tree *cond_expr, tree part_cond_expr)

>> +{

>> +  if (*cond_expr)

>> +    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,

>> +                           *cond_expr, part_cond_expr);

>> +  else

>> +    *cond_expr = part_cond_expr;

>> +}

>> +

>> +

>>  /* Function vect_create_cond_for_align_checks.

>>

>>     Create a conditional expression that represents the alignment checks for

>> @@ -2037,13 +2051,32 @@ vect_create_cond_for_align_checks (loop_

>>    ptrsize_zero = build_int_cst (int_ptrsize_type, 0);

>>    part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,

>>                               and_tmp_name, ptrsize_zero);

>> -  if (*cond_expr)

>> -    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,

>> -                           *cond_expr, part_cond_expr);

>> -  else

>> -    *cond_expr = part_cond_expr;

>> +  chain_cond_expr (cond_expr, part_cond_expr);

>>  }

>>

>> +

>> +/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,

>> +   create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).

>> +   Set *COND_EXPR to a tree that is true when both the original *COND_EXPR

>> +   and this new condition are true.  Treat a null *COND_EXPR as "true".  */

>> +

>> +static void

>> +vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)

>> +{

>> +  vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);

>> +  unsigned int i;

>> +  vec_object_pair *pair;

>> +  FOR_EACH_VEC_ELT (pairs, i, pair)

>> +    {

>> +      tree addr1 = build_fold_addr_expr (pair->first);

>> +      tree addr2 = build_fold_addr_expr (pair->second);

>> +      tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,

>> +                                      addr1, addr2);

>> +      chain_cond_expr (cond_expr, part_cond_expr);

>> +    }

>> +}

>> +

>> +

>>  /* Given two data references and segment lengths described by DR_A and DR_B,

>>     create expression checking if the two addresses ranges intersect with

>>     each other based on index of the two addresses.  This can only be done

>> @@ -2280,11 +2313,7 @@ vect_create_cond_for_alias_checks (loop_

>>

>>        /* Create condition expression for each pair data references.  */

>>        create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);

>> -      if (*cond_expr)

>> -     *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,

>> -                               *cond_expr, part_cond_expr);

>> -      else

>> -     *cond_expr = part_cond_expr;

>> +      chain_cond_expr (cond_expr, part_cond_expr);

>>      }

>>

>>    if (dump_enabled_p ())

>> @@ -2353,7 +2382,10 @@ vect_loop_versioning (loop_vec_info loop

>>                                      &cond_expr_stmt_list);

>>

>>    if (version_alias)

>> -    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);

>> +    {

>> +      vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);

>> +      vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);

>> +    }

>>

>>    cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,

>>                                     is_gimple_condexpr, NULL_TREE);

>> Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c

>> ===================================================================

>> --- /dev/null 2017-05-03 08:16:26.972699664 +0100

>> +++ gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c    2017-05-03 08:48:53.312035228 +0100

>> @@ -0,0 +1,23 @@

>> +/* { dg-do compile } */

>> +/* { dg-require-effective-target vect_int } */

>> +

>> +#define N 16

>> +

>> +struct s { int x[N]; };

>> +

>> +void

>> +f1 (struct s *a, struct s *b)

>> +{

>> +  for (int i = 0; i < N - 1; ++i)

>> +    a->x[i + 1] += b->x[i];

>> +}

>> +

>> +void

>> +f2 (struct s *a, struct s *b)

>> +{

>> +  for (int i = 0; i < N; ++i)

>> +    a->x[i] += b->x[N - i - 1];

>> +}

>> +

>> +/* { dg-final { scan-tree-dump-times {checking that [^\n]* and [^\n]* have different addresses} 2 "vect" } } */

>> +/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */
diff mbox

Patch

Index: gcc/hash-traits.h
===================================================================
--- gcc/hash-traits.h	2017-02-23 19:54:15.000000000 +0000
+++ gcc/hash-traits.h	2017-05-03 08:48:53.312035228 +0100
@@ -301,6 +301,76 @@  struct ggc_cache_ptr_hash : pointer_hash
 
 struct nofree_string_hash : string_hash, typed_noop_remove <const char *> {};
 
+/* Traits for pairs of values, using the first to record empty and
+   deleted slots.  */
+
+template <typename T1, typename T2>
+struct pair_hash
+{
+  typedef std::pair <typename T1::value_type,
+		     typename T2::value_type> value_type;
+  typedef std::pair <typename T1::compare_type,
+		     typename T2::compare_type> compare_type;
+
+  static inline hashval_t hash (const value_type &);
+  static inline bool equal (const value_type &, const compare_type &);
+  static inline void remove (value_type &);
+  static inline void mark_deleted (value_type &);
+  static inline void mark_empty (value_type &);
+  static inline bool is_deleted (const value_type &);
+  static inline bool is_empty (const value_type &);
+};
+
+template <typename T1, typename T2>
+inline hashval_t
+pair_hash <T1, T2>::hash (const value_type &x)
+{
+  return iterative_hash_hashval_t (T1::hash (x.first), T2::hash (x.second));
+}
+
+template <typename T1, typename T2>
+inline bool
+pair_hash <T1, T2>::equal (const value_type &x, const compare_type &y)
+{
+  return T1::equal (x.first, y.first) && T2::equal (x.second, y.second);
+}
+
+template <typename T1, typename T2>
+inline void
+pair_hash <T1, T2>::remove (value_type &x)
+{
+  T1::remove (x.first);
+  T2::remove (x.second);
+}
+
+template <typename T1, typename T2>
+inline void
+pair_hash <T1, T2>::mark_deleted (value_type &x)
+{
+  T1::mark_deleted (x.first);
+}
+
+template <typename T1, typename T2>
+inline void
+pair_hash <T1, T2>::mark_empty (value_type &x)
+{
+  T1::mark_empty (x.first);
+}
+
+template <typename T1, typename T2>
+inline bool
+pair_hash <T1, T2>::is_deleted (const value_type &x)
+{
+  return T1::is_deleted (x.first);
+}
+
+template <typename T1, typename T2>
+inline bool
+pair_hash <T1, T2>::is_empty (const value_type &x)
+{
+  return T1::is_empty (x.first);
+}
+
 template <typename T> struct default_hash_traits : T {};
 
 template <typename T>
Index: gcc/tree-data-ref.h
===================================================================
--- gcc/tree-data-ref.h	2017-05-03 08:48:48.737038502 +0100
+++ gcc/tree-data-ref.h	2017-05-03 08:48:53.313041828 +0100
@@ -240,6 +240,13 @@  struct data_dependence_relation
        but the analyzer cannot be more specific.  */
   tree are_dependent;
 
+  /* If nonnull, COULD_BE_INDEPENDENT_P is true and the accesses are
+     independent when the runtime addresses of OBJECT_A and OBJECT_B
+     are different.  The addresses of both objects are invariant in the
+     loop nest.  */
+  tree object_a;
+  tree object_b;
+
   /* For each subscript in the dependence test, there is an element in
      this array.  This is the attribute that labels the edge A->B of
      the data_dependence_relation.  */
@@ -303,6 +310,8 @@  #define DDR_A(DDR) (DDR)->a
 #define DDR_B(DDR) (DDR)->b
 #define DDR_AFFINE_P(DDR) (DDR)->affine_p
 #define DDR_ARE_DEPENDENT(DDR) (DDR)->are_dependent
+#define DDR_OBJECT_A(DDR) (DDR)->object_a
+#define DDR_OBJECT_B(DDR) (DDR)->object_b
 #define DDR_SUBSCRIPTS(DDR) (DDR)->subscripts
 #define DDR_SUBSCRIPT(DDR, I) DDR_SUBSCRIPTS (DDR)[I]
 #define DDR_NUM_SUBSCRIPTS(DDR) DDR_SUBSCRIPTS (DDR).length ()
Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c	2017-05-03 08:48:48.737038502 +0100
+++ gcc/tree-data-ref.c	2017-05-03 08:48:53.313041828 +0100
@@ -1715,6 +1715,15 @@  initialize_data_dependence_relation (str
 	}
 
       DDR_COULD_BE_INDEPENDENT_P (res) = true;
+      if (!loop_nest.exists ()
+	  || (object_address_invariant_in_loop_p (loop_nest[0],
+						  struct_ref_a)
+	      && object_address_invariant_in_loop_p (loop_nest[0],
+						     struct_ref_b)))
+	{
+	  DDR_OBJECT_A (res) = struct_ref_a;
+	  DDR_OBJECT_B (res) = struct_ref_b;
+	}
     }
 
   DDR_AFFINE_P (res) = true;
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	2017-05-03 08:48:48.738045102 +0100
+++ gcc/tree-vectorizer.h	2017-05-03 08:48:53.315055028 +0100
@@ -174,6 +174,10 @@  struct dr_with_seg_len_pair_t
 
 
 
+/* Describes two objects whose addresses must be unequal for the vectorized
+   loop to be valid.  */
+typedef std::pair<tree, tree> vec_object_pair;
+
 /* Vectorizer state common between loop and basic-block vectorization.  */
 struct vec_info {
   enum { bb, loop } kind;
@@ -270,6 +274,9 @@  typedef struct _loop_vec_info : public v
      lengths from which the run-time aliasing check is built.  */
   vec<dr_with_seg_len_pair_t> comp_alias_ddrs;
 
+  /* Check that the addresses of each pair of objects is unequal.  */
+  vec<vec_object_pair> check_unequal_addrs;
+
   /* Statements in the loop that have data references that are candidates for a
      runtime (loop versioning) misalignment check.  */
   vec<gimple *> may_misalign_stmts;
@@ -364,6 +371,7 @@  #define LOOP_VINFO_UNALIGNED_DR(L)
 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L)   (L)->may_misalign_stmts
 #define LOOP_VINFO_MAY_ALIAS_DDRS(L)       (L)->may_alias_ddrs
 #define LOOP_VINFO_COMP_ALIAS_DDRS(L)      (L)->comp_alias_ddrs
+#define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L)  (L)->check_unequal_addrs
 #define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores
 #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
@@ -383,7 +391,8 @@  #define LOOP_VINFO_ORIG_LOOP_INFO(L)
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L)	\
   ((L)->may_misalign_stmts.length () > 0)
 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L)		\
-  ((L)->comp_alias_ddrs.length () > 0)
+  ((L)->comp_alias_ddrs.length () > 0 \
+   || (L)->check_unequal_addrs.length () > 0)
 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L)		\
   (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
 #define LOOP_REQUIRES_VERSIONING(L)			\
Index: gcc/tree-vect-loop.c
===================================================================
--- gcc/tree-vect-loop.c	2017-04-18 19:52:35.059158859 +0100
+++ gcc/tree-vect-loop.c	2017-05-03 08:48:53.315055028 +0100
@@ -1275,6 +1275,8 @@  destroy_loop_vec_info (loop_vec_info loo
   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
   loop_vinfo->scalar_cost_vec.release ();
 
+  LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
+
   free (loop_vinfo);
   loop->aux = NULL;
 }
@@ -2299,6 +2301,7 @@  vect_analyze_loop_2 (loop_vec_info loop_
     }
   /* Free optimized alias test DDRS.  */
   LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
+  LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
   /* Reset target cost data.  */
   destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
   LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
@@ -3261,6 +3264,11 @@  vect_estimate_min_profitable_iters (loop
       unsigned len = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).length ();
       (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
 			    vect_prologue);
+      len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
+      if (len)
+	/* Count LEN - 1 ANDs and LEN comparisons.  */
+	(void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt,
+			      NULL, 0, vect_prologue);
       dump_printf (MSG_NOTE,
                    "cost model: Adding cost of checks for loop "
                    "versioning aliasing.\n");
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	2017-05-03 08:48:48.738045102 +0100
+++ gcc/tree-vect-data-refs.c	2017-05-03 08:48:53.314048428 +0100
@@ -50,6 +50,7 @@  Software Foundation; either version 3, o
 #include "expr.h"
 #include "builtins.h"
 #include "params.h"
+#include "tree-hash-traits.h"
 
 /* Return true if load- or store-lanes optab OPTAB is implemented for
    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
@@ -3085,10 +3086,14 @@  dependence_distance_ge_vf (data_dependen
 bool
 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
 {
-  vec<ddr_p> may_alias_ddrs =
-    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
-  vec<dr_with_seg_len_pair_t>& comp_alias_ddrs =
-    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
+  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
+  hash_set <tree_pair_hash> compared_objects;
+
+  vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
+  vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
+    = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
+  vec<vec_object_pair> &check_unequal_addrs
+    = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
   int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
 
@@ -3151,6 +3156,24 @@  vect_prune_runtime_alias_test_list (loop
       if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
 	continue;
 
+      if (DDR_OBJECT_A (ddr))
+	{
+	  vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
+	  if (!compared_objects.add (new_pair))
+	    {
+	      if (dump_enabled_p ())
+		{
+		  dump_printf_loc (MSG_NOTE, vect_location, "checking that ");
+		  dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.first);
+		  dump_printf (MSG_NOTE, " and ");
+		  dump_generic_expr (MSG_NOTE, TDF_SLIM, new_pair.second);
+		  dump_printf (MSG_NOTE, " have different addresses\n");
+		}
+	      LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
+	    }
+	  continue;
+	}
+
       dr_a = DDR_A (ddr);
       stmt_a = DR_STMT (DDR_A (ddr));
       dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
@@ -3346,11 +3369,12 @@  vect_prune_runtime_alias_test_list (loop
 	}
     }
 
+  unsigned int count = (comp_alias_ddrs.length ()
+			+ check_unequal_addrs.length ());
   dump_printf_loc (MSG_NOTE, vect_location,
 		   "improved number of alias checks from %d to %d\n",
-		   may_alias_ddrs.length (), comp_alias_ddrs.length ());
-  if ((int) comp_alias_ddrs.length () >
-      PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
+		   may_alias_ddrs.length (), count);
+  if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
Index: gcc/tree-vect-loop-manip.c
===================================================================
--- gcc/tree-vect-loop-manip.c	2017-04-18 19:52:34.027608884 +0100
+++ gcc/tree-vect-loop-manip.c	2017-05-03 08:48:53.314048428 +0100
@@ -1926,6 +1926,20 @@  vect_create_cond_for_niters_checks (loop
     *cond_expr = part_cond_expr;
 }
 
+/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
+   and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */
+
+static void
+chain_cond_expr (tree *cond_expr, tree part_cond_expr)
+{
+  if (*cond_expr)
+    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
+			      *cond_expr, part_cond_expr);
+  else
+    *cond_expr = part_cond_expr;
+}
+
+
 /* Function vect_create_cond_for_align_checks.
 
    Create a conditional expression that represents the alignment checks for
@@ -2037,13 +2051,32 @@  vect_create_cond_for_align_checks (loop_
   ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
   part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
 				and_tmp_name, ptrsize_zero);
-  if (*cond_expr)
-    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
-			      *cond_expr, part_cond_expr);
-  else
-    *cond_expr = part_cond_expr;
+  chain_cond_expr (cond_expr, part_cond_expr);
 }
 
+
+/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
+   create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
+   Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
+   and this new condition are true.  Treat a null *COND_EXPR as "true".  */
+
+static void
+vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
+{
+  vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
+  unsigned int i;
+  vec_object_pair *pair;
+  FOR_EACH_VEC_ELT (pairs, i, pair)
+    {
+      tree addr1 = build_fold_addr_expr (pair->first);
+      tree addr2 = build_fold_addr_expr (pair->second);
+      tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
+					 addr1, addr2);
+      chain_cond_expr (cond_expr, part_cond_expr);
+    }
+}
+
+
 /* Given two data references and segment lengths described by DR_A and DR_B,
    create expression checking if the two addresses ranges intersect with
    each other based on index of the two addresses.  This can only be done
@@ -2280,11 +2313,7 @@  vect_create_cond_for_alias_checks (loop_
 
       /* Create condition expression for each pair data references.  */
       create_intersect_range_checks (loop_vinfo, &part_cond_expr, dr_a, dr_b);
-      if (*cond_expr)
-	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
-				  *cond_expr, part_cond_expr);
-      else
-	*cond_expr = part_cond_expr;
+      chain_cond_expr (cond_expr, part_cond_expr);
     }
 
   if (dump_enabled_p ())
@@ -2353,7 +2382,10 @@  vect_loop_versioning (loop_vec_info loop
 				       &cond_expr_stmt_list);
 
   if (version_alias)
-    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
+    {
+      vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
+      vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
+    }
 
   cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
 				      is_gimple_condexpr, NULL_TREE);
Index: gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c
===================================================================
--- /dev/null	2017-05-03 08:16:26.972699664 +0100
+++ gcc/testsuite/gcc.dg/vect/vect-alias-check-6.c	2017-05-03 08:48:53.312035228 +0100
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+
+#define N 16
+
+struct s { int x[N]; };
+
+void
+f1 (struct s *a, struct s *b)
+{
+  for (int i = 0; i < N - 1; ++i)
+    a->x[i + 1] += b->x[i];
+}
+
+void
+f2 (struct s *a, struct s *b)
+{
+  for (int i = 0; i < N; ++i)
+    a->x[i] += b->x[N - i - 1];
+}
+
+/* { dg-final { scan-tree-dump-times {checking that [^\n]* and [^\n]* have different addresses} 2 "vect" } } */
+/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */