diff mbox

Add DR_BASE_ALIGNMENT

Message ID 87mv8s6xii.fsf@linaro.org
State New
Headers show

Commit Message

Richard Sandiford June 28, 2017, 1:40 p.m. UTC
This patch records the base alignment in data_reference, to avoid the
second-guessing that was previously done in vect_compute_data_ref_alignment.
It also makes vect_analyze_data_refs use dr_analyze_innermost, instead
of having an almost-copy of the same code.

I'd originally tried to do the second part as a standalone patch,
but on its own it caused us to miscompute the base alignment (due to
the second-guessing not quite working).  This was previously latent
because the old code set STMT_VINFO_DR_ALIGNED_TO to a byte value,
whereas it should have been bits.

After the previous patch, the only thing that dr_analyze_innermost
read from the dr was the DR_REF.  I thought it would be better to pass
that in and make data_reference write-only.  This means that callers
like vect_analyze_data_refs don't have to set any fields first
(and thus not worry about *which* fields they should set).

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2017-06-28  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* tree-data-ref.h (data_reference): Add a base_alignment field.
	(DR_BASE_ALIGNMENT): New macro.
	(dr_analyze_innermost): Add a tree argument.
	* tree-data-ref.c: Include builtins.h.
	(dr_analyze_innermost): Take the tree reference as argument.
	Set up DR_BASE_ALIGNMENT.
	(create_data_ref): Update call accordingly.
	* tree-predcom.c (find_looparound_phi): Likewise.
	* tree-vectorizer.h (_stmt_vec_info): Add dr_base_alignment.
	(STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
	* tree-vect-data-refs.c: Include tree-cfg.h.
	(vect_compute_data_ref_alignment): Use DR_BASE_ALIGNMENT instead
	of calculating an alignment here.
	(vect_analyze_data_refs): Use dr_analyze_innermost.  Record the
	base alignment in STMT_VINFO_DR_BASE_ALIGNMENT.

Comments

Richard Biener June 29, 2017, 11:19 a.m. UTC | #1
On Wed, Jun 28, 2017 at 3:40 PM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> This patch records the base alignment in data_reference, to avoid the

> second-guessing that was previously done in vect_compute_data_ref_alignment.

> It also makes vect_analyze_data_refs use dr_analyze_innermost, instead

> of having an almost-copy of the same code.

>

> I'd originally tried to do the second part as a standalone patch,

> but on its own it caused us to miscompute the base alignment (due to

> the second-guessing not quite working).  This was previously latent

> because the old code set STMT_VINFO_DR_ALIGNED_TO to a byte value,

> whereas it should have been bits.

>

> After the previous patch, the only thing that dr_analyze_innermost

> read from the dr was the DR_REF.  I thought it would be better to pass

> that in and make data_reference write-only.  This means that callers

> like vect_analyze_data_refs don't have to set any fields first

> (and thus not worry about *which* fields they should set).

>

> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

>

> Richard

>

>

> 2017-06-28  Richard Sandiford  <richard.sandiford@linaro.org>

>

> gcc/

>         * tree-data-ref.h (data_reference): Add a base_alignment field.

>         (DR_BASE_ALIGNMENT): New macro.

>         (dr_analyze_innermost): Add a tree argument.

>         * tree-data-ref.c: Include builtins.h.

>         (dr_analyze_innermost): Take the tree reference as argument.

>         Set up DR_BASE_ALIGNMENT.

>         (create_data_ref): Update call accordingly.

>         * tree-predcom.c (find_looparound_phi): Likewise.

>         * tree-vectorizer.h (_stmt_vec_info): Add dr_base_alignment.

>         (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.

>         * tree-vect-data-refs.c: Include tree-cfg.h.

>         (vect_compute_data_ref_alignment): Use DR_BASE_ALIGNMENT instead

>         of calculating an alignment here.

>         (vect_analyze_data_refs): Use dr_analyze_innermost.  Record the

>         base alignment in STMT_VINFO_DR_BASE_ALIGNMENT.

>

> Index: gcc/tree-data-ref.h

> ===================================================================

> --- gcc/tree-data-ref.h 2017-06-26 19:41:19.549571836 +0100

> +++ gcc/tree-data-ref.h 2017-06-28 14:26:19.651051322 +0100

> @@ -119,6 +119,10 @@ struct data_reference

>    /* True when the data reference is in RHS of a stmt.  */

>    bool is_read;

>

> +  /* The alignment of INNERMOST.base_address, in bits.  This is logically

> +     part of INNERMOST, but is kept here to avoid unnecessary padding.  */

> +  unsigned int base_alignment;

> +


But then it would be nice to have dr_analyze_innermost take a struct
innermost_loop_behavior *
only.  That way the vectorizer copy for the outer loop behavior can
just be a innermost_loop_behavior
sub-struct as well and predcom wouldn't need to invent an interesting
DR_STMT either.  The
DR_ accessors wouldn't work on that but I guess they could be made work with

inline tree dr_base_address (struct data_reference *dr) { return
dr->innermost.base_address; }
inline tree dr_base_address (struct innermost_loop_behavior *p) {
return p->base_address; }
#define DR_BASE_ADDRESS(DR) (dr_base_address (dr))

anyway, that's implementation detail ;)

>    /* Behavior of the memory reference in the innermost loop.  */

>    struct innermost_loop_behavior innermost;

>

> @@ -139,6 +143,7 @@ #define DR_NUM_DIMENSIONS(DR)      DR_AC

>  #define DR_IS_READ(DR)             (DR)->is_read

>  #define DR_IS_WRITE(DR)            (!DR_IS_READ (DR))

>  #define DR_BASE_ADDRESS(DR)        (DR)->innermost.base_address

> +#define DR_BASE_ALIGNMENT(DR)      (DR)->base_alignment

>  #define DR_OFFSET(DR)              (DR)->innermost.offset

>  #define DR_INIT(DR)                (DR)->innermost.init

>  #define DR_STEP(DR)                (DR)->innermost.step

> @@ -322,7 +327,7 @@ #define DDR_DIST_VECT(DDR, I) \

>  #define DDR_REVERSED_P(DDR) (DDR)->reversed_p

>

>

> -bool dr_analyze_innermost (struct data_reference *, struct loop *);

> +bool dr_analyze_innermost (struct data_reference *, tree, struct loop *);

>  extern bool compute_data_dependences_for_loop (struct loop *, bool,

>                                                vec<loop_p> *,

>                                                vec<data_reference_p> *,

> Index: gcc/tree-data-ref.c

> ===================================================================

> --- gcc/tree-data-ref.c 2017-06-28 14:26:12.946306736 +0100

> +++ gcc/tree-data-ref.c 2017-06-28 14:26:19.651051322 +0100

> @@ -94,6 +94,7 @@ Software Foundation; either version 3, o

>  #include "dumpfile.h"

>  #include "tree-affine.h"

>  #include "params.h"

> +#include "builtins.h"

>

>  static struct datadep_stats

>  {

> @@ -749,7 +750,7 @@ canonicalize_base_object_address (tree a

>    return build_fold_addr_expr (TREE_OPERAND (addr, 0));

>  }

>

> -/* Analyze the behavior of memory reference DR.  There are two modes:

> +/* Analyze the behavior of memory reference REF.  There are two modes:

>

>     - BB analysis.  In this case we simply split the address into base,

>       init and offset components, without reference to any containing loop.

> @@ -767,12 +768,13 @@ canonicalize_base_object_address (tree a

>     dummy outermost loop.  In other cases perform loop analysis.

>

>     Return true if the analysis succeeded and store the results in DR if so.

> -   BB analysis can only fail for bitfield or reversed-storage accesses.  */

> +   BB analysis can only fail for bitfield or reversed-storage accesses.

> +

> +   Note that DR is purely an output; the contents on input don't matter.  */

>

>  bool

> -dr_analyze_innermost (struct data_reference *dr, struct loop *loop)

> +dr_analyze_innermost (struct data_reference *dr, tree ref, struct loop *loop)

>  {

> -  tree ref = DR_REF (dr);

>    HOST_WIDE_INT pbitsize, pbitpos;

>    tree base, poffset;

>    machine_mode pmode;

> @@ -802,6 +804,7 @@ dr_analyze_innermost (struct data_refere

>        return false;

>      }

>

> +  unsigned int base_alignment = get_object_alignment (base);


I think you want to use get_obect_alignment_1 to also track an known
misalignment so you can add...

>    if (TREE_CODE (base) == MEM_REF)

>      {

>        if (!integer_zerop (TREE_OPERAND (base, 1)))

> @@ -858,16 +861,31 @@ dr_analyze_innermost (struct data_refere

>

>    init = ssize_int (pbitpos / BITS_PER_UNIT);

>    split_constant_offset (base_iv.base, &base_iv.base, &dinit);

> -  init =  size_binop (PLUS_EXPR, init, dinit);

> +

> +  /* If the stripped offset has a lower alignment than the original base,

> +     we need to adjust the alignment of the new base down to match.  */

> +  unsigned int dinit_bits_low = ((unsigned int) TREE_INT_CST_LOW (dinit)

> +                                * BITS_PER_UNIT);


... dinit * BITS_PER_UNIT to it and then do the following on the misalign value.
That should get you slightly better results in some cases.

Ok with that change with or without the suggested above refactoring (not
sure if it will really make things so much nicer).

Thanks,
Richard.

> +  if (dinit_bits_low != 0)

> +    base_alignment = MIN (base_alignment, dinit_bits_low & -dinit_bits_low);

> +  init = size_binop (PLUS_EXPR, init, dinit);

> +

>    split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);

> -  init =  size_binop (PLUS_EXPR, init, dinit);

> +  init = size_binop (PLUS_EXPR, init, dinit);

>

>    step = size_binop (PLUS_EXPR,

>                      fold_convert (ssizetype, base_iv.step),

>                      fold_convert (ssizetype, offset_iv.step));

>

> -  DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);

> +  base = canonicalize_base_object_address (base_iv.base);

> +

> +  /* See if get_pointer_alignment can guarantee a higher alignment than

> +     the one we calculated above.  */

> +  unsigned int alt_alignment = get_pointer_alignment (base);

> +  base_alignment = MAX (base_alignment, alt_alignment);

>

> +  DR_BASE_ADDRESS (dr) = base;

> +  DR_BASE_ALIGNMENT (dr) = base_alignment;

>    DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);

>    DR_INIT (dr) = init;

>    DR_STEP (dr) = step;

> @@ -1071,7 +1089,7 @@ create_data_ref (loop_p nest, loop_p loo

>    DR_REF (dr) = memref;

>    DR_IS_READ (dr) = is_read;

>

> -  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);

> +  dr_analyze_innermost (dr, memref, nest != NULL ? loop : NULL);

>    dr_analyze_indices (dr, nest, loop);

>    dr_analyze_alias (dr);

>

> Index: gcc/tree-predcom.c

> ===================================================================

> --- gcc/tree-predcom.c  2017-05-18 07:51:11.896799736 +0100

> +++ gcc/tree-predcom.c  2017-06-28 14:26:19.651051322 +0100

> @@ -1149,7 +1149,7 @@ find_looparound_phi (struct loop *loop,

>    memset (&init_dr, 0, sizeof (struct data_reference));

>    DR_REF (&init_dr) = init_ref;

>    DR_STMT (&init_dr) = phi;

> -  if (!dr_analyze_innermost (&init_dr, loop))

> +  if (!dr_analyze_innermost (&init_dr, init_ref, loop))

>      return NULL;

>

>    if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))

> Index: gcc/tree-vectorizer.h

> ===================================================================

> --- gcc/tree-vectorizer.h       2017-06-26 19:41:19.549571836 +0100

> +++ gcc/tree-vectorizer.h       2017-06-28 14:26:19.653051245 +0100

> @@ -553,7 +553,8 @@ typedef struct _stmt_vec_info {

>    struct data_reference *data_ref_info;

>

>    /* Information about the data-ref relative to this loop

> -     nest (the loop that is being considered for vectorization).  */

> +     nest (the loop that is being considered for vectorization).

> +     See also dr_base_alignment.  */

>    tree dr_base_address;

>    tree dr_init;

>    tree dr_offset;

> @@ -652,6 +653,10 @@ typedef struct _stmt_vec_info {

>

>    /* The number of scalar stmt references from active SLP instances.  */

>    unsigned int num_slp_uses;

> +

> +  /* Logically belongs with dr_base_address etc., but is kept here to

> +     avoid unnecessary padding.  */

> +  unsigned int dr_base_alignment;

>  } *stmt_vec_info;

>

>  /* Information about a gather/scatter call.  */

> @@ -711,6 +716,7 @@ #define STMT_VINFO_DR_INIT(S)

>  #define STMT_VINFO_DR_OFFSET(S)            (S)->dr_offset

>  #define STMT_VINFO_DR_STEP(S)              (S)->dr_step

>  #define STMT_VINFO_DR_ALIGNED_TO(S)        (S)->dr_aligned_to

> +#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_base_alignment

>

>  #define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p

>  #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt

> Index: gcc/tree-vect-data-refs.c

> ===================================================================

> --- gcc/tree-vect-data-refs.c   2017-06-28 14:25:58.811888377 +0100

> +++ gcc/tree-vect-data-refs.c   2017-06-28 14:26:19.652051284 +0100

> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o

>  #include "expr.h"

>  #include "builtins.h"

>  #include "params.h"

> +#include "tree-cfg.h"

>

>  /* Return true if load- or store-lanes optab OPTAB is implemented for

>     COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

> @@ -672,6 +673,7 @@ vect_compute_data_ref_alignment (struct

>    tree aligned_to;

>    tree step;

>    unsigned HOST_WIDE_INT alignment;

> +  unsigned int base_alignment;

>

>    if (dump_enabled_p ())

>      dump_printf_loc (MSG_NOTE, vect_location,

> @@ -687,6 +689,7 @@ vect_compute_data_ref_alignment (struct

>      misalign = DR_INIT (dr);

>    aligned_to = DR_ALIGNED_TO (dr);

>    base_addr = DR_BASE_ADDRESS (dr);

> +  base_alignment = DR_BASE_ALIGNMENT (dr);

>    vectype = STMT_VINFO_VECTYPE (stmt_info);

>

>    /* In case the dataref is in an inner-loop of the loop that is being

> @@ -708,6 +711,7 @@ vect_compute_data_ref_alignment (struct

>           misalign = STMT_VINFO_DR_INIT (stmt_info);

>           aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);

>           base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);

> +         base_alignment = STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info);

>          }

>        else

>         {

> @@ -738,40 +742,6 @@ vect_compute_data_ref_alignment (struct

>         }

>      }

>

> -  /* To look at alignment of the base we have to preserve an inner MEM_REF

> -     as that carries alignment information of the actual access.  */

> -  base = ref;

> -  while (handled_component_p (base))

> -    base = TREE_OPERAND (base, 0);

> -  unsigned int base_alignment = 0;

> -  unsigned HOST_WIDE_INT base_bitpos;

> -  get_object_alignment_1 (base, &base_alignment, &base_bitpos);

> -  /* As data-ref analysis strips the MEM_REF down to its base operand

> -     to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to

> -     adjust things to make base_alignment valid as the alignment of

> -     DR_BASE_ADDRESS.  */

> -  if (TREE_CODE (base) == MEM_REF)

> -    {

> -      /* Note all this only works if DR_BASE_ADDRESS is the same as

> -        MEM_REF operand zero, otherwise DR/SCEV analysis might have factored

> -        in other offsets.  We need to rework DR to compute the alingment

> -        of DR_BASE_ADDRESS as long as all information is still available.  */

> -      if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))

> -       {

> -         base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;

> -         base_bitpos &= (base_alignment - 1);

> -       }

> -      else

> -       base_bitpos = BITS_PER_UNIT;

> -    }

> -  if (base_bitpos != 0)

> -    base_alignment = base_bitpos & -base_bitpos;

> -  /* Also look at the alignment of the base address DR analysis

> -     computed.  */

> -  unsigned int base_addr_alignment = get_pointer_alignment (base_addr);

> -  if (base_addr_alignment > base_alignment)

> -    base_alignment = base_addr_alignment;

> -

>    if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))

>      DR_VECT_AUX (dr)->base_element_aligned = true;

>

> @@ -3560,100 +3530,35 @@ vect_analyze_data_refs (vec_info *vinfo,

>          the outer-loop.  */

>        if (loop && nested_in_vect_loop_p (loop, stmt))

>         {

> -         tree outer_step, outer_base, outer_init;

> -         HOST_WIDE_INT pbitsize, pbitpos;

> -         tree poffset;

> -         machine_mode pmode;

> -         int punsignedp, preversep, pvolatilep;

> -         affine_iv base_iv, offset_iv;

> -         tree dinit;

> -

>           /* Build a reference to the first location accessed by the

> -            inner-loop: *(BASE+INIT).  (The first location is actually

> -            BASE+INIT+OFFSET, but we add OFFSET separately later).  */

> -          tree inner_base = build_fold_indirect_ref

> -                                (fold_build_pointer_plus (base, init));

> +            inner loop: *(BASE + INIT + OFFSET).  By construction,

> +            this address must be invariant in the inner loop, so we

> +            can consider it as being used in the outer loop.  */

> +         tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),

> +                                         init, offset);

> +         tree init_addr = fold_build_pointer_plus (base, init_offset);

> +         tree init_ref = build_fold_indirect_ref (init_addr);

>

>           if (dump_enabled_p ())

>             {

>               dump_printf_loc (MSG_NOTE, vect_location,

> -                               "analyze in outer-loop: ");

> -             dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);

> +                               "analyze in outer loop: ");

> +             dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);

>               dump_printf (MSG_NOTE, "\n");

>             }

>

> -         outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,

> -                                           &poffset, &pmode, &punsignedp,

> -                                           &preversep, &pvolatilep);

> -         gcc_assert (outer_base != NULL_TREE);

> -

> -         if (pbitpos % BITS_PER_UNIT != 0)

> -           {

> -             if (dump_enabled_p ())

> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> -                                 "failed: bit offset alignment.\n");

> -             return false;

> -           }

> -

> -         if (preversep)

> -           {

> -             if (dump_enabled_p ())

> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> -                                "failed: reverse storage order.\n");

> -             return false;

> -           }

> -

> -         outer_base = build_fold_addr_expr (outer_base);

> -         if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,

> -                          &base_iv, false))

> -           {

> -             if (dump_enabled_p ())

> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> -                                 "failed: evolution of base is not affine.\n");

> -             return false;

> -           }

> -

> -         if (offset)

> -           {

> -             if (poffset)

> -               poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,

> -                                       poffset);

> -             else

> -               poffset = offset;

> -           }

> -

> -         if (!poffset)

> -           {

> -             offset_iv.base = ssize_int (0);

> -             offset_iv.step = ssize_int (0);

> -           }

> -         else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,

> -                               &offset_iv, false))

> -           {

> -             if (dump_enabled_p ())

> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> -                                 "evolution of offset is not affine.\n");

> -             return false;

> -           }

> +         data_reference init_dr;

> +         if (!dr_analyze_innermost (&init_dr, init_ref, loop))

> +           /* dr_analyze_innermost already explained the faiure.  */

> +           return false;

>

> -         outer_init = ssize_int (pbitpos / BITS_PER_UNIT);

> -         split_constant_offset (base_iv.base, &base_iv.base, &dinit);

> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);

> -         split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);

> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);

> -

> -         outer_step = size_binop (PLUS_EXPR,

> -                               fold_convert (ssizetype, base_iv.step),

> -                               fold_convert (ssizetype, offset_iv.step));

> -

> -         STMT_VINFO_DR_STEP (stmt_info) = outer_step;

> -         /* FIXME: Use canonicalize_base_object_address (base_iv.base); */

> -         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;

> -         STMT_VINFO_DR_INIT (stmt_info) = outer_init;

> -         STMT_VINFO_DR_OFFSET (stmt_info) =

> -                               fold_convert (ssizetype, offset_iv.base);

> -         STMT_VINFO_DR_ALIGNED_TO (stmt_info) =

> -                               size_int (highest_pow2_factor (offset_iv.base));

> +         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = DR_BASE_ADDRESS (&init_dr);

> +         STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info)

> +           = DR_BASE_ALIGNMENT (&init_dr);

> +         STMT_VINFO_DR_STEP (stmt_info) = DR_STEP (&init_dr);

> +         STMT_VINFO_DR_INIT (stmt_info) = DR_INIT (&init_dr);

> +         STMT_VINFO_DR_OFFSET (stmt_info) = DR_OFFSET (&init_dr);

> +         STMT_VINFO_DR_ALIGNED_TO (stmt_info) = DR_ALIGNED_TO (&init_dr);

>

>            if (dump_enabled_p ())

>             {
Richard Sandiford July 3, 2017, 6:48 a.m. UTC | #2
Richard Biener <richard.guenther@gmail.com> writes:
> On Wed, Jun 28, 2017 at 3:40 PM, Richard Sandiford

> <richard.sandiford@linaro.org> wrote:

>> This patch records the base alignment in data_reference, to avoid the

>> second-guessing that was previously done in vect_compute_data_ref_alignment.

>> It also makes vect_analyze_data_refs use dr_analyze_innermost, instead

>> of having an almost-copy of the same code.

>>

>> I'd originally tried to do the second part as a standalone patch,

>> but on its own it caused us to miscompute the base alignment (due to

>> the second-guessing not quite working).  This was previously latent

>> because the old code set STMT_VINFO_DR_ALIGNED_TO to a byte value,

>> whereas it should have been bits.

>>

>> After the previous patch, the only thing that dr_analyze_innermost

>> read from the dr was the DR_REF.  I thought it would be better to pass

>> that in and make data_reference write-only.  This means that callers

>> like vect_analyze_data_refs don't have to set any fields first

>> (and thus not worry about *which* fields they should set).

>>

>> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

>>

>> Richard

>>

>>

>> 2017-06-28  Richard Sandiford  <richard.sandiford@linaro.org>

>>

>> gcc/

>>         * tree-data-ref.h (data_reference): Add a base_alignment field.

>>         (DR_BASE_ALIGNMENT): New macro.

>>         (dr_analyze_innermost): Add a tree argument.

>>         * tree-data-ref.c: Include builtins.h.

>>         (dr_analyze_innermost): Take the tree reference as argument.

>>         Set up DR_BASE_ALIGNMENT.

>>         (create_data_ref): Update call accordingly.

>>         * tree-predcom.c (find_looparound_phi): Likewise.

>>         * tree-vectorizer.h (_stmt_vec_info): Add dr_base_alignment.

>>         (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.

>>         * tree-vect-data-refs.c: Include tree-cfg.h.

>>         (vect_compute_data_ref_alignment): Use DR_BASE_ALIGNMENT instead

>>         of calculating an alignment here.

>>         (vect_analyze_data_refs): Use dr_analyze_innermost.  Record the

>>         base alignment in STMT_VINFO_DR_BASE_ALIGNMENT.

>>

>> Index: gcc/tree-data-ref.h

>> ===================================================================

>> --- gcc/tree-data-ref.h 2017-06-26 19:41:19.549571836 +0100

>> +++ gcc/tree-data-ref.h 2017-06-28 14:26:19.651051322 +0100

>> @@ -119,6 +119,10 @@ struct data_reference

>>    /* True when the data reference is in RHS of a stmt.  */

>>    bool is_read;

>>

>> +  /* The alignment of INNERMOST.base_address, in bits.  This is logically

>> +     part of INNERMOST, but is kept here to avoid unnecessary padding.  */

>> +  unsigned int base_alignment;

>> +

>

> But then it would be nice to have dr_analyze_innermost take a struct

> innermost_loop_behavior * only.  That way the vectorizer copy for the

> outer loop behavior can just be a innermost_loop_behavior sub-struct

> as well and predcom wouldn't need to invent an interesting DR_STMT

> either.


Yeah, had wondered that too.  Comments below led to a new series
in which the information ends up being nicely padded, so there was
no excuse not to switch to innermost_loop_behavior.

> The DR_ accessors wouldn't work on that but I guess they

> could be made work with

>

> inline tree dr_base_address (struct data_reference *dr) { return

> dr->innermost.base_address; }

> inline tree dr_base_address (struct innermost_loop_behavior *p) {

> return p->base_address; }

> #define DR_BASE_ADDRESS(DR) (dr_base_address (dr))

>

> anyway, that's implementation detail ;)


I just went for direct field accesses, hope that's OK.

>>    /* Behavior of the memory reference in the innermost loop.  */

>>    struct innermost_loop_behavior innermost;

>>

>> @@ -139,6 +143,7 @@ #define DR_NUM_DIMENSIONS(DR)      DR_AC

>>  #define DR_IS_READ(DR)             (DR)->is_read

>>  #define DR_IS_WRITE(DR)            (!DR_IS_READ (DR))

>>  #define DR_BASE_ADDRESS(DR)        (DR)->innermost.base_address

>> +#define DR_BASE_ALIGNMENT(DR)      (DR)->base_alignment

>>  #define DR_OFFSET(DR)              (DR)->innermost.offset

>>  #define DR_INIT(DR)                (DR)->innermost.init

>>  #define DR_STEP(DR)                (DR)->innermost.step

>> @@ -322,7 +327,7 @@ #define DDR_DIST_VECT(DDR, I) \

>>  #define DDR_REVERSED_P(DDR) (DDR)->reversed_p

>>

>>

>> -bool dr_analyze_innermost (struct data_reference *, struct loop *);

>> +bool dr_analyze_innermost (struct data_reference *, tree, struct loop *);

>>  extern bool compute_data_dependences_for_loop (struct loop *, bool,

>>                                                vec<loop_p> *,

>>                                                vec<data_reference_p> *,

>> Index: gcc/tree-data-ref.c

>> ===================================================================

>> --- gcc/tree-data-ref.c 2017-06-28 14:26:12.946306736 +0100

>> +++ gcc/tree-data-ref.c 2017-06-28 14:26:19.651051322 +0100

>> @@ -94,6 +94,7 @@ Software Foundation; either version 3, o

>>  #include "dumpfile.h"

>>  #include "tree-affine.h"

>>  #include "params.h"

>> +#include "builtins.h"

>>

>>  static struct datadep_stats

>>  {

>> @@ -749,7 +750,7 @@ canonicalize_base_object_address (tree a

>>    return build_fold_addr_expr (TREE_OPERAND (addr, 0));

>>  }

>>

>> -/* Analyze the behavior of memory reference DR.  There are two modes:

>> +/* Analyze the behavior of memory reference REF.  There are two modes:

>>

>>     - BB analysis.  In this case we simply split the address into base,

>>       init and offset components, without reference to any containing loop.

>> @@ -767,12 +768,13 @@ canonicalize_base_object_address (tree a

>>     dummy outermost loop.  In other cases perform loop analysis.

>>

>>     Return true if the analysis succeeded and store the results in DR if so.

>> -   BB analysis can only fail for bitfield or reversed-storage accesses.  */

>> +   BB analysis can only fail for bitfield or reversed-storage accesses.

>> +

>> +   Note that DR is purely an output; the contents on input don't matter.  */

>>

>>  bool

>> -dr_analyze_innermost (struct data_reference *dr, struct loop *loop)

>> +dr_analyze_innermost (struct data_reference *dr, tree ref, struct loop *loop)

>>  {

>> -  tree ref = DR_REF (dr);

>>    HOST_WIDE_INT pbitsize, pbitpos;

>>    tree base, poffset;

>>    machine_mode pmode;

>> @@ -802,6 +804,7 @@ dr_analyze_innermost (struct data_refere

>>        return false;

>>      }

>>

>> +  unsigned int base_alignment = get_object_alignment (base);

>

> I think you want to use get_obect_alignment_1 to also track an known

> misalignment so you can add...

>

>>    if (TREE_CODE (base) == MEM_REF)

>>      {

>>        if (!integer_zerop (TREE_OPERAND (base, 1)))

>> @@ -858,16 +861,31 @@ dr_analyze_innermost (struct data_refere

>>

>>    init = ssize_int (pbitpos / BITS_PER_UNIT);

>>    split_constant_offset (base_iv.base, &base_iv.base, &dinit);

>> -  init =  size_binop (PLUS_EXPR, init, dinit);

>> +

>> +  /* If the stripped offset has a lower alignment than the original base,

>> +     we need to adjust the alignment of the new base down to match.  */

>> +  unsigned int dinit_bits_low = ((unsigned int) TREE_INT_CST_LOW (dinit)

>> +                                * BITS_PER_UNIT);

>

> ... dinit * BITS_PER_UNIT to it and then do the following on the misalign value.

> That should get you slightly better results in some cases.


...and as mentioned on IRC, that also made me finally understand why
having the misalignment in the DR would help.

I'll post the updated patch as part of a new series.

Thanks,
Richard

> Ok with that change with or without the suggested above refactoring (not

> sure if it will really make things so much nicer).

>

> Thanks,

> Richard.

>

>> +  if (dinit_bits_low != 0)

>> +    base_alignment = MIN (base_alignment, dinit_bits_low & -dinit_bits_low);

>> +  init = size_binop (PLUS_EXPR, init, dinit);

>> +

>>    split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);

>> -  init =  size_binop (PLUS_EXPR, init, dinit);

>> +  init = size_binop (PLUS_EXPR, init, dinit);

>>

>>    step = size_binop (PLUS_EXPR,

>>                      fold_convert (ssizetype, base_iv.step),

>>                      fold_convert (ssizetype, offset_iv.step));

>>

>> -  DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);

>> +  base = canonicalize_base_object_address (base_iv.base);

>> +

>> +  /* See if get_pointer_alignment can guarantee a higher alignment than

>> +     the one we calculated above.  */

>> +  unsigned int alt_alignment = get_pointer_alignment (base);

>> +  base_alignment = MAX (base_alignment, alt_alignment);

>>

>> +  DR_BASE_ADDRESS (dr) = base;

>> +  DR_BASE_ALIGNMENT (dr) = base_alignment;

>>    DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);

>>    DR_INIT (dr) = init;

>>    DR_STEP (dr) = step;

>> @@ -1071,7 +1089,7 @@ create_data_ref (loop_p nest, loop_p loo

>>    DR_REF (dr) = memref;

>>    DR_IS_READ (dr) = is_read;

>>

>> -  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);

>> +  dr_analyze_innermost (dr, memref, nest != NULL ? loop : NULL);

>>    dr_analyze_indices (dr, nest, loop);

>>    dr_analyze_alias (dr);

>>

>> Index: gcc/tree-predcom.c

>> ===================================================================

>> --- gcc/tree-predcom.c  2017-05-18 07:51:11.896799736 +0100

>> +++ gcc/tree-predcom.c  2017-06-28 14:26:19.651051322 +0100

>> @@ -1149,7 +1149,7 @@ find_looparound_phi (struct loop *loop,

>>    memset (&init_dr, 0, sizeof (struct data_reference));

>>    DR_REF (&init_dr) = init_ref;

>>    DR_STMT (&init_dr) = phi;

>> -  if (!dr_analyze_innermost (&init_dr, loop))

>> +  if (!dr_analyze_innermost (&init_dr, init_ref, loop))

>>      return NULL;

>>

>>    if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))

>> Index: gcc/tree-vectorizer.h

>> ===================================================================

>> --- gcc/tree-vectorizer.h       2017-06-26 19:41:19.549571836 +0100

>> +++ gcc/tree-vectorizer.h       2017-06-28 14:26:19.653051245 +0100

>> @@ -553,7 +553,8 @@ typedef struct _stmt_vec_info {

>>    struct data_reference *data_ref_info;

>>

>>    /* Information about the data-ref relative to this loop

>> -     nest (the loop that is being considered for vectorization).  */

>> +     nest (the loop that is being considered for vectorization).

>> +     See also dr_base_alignment.  */

>>    tree dr_base_address;

>>    tree dr_init;

>>    tree dr_offset;

>> @@ -652,6 +653,10 @@ typedef struct _stmt_vec_info {

>>

>>    /* The number of scalar stmt references from active SLP instances.  */

>>    unsigned int num_slp_uses;

>> +

>> +  /* Logically belongs with dr_base_address etc., but is kept here to

>> +     avoid unnecessary padding.  */

>> +  unsigned int dr_base_alignment;

>>  } *stmt_vec_info;

>>

>>  /* Information about a gather/scatter call.  */

>> @@ -711,6 +716,7 @@ #define STMT_VINFO_DR_INIT(S)

>>  #define STMT_VINFO_DR_OFFSET(S)            (S)->dr_offset

>>  #define STMT_VINFO_DR_STEP(S)              (S)->dr_step

>>  #define STMT_VINFO_DR_ALIGNED_TO(S)        (S)->dr_aligned_to

>> +#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_base_alignment

>>

>>  #define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p

>>  #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt

>> Index: gcc/tree-vect-data-refs.c

>> ===================================================================

>> --- gcc/tree-vect-data-refs.c   2017-06-28 14:25:58.811888377 +0100

>> +++ gcc/tree-vect-data-refs.c   2017-06-28 14:26:19.652051284 +0100

>> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o

>>  #include "expr.h"

>>  #include "builtins.h"

>>  #include "params.h"

>> +#include "tree-cfg.h"

>>

>>  /* Return true if load- or store-lanes optab OPTAB is implemented for

>>     COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

>> @@ -672,6 +673,7 @@ vect_compute_data_ref_alignment (struct

>>    tree aligned_to;

>>    tree step;

>>    unsigned HOST_WIDE_INT alignment;

>> +  unsigned int base_alignment;

>>

>>    if (dump_enabled_p ())

>>      dump_printf_loc (MSG_NOTE, vect_location,

>> @@ -687,6 +689,7 @@ vect_compute_data_ref_alignment (struct

>>      misalign = DR_INIT (dr);

>>    aligned_to = DR_ALIGNED_TO (dr);

>>    base_addr = DR_BASE_ADDRESS (dr);

>> +  base_alignment = DR_BASE_ALIGNMENT (dr);

>>    vectype = STMT_VINFO_VECTYPE (stmt_info);

>>

>>    /* In case the dataref is in an inner-loop of the loop that is being

>> @@ -708,6 +711,7 @@ vect_compute_data_ref_alignment (struct

>>           misalign = STMT_VINFO_DR_INIT (stmt_info);

>>           aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);

>>           base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);

>> +         base_alignment = STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info);

>>          }

>>        else

>>         {

>> @@ -738,40 +742,6 @@ vect_compute_data_ref_alignment (struct

>>         }

>>      }

>>

>> -  /* To look at alignment of the base we have to preserve an inner MEM_REF

>> -     as that carries alignment information of the actual access.  */

>> -  base = ref;

>> -  while (handled_component_p (base))

>> -    base = TREE_OPERAND (base, 0);

>> -  unsigned int base_alignment = 0;

>> -  unsigned HOST_WIDE_INT base_bitpos;

>> -  get_object_alignment_1 (base, &base_alignment, &base_bitpos);

>> -  /* As data-ref analysis strips the MEM_REF down to its base operand

>> -     to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to

>> -     adjust things to make base_alignment valid as the alignment of

>> -     DR_BASE_ADDRESS.  */

>> -  if (TREE_CODE (base) == MEM_REF)

>> -    {

>> -      /* Note all this only works if DR_BASE_ADDRESS is the same as

>> -        MEM_REF operand zero, otherwise DR/SCEV analysis might have factored

>> -        in other offsets.  We need to rework DR to compute the alingment

>> -        of DR_BASE_ADDRESS as long as all information is still available.  */

>> -      if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))

>> -       {

>> -         base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;

>> -         base_bitpos &= (base_alignment - 1);

>> -       }

>> -      else

>> -       base_bitpos = BITS_PER_UNIT;

>> -    }

>> -  if (base_bitpos != 0)

>> -    base_alignment = base_bitpos & -base_bitpos;

>> -  /* Also look at the alignment of the base address DR analysis

>> -     computed.  */

>> -  unsigned int base_addr_alignment = get_pointer_alignment (base_addr);

>> -  if (base_addr_alignment > base_alignment)

>> -    base_alignment = base_addr_alignment;

>> -

>>    if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))

>>      DR_VECT_AUX (dr)->base_element_aligned = true;

>>

>> @@ -3560,100 +3530,35 @@ vect_analyze_data_refs (vec_info *vinfo,

>>          the outer-loop.  */

>>        if (loop && nested_in_vect_loop_p (loop, stmt))

>>         {

>> -         tree outer_step, outer_base, outer_init;

>> -         HOST_WIDE_INT pbitsize, pbitpos;

>> -         tree poffset;

>> -         machine_mode pmode;

>> -         int punsignedp, preversep, pvolatilep;

>> -         affine_iv base_iv, offset_iv;

>> -         tree dinit;

>> -

>>           /* Build a reference to the first location accessed by the

>> -            inner-loop: *(BASE+INIT).  (The first location is actually

>> -            BASE+INIT+OFFSET, but we add OFFSET separately later).  */

>> -          tree inner_base = build_fold_indirect_ref

>> -                                (fold_build_pointer_plus (base, init));

>> +            inner loop: *(BASE + INIT + OFFSET).  By construction,

>> +            this address must be invariant in the inner loop, so we

>> +            can consider it as being used in the outer loop.  */

>> +         tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),

>> +                                         init, offset);

>> +         tree init_addr = fold_build_pointer_plus (base, init_offset);

>> +         tree init_ref = build_fold_indirect_ref (init_addr);

>>

>>           if (dump_enabled_p ())

>>             {

>>               dump_printf_loc (MSG_NOTE, vect_location,

>> -                               "analyze in outer-loop: ");

>> -             dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);

>> +                               "analyze in outer loop: ");

>> +             dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);

>>               dump_printf (MSG_NOTE, "\n");

>>             }

>>

>> -         outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,

>> -                                           &poffset, &pmode, &punsignedp,

>> -                                           &preversep, &pvolatilep);

>> -         gcc_assert (outer_base != NULL_TREE);

>> -

>> -         if (pbitpos % BITS_PER_UNIT != 0)

>> -           {

>> -             if (dump_enabled_p ())

>> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

>> -                                 "failed: bit offset alignment.\n");

>> -             return false;

>> -           }

>> -

>> -         if (preversep)

>> -           {

>> -             if (dump_enabled_p ())

>> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

>> -                                "failed: reverse storage order.\n");

>> -             return false;

>> -           }

>> -

>> -         outer_base = build_fold_addr_expr (outer_base);

>> -         if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,

>> -                          &base_iv, false))

>> -           {

>> -             if (dump_enabled_p ())

>> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

>> -                                 "failed: evolution of base is not affine.\n");

>> -             return false;

>> -           }

>> -

>> -         if (offset)

>> -           {

>> -             if (poffset)

>> -               poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,

>> -                                       poffset);

>> -             else

>> -               poffset = offset;

>> -           }

>> -

>> -         if (!poffset)

>> -           {

>> -             offset_iv.base = ssize_int (0);

>> -             offset_iv.step = ssize_int (0);

>> -           }

>> -         else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,

>> -                               &offset_iv, false))

>> -           {

>> -             if (dump_enabled_p ())

>> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

>> -                                 "evolution of offset is not affine.\n");

>> -             return false;

>> -           }

>> +         data_reference init_dr;

>> +         if (!dr_analyze_innermost (&init_dr, init_ref, loop))

>> +           /* dr_analyze_innermost already explained the faiure.  */

>> +           return false;

>>

>> -         outer_init = ssize_int (pbitpos / BITS_PER_UNIT);

>> -         split_constant_offset (base_iv.base, &base_iv.base, &dinit);

>> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);

>> -         split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);

>> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);

>> -

>> -         outer_step = size_binop (PLUS_EXPR,

>> -                               fold_convert (ssizetype, base_iv.step),

>> -                               fold_convert (ssizetype, offset_iv.step));

>> -

>> -         STMT_VINFO_DR_STEP (stmt_info) = outer_step;

>> -         /* FIXME: Use canonicalize_base_object_address (base_iv.base); */

>> -         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;

>> -         STMT_VINFO_DR_INIT (stmt_info) = outer_init;

>> -         STMT_VINFO_DR_OFFSET (stmt_info) =

>> -                               fold_convert (ssizetype, offset_iv.base);

>> -         STMT_VINFO_DR_ALIGNED_TO (stmt_info) =

>> -                               size_int (highest_pow2_factor (offset_iv.base));

>> +         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = DR_BASE_ADDRESS (&init_dr);

>> +         STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info)

>> +           = DR_BASE_ALIGNMENT (&init_dr);

>> +         STMT_VINFO_DR_STEP (stmt_info) = DR_STEP (&init_dr);

>> +         STMT_VINFO_DR_INIT (stmt_info) = DR_INIT (&init_dr);

>> +         STMT_VINFO_DR_OFFSET (stmt_info) = DR_OFFSET (&init_dr);

>> +         STMT_VINFO_DR_ALIGNED_TO (stmt_info) = DR_ALIGNED_TO (&init_dr);

>>

>>            if (dump_enabled_p ())

>>             {
diff mbox

Patch

Index: gcc/tree-data-ref.h
===================================================================
--- gcc/tree-data-ref.h	2017-06-26 19:41:19.549571836 +0100
+++ gcc/tree-data-ref.h	2017-06-28 14:26:19.651051322 +0100
@@ -119,6 +119,10 @@  struct data_reference
   /* True when the data reference is in RHS of a stmt.  */
   bool is_read;
 
+  /* The alignment of INNERMOST.base_address, in bits.  This is logically
+     part of INNERMOST, but is kept here to avoid unnecessary padding.  */
+  unsigned int base_alignment;
+
   /* Behavior of the memory reference in the innermost loop.  */
   struct innermost_loop_behavior innermost;
 
@@ -139,6 +143,7 @@  #define DR_NUM_DIMENSIONS(DR)      DR_AC
 #define DR_IS_READ(DR)             (DR)->is_read
 #define DR_IS_WRITE(DR)            (!DR_IS_READ (DR))
 #define DR_BASE_ADDRESS(DR)        (DR)->innermost.base_address
+#define DR_BASE_ALIGNMENT(DR)      (DR)->base_alignment
 #define DR_OFFSET(DR)              (DR)->innermost.offset
 #define DR_INIT(DR)                (DR)->innermost.init
 #define DR_STEP(DR)                (DR)->innermost.step
@@ -322,7 +327,7 @@  #define DDR_DIST_VECT(DDR, I) \
 #define DDR_REVERSED_P(DDR) (DDR)->reversed_p
 
 
-bool dr_analyze_innermost (struct data_reference *, struct loop *);
+bool dr_analyze_innermost (struct data_reference *, tree, struct loop *);
 extern bool compute_data_dependences_for_loop (struct loop *, bool,
 					       vec<loop_p> *,
 					       vec<data_reference_p> *,
Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c	2017-06-28 14:26:12.946306736 +0100
+++ gcc/tree-data-ref.c	2017-06-28 14:26:19.651051322 +0100
@@ -94,6 +94,7 @@  Software Foundation; either version 3, o
 #include "dumpfile.h"
 #include "tree-affine.h"
 #include "params.h"
+#include "builtins.h"
 
 static struct datadep_stats
 {
@@ -749,7 +750,7 @@  canonicalize_base_object_address (tree a
   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 }
 
-/* Analyze the behavior of memory reference DR.  There are two modes:
+/* Analyze the behavior of memory reference REF.  There are two modes:
 
    - BB analysis.  In this case we simply split the address into base,
      init and offset components, without reference to any containing loop.
@@ -767,12 +768,13 @@  canonicalize_base_object_address (tree a
    dummy outermost loop.  In other cases perform loop analysis.
 
    Return true if the analysis succeeded and store the results in DR if so.
-   BB analysis can only fail for bitfield or reversed-storage accesses.  */
+   BB analysis can only fail for bitfield or reversed-storage accesses.
+
+   Note that DR is purely an output; the contents on input don't matter.  */
 
 bool
-dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
+dr_analyze_innermost (struct data_reference *dr, tree ref, struct loop *loop)
 {
-  tree ref = DR_REF (dr);
   HOST_WIDE_INT pbitsize, pbitpos;
   tree base, poffset;
   machine_mode pmode;
@@ -802,6 +804,7 @@  dr_analyze_innermost (struct data_refere
       return false;
     }
 
+  unsigned int base_alignment = get_object_alignment (base);
   if (TREE_CODE (base) == MEM_REF)
     {
       if (!integer_zerop (TREE_OPERAND (base, 1)))
@@ -858,16 +861,31 @@  dr_analyze_innermost (struct data_refere
 
   init = ssize_int (pbitpos / BITS_PER_UNIT);
   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
-  init =  size_binop (PLUS_EXPR, init, dinit);
+
+  /* If the stripped offset has a lower alignment than the original base,
+     we need to adjust the alignment of the new base down to match.  */
+  unsigned int dinit_bits_low = ((unsigned int) TREE_INT_CST_LOW (dinit)
+				 * BITS_PER_UNIT);
+  if (dinit_bits_low != 0)
+    base_alignment = MIN (base_alignment, dinit_bits_low & -dinit_bits_low);
+  init = size_binop (PLUS_EXPR, init, dinit);
+
   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
-  init =  size_binop (PLUS_EXPR, init, dinit);
+  init = size_binop (PLUS_EXPR, init, dinit);
 
   step = size_binop (PLUS_EXPR,
 		     fold_convert (ssizetype, base_iv.step),
 		     fold_convert (ssizetype, offset_iv.step));
 
-  DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
+  base = canonicalize_base_object_address (base_iv.base);
+
+  /* See if get_pointer_alignment can guarantee a higher alignment than
+     the one we calculated above.  */
+  unsigned int alt_alignment = get_pointer_alignment (base);
+  base_alignment = MAX (base_alignment, alt_alignment);
 
+  DR_BASE_ADDRESS (dr) = base;
+  DR_BASE_ALIGNMENT (dr) = base_alignment;
   DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
   DR_INIT (dr) = init;
   DR_STEP (dr) = step;
@@ -1071,7 +1089,7 @@  create_data_ref (loop_p nest, loop_p loo
   DR_REF (dr) = memref;
   DR_IS_READ (dr) = is_read;
 
-  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);
+  dr_analyze_innermost (dr, memref, nest != NULL ? loop : NULL);
   dr_analyze_indices (dr, nest, loop);
   dr_analyze_alias (dr);
 
Index: gcc/tree-predcom.c
===================================================================
--- gcc/tree-predcom.c	2017-05-18 07:51:11.896799736 +0100
+++ gcc/tree-predcom.c	2017-06-28 14:26:19.651051322 +0100
@@ -1149,7 +1149,7 @@  find_looparound_phi (struct loop *loop,
   memset (&init_dr, 0, sizeof (struct data_reference));
   DR_REF (&init_dr) = init_ref;
   DR_STMT (&init_dr) = phi;
-  if (!dr_analyze_innermost (&init_dr, loop))
+  if (!dr_analyze_innermost (&init_dr, init_ref, loop))
     return NULL;
 
   if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	2017-06-26 19:41:19.549571836 +0100
+++ gcc/tree-vectorizer.h	2017-06-28 14:26:19.653051245 +0100
@@ -553,7 +553,8 @@  typedef struct _stmt_vec_info {
   struct data_reference *data_ref_info;
 
   /* Information about the data-ref relative to this loop
-     nest (the loop that is being considered for vectorization).  */
+     nest (the loop that is being considered for vectorization).
+     See also dr_base_alignment.  */
   tree dr_base_address;
   tree dr_init;
   tree dr_offset;
@@ -652,6 +653,10 @@  typedef struct _stmt_vec_info {
 
   /* The number of scalar stmt references from active SLP instances.  */
   unsigned int num_slp_uses;
+
+  /* Logically belongs with dr_base_address etc., but is kept here to
+     avoid unnecessary padding.  */
+  unsigned int dr_base_alignment;
 } *stmt_vec_info;
 
 /* Information about a gather/scatter call.  */
@@ -711,6 +716,7 @@  #define STMT_VINFO_DR_INIT(S)
 #define STMT_VINFO_DR_OFFSET(S)            (S)->dr_offset
 #define STMT_VINFO_DR_STEP(S)              (S)->dr_step
 #define STMT_VINFO_DR_ALIGNED_TO(S)        (S)->dr_aligned_to
+#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_base_alignment
 
 #define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p
 #define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	2017-06-28 14:25:58.811888377 +0100
+++ gcc/tree-vect-data-refs.c	2017-06-28 14:26:19.652051284 +0100
@@ -50,6 +50,7 @@  Software Foundation; either version 3, o
 #include "expr.h"
 #include "builtins.h"
 #include "params.h"
+#include "tree-cfg.h"
 
 /* Return true if load- or store-lanes optab OPTAB is implemented for
    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
@@ -672,6 +673,7 @@  vect_compute_data_ref_alignment (struct
   tree aligned_to;
   tree step;
   unsigned HOST_WIDE_INT alignment;
+  unsigned int base_alignment;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -687,6 +689,7 @@  vect_compute_data_ref_alignment (struct
     misalign = DR_INIT (dr);
   aligned_to = DR_ALIGNED_TO (dr);
   base_addr = DR_BASE_ADDRESS (dr);
+  base_alignment = DR_BASE_ALIGNMENT (dr);
   vectype = STMT_VINFO_VECTYPE (stmt_info);
 
   /* In case the dataref is in an inner-loop of the loop that is being
@@ -708,6 +711,7 @@  vect_compute_data_ref_alignment (struct
 	  misalign = STMT_VINFO_DR_INIT (stmt_info);
 	  aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
 	  base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
+	  base_alignment = STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info);
         }
       else
 	{
@@ -738,40 +742,6 @@  vect_compute_data_ref_alignment (struct
 	}
     }
 
-  /* To look at alignment of the base we have to preserve an inner MEM_REF
-     as that carries alignment information of the actual access.  */
-  base = ref;
-  while (handled_component_p (base))
-    base = TREE_OPERAND (base, 0);
-  unsigned int base_alignment = 0;
-  unsigned HOST_WIDE_INT base_bitpos;
-  get_object_alignment_1 (base, &base_alignment, &base_bitpos);
-  /* As data-ref analysis strips the MEM_REF down to its base operand
-     to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
-     adjust things to make base_alignment valid as the alignment of
-     DR_BASE_ADDRESS.  */
-  if (TREE_CODE (base) == MEM_REF)
-    {
-      /* Note all this only works if DR_BASE_ADDRESS is the same as
-	 MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
-	 in other offsets.  We need to rework DR to compute the alingment
-	 of DR_BASE_ADDRESS as long as all information is still available.  */
-      if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
-	{
-	  base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
-	  base_bitpos &= (base_alignment - 1);
-	}
-      else
-	base_bitpos = BITS_PER_UNIT;
-    }
-  if (base_bitpos != 0)
-    base_alignment = base_bitpos & -base_bitpos;
-  /* Also look at the alignment of the base address DR analysis
-     computed.  */
-  unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
-  if (base_addr_alignment > base_alignment)
-    base_alignment = base_addr_alignment;
-
   if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
     DR_VECT_AUX (dr)->base_element_aligned = true;
 
@@ -3560,100 +3530,35 @@  vect_analyze_data_refs (vec_info *vinfo,
 	 the outer-loop.  */
       if (loop && nested_in_vect_loop_p (loop, stmt))
 	{
-	  tree outer_step, outer_base, outer_init;
-	  HOST_WIDE_INT pbitsize, pbitpos;
-	  tree poffset;
-	  machine_mode pmode;
-	  int punsignedp, preversep, pvolatilep;
-	  affine_iv base_iv, offset_iv;
-	  tree dinit;
-
 	  /* Build a reference to the first location accessed by the
-	     inner-loop: *(BASE+INIT).  (The first location is actually
-	     BASE+INIT+OFFSET, but we add OFFSET separately later).  */
-          tree inner_base = build_fold_indirect_ref
-                                (fold_build_pointer_plus (base, init));
+	     inner loop: *(BASE + INIT + OFFSET).  By construction,
+	     this address must be invariant in the inner loop, so we
+	     can consider it as being used in the outer loop.  */
+	  tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
+					  init, offset);
+	  tree init_addr = fold_build_pointer_plus (base, init_offset);
+	  tree init_ref = build_fold_indirect_ref (init_addr);
 
 	  if (dump_enabled_p ())
 	    {
 	      dump_printf_loc (MSG_NOTE, vect_location,
-                               "analyze in outer-loop: ");
-	      dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
+                               "analyze in outer loop: ");
+	      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
 	      dump_printf (MSG_NOTE, "\n");
 	    }
 
-	  outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
-					    &poffset, &pmode, &punsignedp,
-					    &preversep, &pvolatilep);
-	  gcc_assert (outer_base != NULL_TREE);
-
-	  if (pbitpos % BITS_PER_UNIT != 0)
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "failed: bit offset alignment.\n");
-	      return false;
-	    }
-
-	  if (preversep)
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "failed: reverse storage order.\n");
-	      return false;
-	    }
-
-	  outer_base = build_fold_addr_expr (outer_base);
-	  if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
-                          &base_iv, false))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "failed: evolution of base is not affine.\n");
-	      return false;
-	    }
-
-	  if (offset)
-	    {
-	      if (poffset)
-		poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
-                                       poffset);
-	      else
-		poffset = offset;
-	    }
-
-	  if (!poffset)
-	    {
-	      offset_iv.base = ssize_int (0);
-	      offset_iv.step = ssize_int (0);
-	    }
-	  else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
-                               &offset_iv, false))
-	    {
-	      if (dump_enabled_p ())
-	        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "evolution of offset is not affine.\n");
-	      return false;
-	    }
+	  data_reference init_dr;
+	  if (!dr_analyze_innermost (&init_dr, init_ref, loop))
+	    /* dr_analyze_innermost already explained the faiure.  */
+	    return false;
 
-	  outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
-	  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
-	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
-	  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
-	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
-
-	  outer_step = size_binop (PLUS_EXPR,
-				fold_convert (ssizetype, base_iv.step),
-				fold_convert (ssizetype, offset_iv.step));
-
-	  STMT_VINFO_DR_STEP (stmt_info) = outer_step;
-	  /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
-	  STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
-	  STMT_VINFO_DR_INIT (stmt_info) = outer_init;
-	  STMT_VINFO_DR_OFFSET (stmt_info) =
-				fold_convert (ssizetype, offset_iv.base);
-	  STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
-				size_int (highest_pow2_factor (offset_iv.base));
+	  STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = DR_BASE_ADDRESS (&init_dr);
+	  STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info)
+	    = DR_BASE_ALIGNMENT (&init_dr);
+	  STMT_VINFO_DR_STEP (stmt_info) = DR_STEP (&init_dr);
+	  STMT_VINFO_DR_INIT (stmt_info) = DR_INIT (&init_dr);
+	  STMT_VINFO_DR_OFFSET (stmt_info) = DR_OFFSET (&init_dr);
+	  STMT_VINFO_DR_ALIGNED_TO (stmt_info) = DR_ALIGNED_TO (&init_dr);
 
           if (dump_enabled_p ())
 	    {