diff mbox series

[5/7] Add DR_BASE_ALIGNMENT and DR_BASE_MISALIGNMENT

Message ID 87o9t2x95f.fsf@linaro.org
State New
Headers show
Series None | expand

Commit Message

Richard Sandiford July 3, 2017, 7:38 a.m. UTC
This patch records the base alignment and misalignment in
innermost_loop_behavior, to avoid the second-guessing that was
previously done in vect_compute_data_ref_alignment.  It also makes
vect_analyze_data_refs use dr_analyze_innermost, instead of having an
almost-copy of the same code.

I wasn't sure whether the alignments should be measured in bits
(for consistency with most other interfaces) or in bytes (for consistency
with DR_ALIGNED_TO, now DR_OFFSET_ALIGNMENT, and with *_ptr_info_alignment).
I went for bytes because:

- I think in practice most consumers are going to want bytes.
  E.g. using bytes avoids having to mix TYPE_ALIGN and TYPE_ALIGN_UNIT
  in vect_compute_data_ref_alignment.

- It means that any bit-level paranoia is dealt with when building
  the innermost_loop_behavior and doesn't get pushed down to consumers.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2017-07-03  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* tree-data-ref.h (innermost_loop_behavior): Add base_alignment
	and base_misalignment fields.
	(DR_BASE_ALIGNMENT, DR_BASE_MISALIGNMENT): New macros.
	* tree-data-ref.c: Include builtins.h.
	(dr_analyze_innermost): Set up the new innermost_loop_behavior fields.
	* tree-vectorizer.h (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
	(STMT_VINFO_DR_BASE_MISALIGNMENT): Likewise.
	* tree-vect-data-refs.c: Include tree-cfg.h.
	(vect_compute_data_ref_alignment): Use the new innermost_loop_behavior
	fields instead of calculating an alignment here.
	(vect_analyze_data_refs): Use dr_analyze_innermost.  Dump the new
	innermost_loop_behavior fields.

Comments

Richard Biener July 3, 2017, 10:41 a.m. UTC | #1
On Mon, Jul 3, 2017 at 9:38 AM, Richard Sandiford
<richard.sandiford@linaro.org> wrote:
> This patch records the base alignment and misalignment in

> innermost_loop_behavior, to avoid the second-guessing that was

> previously done in vect_compute_data_ref_alignment.  It also makes

> vect_analyze_data_refs use dr_analyze_innermost, instead of having an

> almost-copy of the same code.

>

> I wasn't sure whether the alignments should be measured in bits

> (for consistency with most other interfaces) or in bytes (for consistency

> with DR_ALIGNED_TO, now DR_OFFSET_ALIGNMENT, and with *_ptr_info_alignment).

> I went for bytes because:

>

> - I think in practice most consumers are going to want bytes.

>   E.g. using bytes avoids having to mix TYPE_ALIGN and TYPE_ALIGN_UNIT

>   in vect_compute_data_ref_alignment.

>

> - It means that any bit-level paranoia is dealt with when building

>   the innermost_loop_behavior and doesn't get pushed down to consumers.

>

> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?


Ok.

Thanks,
Richard.

> Richard

>

>

> 2017-07-03  Richard Sandiford  <richard.sandiford@linaro.org>

>

> gcc/

>         * tree-data-ref.h (innermost_loop_behavior): Add base_alignment

>         and base_misalignment fields.

>         (DR_BASE_ALIGNMENT, DR_BASE_MISALIGNMENT): New macros.

>         * tree-data-ref.c: Include builtins.h.

>         (dr_analyze_innermost): Set up the new innermost_loop_behavior fields.

>         * tree-vectorizer.h (STMT_VINFO_DR_BASE_ALIGNMENT): New macro.

>         (STMT_VINFO_DR_BASE_MISALIGNMENT): Likewise.

>         * tree-vect-data-refs.c: Include tree-cfg.h.

>         (vect_compute_data_ref_alignment): Use the new innermost_loop_behavior

>         fields instead of calculating an alignment here.

>         (vect_analyze_data_refs): Use dr_analyze_innermost.  Dump the new

>         innermost_loop_behavior fields.

>

> Index: gcc/tree-data-ref.h

> ===================================================================

> --- gcc/tree-data-ref.h 2017-07-03 07:52:14.194782203 +0100

> +++ gcc/tree-data-ref.h 2017-07-03 07:52:55.920272347 +0100

> @@ -52,6 +52,42 @@ struct innermost_loop_behavior

>    tree init;

>    tree step;

>

> +  /* BASE_ADDRESS is known to be misaligned by BASE_MISALIGNMENT bytes

> +     from an alignment boundary of BASE_ALIGNMENT bytes.  For example,

> +     if we had:

> +

> +       struct S __attribute__((aligned(16))) { ... };

> +

> +       char *ptr;

> +       ... *(struct S *) (ptr - 4) ...;

> +

> +     the information would be:

> +

> +       base_address:      ptr

> +       base_alignment:     16

> +       base_misalignment:   4

> +       init:               -4

> +

> +     where init cancels the base misalignment.  If instead we had a

> +     reference to a particular field:

> +

> +       struct S __attribute__((aligned(16))) { ... int f; ... };

> +

> +       char *ptr;

> +       ... ((struct S *) (ptr - 4))->f ...;

> +

> +     the information would be:

> +

> +       base_address:      ptr

> +       base_alignment:     16

> +       base_misalignment:   4

> +       init:               -4 + offsetof (S, f)

> +

> +     where base_address + init might also be misaligned, and by a different

> +     amount from base_address.  */

> +  unsigned int base_alignment;

> +  unsigned int base_misalignment;

> +

>    /* The largest power of two that divides OFFSET, capped to a suitably

>       high value if the offset is zero.  This is a byte rather than a bit

>       quantity.  */

> @@ -147,6 +183,8 @@ #define DR_OFFSET(DR)              (DR)-

>  #define DR_INIT(DR)                (DR)->innermost.init

>  #define DR_STEP(DR)                (DR)->innermost.step

>  #define DR_PTR_INFO(DR)            (DR)->alias.ptr_info

> +#define DR_BASE_ALIGNMENT(DR)      (DR)->innermost.base_alignment

> +#define DR_BASE_MISALIGNMENT(DR)   (DR)->innermost.base_misalignment

>  #define DR_OFFSET_ALIGNMENT(DR)    (DR)->innermost.offset_alignment

>  #define DR_STEP_ALIGNMENT(DR)      (DR)->innermost.step_alignment

>  #define DR_INNERMOST(DR)           (DR)->innermost

> Index: gcc/tree-data-ref.c

> ===================================================================

> --- gcc/tree-data-ref.c 2017-07-03 07:52:14.193782226 +0100

> +++ gcc/tree-data-ref.c 2017-07-03 07:52:55.920272347 +0100

> @@ -94,6 +94,7 @@ Software Foundation; either version 3, o

>  #include "dumpfile.h"

>  #include "tree-affine.h"

>  #include "params.h"

> +#include "builtins.h"

>

>  static struct datadep_stats

>  {

> @@ -802,11 +803,26 @@ dr_analyze_innermost (struct data_refere

>        return false;

>      }

>

> +  /* Calculate the alignment and misalignment for the inner reference.  */

> +  unsigned int HOST_WIDE_INT base_misalignment;

> +  unsigned int base_alignment;

> +  get_object_alignment_1 (base, &base_alignment, &base_misalignment);

> +

> +  /* There are no bitfield references remaining in BASE, so the values

> +     we got back must be whole bytes.  */

> +  gcc_assert (base_alignment % BITS_PER_UNIT == 0

> +             && base_misalignment % BITS_PER_UNIT == 0);

> +  base_alignment /= BITS_PER_UNIT;

> +  base_misalignment /= BITS_PER_UNIT;

> +

>    if (TREE_CODE (base) == MEM_REF)

>      {

>        if (!integer_zerop (TREE_OPERAND (base, 1)))

>         {

> +         /* Subtract MOFF from the base and add it to POFFSET instead.

> +            Adjust the misalignment to reflect the amount we subtracted.  */

>           offset_int moff = mem_ref_offset (base);

> +         base_misalignment -= moff.to_short_addr ();

>           tree mofft = wide_int_to_tree (sizetype, moff);

>           if (!poffset)

>             poffset = mofft;

> @@ -855,20 +871,46 @@ dr_analyze_innermost (struct data_refere

>      }

>

>    init = ssize_int (pbitpos / BITS_PER_UNIT);

> +

> +  /* Subtract any constant component from the base and add it to INIT instead.

> +     Adjust the misalignment to reflect the amount we subtracted.  */

>    split_constant_offset (base_iv.base, &base_iv.base, &dinit);

> -  init =  size_binop (PLUS_EXPR, init, dinit);

> +  init = size_binop (PLUS_EXPR, init, dinit);

> +  base_misalignment -= TREE_INT_CST_LOW (dinit);

> +

>    split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);

> -  init =  size_binop (PLUS_EXPR, init, dinit);

> +  init = size_binop (PLUS_EXPR, init, dinit);

>

>    step = size_binop (PLUS_EXPR,

>                      fold_convert (ssizetype, base_iv.step),

>                      fold_convert (ssizetype, offset_iv.step));

>

> -  drb->base_address = canonicalize_base_object_address (base_iv.base);

> +  base = canonicalize_base_object_address (base_iv.base);

> +

> +  /* See if get_pointer_alignment can guarantee a higher alignment than

> +     the one we calculated above.  */

> +  unsigned int HOST_WIDE_INT alt_misalignment;

> +  unsigned int alt_alignment;

> +  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

> +

> +  /* As above, these values must be whole bytes.  */

> +  gcc_assert (alt_alignment % BITS_PER_UNIT == 0

> +             && alt_misalignment % BITS_PER_UNIT == 0);

> +  alt_alignment /= BITS_PER_UNIT;

> +  alt_misalignment /= BITS_PER_UNIT;

> +

> +  if (base_alignment < alt_alignment)

> +    {

> +      base_alignment = alt_alignment;

> +      base_misalignment = alt_misalignment;

> +    }

>

> +  drb->base_address = base;

>    drb->offset = fold_convert (ssizetype, offset_iv.base);

>    drb->init = init;

>    drb->step = step;

> +  drb->base_alignment = base_alignment;

> +  drb->base_misalignment = base_misalignment & (base_alignment - 1);

>    drb->offset_alignment = highest_pow2_factor (offset_iv.base);

>    drb->step_alignment = highest_pow2_factor (step);

>

> @@ -1084,6 +1126,9 @@ create_data_ref (loop_p nest, loop_p loo

>        print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);

>        fprintf (dump_file, "\n\tstep: ");

>        print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);

> +      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));

> +      fprintf (dump_file, "\n\tbase misalignment: %d",

> +              DR_BASE_MISALIGNMENT (dr));

>        fprintf (dump_file, "\n\toffset alignment: %d",

>                DR_OFFSET_ALIGNMENT (dr));

>        fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));

> Index: gcc/tree-vectorizer.h

> ===================================================================

> --- gcc/tree-vectorizer.h       2017-07-03 07:52:14.196782157 +0100

> +++ gcc/tree-vectorizer.h       2017-07-03 07:52:55.921272300 +0100

> @@ -707,6 +707,9 @@ #define STMT_VINFO_DR_BASE_ADDRESS(S)

>  #define STMT_VINFO_DR_INIT(S)              (S)->dr_wrt_vec_loop.init

>  #define STMT_VINFO_DR_OFFSET(S)            (S)->dr_wrt_vec_loop.offset

>  #define STMT_VINFO_DR_STEP(S)              (S)->dr_wrt_vec_loop.step

> +#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_wrt_vec_loop.base_alignment

> +#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \

> +  (S)->dr_wrt_vec_loop.base_misalignment

>  #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \

>    (S)->dr_wrt_vec_loop.offset_alignment

>  #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \

> Index: gcc/tree-vect-data-refs.c

> ===================================================================

> --- gcc/tree-vect-data-refs.c   2017-07-03 07:52:14.194782203 +0100

> +++ gcc/tree-vect-data-refs.c   2017-07-03 07:52:55.921272300 +0100

> @@ -50,6 +50,7 @@ Software Foundation; either version 3, o

>  #include "expr.h"

>  #include "builtins.h"

>  #include "params.h"

> +#include "tree-cfg.h"

>

>  /* Return true if load- or store-lanes optab OPTAB is implemented for

>     COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

> @@ -667,8 +668,6 @@ vect_compute_data_ref_alignment (struct

>    struct loop *loop = NULL;

>    tree ref = DR_REF (dr);

>    tree vectype = STMT_VINFO_VECTYPE (stmt_info);

> -  tree base;

> -  unsigned HOST_WIDE_INT alignment;

>

>    if (dump_enabled_p ())

>      dump_printf_loc (MSG_NOTE, vect_location,

> @@ -728,48 +727,18 @@ vect_compute_data_ref_alignment (struct

>         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

>                          "step doesn't divide the vector-size.\n");

>      }

> -  tree base_addr = drb->base_address;

>

> -  /* To look at alignment of the base we have to preserve an inner MEM_REF

> -     as that carries alignment information of the actual access.  */

> -  base = ref;

> -  while (handled_component_p (base))

> -    base = TREE_OPERAND (base, 0);

> -  unsigned int base_alignment = 0;

> -  unsigned HOST_WIDE_INT base_bitpos;

> -  get_object_alignment_1 (base, &base_alignment, &base_bitpos);

> -  /* As data-ref analysis strips the MEM_REF down to its base operand

> -     to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to

> -     adjust things to make base_alignment valid as the alignment of

> -     DR_BASE_ADDRESS.  */

> -  if (TREE_CODE (base) == MEM_REF)

> -    {

> -      /* Note all this only works if DR_BASE_ADDRESS is the same as

> -        MEM_REF operand zero, otherwise DR/SCEV analysis might have factored

> -        in other offsets.  We need to rework DR to compute the alingment

> -        of DR_BASE_ADDRESS as long as all information is still available.  */

> -      if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))

> -       {

> -         base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;

> -         base_bitpos &= (base_alignment - 1);

> -       }

> -      else

> -       base_bitpos = BITS_PER_UNIT;

> -    }

> -  if (base_bitpos != 0)

> -    base_alignment = base_bitpos & -base_bitpos;

> -  /* Also look at the alignment of the base address DR analysis

> -     computed.  */

> -  unsigned int base_addr_alignment = get_pointer_alignment (base_addr);

> -  if (base_addr_alignment > base_alignment)

> -    base_alignment = base_addr_alignment;

> +  unsigned int base_alignment = drb->base_alignment;

> +  unsigned int base_misalignment = drb->base_misalignment;

> +  unsigned HOST_WIDE_INT vector_alignment = TYPE_ALIGN_UNIT (vectype);

> +  unsigned HOST_WIDE_INT element_alignment

> +    = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));

>

> -  if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))

> +  if (base_alignment >= element_alignment

> +      && (base_misalignment & (element_alignment - 1)) == 0)

>      DR_VECT_AUX (dr)->base_element_aligned = true;

>

> -  alignment = TYPE_ALIGN_UNIT (vectype);

> -

> -  if (drb->offset_alignment < alignment

> +  if (drb->offset_alignment < vector_alignment

>        || !step_preserves_misalignment_p

>        /* We need to know whether the step wrt the vectorized loop is

>          negative when computing the starting misalignment below.  */

> @@ -785,12 +754,13 @@ vect_compute_data_ref_alignment (struct

>        return true;

>      }

>

> -  if (base_alignment < TYPE_ALIGN (vectype))

> +  if (base_alignment < vector_alignment)

>      {

> -      base = base_addr;

> +      tree base = drb->base_address;

>        if (TREE_CODE (base) == ADDR_EXPR)

>         base = TREE_OPERAND (base, 0);

> -      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))

> +      if (!vect_can_force_dr_alignment_p (base,

> +                                         vector_alignment * BITS_PER_UNIT))

>         {

>           if (dump_enabled_p ())

>             {

> @@ -828,24 +798,20 @@ vect_compute_data_ref_alignment (struct

>        DR_VECT_AUX (dr)->base_decl = base;

>        DR_VECT_AUX (dr)->base_misaligned = true;

>        DR_VECT_AUX (dr)->base_element_aligned = true;

> +      base_misalignment = 0;

>      }

> +  unsigned int misalignment = (base_misalignment

> +                              + TREE_INT_CST_LOW (drb->init));

>

>    /* If this is a backward running DR then first access in the larger

>       vectype actually is N-1 elements before the address in the DR.

>       Adjust misalign accordingly.  */

> -  tree misalign = drb->init;

>    if (tree_int_cst_sgn (drb->step) < 0)

> -    {

> -      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);

> -      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,

> -        otherwise we wouldn't be here.  */

> -      offset = fold_build2 (MULT_EXPR, ssizetype, offset, drb->step);

> -      /* PLUS because STEP was negative.  */

> -      misalign = size_binop (PLUS_EXPR, misalign, offset);

> -    }

> +    /* PLUS because STEP is negative.  */

> +    misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)

> +                    * TREE_INT_CST_LOW (drb->step));

>

> -  SET_DR_MISALIGNMENT (dr,

> -                      wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());

> +  SET_DR_MISALIGNMENT (dr, misalignment & (vector_alignment - 1));

>

>    if (dump_enabled_p ())

>      {

> @@ -3554,100 +3520,27 @@ vect_analyze_data_refs (vec_info *vinfo,

>          the outer-loop.  */

>        if (loop && nested_in_vect_loop_p (loop, stmt))

>         {

> -         tree outer_step, outer_base, outer_init;

> -         HOST_WIDE_INT pbitsize, pbitpos;

> -         tree poffset;

> -         machine_mode pmode;

> -         int punsignedp, preversep, pvolatilep;

> -         affine_iv base_iv, offset_iv;

> -         tree dinit;

> -

>           /* Build a reference to the first location accessed by the

> -            inner-loop: *(BASE+INIT).  (The first location is actually

> -            BASE+INIT+OFFSET, but we add OFFSET separately later).  */

> -          tree inner_base = build_fold_indirect_ref

> -                                (fold_build_pointer_plus (base, init));

> +            inner loop: *(BASE + INIT + OFFSET).  By construction,

> +            this address must be invariant in the inner loop, so we

> +            can consider it as being used in the outer loop.  */

> +         tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),

> +                                         init, offset);

> +         tree init_addr = fold_build_pointer_plus (base, init_offset);

> +         tree init_ref = build_fold_indirect_ref (init_addr);

>

>           if (dump_enabled_p ())

>             {

>               dump_printf_loc (MSG_NOTE, vect_location,

> -                               "analyze in outer-loop: ");

> -             dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);

> +                               "analyze in outer loop: ");

> +             dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);

>               dump_printf (MSG_NOTE, "\n");

>             }

>

> -         outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,

> -                                           &poffset, &pmode, &punsignedp,

> -                                           &preversep, &pvolatilep);

> -         gcc_assert (outer_base != NULL_TREE);

> -

> -         if (pbitpos % BITS_PER_UNIT != 0)

> -           {

> -             if (dump_enabled_p ())

> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> -                                 "failed: bit offset alignment.\n");

> -             return false;

> -           }

> -

> -         if (preversep)

> -           {

> -             if (dump_enabled_p ())

> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> -                                "failed: reverse storage order.\n");

> -             return false;

> -           }

> -

> -         outer_base = build_fold_addr_expr (outer_base);

> -         if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,

> -                          &base_iv, false))

> -           {

> -             if (dump_enabled_p ())

> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> -                                 "failed: evolution of base is not affine.\n");

> -             return false;

> -           }

> -

> -         if (offset)

> -           {

> -             if (poffset)

> -               poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,

> -                                       poffset);

> -             else

> -               poffset = offset;

> -           }

> -

> -         if (!poffset)

> -           {

> -             offset_iv.base = ssize_int (0);

> -             offset_iv.step = ssize_int (0);

> -           }

> -         else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,

> -                               &offset_iv, false))

> -           {

> -             if (dump_enabled_p ())

> -               dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,

> -                                 "evolution of offset is not affine.\n");

> -             return false;

> -           }

> -

> -         outer_init = ssize_int (pbitpos / BITS_PER_UNIT);

> -         split_constant_offset (base_iv.base, &base_iv.base, &dinit);

> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);

> -         split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);

> -         outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);

> -

> -         outer_step = size_binop (PLUS_EXPR,

> -                               fold_convert (ssizetype, base_iv.step),

> -                               fold_convert (ssizetype, offset_iv.step));

> -

> -         STMT_VINFO_DR_STEP (stmt_info) = outer_step;

> -         /* FIXME: Use canonicalize_base_object_address (base_iv.base); */

> -         STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;

> -         STMT_VINFO_DR_INIT (stmt_info) = outer_init;

> -         STMT_VINFO_DR_OFFSET (stmt_info) =

> -                               fold_convert (ssizetype, offset_iv.base);

> -         STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info)

> -           = highest_pow2_factor (offset_iv.base);

> +         if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),

> +                                    init_ref, loop))

> +           /* dr_analyze_innermost already explained the failure.  */

> +           return false;

>

>            if (dump_enabled_p ())

>             {

> @@ -3665,6 +3558,10 @@ vect_analyze_data_refs (vec_info *vinfo,

>               dump_printf (MSG_NOTE, "\n\touter step: ");

>               dump_generic_expr (MSG_NOTE, TDF_SLIM,

>                                   STMT_VINFO_DR_STEP (stmt_info));

> +             dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",

> +                          STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));

> +             dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",

> +                          STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));

>               dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",

>                            STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));

>               dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",
diff mbox series

Patch

Index: gcc/tree-data-ref.h
===================================================================
--- gcc/tree-data-ref.h	2017-07-03 07:52:14.194782203 +0100
+++ gcc/tree-data-ref.h	2017-07-03 07:52:55.920272347 +0100
@@ -52,6 +52,42 @@  struct innermost_loop_behavior
   tree init;
   tree step;
 
+  /* BASE_ADDRESS is known to be misaligned by BASE_MISALIGNMENT bytes
+     from an alignment boundary of BASE_ALIGNMENT bytes.  For example,
+     if we had:
+
+       struct S __attribute__((aligned(16))) { ... };
+
+       char *ptr;
+       ... *(struct S *) (ptr - 4) ...;
+
+     the information would be:
+
+       base_address:      ptr
+       base_alignment:     16
+       base_misalignment:   4
+       init:               -4
+
+     where init cancels the base misalignment.  If instead we had a
+     reference to a particular field:
+
+       struct S __attribute__((aligned(16))) { ... int f; ... };
+
+       char *ptr;
+       ... ((struct S *) (ptr - 4))->f ...;
+
+     the information would be:
+
+       base_address:      ptr
+       base_alignment:     16
+       base_misalignment:   4
+       init:               -4 + offsetof (S, f)
+
+     where base_address + init might also be misaligned, and by a different
+     amount from base_address.  */
+  unsigned int base_alignment;
+  unsigned int base_misalignment;
+
   /* The largest power of two that divides OFFSET, capped to a suitably
      high value if the offset is zero.  This is a byte rather than a bit
      quantity.  */
@@ -147,6 +183,8 @@  #define DR_OFFSET(DR)              (DR)-
 #define DR_INIT(DR)                (DR)->innermost.init
 #define DR_STEP(DR)                (DR)->innermost.step
 #define DR_PTR_INFO(DR)            (DR)->alias.ptr_info
+#define DR_BASE_ALIGNMENT(DR)      (DR)->innermost.base_alignment
+#define DR_BASE_MISALIGNMENT(DR)   (DR)->innermost.base_misalignment
 #define DR_OFFSET_ALIGNMENT(DR)    (DR)->innermost.offset_alignment
 #define DR_STEP_ALIGNMENT(DR)      (DR)->innermost.step_alignment
 #define DR_INNERMOST(DR)           (DR)->innermost
Index: gcc/tree-data-ref.c
===================================================================
--- gcc/tree-data-ref.c	2017-07-03 07:52:14.193782226 +0100
+++ gcc/tree-data-ref.c	2017-07-03 07:52:55.920272347 +0100
@@ -94,6 +94,7 @@  Software Foundation; either version 3, o
 #include "dumpfile.h"
 #include "tree-affine.h"
 #include "params.h"
+#include "builtins.h"
 
 static struct datadep_stats
 {
@@ -802,11 +803,26 @@  dr_analyze_innermost (struct data_refere
       return false;
     }
 
+  /* Calculate the alignment and misalignment for the inner reference.  */
+  unsigned int HOST_WIDE_INT base_misalignment;
+  unsigned int base_alignment;
+  get_object_alignment_1 (base, &base_alignment, &base_misalignment);
+
+  /* There are no bitfield references remaining in BASE, so the values
+     we got back must be whole bytes.  */
+  gcc_assert (base_alignment % BITS_PER_UNIT == 0
+	      && base_misalignment % BITS_PER_UNIT == 0);
+  base_alignment /= BITS_PER_UNIT;
+  base_misalignment /= BITS_PER_UNIT;
+
   if (TREE_CODE (base) == MEM_REF)
     {
       if (!integer_zerop (TREE_OPERAND (base, 1)))
 	{
+	  /* Subtract MOFF from the base and add it to POFFSET instead.
+	     Adjust the misalignment to reflect the amount we subtracted.  */
 	  offset_int moff = mem_ref_offset (base);
+	  base_misalignment -= moff.to_short_addr ();
 	  tree mofft = wide_int_to_tree (sizetype, moff);
 	  if (!poffset)
 	    poffset = mofft;
@@ -855,20 +871,46 @@  dr_analyze_innermost (struct data_refere
     }
 
   init = ssize_int (pbitpos / BITS_PER_UNIT);
+
+  /* Subtract any constant component from the base and add it to INIT instead.
+     Adjust the misalignment to reflect the amount we subtracted.  */
   split_constant_offset (base_iv.base, &base_iv.base, &dinit);
-  init =  size_binop (PLUS_EXPR, init, dinit);
+  init = size_binop (PLUS_EXPR, init, dinit);
+  base_misalignment -= TREE_INT_CST_LOW (dinit);
+
   split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
-  init =  size_binop (PLUS_EXPR, init, dinit);
+  init = size_binop (PLUS_EXPR, init, dinit);
 
   step = size_binop (PLUS_EXPR,
 		     fold_convert (ssizetype, base_iv.step),
 		     fold_convert (ssizetype, offset_iv.step));
 
-  drb->base_address = canonicalize_base_object_address (base_iv.base);
+  base = canonicalize_base_object_address (base_iv.base);
+
+  /* See if get_pointer_alignment can guarantee a higher alignment than
+     the one we calculated above.  */
+  unsigned int HOST_WIDE_INT alt_misalignment;
+  unsigned int alt_alignment;
+  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);
+
+  /* As above, these values must be whole bytes.  */
+  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
+	      && alt_misalignment % BITS_PER_UNIT == 0);
+  alt_alignment /= BITS_PER_UNIT;
+  alt_misalignment /= BITS_PER_UNIT;
+
+  if (base_alignment < alt_alignment)
+    {
+      base_alignment = alt_alignment;
+      base_misalignment = alt_misalignment;
+    }
 
+  drb->base_address = base;
   drb->offset = fold_convert (ssizetype, offset_iv.base);
   drb->init = init;
   drb->step = step;
+  drb->base_alignment = base_alignment;
+  drb->base_misalignment = base_misalignment & (base_alignment - 1);
   drb->offset_alignment = highest_pow2_factor (offset_iv.base);
   drb->step_alignment = highest_pow2_factor (step);
 
@@ -1084,6 +1126,9 @@  create_data_ref (loop_p nest, loop_p loo
       print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
       fprintf (dump_file, "\n\tstep: ");
       print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
+      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
+      fprintf (dump_file, "\n\tbase misalignment: %d",
+	       DR_BASE_MISALIGNMENT (dr));
       fprintf (dump_file, "\n\toffset alignment: %d",
 	       DR_OFFSET_ALIGNMENT (dr));
       fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
Index: gcc/tree-vectorizer.h
===================================================================
--- gcc/tree-vectorizer.h	2017-07-03 07:52:14.196782157 +0100
+++ gcc/tree-vectorizer.h	2017-07-03 07:52:55.921272300 +0100
@@ -707,6 +707,9 @@  #define STMT_VINFO_DR_BASE_ADDRESS(S)
 #define STMT_VINFO_DR_INIT(S)              (S)->dr_wrt_vec_loop.init
 #define STMT_VINFO_DR_OFFSET(S)            (S)->dr_wrt_vec_loop.offset
 #define STMT_VINFO_DR_STEP(S)              (S)->dr_wrt_vec_loop.step
+#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_wrt_vec_loop.base_alignment
+#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
+  (S)->dr_wrt_vec_loop.base_misalignment
 #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
   (S)->dr_wrt_vec_loop.offset_alignment
 #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
Index: gcc/tree-vect-data-refs.c
===================================================================
--- gcc/tree-vect-data-refs.c	2017-07-03 07:52:14.194782203 +0100
+++ gcc/tree-vect-data-refs.c	2017-07-03 07:52:55.921272300 +0100
@@ -50,6 +50,7 @@  Software Foundation; either version 3, o
 #include "expr.h"
 #include "builtins.h"
 #include "params.h"
+#include "tree-cfg.h"
 
 /* Return true if load- or store-lanes optab OPTAB is implemented for
    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
@@ -667,8 +668,6 @@  vect_compute_data_ref_alignment (struct
   struct loop *loop = NULL;
   tree ref = DR_REF (dr);
   tree vectype = STMT_VINFO_VECTYPE (stmt_info);
-  tree base;
-  unsigned HOST_WIDE_INT alignment;
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
@@ -728,48 +727,18 @@  vect_compute_data_ref_alignment (struct
 	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 			 "step doesn't divide the vector-size.\n");
     }
-  tree base_addr = drb->base_address;
 
-  /* To look at alignment of the base we have to preserve an inner MEM_REF
-     as that carries alignment information of the actual access.  */
-  base = ref;
-  while (handled_component_p (base))
-    base = TREE_OPERAND (base, 0);
-  unsigned int base_alignment = 0;
-  unsigned HOST_WIDE_INT base_bitpos;
-  get_object_alignment_1 (base, &base_alignment, &base_bitpos);
-  /* As data-ref analysis strips the MEM_REF down to its base operand
-     to form DR_BASE_ADDRESS and adds the offset to DR_INIT we have to
-     adjust things to make base_alignment valid as the alignment of
-     DR_BASE_ADDRESS.  */
-  if (TREE_CODE (base) == MEM_REF)
-    {
-      /* Note all this only works if DR_BASE_ADDRESS is the same as
-	 MEM_REF operand zero, otherwise DR/SCEV analysis might have factored
-	 in other offsets.  We need to rework DR to compute the alingment
-	 of DR_BASE_ADDRESS as long as all information is still available.  */
-      if (operand_equal_p (TREE_OPERAND (base, 0), base_addr, 0))
-	{
-	  base_bitpos -= mem_ref_offset (base).to_short_addr () * BITS_PER_UNIT;
-	  base_bitpos &= (base_alignment - 1);
-	}
-      else
-	base_bitpos = BITS_PER_UNIT;
-    }
-  if (base_bitpos != 0)
-    base_alignment = base_bitpos & -base_bitpos;
-  /* Also look at the alignment of the base address DR analysis
-     computed.  */
-  unsigned int base_addr_alignment = get_pointer_alignment (base_addr);
-  if (base_addr_alignment > base_alignment)
-    base_alignment = base_addr_alignment;
+  unsigned int base_alignment = drb->base_alignment;
+  unsigned int base_misalignment = drb->base_misalignment;
+  unsigned HOST_WIDE_INT vector_alignment = TYPE_ALIGN_UNIT (vectype);
+  unsigned HOST_WIDE_INT element_alignment
+    = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
 
-  if (base_alignment >= TYPE_ALIGN (TREE_TYPE (vectype)))
+  if (base_alignment >= element_alignment
+      && (base_misalignment & (element_alignment - 1)) == 0)
     DR_VECT_AUX (dr)->base_element_aligned = true;
 
-  alignment = TYPE_ALIGN_UNIT (vectype);
-
-  if (drb->offset_alignment < alignment
+  if (drb->offset_alignment < vector_alignment
       || !step_preserves_misalignment_p
       /* We need to know whether the step wrt the vectorized loop is
 	 negative when computing the starting misalignment below.  */
@@ -785,12 +754,13 @@  vect_compute_data_ref_alignment (struct
       return true;
     }
 
-  if (base_alignment < TYPE_ALIGN (vectype))
+  if (base_alignment < vector_alignment)
     {
-      base = base_addr;
+      tree base = drb->base_address;
       if (TREE_CODE (base) == ADDR_EXPR)
 	base = TREE_OPERAND (base, 0);
-      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
+      if (!vect_can_force_dr_alignment_p (base,
+					  vector_alignment * BITS_PER_UNIT))
 	{
 	  if (dump_enabled_p ())
 	    {
@@ -828,24 +798,20 @@  vect_compute_data_ref_alignment (struct
       DR_VECT_AUX (dr)->base_decl = base;
       DR_VECT_AUX (dr)->base_misaligned = true;
       DR_VECT_AUX (dr)->base_element_aligned = true;
+      base_misalignment = 0;
     }
+  unsigned int misalignment = (base_misalignment
+			       + TREE_INT_CST_LOW (drb->init));
 
   /* If this is a backward running DR then first access in the larger
      vectype actually is N-1 elements before the address in the DR.
      Adjust misalign accordingly.  */
-  tree misalign = drb->init;
   if (tree_int_cst_sgn (drb->step) < 0)
-    {
-      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
-      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
-	 otherwise we wouldn't be here.  */
-      offset = fold_build2 (MULT_EXPR, ssizetype, offset, drb->step);
-      /* PLUS because STEP was negative.  */
-      misalign = size_binop (PLUS_EXPR, misalign, offset);
-    }
+    /* PLUS because STEP is negative.  */
+    misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
+		     * TREE_INT_CST_LOW (drb->step));
 
-  SET_DR_MISALIGNMENT (dr,
-		       wi::mod_floor (misalign, alignment, SIGNED).to_uhwi ());
+  SET_DR_MISALIGNMENT (dr, misalignment & (vector_alignment - 1));
 
   if (dump_enabled_p ())
     {
@@ -3554,100 +3520,27 @@  vect_analyze_data_refs (vec_info *vinfo,
 	 the outer-loop.  */
       if (loop && nested_in_vect_loop_p (loop, stmt))
 	{
-	  tree outer_step, outer_base, outer_init;
-	  HOST_WIDE_INT pbitsize, pbitpos;
-	  tree poffset;
-	  machine_mode pmode;
-	  int punsignedp, preversep, pvolatilep;
-	  affine_iv base_iv, offset_iv;
-	  tree dinit;
-
 	  /* Build a reference to the first location accessed by the
-	     inner-loop: *(BASE+INIT).  (The first location is actually
-	     BASE+INIT+OFFSET, but we add OFFSET separately later).  */
-          tree inner_base = build_fold_indirect_ref
-                                (fold_build_pointer_plus (base, init));
+	     inner loop: *(BASE + INIT + OFFSET).  By construction,
+	     this address must be invariant in the inner loop, so we
+	     can consider it as being used in the outer loop.  */
+	  tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
+					  init, offset);
+	  tree init_addr = fold_build_pointer_plus (base, init_offset);
+	  tree init_ref = build_fold_indirect_ref (init_addr);
 
 	  if (dump_enabled_p ())
 	    {
 	      dump_printf_loc (MSG_NOTE, vect_location,
-                               "analyze in outer-loop: ");
-	      dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
+                               "analyze in outer loop: ");
+	      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_ref);
 	      dump_printf (MSG_NOTE, "\n");
 	    }
 
-	  outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
-					    &poffset, &pmode, &punsignedp,
-					    &preversep, &pvolatilep);
-	  gcc_assert (outer_base != NULL_TREE);
-
-	  if (pbitpos % BITS_PER_UNIT != 0)
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "failed: bit offset alignment.\n");
-	      return false;
-	    }
-
-	  if (preversep)
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-				 "failed: reverse storage order.\n");
-	      return false;
-	    }
-
-	  outer_base = build_fold_addr_expr (outer_base);
-	  if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
-                          &base_iv, false))
-	    {
-	      if (dump_enabled_p ())
-		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "failed: evolution of base is not affine.\n");
-	      return false;
-	    }
-
-	  if (offset)
-	    {
-	      if (poffset)
-		poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
-                                       poffset);
-	      else
-		poffset = offset;
-	    }
-
-	  if (!poffset)
-	    {
-	      offset_iv.base = ssize_int (0);
-	      offset_iv.step = ssize_int (0);
-	    }
-	  else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
-                               &offset_iv, false))
-	    {
-	      if (dump_enabled_p ())
-	        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
-                                 "evolution of offset is not affine.\n");
-	      return false;
-	    }
-
-	  outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
-	  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
-	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
-	  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
-	  outer_init =  size_binop (PLUS_EXPR, outer_init, dinit);
-
-	  outer_step = size_binop (PLUS_EXPR,
-				fold_convert (ssizetype, base_iv.step),
-				fold_convert (ssizetype, offset_iv.step));
-
-	  STMT_VINFO_DR_STEP (stmt_info) = outer_step;
-	  /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
-	  STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
-	  STMT_VINFO_DR_INIT (stmt_info) = outer_init;
-	  STMT_VINFO_DR_OFFSET (stmt_info) =
-				fold_convert (ssizetype, offset_iv.base);
-	  STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info)
-	    = highest_pow2_factor (offset_iv.base);
+	  if (!dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
+				     init_ref, loop))
+	    /* dr_analyze_innermost already explained the failure.  */
+	    return false;
 
           if (dump_enabled_p ())
 	    {
@@ -3665,6 +3558,10 @@  vect_analyze_data_refs (vec_info *vinfo,
 	      dump_printf (MSG_NOTE, "\n\touter step: ");
 	      dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                  STMT_VINFO_DR_STEP (stmt_info));
+	      dump_printf (MSG_NOTE, "\n\touter base alignment: %d\n",
+			   STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info));
+	      dump_printf (MSG_NOTE, "\n\touter base misalignment: %d\n",
+			   STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info));
 	      dump_printf (MSG_NOTE, "\n\touter offset alignment: %d\n",
 			   STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info));
 	      dump_printf (MSG_NOTE, "\n\touter step alignment: %d\n",