[016/nnn] poly_int: dse.c

Message ID 87y3o1rdy7.fsf@linaro.org
State New
Headers show
Series
  • [016/nnn] poly_int: dse.c
Related show

Commit Message

Richard Sandiford Oct. 23, 2017, 5:06 p.m.
This patch makes RTL DSE use poly_int for offsets and sizes.
The local phase can optimise them normally but the global phase
treats them as wild accesses.


2017-10-23  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* dse.c (store_info): Change offset and width from HOST_WIDE_INT
	to poly_int64.  Update commentary for positions_needed.large.
	(read_info_type): Change offset and width from HOST_WIDE_INT
	to poly_int64.
	(set_usage_bits): Likewise.
	(canon_address): Return the offset as a poly_int64 rather than
	a HOST_WIDE_INT.  Use strip_offset_and_add.
	(set_all_positions_unneeded, any_positions_needed_p): Use
	positions_needed.large to track stores with non-constant widths.
	(all_positions_needed_p): Likewise.  Take the offset and width
	as poly_int64s rather than ints.  Assert that rhs is nonnull.
	(record_store): Cope with non-constant offsets and widths.
	Nullify the rhs of an earlier store if we can't tell which bytes
	of it are needed.
	(find_shift_sequence): Take the access_size and shift as poly_int64s
	rather than ints.
	(get_stored_val): Take the read_offset and read_width as poly_int64s
	rather than HOST_WIDE_INTs.
	(check_mem_read_rtx, scan_stores, scan_reads, dse_step5): Handle
	non-constant offsets and widths.

Comments

Jeff Law Nov. 18, 2017, 4:23 a.m. | #1
On 10/23/2017 11:06 AM, Richard Sandiford wrote:
> This patch makes RTL DSE use poly_int for offsets and sizes.

> The local phase can optimise them normally but the global phase

> treats them as wild accesses.

> 

> 

> 2017-10-23  Richard Sandiford  <richard.sandiford@linaro.org>

> 	    Alan Hayward  <alan.hayward@arm.com>

> 	    David Sherwood  <david.sherwood@arm.com>

> 

> gcc/

> 	* dse.c (store_info): Change offset and width from HOST_WIDE_INT

> 	to poly_int64.  Update commentary for positions_needed.large.

> 	(read_info_type): Change offset and width from HOST_WIDE_INT

> 	to poly_int64.

> 	(set_usage_bits): Likewise.

> 	(canon_address): Return the offset as a poly_int64 rather than

> 	a HOST_WIDE_INT.  Use strip_offset_and_add.

> 	(set_all_positions_unneeded, any_positions_needed_p): Use

> 	positions_needed.large to track stores with non-constant widths.

> 	(all_positions_needed_p): Likewise.  Take the offset and width

> 	as poly_int64s rather than ints.  Assert that rhs is nonnull.

> 	(record_store): Cope with non-constant offsets and widths.

> 	Nullify the rhs of an earlier store if we can't tell which bytes

> 	of it are needed.

> 	(find_shift_sequence): Take the access_size and shift as poly_int64s

> 	rather than ints.

> 	(get_stored_val): Take the read_offset and read_width as poly_int64s

> 	rather than HOST_WIDE_INTs.

> 	(check_mem_read_rtx, scan_stores, scan_reads, dse_step5): Handle

> 	non-constant offsets and widths.

OK.
jeff

Patch

Index: gcc/dse.c
===================================================================
--- gcc/dse.c	2017-10-23 16:52:20.003305798 +0100
+++ gcc/dse.c	2017-10-23 17:01:54.249406896 +0100
@@ -244,11 +244,11 @@  struct store_info
   rtx mem_addr;
 
   /* The offset of the first byte associated with the operation.  */
-  HOST_WIDE_INT offset;
+  poly_int64 offset;
 
   /* The number of bytes covered by the operation.  This is always exact
      and known (rather than -1).  */
-  HOST_WIDE_INT width;
+  poly_int64 width;
 
   union
     {
@@ -259,12 +259,19 @@  struct store_info
 
       struct
 	{
-	  /* A bitmap with one bit per byte.  Cleared bit means the position
-	     is needed.  Used if IS_LARGE is false.  */
+	  /* A bitmap with one bit per byte, or null if the number of
+	     bytes isn't known at compile time.  A cleared bit means
+	     the position is needed.  Used if IS_LARGE is true.  */
 	  bitmap bmap;
 
-	  /* Number of set bits (i.e. unneeded bytes) in BITMAP.  If it is
-	     equal to WIDTH, the whole store is unused.  */
+	  /* When BITMAP is nonnull, this counts the number of set bits
+	     (i.e. unneeded bytes) in the bitmap.  If it is equal to
+	     WIDTH, the whole store is unused.
+
+	     When BITMAP is null:
+	     - the store is definitely not needed when COUNT == 1
+	     - all the store is needed when COUNT == 0 and RHS is nonnull
+	     - otherwise we don't know which parts of the store are needed.  */
 	  int count;
 	} large;
     } positions_needed;
@@ -308,10 +315,10 @@  struct read_info_type
   int group_id;
 
   /* The offset of the first byte associated with the operation.  */
-  HOST_WIDE_INT offset;
+  poly_int64 offset;
 
   /* The number of bytes covered by the operation, or -1 if not known.  */
-  HOST_WIDE_INT width;
+  poly_int64 width;
 
   /* The mem being read.  */
   rtx mem;
@@ -940,13 +947,18 @@  can_escape (tree expr)
    OFFSET and WIDTH.  */
 
 static void
-set_usage_bits (group_info *group, HOST_WIDE_INT offset, HOST_WIDE_INT width,
+set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
                 tree expr)
 {
-  HOST_WIDE_INT i;
+  /* Non-constant offsets and widths act as global kills, so there's no point
+     trying to use them to derive global DSE candidates.  */
+  HOST_WIDE_INT i, const_offset, const_width;
   bool expr_escapes = can_escape (expr);
-  if (offset > -MAX_OFFSET && offset + width < MAX_OFFSET)
-    for (i=offset; i<offset+width; i++)
+  if (offset.is_constant (&const_offset)
+      && width.is_constant (&const_width)
+      && const_offset > -MAX_OFFSET
+      && const_offset + const_width < MAX_OFFSET)
+    for (i = const_offset; i < const_offset + const_width; ++i)
       {
 	bitmap store1;
 	bitmap store2;
@@ -1080,7 +1092,7 @@  const_or_frame_p (rtx x)
 static bool
 canon_address (rtx mem,
 	       int *group_id,
-	       HOST_WIDE_INT *offset,
+	       poly_int64 *offset,
 	       cselib_val **base)
 {
   machine_mode address_mode = get_address_mode (mem);
@@ -1147,12 +1159,7 @@  canon_address (rtx mem,
       if (GET_CODE (address) == CONST)
 	address = XEXP (address, 0);
 
-      if (GET_CODE (address) == PLUS
-	  && CONST_INT_P (XEXP (address, 1)))
-	{
-	  *offset = INTVAL (XEXP (address, 1));
-	  address = XEXP (address, 0);
-	}
+      address = strip_offset_and_add (address, offset);
 
       if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
 	  && const_or_frame_p (address))
@@ -1160,8 +1167,11 @@  canon_address (rtx mem,
 	  group_info *group = get_group_info (address);
 
 	  if (dump_file && (dump_flags & TDF_DETAILS))
-	    fprintf (dump_file, "  gid=%d offset=%d \n",
-		     group->id, (int)*offset);
+	    {
+	      fprintf (dump_file, "  gid=%d offset=", group->id);
+	      print_dec (*offset, dump_file);
+	      fprintf (dump_file, "\n");
+	    }
 	  *base = NULL;
 	  *group_id = group->id;
 	  return true;
@@ -1178,8 +1188,12 @@  canon_address (rtx mem,
       return false;
     }
   if (dump_file && (dump_flags & TDF_DETAILS))
-    fprintf (dump_file, "  varying cselib base=%u:%u offset = %d\n",
-	     (*base)->uid, (*base)->hash, (int)*offset);
+    {
+      fprintf (dump_file, "  varying cselib base=%u:%u offset = ",
+	       (*base)->uid, (*base)->hash);
+      print_dec (*offset, dump_file);
+      fprintf (dump_file, "\n");
+    }
   return true;
 }
 
@@ -1228,9 +1242,17 @@  set_all_positions_unneeded (store_info *
 {
   if (__builtin_expect (s_info->is_large, false))
     {
-      bitmap_set_range (s_info->positions_needed.large.bmap,
-			0, s_info->width);
-      s_info->positions_needed.large.count = s_info->width;
+      HOST_WIDE_INT width;
+      if (s_info->width.is_constant (&width))
+	{
+	  bitmap_set_range (s_info->positions_needed.large.bmap, 0, width);
+	  s_info->positions_needed.large.count = width;
+	}
+      else
+	{
+	  gcc_checking_assert (!s_info->positions_needed.large.bmap);
+	  s_info->positions_needed.large.count = 1;
+	}
     }
   else
     s_info->positions_needed.small_bitmask = HOST_WIDE_INT_0U;
@@ -1242,35 +1264,64 @@  set_all_positions_unneeded (store_info *
 any_positions_needed_p (store_info *s_info)
 {
   if (__builtin_expect (s_info->is_large, false))
-    return s_info->positions_needed.large.count < s_info->width;
+    {
+      HOST_WIDE_INT width;
+      if (s_info->width.is_constant (&width))
+	{
+	  gcc_checking_assert (s_info->positions_needed.large.bmap);
+	  return s_info->positions_needed.large.count < width;
+	}
+      else
+	{
+	  gcc_checking_assert (!s_info->positions_needed.large.bmap);
+	  return s_info->positions_needed.large.count == 0;
+	}
+    }
   else
     return (s_info->positions_needed.small_bitmask != HOST_WIDE_INT_0U);
 }
 
 /* Return TRUE if all bytes START through START+WIDTH-1 from S_INFO
-   store are needed.  */
+   store are known to be needed.  */
 
 static inline bool
-all_positions_needed_p (store_info *s_info, int start, int width)
+all_positions_needed_p (store_info *s_info, poly_int64 start,
+			poly_int64 width)
 {
+  gcc_assert (s_info->rhs);
+  if (!s_info->width.is_constant ())
+    {
+      gcc_assert (s_info->is_large
+		  && !s_info->positions_needed.large.bmap);
+      return s_info->positions_needed.large.count == 0;
+    }
+
+  /* Otherwise, if START and WIDTH are non-constant, we're asking about
+     a non-constant region of a constant-sized store.  We can't say for
+     sure that all positions are needed.  */
+  HOST_WIDE_INT const_start, const_width;
+  if (!start.is_constant (&const_start)
+      || !width.is_constant (&const_width))
+    return false;
+
   if (__builtin_expect (s_info->is_large, false))
     {
-      int end = start + width;
-      while (start < end)
-	if (bitmap_bit_p (s_info->positions_needed.large.bmap, start++))
+      for (HOST_WIDE_INT i = const_start; i < const_start + const_width; ++i)
+	if (bitmap_bit_p (s_info->positions_needed.large.bmap, i))
 	  return false;
       return true;
     }
   else
     {
-      unsigned HOST_WIDE_INT mask = lowpart_bitmask (width) << start;
+      unsigned HOST_WIDE_INT mask
+	= lowpart_bitmask (const_width) << const_start;
       return (s_info->positions_needed.small_bitmask & mask) == mask;
     }
 }
 
 
-static rtx get_stored_val (store_info *, machine_mode, HOST_WIDE_INT,
-			   HOST_WIDE_INT, basic_block, bool);
+static rtx get_stored_val (store_info *, machine_mode, poly_int64,
+			   poly_int64, basic_block, bool);
 
 
 /* BODY is an instruction pattern that belongs to INSN.  Return 1 if
@@ -1281,8 +1332,8 @@  static rtx get_stored_val (store_info *,
 record_store (rtx body, bb_info_t bb_info)
 {
   rtx mem, rhs, const_rhs, mem_addr;
-  HOST_WIDE_INT offset = 0;
-  HOST_WIDE_INT width = 0;
+  poly_int64 offset = 0;
+  poly_int64 width = 0;
   insn_info_t insn_info = bb_info->last_insn;
   store_info *store_info = NULL;
   int group_id;
@@ -1437,7 +1488,7 @@  record_store (rtx body, bb_info_t bb_inf
       group_info *group = rtx_group_vec[group_id];
       mem_addr = group->canon_base_addr;
     }
-  if (offset)
+  if (maybe_nonzero (offset))
     mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
 
   while (ptr)
@@ -1497,18 +1548,27 @@  record_store (rtx body, bb_info_t bb_inf
 		}
 	    }
 
+	  HOST_WIDE_INT begin_unneeded, const_s_width, const_width;
 	  if (known_subrange_p (s_info->offset, s_info->width, offset, width))
 	    /* The new store touches every byte that S_INFO does.  */
 	    set_all_positions_unneeded (s_info);
-	  else
+	  else if ((offset - s_info->offset).is_constant (&begin_unneeded)
+		   && s_info->width.is_constant (&const_s_width)
+		   && width.is_constant (&const_width))
 	    {
-	      HOST_WIDE_INT begin_unneeded = offset - s_info->offset;
-	      HOST_WIDE_INT end_unneeded = begin_unneeded + width;
+	      HOST_WIDE_INT end_unneeded = begin_unneeded + const_width;
 	      begin_unneeded = MAX (begin_unneeded, 0);
-	      end_unneeded = MIN (end_unneeded, s_info->width);
+	      end_unneeded = MIN (end_unneeded, const_s_width);
 	      for (i = begin_unneeded; i < end_unneeded; ++i)
 		set_position_unneeded (s_info, i);
 	    }
+	  else
+	    {
+	      /* We don't know which parts of S_INFO are needed and
+		 which aren't, so invalidate the RHS.  */
+	      s_info->rhs = NULL;
+	      s_info->const_rhs = NULL;
+	    }
 	}
       else if (s_info->rhs)
 	/* Need to see if it is possible for this store to overwrite
@@ -1554,7 +1614,14 @@  record_store (rtx body, bb_info_t bb_inf
   store_info->mem = mem;
   store_info->mem_addr = mem_addr;
   store_info->cse_base = base;
-  if (width > HOST_BITS_PER_WIDE_INT)
+  HOST_WIDE_INT const_width;
+  if (!width.is_constant (&const_width))
+    {
+      store_info->is_large = true;
+      store_info->positions_needed.large.count = 0;
+      store_info->positions_needed.large.bmap = NULL;
+    }
+  else if (const_width > HOST_BITS_PER_WIDE_INT)
     {
       store_info->is_large = true;
       store_info->positions_needed.large.count = 0;
@@ -1563,7 +1630,8 @@  record_store (rtx body, bb_info_t bb_inf
   else
     {
       store_info->is_large = false;
-      store_info->positions_needed.small_bitmask = lowpart_bitmask (width);
+      store_info->positions_needed.small_bitmask
+	= lowpart_bitmask (const_width);
     }
   store_info->group_id = group_id;
   store_info->offset = offset;
@@ -1598,10 +1666,10 @@  dump_insn_info (const char * start, insn
    shift.  */
 
 static rtx
-find_shift_sequence (int access_size,
+find_shift_sequence (poly_int64 access_size,
 		     store_info *store_info,
 		     machine_mode read_mode,
-		     int shift, bool speed, bool require_cst)
+		     poly_int64 shift, bool speed, bool require_cst)
 {
   machine_mode store_mode = GET_MODE (store_info->mem);
   scalar_int_mode new_mode;
@@ -1737,11 +1805,11 @@  look_for_hardregs (rtx x, const_rtx pat
 
 static rtx
 get_stored_val (store_info *store_info, machine_mode read_mode,
-		HOST_WIDE_INT read_offset, HOST_WIDE_INT read_width,
+		poly_int64 read_offset, poly_int64 read_width,
 		basic_block bb, bool require_cst)
 {
   machine_mode store_mode = GET_MODE (store_info->mem);
-  HOST_WIDE_INT gap;
+  poly_int64 gap;
   rtx read_reg;
 
   /* To get here the read is within the boundaries of the write so
@@ -1755,10 +1823,10 @@  get_stored_val (store_info *store_info,
   else
     gap = read_offset - store_info->offset;
 
-  if (gap != 0)
+  if (maybe_nonzero (gap))
     {
-      HOST_WIDE_INT shift = gap * BITS_PER_UNIT;
-      HOST_WIDE_INT access_size = GET_MODE_SIZE (read_mode) + gap;
+      poly_int64 shift = gap * BITS_PER_UNIT;
+      poly_int64 access_size = GET_MODE_SIZE (read_mode) + gap;
       read_reg = find_shift_sequence (access_size, store_info, read_mode,
 				      shift, optimize_bb_for_speed_p (bb),
 				      require_cst);
@@ -1977,8 +2045,8 @@  check_mem_read_rtx (rtx *loc, bb_info_t
 {
   rtx mem = *loc, mem_addr;
   insn_info_t insn_info;
-  HOST_WIDE_INT offset = 0;
-  HOST_WIDE_INT width = 0;
+  poly_int64 offset = 0;
+  poly_int64 width = 0;
   cselib_val *base = NULL;
   int group_id;
   read_info_t read_info;
@@ -2027,7 +2095,7 @@  check_mem_read_rtx (rtx *loc, bb_info_t
       group_info *group = rtx_group_vec[group_id];
       mem_addr = group->canon_base_addr;
     }
-  if (offset)
+  if (maybe_nonzero (offset))
     mem_addr = plus_constant (get_address_mode (mem), mem_addr, offset);
 
   if (group_id >= 0)
@@ -2039,7 +2107,7 @@  check_mem_read_rtx (rtx *loc, bb_info_t
 
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
-	  if (width == -1)
+	  if (!known_size_p (width))
 	    fprintf (dump_file, " processing const load gid=%d[BLK]\n",
 		     group_id);
 	  else
@@ -2073,7 +2141,7 @@  check_mem_read_rtx (rtx *loc, bb_info_t
 	    {
 	      /* This is a block mode load.  We may get lucky and
 		 canon_true_dependence may save the day.  */
-	      if (width == -1)
+	      if (!known_size_p (width))
 		remove
 		  = canon_true_dependence (store_info->mem,
 					   GET_MODE (store_info->mem),
@@ -2803,13 +2871,17 @@  scan_stores (store_info *store_info, bit
 {
   while (store_info)
     {
-      HOST_WIDE_INT i;
+      HOST_WIDE_INT i, offset, width;
       group_info *group_info
 	= rtx_group_vec[store_info->group_id];
-      if (group_info->process_globally)
+      /* We can (conservatively) ignore stores whose bounds aren't known;
+	 they simply don't generate new global dse opportunities.  */
+      if (group_info->process_globally
+	  && store_info->offset.is_constant (&offset)
+	  && store_info->width.is_constant (&width))
 	{
-	  HOST_WIDE_INT end = store_info->offset + store_info->width;
-	  for (i = store_info->offset; i < end; i++)
+	  HOST_WIDE_INT end = offset + width;
+	  for (i = offset; i < end; i++)
 	    {
 	      int index = get_bitmap_index (group_info, i);
 	      if (index != 0)
@@ -2869,7 +2941,12 @@  scan_reads (insn_info_t insn_info, bitma
 	    {
 	      if (i == read_info->group_id)
 		{
-		  if (!known_size_p (read_info->width))
+		  HOST_WIDE_INT offset, width;
+		  /* Reads with non-constant size kill all DSE opportunities
+		     in the group.  */
+		  if (!read_info->offset.is_constant (&offset)
+		      || !read_info->width.is_constant (&width)
+		      || !known_size_p (width))
 		    {
 		      /* Handle block mode reads.  */
 		      if (kill)
@@ -2881,8 +2958,8 @@  scan_reads (insn_info_t insn_info, bitma
 		      /* The groups are the same, just process the
 			 offsets.  */
 		      HOST_WIDE_INT j;
-		      HOST_WIDE_INT end = read_info->offset + read_info->width;
-		      for (j = read_info->offset; j < end; j++)
+		      HOST_WIDE_INT end = offset + width;
+		      for (j = offset; j < end; j++)
 			{
 			  int index = get_bitmap_index (group, j);
 			  if (index != 0)
@@ -3298,22 +3375,30 @@  dse_step5 (void)
 	      while (!store_info->is_set)
 		store_info = store_info->next;
 
-	      HOST_WIDE_INT i;
+	      HOST_WIDE_INT i, offset, width;
 	      group_info *group_info = rtx_group_vec[store_info->group_id];
 
-	      HOST_WIDE_INT end = store_info->offset + store_info->width;
-	      for (i = store_info->offset; i < end; i++)
+	      if (!store_info->offset.is_constant (&offset)
+		  || !store_info->width.is_constant (&width))
+		deleted = false;
+	      else
 		{
-		  int index = get_bitmap_index (group_info, i);
-
-		  if (dump_file && (dump_flags & TDF_DETAILS))
-		    fprintf (dump_file, "i = %d, index = %d\n", (int)i, index);
-		  if (index == 0 || !bitmap_bit_p (v, index))
+		  HOST_WIDE_INT end = offset + width;
+		  for (i = offset; i < end; i++)
 		    {
+		      int index = get_bitmap_index (group_info, i);
+
 		      if (dump_file && (dump_flags & TDF_DETAILS))
-			fprintf (dump_file, "failing at i = %d\n", (int)i);
-		      deleted = false;
-		      break;
+			fprintf (dump_file, "i = %d, index = %d\n",
+				 (int) i, index);
+		      if (index == 0 || !bitmap_bit_p (v, index))
+			{
+			  if (dump_file && (dump_flags & TDF_DETAILS))
+			    fprintf (dump_file, "failing at i = %d\n",
+				     (int) i);
+			  deleted = false;
+			  break;
+			}
 		    }
 		}
 	      if (deleted)