[021/nnn] poly_int: extract_bit_field bitrange

Message ID 87d15drduy.fsf@linaro.org
State New
Headers show
Series
  • [021/nnn] poly_int: extract_bit_field bitrange
Related show

Commit Message

Richard Sandiford Oct. 23, 2017, 5:08 p.m.
Similar to the previous store_bit_field patch, but for extractions
rather than insertions.  The patch splits out the extraction-as-subreg
handling into a new function (extract_bit_field_as_subreg), both for
ease of writing and because a later patch will add another caller.

The simplify_gen_subreg overload is temporary; it goes away
in a later patch.


2017-10-23  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* rtl.h (simplify_gen_subreg): Add a temporary overload that
	accepts poly_uint64 offsets.
	* expmed.h (extract_bit_field): Take bitsize and bitnum as
	poly_uint64s rather than unsigned HOST_WIDE_INTs.
	* expmed.c (lowpart_bit_field_p): Likewise.
	(extract_bit_field_as_subreg): New function, split out from...
	(extract_bit_field_1): ...here.  Take bitsize and bitnum as
	poly_uint64s rather than unsigned HOST_WIDE_INTs.  For vector
	extractions, check that BITSIZE matches the size of the extracted
	value and that BITNUM is an exact multiple of that size.
	If all else fails, try forcing the value into memory if
	BITNUM is variable, and adjusting the address so that the
	offset is constant.  Split the part that can only handle constant
	bitsize and bitnum out into...
	(extract_integral_bit_field): ...this new function.
	(extract_bit_field): Take bitsize and bitnum as poly_uint64s
	rather than unsigned HOST_WIDE_INTs.

Comments

Jeff Law Dec. 5, 2017, 11:46 p.m. | #1
On 10/23/2017 11:08 AM, Richard Sandiford wrote:
> Similar to the previous store_bit_field patch, but for extractions
> rather than insertions.  The patch splits out the extraction-as-subreg
> handling into a new function (extract_bit_field_as_subreg), both for
> ease of writing and because a later patch will add another caller.
>
> The simplify_gen_subreg overload is temporary; it goes away
> in a later patch.
>
>
> 2017-10-23  Richard Sandiford  <richard.sandiford@linaro.org>
> 	    Alan Hayward  <alan.hayward@arm.com>
> 	    David Sherwood  <david.sherwood@arm.com>
>
> gcc/
> 	* rtl.h (simplify_gen_subreg): Add a temporary overload that
> 	accepts poly_uint64 offsets.
> 	* expmed.h (extract_bit_field): Take bitsize and bitnum as
> 	poly_uint64s rather than unsigned HOST_WIDE_INTs.
> 	* expmed.c (lowpart_bit_field_p): Likewise.
> 	(extract_bit_field_as_subreg): New function, split out from...
> 	(extract_bit_field_1): ...here.  Take bitsize and bitnum as
> 	poly_uint64s rather than unsigned HOST_WIDE_INTs.  For vector
> 	extractions, check that BITSIZE matches the size of the extracted
> 	value and that BITNUM is an exact multiple of that size.
> 	If all else fails, try forcing the value into memory if
> 	BITNUM is variable, and adjusting the address so that the
> 	offset is constant.  Split the part that can only handle constant
> 	bitsize and bitnum out into...
> 	(extract_integral_bit_field): ...this new function.
> 	(extract_bit_field): Take bitsize and bitnum as poly_uint64s
> 	rather than unsigned HOST_WIDE_INTs.

OK.

jeff

Patch

Index: gcc/rtl.h
===================================================================
--- gcc/rtl.h	2017-10-23 17:11:43.774024962 +0100
+++ gcc/rtl.h	2017-10-23 17:11:50.109574423 +0100
@@ -3267,6 +3267,12 @@  extern rtx simplify_subreg (machine_mode
 			    unsigned int);
 extern rtx simplify_gen_subreg (machine_mode, rtx, machine_mode,
 				unsigned int);
+inline rtx
+simplify_gen_subreg (machine_mode omode, rtx x, machine_mode imode,
+		     poly_uint64 offset)
+{
+  return simplify_gen_subreg (omode, x, imode, offset.to_constant ());
+}
 extern rtx lowpart_subreg (machine_mode, rtx, machine_mode);
 extern rtx simplify_replace_fn_rtx (rtx, const_rtx,
 				    rtx (*fn) (rtx, const_rtx, void *), void *);
Index: gcc/expmed.h
===================================================================
--- gcc/expmed.h	2017-10-23 17:11:43.774024962 +0100
+++ gcc/expmed.h	2017-10-23 17:11:50.109574423 +0100
@@ -722,8 +722,7 @@  extern void store_bit_field (rtx, poly_u
 			     unsigned HOST_WIDE_INT,
 			     unsigned HOST_WIDE_INT,
 			     machine_mode, rtx, bool);
-extern rtx extract_bit_field (rtx, unsigned HOST_WIDE_INT,
-			      unsigned HOST_WIDE_INT, int, rtx,
+extern rtx extract_bit_field (rtx, poly_uint64, poly_uint64, int, rtx,
 			      machine_mode, machine_mode, bool, rtx *);
 extern rtx extract_low_bits (machine_mode, machine_mode, rtx);
 extern rtx expand_mult (machine_mode, rtx, rtx, rtx, int);
Index: gcc/expmed.c
===================================================================
--- gcc/expmed.c	2017-10-23 17:11:43.774024962 +0100
+++ gcc/expmed.c	2017-10-23 17:11:50.109574423 +0100
@@ -68,6 +68,10 @@  static void store_split_bit_field (rtx,
 				   unsigned HOST_WIDE_INT,
 				   unsigned HOST_WIDE_INT,
 				   rtx, scalar_int_mode, bool);
+static rtx extract_integral_bit_field (rtx, opt_scalar_int_mode,
+				       unsigned HOST_WIDE_INT,
+				       unsigned HOST_WIDE_INT, int, rtx,
+				       machine_mode, machine_mode, bool, bool);
 static rtx extract_fixed_bit_field (machine_mode, rtx, opt_scalar_int_mode,
 				    unsigned HOST_WIDE_INT,
 				    unsigned HOST_WIDE_INT, rtx, int, bool);
@@ -509,17 +513,17 @@  adjust_bit_field_mem_for_reg (enum extra
    offset is then BITNUM / BITS_PER_UNIT.  */
 
 static bool
-lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
-		     unsigned HOST_WIDE_INT bitsize,
+lowpart_bit_field_p (poly_uint64 bitnum, poly_uint64 bitsize,
 		     machine_mode struct_mode)
 {
-  unsigned HOST_WIDE_INT regsize = REGMODE_NATURAL_SIZE (struct_mode);
+  poly_uint64 regsize = REGMODE_NATURAL_SIZE (struct_mode);
   if (BYTES_BIG_ENDIAN)
-    return (bitnum % BITS_PER_UNIT == 0
-	    && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
-		|| (bitnum + bitsize) % (regsize * BITS_PER_UNIT) == 0));
+    return (multiple_p (bitnum, BITS_PER_UNIT)
+	    && (must_eq (bitnum + bitsize, GET_MODE_BITSIZE (struct_mode))
+		|| multiple_p (bitnum + bitsize,
+			       regsize * BITS_PER_UNIT)));
   else
-    return bitnum % (regsize * BITS_PER_UNIT) == 0;
+    return multiple_p (bitnum, regsize * BITS_PER_UNIT);
 }
 
 /* Return true if -fstrict-volatile-bitfields applies to an access of OP0
@@ -1574,16 +1578,33 @@  extract_bit_field_using_extv (const extr
   return NULL_RTX;
 }
 
+/* See whether it would be valid to extract the part of OP0 described
+   by BITNUM and BITSIZE into a value of mode MODE using a subreg
+   operation.  Return the subreg if so, otherwise return null.  */
+
+static rtx
+extract_bit_field_as_subreg (machine_mode mode, rtx op0,
+			     poly_uint64 bitsize, poly_uint64 bitnum)
+{
+  poly_uint64 bytenum;
+  if (multiple_p (bitnum, BITS_PER_UNIT, &bytenum)
+      && must_eq (bitsize, GET_MODE_BITSIZE (mode))
+      && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
+      && TRULY_NOOP_TRUNCATION_MODES_P (mode, GET_MODE (op0)))
+    return simplify_gen_subreg (mode, op0, GET_MODE (op0), bytenum);
+  return NULL_RTX;
+}
+
 /* A subroutine of extract_bit_field, with the same arguments.
    If FALLBACK_P is true, fall back to extract_fixed_bit_field
    if we can find no other means of implementing the operation.
    if FALLBACK_P is false, return NULL instead.  */
 
 static rtx
-extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
-		     unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
-		     machine_mode mode, machine_mode tmode,
-		     bool reverse, bool fallback_p, rtx *alt_rtl)
+extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
+		     int unsignedp, rtx target, machine_mode mode,
+		     machine_mode tmode, bool reverse, bool fallback_p,
+		     rtx *alt_rtl)
 {
   rtx op0 = str_rtx;
   machine_mode mode1;
@@ -1600,13 +1621,13 @@  extract_bit_field_1 (rtx str_rtx, unsign
   /* If we have an out-of-bounds access to a register, just return an
      uninitialized register of the required mode.  This can occur if the
      source code contains an out-of-bounds access to a small array.  */
-  if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
+  if (REG_P (op0) && must_ge (bitnum, GET_MODE_BITSIZE (GET_MODE (op0))))
     return gen_reg_rtx (tmode);
 
   if (REG_P (op0)
       && mode == GET_MODE (op0)
-      && bitnum == 0
-      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
+      && known_zero (bitnum)
+      && must_eq (bitsize, GET_MODE_BITSIZE (GET_MODE (op0))))
     {
       if (reverse)
 	op0 = flip_storage_order (mode, op0);
@@ -1618,6 +1639,7 @@  extract_bit_field_1 (rtx str_rtx, unsign
   if (VECTOR_MODE_P (GET_MODE (op0))
       && !MEM_P (op0)
       && VECTOR_MODE_P (tmode)
+      && must_eq (bitsize, GET_MODE_SIZE (tmode))
       && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (tmode))
     {
       machine_mode new_mode = GET_MODE (op0);
@@ -1633,18 +1655,17 @@  extract_bit_field_1 (rtx str_rtx, unsign
 	      || !targetm.vector_mode_supported_p (new_mode))
 	    new_mode = VOIDmode;
 	}
+      poly_uint64 pos;
       if (new_mode != VOIDmode
 	  && (convert_optab_handler (vec_extract_optab, new_mode, tmode)
 	      != CODE_FOR_nothing)
-	  && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (tmode)
-	      == bitnum / GET_MODE_BITSIZE (tmode)))
+	  && multiple_p (bitnum, GET_MODE_BITSIZE (tmode), &pos))
 	{
 	  struct expand_operand ops[3];
 	  machine_mode outermode = new_mode;
 	  machine_mode innermode = tmode;
 	  enum insn_code icode
 	    = convert_optab_handler (vec_extract_optab, outermode, innermode);
-	  unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
 
 	  if (new_mode != GET_MODE (op0))
 	    op0 = gen_lowpart (new_mode, op0);
@@ -1697,17 +1718,17 @@  extract_bit_field_1 (rtx str_rtx, unsign
      available.  */
   machine_mode outermode = GET_MODE (op0);
   scalar_mode innermode = GET_MODE_INNER (outermode);
+  poly_uint64 pos;
   if (VECTOR_MODE_P (outermode)
       && !MEM_P (op0)
       && (convert_optab_handler (vec_extract_optab, outermode, innermode)
 	  != CODE_FOR_nothing)
-      && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (innermode)
-	  == bitnum / GET_MODE_BITSIZE (innermode)))
+      && must_eq (bitsize, GET_MODE_BITSIZE (innermode))
+      && multiple_p (bitnum, GET_MODE_BITSIZE (innermode), &pos))
     {
       struct expand_operand ops[3];
       enum insn_code icode
 	= convert_optab_handler (vec_extract_optab, outermode, innermode);
-      unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
 
       create_output_operand (&ops[0], target, innermode);
       ops[0].target = 1;
@@ -1765,14 +1786,9 @@  extract_bit_field_1 (rtx str_rtx, unsign
   /* Extraction of a full MODE1 value can be done with a subreg as long
      as the least significant bit of the value is the least significant
      bit of either OP0 or a word of OP0.  */
-  if (!MEM_P (op0)
-      && !reverse
-      && lowpart_bit_field_p (bitnum, bitsize, op0_mode.require ())
-      && bitsize == GET_MODE_BITSIZE (mode1)
-      && TRULY_NOOP_TRUNCATION_MODES_P (mode1, op0_mode.require ()))
+  if (!MEM_P (op0) && !reverse)
     {
-      rtx sub = simplify_gen_subreg (mode1, op0, op0_mode.require (),
-				     bitnum / BITS_PER_UNIT);
+      rtx sub = extract_bit_field_as_subreg (mode1, op0, bitsize, bitnum);
       if (sub)
 	return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
     }
@@ -1788,6 +1804,39 @@  extract_bit_field_1 (rtx str_rtx, unsign
       return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
     }
 
+  /* If we have a memory source and a non-constant bit offset, restrict
+     the memory to the referenced bytes.  This is a worst-case fallback
+     but is useful for things like vector booleans.  */
+  if (MEM_P (op0) && !bitnum.is_constant ())
+    {
+      bytenum = bits_to_bytes_round_down (bitnum);
+      bitnum = num_trailing_bits (bitnum);
+      poly_uint64 bytesize = bits_to_bytes_round_up (bitnum + bitsize);
+      op0 = adjust_bitfield_address_size (op0, BLKmode, bytenum, bytesize);
+      op0_mode = opt_scalar_int_mode ();
+    }
+
+  /* It's possible we'll need to handle other cases here for
+     polynomial bitnum and bitsize.  */
+
+  /* From here on we need to be looking at a fixed-size extraction.  */
+  return extract_integral_bit_field (op0, op0_mode, bitsize.to_constant (),
+				     bitnum.to_constant (), unsignedp,
+				     target, mode, tmode, reverse, fallback_p);
+}
+
+/* Subroutine of extract_bit_field_1, with the same arguments, except
+   that BITSIZE and BITNUM are constant.  Handle cases specific to
+   integral modes.  If OP0_MODE is defined, it is the mode of OP0,
+   otherwise OP0 is a BLKmode MEM.  */
+
+static rtx
+extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
+			    unsigned HOST_WIDE_INT bitsize,
+			    unsigned HOST_WIDE_INT bitnum, int unsignedp,
+			    rtx target, machine_mode mode, machine_mode tmode,
+			    bool reverse, bool fallback_p)
+{
   /* Handle fields bigger than a word.  */
 
   if (bitsize > BITS_PER_WORD)
@@ -1807,12 +1856,16 @@  extract_bit_field_1 (rtx str_rtx, unsign
 
       /* In case we're about to clobber a base register or something 
 	 (see gcc.c-torture/execute/20040625-1.c).   */
-      if (reg_mentioned_p (target, str_rtx))
+      if (reg_mentioned_p (target, op0))
 	target = gen_reg_rtx (mode);
 
       /* Indicate for flow that the entire target reg is being set.  */
       emit_clobber (target);
 
+      /* The mode must be fixed-size, since extract_bit_field_1 handles
+	 extractions from variable-sized objects before calling this
+	 function.  */
+      unsigned int target_size = GET_MODE_SIZE (GET_MODE (target));
       last = get_last_insn ();
       for (i = 0; i < nwords; i++)
 	{
@@ -1820,9 +1873,7 @@  extract_bit_field_1 (rtx str_rtx, unsign
 	     if I is 1, use the next to lowest word; and so on.  */
 	  /* Word number in TARGET to use.  */
 	  unsigned int wordnum
-	    = (backwards
-	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
-	       : i);
+	    = (backwards ? target_size / UNITS_PER_WORD - i - 1 : i);
 	  /* Offset from start of field in OP0.  */
 	  unsigned int bit_offset = (backwards ^ reverse
 				     ? MAX ((int) bitsize - ((int) i + 1)
@@ -1851,11 +1902,11 @@  extract_bit_field_1 (rtx str_rtx, unsign
 	{
 	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
 	     need to be zero'd out.  */
-	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
+	  if (target_size > nwords * UNITS_PER_WORD)
 	    {
 	      unsigned int i, total_words;
 
-	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
+	      total_words = target_size / UNITS_PER_WORD;
 	      for (i = nwords; i < total_words; i++)
 		emit_move_insn
 		  (operand_subword (target,
@@ -1993,10 +2044,9 @@  extract_bit_field_1 (rtx str_rtx, unsign
    if they are equally easy.  */
 
 rtx
-extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
-		   unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
-		   machine_mode mode, machine_mode tmode, bool reverse,
-		   rtx *alt_rtl)
+extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
+		   int unsignedp, rtx target, machine_mode mode,
+		   machine_mode tmode, bool reverse, rtx *alt_rtl)
 {
   machine_mode mode1;
 
@@ -2008,28 +2058,34 @@  extract_bit_field (rtx str_rtx, unsigned
   else
     mode1 = tmode;
 
+  unsigned HOST_WIDE_INT ibitsize, ibitnum;
   scalar_int_mode int_mode;
-  if (is_a <scalar_int_mode> (mode1, &int_mode)
-      && strict_volatile_bitfield_p (str_rtx, bitsize, bitnum, int_mode, 0, 0))
+  if (bitsize.is_constant (&ibitsize)
+      && bitnum.is_constant (&ibitnum)
+      && is_a <scalar_int_mode> (mode1, &int_mode)
+      && strict_volatile_bitfield_p (str_rtx, ibitsize, ibitnum,
+				     int_mode, 0, 0))
     {
       /* Extraction of a full INT_MODE value can be done with a simple load.
 	 We know here that the field can be accessed with one single
 	 instruction.  For targets that support unaligned memory,
 	 an unaligned access may be necessary.  */
-      if (bitsize == GET_MODE_BITSIZE (int_mode))
+      if (ibitsize == GET_MODE_BITSIZE (int_mode))
 	{
 	  rtx result = adjust_bitfield_address (str_rtx, int_mode,
-						bitnum / BITS_PER_UNIT);
+						ibitnum / BITS_PER_UNIT);
 	  if (reverse)
 	    result = flip_storage_order (int_mode, result);
-	  gcc_assert (bitnum % BITS_PER_UNIT == 0);
+	  gcc_assert (ibitnum % BITS_PER_UNIT == 0);
 	  return convert_extracted_bit_field (result, mode, tmode, unsignedp);
 	}
 
-      str_rtx = narrow_bit_field_mem (str_rtx, int_mode, bitsize, bitnum,
-				      &bitnum);
-      gcc_assert (bitnum + bitsize <= GET_MODE_BITSIZE (int_mode));
+      str_rtx = narrow_bit_field_mem (str_rtx, int_mode, ibitsize, ibitnum,
+				      &ibitnum);
+      gcc_assert (ibitnum + ibitsize <= GET_MODE_BITSIZE (int_mode));
       str_rtx = copy_to_reg (str_rtx);
+      return extract_bit_field_1 (str_rtx, ibitsize, ibitnum, unsignedp,
+				  target, mode, tmode, reverse, true, alt_rtl);
     }
 
   return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,