Message ID | 87fu7la5jb.fsf@linaro.org |
---|---|
State | Accepted |
Commit | 646e47bcd15483fcafcc695efefd7a0ddeb4c716 |
Headers | show |
Series | Extra subreg fold for variable-length CONST_VECTORs | expand |
Ping. FWIW, the SLP test failures it fixes were ICEs rather than code-quality tests, so this is a correctness fix rather than an optimisation. Thanks, Richard Richard Sandiford <richard.sandiford@linaro.org> writes: > The SVE support for the new CONST_VECTOR encoding needs to be able > to extract the first N bits of the vector and duplicate it. This patch > adds a simplify_subreg rule for that. > > The code is covered by the gcc.target/aarch64/sve_slp_*.c tests. > > Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. > Also tested by comparing the before and after assembly output for at > least one target per CPU directory. OK to install? > > Richard > > > 2018-01-04 Richard Sandiford <richard.sandiford@linaro.org> > > gcc/ > * simplify-rtx.c (simplify_immed_subreg): Add an inner_bytes > parameter and use it instead of GET_MODE_SIZE (innermode). Use > inner_bytes * BITS_PER_UNIT instead of GET_MODE_BITSIZE (innermode). > Use CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)) instead of > GET_MODE_NUNITS (innermode). Also add a first_elem parameter. > Change innermode from fixed_mode_size to machine_mode. > (simplify_subreg): Update call accordingly. Handle a constant-sized > subreg of a variable-length CONST_VECTOR. > > Index: gcc/simplify-rtx.c > =================================================================== > --- gcc/simplify-rtx.c 2018-01-03 21:42:44.569646782 +0000 > +++ gcc/simplify-rtx.c 2018-01-04 17:35:25.747473457 +0000 > @@ -5971,13 +5971,16 @@ simplify_ternary_operation (enum rtx_cod > or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or > CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR. > > - Works by unpacking OP into a collection of 8-bit values > + Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values > represented as a little-endian array of 'unsigned char', selecting by BYTE, > - and then repacking them again for OUTERMODE. */ > + and then repacking them again for OUTERMODE. If OP is a CONST_VECTOR, > + FIRST_ELEM is the number of the first element to extract, otherwise > + FIRST_ELEM is ignored. */ > > static rtx > simplify_immed_subreg (fixed_size_mode outermode, rtx op, > - fixed_size_mode innermode, unsigned int byte) > + machine_mode innermode, unsigned int byte, > + unsigned int first_elem, unsigned int inner_bytes) > { > enum { > value_bit = 8, > @@ -6007,13 +6010,13 @@ simplify_immed_subreg (fixed_size_mode o > > /* We support any size mode. */ > max_bitsize = MAX (GET_MODE_BITSIZE (outermode), > - GET_MODE_BITSIZE (innermode)); > + inner_bytes * BITS_PER_UNIT); > > /* Unpack the value. */ > > if (GET_CODE (op) == CONST_VECTOR) > { > - num_elem = GET_MODE_NUNITS (innermode); > + num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)); > elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode); > } > else > @@ -6030,7 +6033,7 @@ simplify_immed_subreg (fixed_size_mode o > { > unsigned char * vp; > rtx el = (GET_CODE (op) == CONST_VECTOR > - ? CONST_VECTOR_ELT (op, elem) > + ? CONST_VECTOR_ELT (op, first_elem + elem) > : op); > > /* Vectors are kept in target memory order. (This is probably > @@ -6157,10 +6160,9 @@ simplify_immed_subreg (fixed_size_mode o > /* Renumber BYTE so that the least-significant byte is byte 0. A special > case is paradoxical SUBREGs, which shouldn't be adjusted since they > will already have offset 0. */ > - if (GET_MODE_SIZE (innermode) >= GET_MODE_SIZE (outermode)) > + if (inner_bytes >= GET_MODE_SIZE (outermode)) > { > - unsigned ibyte = (GET_MODE_SIZE (innermode) - GET_MODE_SIZE (outermode) > - - byte); > + unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte; > unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; > unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; > byte = (subword_byte % UNITS_PER_WORD > @@ -6169,7 +6171,7 @@ simplify_immed_subreg (fixed_size_mode o > > /* BYTE should still be inside OP. (Note that BYTE is unsigned, > so if it's become negative it will instead be very large.) */ > - gcc_assert (byte < GET_MODE_SIZE (innermode)); > + gcc_assert (byte < inner_bytes); > > /* Convert from bytes to chunks of size value_bit. */ > value_start = byte * (BITS_PER_UNIT / value_bit); > @@ -6358,7 +6360,18 @@ simplify_subreg (machine_mode outermode, > if (is_a <fixed_size_mode> (outermode, &fs_outermode) > && is_a <fixed_size_mode> (innermode, &fs_innermode) > && byte.is_constant (&cbyte)) > - return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte); > + return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte, > + 0, GET_MODE_SIZE (fs_innermode)); > + > + /* Handle constant-sized outer modes and variable-sized inner modes. */ > + unsigned HOST_WIDE_INT first_elem; > + if (GET_CODE (op) == CONST_VECTOR > + && is_a <fixed_size_mode> (outermode, &fs_outermode) > + && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode), > + &first_elem)) > + return simplify_immed_subreg (fs_outermode, op, innermode, 0, > + first_elem, > + GET_MODE_SIZE (fs_outermode)); > > return NULL_RTX; > }
On 01/12/2018 03:00 AM, Richard Sandiford wrote: > Ping. > > FWIW, the SLP test failures it fixes were ICEs rather than code-quality > tests, so this is a correctness fix rather than an optimisation. > > Thanks, > Richard > > Richard Sandiford <richard.sandiford@linaro.org> writes: >> The SVE support for the new CONST_VECTOR encoding needs to be able >> to extract the first N bits of the vector and duplicate it. This patch >> adds a simplify_subreg rule for that. >> >> The code is covered by the gcc.target/aarch64/sve_slp_*.c tests. >> >> Tested on aarch64-linux-gnu, x86_64-linux-gnu and powerpc64le-linux-gnu. >> Also tested by comparing the before and after assembly output for at >> least one target per CPU directory. OK to install? >> >> Richard >> >> >> 2018-01-04 Richard Sandiford <richard.sandiford@linaro.org> >> >> gcc/ >> * simplify-rtx.c (simplify_immed_subreg): Add an inner_bytes >> parameter and use it instead of GET_MODE_SIZE (innermode). Use >> inner_bytes * BITS_PER_UNIT instead of GET_MODE_BITSIZE (innermode). >> Use CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)) instead of >> GET_MODE_NUNITS (innermode). Also add a first_elem parameter. >> Change innermode from fixed_mode_size to machine_mode. >> (simplify_subreg): Update call accordingly. Handle a constant-sized >> subreg of a variable-length CONST_VECTOR. OK. jeff
Index: gcc/simplify-rtx.c =================================================================== --- gcc/simplify-rtx.c 2018-01-03 21:42:44.569646782 +0000 +++ gcc/simplify-rtx.c 2018-01-04 17:35:25.747473457 +0000 @@ -5971,13 +5971,16 @@ simplify_ternary_operation (enum rtx_cod or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR. - Works by unpacking OP into a collection of 8-bit values + Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values represented as a little-endian array of 'unsigned char', selecting by BYTE, - and then repacking them again for OUTERMODE. */ + and then repacking them again for OUTERMODE. If OP is a CONST_VECTOR, + FIRST_ELEM is the number of the first element to extract, otherwise + FIRST_ELEM is ignored. */ static rtx simplify_immed_subreg (fixed_size_mode outermode, rtx op, - fixed_size_mode innermode, unsigned int byte) + machine_mode innermode, unsigned int byte, + unsigned int first_elem, unsigned int inner_bytes) { enum { value_bit = 8, @@ -6007,13 +6010,13 @@ simplify_immed_subreg (fixed_size_mode o /* We support any size mode. */ max_bitsize = MAX (GET_MODE_BITSIZE (outermode), - GET_MODE_BITSIZE (innermode)); + inner_bytes * BITS_PER_UNIT); /* Unpack the value. */ if (GET_CODE (op) == CONST_VECTOR) { - num_elem = GET_MODE_NUNITS (innermode); + num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode)); elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode); } else @@ -6030,7 +6033,7 @@ simplify_immed_subreg (fixed_size_mode o { unsigned char * vp; rtx el = (GET_CODE (op) == CONST_VECTOR - ? CONST_VECTOR_ELT (op, elem) + ? CONST_VECTOR_ELT (op, first_elem + elem) : op); /* Vectors are kept in target memory order. (This is probably @@ -6157,10 +6160,9 @@ simplify_immed_subreg (fixed_size_mode o /* Renumber BYTE so that the least-significant byte is byte 0. A special case is paradoxical SUBREGs, which shouldn't be adjusted since they will already have offset 0. */ - if (GET_MODE_SIZE (innermode) >= GET_MODE_SIZE (outermode)) + if (inner_bytes >= GET_MODE_SIZE (outermode)) { - unsigned ibyte = (GET_MODE_SIZE (innermode) - GET_MODE_SIZE (outermode) - - byte); + unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte; unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte; unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte; byte = (subword_byte % UNITS_PER_WORD @@ -6169,7 +6171,7 @@ simplify_immed_subreg (fixed_size_mode o /* BYTE should still be inside OP. (Note that BYTE is unsigned, so if it's become negative it will instead be very large.) */ - gcc_assert (byte < GET_MODE_SIZE (innermode)); + gcc_assert (byte < inner_bytes); /* Convert from bytes to chunks of size value_bit. */ value_start = byte * (BITS_PER_UNIT / value_bit); @@ -6358,7 +6360,18 @@ simplify_subreg (machine_mode outermode, if (is_a <fixed_size_mode> (outermode, &fs_outermode) && is_a <fixed_size_mode> (innermode, &fs_innermode) && byte.is_constant (&cbyte)) - return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte); + return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte, + 0, GET_MODE_SIZE (fs_innermode)); + + /* Handle constant-sized outer modes and variable-sized inner modes. */ + unsigned HOST_WIDE_INT first_elem; + if (GET_CODE (op) == CONST_VECTOR + && is_a <fixed_size_mode> (outermode, &fs_outermode) + && constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode), + &first_elem)) + return simplify_immed_subreg (fs_outermode, op, innermode, 0, + first_elem, + GET_MODE_SIZE (fs_outermode)); return NULL_RTX; }