Message ID | OF8E0EC0C3.51E64C85-ONC2257883.00282C16-C2257883.0029331D@il.ibm.com |
---|---|
State | Accepted |
Headers | show |
>> I have no objections to this going into 4.5 and 4.6 since it corrects >> the implementation of the neon intrinsics but please check with the >> release managers. > > OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi? Ok. Please allow 24 hours for an RM (cc'd) to comment since this is technically not a regression but is a long term bug that needs fixing. cheers Ramana > > Thanks, > Ira > > 4.5 and 4.6 ChangeLog: > > Backport from mainline: > 2011-04-18 Ulrich Weigand<ulrich.weigand@linaro.org> > Ira Rosen<ira.rosen@linaro.org> > > PR target/48252 > * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments > to match neon_vzip/vuzp/vtrn_internal. > * config/arm/neon.md (neon_vtrn<mode>_internal): Make both > outputs explicitly dependent on both inputs. > (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise. > > testsuite/Changelog: > > Backport from mainline: > 2011-04-18 Ulrich Weigand<ulrich.weigand@linaro.org> > Ira Rosen<ira.rosen@linaro.org> > > PR target/48252 > * gcc.target/arm/pr48252.c: New test. > > > 4.5 patch: > > Index: config/arm/arm.c > =================================================================== > --- config/arm/arm.c (revision 172714) > +++ config/arm/arm.c (working copy) > @@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode > rtx tmp1 = gen_reg_rtx (mode); > rtx tmp2 = gen_reg_rtx (mode); > > - emit_insn (intfn (tmp1, op1, tmp2, op2)); > + emit_insn (intfn (tmp1, op1, op2, tmp2)); > > emit_move_insn (mem, tmp1); > mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); > Index: config/arm/neon.md > =================================================================== > --- config/arm/neon.md (revision 172714) > +++ config/arm/neon.md (working copy) > @@ -3895,13 +3895,14 @@ > > (define_insn "neon_vtrn<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > - UNSPEC_VTRN1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VTRN2))] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > + UNSPEC_VTRN1)) > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VTRN2))] > "TARGET_NEON" > - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > @@ -3921,13 +3922,14 @@ > > (define_insn "neon_vzip<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > - UNSPEC_VZIP1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VZIP2))] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > + UNSPEC_VZIP1)) > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VZIP2))] > "TARGET_NEON" > - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > @@ -3947,13 +3949,14 @@ > > (define_insn "neon_vuzp<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > UNSPEC_VUZP1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VUZP2))] > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VUZP2))] > "TARGET_NEON" > - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > Index: testsuite/gcc.target/arm/pr48252.c > =================================================================== > --- testsuite/gcc.target/arm/pr48252.c (revision 0) > +++ testsuite/gcc.target/arm/pr48252.c (revision 0) > @@ -0,0 +1,32 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target arm_neon_hw } */ > +/* { dg-options "-O2" } */ > +/* { dg-add-options arm_neon } */ > + > +#include "arm_neon.h" > +#include<stdlib.h> > + > +int main(void) > +{ > + uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1}; > + uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2}; > + uint8x8x2_t vd1, vd2; > + union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4; > + int i; > + > + vd1 = vzip_u8(v1, vdup_n_u8(0)); > + vd2 = vzip_u8(v2, vdup_n_u8(0)); > + > + vst1_u8(d1.buf, vd1.val[0]); > + vst1_u8(d2.buf, vd1.val[1]); > + vst1_u8(d3.buf, vd2.val[0]); > + vst1_u8(d4.buf, vd2.val[1]); > + > + for (i = 0; i< 8; i++) > + if ((i % 2 == 0&& d4.buf[i] != 2) > + || (i % 2 == 1&& d4.buf[i] != 0)) > + abort (); > + > + return 0; > +} > + > > > 4.6 patch: > > Index: config/arm/arm.c > =================================================================== > --- config/arm/arm.c (revision 172810) > +++ config/arm/arm.c (working copy) > @@ -19564,7 +19564,7 @@ neon_emit_pair_result_insn (enum machine_mode mode > rtx tmp1 = gen_reg_rtx (mode); > rtx tmp2 = gen_reg_rtx (mode); > > - emit_insn (intfn (tmp1, op1, tmp2, op2)); > + emit_insn (intfn (tmp1, op1, op2, tmp2)); > > emit_move_insn (mem, tmp1); > mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); > Index: config/arm/neon.md > =================================================================== > --- config/arm/neon.md (revision 172810) > +++ config/arm/neon.md (working copy) > @@ -4079,13 +4079,14 @@ > > (define_insn "neon_vtrn<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > - UNSPEC_VTRN1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VTRN2))] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > + UNSPEC_VTRN1)) > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VTRN2))] > "TARGET_NEON" > - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > @@ -4105,13 +4106,14 @@ > > (define_insn "neon_vzip<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > - UNSPEC_VZIP1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VZIP2))] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > + UNSPEC_VZIP1)) > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VZIP2))] > "TARGET_NEON" > - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > @@ -4131,13 +4133,14 @@ > > (define_insn "neon_vuzp<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > UNSPEC_VUZP1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VUZP2))] > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VUZP2))] > "TARGET_NEON" > - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > Index: testsuite/gcc.target/arm/pr48252.c > =================================================================== > --- testsuite/gcc.target/arm/pr48252.c (revision 0) > +++ testsuite/gcc.target/arm/pr48252.c (revision 0) > @@ -0,0 +1,32 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target arm_neon_hw } */ > +/* { dg-options "-O2" } */ > +/* { dg-add-options arm_neon } */ > + > +#include "arm_neon.h" > +#include<stdlib.h> > + > +int main(void) > +{ > + uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1}; > + uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2}; > + uint8x8x2_t vd1, vd2; > + union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4; > + int i; > + > + vd1 = vzip_u8(v1, vdup_n_u8(0)); > + vd2 = vzip_u8(v2, vdup_n_u8(0)); > + > + vst1_u8(d1.buf, vd1.val[0]); > + vst1_u8(d2.buf, vd1.val[1]); > + vst1_u8(d3.buf, vd2.val[0]); > + vst1_u8(d4.buf, vd2.val[1]); > + > + for (i = 0; i< 8; i++) > + if ((i % 2 == 0&& d4.buf[i] != 2) > + || (i % 2 == 1&& d4.buf[i] != 0)) > + abort (); > + > + return 0; > +} > + > > >> >> cheers >> Ramana >> >>> >>> Thanks, >>> Ira >>> >>> ChangeLog: >>> >>> 2011-04-07 Ulrich Weigand<ulrich.weigand@linaro.org> >>> Ira Rosen<ira.rosen@linaro.org> >>> >>> PR target/48252 >>> * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments >>> to match neon_vzip/vuzp/vtrn_internal. >>> * config/arm/neon.md (neon_vtrn<mode>_internal): Make both >>> outputs explicitly dependent on both inputs. >>> (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise. >>> >>> testsuite/Changelog: >>> >>> PR target/48252 >>> * gcc.target/arm/pr48252.c: New test. >> >
On Tue, 3 May 2011, Ramana Radhakrishnan wrote: > > > > I have no objections to this going into 4.5 and 4.6 since it corrects > > > the implementation of the neon intrinsics but please check with the > > > release managers. > > > > OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi? > > > Ok. Please allow 24 hours for an RM (cc'd) to comment since this is > technically not a regression but is a long term bug that needs fixing. As it's purely target local we defer to the target maintainers. Richard. > cheers > Ramana > > > > > Thanks, > > Ira > > > > 4.5 and 4.6 ChangeLog: > > > > Backport from mainline: > > 2011-04-18 Ulrich Weigand<ulrich.weigand@linaro.org> > > Ira Rosen<ira.rosen@linaro.org> > > > > PR target/48252 > > * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments > > to match neon_vzip/vuzp/vtrn_internal. > > * config/arm/neon.md (neon_vtrn<mode>_internal): Make both > > outputs explicitly dependent on both inputs. > > (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise. > > > > testsuite/Changelog: > > > > Backport from mainline: > > 2011-04-18 Ulrich Weigand<ulrich.weigand@linaro.org> > > Ira Rosen<ira.rosen@linaro.org> > > > > PR target/48252 > > * gcc.target/arm/pr48252.c: New test. > > > > > > 4.5 patch: > > > > Index: config/arm/arm.c > > =================================================================== > > --- config/arm/arm.c (revision 172714) > > +++ config/arm/arm.c (working copy) > > @@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode > > rtx tmp1 = gen_reg_rtx (mode); > > rtx tmp2 = gen_reg_rtx (mode); > > > > - emit_insn (intfn (tmp1, op1, tmp2, op2)); > > + emit_insn (intfn (tmp1, op1, op2, tmp2)); > > > > emit_move_insn (mem, tmp1); > > mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); > > Index: config/arm/neon.md > > =================================================================== > > --- config/arm/neon.md (revision 172714) > > +++ config/arm/neon.md (working copy) > > @@ -3895,13 +3895,14 @@ > > > > (define_insn "neon_vtrn<mode>_internal" > > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > > - UNSPEC_VTRN1)) > > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > > - UNSPEC_VTRN2))] > > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > > + (match_operand:VDQW 2 "s_register_operand" "w")] > > + UNSPEC_VTRN1)) > > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > > + UNSPEC_VTRN2))] > > "TARGET_NEON" > > - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > > + "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > > [(set (attr "neon_type") > > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > > (const_string "neon_bp_simple") > > @@ -3921,13 +3922,14 @@ > > > > (define_insn "neon_vzip<mode>_internal" > > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > > - UNSPEC_VZIP1)) > > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > > - UNSPEC_VZIP2))] > > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > > + (match_operand:VDQW 2 "s_register_operand" "w")] > > + UNSPEC_VZIP1)) > > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > > + UNSPEC_VZIP2))] > > "TARGET_NEON" > > - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > > + "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > > [(set (attr "neon_type") > > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > > (const_string "neon_bp_simple") > > @@ -3947,13 +3949,14 @@ > > > > (define_insn "neon_vuzp<mode>_internal" > > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > > + (match_operand:VDQW 2 "s_register_operand" "w")] > > UNSPEC_VUZP1)) > > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > > - UNSPEC_VUZP2))] > > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > > + UNSPEC_VUZP2))] > > "TARGET_NEON" > > - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > > + "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > > [(set (attr "neon_type") > > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > > (const_string "neon_bp_simple") > > Index: testsuite/gcc.target/arm/pr48252.c > > =================================================================== > > --- testsuite/gcc.target/arm/pr48252.c (revision 0) > > +++ testsuite/gcc.target/arm/pr48252.c (revision 0) > > @@ -0,0 +1,32 @@ > > +/* { dg-do run } */ > > +/* { dg-require-effective-target arm_neon_hw } */ > > +/* { dg-options "-O2" } */ > > +/* { dg-add-options arm_neon } */ > > + > > +#include "arm_neon.h" > > +#include<stdlib.h> > > + > > +int main(void) > > +{ > > + uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1}; > > + uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2}; > > + uint8x8x2_t vd1, vd2; > > + union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4; > > + int i; > > + > > + vd1 = vzip_u8(v1, vdup_n_u8(0)); > > + vd2 = vzip_u8(v2, vdup_n_u8(0)); > > + > > + vst1_u8(d1.buf, vd1.val[0]); > > + vst1_u8(d2.buf, vd1.val[1]); > > + vst1_u8(d3.buf, vd2.val[0]); > > + vst1_u8(d4.buf, vd2.val[1]); > > + > > + for (i = 0; i< 8; i++) > > + if ((i % 2 == 0&& d4.buf[i] != 2) > > + || (i % 2 == 1&& d4.buf[i] != 0)) > > + abort (); > > + > > + return 0; > > +} > > + > > > > > > 4.6 patch: > > > > Index: config/arm/arm.c > > =================================================================== > > --- config/arm/arm.c (revision 172810) > > +++ config/arm/arm.c (working copy) > > @@ -19564,7 +19564,7 @@ neon_emit_pair_result_insn (enum machine_mode mode > > rtx tmp1 = gen_reg_rtx (mode); > > rtx tmp2 = gen_reg_rtx (mode); > > > > - emit_insn (intfn (tmp1, op1, tmp2, op2)); > > + emit_insn (intfn (tmp1, op1, op2, tmp2)); > > > > emit_move_insn (mem, tmp1); > > mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); > > Index: config/arm/neon.md > > =================================================================== > > --- config/arm/neon.md (revision 172810) > > +++ config/arm/neon.md (working copy) > > @@ -4079,13 +4079,14 @@ > > > > (define_insn "neon_vtrn<mode>_internal" > > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > > - UNSPEC_VTRN1)) > > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > > - UNSPEC_VTRN2))] > > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > > + (match_operand:VDQW 2 "s_register_operand" "w")] > > + UNSPEC_VTRN1)) > > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > > + UNSPEC_VTRN2))] > > "TARGET_NEON" > > - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > > + "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > > [(set (attr "neon_type") > > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > > (const_string "neon_bp_simple") > > @@ -4105,13 +4106,14 @@ > > > > (define_insn "neon_vzip<mode>_internal" > > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > > - UNSPEC_VZIP1)) > > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > > - UNSPEC_VZIP2))] > > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > > + (match_operand:VDQW 2 "s_register_operand" "w")] > > + UNSPEC_VZIP1)) > > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > > + UNSPEC_VZIP2))] > > "TARGET_NEON" > > - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > > + "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > > [(set (attr "neon_type") > > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > > (const_string "neon_bp_simple") > > @@ -4131,13 +4133,14 @@ > > > > (define_insn "neon_vuzp<mode>_internal" > > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > > + (match_operand:VDQW 2 "s_register_operand" "w")] > > UNSPEC_VUZP1)) > > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > > - UNSPEC_VUZP2))] > > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > > + UNSPEC_VUZP2))] > > "TARGET_NEON" > > - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > > + "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > > [(set (attr "neon_type") > > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > > (const_string "neon_bp_simple") > > Index: testsuite/gcc.target/arm/pr48252.c > > =================================================================== > > --- testsuite/gcc.target/arm/pr48252.c (revision 0) > > +++ testsuite/gcc.target/arm/pr48252.c (revision 0) > > @@ -0,0 +1,32 @@ > > +/* { dg-do run } */ > > +/* { dg-require-effective-target arm_neon_hw } */ > > +/* { dg-options "-O2" } */ > > +/* { dg-add-options arm_neon } */ > > + > > +#include "arm_neon.h" > > +#include<stdlib.h> > > + > > +int main(void) > > +{ > > + uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1}; > > + uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2}; > > + uint8x8x2_t vd1, vd2; > > + union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4; > > + int i; > > + > > + vd1 = vzip_u8(v1, vdup_n_u8(0)); > > + vd2 = vzip_u8(v2, vdup_n_u8(0)); > > + > > + vst1_u8(d1.buf, vd1.val[0]); > > + vst1_u8(d2.buf, vd1.val[1]); > > + vst1_u8(d3.buf, vd2.val[0]); > > + vst1_u8(d4.buf, vd2.val[1]); > > + > > + for (i = 0; i< 8; i++) > > + if ((i % 2 == 0&& d4.buf[i] != 2) > > + || (i % 2 == 1&& d4.buf[i] != 0)) > > + abort (); > > + > > + return 0; > > +} > > + > > > > > > > > > > cheers > > > Ramana > > > > > > > > > > > Thanks, > > > > Ira > > > > > > > > ChangeLog: > > > > > > > > 2011-04-07 Ulrich Weigand<ulrich.weigand@linaro.org> > > > > Ira Rosen<ira.rosen@linaro.org> > > > > > > > > PR target/48252 > > > > * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments > > > > to match neon_vzip/vuzp/vtrn_internal. > > > > * config/arm/neon.md (neon_vtrn<mode>_internal): Make both > > > > outputs explicitly dependent on both inputs. > > > > (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise. > > > > > > > > testsuite/Changelog: > > > > > > > > PR target/48252 > > > > * gcc.target/arm/pr48252.c: New test. > > > > > > >
On Sun, 2011-05-01 at 10:30 +0300, Ira Rosen wrote: > > Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> wrote on 07/04/2011 > 03:16:44 PM: > > > > > On 07/04/11 08:42, Ira Rosen wrote: > > > Hi, > > > > > > This patch makes both outputs of neon_vzip/vuzp/vtrn_internal > > > explicitly dependent on both inputs, preventing incorrect > > > optimization: > > > for > > > (a,b)<- vzip (c,d) > > > and > > > (e,f)<- vzip (g,d) > > > CSE decides that b==f, since b and f depend only on d. > > > > > > Tested on arm-linux-gnueabi. OK for trunk? > > > > This is OK for trunk. > > > > > OK for 4.6 after testing? > > I don't understand how it has happened, but the 4.6 patch that has been committed is corrupt (the patch submitted here looks OK). Please remember that it is essential to test release branches before commits are made. R. > > I have no objections to this going into 4.5 and 4.6 since it corrects > > the implementation of the neon intrinsics but please check with the > > release managers. > > OK to backport to 4.5 and 4.6 - both tested on arm-linux-gnueabi? > > Thanks, > Ira > > 4.5 and 4.6 ChangeLog: > > Backport from mainline: > 2011-04-18 Ulrich Weigand <ulrich.weigand@linaro.org> > Ira Rosen <ira.rosen@linaro.org> > > PR target/48252 > * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments > to match neon_vzip/vuzp/vtrn_internal. > * config/arm/neon.md (neon_vtrn<mode>_internal): Make both > outputs explicitly dependent on both inputs. > (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise. > > testsuite/Changelog: > > Backport from mainline: > 2011-04-18 Ulrich Weigand <ulrich.weigand@linaro.org> > Ira Rosen <ira.rosen@linaro.org> > > PR target/48252 > * gcc.target/arm/pr48252.c: New test. > > > 4.5 patch: > > Index: config/arm/arm.c > =================================================================== > --- config/arm/arm.c (revision 172714) > +++ config/arm/arm.c (working copy) > @@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode > rtx tmp1 = gen_reg_rtx (mode); > rtx tmp2 = gen_reg_rtx (mode); > > - emit_insn (intfn (tmp1, op1, tmp2, op2)); > + emit_insn (intfn (tmp1, op1, op2, tmp2)); > > emit_move_insn (mem, tmp1); > mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); > Index: config/arm/neon.md > =================================================================== > --- config/arm/neon.md (revision 172714) > +++ config/arm/neon.md (working copy) > @@ -3895,13 +3895,14 @@ > > (define_insn "neon_vtrn<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > - UNSPEC_VTRN1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VTRN2))] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > + UNSPEC_VTRN1)) > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VTRN2))] > "TARGET_NEON" > - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > @@ -3921,13 +3922,14 @@ > > (define_insn "neon_vzip<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > - UNSPEC_VZIP1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VZIP2))] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > + UNSPEC_VZIP1)) > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VZIP2))] > "TARGET_NEON" > - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > @@ -3947,13 +3949,14 @@ > > (define_insn "neon_vuzp<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > UNSPEC_VUZP1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VUZP2))] > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VUZP2))] > "TARGET_NEON" > - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > Index: testsuite/gcc.target/arm/pr48252.c > =================================================================== > --- testsuite/gcc.target/arm/pr48252.c (revision 0) > +++ testsuite/gcc.target/arm/pr48252.c (revision 0) > @@ -0,0 +1,32 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target arm_neon_hw } */ > +/* { dg-options "-O2" } */ > +/* { dg-add-options arm_neon } */ > + > +#include "arm_neon.h" > +#include <stdlib.h> > + > +int main(void) > +{ > + uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1}; > + uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2}; > + uint8x8x2_t vd1, vd2; > + union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4; > + int i; > + > + vd1 = vzip_u8(v1, vdup_n_u8(0)); > + vd2 = vzip_u8(v2, vdup_n_u8(0)); > + > + vst1_u8(d1.buf, vd1.val[0]); > + vst1_u8(d2.buf, vd1.val[1]); > + vst1_u8(d3.buf, vd2.val[0]); > + vst1_u8(d4.buf, vd2.val[1]); > + > + for (i = 0; i < 8; i++) > + if ((i % 2 == 0 && d4.buf[i] != 2) > + || (i % 2 == 1 && d4.buf[i] != 0)) > + abort (); > + > + return 0; > +} > + > > > 4.6 patch: > > Index: config/arm/arm.c > =================================================================== > --- config/arm/arm.c (revision 172810) > +++ config/arm/arm.c (working copy) > @@ -19564,7 +19564,7 @@ neon_emit_pair_result_insn (enum machine_mode mode > rtx tmp1 = gen_reg_rtx (mode); > rtx tmp2 = gen_reg_rtx (mode); > > - emit_insn (intfn (tmp1, op1, tmp2, op2)); > + emit_insn (intfn (tmp1, op1, op2, tmp2)); > > emit_move_insn (mem, tmp1); > mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); > Index: config/arm/neon.md > =================================================================== > --- config/arm/neon.md (revision 172810) > +++ config/arm/neon.md (working copy) > @@ -4079,13 +4079,14 @@ > > (define_insn "neon_vtrn<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > - UNSPEC_VTRN1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VTRN2))] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > + UNSPEC_VTRN1)) > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VTRN2))] > "TARGET_NEON" > - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > @@ -4105,13 +4106,14 @@ > > (define_insn "neon_vzip<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > - UNSPEC_VZIP1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VZIP2))] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > + UNSPEC_VZIP1)) > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VZIP2))] > "TARGET_NEON" > - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > @@ -4131,13 +4133,14 @@ > > (define_insn "neon_vuzp<mode>_internal" > [(set (match_operand:VDQW 0 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] > + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") > + (match_operand:VDQW 2 "s_register_operand" "w")] > UNSPEC_VUZP1)) > - (set (match_operand:VDQW 2 "s_register_operand" "=w") > - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] > - UNSPEC_VUZP2))] > + (set (match_operand:VDQW 3 "s_register_operand" "=2") > + (unspec:VDQW [(match_dup 1) (match_dup 2)] > + UNSPEC_VUZP2))] > "TARGET_NEON" > - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" > + "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" > [(set (attr "neon_type") > (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) > (const_string "neon_bp_simple") > Index: testsuite/gcc.target/arm/pr48252.c > =================================================================== > --- testsuite/gcc.target/arm/pr48252.c (revision 0) > +++ testsuite/gcc.target/arm/pr48252.c (revision 0) > @@ -0,0 +1,32 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target arm_neon_hw } */ > +/* { dg-options "-O2" } */ > +/* { dg-add-options arm_neon } */ > + > +#include "arm_neon.h" > +#include <stdlib.h> > + > +int main(void) > +{ > + uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1}; > + uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2}; > + uint8x8x2_t vd1, vd2; > + union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4; > + int i; > + > + vd1 = vzip_u8(v1, vdup_n_u8(0)); > + vd2 = vzip_u8(v2, vdup_n_u8(0)); > + > + vst1_u8(d1.buf, vd1.val[0]); > + vst1_u8(d2.buf, vd1.val[1]); > + vst1_u8(d3.buf, vd2.val[0]); > + vst1_u8(d4.buf, vd2.val[1]); > + > + for (i = 0; i < 8; i++) > + if ((i % 2 == 0 && d4.buf[i] != 2) > + || (i % 2 == 1 && d4.buf[i] != 0)) > + abort (); > + > + return 0; > +} > + > > > > > > cheers > > Ramana > > > > > > > > Thanks, > > > Ira > > > > > > ChangeLog: > > > > > > 2011-04-07 Ulrich Weigand<ulrich.weigand@linaro.org> > > > Ira Rosen<ira.rosen@linaro.org> > > > > > > PR target/48252 > > > * config/arm/arm.c (neon_emit_pair_result_insn): Swap arguments > > > to match neon_vzip/vuzp/vtrn_internal. > > > * config/arm/neon.md (neon_vtrn<mode>_internal): Make both > > > outputs explicitly dependent on both inputs. > > > (neon_vzip<mode>_internal, neon_vuzp<mode>_internal): Likewise. > > > > > > testsuite/Changelog: > > > > > > PR target/48252 > > > * gcc.target/arm/pr48252.c: New test. > > >
Index: config/arm/arm.c =================================================================== --- config/arm/arm.c (revision 172714) +++ config/arm/arm.c (working copy) @@ -18237,7 +18237,7 @@ neon_emit_pair_result_insn (enum machine_mode mode rtx tmp1 = gen_reg_rtx (mode); rtx tmp2 = gen_reg_rtx (mode); - emit_insn (intfn (tmp1, op1, tmp2, op2)); + emit_insn (intfn (tmp1, op1, op2, tmp2)); emit_move_insn (mem, tmp1); mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); Index: config/arm/neon.md =================================================================== --- config/arm/neon.md (revision 172714) +++ config/arm/neon.md (working copy) @@ -3895,13 +3895,14 @@ (define_insn "neon_vtrn<mode>_internal" [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] - UNSPEC_VTRN1)) - (set (match_operand:VDQW 2 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] - UNSPEC_VTRN2))] + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w")] + UNSPEC_VTRN1)) + (set (match_operand:VDQW 3 "s_register_operand" "=2") + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VTRN2))] "TARGET_NEON" - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" + "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) (const_string "neon_bp_simple") @@ -3921,13 +3922,14 @@ (define_insn "neon_vzip<mode>_internal" [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] - UNSPEC_VZIP1)) - (set (match_operand:VDQW 2 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] - UNSPEC_VZIP2))] + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w")] + UNSPEC_VZIP1)) + (set (match_operand:VDQW 3 "s_register_operand" "=2") + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VZIP2))] "TARGET_NEON" - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" + "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) (const_string "neon_bp_simple") @@ -3947,13 +3949,14 @@ (define_insn "neon_vuzp<mode>_internal" [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w")] UNSPEC_VUZP1)) - (set (match_operand:VDQW 2 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] - UNSPEC_VUZP2))] + (set (match_operand:VDQW 3 "s_register_operand" "=2") + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VUZP2))] "TARGET_NEON" - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" + "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) (const_string "neon_bp_simple") Index: testsuite/gcc.target/arm/pr48252.c =================================================================== --- testsuite/gcc.target/arm/pr48252.c (revision 0) +++ testsuite/gcc.target/arm/pr48252.c (revision 0) @@ -0,0 +1,32 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include <stdlib.h> + +int main(void) +{ + uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1}; + uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2}; + uint8x8x2_t vd1, vd2; + union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4; + int i; + + vd1 = vzip_u8(v1, vdup_n_u8(0)); + vd2 = vzip_u8(v2, vdup_n_u8(0)); + + vst1_u8(d1.buf, vd1.val[0]); + vst1_u8(d2.buf, vd1.val[1]); + vst1_u8(d3.buf, vd2.val[0]); + vst1_u8(d4.buf, vd2.val[1]); + + for (i = 0; i < 8; i++) + if ((i % 2 == 0 && d4.buf[i] != 2) + || (i % 2 == 1 && d4.buf[i] != 0)) + abort (); + + return 0; +} + 4.6 patch: Index: config/arm/arm.c =================================================================== --- config/arm/arm.c (revision 172810) +++ config/arm/arm.c (working copy) @@ -19564,7 +19564,7 @@ neon_emit_pair_result_insn (enum machine_mode mode rtx tmp1 = gen_reg_rtx (mode); rtx tmp2 = gen_reg_rtx (mode); - emit_insn (intfn (tmp1, op1, tmp2, op2)); + emit_insn (intfn (tmp1, op1, op2, tmp2)); emit_move_insn (mem, tmp1); mem = adjust_address (mem, mode, GET_MODE_SIZE (mode)); Index: config/arm/neon.md =================================================================== --- config/arm/neon.md (revision 172810) +++ config/arm/neon.md (working copy) @@ -4079,13 +4079,14 @@ (define_insn "neon_vtrn<mode>_internal" [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] - UNSPEC_VTRN1)) - (set (match_operand:VDQW 2 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] - UNSPEC_VTRN2))] + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w")] + UNSPEC_VTRN1)) + (set (match_operand:VDQW 3 "s_register_operand" "=2") + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VTRN2))] "TARGET_NEON" - "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" + "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) (const_string "neon_bp_simple") @@ -4105,13 +4106,14 @@ (define_insn "neon_vzip<mode>_internal" [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] - UNSPEC_VZIP1)) - (set (match_operand:VDQW 2 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] - UNSPEC_VZIP2))] + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w")] + UNSPEC_VZIP1)) + (set (match_operand:VDQW 3 "s_register_operand" "=2") + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VZIP2))] "TARGET_NEON" - "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" + "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) (const_string "neon_bp_simple") @@ -4131,13 +4133,14 @@ (define_insn "neon_vuzp<mode>_internal" [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")] + (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w")] UNSPEC_VUZP1)) - (set (match_operand:VDQW 2 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 3 "s_register_operand" "2")] - UNSPEC_VUZP2))] + (set (match_operand:VDQW 3 "s_register_operand" "=2") + (unspec:VDQW [(match_dup 1) (match_dup 2)] + UNSPEC_VUZP2))] "TARGET_NEON" - "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2" + "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>3" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_d_reg>") (const_int 0)) (const_string "neon_bp_simple") Index: testsuite/gcc.target/arm/pr48252.c =================================================================== --- testsuite/gcc.target/arm/pr48252.c (revision 0) +++ testsuite/gcc.target/arm/pr48252.c (revision 0) @@ -0,0 +1,32 @@ +/* { dg-do run } */ +/* { dg-require-effective-target arm_neon_hw } */ +/* { dg-options "-O2" } */ +/* { dg-add-options arm_neon } */ + +#include "arm_neon.h" +#include <stdlib.h> + +int main(void) +{ + uint8x8_t v1 = {1, 1, 1, 1, 1, 1, 1, 1}; + uint8x8_t v2 = {2, 2, 2, 2, 2, 2, 2, 2}; + uint8x8x2_t vd1, vd2; + union {uint8x8_t v; uint8_t buf[8];} d1, d2, d3, d4; + int i; + + vd1 = vzip_u8(v1, vdup_n_u8(0)); + vd2 = vzip_u8(v2, vdup_n_u8(0)); + + vst1_u8(d1.buf, vd1.val[0]); + vst1_u8(d2.buf, vd1.val[1]); + vst1_u8(d3.buf, vd2.val[0]); + vst1_u8(d4.buf, vd2.val[1]); + + for (i = 0; i < 8; i++) + if ((i % 2 == 0 && d4.buf[i] != 2) + || (i % 2 == 1 && d4.buf[i] != 0)) + abort (); + + return 0; +} +