@@ -19890,8 +19890,12 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
par = emit_insn (par);
REG_NOTES (par) = dwarf;
- arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
- base_reg, base_reg);
+ /* The REG_CFA_ADJUST_CFA note is added to fix a DWARF debug info issue
+ when shrink-wrapping is enabled. So when shrink-wrapping is not
+ enabled, we should not add the note. */
+ if (flag_shrink_wrap)
+ arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
+ base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as LDRD pattern. If even
new file mode 100644
@@ -0,0 +1,79 @@
+/* { dg-do compile { target arm_neon } } */
+/* { dg-options " -Os -fno-omit-frame-pointer -mapcs -mabi=aapcs-linux -marm -mfloat-abi=softfp -g " } */
+
+#include <arm_neon.h>
+
+typedef uint8x16_t unative_t;
+static inline unative_t SHLBYTE(unative_t v)
+{
+ return vshlq_n_u8(v, 1);
+}
+
+static inline unative_t MASK(unative_t v)
+{
+ const uint8x16_t temp = ((unative_t){0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0});
+ return (unative_t)vcltq_s8((int8x16_t)v, (int8x16_t)temp);
+}
+
+void raid6_neon4_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
+{
+ uint8_t **dptr = (uint8_t **)ptrs;
+ uint8_t *p, *q;
+ int d, z, z0;
+
+ register unative_t wd0, wq0, wp0, w10, w20;
+ register unative_t wd1, wq1, wp1, w11, w21;
+ register unative_t wd2, wq2, wp2, w12, w22;
+ register unative_t wd3, wq3, wp3, w13, w23;
+ const unative_t x1d = ((unative_t){0x1d,0x1d,0x1d,0x1d, 0x1d,0x1d,0x1d,0x1d, 0x1d,0x1d,0x1d,0x1d, 0x1d,0x1d,0x1d,0x1d});
+
+ z0 = disks - 3;
+ p = dptr[z0+1];
+ q = dptr[z0+2];
+
+ for ( d = 0 ; d < bytes ; d += sizeof(unative_t)*4 ) {
+ wq0 = wp0 = vld1q_u8(&dptr[z0][d+0*sizeof(unative_t)]);
+ wq1 = wp1 = vld1q_u8(&dptr[z0][d+1*sizeof(unative_t)]);
+ wq2 = wp2 = vld1q_u8(&dptr[z0][d+2*sizeof(unative_t)]);
+ wq3 = wp3 = vld1q_u8(&dptr[z0][d+3*sizeof(unative_t)]);
+ for ( z = z0-1 ; z >= 0 ; z-- ) {
+ wd0 = vld1q_u8(&dptr[z][d+0*sizeof(unative_t)]);
+ wd1 = vld1q_u8(&dptr[z][d+1*sizeof(unative_t)]);
+ wd2 = vld1q_u8(&dptr[z][d+2*sizeof(unative_t)]);
+ wd3 = vld1q_u8(&dptr[z][d+3*sizeof(unative_t)]);
+ wp0 = veorq_u8(wp0, wd0);
+ wp1 = veorq_u8(wp1, wd1);
+ wp2 = veorq_u8(wp2, wd2);
+ wp3 = veorq_u8(wp3, wd3);
+ w20 = MASK(wq0);
+ w21 = MASK(wq1);
+ w22 = MASK(wq2);
+ w23 = MASK(wq3);
+ w10 = SHLBYTE(wq0);
+ w11 = SHLBYTE(wq1);
+ w12 = SHLBYTE(wq2);
+ w13 = SHLBYTE(wq3);
+
+ w20 = vandq_u8(w20, x1d);
+ w21 = vandq_u8(w21, x1d);
+ w22 = vandq_u8(w22, x1d);
+ w23 = vandq_u8(w23, x1d);
+ w10 = veorq_u8(w10, w20);
+ w11 = veorq_u8(w11, w21);
+ w12 = veorq_u8(w12, w22);
+ w13 = veorq_u8(w13, w23);
+ wq0 = veorq_u8(w10, wd0);
+ wq1 = veorq_u8(w11, wd1);
+ wq2 = veorq_u8(w12, wd2);
+ wq3 = veorq_u8(w13, wd3);
+ }
+ vst1q_u8(&p[d+sizeof(unative_t)*0], wp0);
+ vst1q_u8(&p[d+sizeof(unative_t)*1], wp1);
+ vst1q_u8(&p[d+sizeof(unative_t)*2], wp2);
+ vst1q_u8(&p[d+sizeof(unative_t)*3], wp3);
+ vst1q_u8(&q[d+sizeof(unative_t)*0], wq0);
+ vst1q_u8(&q[d+sizeof(unative_t)*1], wq1);
+ vst1q_u8(&q[d+sizeof(unative_t)*2], wq2);
+ vst1q_u8(&q[d+sizeof(unative_t)*3], wq3);
+ }
+}