aarch64 sim sxtl/uxtl fix, plus another addv fix

Message ID CABXYE2WciJXO08+r8L+0gEGp=jUP6vtQmWV_pfYbkRJ=jNr30Q@mail.gmail.com
State New
Headers show

Commit Message

Jim Wilson Feb. 19, 2017, 9:30 p.m.
This started with the observation that sxtl/uxtl were broken for the
8b/16b cases.  The code is multiplying bias (which is equal to 0 or 2)
by 3, when it should be by 4.  3 would have been correct as an
exponent (0**3 = 0 and 2**3 = 8, the required byte offsets), but not
as a multiplier (2*3 = 6, not the required 8).

The new testcase fails without the patch, and works with the patch.
This patch reduces gcc C testsuite failures from 1558 to 1510 (-48).

When writing the testcase, and verifying on hardware, I noticed that
the addv instruction was still broken.  Although I had fixed a bug in
it before, I found another one.  It isn't truncating results to the input
size, e.g. the 8h case should produce a result truncated to 8 bits,
but writes 64 bits to the dest register.  This was fixed by moving the
val variable declaration inside each case to give it an appropriate
type size.

Since I had used addv in a number of testcases, this required fixing 4
existing testcases to check for the correct addv result.  This fix
unfortunately doesn't help reduce the number of gcc C testsuite
failures, which remain unchanged by this addv fix at 1510.  This is
probably why I didn't notice it before.

Jim

Patch hide | download patch | download mbox

2017-02-19  Jim Wilson  <jim.wilson@linaro.org>

	sim/aarch64/
	* simulator.c (do_vec_ADDV): Move val declaration inside each case,
	with type set to input type size.
	(do_vec_xtl): Change bias from 3 to 4 for byte case.

	sim/testsuite/sim/aarch64/
	* bit.s: Change cmp immediates to account for addv bug fix.
	* cmtst.s, ldn_single.s, stn_single.s: Likewise.
	* xtl.s: New.

diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c
index 7c28219..d31cb10 100644
--- a/sim/aarch64/simulator.c
+++ b/sim/aarch64/simulator.c
@@ -3433,7 +3433,6 @@  do_vec_ADDV (sim_cpu *cpu)
   unsigned vm = INSTR (9, 5);
   unsigned rd = INSTR (4, 0);
   unsigned i;
-  uint64_t val = 0;
   int      full = INSTR (30, 30);
 
   NYI_assert (29, 24, 0x0E);
@@ -3443,24 +3442,33 @@  do_vec_ADDV (sim_cpu *cpu)
   switch (INSTR (23, 22))
     {
     case 0:
-      for (i = 0; i < (full ? 16 : 8); i++)
-	val += aarch64_get_vec_u8 (cpu, vm, i);
-      aarch64_set_vec_u64 (cpu, rd, 0, val);
-      return;
+      {
+	uint8_t val = 0;
+	for (i = 0; i < (full ? 16 : 8); i++)
+	  val += aarch64_get_vec_u8 (cpu, vm, i);
+	aarch64_set_vec_u64 (cpu, rd, 0, val);
+	return;
+      }
 
     case 1:
-      for (i = 0; i < (full ? 8 : 4); i++)
-	val += aarch64_get_vec_u16 (cpu, vm, i);
-      aarch64_set_vec_u64 (cpu, rd, 0, val);
-      return;
+      {
+	uint16_t val = 0;
+	for (i = 0; i < (full ? 8 : 4); i++)
+	  val += aarch64_get_vec_u16 (cpu, vm, i);
+	aarch64_set_vec_u64 (cpu, rd, 0, val);
+	return;
+      }
 
     case 2:
-      if (! full)
-	HALT_UNALLOC;
-      for (i = 0; i < 4; i++)
-	val += aarch64_get_vec_u32 (cpu, vm, i);
-      aarch64_set_vec_u64 (cpu, rd, 0, val);
-      return;
+      {
+	uint32_t val = 0;
+	if (! full)
+	  HALT_UNALLOC;
+	for (i = 0; i < 4; i++)
+	  val += aarch64_get_vec_u32 (cpu, vm, i);
+	aarch64_set_vec_u64 (cpu, rd, 0, val);
+	return;
+      }
 
     case 3:
       HALT_UNALLOC;
@@ -5694,7 +5702,7 @@  do_vec_xtl (sim_cpu *cpu)
 	  NYI_assert (19, 19, 1);
 
 	  shift = INSTR (18, 16);
-	  bias *= 3;
+	  bias *= 4;
 	  for (i = 0; i < 8; i++)
 	    v[i] = aarch64_get_vec_s8 (cpu, vs, i + bias) << shift;
 	  for (i = 0; i < 8; i++)
@@ -5730,7 +5738,7 @@  do_vec_xtl (sim_cpu *cpu)
 	  NYI_assert (19, 19, 1);
 
 	  shift = INSTR (18, 16);
-	  bias *= 3;
+	  bias *= 4;
 	  for (i = 0; i < 8; i++)
 	    v[i] = aarch64_get_vec_u8 (cpu, vs, i + bias) << shift;
 	  for (i = 0; i < 8; i++)
diff --git a/sim/testsuite/sim/aarch64/bit.s b/sim/testsuite/sim/aarch64/bit.s
index 650d317..01a1d4e 100644
--- a/sim/testsuite/sim/aarch64/bit.s
+++ b/sim/testsuite/sim/aarch64/bit.s
@@ -34,56 +34,56 @@  mask:
 	bif v3.8b, v1.8b, v2.8b
 	addv b4, v3.8b
 	mov x1, v4.d[0]
-	cmp x1, #306
+	cmp x1, #50
 	bne .Lfailure
 
 	mov v3.16b, v0.16b
 	bif v3.16b, v1.16b, v2.16b
 	addv b4, v3.16b
 	mov x1, v4.d[0]
-	cmp x1, #1020
+	cmp x1, #252
 	bne .Lfailure
 
 	mov v3.8b, v0.8b
 	bit v3.8b, v1.8b, v2.8b
 	addv b4, v3.8b
 	mov x1, v4.d[0]
-	cmp x1, #306
+	cmp x1, #50
 	bne .Lfailure
 
 	mov v3.16b, v0.16b
 	bit v3.16b, v1.16b, v2.16b
 	addv b4, v3.16b
 	mov x1, v4.d[0]
-	cmp x1, #1037
+	cmp x1, #13
 	bne .Lfailure
 
 	mov v3.8b, v2.8b
 	bsl v3.8b, v0.8b, v1.8b
 	addv b4, v3.8b
 	mov x1, v4.d[0]
-	cmp x1, #306
+	cmp x1, #50
 	bne .Lfailure
 
 	mov v3.16b, v2.16b
 	bsl v3.16b, v0.16b, v1.16b
 	addv b4, v3.16b
 	mov x1, v4.d[0]
-	cmp x1, #1020
+	cmp x1, #252
 	bne .Lfailure
 
 	mov v3.8b, v0.8b
 	eor v3.8b, v1.8b, v2.8b
 	addv b4, v3.8b
 	mov x1, v4.d[0]
-	cmp x1, #1020
+	cmp x1, #252
 	bne .Lfailure
 
 	mov v3.16b, v0.16b
 	eor v3.16b, v1.16b, v2.16b
 	addv b4, v3.16b
 	mov x1, v4.d[0]
-	cmp x1, #2039
+	cmp x1, #247
 	bne .Lfailure
 
 	pass
diff --git a/sim/testsuite/sim/aarch64/cmtst.s b/sim/testsuite/sim/aarch64/cmtst.s
index 64c8b27..7e6a4c3 100644
--- a/sim/testsuite/sim/aarch64/cmtst.s
+++ b/sim/testsuite/sim/aarch64/cmtst.s
@@ -40,13 +40,13 @@  inputd2:
 	cmtst v2.8b, v0.8b, v1.8b
 	addv b3, v2.8b
 	mov x1, v3.d[0]
-	cmp x1, #0x5fa
+	cmp x1, #0xfa
 	bne .Lfailure
 
 	cmtst v2.16b, v0.16b, v1.16b
 	addv b3, v2.16b
 	mov x1, v3.d[0]
-	cmp x1, #0xbf4
+	cmp x1, #0xf4
 	bne .Lfailure
 
 	adrp x0, inputh
@@ -56,14 +56,14 @@  inputd2:
 	cmtst v2.4h, v0.4h, v1.4h
 	addv h3, v2.4h
 	mov x1, v3.d[0]
-	mov x2, #0x1fffe
+	mov x2, #0xfffe
 	cmp x1, x2
 	bne .Lfailure
 
 	cmtst v2.8h, v0.8h, v1.8h
 	addv h3, v2.8h
 	mov x1, v3.d[0]
-	mov x2, #0x3fffc
+	mov x2, #0xfffc
 	cmp x1, x2
 	bne .Lfailure
 
@@ -82,7 +82,7 @@  inputd2:
 	cmtst v2.4s, v0.4s, v1.4s
 	addv s3, v2.4s
 	mov x1, v3.d[0]
-	mov x2, #0x1fffffffe
+	mov x2, #0xfffffffe
 	cmp x1, x2
 	bne .Lfailure
 
diff --git a/sim/testsuite/sim/aarch64/ldn_single.s b/sim/testsuite/sim/aarch64/ldn_single.s
index 3102e9e..4c460fb 100644
--- a/sim/testsuite/sim/aarch64/ldn_single.s
+++ b/sim/testsuite/sim/aarch64/ldn_single.s
@@ -48,7 +48,7 @@  input:
 	mov x6, v3.d[0]
 	cmp x5, #221
 	bne .Lfailure
-	cmp x6, #307
+	cmp x6, #51
 	bne .Lfailure
 
 	mov x2, x0
@@ -68,7 +68,7 @@  input:
 	bne .Lfailure
 	cmp x5, #200
 	bne .Lfailure
-	cmp x6, #264
+	cmp x6, #8
 	bne .Lfailure
 
 	mov x2, x0
@@ -90,9 +90,9 @@  input:
 	bne .Lfailure
 	cmp x5, #232
 	bne .Lfailure
-	cmp x6, #296
+	cmp x6, #40
 	bne .Lfailure
-	cmp x7, #360
+	cmp x7, #104
 	bne .Lfailure
 
 	pass
diff --git a/sim/testsuite/sim/aarch64/stn_single.s b/sim/testsuite/sim/aarch64/stn_single.s
index 5527c84..2bd19cf 100644
--- a/sim/testsuite/sim/aarch64/stn_single.s
+++ b/sim/testsuite/sim/aarch64/stn_single.s
@@ -63,7 +63,7 @@  output:
 	mov x6, v5.d[0]
 	cmp x5, #136
 	bne .Lfailure
-	cmp x6, #264
+	cmp x6, #8
 	bne .Lfailure
 
 	mov x2, x1
@@ -114,7 +114,7 @@  output:
 	bne .Lfailure
 	cmp x6, #232
 	bne .Lfailure
-	cmp x7, #296
+	cmp x7, #40
 	bne .Lfailure
 
 	pass
diff --git a/sim/testsuite/sim/aarch64/xtl.s b/sim/testsuite/sim/aarch64/xtl.s
new file mode 100644
index 0000000..16ef892
--- /dev/null
+++ b/sim/testsuite/sim/aarch64/xtl.s
@@ -0,0 +1,101 @@ 
+#mach: aarch64
+
+# Check the extend long instructions: sxtl, sxtl2, uxtl, uxtl2.
+
+.include "testutils.inc"
+
+	.data
+	.align 4
+input:
+	.word 0x04030201
+	.word 0x08070605
+	.word 0xfcfdfeff
+	.word 0xf8f9fafb
+
+	start
+	adrp x0, input
+	ldr q0, [x0, #:lo12:input]
+
+	uxtl v1.8h, v0.8b
+	uxtl2 v2.8h, v0.16b
+	addv h3, v1.8h
+	addv h4, v2.8h
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	cmp x1, #36
+	bne .Lfailure
+	cmp x2, #2012
+	bne .Lfailure
+
+	uxtl v1.4s, v0.4h
+	uxtl2 v2.4s, v0.8h
+	addv s3, v1.4s
+	addv s4, v2.4s
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	mov x3, #5136
+	cmp x1, x3
+	bne .Lfailure
+	mov x4, #0xeff0
+	movk x4, 0x3, lsl #16
+	cmp x2, x4
+	bne .Lfailure
+
+	uxtl v1.2d, v0.2s
+	uxtl2 v2.2d, v0.4s
+	addv s3, v1.4s
+	addv s4, v2.4s
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	mov x3, #0x0806
+	movk x3, #0x0c0a, lsl #16
+	cmp x1, x3
+	bne .Lfailure
+	mov x4, #0xf9fa
+	movk x4, #0xf5f7, lsl #16
+	cmp x2, x4
+	bne .Lfailure
+
+	sxtl v1.8h, v0.8b
+	sxtl2 v2.8h, v0.16b
+	addv h3, v1.8h
+	addv h4, v2.8h
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	cmp x1, #36
+	bne .Lfailure
+	mov x3, #0xffdc
+	cmp x2, x3
+	bne .Lfailure
+
+	sxtl v1.4s, v0.4h
+	sxtl2 v2.4s, v0.8h
+	addv s3, v1.4s
+	addv s4, v2.4s
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	mov x3, #5136
+	cmp x1, x3
+	bne .Lfailure
+	mov w4, #0xffffeff0
+	cmp x2, x4
+	bne .Lfailure
+
+	sxtl v1.2d, v0.2s
+	sxtl2 v2.2d, v0.4s
+	addv s3, v1.4s
+	addv s4, v2.4s
+	mov x1, v3.d[0]
+	mov x2, v4.d[0]
+	mov x3, #0x0806
+	movk x3, #0x0c0a, lsl #16
+	cmp x1, x3
+	bne .Lfailure
+	mov x4, #0xf9f8
+	movk x4, #0xf5f7, lsl #16
+	cmp x2, x4
+	bne .Lfailure
+
+	pass
+.Lfailure:
+	fail