aarch64 sim mls and movi bug fixes

Message ID CABXYE2WOYKD-B=zscOG1sU3E8Put_6SnwsMPh3=PJ+eRXtu_ww@mail.gmail.com
State New
Headers show

Commit Message

Jim Wilson Jan. 2, 2017, 11:12 p.m.
The mls instruction computes accumulator - product, but the simulator
accidentally has the subtract operands swapped.  While writing the
testcase for this problem, I discovered that the movi instruction is
broken, there is a missing break after the half word support.  Both
are simple fixes.

The testcase works with the patch, and fails without the patch.  The
mls fix takes GCC C testsuite failures from 2407 to 2406, and the movi
fix then takes it from 2406 to 2295.

Jim

Comments

Nick Clifton Jan. 3, 2017, 2:36 p.m. | #1
Hi Jim,

> The mls instruction computes accumulator - product, but the simulator

> accidentally has the subtract operands swapped.  While writing the

> testcase for this problem, I discovered that the movi instruction is

> broken, there is a missing break after the half word support.  Both

> are simple fixes.

> 

> The testcase works with the patch, and fails without the patch.  The

> mls fix takes GCC C testsuite failures from 2407 to 2406, and the movi

> fix then takes it from 2406 to 2295.


Approved - please apply.

Cheers
  Nick

Patch

	sim/aarch64/
	* simulator.c (do_vec_MOV_immediate, case 0x8): Add missing break.
	(do_vec_MLS): Reverse order of subtraction operands.

	sim/testsuite/sim/aarch64/
	* mls.s: New.

diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c
index be3d6c7..65a0e2e 100644
--- a/sim/aarch64/simulator.c
+++ b/sim/aarch64/simulator.c
@@ -3221,7 +3221,8 @@  do_vec_MOV_immediate (sim_cpu *cpu)
     case 0x8: /* 16-bit, no shift.  */
       for (i = 0; i < (full ? 8 : 4); i++)
 	aarch64_set_vec_u16 (cpu, vd, i, val);
-      /* Fall through.  */
+      break;
+
     case 0xd: /* 32-bit, mask shift by 16.  */
       val <<= 8;
       val |= 0xFF;
@@ -6346,25 +6347,25 @@  do_vec_MLS (sim_cpu *cpu)
     case 0:
       for (i = 0; i < (full ? 16 : 8); i++)
 	aarch64_set_vec_u8 (cpu, vd, i,
-			    (aarch64_get_vec_u8 (cpu, vn, i)
-			     * aarch64_get_vec_u8 (cpu, vm, i))
-			    - aarch64_get_vec_u8 (cpu, vd, i));
+			    aarch64_get_vec_u8 (cpu, vd, i)
+			    - (aarch64_get_vec_u8 (cpu, vn, i)
+			       * aarch64_get_vec_u8 (cpu, vm, i)));
       return;
 
     case 1:
       for (i = 0; i < (full ? 8 : 4); i++)
 	aarch64_set_vec_u16 (cpu, vd, i,
-			     (aarch64_get_vec_u16 (cpu, vn, i)
-			      * aarch64_get_vec_u16 (cpu, vm, i))
-			     - aarch64_get_vec_u16 (cpu, vd, i));
+			     aarch64_get_vec_u16 (cpu, vd, i)
+			     - (aarch64_get_vec_u16 (cpu, vn, i)
+				* aarch64_get_vec_u16 (cpu, vm, i)));
       return;
 
     case 2:
       for (i = 0; i < (full ? 4 : 2); i++)
 	aarch64_set_vec_u32 (cpu, vd, i,
-			     (aarch64_get_vec_u32 (cpu, vn, i)
-			      * aarch64_get_vec_u32 (cpu, vm, i))
-			     - aarch64_get_vec_u32 (cpu, vd, i));
+			     aarch64_get_vec_u32 (cpu, vd, i)
+			     - (aarch64_get_vec_u32 (cpu, vn, i)
+				* aarch64_get_vec_u32 (cpu, vm, i)));
       return;
 
     default:
diff --git a/sim/testsuite/sim/aarch64/mls.s b/sim/testsuite/sim/aarch64/mls.s
new file mode 100644
index 0000000..a34a1aa
--- /dev/null
+++ b/sim/testsuite/sim/aarch64/mls.s
@@ -0,0 +1,103 @@ 
+# mach: aarch64
+
+# Check the vector multiply subtract instruction: mls.
+
+.include "testutils.inc"
+
+input:
+	.word 0x04030201
+	.word 0x08070605
+	.word 0x0c0b0a09
+	.word 0x100f0e0d
+m8b:
+	.word 0xf1f8fd00
+	.word 0xc1d0dde8
+m16b:
+	.word 0xf1f8fd00
+	.word 0xc1d0dde8
+	.word 0x71889db0
+	.word 0x01203d58
+m4h:
+	.word 0xe7f8fc00
+	.word 0x8fd0c3e8
+m8h:
+	.word 0xe7f8fc00
+	.word 0x8fd0c3e8
+	.word 0xf7884bb0
+	.word 0x1f209358
+m2s:
+	.word 0xebf5fc00
+	.word 0x5b95c3e8
+m4s:
+	.word 0xebf5fc00
+	.word 0x5b95c3e8
+	.word 0x4ad54bb0
+	.word 0xb9b49358
+
+	start
+	adrp x0, input
+	ldr q0, [x0, #:lo12:input]
+
+	movi v1.8b, #1
+	mls v1.8b, v0.8b, v0.8b
+	mov x1, v1.d[0]
+	adrp x3, m8b
+	ldr x4, [x3, #:lo12:m8b]
+	cmp x1, x4
+	bne .Lfailure
+
+	movi v1.16b, #1
+	mls v1.16b, v0.16b, v0.16b
+	mov x1, v1.d[0]
+	mov x2, v1.d[1]
+	adrp x3, m16b
+	ldr x4, [x3, #:lo12:m16b]
+	cmp x1, x4
+	bne .Lfailure
+	ldr x5, [x3, #:lo12:m16b+8]
+	cmp x2, x5
+	bne .Lfailure
+
+	movi v1.4h, #1
+	mls v1.4h, v0.4h, v0.4h
+	mov x1, v1.d[0]
+	adrp x3, m4h
+	ldr x4, [x3, #:lo12:m4h]
+	cmp x1, x4
+	bne .Lfailure
+
+	movi v1.8h, #1
+	mls v1.8h, v0.8h, v0.8h
+	mov x1, v1.d[0]
+	mov x2, v1.d[1]
+	adrp x3, m8h
+	ldr x4, [x3, #:lo12:m8h]
+	cmp x1, x4
+	bne .Lfailure
+	ldr x5, [x3, #:lo12:m8h+8]
+	cmp x2, x5
+	bne .Lfailure
+
+	movi v1.2s, #1
+	mls v1.2s, v0.2s, v0.2s
+	mov x1, v1.d[0]
+	adrp x3, m2s
+	ldr x4, [x3, #:lo12:m2s]
+	cmp x1, x4
+	bne .Lfailure
+
+	movi v1.4s, #1
+	mls v1.4s, v0.4s, v0.4s
+	mov x1, v1.d[0]
+	mov x2, v1.d[1]
+	adrp x3, m4s
+	ldr x4, [x3, #:lo12:m4s]
+	cmp x1, x4
+	bne .Lfailure
+	ldr x5, [x3, #:lo12:m4s+8]
+	cmp x2, x5
+	bne .Lfailure
+
+	pass
+.Lfailure:
+	fail