sim/aarch64/
* simulator.c (do_vec_MOV_immediate, case 0x8): Add missing break.
(do_vec_MLS): Reverse order of subtraction operands.
sim/testsuite/sim/aarch64/
* mls.s: New.
@@ -3221,7 +3221,8 @@ do_vec_MOV_immediate (sim_cpu *cpu)
case 0x8: /* 16-bit, no shift. */
for (i = 0; i < (full ? 8 : 4); i++)
aarch64_set_vec_u16 (cpu, vd, i, val);
- /* Fall through. */
+ break;
+
case 0xd: /* 32-bit, mask shift by 16. */
val <<= 8;
val |= 0xFF;
@@ -6346,25 +6347,25 @@ do_vec_MLS (sim_cpu *cpu)
case 0:
for (i = 0; i < (full ? 16 : 8); i++)
aarch64_set_vec_u8 (cpu, vd, i,
- (aarch64_get_vec_u8 (cpu, vn, i)
- * aarch64_get_vec_u8 (cpu, vm, i))
- - aarch64_get_vec_u8 (cpu, vd, i));
+ aarch64_get_vec_u8 (cpu, vd, i)
+ - (aarch64_get_vec_u8 (cpu, vn, i)
+ * aarch64_get_vec_u8 (cpu, vm, i)));
return;
case 1:
for (i = 0; i < (full ? 8 : 4); i++)
aarch64_set_vec_u16 (cpu, vd, i,
- (aarch64_get_vec_u16 (cpu, vn, i)
- * aarch64_get_vec_u16 (cpu, vm, i))
- - aarch64_get_vec_u16 (cpu, vd, i));
+ aarch64_get_vec_u16 (cpu, vd, i)
+ - (aarch64_get_vec_u16 (cpu, vn, i)
+ * aarch64_get_vec_u16 (cpu, vm, i)));
return;
case 2:
for (i = 0; i < (full ? 4 : 2); i++)
aarch64_set_vec_u32 (cpu, vd, i,
- (aarch64_get_vec_u32 (cpu, vn, i)
- * aarch64_get_vec_u32 (cpu, vm, i))
- - aarch64_get_vec_u32 (cpu, vd, i));
+ aarch64_get_vec_u32 (cpu, vd, i)
+ - (aarch64_get_vec_u32 (cpu, vn, i)
+ * aarch64_get_vec_u32 (cpu, vm, i)));
return;
default:
new file mode 100644
@@ -0,0 +1,103 @@
+# mach: aarch64
+
+# Check the vector multiply subtract instruction: mls.
+
+.include "testutils.inc"
+
+input:
+ .word 0x04030201
+ .word 0x08070605
+ .word 0x0c0b0a09
+ .word 0x100f0e0d
+m8b:
+ .word 0xf1f8fd00
+ .word 0xc1d0dde8
+m16b:
+ .word 0xf1f8fd00
+ .word 0xc1d0dde8
+ .word 0x71889db0
+ .word 0x01203d58
+m4h:
+ .word 0xe7f8fc00
+ .word 0x8fd0c3e8
+m8h:
+ .word 0xe7f8fc00
+ .word 0x8fd0c3e8
+ .word 0xf7884bb0
+ .word 0x1f209358
+m2s:
+ .word 0xebf5fc00
+ .word 0x5b95c3e8
+m4s:
+ .word 0xebf5fc00
+ .word 0x5b95c3e8
+ .word 0x4ad54bb0
+ .word 0xb9b49358
+
+ start
+ adrp x0, input
+ ldr q0, [x0, #:lo12:input]
+
+ movi v1.8b, #1
+ mls v1.8b, v0.8b, v0.8b
+ mov x1, v1.d[0]
+ adrp x3, m8b
+ ldr x4, [x3, #:lo12:m8b]
+ cmp x1, x4
+ bne .Lfailure
+
+ movi v1.16b, #1
+ mls v1.16b, v0.16b, v0.16b
+ mov x1, v1.d[0]
+ mov x2, v1.d[1]
+ adrp x3, m16b
+ ldr x4, [x3, #:lo12:m16b]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:m16b+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ movi v1.4h, #1
+ mls v1.4h, v0.4h, v0.4h
+ mov x1, v1.d[0]
+ adrp x3, m4h
+ ldr x4, [x3, #:lo12:m4h]
+ cmp x1, x4
+ bne .Lfailure
+
+ movi v1.8h, #1
+ mls v1.8h, v0.8h, v0.8h
+ mov x1, v1.d[0]
+ mov x2, v1.d[1]
+ adrp x3, m8h
+ ldr x4, [x3, #:lo12:m8h]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:m8h+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ movi v1.2s, #1
+ mls v1.2s, v0.2s, v0.2s
+ mov x1, v1.d[0]
+ adrp x3, m2s
+ ldr x4, [x3, #:lo12:m2s]
+ cmp x1, x4
+ bne .Lfailure
+
+ movi v1.4s, #1
+ mls v1.4s, v0.4s, v0.4s
+ mov x1, v1.d[0]
+ mov x2, v1.d[1]
+ adrp x3, m4s
+ ldr x4, [x3, #:lo12:m4s]
+ cmp x1, x4
+ bne .Lfailure
+ ldr x5, [x3, #:lo12:m4s+8]
+ cmp x2, x5
+ bne .Lfailure
+
+ pass
+.Lfailure:
+ fail