aarch64 sim, add cnt insn support

Message ID CABXYE2W2z-n+d_QZvH6epMcS-zerQYaimz475DxSuXd4Ebtm4w@mail.gmail.com
State New
Headers show

Commit Message

Jim Wilson Feb. 26, 2017, 3:53 a.m.
This patch adds missing support for the cnt (popcount) instruction.

The new testcase fails without the patch and works with the patch.
This patch reduces GCC C testsuite failures from 1510 to 1493 (-17).

Jim

Comments

Jim Wilson March 14, 2017, 3:59 a.m. | #1
On Sun, Mar 12, 2017 at 9:41 PM, Mike Frysinger <vapier@gentoo.org> wrote:
> On 25 Feb 2017 19:53, Jim Wilson wrote:
>> +/* Return the number of bits set in the input value.  */
>> +#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
>> +# define popcount __builtin_popcount
>> +#else
>> +static int
>> +popcount (unsigned char x)
>> ...
>
> gas/config/tc-ia64.c does the same thing, and bfd/elf32-arm.c.
> should we consolidate these in libiberty ?


I noticed the other popcount functions, but 3 didn't seem like a high
enough number to me to justify generalizing the function.  Also note
that the one in the aarch64 sim isn't exactly the same as the others,
as the others take int input and mine takes char input.  So the
aarch64 sim one is faster than the others, as it doesn't need
recursion or a loop.

Jim

Patch hide | download patch | download mbox

2017-02-25  Jim Wilson  <jim.wilson@linaro.org>

	sim/aarch64/
	* simulator.c (popcount): New.
	(do_vec_CNT): New.
	(do_vec_op1): Add do_vec_CNT call.

	sim/testsuite/sim/aarch64/
	* cnt.s: New.

diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c
index d31cb10..f66ca78 100644
--- a/sim/aarch64/simulator.c
+++ b/sim/aarch64/simulator.c
@@ -4197,6 +4288,56 @@  do_vec_XTN (sim_cpu *cpu)
     }
 }
 
+/* Return the number of bits set in the input (the builtin on GNUC >= 3.4).  */
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+# define popcount __builtin_popcount
+#else
+static int
+popcount (unsigned char x)
+{
+  static const unsigned char popcnt[16] =
+    {
+      0, 1, 1, 2,  /* Bit counts for 0x0 - 0x3.  */
+      1, 2, 2, 3,  /* Bit counts for 0x4 - 0x7.  */
+      1, 2, 2, 3,  /* Bit counts for 0x8 - 0xb.  */
+      2, 3, 3, 4   /* Bit counts for 0xc - 0xf.  */
+    };
+
+  /* Only the low 8 bits are counted (all we need): one lookup per nibble.  */
+  return popcnt[x % 16] + popcnt[x / 16];
+}
+#endif
+
+static void
+do_vec_CNT (sim_cpu *cpu)
+{
+  /* instr[31]    = 0
+     instr[30]    = half (0) / full (1): process 8 or 16 byte elements
+     instr[29,24] = 00 1110
+     instr[23,22] = size: byte (00); any other value ends in HALT_UNALLOC
+     instr[21,10] = 1000 0001 0110
+     instr[9,5]   = Vs, the vector whose bytes are bit-counted
+     instr[4,0]   = Vd, the vector receiving one count per byte.  */
+
+  unsigned vs = INSTR (9, 5);
+  unsigned vd = INSTR (4, 0);
+  int full = INSTR (30, 30);
+  int size = INSTR (23, 22);
+  int i;
+
+  NYI_assert (29, 24, 0x0E);
+  NYI_assert (21, 10, 0x816);
+
+  if (size != 0)
+    HALT_UNALLOC;
+
+  TRACE_DECODE (cpu, "emulated at line %d", __LINE__);
+
+  for (i = 0; i < (full ? 16 : 8); i++)
+    aarch64_set_vec_u8 (cpu, vd, i,
+			popcount (aarch64_get_vec_u8 (cpu, vs, i)));
+}
+
 static void
 do_vec_maxv (sim_cpu *cpu)
 {
@@ -5605,6 +5691,7 @@  do_vec_op1 (sim_cpu *cpu)
     case 0x08: do_vec_sub_long (cpu); return;
     case 0x0a: do_vec_XTN (cpu); return;
     case 0x11: do_vec_SSHL (cpu); return;
+    case 0x16: do_vec_CNT (cpu); return;
     case 0x19: do_vec_max (cpu); return;
     case 0x1B: do_vec_min (cpu); return;
     case 0x21: do_vec_add (cpu); return;
diff --git a/sim/testsuite/sim/aarch64/cnt.s b/sim/testsuite/sim/aarch64/cnt.s
new file mode 100644
index 0000000..cf53fe0
--- /dev/null
+++ b/sim/testsuite/sim/aarch64/cnt.s
@@ -0,0 +1,33 @@ 
+# mach: aarch64
+
+# Check the popcount instruction cnt in both its 8b and 16b forms.
+
+.include "testutils.inc"
+
+	.data
+	.align 4
+input:
+	.word 0x04030201
+	.word 0x0f070605
+	.word 0x44332211
+	.word 0xff776655
+
+	start
+	adrp x0, input
+	ldr q0, [x0, #:lo12:input]
+# 8b form: per-byte counts 1+1+2+1+2+2+3+4 are summed by addv to 16.
+	cnt v1.8b, v0.8b
+	addv b2, v1.8b
+	mov x1, v2.d[0]
+	cmp x1, #16
+	bne .Lfailure
+# 16b form: the other eight bytes add 2+2+4+2+4+4+6+8 = 32, total 48.
+	cnt v1.16b, v0.16b
+	addv b2, v1.16b
+	mov x1, v2.d[0]
+	cmp x1, #48
+	bne .Lfailure
+
+	pass
+.Lfailure:
+	fail