diff mbox series

[v2,13/18] tests/tcg: add vectorised sha512 versions

Message ID 20220225172021.3493923-14-alex.bennee@linaro.org
State Superseded
Headers show
Series testing and semihosting pre-PR | expand

Commit Message

Alex Bennée Feb. 25, 2022, 5:20 p.m. UTC
This builds vectorised versions of sha512 to exercise the vector code:

  - aarch64 (AdvSIMD)
  - i386 (SSE)
  - s390x (MVX)
  - ppc64/ppc64le (power10 vectors)

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20220202191242.652607-5-alex.bennee@linaro.org>
Message-Id: <20220211160309.335014-12-alex.bennee@linaro.org>

---
v2
  - expanded to include both flavours of ppc64
---
 tests/tcg/aarch64/Makefile.target | 7 +++++++
 tests/tcg/arm/Makefile.target     | 8 ++++++++
 tests/tcg/i386/Makefile.target    | 6 ++++++
 tests/tcg/ppc64/Makefile.target   | 9 ++++++++-
 tests/tcg/ppc64le/Makefile.target | 9 ++++++++-
 tests/tcg/s390x/Makefile.target   | 9 +++++++++
 tests/tcg/x86_64/Makefile.target  | 7 +++++++
 7 files changed, 53 insertions(+), 2 deletions(-)

Comments

Richard Henderson Feb. 25, 2022, 10:52 p.m. UTC | #1
On 2/25/22 07:20, Alex Bennée wrote:
> +++ b/tests/tcg/i386/Makefile.target
> @@ -71,3 +71,9 @@ TESTS=$(MULTIARCH_TESTS) $(I386_TESTS)
>   
>   # On i386 and x86_64 Linux only supports 4k pages (large pages are a different hack)
>   EXTRA_RUNS+=run-test-mmap-4096
> +
> +sha512-sse: CFLAGS=-msse4.1 -O3
> +sha512-sse: sha512.c
> +	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
> +
> +TESTS+=sha512-sse

The default cpu, qemu32, only implements sse3, not sse4.1, so we get a guest SIGILL.
We can execute this with -cpu max, or we could limit the vectorization.


r~
Alex Bennée Feb. 28, 2022, 1:58 p.m. UTC | #2
Richard Henderson <richard.henderson@linaro.org> writes:

> On 2/25/22 07:20, Alex Bennée wrote:
>> +++ b/tests/tcg/i386/Makefile.target
>> @@ -71,3 +71,9 @@ TESTS=$(MULTIARCH_TESTS) $(I386_TESTS)
>>     # On i386 and x86_64 Linux only supports 4k pages (large pages
>> are a different hack)
>>   EXTRA_RUNS+=run-test-mmap-4096
>> +
>> +sha512-sse: CFLAGS=-msse4.1 -O3
>> +sha512-sse: sha512.c
>> +	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
>> +
>> +TESTS+=sha512-sse
>
> The default cpu, qemu32, only implements sse3, not sse4.1, so we get a
> guest SIGILL.  We can execute this with -cpu max, or we could limit
> the vectorization.

OK I'll add:

run-sha512-vector: QEMU_OPTS+=-cpu max

TESTS+=sha512-sse                                                          

>
>
> r~
Alex Bennée Feb. 28, 2022, 4:43 p.m. UTC | #3
Alex Bennée <alex.bennee@linaro.org> writes:

> Richard Henderson <richard.henderson@linaro.org> writes:
>
>> On 2/25/22 07:20, Alex Bennée wrote:
>>> +++ b/tests/tcg/i386/Makefile.target
>>> @@ -71,3 +71,9 @@ TESTS=$(MULTIARCH_TESTS) $(I386_TESTS)
>>>     # On i386 and x86_64 Linux only supports 4k pages (large pages
>>> are a different hack)
>>>   EXTRA_RUNS+=run-test-mmap-4096
>>> +
>>> +sha512-sse: CFLAGS=-msse4.1 -O3
>>> +sha512-sse: sha512.c
>>> +	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
>>> +
>>> +TESTS+=sha512-sse
>>
>> The default cpu, qemu32, only implements sse3, not sse4.1, so we get a
>> guest SIGILL.  We can execute this with -cpu max, or we could limit
>> the vectorization.
>
> OK I'll add:
>
> run-sha512-vector: QEMU_OPTS+=-cpu max
>
> TESTS+=sha512-sse

Or, you know, something that will actually work :-/

  sha512-sse: CFLAGS=-msse4.1 -O3
  sha512-sse: sha512.c
          $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)

  run-sha512-sse: QEMU_OPTS+=-cpu max
  run-plugin-sha512-sse-with-%: QEMU_OPTS+=-cpu max

  TESTS+=sha512-sse

>
>>
>>
>> r~
Richard Henderson Feb. 28, 2022, 8:56 p.m. UTC | #4
On 2/25/22 07:20, Alex Bennée wrote:
> +# TCG does not yet support all SSE (SIGILL on pshufb)
> +# sha512-sse: CFLAGS=-march=core2 -O3
> +# sha512-sse: sha512.c
> +# 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
> +

Given

ops_sse.h:void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
ops_sse_header.h:DEF_HELPER_3(glue(pshufb, SUFFIX), void, env, Reg, Reg)
tcg/translate.c:    [0x00] = SSSE3_OP(pshufb),

I'm inclined to think -cpu max might work here as well.  Yep:

static const X86CPUDefinition builtin_x86_defs[] = {
     {
         .name = "qemu64",
...
         .features[FEAT_1_ECX] =
             CPUID_EXT_SSE3 | CPUID_EXT_CX16,

includes neither SSSE3 (which pshufb needs) nor SSE4.


r~
diff mbox series

Patch

diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
index df3f8e9438..ac07acde66 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -60,6 +60,13 @@  run-sha1-vector: sha1-vector run-sha1
 
 TESTS += sha1-vector
 
+# Vectorised (AdvSIMD) build of sha512.c; -O3 triggers vectorisation
+sha512-vector: CFLAGS=-O3
+sha512-vector: sha512.c
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
+
+TESTS += sha512-vector
+
 ifneq ($(HAVE_GDB_BIN),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
 
diff --git a/tests/tcg/arm/Makefile.target b/tests/tcg/arm/Makefile.target
index 2dc94931c3..2f815120a5 100644
--- a/tests/tcg/arm/Makefile.target
+++ b/tests/tcg/arm/Makefile.target
@@ -79,6 +79,14 @@  run-sha1-vector: sha1-vector run-sha1
 	$(call diff-out, sha1-vector, sha1.out)
 
 ARM_TESTS += sha1-vector
+
+# Vectorised build of sha512.c; -O3 triggers vectorisation
+sha512-vector: CFLAGS=-O3
+sha512-vector: sha512.c
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
+
+ARM_TESTS += sha512-vector
+
 TESTS += $(ARM_TESTS)
 
 # On ARM Linux only supports 4k pages
diff --git a/tests/tcg/i386/Makefile.target b/tests/tcg/i386/Makefile.target
index 38c10379af..099556efdc 100644
--- a/tests/tcg/i386/Makefile.target
+++ b/tests/tcg/i386/Makefile.target
@@ -71,3 +71,14 @@  TESTS=$(MULTIARCH_TESTS) $(I386_TESTS)
 
 # On i386 and x86_64 Linux only supports 4k pages (large pages are a different hack)
 EXTRA_RUNS+=run-test-mmap-4096
+
+# SSE version of sha512 (-msse4.1 with -O3 triggers vectorisation)
+sha512-sse: CFLAGS=-msse4.1 -O3
+sha512-sse: sha512.c
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
+
+# The default CPU model only implements SSE3, so SSE4.1 code needs -cpu max
+run-sha512-sse: QEMU_OPTS+=-cpu max
+run-plugin-sha512-sse-with-%: QEMU_OPTS+=-cpu max
+
+TESTS+=sha512-sse
diff --git a/tests/tcg/ppc64/Makefile.target b/tests/tcg/ppc64/Makefile.target
index 9d6dfc1e26..c9498053df 100644
--- a/tests/tcg/ppc64/Makefile.target
+++ b/tests/tcg/ppc64/Makefile.target
@@ -13,12 +13,20 @@  $(PPC64_TESTS): CFLAGS += -mpower8-vector
 PPC64_TESTS += mtfsf
 
 ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_POWER10),)
-PPC64_TESTS += byte_reverse
+PPC64_TESTS += byte_reverse sha512-vector
 endif
 byte_reverse: CFLAGS += -mcpu=power10
 run-byte_reverse: QEMU_OPTS+=-cpu POWER10
 run-plugin-byte_reverse-with-%: QEMU_OPTS+=-cpu POWER10
 
+# Vector version of sha512 (-O3 triggers vectorisation); built for and run with -cpu POWER10
+sha512-vector: CFLAGS +=-mcpu=power10 -O3
+sha512-vector: sha512.c
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
+
+run-sha512-vector: QEMU_OPTS+=-cpu POWER10
+run-plugin-sha512-vector-with-%: QEMU_OPTS+=-cpu POWER10
+
 PPC64_TESTS += signal_save_restore_xer
 
 TESTS += $(PPC64_TESTS)
diff --git a/tests/tcg/ppc64le/Makefile.target b/tests/tcg/ppc64le/Makefile.target
index 480ff0898d..12d85e946b 100644
--- a/tests/tcg/ppc64le/Makefile.target
+++ b/tests/tcg/ppc64le/Makefile.target
@@ -10,12 +10,20 @@  endif
 $(PPC64LE_TESTS): CFLAGS += -mpower8-vector
 
 ifneq ($(DOCKER_IMAGE)$(CROSS_CC_HAS_POWER10),)
-PPC64LE_TESTS += byte_reverse
+PPC64LE_TESTS += byte_reverse sha512-vector
 endif
 byte_reverse: CFLAGS += -mcpu=power10
 run-byte_reverse: QEMU_OPTS+=-cpu POWER10
 run-plugin-byte_reverse-with-%: QEMU_OPTS+=-cpu POWER10
 
+# Vector version of sha512 (-O3 triggers vectorisation); built for and run with -cpu POWER10
+sha512-vector: CFLAGS +=-mcpu=power10 -O3
+sha512-vector: sha512.c
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
+
+run-sha512-vector: QEMU_OPTS+=-cpu POWER10
+run-plugin-sha512-vector-with-%: QEMU_OPTS+=-cpu POWER10
+
 PPC64LE_TESTS += mtfsf
 PPC64LE_TESTS += signal_save_restore_xer
 
diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
index 1a7238b4eb..e53b599b22 100644
--- a/tests/tcg/s390x/Makefile.target
+++ b/tests/tcg/s390x/Makefile.target
@@ -25,3 +25,14 @@  run-gdbstub-signals-s390x: signals-s390x
 
 EXTRA_RUNS += run-gdbstub-signals-s390x
 endif
+
+# MVX version of sha512 (-march=z13 -mvx with -O3 triggers vectorisation)
+sha512-mvx: CFLAGS=-march=z13 -mvx -O3
+sha512-mvx: sha512.c
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
+
+# Use -cpu max for the plugin runs as well as the plain run
+run-sha512-mvx: QEMU_OPTS+=-cpu max
+run-plugin-sha512-mvx-with-%: QEMU_OPTS+=-cpu max
+
+TESTS+=sha512-mvx
diff --git a/tests/tcg/x86_64/Makefile.target b/tests/tcg/x86_64/Makefile.target
index 4a8a464c57..17cf168f0a 100644
--- a/tests/tcg/x86_64/Makefile.target
+++ b/tests/tcg/x86_64/Makefile.target
@@ -22,3 +22,10 @@  test-x86_64: test-i386.c test-i386.h test-i386-shift.h test-i386-muldiv.h
 
 vsyscall: $(SRC_PATH)/tests/tcg/x86_64/vsyscall.c
 	$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
+
+# NOTE(review): TCG does implement pshufb; the SIGILL comes from the default CPU model lacking SSSE3, so -cpu max should allow this
+# sha512-sse: CFLAGS=-march=core2 -O3
+# sha512-sse: sha512.c
+# 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
+
+TESTS+=sha512-sse