diff mbox series

[v2] target/arm: Fix SVE STR increment

Message ID 20231031143215.29764-1-richard.henderson@linaro.org
State New
Headers show
Series [v2] target/arm: Fix SVE STR increment | expand

Commit Message

Richard Henderson Oct. 31, 2023, 2:32 p.m. UTC
The previous change missed updating one of the increments and
one of the MemOps.  Add a test case for all vector lengths.

Cc: qemu-stable@nongnu.org
Fixes: e6dd5e782be ("target/arm: Use tcg_gen_qemu_{ld, st}_i128 in gen_sve_{ld, st}r")
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/tcg/translate-sve.c    |  5 ++--
 tests/tcg/aarch64/sve-str.c       | 49 +++++++++++++++++++++++++++++++
 tests/tcg/aarch64/Makefile.target |  6 +++-
 3 files changed, 57 insertions(+), 3 deletions(-)
 create mode 100644 tests/tcg/aarch64/sve-str.c

Comments

Philippe Mathieu-Daudé Oct. 31, 2023, 2:45 p.m. UTC | #1
On 31/10/23 15:32, Richard Henderson wrote:
> The previous change missed updating one of the increments and
> one of the MemOps.  Add a test case for all vector lengths.
> 
> Cc: qemu-stable@nongnu.org
> Fixes: e6dd5e782be ("target/arm: Use tcg_gen_qemu_{ld, st}_i128 in gen_sve_{ld, st}r")
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/arm/tcg/translate-sve.c    |  5 ++--
>   tests/tcg/aarch64/sve-str.c       | 49 +++++++++++++++++++++++++++++++
>   tests/tcg/aarch64/Makefile.target |  6 +++-
>   3 files changed, 57 insertions(+), 3 deletions(-)
>   create mode 100644 tests/tcg/aarch64/sve-str.c

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Peter Maydell Nov. 2, 2023, 11:13 a.m. UTC | #2
On Tue, 31 Oct 2023 at 14:33, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> The previous change missed updating one of the increments and
> one of the MemOps.  Add a test case for all vector lengths.
>
> Cc: qemu-stable@nongnu.org
> Fixes: e6dd5e782be ("target/arm: Use tcg_gen_qemu_{ld, st}_i128 in gen_sve_{ld, st}r")
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

I'm told that with this fix we now pass the llvm-test-suite
when built for SVE2, with a variety of vector lengths.

Applied to target-arm.next, thanks.

-- PMM
Alex Bennée Nov. 9, 2023, 12:23 p.m. UTC | #3
Richard Henderson <richard.henderson@linaro.org> writes:

> The previous change missed updating one of the increments and
> one of the MemOps.  Add a test case for all vector lengths.
>
> Cc: qemu-stable@nongnu.org
> Fixes: e6dd5e782be ("target/arm: Use tcg_gen_qemu_{ld, st}_i128 in gen_sve_{ld, st}r")
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

This was Cc'd for stable but didn't make it in. There is a trivial
re-base conflict but I can re-send the patch if that helps.

With this:

   tuxrun --runtime docker \
     --qemu-binary ~/lsrc/qemu.git/builds/bisect/qemu-system-aarch64 \
     --device qemu-arm64 --boot-args rw \
     --kernel https://storage.tuxsuite.com/public/linaro/lkft/builds/2XmWuGCI7saydsrZw4FcWSu6JGQ/Image.gz \
     --modules https://storage.tuxsuite.com/public/linaro/lkft/builds/2XmWuGCI7saydsrZw4FcWSu6JGQ/modules.tar.xz \
     --rootfs https://storage.tuxboot.com/debian/bookworm/arm64/rootfs.ext4.xz \
     --parameters SHARD_INDEX=4 --parameters SKIPFILE=skipfile-lkft.yaml \
     --parameters SHARD_NUMBER=4 \
     --parameters KSELFTEST=https://storage.tuxsuite.com/public/linaro/lkft/builds/2XmWuGCI7saydsrZw4FcWSu6JGQ/kselftest.tar.xz \
     --image docker.io/linaro/tuxrun-dispatcher:v0.52.0 --tests kselftest-arm64 --timeouts boot=30 kselftest-arm64=60

on my branch:

  https://gitlab.com/stsquad/qemu/-/tree/for-8.1-stable?ref_type=heads

the test works.

> ---
>  target/arm/tcg/translate-sve.c    |  5 ++--
>  tests/tcg/aarch64/sve-str.c       | 49 +++++++++++++++++++++++++++++++
>  tests/tcg/aarch64/Makefile.target |  6 +++-
>  3 files changed, 57 insertions(+), 3 deletions(-)
>  create mode 100644 tests/tcg/aarch64/sve-str.c
>
> diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
> index 7b39962f20..296e7d1ce2 100644
> --- a/target/arm/tcg/translate-sve.c
> +++ b/target/arm/tcg/translate-sve.c
> @@ -4294,7 +4294,7 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
>          t0 = tcg_temp_new_i64();
>          t1 = tcg_temp_new_i64();
>          t16 = tcg_temp_new_i128();
> -        for (i = 0; i < len_align; i += 8) {
> +        for (i = 0; i < len_align; i += 16) {
>              tcg_gen_ld_i64(t0, base, vofs + i);
>              tcg_gen_ld_i64(t1, base, vofs + i + 8);
>              tcg_gen_concat_i64_i128(t16, t0, t1);
> @@ -4320,7 +4320,8 @@ void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
>          t16 = tcg_temp_new_i128();
>          tcg_gen_concat_i64_i128(t16, t0, t1);
>  
> -        tcg_gen_qemu_st_i128(t16, clean_addr, midx, MO_LEUQ);
> +        tcg_gen_qemu_st_i128(t16, clean_addr, midx,
> +                             MO_LE | MO_128 | MO_ATOM_NONE);
>          tcg_gen_addi_i64(clean_addr, clean_addr, 16);
>  
>          tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
> diff --git a/tests/tcg/aarch64/sve-str.c b/tests/tcg/aarch64/sve-str.c
> new file mode 100644
> index 0000000000..551f0d6f18
> --- /dev/null
> +++ b/tests/tcg/aarch64/sve-str.c
> @@ -0,0 +1,49 @@
> +#include <stdio.h>
> +#include <sys/prctl.h>
> +
> +#define N  (256+16)
> +
> +static int __attribute__((noinline)) test(int vl)
> +{
> +    unsigned char buf[N];
> +    int err = 0;
> +
> +    for (int i = 0; i < N; ++i) {
> +        buf[i] = (unsigned char)i;
> +    }
> +
> +    asm volatile (
> +        "mov z0.b, #255\n\t"
> +        "str z0, %0"
> +        : : "m" (buf) : "z0", "memory");
> +
> +    for (int i = 0; i < vl; ++i) {
> +        if (buf[i] != 0xff) {
> +            fprintf(stderr, "vl %d, index %d, expected 255, got %d\n",
> +                    vl, i, buf[i]);
> +            err = 1;
> +        }
> +    }
> +
> +    for (int i = vl; i < N; ++i) {
> +        if (buf[i] != (unsigned char)i) {
> +            fprintf(stderr, "vl %d, index %d, expected %d, got %d\n",
> +                    vl, i, (unsigned char)i, buf[i]);
> +            err = 1;
> +        }
> +    }
> +
> +    return err;
> +}
> +
> +int main()
> +{
> +    int err = 0;
> +
> +    for (int i = 16; i <= 256; i += 16) {
> +        if (prctl(PR_SVE_SET_VL, i, 0, 0, 0, 0) == i) {
> +            err |= test(i);
> +        }
> +    }
> +    return err;
> +}
> diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
> index 62b38c792f..c6542b5f1b 100644
> --- a/tests/tcg/aarch64/Makefile.target
> +++ b/tests/tcg/aarch64/Makefile.target
> @@ -103,7 +103,11 @@ sha512-sve: CFLAGS=-O3 -march=armv8.1-a+sve
>  sha512-sve: sha512.c
>  	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
>  
> -TESTS += sha512-sve
> +sve-str: CFLAGS=-O1 -march=armv8.1-a+sve
> +sve-str: sve-str.c
> +	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
> +
> +TESTS += sha512-sve sve-str
>  
>  ifneq ($(GDB),)
>  GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py
Michael Tokarev Nov. 9, 2023, 1:20 p.m. UTC | #4
09.11.2023 15:23, Alex Bennée:
> Richard Henderson <richard.henderson@linaro.org> writes:
> 
>> The previous change missed updating one of the increments and
>> one of the MemOps.  Add a test case for all vector lengths.
>>
>> Cc: qemu-stable@nongnu.org
>> Fixes: e6dd5e782be ("target/arm: Use tcg_gen_qemu_{ld, st}_i128 in gen_sve_{ld, st}r")
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> 
> This was Cc'd for stable but didn't make it in. There is a trivial
> re-base conflict but I can re-send the patch if that helps.

Nope, I picked it up, here:

  https://gitlab.com/mjt0k/qemu/-/commits/staging-8.1/?ref_type=heads

commit eabe320b, committed 5 days ago - about the time it was
committed to master.

I want to send the announcement for the next 8.1 release, I just haven't decided
when to do that - usually there are quite a few fixes coming during the
freeze. Maybe the best option is to release 8.1.3 now and 8.1.4 together with 8.2.0.

/mjt
Alex Bennée Nov. 9, 2023, 1:37 p.m. UTC | #5
Michael Tokarev <mjt@tls.msk.ru> writes:

> 09.11.2023 15:23, Alex Bennée:
>> Richard Henderson <richard.henderson@linaro.org> writes:
>> 
>>> The previous change missed updating one of the increments and
>>> one of the MemOps.  Add a test case for all vector lengths.
>>>
>>> Cc: qemu-stable@nongnu.org
>>> Fixes: e6dd5e782be ("target/arm: Use tcg_gen_qemu_{ld, st}_i128 in
>>> gen_sve_{ld, st}r")
>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> This was Cc'd for stable but didn't make it in. There is a trivial
>> re-base conflict but I can re-send the patch if that helps.
>
> Nope, I picked it up, here:
>
>  https://gitlab.com/mjt0k/qemu/-/commits/staging-8.1/?ref_type=heads
>
> commit eabe320b, committed 5 days ago - about the time it was
> committed to master.

Awesome, I shall try to remember to check that branch first ;-)

Have a Tested-by: Alex Bennée <alex.bennee@linaro.org>

for the branch fix.

> I want to send the announcement for the next 8.1 release, I just haven't decided
> when to do that - usually there are quite a few fixes coming during the
> freeze. Maybe the best option is to release 8.1.3 now and 8.1.4 together with 8.2.0.
>
> /mjt
Michael Tokarev Nov. 9, 2023, 2:37 p.m. UTC | #6
09.11.2023 16:37, Alex Bennée wrote:
> Awesome, I shall try to remember to check that branch first ;-)

I tend to avoid pushing stuff to the main qemu repo,
to avoid spending CI minutes.  But from time to time I do push there.

> Have a Tested-by: Alex Bennée <alex.bennee@linaro.org>
> 
> for the branch fix.

Woops. You ruined my branch, so I had to force-push it with your new tag :)

/mjt
diff mbox series

Patch

diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 7b39962f20..296e7d1ce2 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -4294,7 +4294,7 @@  void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
         t0 = tcg_temp_new_i64();
         t1 = tcg_temp_new_i64();
         t16 = tcg_temp_new_i128();
-        for (i = 0; i < len_align; i += 8) {
+        for (i = 0; i < len_align; i += 16) {
             tcg_gen_ld_i64(t0, base, vofs + i);
             tcg_gen_ld_i64(t1, base, vofs + i + 8);
             tcg_gen_concat_i64_i128(t16, t0, t1);
@@ -4320,7 +4320,8 @@  void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
         t16 = tcg_temp_new_i128();
         tcg_gen_concat_i64_i128(t16, t0, t1);
 
-        tcg_gen_qemu_st_i128(t16, clean_addr, midx, MO_LEUQ);
+        tcg_gen_qemu_st_i128(t16, clean_addr, midx,
+                             MO_LE | MO_128 | MO_ATOM_NONE);
         tcg_gen_addi_i64(clean_addr, clean_addr, 16);
 
         tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
diff --git a/tests/tcg/aarch64/sve-str.c b/tests/tcg/aarch64/sve-str.c
new file mode 100644
index 0000000000..551f0d6f18
--- /dev/null
+++ b/tests/tcg/aarch64/sve-str.c
@@ -0,0 +1,49 @@ 
+#include <stdio.h>
+#include <sys/prctl.h>
+
+#define N  (256+16)
+
+static int __attribute__((noinline)) test(int vl)
+{
+    unsigned char buf[N];
+    int err = 0;
+
+    for (int i = 0; i < N; ++i) {
+        buf[i] = (unsigned char)i;
+    }
+
+    asm volatile (
+        "mov z0.b, #255\n\t"
+        "str z0, %0"
+        : : "m" (buf) : "z0", "memory");
+
+    for (int i = 0; i < vl; ++i) {
+        if (buf[i] != 0xff) {
+            fprintf(stderr, "vl %d, index %d, expected 255, got %d\n",
+                    vl, i, buf[i]);
+            err = 1;
+        }
+    }
+
+    for (int i = vl; i < N; ++i) {
+        if (buf[i] != (unsigned char)i) {
+            fprintf(stderr, "vl %d, index %d, expected %d, got %d\n",
+                    vl, i, (unsigned char)i, buf[i]);
+            err = 1;
+        }
+    }
+
+    return err;
+}
+
+int main()
+{
+    int err = 0;
+
+    for (int i = 16; i <= 256; i += 16) {
+        if (prctl(PR_SVE_SET_VL, i, 0, 0, 0, 0) == i) {
+            err |= test(i);
+        }
+    }
+    return err;
+}
diff --git a/tests/tcg/aarch64/Makefile.target b/tests/tcg/aarch64/Makefile.target
index 62b38c792f..c6542b5f1b 100644
--- a/tests/tcg/aarch64/Makefile.target
+++ b/tests/tcg/aarch64/Makefile.target
@@ -103,7 +103,11 @@  sha512-sve: CFLAGS=-O3 -march=armv8.1-a+sve
 sha512-sve: sha512.c
 	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
 
-TESTS += sha512-sve
+sve-str: CFLAGS=-O1 -march=armv8.1-a+sve
+sve-str: sve-str.c
+	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $< -o $@ $(LDFLAGS)
+
+TESTS += sha512-sve sve-str
 
 ifneq ($(GDB),)
 GDB_SCRIPT=$(SRC_PATH)/tests/guest-debug/run-test.py