[RFC] tests/tcg: add a multiarch signals test to stress test signal delivery

Message ID 20210421132931.11127-1-alex.bennee@linaro.org
State Superseded
Headers show
Series
  • [RFC] tests/tcg: add a multiarch signals test to stress test signal delivery
Related show

Commit Message

Alex Bennée April 21, 2021, 1:29 p.m.
This adds a simple signal test that combines the POSIX timer_create
with signal delivery across multiple threads.

[AJB: So I wrote this in an attempt to flush out issues with the
s390x-linux-user handling. However I suspect I've done something wrong
or opened a can of signal handling worms.

Nominally this runs fine on real hardware but I variously get failures
when running it under translation and while debugging QEMU running the
test. I've also exposed a shortcomming with the gdb stub when dealing
with guest TLS data so yay ;-). So I post this as an RFC in case
anyone else can offer insight or can verify they are seeing the same
strange behaviour?]

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
 tests/tcg/multiarch/signals.c       | 149 ++++++++++++++++++++++++++++
 tests/tcg/multiarch/Makefile.target |   2 +
 2 files changed, 151 insertions(+)
 create mode 100644 tests/tcg/multiarch/signals.c

-- 
2.20.1

Comments

Philippe Mathieu-Daudé April 21, 2021, 1:41 p.m. | #1
+Laurent

On 4/21/21 3:29 PM, Alex Bennée wrote:
> This adds a simple signal test that combines the POSIX timer_create

> with signal delivery across multiple threads.

> 

> [AJB: So I wrote this in an attempt to flush out issues with the

> s390x-linux-user handling. However I suspect I've done something wrong

> or opened a can of signal handling worms.

> 

> Nominally this runs fine on real hardware but I variously get failures

> when running it under translation and while debugging QEMU running the

> test. I've also exposed a shortcomming with the gdb stub when dealing

> with guest TLS data so yay ;-). So I post this as an RFC in case

> anyone else can offer insight or can verify they are seeing the same

> strange behaviour?]

> 

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> ---

>  tests/tcg/multiarch/signals.c       | 149 ++++++++++++++++++++++++++++

>  tests/tcg/multiarch/Makefile.target |   2 +

>  2 files changed, 151 insertions(+)

>  create mode 100644 tests/tcg/multiarch/signals.c

> 

> diff --git a/tests/tcg/multiarch/signals.c b/tests/tcg/multiarch/signals.c

> new file mode 100644

> index 0000000000..998c8fdefd

> --- /dev/null

> +++ b/tests/tcg/multiarch/signals.c

> @@ -0,0 +1,149 @@

> +/*

> + * linux-user signal handling tests.

> + *

> + * Copyright (c) 2021 Linaro Ltd

> + *

> + * SPDX-License-Identifier: GPL-2.0-or-later

> + */

> +

> +#include <stdarg.h>

> +#include <stdint.h>

> +#include <stdio.h>

> +#include <stdlib.h>

> +#include <unistd.h>

> +#include <errno.h>

> +#include <pthread.h>

> +#include <string.h>

> +#include <signal.h>

> +#include <time.h>

> +#include <sys/time.h>

> +

> +static void error1(const char *filename, int line, const char *fmt, ...)

> +{

> +    va_list ap;

> +    va_start(ap, fmt);

> +    fprintf(stderr, "%s:%d: ", filename, line);

> +    vfprintf(stderr, fmt, ap);

> +    fprintf(stderr, "\n");

> +    va_end(ap);

> +    exit(1);

> +}

> +

> +static int __chk_error(const char *filename, int line, int ret)

> +{

> +    if (ret < 0) {

> +        error1(filename, line, "%m (ret=%d, errno=%d/%s)",

> +               ret, errno, strerror(errno));

> +    }

> +    return ret;

> +}

> +

> +#define error(fmt, ...) error1(__FILE__, __LINE__, fmt, ## __VA_ARGS__)

> +

> +#define chk_error(ret) __chk_error(__FILE__, __LINE__, (ret))

> +

> +/*

> + * Thread handling

> + */

> +typedef struct ThreadJob ThreadJob;

> +

> +struct ThreadJob {

> +    int number;

> +    int sleep;

> +    int count;

> +};

> +

> +static pthread_t *threads;

> +static int max_threads = 10;

> +__thread int signal_count;

> +int total_signal_count;

> +

> +static void *background_thread_func(void *arg)

> +{

> +    ThreadJob *job = (ThreadJob *) arg;

> +

> +    printf("thread%d: started\n", job->number);

> +    while (total_signal_count < job->count) {

> +        usleep(job->sleep);

> +    }

> +    printf("thread%d: saw %d alarms from %d\n", job->number,

> +           signal_count, total_signal_count);

> +    return NULL;

> +}

> +

> +static void spawn_threads(void)

> +{

> +    int i;

> +    threads = calloc(sizeof(pthread_t), max_threads);

> +

> +    for (i = 0; i < max_threads; i++) {

> +        ThreadJob *job = calloc(sizeof(ThreadJob), 1);

> +        job->number = i;

> +        job->sleep = i * 1000;

> +        job->count = i * 100;

> +        pthread_create(threads + i, NULL, background_thread_func, job);

> +    }

> +}

> +

> +static void close_threads(void)

> +{

> +    int i;

> +    for (i = 0; i < max_threads; i++) {

> +        pthread_join(threads[i], NULL);

> +    }

> +    free(threads);

> +    threads = NULL;

> +}

> +

> +static void sig_alarm(int sig, siginfo_t *info, void *puc)

> +{

> +    if (sig != SIGRTMIN) {

> +        error("unexpected signal");

> +    }

> +    signal_count++;

> +    __atomic_fetch_add(&total_signal_count, 1, __ATOMIC_SEQ_CST);

> +}

> +

> +static void test_signals(void)

> +{

> +    struct sigaction act;

> +    struct itimerspec it;

> +    timer_t tid;

> +    struct sigevent sev;

> +

> +    /* Set up SIG handler */

> +    act.sa_sigaction = sig_alarm;

> +    sigemptyset(&act.sa_mask);

> +    act.sa_flags = SA_SIGINFO;

> +    chk_error(sigaction(SIGRTMIN, &act, NULL));

> +

> +    /* Create POSIX timer */

> +    sev.sigev_notify = SIGEV_SIGNAL;

> +    sev.sigev_signo = SIGRTMIN;

> +    sev.sigev_value.sival_ptr = &tid;

> +    chk_error(timer_create(CLOCK_REALTIME, &sev, &tid));

> +

> +    it.it_interval.tv_sec = 0;

> +    it.it_interval.tv_nsec = 1000000;

> +    it.it_value.tv_sec = 0;

> +    it.it_value.tv_nsec = 1000000;

> +    chk_error(timer_settime(tid, 0, &it, NULL));

> +

> +    spawn_threads();

> +

> +    do {

> +        usleep(1000);

> +    } while (total_signal_count < 2000);

> +

> +    printf("shutting down after: %d signals\n", total_signal_count);

> +

> +    close_threads();

> +

> +    chk_error(timer_delete(tid));

> +}

> +

> +int main(int argc, char **argv)

> +{

> +    test_signals();

> +    return 0;

> +}

> diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target

> index a3a751723d..3f283eabe6 100644

> --- a/tests/tcg/multiarch/Makefile.target

> +++ b/tests/tcg/multiarch/Makefile.target

> @@ -30,6 +30,8 @@ testthread: LDFLAGS+=-lpthread

>  

>  threadcount: LDFLAGS+=-lpthread

>  

> +signals: LDFLAGS+=-lrt -lpthread

> +

>  # We define the runner for test-mmap after the individual

>  # architectures have defined their supported pages sizes. If no

>  # additional page sizes are defined we only run the default test.

>
Alex Bennée April 21, 2021, 4:21 p.m. | #2
Alex Bennée <alex.bennee@linaro.org> writes:

> This adds a simple signal test that combines the POSIX timer_create

> with signal delivery across multiple threads.

>

> [AJB: So I wrote this in an attempt to flush out issues with the

> s390x-linux-user handling. However I suspect I've done something wrong

> or opened a can of signal handling worms.

>

> Nominally this runs fine on real hardware but I variously get failures

> when running it under translation and while debugging QEMU running the

> test. I've also exposed a shortcomming with the gdb stub when dealing

> with guest TLS data so yay ;-). So I post this as an RFC in case

> anyone else can offer insight or can verify they are seeing the same

> strange behaviour?]


To further document my confusion:

  gdb --args $QEMU ./tests/tcg/$ARCH/signals

will SEGV in generated code for every target I've run. This seems to be
some sort of change of behaviour by running inside a debug environment.

Architectures that fail running normally:

./qemu-alpha tests/tcg/alpha-linux-user/signals
fish: “./qemu-alpha tests/tcg/alpha-li…” terminated by signal SIGILL (Illegal instruction)

./qemu-sparc64 tests/tcg/sparc64-linux-user/signals
thread0: started
thread1: started
thread2: started
thread3: started
thread4: started
thread5: started
thread6: started
thread7: started
thread8: started
thread9: started
thread0: saw 0 alarms from 0
...
(and hangs)

./qemu-s390x ./tests/tcg/s390x-linux-user/signals
fish: “./qemu-s390x ./tests/tcg/s390x-…” terminated by signal SIGSEGV (Address boundary error)

./qemu-sh4 ./tests/tcg/sh4-linux-user/signals
thread0: saw 87 alarms from 238
thread1: started
thread1: saw 0 alarms from 331
thread2: started
thread2: saw 0 alarms from 17088
thread3: started
thread3: saw 0 alarms from 17093
thread4: started
thread4: saw 0 alarms from 17098
thread5: started
thread5: saw 2 alarms from 17106
thread6: started
thread6: saw 0 alarms from 17108
thread7: started
thread7: saw 1 alarms from 17114
thread8: started
thread8: saw 0 alarms from 17118
thread9: started
thread9: saw 0 alarms from 17122
qemu: uncaught target signal 11 (Segmentation fault) - core dumped
fish: “./qemu-sh4 ./tests/tcg/sh4-linu…” terminated by signal SIGSEGV (Address boundary error)

And another completely random data point while most arches see most
signals delivered to the main thread qemu-i386 actually sees quite a few
delivered to the other threads which is weird because I though the
signal delivery would be more of a host feature than anything else.

./qemu-i386 ./tests/tcg/i386-linux-user/signals
thread0: started
thread0: saw 134 alarms from 177
thread1: started
thread1: saw 0 alarms from 254
thread2: started
thread2: saw 1 alarms from 300
thread3: started
thread3: saw 1 alarms from 305
thread4: started
thread5: started
thread6: started
thread7: started
thread8: started
thread9: started
thread4: saw 80 alarms from 423
thread5: saw 7 alarms from 525
thread6: saw 4 alarms from 631
thread7: saw 6 alarms from 758
thread8: saw 4 alarms from 822
thread9: saw 635 alarms from 978


-- 
Alex Bennée
Alex Bennée April 21, 2021, 7:47 p.m. | #3
Alex Bennée <alex.bennee@linaro.org> writes:

> Alex Bennée <alex.bennee@linaro.org> writes:

>

>> This adds a simple signal test that combines the POSIX timer_create

>> with signal delivery across multiple threads.

>>

>> [AJB: So I wrote this in an attempt to flush out issues with the

>> s390x-linux-user handling. However I suspect I've done something wrong

>> or opened a can of signal handling worms.

>>

>> Nominally this runs fine on real hardware but I variously get failures

>> when running it under translation and while debugging QEMU running the

>> test. I've also exposed a shortcomming with the gdb stub when dealing

>> with guest TLS data so yay ;-). So I post this as an RFC in case

>> anyone else can offer insight or can verify they are seeing the same

>> strange behaviour?]

>

> To further document my confusion:

>

>   gdb --args $QEMU ./tests/tcg/$ARCH/signals

>

> will SEGV in generated code for every target I've run. This seems to be

> some sort of change of behaviour by running inside a debug

> environment.


This bit at least seems to be triggered by the page protections for
detecting SMC - I think. If you skip past them it triggers:

    if (is_write && info->si_signo == SIGSEGV && info->si_code == SEGV_ACCERR &&
        h2g_valid(address)) {
        switch (page_unprotect(h2g(address), pc)) {

and runs.

-- 
Alex Bennée
Peter Maydell April 21, 2021, 7:56 p.m. | #4
On Wed, 21 Apr 2021 at 20:51, Alex Bennée <alex.bennee@linaro.org> wrote:
>

>

> Alex Bennée <alex.bennee@linaro.org> writes:

> > To further document my confusion:

> >

> >   gdb --args $QEMU ./tests/tcg/$ARCH/signals

> >

> > will SEGV in generated code for every target I've run. This seems to be

> > some sort of change of behaviour by running inside a debug

> > environment.

>

> This bit at least seems to be triggered by the page protections for

> detecting SMC - I think. If you skip past them it triggers:

>

>     if (is_write && info->si_signo == SIGSEGV && info->si_code == SEGV_ACCERR &&

>         h2g_valid(address)) {

>         switch (page_unprotect(h2g(address), pc)) {

>

> and runs.


Yeah, for linux-user code SEGV in generated code is expected and
handled -- it's how we catch things like SMC and other cases that
in softmmu we handle via the softmmu slowpath.

thanks
-- PMM

Patch

diff --git a/tests/tcg/multiarch/signals.c b/tests/tcg/multiarch/signals.c
new file mode 100644
index 0000000000..998c8fdefd
--- /dev/null
+++ b/tests/tcg/multiarch/signals.c
@@ -0,0 +1,149 @@ 
+/*
+ * linux-user signal handling tests.
+ *
+ * Copyright (c) 2021 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <pthread.h>
+#include <string.h>
+#include <signal.h>
+#include <time.h>
+#include <sys/time.h>
+
+static void error1(const char *filename, int line, const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    fprintf(stderr, "%s:%d: ", filename, line);
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
+    va_end(ap);
+    exit(1);
+}
+
+static int __chk_error(const char *filename, int line, int ret)
+{
+    if (ret < 0) {
+        error1(filename, line, "%m (ret=%d, errno=%d/%s)",
+               ret, errno, strerror(errno));
+    }
+    return ret;
+}
+
+#define error(fmt, ...) error1(__FILE__, __LINE__, fmt, ## __VA_ARGS__)
+
+#define chk_error(ret) __chk_error(__FILE__, __LINE__, (ret))
+
+/*
+ * Thread handling
+ */
+typedef struct ThreadJob ThreadJob;
+
+struct ThreadJob {
+    int number;
+    int sleep;
+    int count;
+};
+
+static pthread_t *threads;
+static int max_threads = 10;
+__thread int signal_count;
+int total_signal_count;
+
+static void *background_thread_func(void *arg)
+{
+    ThreadJob *job = (ThreadJob *) arg;
+
+    printf("thread%d: started\n", job->number);
+    while (total_signal_count < job->count) {
+        usleep(job->sleep);
+    }
+    printf("thread%d: saw %d alarms from %d\n", job->number,
+           signal_count, total_signal_count);
+    return NULL;
+}
+
+static void spawn_threads(void)
+{
+    int i;
+    threads = calloc(sizeof(pthread_t), max_threads);
+
+    for (i = 0; i < max_threads; i++) {
+        ThreadJob *job = calloc(sizeof(ThreadJob), 1);
+        job->number = i;
+        job->sleep = i * 1000;
+        job->count = i * 100;
+        pthread_create(threads + i, NULL, background_thread_func, job);
+    }
+}
+
+static void close_threads(void)
+{
+    int i;
+    for (i = 0; i < max_threads; i++) {
+        pthread_join(threads[i], NULL);
+    }
+    free(threads);
+    threads = NULL;
+}
+
+static void sig_alarm(int sig, siginfo_t *info, void *puc)
+{
+    if (sig != SIGRTMIN) {
+        error("unexpected signal");
+    }
+    signal_count++;
+    __atomic_fetch_add(&total_signal_count, 1, __ATOMIC_SEQ_CST);
+}
+
+static void test_signals(void)
+{
+    struct sigaction act;
+    struct itimerspec it;
+    timer_t tid;
+    struct sigevent sev;
+
+    /* Set up SIG handler */
+    act.sa_sigaction = sig_alarm;
+    sigemptyset(&act.sa_mask);
+    act.sa_flags = SA_SIGINFO;
+    chk_error(sigaction(SIGRTMIN, &act, NULL));
+
+    /* Create POSIX timer */
+    sev.sigev_notify = SIGEV_SIGNAL;
+    sev.sigev_signo = SIGRTMIN;
+    sev.sigev_value.sival_ptr = &tid;
+    chk_error(timer_create(CLOCK_REALTIME, &sev, &tid));
+
+    it.it_interval.tv_sec = 0;
+    it.it_interval.tv_nsec = 1000000;
+    it.it_value.tv_sec = 0;
+    it.it_value.tv_nsec = 1000000;
+    chk_error(timer_settime(tid, 0, &it, NULL));
+
+    spawn_threads();
+
+    do {
+        usleep(1000);
+    } while (total_signal_count < 2000);
+
+    printf("shutting down after: %d signals\n", total_signal_count);
+
+    close_threads();
+
+    chk_error(timer_delete(tid));
+}
+
+int main(int argc, char **argv)
+{
+    test_signals();
+    return 0;
+}
diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target
index a3a751723d..3f283eabe6 100644
--- a/tests/tcg/multiarch/Makefile.target
+++ b/tests/tcg/multiarch/Makefile.target
@@ -30,6 +30,8 @@  testthread: LDFLAGS+=-lpthread
 
 threadcount: LDFLAGS+=-lpthread
 
+signals: LDFLAGS+=-lrt -lpthread
+
 # We define the runner for test-mmap after the individual
 # architectures have defined their supported pages sizes. If no
 # additional page sizes are defined we only run the default test.