Message ID | 20210421132931.11127-1-alex.bennee@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | [RFC] tests/tcg: add a multiarch signals test to stress test signal delivery | expand |
+Laurent On 4/21/21 3:29 PM, Alex Bennée wrote: > This adds a simple signal test that combines the POSIX timer_create > with signal delivery across multiple threads. > > [AJB: So I wrote this in an attempt to flush out issues with the > s390x-linux-user handling. However I suspect I've done something wrong > or opened a can of signal handling worms. > > Nominally this runs fine on real hardware but I variously get failures > when running it under translation and while debugging QEMU running the > test. I've also exposed a shortcomming with the gdb stub when dealing > with guest TLS data so yay ;-). So I post this as an RFC in case > anyone else can offer insight or can verify they are seeing the same > strange behaviour?] > > Signed-off-by: Alex Bennée <alex.bennee@linaro.org> > --- > tests/tcg/multiarch/signals.c | 149 ++++++++++++++++++++++++++++ > tests/tcg/multiarch/Makefile.target | 2 + > 2 files changed, 151 insertions(+) > create mode 100644 tests/tcg/multiarch/signals.c > > diff --git a/tests/tcg/multiarch/signals.c b/tests/tcg/multiarch/signals.c > new file mode 100644 > index 0000000000..998c8fdefd > --- /dev/null > +++ b/tests/tcg/multiarch/signals.c > @@ -0,0 +1,149 @@ > +/* > + * linux-user signal handling tests. > + * > + * Copyright (c) 2021 Linaro Ltd > + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#include <stdarg.h> > +#include <stdint.h> > +#include <stdio.h> > +#include <stdlib.h> > +#include <unistd.h> > +#include <errno.h> > +#include <pthread.h> > +#include <string.h> > +#include <signal.h> > +#include <time.h> > +#include <sys/time.h> > + > +static void error1(const char *filename, int line, const char *fmt, ...) > +{ > + va_list ap; > + va_start(ap, fmt); > + fprintf(stderr, "%s:%d: ", filename, line); > + vfprintf(stderr, fmt, ap); > + fprintf(stderr, "\n"); > + va_end(ap); > + exit(1); > +} > + > +static int __chk_error(const char *filename, int line, int ret) > +{ > + if (ret < 0) { > + error1(filename, line, "%m (ret=%d, errno=%d/%s)", > + ret, errno, strerror(errno)); > + } > + return ret; > +} > + > +#define error(fmt, ...) error1(__FILE__, __LINE__, fmt, ## __VA_ARGS__) > + > +#define chk_error(ret) __chk_error(__FILE__, __LINE__, (ret)) > + > +/* > + * Thread handling > + */ > +typedef struct ThreadJob ThreadJob; > + > +struct ThreadJob { > + int number; > + int sleep; > + int count; > +}; > + > +static pthread_t *threads; > +static int max_threads = 10; > +__thread int signal_count; > +int total_signal_count; > + > +static void *background_thread_func(void *arg) > +{ > + ThreadJob *job = (ThreadJob *) arg; > + > + printf("thread%d: started\n", job->number); > + while (total_signal_count < job->count) { > + usleep(job->sleep); > + } > + printf("thread%d: saw %d alarms from %d\n", job->number, > + signal_count, total_signal_count); > + return NULL; > +} > + > +static void spawn_threads(void) > +{ > + int i; > + threads = calloc(sizeof(pthread_t), max_threads); > + > + for (i = 0; i < max_threads; i++) { > + ThreadJob *job = calloc(sizeof(ThreadJob), 1); > + job->number = i; > + job->sleep = i * 1000; > + job->count = i * 100; > + pthread_create(threads + i, NULL, background_thread_func, job); > + } > +} > + > +static void close_threads(void) > +{ > + int i; > + for (i = 0; i < max_threads; i++) { > + pthread_join(threads[i], NULL); > + } > + free(threads); > + threads = NULL; > +} > + > +static void sig_alarm(int sig, siginfo_t *info, void *puc) > +{ > + if (sig != SIGRTMIN) { > + error("unexpected signal"); > + } > + signal_count++; > + __atomic_fetch_add(&total_signal_count, 1, __ATOMIC_SEQ_CST); > +} > + > +static void test_signals(void) > +{ > + struct sigaction act; > + struct itimerspec it; > + timer_t tid; > + struct sigevent sev; > + > + /* Set up SIG handler */ > + act.sa_sigaction = sig_alarm; > + sigemptyset(&act.sa_mask); > + act.sa_flags = SA_SIGINFO; > + chk_error(sigaction(SIGRTMIN, &act, NULL)); > + > + /* Create POSIX timer */ > + sev.sigev_notify = SIGEV_SIGNAL; > + sev.sigev_signo = SIGRTMIN; > + sev.sigev_value.sival_ptr = &tid; > + chk_error(timer_create(CLOCK_REALTIME, &sev, &tid)); > + > + it.it_interval.tv_sec = 0; > + it.it_interval.tv_nsec = 1000000; > + it.it_value.tv_sec = 0; > + it.it_value.tv_nsec = 1000000; > + chk_error(timer_settime(tid, 0, &it, NULL)); > + > + spawn_threads(); > + > + do { > + usleep(1000); > + } while (total_signal_count < 2000); > + > + printf("shutting down after: %d signals\n", total_signal_count); > + > + close_threads(); > + > + chk_error(timer_delete(tid)); > +} > + > +int main(int argc, char **argv) > +{ > + test_signals(); > + return 0; > +} > diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target > index a3a751723d..3f283eabe6 100644 > --- a/tests/tcg/multiarch/Makefile.target > +++ b/tests/tcg/multiarch/Makefile.target > @@ -30,6 +30,8 @@ testthread: LDFLAGS+=-lpthread > > threadcount: LDFLAGS+=-lpthread > > +signals: LDFLAGS+=-lrt -lpthread > + > # We define the runner for test-mmap after the individual > # architectures have defined their supported pages sizes. If no > # additional page sizes are defined we only run the default test. >
Alex Bennée <alex.bennee@linaro.org> writes: > This adds a simple signal test that combines the POSIX timer_create > with signal delivery across multiple threads. > > [AJB: So I wrote this in an attempt to flush out issues with the > s390x-linux-user handling. However I suspect I've done something wrong > or opened a can of signal handling worms. > > Nominally this runs fine on real hardware but I variously get failures > when running it under translation and while debugging QEMU running the > test. I've also exposed a shortcomming with the gdb stub when dealing > with guest TLS data so yay ;-). So I post this as an RFC in case > anyone else can offer insight or can verify they are seeing the same > strange behaviour?] To further document my confusion: gdb --args $QEMU ./tests/tcg/$ARCH/signals will SEGV in generated code for every target I've run. This seems to be some sort of change of behaviour by running inside a debug environment. Architectures that fail running normally: ./qemu-alpha tests/tcg/alpha-linux-user/signals fish: “./qemu-alpha tests/tcg/alpha-li…” terminated by signal SIGILL (Illegal instruction) ./qemu-sparc64 tests/tcg/sparc64-linux-user/signals thread0: started thread1: started thread2: started thread3: started thread4: started thread5: started thread6: started thread7: started thread8: started thread9: started thread0: saw 0 alarms from 0 ... (and hangs) ./qemu-s390x ./tests/tcg/s390x-linux-user/signals fish: “./qemu-s390x ./tests/tcg/s390x-…” terminated by signal SIGSEGV (Address boundary error) ./qemu-sh4 ./tests/tcg/sh4-linux-user/signals thread0: saw 87 alarms from 238 thread1: started thread1: saw 0 alarms from 331 thread2: started thread2: saw 0 alarms from 17088 thread3: started thread3: saw 0 alarms from 17093 thread4: started thread4: saw 0 alarms from 17098 thread5: started thread5: saw 2 alarms from 17106 thread6: started thread6: saw 0 alarms from 17108 thread7: started thread7: saw 1 alarms from 17114 thread8: started thread8: saw 0 alarms from 17118 thread9: started thread9: saw 0 alarms from 17122 qemu: uncaught target signal 11 (Segmentation fault) - core dumped fish: “./qemu-sh4 ./tests/tcg/sh4-linu…” terminated by signal SIGSEGV (Address boundary error) And another completely random data point while most arches see most signals delivered to the main thread qemu-i386 actually sees quite a few delivered to the other threads which is weird because I though the signal delivery would be more of a host feature than anything else. ./qemu-i386 ./tests/tcg/i386-linux-user/signals thread0: started thread0: saw 134 alarms from 177 thread1: started thread1: saw 0 alarms from 254 thread2: started thread2: saw 1 alarms from 300 thread3: started thread3: saw 1 alarms from 305 thread4: started thread5: started thread6: started thread7: started thread8: started thread9: started thread4: saw 80 alarms from 423 thread5: saw 7 alarms from 525 thread6: saw 4 alarms from 631 thread7: saw 6 alarms from 758 thread8: saw 4 alarms from 822 thread9: saw 635 alarms from 978 -- Alex Bennée
Alex Bennée <alex.bennee@linaro.org> writes: > Alex Bennée <alex.bennee@linaro.org> writes: > >> This adds a simple signal test that combines the POSIX timer_create >> with signal delivery across multiple threads. >> >> [AJB: So I wrote this in an attempt to flush out issues with the >> s390x-linux-user handling. However I suspect I've done something wrong >> or opened a can of signal handling worms. >> >> Nominally this runs fine on real hardware but I variously get failures >> when running it under translation and while debugging QEMU running the >> test. I've also exposed a shortcomming with the gdb stub when dealing >> with guest TLS data so yay ;-). So I post this as an RFC in case >> anyone else can offer insight or can verify they are seeing the same >> strange behaviour?] > > To further document my confusion: > > gdb --args $QEMU ./tests/tcg/$ARCH/signals > > will SEGV in generated code for every target I've run. This seems to be > some sort of change of behaviour by running inside a debug > environment. This bit at least seems to be triggered by the page protections for detecting SMC - I think. If you skip past them it triggers: if (is_write && info->si_signo == SIGSEGV && info->si_code == SEGV_ACCERR && h2g_valid(address)) { switch (page_unprotect(h2g(address), pc)) { and runs. -- Alex Bennée
On Wed, 21 Apr 2021 at 20:51, Alex Bennée <alex.bennee@linaro.org> wrote: > > > Alex Bennée <alex.bennee@linaro.org> writes: > > To further document my confusion: > > > > gdb --args $QEMU ./tests/tcg/$ARCH/signals > > > > will SEGV in generated code for every target I've run. This seems to be > > some sort of change of behaviour by running inside a debug > > environment. > > This bit at least seems to be triggered by the page protections for > detecting SMC - I think. If you skip past them it triggers: > > if (is_write && info->si_signo == SIGSEGV && info->si_code == SEGV_ACCERR && > h2g_valid(address)) { > switch (page_unprotect(h2g(address), pc)) { > > and runs. Yeah, for linux-user code SEGV in generated code is expected and handled -- it's how we catch things like SMC and other cases that in softmmu we handle via the softmmu slowpath. thanks -- PMM
diff --git a/tests/tcg/multiarch/signals.c b/tests/tcg/multiarch/signals.c new file mode 100644 index 0000000000..998c8fdefd --- /dev/null +++ b/tests/tcg/multiarch/signals.c @@ -0,0 +1,149 @@ +/* + * linux-user signal handling tests. + * + * Copyright (c) 2021 Linaro Ltd + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <errno.h> +#include <pthread.h> +#include <string.h> +#include <signal.h> +#include <time.h> +#include <sys/time.h> + +static void error1(const char *filename, int line, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + fprintf(stderr, "%s:%d: ", filename, line); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + va_end(ap); + exit(1); +} + +static int __chk_error(const char *filename, int line, int ret) +{ + if (ret < 0) { + error1(filename, line, "%m (ret=%d, errno=%d/%s)", + ret, errno, strerror(errno)); + } + return ret; +} + +#define error(fmt, ...) error1(__FILE__, __LINE__, fmt, ## __VA_ARGS__) + +#define chk_error(ret) __chk_error(__FILE__, __LINE__, (ret)) + +/* + * Thread handling + */ +typedef struct ThreadJob ThreadJob; + +struct ThreadJob { + int number; + int sleep; + int count; +}; + +static pthread_t *threads; +static int max_threads = 10; +__thread int signal_count; +int total_signal_count; + +static void *background_thread_func(void *arg) +{ + ThreadJob *job = (ThreadJob *) arg; + + printf("thread%d: started\n", job->number); + while (total_signal_count < job->count) { + usleep(job->sleep); + } + printf("thread%d: saw %d alarms from %d\n", job->number, + signal_count, total_signal_count); + return NULL; +} + +static void spawn_threads(void) +{ + int i; + threads = calloc(sizeof(pthread_t), max_threads); + + for (i = 0; i < max_threads; i++) { + ThreadJob *job = calloc(sizeof(ThreadJob), 1); + job->number = i; + job->sleep = i * 1000; + job->count = i * 100; + pthread_create(threads + i, NULL, background_thread_func, job); + } +} + +static void close_threads(void) +{ + int i; + for (i = 0; i < max_threads; i++) { + pthread_join(threads[i], NULL); + } + free(threads); + threads = NULL; +} + +static void sig_alarm(int sig, siginfo_t *info, void *puc) +{ + if (sig != SIGRTMIN) { + error("unexpected signal"); + } + signal_count++; + __atomic_fetch_add(&total_signal_count, 1, __ATOMIC_SEQ_CST); +} + +static void test_signals(void) +{ + struct sigaction act; + struct itimerspec it; + timer_t tid; + struct sigevent sev; + + /* Set up SIG handler */ + act.sa_sigaction = sig_alarm; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_SIGINFO; + chk_error(sigaction(SIGRTMIN, &act, NULL)); + + /* Create POSIX timer */ + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGRTMIN; + sev.sigev_value.sival_ptr = &tid; + chk_error(timer_create(CLOCK_REALTIME, &sev, &tid)); + + it.it_interval.tv_sec = 0; + it.it_interval.tv_nsec = 1000000; + it.it_value.tv_sec = 0; + it.it_value.tv_nsec = 1000000; + chk_error(timer_settime(tid, 0, &it, NULL)); + + spawn_threads(); + + do { + usleep(1000); + } while (total_signal_count < 2000); + + printf("shutting down after: %d signals\n", total_signal_count); + + close_threads(); + + chk_error(timer_delete(tid)); +} + +int main(int argc, char **argv) +{ + test_signals(); + return 0; +} diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index a3a751723d..3f283eabe6 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -30,6 +30,8 @@ testthread: LDFLAGS+=-lpthread threadcount: LDFLAGS+=-lpthread +signals: LDFLAGS+=-lrt -lpthread + # We define the runner for test-mmap after the individual # architectures have defined their supported pages sizes. If no # additional page sizes are defined we only run the default test.
This adds a simple signal test that combines the POSIX timer_create with signal delivery across multiple threads. [AJB: So I wrote this in an attempt to flush out issues with the s390x-linux-user handling. However I suspect I've done something wrong or opened a can of signal handling worms. Nominally this runs fine on real hardware but I variously get failures when running it under translation and while debugging QEMU running the test. I've also exposed a shortcomming with the gdb stub when dealing with guest TLS data so yay ;-). So I post this as an RFC in case anyone else can offer insight or can verify they are seeing the same strange behaviour?] Signed-off-by: Alex Bennée <alex.bennee@linaro.org> --- tests/tcg/multiarch/signals.c | 149 ++++++++++++++++++++++++++++ tests/tcg/multiarch/Makefile.target | 2 + 2 files changed, 151 insertions(+) create mode 100644 tests/tcg/multiarch/signals.c -- 2.20.1