@@ -5,5 +5,5 @@ LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
CFLAGS += -Wall -I$(LINUX_HDR_PATH)
-TEST_GEN_PROGS := sud_test
+TEST_GEN_PROGS := sud_test sud_benchmark
include ../lib.mk
new file mode 100644
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 Collabora Ltd.
+ *
+ * Benchmark and test syscall user dispatch
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/sysinfo.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#ifndef PR_SET_SYSCALL_USER_DISPATCH
+# define PR_SET_SYSCALL_USER_DISPATCH 59
+# define PR_SYS_DISPATCH_OFF 0
+# define PR_SYS_DISPATCH_ON 1
+#endif
+
+#ifdef __NR_syscalls
+# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
+#else
+# define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */
+#endif
+
+/*
+ * To test returning from a sigsys with selector blocked, the test
+ * requires some per-architecture support (i.e. knowledge about the
+ * signal trampoline address). On i386, we know it is on the vdso, and
+ * a small trampoline is open-coded for x86_64. Other architectures
+ * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN
+ * out of the box, but don't enable them until they support syscall user
+ * dispatch.
+ */
+#if defined(__x86_64__) || defined(__i386__)
+#define TEST_BLOCKED_RETURN
+#endif
+
+#ifdef __x86_64__
+void* (syscall_dispatcher_start)(void);
+void* (syscall_dispatcher_end)(void);
+#else
+unsigned long syscall_dispatcher_start = 0;
+unsigned long syscall_dispatcher_end = 0;
+#endif
+
+unsigned long trapped_call_count = 0;
+unsigned long native_call_count = 0;
+
+char selector;
+#define SYSCALL_BLOCK (selector = PR_SYS_DISPATCH_ON)
+#define SYSCALL_UNBLOCK (selector = PR_SYS_DISPATCH_OFF)
+
+#define CALIBRATION_STEP 100000
+#define CALIBRATE_TO_SECS 5
+int factor;
+
+static double one_sysinfo_step(void)
+{
+ struct timespec t1, t2;
+ int i;
+ struct sysinfo info;
+
+ clock_gettime(CLOCK_MONOTONIC, &t1);
+ for (i = 0; i < CALIBRATION_STEP; i++)
+ sysinfo(&info);
+ clock_gettime(CLOCK_MONOTONIC, &t2);
+ return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
+}
+
+static void calibrate_set(void)
+{
+ double elapsed = 0;
+
+ printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);
+
+ while (elapsed < 1) {
+ elapsed += one_sysinfo_step();
+ factor += CALIBRATE_TO_SECS;
+ }
+
+ printf("test iterations = %d\n", CALIBRATION_STEP * factor);
+}
+
+static double perf_syscall(void)
+{
+ unsigned int i;
+ double partial = 0;
+
+ for (i = 0; i < factor; ++i)
+ partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
+ return partial;
+}
+
+static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
+{
+ char buf[1024];
+ int len;
+
+ SYSCALL_UNBLOCK;
+
+ /* printf and friends are not signal-safe. */
+ len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall);
+ write(1, buf, len);
+
+ if (info->si_syscall == MAGIC_SYSCALL_1)
+ trapped_call_count++;
+ else
+ native_call_count++;
+
+#ifdef TEST_BLOCKED_RETURN
+ SYSCALL_BLOCK;
+#endif
+
+#ifdef __x86_64__
+ __asm__ volatile("movq $0xf, %rax");
+ __asm__ volatile("leaveq");
+ __asm__ volatile("add $0x8, %rsp");
+ __asm__ volatile("syscall_dispatcher_start:");
+ __asm__ volatile("syscall");
+ __asm__ volatile("nop"); /* Landing pad within dispatcher area */
+ __asm__ volatile("syscall_dispatcher_end:");
+#endif
+
+}
+
+int main(void)
+{
+ struct sigaction act;
+ double time1, time2;
+ int ret;
+ sigset_t mask;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+
+ act.sa_sigaction = handle_sigsys;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_mask = mask;
+
+ calibrate_set();
+
+ time1 = perf_syscall();
+ printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);
+
+ ret = sigaction(SIGSYS, &act, NULL);
+ if (ret) {
+ perror("Error sigaction:");
+ exit(-1);
+ }
+
+ fprintf(stderr, "Enabling syscall trapping.\n");
+
+ if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
+ syscall_dispatcher_start,
+ (syscall_dispatcher_end - syscall_dispatcher_start + 1),
+ &selector)) {
+ perror("prctl failed\n");
+ exit(-1);
+ }
+
+ SYSCALL_BLOCK;
+ syscall(MAGIC_SYSCALL_1);
+
+#ifdef TEST_BLOCKED_RETURN
+ if (selector == PR_SYS_DISPATCH_OFF) {
+ fprintf(stderr, "Failed to return with selector blocked.\n");
+ exit(-1);
+ }
+#endif
+
+ SYSCALL_UNBLOCK;
+
+ if (!trapped_call_count) {
+ fprintf(stderr, "syscall trapping does not work.\n");
+ exit(-1);
+ }
+
+ time2 = perf_syscall();
+
+ if (native_call_count) {
+ perror("syscall trapping intercepted more syscalls than expected\n");
+ exit(-1);
+ }
+
+ printf("trapped_call_count %lu, native_call_count %lu.\n",
+ trapped_call_count, native_call_count);
+ printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
+ printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
+ 100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
+ return 0;
+
+}