From patchwork Sun Nov 15 18:40:57 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Daniel Wagner X-Patchwork-Id: 324918 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-12.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS, INCLUDES_PATCH, MAILING_LIST_MULTI, SIGNED_OFF_BY, SPF_HELO_NONE, SPF_PASS, USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 32503C61DD8 for ; Sun, 15 Nov 2020 18:41:11 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id EFF8622450 for ; Sun, 15 Nov 2020 18:41:10 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727340AbgKOSlK (ORCPT ); Sun, 15 Nov 2020 13:41:10 -0500 Received: from mx2.suse.de ([195.135.220.15]:54436 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726817AbgKOSlK (ORCPT ); Sun, 15 Nov 2020 13:41:10 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id 26C37AC90; Sun, 15 Nov 2020 18:41:09 +0000 (UTC) From: Daniel Wagner To: Clark Williams , John Kacur Cc: linux-rt-users@vger.kernel.org, Sebastian Andrzej Siewior , Daniel Wagner Subject: [rt-tests v1 1/3] rt-numa: Move thread placement code to rt-numa library Date: Sun, 15 Nov 2020 19:40:57 +0100 Message-Id: <20201115184059.7286-2-dwagner@suse.de> X-Mailer: git-send-email 2.29.2 In-Reply-To: <20201115184059.7286-1-dwagner@suse.de> References: <20201115184059.7286-1-dwagner@suse.de> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rt-users@vger.kernel.org cyclictest contains code for calculating where to place threads 
according to the cpumask. Let's move it to the rt-numa library to be able to reuse it. Signed-off-by: Daniel Wagner --- src/cyclictest/cyclictest.c | 98 +++---------------------------------- src/include/rt-numa.h | 12 +++++ src/lib/rt-numa.c | 78 +++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+), 90 deletions(-) diff --git a/src/cyclictest/cyclictest.c b/src/cyclictest/cyclictest.c index f10f064f7a8e..0a797c540531 100644 --- a/src/cyclictest/cyclictest.c +++ b/src/cyclictest/cyclictest.c @@ -893,12 +893,6 @@ static int interval = DEFAULT_INTERVAL; static int distance = -1; static struct bitmask *affinity_mask = NULL; static int smp = 0; - -enum { - AFFINITY_UNSPECIFIED, - AFFINITY_SPECIFIED, - AFFINITY_USEALL -}; static int setaffinity = AFFINITY_UNSPECIFIED; static int clocksources[] = { @@ -906,72 +900,6 @@ static int clocksources[] = { CLOCK_REALTIME, }; -/* Get available cpus according to getaffinity or according to the - * intersection of getaffinity and the user specified affinity - * in the case of AFFINITY_SPECIFIED, the function has to be called - * after the call to parse_cpumask made in process_options() - */ -static int get_available_cpus(void) -{ - if (affinity_mask) - return numa_bitmask_weight(affinity_mask); - - return numa_num_task_cpus(); -} - -/* cpu_for_thread AFFINITY_SPECIFIED */ -static int cpu_for_thread_sp(int thread_num, int max_cpus) -{ - unsigned int m, cpu, i, num_cpus; - - num_cpus = rt_numa_bitmask_count(affinity_mask); - - if (num_cpus == 0) - fatal("No allowable cpus to run on\n"); - - m = thread_num % num_cpus; - - /* there are num_cpus bits set, we want position of m'th one */ - for (i = 0, cpu = 0; i < max_cpus; i++) { - if (rt_numa_bitmask_isbitset(affinity_mask, i)) { - if (cpu == m) - return i; - cpu++; - } - } - fprintf(stderr, "Bug in cpu mask handling code.\n"); - return 0; -} - -/* cpu_for_thread AFFINITY_USEALL */ -static int cpu_for_thread_ua(int thread_num, int max_cpus) -{ - int res, num_cpus, i, m, cpu; - 
pthread_t thread; - cpu_set_t cpuset; - - thread = pthread_self(); - CPU_ZERO(&cpuset); - - res = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset); - if (res != 0) - fatal("pthread_getaffinity_np failed: %s\n", strerror(res)); - - num_cpus = CPU_COUNT(&cpuset); - m = thread_num % num_cpus; - - for (i = 0, cpu = 0; i < max_cpus; i++) { - if (CPU_ISSET(i, &cpuset)) { - if (cpu == m) - return i; - cpu++; - } - } - - fprintf(stderr, "Bug in cpu mask handling code.\n"); - return 0; -} - static void handlepolicy(char *polname) { if (strncasecmp(polname, "other", 5) == 0) @@ -1027,20 +955,6 @@ enum option_values { OPT_TRACEMARK, OPT_POSIX_TIMERS, }; -/* numa_available() must be called before any other calls to the numa library */ -static void numa_initialize(void) -{ - static int is_initialized; - - if (is_initialized == 1) - return; - - if (numa_available() != -1) - numa = 1; - - is_initialized = 1; -} - /* Process commandline options */ static void process_options(int argc, char *argv[], int max_cpus) { @@ -1104,7 +1018,9 @@ static void process_options(int argc, char *argv[], int max_cpus) /* smp sets AFFINITY_USEALL in OPT_SMP */ if (smp) break; - numa_initialize(); + if (numa_initialize()) + fatal("Couldn't initilize libnuma"); + numa = 1; if (optarg) { parse_cpumask(optarg, max_cpus, &affinity_mask); setaffinity = AFFINITY_SPECIFIED; @@ -1285,7 +1201,9 @@ static void process_options(int argc, char *argv[], int max_cpus) /* if smp wasn't requested, test for numa automatically */ if (!smp) { - numa_initialize(); + if (numa_initialize()) + fatal("Couldn't initilize libnuma"); + numa = 1; if (setaffinity == AFFINITY_UNSPECIFIED) setaffinity = AFFINITY_USEALL; } @@ -1330,7 +1248,7 @@ static void process_options(int argc, char *argv[], int max_cpus) error = 1; if (num_threads == -1) - num_threads = get_available_cpus(); + num_threads = get_available_cpus(affinity_mask); if (priospread && priority == 0) { fprintf(stderr, "defaulting realtime priority to %d\n", @@ 
-1998,7 +1916,7 @@ int main(int argc, char **argv) switch (setaffinity) { case AFFINITY_UNSPECIFIED: cpu = -1; break; case AFFINITY_SPECIFIED: - cpu = cpu_for_thread_sp(i, max_cpus); + cpu = cpu_for_thread_sp(i, max_cpus, affinity_mask); if (verbose) printf("Thread %d using cpu %d.\n", i, cpu); break; diff --git a/src/include/rt-numa.h b/src/include/rt-numa.h index 047c8b6257cc..ca86a45dab3a 100644 --- a/src/include/rt-numa.h +++ b/src/include/rt-numa.h @@ -4,6 +4,18 @@ #include +enum { + AFFINITY_UNSPECIFIED, + AFFINITY_SPECIFIED, + AFFINITY_USEALL +}; + +int numa_initialize(void); + +int get_available_cpus(struct bitmask *cpumask); +int cpu_for_thread_sp(int thread_num, int max_cpus, struct bitmask *cpumask); +int cpu_for_thread_ua(int thread_num, int max_cpus); + int parse_cpumask(char *str, int max_cpus, struct bitmask **cpumask); #endif diff --git a/src/lib/rt-numa.c b/src/lib/rt-numa.c index a52a56e8aadd..76f8bd2f0ebe 100644 --- a/src/lib/rt-numa.c +++ b/src/lib/rt-numa.c @@ -6,9 +6,87 @@ #include #include #include +#include +#include +#include "error.h" #include "rt-numa.h" +/* numa_available() must be called before any other calls to the numa library */ +int numa_initialize(void) +{ + static int is_initialized; + + if (is_initialized == 1) + return 0; + + if (numa_available() == -1) + return -1; + + is_initialized = 1; + return 0; +} + +int get_available_cpus(struct bitmask *cpumask) +{ + if (cpumask) + return numa_bitmask_weight(cpumask); + + return numa_num_task_cpus(); +} + +int cpu_for_thread_sp(int thread_num, int max_cpus, struct bitmask *cpumask) +{ + unsigned int m, cpu, i, num_cpus; + + num_cpus = numa_bitmask_weight(cpumask); + + if (num_cpus == 0) + fatal("No allowable cpus to run on\n"); + + m = thread_num % num_cpus; + + /* there are num_cpus bits set, we want position of m'th one */ + for (i = 0, cpu = 0; i < max_cpus; i++) { + if (numa_bitmask_isbitset(cpumask, i)) { + if (cpu == m) + return i; + cpu++; + } + } + fprintf(stderr, "Bug in cpu 
mask handling code.\n"); + return 0; +} + +/* cpu_for_thread AFFINITY_USEALL */ +int cpu_for_thread_ua(int thread_num, int max_cpus) +{ + int res, num_cpus, i, m, cpu; + pthread_t thread; + cpu_set_t cpuset; + + thread = pthread_self(); + CPU_ZERO(&cpuset); + + res = pthread_getaffinity_np(thread, sizeof(cpu_set_t), &cpuset); + if (res != 0) + fatal("pthread_getaffinity_np failed: %s\n", strerror(res)); + + num_cpus = CPU_COUNT(&cpuset); + m = thread_num % num_cpus; + + for (i = 0, cpu = 0; i < max_cpus; i++) { + if (CPU_ISSET(i, &cpuset)) { + if (cpu == m) + return i; + cpu++; + } + } + + fprintf(stderr, "Bug in cpu mask handling code.\n"); + return 0; +} + /* * After this function is called, affinity_mask is the intersection of * the user supplied affinity mask and the affinity mask from the run From patchwork Sun Nov 15 18:40:58 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Daniel Wagner X-Patchwork-Id: 324917 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-12.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS, INCLUDES_PATCH, MAILING_LIST_MULTI, SIGNED_OFF_BY, SPF_HELO_NONE, SPF_PASS, USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5A8A9C2D0E4 for ; Sun, 15 Nov 2020 18:41:12 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 2B75422450 for ; Sun, 15 Nov 2020 18:41:12 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727359AbgKOSlL (ORCPT ); Sun, 15 Nov 2020 13:41:11 -0500 Received: from mx2.suse.de ([195.135.220.15]:54450 "EHLO mx2.suse.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726741AbgKOSlL (ORCPT ); Sun, 15 Nov 2020 
13:41:11 -0500 X-Virus-Scanned: by amavisd-new at test-mx.suse.de Received: from relay2.suse.de (unknown [195.135.221.27]) by mx2.suse.de (Postfix) with ESMTP id 688FBAC9A; Sun, 15 Nov 2020 18:41:09 +0000 (UTC) From: Daniel Wagner To: Clark Williams , John Kacur Cc: linux-rt-users@vger.kernel.org, Sebastian Andrzej Siewior , Daniel Wagner Subject: [rt-tests v1 2/3] signaltest: Implement thread placing Date: Sun, 15 Nov 2020 19:40:58 +0100 Message-Id: <20201115184059.7286-3-dwagner@suse.de> X-Mailer: git-send-email 2.29.2 In-Reply-To: <20201115184059.7286-1-dwagner@suse.de> References: <20201115184059.7286-1-dwagner@suse.de> MIME-Version: 1.0 Precedence: bulk List-ID: X-Mailing-List: linux-rt-users@vger.kernel.org Without setting the thread affinity, the scheduler will move the threads around which will lead to spikes. Since any proper realtime application will use thread affinity, let's pin down the threads to CPUs. Signed-off-by: Daniel Wagner --- Makefile | 4 +- src/signaltest/signaltest.8 | 22 ++++++- src/signaltest/signaltest.c | 116 ++++++++++++++++++++++++++++++++++-- 3 files changed, 133 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 3afdfd4d53a7..a410cb24cf34 100644 --- a/Makefile +++ b/Makefile @@ -127,8 +127,8 @@ cyclicdeadline: $(OBJDIR)/cyclicdeadline.o $(OBJDIR)/librttest.a deadline_test: $(OBJDIR)/deadline_test.o $(OBJDIR)/librttest.a $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(RTTESTLIB) -signaltest: $(OBJDIR)/signaltest.o $(OBJDIR)/librttest.a - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(RTTESTLIB) +signaltest: $(OBJDIR)/signaltest.o $(OBJDIR)/librttest.a $(OBJDIR)/librttestnuma.a + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(RTTESTLIB) $(RTTESTNUMA) pi_stress: $(OBJDIR)/pi_stress.o $(OBJDIR)/librttest.a $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< $(LIBS) $(RTTESTLIB) diff --git a/src/signaltest/signaltest.8 b/src/signaltest/signaltest.8 index dc3eaab2293a..5ce119b54461 100644 --- a/src/signaltest/signaltest.8 +++ 
b/src/signaltest/signaltest.8 @@ -1,15 +1,28 @@ .\" -.TH SIGNALTEST 8 "September 18, 2020" +.TH SIGNALTEST 8 "November 15, 2020" .\" Please adjust this date whenever updating this manpage .SH NAME signaltest \- signal roundtrip test software .SH SYNOPSIS .LP -signaltest [ -b|--backtrace USEC ] [-D|--duration TIME] [-h|--help] [-l|--loops LOOPS ] [-p|--prio PRIO] [-q|--quiet] [-t|--threads NUM] [-m|--mlockall ] [-v|--verbose ] +signaltest [ -a|--affinity NUM] [ -b|--breaktrace USEC ] [-D|--duration TIME] [-h|--help] [-l|--loops LOOPS ] [-p|--prio PRIO] [-q|--quiet] [-S|--smp] [-t|--threads NUM] [-m|--mlockall ] [-v|--verbose ] .SH OPTIONS These programs follow the usual GNU command line syntax, with long options starting with two dashes ('\-\-'). .TP +.B \-a, \-\-affinity[=PROC-SET] +Run threads on the set of processors given by PROC-SET. If PROC-SET is not +specified, all processors will be used. Threads will be assigned to processors +in the set in numeric order, in a round\-robin fashion. +.br +The set of processors can be specified as A,B,C, or A-C, or A-B,D-F, and so on*. +The ! character can be used to negate a set. For example, !B-D means to use all +available CPUs except B through D. The cpu numbers are the same as shown in the +.I processor +field in /proc/cpuinfo. See numa(3) for more information on specifying CPU sets. +* Support for CPU sets requires libnuma version >= 2. For libnuma v1, PROC-SET, +if specified, must be a single CPU number. +.TP .B \-b, \-\-breaktrace=USEC Send break trace command when latency > USEC .TP @@ -31,6 +44,11 @@ Priority of highest priority thread .B \-q, \-\-quiet print a summary only on exit .TP +.B \-S, \-\-smp +Set options for standard testing on SMP systems. 
Equivalent to using +the options: "\-t \-a" as well keeping any specified priority +equal across all threads +.TP .B \-t, \-\-threads=NUM number of threads: default=2 .TP diff --git a/src/signaltest/signaltest.c b/src/signaltest/signaltest.c index dacaa63673c4..e19877a395ba 100644 --- a/src/signaltest/signaltest.c +++ b/src/signaltest/signaltest.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include @@ -31,6 +33,7 @@ #include "error.h" #include "rt-utils.h" +#include "rt-numa.h" /* Must be power of 2 ! */ #define VALBUF_SIZE 16384 @@ -43,6 +46,7 @@ struct thread_param { unsigned long max_cycles; struct thread_stat *stats; int bufmsk; + int cpu; }; /* Struct for statistics */ @@ -79,9 +83,20 @@ void *signalthread(void *param) int policy = par->prio ? SCHED_FIFO : SCHED_OTHER; int stopped = 0; int first = 1; + pthread_t thread; + cpu_set_t mask; stat->tid = gettid(); + if (par->cpu != -1) { + CPU_ZERO(&mask); + CPU_SET(par->cpu, &mask); + thread = pthread_self(); + if (pthread_setaffinity_np(thread, sizeof(mask), &mask) != 0) + warn("Could not set CPU affinity to CPU #%d\n", + par->cpu); + } + sigemptyset(&sigset); sigaddset(&sigset, par->signal); sigprocmask(SIG_BLOCK, &sigset, NULL); @@ -164,6 +179,8 @@ static void display_help(int error) printf("signaltest V %1.2f\n", VERSION); printf("Usage:\n" "signaltest \n\n" + "-a [NUM] --affinity run thread #N on processor #N, if possible\n" + " with NUM pin all threads to the processor NUM\n" "-b USEC --breaktrace=USEC send break trace command when latency > USEC\n" "-D --duration=TIME specify a length for the test run.\n" " Append 'm', 'h', or 'd' to specify minutes, hours or\n" @@ -187,16 +204,22 @@ static int duration; static int verbose; static int quiet; static int lockall; +static struct bitmask *affinity_mask = NULL; +static int smp = 0; +static int numa = 0; +static int setaffinity = AFFINITY_UNSPECIFIED; /* Process commandline options */ -static void process_options(int argc, char *argv[]) 
+static void process_options(int argc, char *argv[], unsigned int max_cpus) { + int option_affinity = 0; int error = 0; for (;;) { int option_index = 0; /** Options for getopt */ static struct option long_options[] = { + {"affinity", optional_argument, NULL, 'a'}, {"breaktrace", required_argument, NULL, 'b'}, {"duration", required_argument, NULL, 'D'}, {"help", no_argument, NULL, 'h'}, @@ -204,15 +227,43 @@ static void process_options(int argc, char *argv[]) {"mlockall", no_argument, NULL, 'm'}, {"priority", required_argument, NULL, 'p'}, {"quiet", no_argument, NULL, 'q'}, + {"smp", no_argument, NULL, 'S'}, {"threads", required_argument, NULL, 't'}, {"verbose", no_argument, NULL, 'v'}, {NULL, 0, NULL, 0} }; - int c = getopt_long(argc, argv, "b:D:hl:mp:qt:v", + int c = getopt_long(argc, argv, "a::b:D:hl:mp:qSt:v", long_options, &option_index); if (c == -1) break; switch (c) { + case 'a': + option_affinity = 1; + /* smp sets AFFINITY_USEALL in OPT_SMP */ + if (smp) + break; + if (numa_initialize()) + fatal("Couldn't initilize libnuma"); + numa = 1; + if (optarg) { + parse_cpumask(optarg, max_cpus, &affinity_mask); + setaffinity = AFFINITY_SPECIFIED; + } else if (optind < argc && + (atoi(argv[optind]) || + argv[optind][0] == '0' || + argv[optind][0] == '!')) { + parse_cpumask(argv[optind], max_cpus, &affinity_mask); + setaffinity = AFFINITY_SPECIFIED; + } else { + setaffinity = AFFINITY_USEALL; + } + + if (setaffinity == AFFINITY_SPECIFIED && !affinity_mask) + display_help(1); + if (verbose) + printf("Using %u cpus.\n", + numa_bitmask_weight(affinity_mask)); + break; case 'b': tracelimit = atoi(optarg); break; case 'D': duration = parse_time_string(optarg); break; case '?': @@ -221,6 +272,13 @@ static void process_options(int argc, char *argv[]) case 'm': lockall = 1; break; case 'p': priority = atoi(optarg); break; case 'q': quiet = 1; break; + case 'S': + if (numa) + fatal("numa and smp options are mutually exclusive\n"); + smp = 1; + num_threads = -1; /* update 
after parsing */ + setaffinity = AFFINITY_USEALL; + break; case 't': num_threads = atoi(optarg); break; case 'v': verbose = 1; break; } @@ -232,11 +290,31 @@ static void process_options(int argc, char *argv[]) if (priority < 0 || priority > 99) error = 1; + if (num_threads == -1) + num_threads = get_available_cpus(affinity_mask); + if (num_threads < 2) error = 1; - if (error) + /* if smp wasn't requested, test for numa automatically */ + if (!smp) { + if (numa_initialize()) + fatal("Couldn't initilize libnuma"); + numa = 1; + if (setaffinity == AFFINITY_UNSPECIFIED) + setaffinity = AFFINITY_USEALL; + } + + if (option_affinity) { + if (smp) + warn("-a ignored due to smp mode\n"); + } + + if (error) { + if (affinity_mask) + numa_bitmask_free(affinity_mask); display_help(error); + } } static void sighand(int sig) @@ -273,9 +351,10 @@ int main(int argc, char **argv) struct thread_param *par; struct thread_stat *stat; int i, ret = -1; - int status; + int status, cpu; + int max_cpus = sysconf(_SC_NPROCESSORS_ONLN); - process_options(argc, argv); + process_options(argc, argv, max_cpus); if (check_privs()) exit(1); @@ -287,6 +366,16 @@ int main(int argc, char **argv) goto out; } + /* Restrict the main pid to the affinity specified by the user */ + if (affinity_mask != NULL) { + int res; + + errno = 0; + res = numa_sched_setaffinity(getpid(), affinity_mask); + if (res != 0) + warn("Couldn't setaffinity in main thread: %s\n", strerror(errno)); + } + sigemptyset(&sigset); sigaddset(&sigset, signum); sigprocmask(SIG_BLOCK, &sigset, NULL); @@ -313,6 +402,22 @@ int main(int argc, char **argv) par[i].bufmsk = VALBUF_SIZE - 1; } + switch (setaffinity) { + case AFFINITY_UNSPECIFIED: + cpu = -1; + break; + case AFFINITY_SPECIFIED: + cpu = cpu_for_thread_sp(i, max_cpus, affinity_mask); + if (verbose) + printf("Thread %d using cpu %d.\n", i, cpu); + break; + case AFFINITY_USEALL: + cpu = cpu_for_thread_ua(i, max_cpus); + break; + default: + cpu = -1; + } + par[i].id = i; par[i].prio = 
priority; #if 0 @@ -322,6 +427,7 @@ int main(int argc, char **argv) par[i].signal = signum; par[i].max_cycles = max_cycles; par[i].stats = &stat[i]; + par[i].cpu = cpu; stat[i].min = 1000000; stat[i].max = -1000000; stat[i].avg = 0.0;