diff mbox

[V3,1/2] linux-generic: Correct worker count calculation in tests

Message ID 1456181199-29634-2-git-send-email-gary.robertson@linaro.org
State Superseded
Headers show

Commit Message

gary.robertson@linaro.org Feb. 22, 2016, 10:46 p.m. UTC
During the process of addressing Linaro BUG 2027 which relates to
inaccurate reporting of available CPUs by ODP linux-generic when
running atop a kernel compiled with NO_HZ_FULL support,
a number of instances were encountered in the validation and
performance test software where incorrect methods were used to
determine the proper (or maximum) number of worker threads to create.

The use of odp_cpu_count() for this purpose is incorrect and
deprecated... odp_cpumask_default_worker() should always be used
to determine the set and number of CPUs available for creating
worker threads.

The use of odp_cpu_count() for this purpose in conjunction with the
correct means of determining available CPUs resulted in some tests
hanging in calls to odp_barrier_wait() as the barriers were initialized
to expect threads on ALL CPUs rather than on worker CPUs alone...
and there were too few worker threads created to satisfy the barriers.

The changes below correct all instances I could find of this deprecated
technique and allowed all tests to complete successfully with the
BUG 2027 patch applied. (BTW they also run correctly without that patch
after the application of the modifications included here.)

Signed-off-by: Gary S. Robertson <gary.robertson@linaro.org>
---
 test/performance/odp_atomic.c     |  9 ++++++---
 test/validation/cpumask/cpumask.c |  2 +-
 test/validation/shmem/shmem.c     |  5 ++++-
 test/validation/timer/timer.c     | 16 +++++++++-------
 4 files changed, 20 insertions(+), 12 deletions(-)

Comments

Anders Roxell Feb. 23, 2016, 9:13 p.m. UTC | #1
shortlog should say something like:
test: correct worker count calculation

On 2016-02-22 16:46, Gary S. Robertson wrote:
> During the process of addressing Linaro BUG 2027 which relates to

Add a link to the bug in this changelog.

> inaccurate reporting of available CPUs by ODP linux-generic when
> running atop a kernel compiled with NO_HZ_FULL support,

You should add a test that uses CGROUP (should be enough to expose the
bug) that fails with the current code base and passes with this fix.

> a number of instances were encountered in the validation and
> performance test software where incorrect methods were used to
> determine the proper (or maximum) number of worker threads to create.
> 
> The use of odp_cpu_count() for this purpose is incorrect and
> deprecated...

The documentation doesn't say that you should not use this for this
purpose. However, it's not actually deprecated, right?

> odp_cpumask_default_worker() should always be used
> to determine the set and number of CPUs available for creating
> worker threads.
> 
> The use of odp_cpu_count() for this purpose in conjunction with the
> correct means of determining available CPUs resulted in some tests
> hanging in calls to odp_barrier_wait() as the barriers were initialized
> to expect threads on ALL CPUs rather than on worker CPUs alone...
> and there were too few worker threads created to satisfy the barriers.
> 
> The changes below correct all instances I could find of this deprecated
> technique and allowed all tests to complete successfully with the
> BUG 2027 patch applied. (BTW they also run correctly without that patch
> after the application of the modifications included here.)
> 
> Signed-off-by: Gary S. Robertson <gary.robertson@linaro.org>
> ---
>  test/performance/odp_atomic.c     |  9 ++++++---
>  test/validation/cpumask/cpumask.c |  2 +-
>  test/validation/shmem/shmem.c     |  5 ++++-
>  test/validation/timer/timer.c     | 16 +++++++++-------
>  4 files changed, 20 insertions(+), 12 deletions(-)
> 
> diff --git a/test/performance/odp_atomic.c b/test/performance/odp_atomic.c
> index 067329b..7e2e22f 100644
> --- a/test/performance/odp_atomic.c
> +++ b/test/performance/odp_atomic.c
> @@ -303,6 +303,8 @@ int odp_test_thread_exit(pthrd_arg *arg)
>  /** test init globals and call odp_init_global() */
>  int odp_test_global_init(void)
>  {
> +	odp_cpumask_t unused;
> +
>  	memset(thread_tbl, 0, sizeof(thread_tbl));
>  
>  	if (odp_init_global(NULL, NULL)) {
> @@ -310,7 +312,8 @@ int odp_test_global_init(void)
>  		return -1;
>  	}
>  
> -	num_workers = odp_cpu_count();
> +	num_workers = odp_cpumask_default_worker(&unused, 0);
> +
>  	/* force to max CPU count */
>  	if (num_workers > MAX_WORKERS)
>  		num_workers = MAX_WORKERS;
> @@ -378,7 +381,7 @@ int main(int argc, char *argv[])
>  			goto err_exit;
>  		}
>  		if (test_type < TEST_MIX || test_type > TEST_MAX ||
> -		    pthrdnum > odp_cpu_count() || pthrdnum < 0) {
> +		    pthrdnum > num_workers || pthrdnum < 0) {
>  			usage();
>  			goto err_exit;
>  		}
> @@ -386,7 +389,7 @@ int main(int argc, char *argv[])
>  	}
>  
>  	if (pthrdnum == 0)
> -		pthrdnum = odp_cpu_count();
> +		pthrdnum = num_workers;
>  
>  	test_atomic_init();
>  	test_atomic_store();
> diff --git a/test/validation/cpumask/cpumask.c b/test/validation/cpumask/cpumask.c
> index 2419f47..1bb8f8d 100644
> --- a/test/validation/cpumask/cpumask.c
> +++ b/test/validation/cpumask/cpumask.c
> @@ -67,7 +67,7 @@ void cpumask_test_odp_cpumask_def(void)
>  	mask_count = odp_cpumask_count(&mask);
>  	CU_ASSERT(mask_count == num_control);
>  
> -	CU_ASSERT(num_control == 1);
> +	CU_ASSERT(num_control >= 1);

Good catch, belongs in its own patch.

>  	CU_ASSERT(num_worker <= available_cpus);
>  	CU_ASSERT(num_worker > 0);
>  }
> diff --git a/test/validation/shmem/shmem.c b/test/validation/shmem/shmem.c
> index 08425e6..be8bd6f 100644
> --- a/test/validation/shmem/shmem.c
> +++ b/test/validation/shmem/shmem.c
> @@ -6,6 +6,8 @@
>  
>  #include <odp.h>
>  #include <odp_cunit_common.h>
> +#include <odp/cpumask.h>
> +

Not needed.

>  #include "shmem.h"
>  
>  #define ALIGE_SIZE  (128)
> @@ -52,6 +54,7 @@ void shmem_test_odp_shm_sunnyday(void)
>  	pthrd_arg thrdarg;
>  	odp_shm_t shm;
>  	test_shared_data_t *test_shared_data;
> +	odp_cpumask_t unused;
>  
>  	shm = odp_shm_reserve(TESTNAME,
>  			      sizeof(test_shared_data_t), ALIGE_SIZE, 0);
> @@ -70,7 +73,7 @@ void shmem_test_odp_shm_sunnyday(void)
>  	test_shared_data->foo = TEST_SHARE_FOO;
>  	test_shared_data->bar = TEST_SHARE_BAR;
>  
> -	thrdarg.numthrds = odp_cpu_count();
> +	thrdarg.numthrds = odp_cpumask_default_worker(&unused, 0);
>  
>  	if (thrdarg.numthrds > MAX_WORKERS)
>  		thrdarg.numthrds = MAX_WORKERS;
> diff --git a/test/validation/timer/timer.c b/test/validation/timer/timer.c
> index 004670a..bee3e8d 100644
> --- a/test/validation/timer/timer.c
> +++ b/test/validation/timer/timer.c
> @@ -15,6 +15,7 @@
>  
>  #include <time.h>
>  #include <odp.h>
> +#include <odp/helper/linux.h>
>  #include "odp_cunit_common.h"
>  #include "test_debug.h"
>  #include "timer.h"
> @@ -41,12 +42,6 @@ static odp_atomic_u32_t ndelivtoolate;
>   * caches may make this number lower than the capacity of the pool  */
>  static odp_atomic_u32_t timers_allocated;
>  
> -/** @private min() function */
> -static int min(int a, int b)
> -{
> -	return a < b ? a : b;
> -}
> -
>  /* @private Timer helper structure */
>  struct test_timer {
>  	odp_timer_t tim; /* Timer handle */
> @@ -457,10 +452,17 @@ void timer_test_odp_timer_all(void)
>  	int rc;
>  	odp_pool_param_t params;
>  	odp_timer_pool_param_t tparam;
> +	odp_cpumask_t unused;
> +
>  	/* Reserve at least one core for running other processes so the timer
>  	 * test hopefully can run undisturbed and thus get better timing
>  	 * results. */
> -	int num_workers = min(odp_cpu_count() - 1, MAX_WORKERS);
> +	int num_workers = odp_cpumask_default_worker(&unused, 0);
> +
> +	/* force to max CPU count */
> +	if (num_workers > MAX_WORKERS)
> +		num_workers = MAX_WORKERS;
> +

Why do we need MAX_WORKERS when we ask for odp_cpumask_default_worker?

Cheers,
Anders
diff mbox

Patch

diff --git a/test/performance/odp_atomic.c b/test/performance/odp_atomic.c
index 067329b..7e2e22f 100644
--- a/test/performance/odp_atomic.c
+++ b/test/performance/odp_atomic.c
@@ -303,6 +303,8 @@  int odp_test_thread_exit(pthrd_arg *arg)
 /** test init globals and call odp_init_global() */
 int odp_test_global_init(void)
 {
+	odp_cpumask_t unused;
+
 	memset(thread_tbl, 0, sizeof(thread_tbl));
 
 	if (odp_init_global(NULL, NULL)) {
@@ -310,7 +312,8 @@  int odp_test_global_init(void)
 		return -1;
 	}
 
-	num_workers = odp_cpu_count();
+	num_workers = odp_cpumask_default_worker(&unused, 0);
+
 	/* force to max CPU count */
 	if (num_workers > MAX_WORKERS)
 		num_workers = MAX_WORKERS;
@@ -378,7 +381,7 @@  int main(int argc, char *argv[])
 			goto err_exit;
 		}
 		if (test_type < TEST_MIX || test_type > TEST_MAX ||
-		    pthrdnum > odp_cpu_count() || pthrdnum < 0) {
+		    pthrdnum > num_workers || pthrdnum < 0) {
 			usage();
 			goto err_exit;
 		}
@@ -386,7 +389,7 @@  int main(int argc, char *argv[])
 	}
 
 	if (pthrdnum == 0)
-		pthrdnum = odp_cpu_count();
+		pthrdnum = num_workers;
 
 	test_atomic_init();
 	test_atomic_store();
diff --git a/test/validation/cpumask/cpumask.c b/test/validation/cpumask/cpumask.c
index 2419f47..1bb8f8d 100644
--- a/test/validation/cpumask/cpumask.c
+++ b/test/validation/cpumask/cpumask.c
@@ -67,7 +67,7 @@  void cpumask_test_odp_cpumask_def(void)
 	mask_count = odp_cpumask_count(&mask);
 	CU_ASSERT(mask_count == num_control);
 
-	CU_ASSERT(num_control == 1);
+	CU_ASSERT(num_control >= 1);
 	CU_ASSERT(num_worker <= available_cpus);
 	CU_ASSERT(num_worker > 0);
 }
diff --git a/test/validation/shmem/shmem.c b/test/validation/shmem/shmem.c
index 08425e6..be8bd6f 100644
--- a/test/validation/shmem/shmem.c
+++ b/test/validation/shmem/shmem.c
@@ -6,6 +6,8 @@ 
 
 #include <odp.h>
 #include <odp_cunit_common.h>
+#include <odp/cpumask.h>
+
 #include "shmem.h"
 
 #define ALIGE_SIZE  (128)
@@ -52,6 +54,7 @@  void shmem_test_odp_shm_sunnyday(void)
 	pthrd_arg thrdarg;
 	odp_shm_t shm;
 	test_shared_data_t *test_shared_data;
+	odp_cpumask_t unused;
 
 	shm = odp_shm_reserve(TESTNAME,
 			      sizeof(test_shared_data_t), ALIGE_SIZE, 0);
@@ -70,7 +73,7 @@  void shmem_test_odp_shm_sunnyday(void)
 	test_shared_data->foo = TEST_SHARE_FOO;
 	test_shared_data->bar = TEST_SHARE_BAR;
 
-	thrdarg.numthrds = odp_cpu_count();
+	thrdarg.numthrds = odp_cpumask_default_worker(&unused, 0);
 
 	if (thrdarg.numthrds > MAX_WORKERS)
 		thrdarg.numthrds = MAX_WORKERS;
diff --git a/test/validation/timer/timer.c b/test/validation/timer/timer.c
index 004670a..bee3e8d 100644
--- a/test/validation/timer/timer.c
+++ b/test/validation/timer/timer.c
@@ -15,6 +15,7 @@ 
 
 #include <time.h>
 #include <odp.h>
+#include <odp/helper/linux.h>
 #include "odp_cunit_common.h"
 #include "test_debug.h"
 #include "timer.h"
@@ -41,12 +42,6 @@  static odp_atomic_u32_t ndelivtoolate;
  * caches may make this number lower than the capacity of the pool  */
 static odp_atomic_u32_t timers_allocated;
 
-/** @private min() function */
-static int min(int a, int b)
-{
-	return a < b ? a : b;
-}
-
 /* @private Timer helper structure */
 struct test_timer {
 	odp_timer_t tim; /* Timer handle */
@@ -457,10 +452,17 @@  void timer_test_odp_timer_all(void)
 	int rc;
 	odp_pool_param_t params;
 	odp_timer_pool_param_t tparam;
+	odp_cpumask_t unused;
+
 	/* Reserve at least one core for running other processes so the timer
 	 * test hopefully can run undisturbed and thus get better timing
 	 * results. */
-	int num_workers = min(odp_cpu_count() - 1, MAX_WORKERS);
+	int num_workers = odp_cpumask_default_worker(&unused, 0);
+
+	/* force to max CPU count */
+	if (num_workers > MAX_WORKERS)
+		num_workers = MAX_WORKERS;
+
 	/* On a single-CPU machine run at least one thread */
 	if (num_workers < 1)
 		num_workers = 1;