
test: odp_sched_latency: robust draining of queues

Message ID 20170424205853.7145-1-brian.brooks@arm.com
State Accepted
Commit 8c35a494023c7e5de127ed6de8ed8562b36f3cac

Commit Message

Brian Brooks April 24, 2017, 8:58 p.m. UTC
From: Ola Liljedahl <ola.liljedahl@arm.com>


In order to robustly drain all queues when the benchmark has
ended, we enqueue a special event on every queue and invoke
the scheduler until all such events have been received.

Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

Reviewed-by: Brian Brooks <brian.brooks@arm.com>

---
 test/common_plat/performance/odp_sched_latency.c | 51 ++++++++++++++++++------
 1 file changed, 38 insertions(+), 13 deletions(-)

-- 
2.12.2

Comments

Bill Fischofer April 24, 2017, 10 p.m. UTC | #1
On Mon, Apr 24, 2017 at 3:58 PM, Brian Brooks <brian.brooks@arm.com> wrote:

> From: Ola Liljedahl <ola.liljedahl@arm.com>
>
> In order to robustly drain all queues when the benchmark has
> ended, we enqueue a special event on every queue and invoke
> the scheduler until all such events have been received.
>
> Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>
> Reviewed-by: Brian Brooks <brian.brooks@arm.com>
> ---
>  test/common_plat/performance/odp_sched_latency.c | 51 ++++++++++++++++++------
>  1 file changed, 38 insertions(+), 13 deletions(-)
>
> diff --git a/test/common_plat/performance/odp_sched_latency.c
> b/test/common_plat/performance/odp_sched_latency.c
> index 2b28cd7b..7ba99fc6 100644
> --- a/test/common_plat/performance/odp_sched_latency.c
> +++ b/test/common_plat/performance/odp_sched_latency.c
> @@ -57,9 +57,10 @@ ODP_STATIC_ASSERT(LO_PRIO_QUEUES <= MAX_QUEUES, "Too many LO priority queues");
>
>  /** Test event types */
>  typedef enum {
> -       WARM_UP, /**< Warm up event */
> -       TRAFFIC, /**< Event used only as traffic load */
> -       SAMPLE   /**< Event used to measure latency */
> +       WARM_UP,  /**< Warm up event */
> +       COOL_DOWN,/**< Last event on queue */
> +       TRAFFIC,  /**< Event used only as traffic load */
> +       SAMPLE    /**< Event used to measure latency */
>  } event_type_t;
>
>  /** Test event */
> @@ -114,16 +115,40 @@ typedef struct {
>   *
>   * Retry to be sure that all buffers have been scheduled.
>   */
> -static void clear_sched_queues(void)
> +static void clear_sched_queues(test_globals_t *globals)
>  {
>         odp_event_t ev;
> +       odp_buffer_t buf;
> +       test_event_t *event;
> +       int i, j;
> +       uint32_t numtogo = 0;
>

int numtogo would be more standard here, and consistent with i, j
immediately above.

> -       while (1) {
> -               ev = odp_schedule(NULL, ODP_SCHED_NO_WAIT);
> -
> -               if (ev == ODP_EVENT_INVALID)
> -                       break;
> -
> +       /* Enqueue special cool_down events on all queues (one per queue) */
> +       for (i = 0; i < NUM_PRIOS; i++) {
> +               for (j = 0; j < globals->args.prio[i].queues; j++) {
> +                       buf = odp_buffer_alloc(globals->pool);
> +                       if (buf == ODP_BUFFER_INVALID) {
> +                               LOG_ERR("Buffer alloc failed.\n");
> +                               return;
> +                       }

This isn't terribly robust. In the unlikely event that this call fails
you're leaving a bunch of events (including the previously successfully
allocated COOL_DOWN markers) on the queues. At minimum you should just
break out of this "marking" phase and proceed to drain the remaining queues
for the numtogo markers already on them. More robust would be to
preallocate all the markers needed up front or else do each queue
individually to avoid having to buffer the markers.
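
For illustration, a preallocation variant might look roughly like this. It
is a sketch against the test's own types (NUM_PRIOS, MAX_QUEUES,
test_globals_t, test_event_t); enq_cool_down_markers is a made-up name and
error handling is deliberately minimal:

static int enq_cool_down_markers(test_globals_t *globals)
{
	odp_buffer_t bufs[NUM_PRIOS * MAX_QUEUES];
	test_event_t *event;
	int i, j, k, n = 0;

	/* Phase 1: allocate every marker before touching any queue, so a
	 * failed allocation can be unwound without leaving markers behind */
	for (i = 0; i < NUM_PRIOS; i++)
		for (j = 0; j < globals->args.prio[i].queues; j++) {
			bufs[n] = odp_buffer_alloc(globals->pool);
			if (bufs[n] == ODP_BUFFER_INVALID) {
				for (k = 0; k < n; k++)
					odp_buffer_free(bufs[k]);
				return -1;
			}
			n++;
		}

	/* Phase 2: mark and enqueue; allocation can no longer fail here */
	n = 0;
	for (i = 0; i < NUM_PRIOS; i++)
		for (j = 0; j < globals->args.prio[i].queues; j++) {
			event = odp_buffer_addr(bufs[n]);
			event->type = COOL_DOWN;
			if (odp_queue_enq(globals->queue[i][j],
					  odp_buffer_to_event(bufs[n])))
				return -1;
			n++;
		}

	return n; /* number of markers the drain loop must wait for */
}

The point is that phase 1 has no side effects on the queues, so a failure
there can be unwound completely, and phase 2 never leaves a partial set of
markers behind on an allocation error.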


> +                       event = odp_buffer_addr(buf);
> +                       event->type = COOL_DOWN;
> +                       ev = odp_buffer_to_event(buf);
> +                       if (odp_queue_enq(globals->queue[i][j], ev)) {
> +                               LOG_ERR("Queue enqueue failed.\n");
> +                               odp_event_free(ev);
> +                               return;
> +                       }
> +                       numtogo++;
> +               }
> +       }
> +       /* Invoke scheduler until all cool_down events have been received */
> +       while (numtogo != 0) {

while (numtogo > 0) would be more standard here.

> +               ev = odp_schedule(NULL, ODP_SCHED_WAIT);
> +               buf = odp_buffer_from_event(ev);
> +               event = odp_buffer_addr(buf);
> +               if (event->type == COOL_DOWN)
> +                       numtogo--;
>                 odp_event_free(ev);
>         }
>  }
> @@ -394,10 +419,10 @@ static int test_schedule(int thr, test_globals_t *globals)
>
>         odp_barrier_wait(&globals->barrier);
>
> -       clear_sched_queues();
> -
> -       if (thr == MAIN_THREAD)
> +       if (thr == MAIN_THREAD) {
> +               clear_sched_queues(globals);
>                 print_results(globals);
> +       }
>
>         return 0;
>  }
> --
> 2.12.2
Ola Liljedahl April 24, 2017, 10:26 p.m. UTC | #2
(Responding from PoC Outlook)

From:  Bill Fischofer <bill.fischofer@linaro.org>

Date:  Tuesday, 25 April 2017 at 00:00
To:  Brian Brooks <Brian.Brooks@arm.com>
Cc:  lng-odp-forward <lng-odp@lists.linaro.org>, Ola Liljedahl
<ola.liljedahl@arm.com>
Subject:  Re: [lng-odp] [PATCH] test: odp_sched_latency: robust draining
of queues




On Mon, Apr 24, 2017 at 3:58 PM, Brian Brooks
<brian.brooks@arm.com> wrote:

From: Ola Liljedahl <ola.liljedahl@arm.com>


In order to robustly drain all queues when the benchmark has
ended, we enqueue a special event on every queue and invoke
the scheduler until all such events have been received.

Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

Reviewed-by: Brian Brooks <brian.brooks@arm.com>

---
 test/common_plat/performance/odp_sched_latency.c | 51
++++++++++++++++++------
 1 file changed, 38 insertions(+), 13 deletions(-)

 /** Test event */
@@ -114,16 +115,40 @@ typedef struct {
  *
  * Retry to be sure that all buffers have been scheduled.
  */
-static void clear_sched_queues(void)
+static void clear_sched_queues(test_globals_t *globals)
 {
        odp_event_t ev;
+       odp_buffer_t buf;
+       test_event_t *event;
+       int i, j;
+       uint32_t numtogo = 0;



int numtogo would be more standard here, and consistent with i, j
immediately above.
[Ola] Generally I prefer unsigneds for variables that should never be
negative. Is there a good reason for using signed ints instead?


-       while (1) {
-               ev = odp_schedule(NULL, ODP_SCHED_NO_WAIT);
-
-               if (ev == ODP_EVENT_INVALID)
-                       break;
-
+       /* Enqueue special cool_down events on all queues (one per queue) */
+       for (i = 0; i < NUM_PRIOS; i++) {
+               for (j = 0; j < globals->args.prio[i].queues; j++) {
+                       buf = odp_buffer_alloc(globals->pool);
+                       if (buf == ODP_BUFFER_INVALID) {
+                               LOG_ERR("Buffer alloc failed.\n");
+                               return;
+                       }



This isn't terribly robust. In the unlikely event that this call fails
you're leaving a bunch of events (including the previously successfully
allocated COOL_DOWN markers) on the queues. At minimum you should just
break out of this "marking" phase and proceed to drain the remaining
queues for the numtogo markers already on them.
[Ola] What does partial draining achieve?

 More robust would be to preallocate all the markers needed up front or
else do each queue individually to avoid having to buffer the markers.
[Ola] Using only one marker event and cycling through the queues while
draining them is an interesting idea. I might code this and see how it
looks.
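
A rough sketch of what that single-marker variant might look like
(hypothetical code, not the committed patch; it reuses the
clear_sched_queues() locals from above):

	buf = odp_buffer_alloc(globals->pool);
	if (buf == ODP_BUFFER_INVALID)
		return;
	event = odp_buffer_addr(buf);
	event->type = COOL_DOWN;

	/* Walk the single marker through every queue in turn */
	for (i = 0; i < NUM_PRIOS; i++) {
		for (j = 0; j < globals->args.prio[i].queues; j++) {
			if (odp_queue_enq(globals->queue[i][j],
					  odp_buffer_to_event(buf)))
				return;
			for (;;) {
				ev = odp_schedule(NULL, ODP_SCHED_WAIT);
				event = odp_buffer_addr(
						odp_buffer_from_event(ev));
				if (event->type == COOL_DOWN)
					break; /* marker is back, reuse it */
				odp_event_free(ev);
			}
		}
	}
	odp_buffer_free(buf);

Only one marker buffer is ever needed, at the cost of serialising the
drain one queue at a time.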

[Ola] AFAIK, this is robust in the absence of errors.
You are raising the bar. The code is full of "return -1" if some
unexpected error occurs. Sometimes some known resource (e.g. event) is
freed. I don't think this program is guaranteed to clean up all resources
if an error occurs. Even if this was the goal, actually verifying this
would be cumbersome, achieving full code coverage for all error-related
code paths. The used coding style also makes visual inspection difficult.

Personally I am happy if (all) resources are freed for successful
executions but don't think it is necessary to spend that much effort on
cleaning up after unexpected errors (which probably increases the risk of
new errors; trying to do too much when the system is already in an
unexpected and potentially inconsistent state is asking for trouble),
better to just die quickly. You don't make systems more robust by adding
complexity.

+                       event = odp_buffer_addr(buf);
+                       event->type = COOL_DOWN;
+                       ev = odp_buffer_to_event(buf);
+                       if (odp_queue_enq(globals->queue[i][j], ev)) {
+                               LOG_ERR("Queue enqueue failed.\n");
+                               odp_event_free(ev);
+                               return;
+                       }
+                       numtogo++;
+               }
+       }
+       /* Invoke scheduler until all cool_down events have been received */
+       while (numtogo != 0) {



while (numtogo > 0) would be more standard here.
[Ola] I don't want to silently hide bugs by writing code which "fixes"
other errors in the code (e.g. by terminating also for "numtogo < 0" we
hide potential bugs that make numtogo negative even though this should
not be possible).

+               ev = odp_schedule(NULL, ODP_SCHED_WAIT);
+               buf = odp_buffer_from_event(ev);
+               event = odp_buffer_addr(buf);
+               if (event->type == COOL_DOWN)
+                       numtogo--;
                odp_event_free(ev);
        }
 }
@@ -394,10 +419,10 @@ static int test_schedule(int thr, test_globals_t
*globals)

        odp_barrier_wait(&globals->barrier);

-       clear_sched_queues();
-
-       if (thr == MAIN_THREAD)
+       if (thr == MAIN_THREAD) {
+               clear_sched_queues(globals);
                print_results(globals);
+       }

        return 0;
 }
--
2.12.2

diff --git a/test/common_plat/performance/odp_sched_latency.c
b/test/common_plat/performance/odp_sched_latency.c
index 2b28cd7b..7ba99fc6 100644
--- a/test/common_plat/performance/odp_sched_latency.c
+++ b/test/common_plat/performance/odp_sched_latency.c
@@ -57,9 +57,10 @@ ODP_STATIC_ASSERT(LO_PRIO_QUEUES <= MAX_QUEUES, "Too
many LO priority queues");

 /** Test event types */
 typedef enum {
-       WARM_UP, /**< Warm up event */
-       TRAFFIC, /**< Event used only as traffic load */
-       SAMPLE   /**< Event used to measure latency */
+       WARM_UP,  /**< Warm up event */
+       COOL_DOWN,/**< Last event on queue */
+       TRAFFIC,  /**< Event used only as traffic load */
+       SAMPLE    /**< Event used to measure latency */
 } event_type_t;


Bill Fischofer April 24, 2017, 11:20 p.m. UTC | #3
On Mon, Apr 24, 2017 at 5:26 PM, Ola Liljedahl <Ola.Liljedahl@arm.com>
wrote:

> (Responding from PoC Outlook)
>
> From:  Bill Fischofer <bill.fischofer@linaro.org>
> Date:  Tuesday, 25 April 2017 at 00:00
> To:  Brian Brooks <Brian.Brooks@arm.com>
> Cc:  lng-odp-forward <lng-odp@lists.linaro.org>, Ola Liljedahl
> <ola.liljedahl@arm.com>
> Subject:  Re: [lng-odp] [PATCH] test: odp_sched_latency: robust draining
> of queues
>
> On Mon, Apr 24, 2017 at 3:58 PM, Brian Brooks
> <brian.brooks@arm.com> wrote:
>
> From: Ola Liljedahl <ola.liljedahl@arm.com>
>
> In order to robustly drain all queues when the benchmark has
> ended, we enqueue a special event on every queue and invoke
> the scheduler until all such events have been received.
>
> Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>
> Reviewed-by: Brian Brooks <brian.brooks@arm.com>
> ---
>  test/common_plat/performance/odp_sched_latency.c | 51 ++++++++++++++++++------
>  1 file changed, 38 insertions(+), 13 deletions(-)
>
> diff --git a/test/common_plat/performance/odp_sched_latency.c
> b/test/common_plat/performance/odp_sched_latency.c
> index 2b28cd7b..7ba99fc6 100644
> --- a/test/common_plat/performance/odp_sched_latency.c
> +++ b/test/common_plat/performance/odp_sched_latency.c
> @@ -57,9 +57,10 @@ ODP_STATIC_ASSERT(LO_PRIO_QUEUES <= MAX_QUEUES, "Too many LO priority queues");
>
>  /** Test event types */
>  typedef enum {
> -       WARM_UP, /**< Warm up event */
> -       TRAFFIC, /**< Event used only as traffic load */
> -       SAMPLE   /**< Event used to measure latency */
> +       WARM_UP,  /**< Warm up event */
> +       COOL_DOWN,/**< Last event on queue */
> +       TRAFFIC,  /**< Event used only as traffic load */
> +       SAMPLE    /**< Event used to measure latency */
>  } event_type_t;
>
>  /** Test event */
> @@ -114,16 +115,40 @@ typedef struct {
>   *
>   * Retry to be sure that all buffers have been scheduled.
>   */
> -static void clear_sched_queues(void)
> +static void clear_sched_queues(test_globals_t *globals)
>  {
>         odp_event_t ev;
> +       odp_buffer_t buf;
> +       test_event_t *event;
> +       int i, j;
> +       uint32_t numtogo = 0;
>
> int numtogo would be more standard here, and consistent with i, j
> immediately above.
> [Ola] Generally I prefer unsigneds for variables that should never be
> negative. Is there a good reason for using signed ints instead?
>
> By that logic i and j should be unsigned as well, but you always see them
> as int. int is usual because it's easier to type and just as performant for
> these sort of counter-type operations.
>
> -       while (1) {
> -               ev = odp_schedule(NULL, ODP_SCHED_NO_WAIT);
> -
> -               if (ev == ODP_EVENT_INVALID)
> -                       break;
> -
> +       /* Enqueue special cool_down events on all queues (one per queue) */
> +       for (i = 0; i < NUM_PRIOS; i++) {
> +               for (j = 0; j < globals->args.prio[i].queues; j++) {
> +                       buf = odp_buffer_alloc(globals->pool);
> +                       if (buf == ODP_BUFFER_INVALID) {
> +                               LOG_ERR("Buffer alloc failed.\n");
> +                               return;
> +                       }
>
> This isn't terribly robust. In the unlikely event that this call fails
> you're leaving a bunch of events (including the previously successfully
> allocated COOL_DOWN markers) on the queues. At minimum you should just
> break out of this "marking" phase and proceed to drain the remaining
> queues for the numtogo markers already on them.
> [Ola] What does partial draining achieve?

It avoids leaking the markers that were allocated before we ran out of
buffer space for whatever reason. The return here isn't terminating the
application, just telling the caller that the queues have been cleared,
which in this case they have not. Since the queues are being reused for the
next round of processing, the upper layer of the application would need to
deal with one of these latent markers showing up, which would get messy.
I'd be happy with an abort() here if that's preferable.
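
Concretely, the fail-fast variant would just swap the early return for
abort() (a sketch; abort() comes from <stdlib.h>):

	buf = odp_buffer_alloc(globals->pool);
	if (buf == ODP_BUFFER_INVALID) {
		LOG_ERR("Buffer alloc failed.\n");
		abort(); /* fail fast: queues may still hold live events */
	}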


>
>  More robust would be to preallocate all the markers needed up front or
> else do each queue individually to avoid having to buffer the markers.
> [Ola] Using only one marker event and cycling through the queues while
> draining them is an interesting idea. I might code this and see how it
> looks.
>
> [Ola] AFAIK, this is robust in the absence of errors.
> You are raising the bar. The code is full of "return -1" if some
> unexpected error occurs. Sometimes some known resource (e.g. event) is
> freed. I don't think this program is guaranteed to clean up all resources
> if an error occurs. Even if this was the goal, actually verifying this
> would be cumbersome, achieving full code coverage for all error-related
> code paths. The used coding style also makes visual inspection difficult.
>
> Personally I am happy if (all) resources are freed for successful
> executions but don't think it is necessary to spend that much effort on
> cleaning up after unexpected errors (which probably increases the risk of
> new errors; trying to do too much when the system is already in an
> unexpected and potentially inconsistent state is asking for trouble),
> better to just die quickly. You don't make systems more robust by adding
> complexity.

All the more reason to simply abort() here, which would be fine since this
is just a test.

> +                       event = odp_buffer_addr(buf);
> +                       event->type = COOL_DOWN;
> +                       ev = odp_buffer_to_event(buf);
> +                       if (odp_queue_enq(globals->queue[i][j], ev)) {
> +                               LOG_ERR("Queue enqueue failed.\n");
> +                               odp_event_free(ev);
> +                               return;
> +                       }
> +                       numtogo++;
> +               }
> +       }
> +       /* Invoke scheduler until all cool_down events have been received */
> +       while (numtogo != 0) {
>
> while (numtogo > 0) would be more standard here.
> [Ola] I don't want to silently hide bugs by writing code which "fixes"
> other errors in the code (e.g. by terminating also for "numtogo < 0" we
> hide potential bugs that make numtogo negative even though this should
> not be possible).

If something happened to make numtogo negative (or hugely positive in the
case of unsigned) how would that be better? The standard (since Dijkstra)
method of guaranteed loop termination is to test for var > 0 against a
monotonically decrementing control var.

> +               ev = odp_schedule(NULL, ODP_SCHED_WAIT);
> +               buf = odp_buffer_from_event(ev);
> +               event = odp_buffer_addr(buf);
> +               if (event->type == COOL_DOWN)
> +                       numtogo--;
>                 odp_event_free(ev);
>         }
>  }
> @@ -394,10 +419,10 @@ static int test_schedule(int thr, test_globals_t *globals)
>
>         odp_barrier_wait(&globals->barrier);
>
> -       clear_sched_queues();
> -
> -       if (thr == MAIN_THREAD)
> +       if (thr == MAIN_THREAD) {
> +               clear_sched_queues(globals);
>                 print_results(globals);
> +       }
>
>         return 0;
>  }
> --
> 2.12.2
>
Savolainen, Petri (Nokia - FI/Espoo) April 25, 2017, 10:26 a.m. UTC | #4
> -----Original Message-----
> From: lng-odp [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of
> Brian Brooks
> Sent: Monday, April 24, 2017 11:59 PM
> To: lng-odp@lists.linaro.org
> Cc: Ola Liljedahl <ola.liljedahl@arm.com>
> Subject: [lng-odp] [PATCH] test: odp_sched_latency: robust draining of
> queues
>
> From: Ola Liljedahl <ola.liljedahl@arm.com>
>
> In order to robustly drain all queues when the benchmark has
> ended, we enqueue a special event on every queue and invoke
> the scheduler until all such events have been received.
>

	odp_schedule_pause();

	while (1) {
		ev = odp_schedule(&src_queue, ODP_SCHED_NO_WAIT);

		if (ev == ODP_EVENT_INVALID)
			break;

		if (odp_queue_enq(src_queue, ev)) {
			LOG_ERR("[%i] Queue enqueue failed.\n", thr);
			odp_event_free(ev);
			return -1;
		}
	}

	odp_schedule_resume();

	odp_barrier_wait(&globals->barrier);

	clear_sched_queues();


What is the issue that this patch fixes? This sequence should be quite
robust already, since no new enqueues happen after the barrier. In a simple
test like this, the latency from the last enq() (through the barrier) to
the schedule loop (in clear_sched_queues()) could be overcome just by not
exiting after the first EVENT_INVALID from the scheduler, but only after N
EVENT_INVALIDs in a row.
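
For concreteness, a minimal sketch of that N-in-a-row heuristic (N = 10 is
an arbitrary illustration value, not something from the patch or the ODP
API):

	odp_event_t ev;
	int idle = 0;

	/* Treat the queues as drained only after N consecutive
	 * ODP_EVENT_INVALID returns instead of a single one */
	while (idle < 10) {
		ev = odp_schedule(NULL, ODP_SCHED_NO_WAIT);
		if (ev == ODP_EVENT_INVALID) {
			idle++;
			continue;
		}
		idle = 0;
		odp_event_free(ev);
	}

As Ola's reply below points out, no finite N gives a hard guarantee on a
loosely synchronised implementation; it only shrinks the window.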

Also in your patch, thread should exit only after scheduler returns EVENT_INVALID.


-Petri
Ola Liljedahl April 25, 2017, 10:40 a.m. UTC | #5
On 25/04/2017, 12:26, "Savolainen, Petri (Nokia - FI/Espoo)"
<petri.savolainen@nokia-bell-labs.com> wrote:

> -----Original Message-----
> From: lng-odp [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of
> Brian Brooks
> Sent: Monday, April 24, 2017 11:59 PM
> To: lng-odp@lists.linaro.org
> Cc: Ola Liljedahl <ola.liljedahl@arm.com>
> Subject: [lng-odp] [PATCH] test: odp_sched_latency: robust draining of
> queues
>
> From: Ola Liljedahl <ola.liljedahl@arm.com>
>
> In order to robustly drain all queues when the benchmark has
> ended, we enqueue a special event on every queue and invoke
> the scheduler until all such events have been received.
>
>	odp_schedule_pause();
>
>	while (1) {
>		ev = odp_schedule(&src_queue, ODP_SCHED_NO_WAIT);
>
>		if (ev == ODP_EVENT_INVALID)
>			break;
>
>		if (odp_queue_enq(src_queue, ev)) {
>			LOG_ERR("[%i] Queue enqueue failed.\n", thr);
>			odp_event_free(ev);
>			return -1;
>		}
>	}
>
>	odp_schedule_resume();
>
>	odp_barrier_wait(&globals->barrier);
>
>	clear_sched_queues();


> What is the issue that this patch fixes?

The issue is that odp_schedule() (even with a timeout) returns
ODP_EVENT_INVALID even though the queues are not actually empty. In a
loosely synchronised (e.g. using weak ordering) queue and scheduler
implementation, odp_schedule() can spuriously return EVENT_INVALID. This
happens infrequently on some A57 targets.

> This sequence should be quite robust already since no new enqueues happen
> after the barrier. In a simple test code like this, the latency from last
> enq() (through the barrier) to schedule loop (in clear_sched_queues())
> could be overcome just by not exiting after the first EVENT_INVALID from
> scheduler, but after N EVENT_INVALIDs in a row.

In the scalable scheduler & queue implementation, it can take some time
before enqueued events become visible and the corresponding ODP queues are
pushed to some scheduler queue. So odp_schedule() can return
ODP_EVENT_INVALID even when called with a timeout. There is no timeout and
no amount of INVALID_EVENT returns that *guarantees* the queues have been
drained.

> Also in your patch, thread should exit only after scheduler returns
> EVENT_INVALID.

Since the cool_down event is the last event on all queues (as they are
enqueued after all threads have passed the barrier), when we have received
all cool_down events we know that there are no other events on these
queues. There is no need to call odp_schedule() until it returns
ODP_EVENT_INVALID (which can happen spuriously anyway, so it doesn't
signify anything).

> -Petri
Savolainen, Petri (Nokia - FI/Espoo) April 25, 2017, 10:54 a.m. UTC | #6
> Also in your patch, thread should exit only after scheduler returns
> EVENT_INVALID.
> Since the cool_down event is the last event on all queues (as they are
> enqueued after all threads have passed the barrier), when we have received
> all cool_down events we know that there are no other events on these
> queues. No need to call odp_schedule() until it returns ODP_EVENT_INVALID
> (which can happen spuriously anyway so it doesn't signify anything).

It signifies release of the schedule context. For a robust exit, the
application should release the current context.

-Petri
Ola Liljedahl April 25, 2017, 10:55 a.m. UTC | #7
Another thing.


On 25/04/2017, 12:26, "Savolainen, Petri (Nokia - FI/Espoo)"
<petri.savolainen@nokia-bell-labs.com> wrote:

>
>> -----Original Message-----
>> From: lng-odp [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of
>> Brian Brooks
>> Sent: Monday, April 24, 2017 11:59 PM
>> To: lng-odp@lists.linaro.org
>> Cc: Ola Liljedahl <ola.liljedahl@arm.com>
>> Subject: [lng-odp] [PATCH] test: odp_sched_latency: robust draining of
>> queues
>>
>> From: Ola Liljedahl <ola.liljedahl@arm.com>
>>
>> In order to robustly drain all queues when the benchmark has
>> ended, we enqueue a special event on every queue and invoke
>> the scheduler until all such events have been received.
>>
>
>	odp_schedule_pause();
>
>	while (1) {
>		ev = odp_schedule(&src_queue, ODP_SCHED_NO_WAIT);
>
>		if (ev == ODP_EVENT_INVALID)
>			break;
>
>		if (odp_queue_enq(src_queue, ev)) {
>			LOG_ERR("[%i] Queue enqueue failed.\n", thr);
>			odp_event_free(ev);
>			return -1;
>		}
>	}
>
>	odp_schedule_resume();

Is it good to call odp_schedule_resume() here? Isn't it legal and possible
that the scheduler does or requests some speculative prescheduling in the
resume call? Thus defying the schedule_pause and draining of prescheduled
(stashed) events happening just before.

>
>	odp_barrier_wait(&globals->barrier);
>
>	clear_sched_queues();
>
>
> What is the issue that this patch fixes? This sequence should be quite
> robust already since no new enqueues happen after the barrier. In a
> simple test code like this, the latency from last enq() (through the
> barrier) to schedule loop (in clear_sched_queues()) could be overcome
> just by not exiting after the first EVENT_INVALID from scheduler, but
> after N EVENT_INVALIDs in a row.
>
> Also in your patch, thread should exit only after scheduler returns
> EVENT_INVALID.
>
> -Petri
Ola Liljedahl April 25, 2017, 10:59 a.m. UTC | #8
On 25/04/2017, 12:54, "Savolainen, Petri (Nokia - FI/Espoo)"
<petri.savolainen@nokia-bell-labs.com> wrote:

>Also in your patch, thread should exit only after scheduler returns
>EVENT_INVALID.
>Since the cool_down event is the last event on all queues (as they are
>enqueued after all threads have passed the barrier), when we have
>received all cool_down events we know that there are no other events on
>these queues. No need to call odp_schedule() until it returns
>ODP_EVENT_INVALID (which can happen spuriously anyway so doesn't signify
>anything).
>
>It signifies release of the schedule context. For a robust exit,
>application should release the current context.

OK. Either odp_schedule() must have returned invalid event or we need an
explicit release call.
But you don't think this is something odp_term_local() should handle?
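
For illustration, that release step might look roughly like this (a sketch
assuming the numtogo drain loop from the patch has just completed; not the
committed code):

	odp_event_t ev;

	/* After the last COOL_DOWN marker has been received, keep calling
	 * the scheduler with NO_WAIT until ODP_EVENT_INVALID comes back,
	 * which also releases the current schedule context before the
	 * thread exits */
	while ((ev = odp_schedule(NULL, ODP_SCHED_NO_WAIT)) !=
	       ODP_EVENT_INVALID)
		odp_event_free(ev); /* not expected: queues are already empty */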

>
>-Petri
Bill Fischofer April 25, 2017, 11:18 a.m. UTC | #9
All good points. There's more subtlety going on than I originally realized.
I've added this to the agenda for today's public call.

On Tue, Apr 25, 2017 at 5:59 AM, Ola Liljedahl <Ola.Liljedahl@arm.com>
wrote:

>
> On 25/04/2017, 12:54, "Savolainen, Petri (Nokia - FI/Espoo)"
> <petri.savolainen@nokia-bell-labs.com> wrote:
>
> >Also in your patch, thread should exit only after scheduler returns
> >EVENT_INVALID.
> >Since the cool_down event is the last event on all queues (as they are
> >enqueued after all threads have passed the barrier), when we have
> >received all cool_down events we know that there are no other events on
> >these queues. No need to call odp_schedule() until it returns
> >ODP_EVENT_INVALID (which can happen spuriously anyway so doesn't signify
> >anything).
> >
> >It signifies release of the schedule context. For a robust exit,
> >application should release the current context.
>
> OK. Either odp_schedule() must have returned invalid event or we need an
> explicit release call.
> But you don't think this is something odp_term_local() should handle?
>
> >
> >-Petri
Savolainen, Petri (Nokia - FI/Espoo) April 25, 2017, 12:32 p.m. UTC | #10
> -----Original Message-----
> From: Ola Liljedahl [mailto:Ola.Liljedahl@arm.com]
> Sent: Tuesday, April 25, 2017 1:56 PM
> To: Savolainen, Petri (Nokia - FI/Espoo) <petri.savolainen@nokia-bell-
> labs.com>; Brian Brooks <Brian.Brooks@arm.com>; lng-odp@lists.linaro.org
> Cc: nd <nd@arm.com>
> Subject: Re: [lng-odp] [PATCH] test: odp_sched_latency: robust draining of
> queues
>
> Another thing.
>
> On 25/04/2017, 12:26, "Savolainen, Petri (Nokia - FI/Espoo)"
> <petri.savolainen@nokia-bell-labs.com> wrote:
>
> >
> >> -----Original Message-----
> >> From: lng-odp [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of
> >> Brian Brooks
> >> Sent: Monday, April 24, 2017 11:59 PM
> >> To: lng-odp@lists.linaro.org
> >> Cc: Ola Liljedahl <ola.liljedahl@arm.com>
> >> Subject: [lng-odp] [PATCH] test: odp_sched_latency: robust draining of
> >> queues
> >>
> >> From: Ola Liljedahl <ola.liljedahl@arm.com>
> >>
> >> In order to robustly drain all queues when the benchmark has
> >> ended, we enqueue a special event on every queue and invoke
> >> the scheduler until all such events have been received.
> >>
> >
> >	odp_schedule_pause();
> >
> >	while (1) {
> >		ev = odp_schedule(&src_queue, ODP_SCHED_NO_WAIT);
> >
> >		if (ev == ODP_EVENT_INVALID)
> >			break;
> >
> >		if (odp_queue_enq(src_queue, ev)) {
> >			LOG_ERR("[%i] Queue enqueue failed.\n", thr);
> >			odp_event_free(ev);
> >			return -1;
> >		}
> >	}
> >
> >	odp_schedule_resume();
>
> Is it good to call odp_schedule_resume() here? Isn't it legal and possible
> that the scheduler does or requests some speculative prescheduling in the
> resume call? Thus defying the schedule_pause and draining of prescheduled
> (stashed) events happening just before.

The loop above ensures that other threads proceed while this thread waits.

Resume should not reserve a schedule context (do pre-scheduling); only
schedule() does reserve and free a context. The clear_sched_queues() loops
as long as there are events and stops when it sees an EVENT_INVALID == no
context.

-Petri

> >
> >	odp_barrier_wait(&globals->barrier);
> >
> >	clear_sched_queues();
> >
> >
> >What is the issue that this patch fixes? This sequence should be quite
> >robust already since no new enqueues happen after the barrier. In a
> >simple test code like this, the latency from last enq() (through the
> >barrier) to schedule loop (in clear_sched_queues()) could be overcome
> >just by not exiting after the first EVENT_INVALID from scheduler, but
> >after N EVENT_INVALIDs in a row.
> >
> >Also in your patch, thread should exit only after scheduler returns
> >EVENT_INVALID.
> >
> >-Petri
Ola Liljedahl April 25, 2017, 1:51 p.m. UTC | #11
On 25/04/2017, 14:32, "Savolainen, Petri (Nokia - FI/Espoo)"
<petri.savolainen@nokia-bell-labs.com> wrote:

>
>> -----Original Message-----
>> From: Ola Liljedahl [mailto:Ola.Liljedahl@arm.com]
>> Sent: Tuesday, April 25, 2017 1:56 PM
>> To: Savolainen, Petri (Nokia - FI/Espoo) <petri.savolainen@nokia-bell-
>> labs.com>; Brian Brooks <Brian.Brooks@arm.com>; lng-odp@lists.linaro.org
>> Cc: nd <nd@arm.com>
>> Subject: Re: [lng-odp] [PATCH] test: odp_sched_latency: robust draining
>> of queues
>>
>> Another thing.
>>
>> On 25/04/2017, 12:26, "Savolainen, Petri (Nokia - FI/Espoo)"
>> <petri.savolainen@nokia-bell-labs.com> wrote:
>>
>> >
>> >> -----Original Message-----
>> >> From: lng-odp [mailto:lng-odp-bounces@lists.linaro.org] On Behalf Of
>> >> Brian Brooks
>> >> Sent: Monday, April 24, 2017 11:59 PM
>> >> To: lng-odp@lists.linaro.org
>> >> Cc: Ola Liljedahl <ola.liljedahl@arm.com>
>> >> Subject: [lng-odp] [PATCH] test: odp_sched_latency: robust draining
>> >> of queues
>> >>
>> >> From: Ola Liljedahl <ola.liljedahl@arm.com>
>> >>
>> >> In order to robustly drain all queues when the benchmark has
>> >> ended, we enqueue a special event on every queue and invoke
>> >> the scheduler until all such events have been received.
>> >>
>> >
>> >	odp_schedule_pause();
>> >
>> >	while (1) {
>> >		ev = odp_schedule(&src_queue, ODP_SCHED_NO_WAIT);
>> >
>> >		if (ev == ODP_EVENT_INVALID)
>> >			break;
>> >
>> >		if (odp_queue_enq(src_queue, ev)) {
>> >			LOG_ERR("[%i] Queue enqueue failed.\n", thr);
>> >			odp_event_free(ev);
>> >			return -1;
>> >		}
>> >	}
>> >
>> >	odp_schedule_resume();
>> Is it good to call odp_schedule_resume() here? Isn't it legal and
>> possible that the scheduler does or requests some speculative
>> prescheduling in the resume call? Thus defying the schedule_pause and
>> draining of prescheduled (stashed) events happening just before.
>
>
> The loop above ensures that other threads proceed while this thread waits.
>
> Resume should not reserve a schedule context (do pre-scheduling), only
> schedule() does reserve and free a context.

The spec does not say this.

/**
 * Resume scheduling
 *
 * Resume global scheduling for this thread. After this call, all schedule
 * calls will schedule normally (perform global scheduling).
 */
void odp_schedule_resume(void);

“Resume scheduling” could easily be interpreted as allowing pre-scheduling
or enabling some “global scheduler” to schedule events for this thread. I
can easily imagine that when using a HW scheduler with some scheduling
latency, one would always send a scheduling request in advance in order to
hide the latency.

The description for odp_schedule() is written as if pre-scheduling or
stashing cannot occur.

> The clear_sched_queues() loops as long as there are events and stops
> when it sees an EVENT_INVALID == no context.

Yes, I have added a loop calling odp_schedule() until it returns
EVENT_INVALID.

>
> -Petri
>
>>
>> >
>> >	odp_barrier_wait(&globals->barrier);
>> >
>> >	clear_sched_queues();
>> >
>> >
>> >What is the issue that this patch fixes? This sequence should be quite
>> >robust already since no new enqueues happen after the barrier. In a
>> >simple test code like this, the latency from last enq() (through the
>> >barrier) to schedule loop (in clear_sched_queues()) could be overcome
>> >just by not exiting after the first EVENT_INVALID from scheduler, but
>> >after N EVENT_INVALIDs in a row.
>> >
>> >Also in your patch, thread should exit only after scheduler returns
>> >EVENT_INVALID.
>> >
>> >-Petri

Patch

diff --git a/test/common_plat/performance/odp_sched_latency.c b/test/common_plat/performance/odp_sched_latency.c
index 2b28cd7b..7ba99fc6 100644
--- a/test/common_plat/performance/odp_sched_latency.c
+++ b/test/common_plat/performance/odp_sched_latency.c
@@ -57,9 +57,10 @@  ODP_STATIC_ASSERT(LO_PRIO_QUEUES <= MAX_QUEUES, "Too many LO priority queues");
 
 /** Test event types */
 typedef enum {
-	WARM_UP, /**< Warm up event */
-	TRAFFIC, /**< Event used only as traffic load */
-	SAMPLE	 /**< Event used to measure latency */
+	WARM_UP,  /**< Warm up event */
+	COOL_DOWN,/**< Last event on queue */
+	TRAFFIC,  /**< Event used only as traffic load */
+	SAMPLE	  /**< Event used to measure latency */
 } event_type_t;
 
 /** Test event */
@@ -114,16 +115,40 @@  typedef struct {
  *
  * Retry to be sure that all buffers have been scheduled.
  */
-static void clear_sched_queues(void)
+static void clear_sched_queues(test_globals_t *globals)
 {
 	odp_event_t ev;
+	odp_buffer_t buf;
+	test_event_t *event;
+	int i, j;
+	uint32_t numtogo = 0;
 
-	while (1) {
-		ev = odp_schedule(NULL, ODP_SCHED_NO_WAIT);
-
-		if (ev == ODP_EVENT_INVALID)
-			break;
-
+	/* Enqueue special cool_down events on all queues (one per queue) */
+	for (i = 0; i < NUM_PRIOS; i++) {
+		for (j = 0; j < globals->args.prio[i].queues; j++) {
+			buf = odp_buffer_alloc(globals->pool);
+			if (buf == ODP_BUFFER_INVALID) {
+				LOG_ERR("Buffer alloc failed.\n");
+				return;
+			}
+			event = odp_buffer_addr(buf);
+			event->type = COOL_DOWN;
+			ev = odp_buffer_to_event(buf);
+			if (odp_queue_enq(globals->queue[i][j], ev)) {
+				LOG_ERR("Queue enqueue failed.\n");
+				odp_event_free(ev);
+				return;
+			}
+			numtogo++;
+		}
+	}
+	/* Invoke scheduler until all cool_down events have been received */
+	while (numtogo != 0) {
+		ev = odp_schedule(NULL, ODP_SCHED_WAIT);
+		buf = odp_buffer_from_event(ev);
+		event = odp_buffer_addr(buf);
+		if (event->type == COOL_DOWN)
+			numtogo--;
 		odp_event_free(ev);
 	}
 }
@@ -394,10 +419,10 @@  static int test_schedule(int thr, test_globals_t *globals)
 
 	odp_barrier_wait(&globals->barrier);
 
-	clear_sched_queues();
-
-	if (thr == MAIN_THREAD)
+	if (thr == MAIN_THREAD) {
+		clear_sched_queues(globals);
 		print_results(globals);
+	}
 
 	return 0;
 }