[API-NEXT,PATCHv1,1/2] validation: increase timer expiration wait time

Message ID 1452849247-32266-1-git-send-email-maxim.uvarov@linaro.org
State New

Commit Message

Maxim Uvarov Jan. 15, 2016, 9:14 a.m. UTC
Following the current test logic, the validation test should wait
for all scheduled timer expirations. Setting that wait time to 1 ms
is not correct: the code above schedules a timer for 1.1 ms, and a
context switch in Linux takes 10 ms.

The problem that odp_timer_pool_destroy() does not disarm all triggered
timers before freeing the pool's shared memory still exists. That should
be covered by a separate test case.

Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
---
 test/validation/timer/timer.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

Comments

Ivan Khoronzhuk Jan. 18, 2016, 10:54 p.m. UTC | #1
Maxim,
in general, I had not looked into this test; it was working for me and I was happy.
At a quick glance I have some comments. I hope you will find time to correct them.

On 15.01.16 11:14, Maxim Uvarov wrote:
> Following the current test logic, the validation test should wait
> for all scheduled timer expirations. Setting that wait time to 1 ms
> is not correct: the code above schedules a timer for 1.1 ms, and a
> context switch in Linux takes 10 ms.
>
> The problem that odp_timer_pool_destroy() does not disarm all triggered
> timers before freeing the pool's shared memory still exists. That should
> be covered by a separate test case.
>
> Signed-off-by: Maxim Uvarov <maxim.uvarov@linaro.org>
> ---
>   test/validation/timer/timer.c | 38 ++++++++++++++++++++++++--------------
>   1 file changed, 24 insertions(+), 14 deletions(-)
>
> diff --git a/test/validation/timer/timer.c b/test/validation/timer/timer.c
> index dda5e4c..db0a5ca 100644
> --- a/test/validation/timer/timer.c
> +++ b/test/validation/timer/timer.c
> @@ -266,6 +266,23 @@ static void handle_tmo(odp_event_t ev, bool stale, uint64_t prev_tick)
>   	}
>   }
>
> +/* Delay to ensure timeouts for expired timers can be
> + * received.
> + * Delay time = (max timer expiration time) +
> + *		(timer handler execution time) +
> + *		(system overhead for thread creation and schedule
> + *		 context switches)
> + */
> +static void _wait_for_timers_expiration(void)
_wait_for* -> wait_for_*

> +{
> +	struct timespec ts;
> +
> +	ts.tv_sec = 0;
> +	ts.tv_nsec = 50 * ODP_TIME_MSEC_IN_NS;
> +	if (nanosleep(&ts, NULL) < 0)
> +		CU_FAIL_FATAL("nanosleep failed");
> +}
> +
use the time API here:
odp_time_wait_ns(50 * ODP_TIME_MSEC_IN_NS);

Maybe it is better to correct this with a separate preparation patch.
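
Putting the rename and the time-API suggestion together, a minimal sketch of the
helper, assuming odp_time_wait_ns() waits for the given number of nanoseconds:

static void wait_for_timers_expiration(void)
{
	/* 50 ms grace period via the ODP time constant */
	odp_time_wait_ns(50 * ODP_TIME_MSEC_IN_NS);
}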

>   /* @private Worker thread entrypoint which performs timer alloc/set/cancel/free
>    * tests */
>   static void *worker_entrypoint(void *arg TEST_UNUSED)
> @@ -305,7 +322,7 @@ static void *worker_entrypoint(void *arg TEST_UNUSED)
>   		uint64_t tck = odp_timer_current_tick(tp) + 1 +
>   			       odp_timer_ns_to_tick(tp,
>   						    (rand_r(&seed) % RANGE_MS)
> -						    * 1000000ULL);
> +						    * ODP_TIME_MSEC_IN_NS);
>   		odp_timer_set_t rc;
>   		rc = odp_timer_set_abs(tt[i].tim, tck, &tt[i].ev);
>   		if (rc != ODP_TIMER_SUCCESS) {
> @@ -351,7 +368,8 @@ static void *worker_entrypoint(void *arg TEST_UNUSED)
>   				/* Timer active => reset */
>   				nreset++;
>   			uint64_t tck = 1 + odp_timer_ns_to_tick(tp,
> -				       (rand_r(&seed) % RANGE_MS) * 1000000ULL);
> +				       (rand_r(&seed) % RANGE_MS) *
> +				       ODP_TIME_MSEC_IN_NS);
...this is not related to the patch, but maybe a preparation patch replacing
rand_r() usage with odp_random_data() would make sense...
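
For illustration, a hedged sketch of what such a preparation patch could look like;
the helper name random_range_ms() is hypothetical, and the odp_random_data(buf, len,
use_entropy) signature is assumed from this ODP generation:

static uint32_t random_range_ms(void)
{
	uint32_t r = 0;

	/* pseudo-random quality is enough for a validation test */
	if (odp_random_data((uint8_t *)&r, sizeof(r), 0) < 0)
		CU_FAIL("odp_random_data failed");
	return r % RANGE_MS;
}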

>   			odp_timer_set_t rc;
>   			uint64_t cur_tick;
>   			/* Loop until we manage to read cur_tick and set a
> @@ -372,11 +390,8 @@ static void *worker_entrypoint(void *arg TEST_UNUSED)
>   				tt[i].tick = cur_tick + tck;
>   			}
>   		}
> -		struct timespec ts;
> -		ts.tv_sec = 0;
> -		ts.tv_nsec = 1000000; /* 1ms */
> -		if (nanosleep(&ts, NULL) < 0)
> -			CU_FAIL_FATAL("nanosleep failed");
> +
> +		_wait_for_timers_expiration();
You've changed the step here from 1 ms to 50 ms but didn't change the comment
under the loop. It's not visible here, but after this change that description
is no longer correct.
...
Also, the delay is increased but the step in the loop is still assumed to be
1 ms. The line below should be changed as well:
for (ms = 0; ms < 7 * RANGE_MS / 10; ms++) {

ms += 50 ms; but pay attention, there may be a reason to tune
(7 * RANGE_MS / 10) a little.

But I really don't fully understand why we need to increase the step here.
IMHO, it's better to decrease it. If in some case a timeout is delivered too
early it should be caught, but if it's a little too late, that's normal.
The test simply checks that timeouts are received in the right order and not
too early.

By simply increasing the delay 50 times without tuning the iteration count,
you have increased the time for receiving events in the loop. The events are
spread over 2000 ms, and you are lucky to fit in this range by doing it like
this. To increase the time for receiving events it is better to increase the
iteration count in:
for (ms = 0; ms < 7 * RANGE_MS / 10; ms++) {
to fit the 2000 ms interval. That's enough to receive all events, with no
need to change the polling timeout.
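
A sketch of that suggestion: keep a 1 ms polling step and stretch the iteration
count to cover the whole range; the bound RANGE_MS + 50 is illustrative, not a
tuned value, and the loop body is abbreviated from the test:

for (ms = 0; ms < RANGE_MS + 50; ms++) {
	odp_event_t ev = odp_queue_deq(queue);

	if (ev != ODP_EVENT_INVALID)
		handle_tmo(ev, false, prev_tick); /* prev_tick as in the test */
	odp_time_wait_ns(ODP_TIME_MSEC_IN_NS); /* keep the 1 ms step */
}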

>   	}
>
>   	/* Cancel and free all timers */
> @@ -401,13 +416,8 @@ static void *worker_entrypoint(void *arg TEST_UNUSED)
>   	LOG_DBG("Thread %u: %" PRIu32 " stale timeout(s) after odp_timer_free()\n",
>   		thr, nstale);
>
> -	/* Delay some more to ensure timeouts for expired timers can be
> -	 * received */
> -	struct timespec ts;
> -	ts.tv_sec = 0;
> -	ts.tv_nsec = 1000000; /* 1ms */
> -	if (nanosleep(&ts, NULL) < 0)
> -		CU_FAIL_FATAL("nanosleep failed");
> +	_wait_for_timers_expiration();
> +
>   	while (nstale != 0) {
>   		odp_event_t ev = odp_queue_deq(queue);
>   		if (ev != ODP_EVENT_INVALID) {
>
Maxim Uvarov Jan. 19, 2016, 9:59 a.m. UTC | #2
On 01/19/2016 01:54, Ivan Khoronzhuk wrote:
> Maxim,
> in general, I had not looked into this test; it was working for me and I
> was happy.
> At a quick glance I have some comments. I hope you will find time to
> correct them.
>

ok, some comments below.

> [...]
> You've changed the step here from 1 ms to 50 ms but didn't change the
> comment under the loop. It's not visible here, but after this change
> that description is no longer correct.
> ...
> Also, the delay is increased but the step in the loop is still assumed
> to be 1 ms. The line below should be changed as well:
> for (ms = 0; ms < 7 * RANGE_MS / 10; ms++) {
>
> ms += 50 ms; but pay attention, there may be a reason to tune
> (7 * RANGE_MS / 10) a little.
>
> But I really don't fully understand why we need to increase the step
> here. IMHO, it's better to decrease it. If in some case a timeout is
> delivered too early it should be caught, but if it's a little too late,
> that's normal. The test simply checks that timeouts are received in the
> right order and not too early.
>
> By simply increasing the delay 50 times without tuning the iteration
> count, you have increased the time for receiving events in the loop.
> The events are spread over 2000 ms, and you are lucky to fit in this
> range by doing it like this. To increase the time for receiving events
> it is better to increase the iteration count in:
> for (ms = 0; ms < 7 * RANGE_MS / 10; ms++) {
> to fit the 2000 ms interval. That's enough to receive all events, with
> no need to change the polling timeout.
>

Here we are talking about this bug:
https://bugs.linaro.org/show_bug.cgi?id=1940

As I understand it, the timer step does not matter here. timer_delete() returns 0,
which means the kernel confirmed that it deleted the timer. But the timer may
actually still be in flight, and we should wait a short time for context switches
and so on until the timer handler is finally removed. In my understanding that
time does not depend on how long the handler executes or on when its execution
was postponed.
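
In plain POSIX terms the race looks roughly like this (an illustrative sketch, not
linux-generic code):

#include <signal.h>
#include <time.h>

static void delete_timer_with_grace(timer_t tid)
{
	struct timespec grace = { .tv_sec = 0, .tv_nsec = 50 * 1000000L };

	/* timer_delete() returning 0 only means the kernel accepted the
	 * request; a notification that already fired may still be running
	 * on another thread, so give it time to drain. */
	if (timer_delete(tid) == 0)
		nanosleep(&grace, NULL);
}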


Maxim.

Ivan Khoronzhuk Jan. 19, 2016, 10:23 a.m. UTC | #3
On 19.01.16 11:59, Maxim Uvarov wrote:
> [...]
>
> Here we are talking about this bug:
> https://bugs.linaro.org/show_bug.cgi?id=1940
>
> As I understand it, the timer step does not matter here. timer_delete() returns 0, which
> means the kernel confirmed that it deleted the timer. But the timer may actually still be
> in flight, and we should wait a short time for context switches and so on until the timer
> handler is finally removed. In my understanding that time does not depend on how long the
> handler executes or on when its execution was postponed.
>
>
> Maxim.
With this approach you've broken the idea of the test a little. You've increased the polling
period, which doesn't affect timer_delete(), but the intention was to take events at a rate
fast enough to check that events do not arrive too early. When correcting the test we
shouldn't forget that it can be used not only by linux-generic.

The events are spread within 2 seconds; you increased the period from 1 ms to 50 ms, so if an
event is received too early it will be compared against a current time that is late by at
least 50 ms instead of 1 ms, and as a result it will not be caught.
Also, the integrated period becomes 7/10 * 2000 * 50 ms = 70 seconds, more than a minute. If
the last event arrives at 2 seconds plus some 50 ms, there is no need to wait 70 seconds, and
after that there is even no need to wait an additional 50 ms for insurance.
Better to leave the polling period as it is (or even set it lower) and increase the number of
iterations to fit the 2.05 second range, if that is your intention here, of course.

IMHO, this is a problem of linux-generic, not of the test, and linux-generic should be fixed.
Have you considered adding an atomic flag and checking it in the event receive routine to
determine whether it is allowed to continue event handling? And if this is a problem of a
particular kernel version, maybe it is better to put it under an ifdef that depends on the
buggy kernel version.
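
A sketch of that atomic-flag idea using the ODP atomic API; timer_pool_t, notify_enabled
and itimer_fire are hypothetical stand-ins for the linux-generic internals:

typedef struct {
	odp_atomic_u32_t notify_enabled; /* 1 while expirations may be delivered */
	/* ... rest of the pool state ... */
} timer_pool_t;

static void itimer_fire(union sigval sv)
{
	timer_pool_t *tp = sv.sival_ptr;

	/* drop expirations that race with pool destruction */
	if (odp_atomic_load_u32(&tp->notify_enabled) == 0)
		return;
	/* ... normal timeout delivery ... */
}

odp_timer_pool_destroy() would then store 0 to notify_enabled before calling
timer_delete() and wait out any in-flight handler.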

Maxim Uvarov Jan. 19, 2016, 11:27 a.m. UTC | #4
On 01/19/2016 13:23, Ivan Khoronzhuk wrote:
> [...]
> With this approach you've broken the idea of the test a little. You've increased the
> polling period, which doesn't affect timer_delete(), but the intention was to take events
> at a rate fast enough to check that events do not arrive too early. When correcting the
> test we shouldn't forget that it can be used not only by linux-generic.
>
> The events are spread within 2 seconds; you increased the period from 1 ms to 50 ms, so if
> an event is received too early it will be compared against a current time that is late by
> at least 50 ms instead of 1 ms, and as a result it will not be caught.
> Also, the integrated period becomes 7/10 * 2000 * 50 ms = 70 seconds, more than a minute.
> If the last event arrives at 2 seconds plus some 50 ms, there is no need to wait 70
> seconds, and after that there is even no need to wait an additional 50 ms for insurance.
> Better to leave the polling period as it is (or even set it lower) and increase the number
> of iterations to fit the 2.05 second range, if that is your intention here, of course.
>
> IMHO, this is a problem of linux-generic, not of the test, and linux-generic should be
> fixed. Have you considered adding an atomic flag and checking it in the event receive
> routine to determine whether it is allowed to continue event handling? And if this is a
> problem of a particular kernel version, maybe it is better to put it under an ifdef that
> depends on the buggy kernel version.
>

Do you want to propose the right fix for that?

I saw this:

static void odp_timer_pool_del(odp_timer_pool *tp)
{
     if (tp->num_alloc != 0) {
         /* It's a programming error to attempt to destroy a */
         /* timer pool which is still in use */
         ODP_ABORT("%s: timers in use\n", tp->name);
     }
}

So in my understanding the validation test should cancel all timers or wait
until they have finished.
So I think it's an issue of both the validation test and linux-generic.
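
On the test side, "wait until finished" could mirror the test's existing drain loop;
this sketch reuses the test's queue, nstale and handle_tmo names and backs off 1 ms
between polls:

while (nstale != 0) {
	odp_event_t ev = odp_queue_deq(queue);

	if (ev != ODP_EVENT_INVALID) {
		handle_tmo(ev, true, 0); /* stale timeout */
		nstale--;
	} else {
		odp_time_wait_ns(ODP_TIME_MSEC_IN_NS); /* brief back-off */
	}
}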

Maxim.


Ivan Khoronzhuk Jan. 19, 2016, 12:11 p.m. UTC | #5
On 19.01.16 13:27, Maxim Uvarov wrote:
> [...]
> Do you want to propose the right fix for that?
As a fast fix on the test side you can simply increase the after-loop delay:

10/10 - 7/10 = 3/10; 3/10 * 2000 ms + some 50 ms = 650 ms (instead of 1 ms)

It should receive the rest of the events left over after the loop.
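
With the helper from the patch that is a one-line change (sketch; 650 ms = 3/10 of
the 2000 ms range plus 50 ms of slack):

odp_time_wait_ns(650 * ODP_TIME_MSEC_IN_NS);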

Patch

diff --git a/test/validation/timer/timer.c b/test/validation/timer/timer.c
index dda5e4c..db0a5ca 100644
--- a/test/validation/timer/timer.c
+++ b/test/validation/timer/timer.c
@@ -266,6 +266,23 @@  static void handle_tmo(odp_event_t ev, bool stale, uint64_t prev_tick)
 	}
 }
 
+/* Delay to ensure timeouts for expired timers can be
+ * received.
+ * Delay time = (max timer expiration time) +
+ *		(timer handler execution time) +
+ *		(system overhead for thread creation and schedule
+ *		 context switches)
+ */
+static void _wait_for_timers_expiration(void)
+{
+	struct timespec ts;
+
+	ts.tv_sec = 0;
+	ts.tv_nsec = 50 * ODP_TIME_MSEC_IN_NS;
+	if (nanosleep(&ts, NULL) < 0)
+		CU_FAIL_FATAL("nanosleep failed");
+}
+
 /* @private Worker thread entrypoint which performs timer alloc/set/cancel/free
  * tests */
 static void *worker_entrypoint(void *arg TEST_UNUSED)
@@ -305,7 +322,7 @@  static void *worker_entrypoint(void *arg TEST_UNUSED)
 		uint64_t tck = odp_timer_current_tick(tp) + 1 +
 			       odp_timer_ns_to_tick(tp,
 						    (rand_r(&seed) % RANGE_MS)
-						    * 1000000ULL);
+						    * ODP_TIME_MSEC_IN_NS);
 		odp_timer_set_t rc;
 		rc = odp_timer_set_abs(tt[i].tim, tck, &tt[i].ev);
 		if (rc != ODP_TIMER_SUCCESS) {
@@ -351,7 +368,8 @@  static void *worker_entrypoint(void *arg TEST_UNUSED)
 				/* Timer active => reset */
 				nreset++;
 			uint64_t tck = 1 + odp_timer_ns_to_tick(tp,
-				       (rand_r(&seed) % RANGE_MS) * 1000000ULL);
+				       (rand_r(&seed) % RANGE_MS) *
+				       ODP_TIME_MSEC_IN_NS);
 			odp_timer_set_t rc;
 			uint64_t cur_tick;
 			/* Loop until we manage to read cur_tick and set a
@@ -372,11 +390,8 @@  static void *worker_entrypoint(void *arg TEST_UNUSED)
 				tt[i].tick = cur_tick + tck;
 			}
 		}
-		struct timespec ts;
-		ts.tv_sec = 0;
-		ts.tv_nsec = 1000000; /* 1ms */
-		if (nanosleep(&ts, NULL) < 0)
-			CU_FAIL_FATAL("nanosleep failed");
+
+		_wait_for_timers_expiration();
 	}
 
 	/* Cancel and free all timers */
@@ -401,13 +416,8 @@  static void *worker_entrypoint(void *arg TEST_UNUSED)
 	LOG_DBG("Thread %u: %" PRIu32 " stale timeout(s) after odp_timer_free()\n",
 		thr, nstale);
 
-	/* Delay some more to ensure timeouts for expired timers can be
-	 * received */
-	struct timespec ts;
-	ts.tv_sec = 0;
-	ts.tv_nsec = 1000000; /* 1ms */
-	if (nanosleep(&ts, NULL) < 0)
-		CU_FAIL_FATAL("nanosleep failed");
+	_wait_for_timers_expiration();
+
 	while (nstale != 0) {
 		odp_event_t ev = odp_queue_deq(queue);
 		if (ev != ODP_EVENT_INVALID) {