diff mbox

linux-generic: time: remove platform specific acceleration

Message ID 1425658935-15991-1-git-send-email-mike.holmes@linaro.org
State New
Headers show

Commit Message

Mike Holmes March 6, 2015, 4:22 p.m. UTC
Linux-generic should be a reference implementation and does not need
archetecture specific acceleration.

Signed-off-by: Mike Holmes <mike.holmes@linaro.org>
---
 platform/linux-generic/odp_time.c | 47 +++------------------------------------
 1 file changed, 3 insertions(+), 44 deletions(-)

Comments

Maxim Uvarov March 6, 2015, 4:26 p.m. UTC | #1
Where should be that implementations placed if we want other platform to 
reuse them?

Maxim.

On 03/06/15 19:22, Mike Holmes wrote:
> Linux-generic should be a reference implementation and does not need
> archetecture specific acceleration.
>
> Signed-off-by: Mike Holmes <mike.holmes@linaro.org>
> ---
>   platform/linux-generic/odp_time.c | 47 +++------------------------------------
>   1 file changed, 3 insertions(+), 44 deletions(-)
>
> diff --git a/platform/linux-generic/odp_time.c b/platform/linux-generic/odp_time.c
> index 31d6656..6f40361 100644
> --- a/platform/linux-generic/odp_time.c
> +++ b/platform/linux-generic/odp_time.c
> @@ -6,52 +6,13 @@
>   
>   #define _POSIX_C_SOURCE 200809L
>   
> +#include <time.h>
>   #include <odp/time.h>
> -#include <odp/hints.h>
>   #include <odp/system_info.h>
> -
> -#define GIGA 1000000000
> -
> -#if defined __x86_64__ || defined __i386__
> -
> -uint64_t odp_time_cycles(void)
> -{
> -	union {
> -		uint64_t tsc_64;
> -		struct {
> -			uint32_t lo_32;
> -			uint32_t hi_32;
> -		};
> -	} tsc;
> -
> -	__asm__ __volatile__ ("rdtsc" :
> -		     "=a" (tsc.lo_32),
> -		     "=d" (tsc.hi_32) : : "memory");
> -
> -	return tsc.tsc_64;
> -}
> -
> -
> -#elif defined __OCTEON__
> -
> -uint64_t odp_time_cycles(void)
> -{
> -	#define CVMX_TMP_STR(x) CVMX_TMP_STR2(x)
> -	#define CVMX_TMP_STR2(x) #x
> -	uint64_t cycle;
> -
> -	__asm__ __volatile__ ("rdhwr %[rt],$" CVMX_TMP_STR(31) :
> -			   [rt] "=d" (cycle) : : "memory");
> -
> -	return cycle;
> -}
> -
> -#else
> -
> -#include <time.h>
> -#include <stdlib.h>
>   #include <odp_debug_internal.h>
>   
> +#define GIGA 1000000000
> +
>   uint64_t odp_time_cycles(void)
>   {
>   	struct timespec time;
> @@ -74,8 +35,6 @@ uint64_t odp_time_cycles(void)
>   	return cycles;
>   }
>   
> -#endif
> -
>   uint64_t odp_time_diff_cycles(uint64_t t1, uint64_t t2)
>   {
>   	if (odp_likely(t2 > t1))
Mike Holmes March 6, 2015, 4:35 p.m. UTC | #2
The DPDK implementation will take the X86 optimizations, presumably Cavium
already have Octeon acceleration in their own ODP implementation.

The linux generic implementation should be a model for the behavior of ODP,
it happens to run in Linux but that should not be confused with it being
used in any real application.

If we fill it with optimizations it will be harder to maintain as a pure
model.

Mike

On 6 March 2015 at 11:26, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

> Where should be that implementations placed if we want other platform to
> reuse them?
>
> Maxim.
>
>
> On 03/06/15 19:22, Mike Holmes wrote:
>
>> Linux-generic should be a reference implementation and does not need
>> archetecture specific acceleration.
>>
>> Signed-off-by: Mike Holmes <mike.holmes@linaro.org>
>> ---
>>   platform/linux-generic/odp_time.c | 47 +++---------------------------
>> ---------
>>   1 file changed, 3 insertions(+), 44 deletions(-)
>>
>> diff --git a/platform/linux-generic/odp_time.c
>> b/platform/linux-generic/odp_time.c
>> index 31d6656..6f40361 100644
>> --- a/platform/linux-generic/odp_time.c
>> +++ b/platform/linux-generic/odp_time.c
>> @@ -6,52 +6,13 @@
>>     #define _POSIX_C_SOURCE 200809L
>>   +#include <time.h>
>>   #include <odp/time.h>
>> -#include <odp/hints.h>
>>   #include <odp/system_info.h>
>> -
>> -#define GIGA 1000000000
>> -
>> -#if defined __x86_64__ || defined __i386__
>> -
>> -uint64_t odp_time_cycles(void)
>> -{
>> -       union {
>> -               uint64_t tsc_64;
>> -               struct {
>> -                       uint32_t lo_32;
>> -                       uint32_t hi_32;
>> -               };
>> -       } tsc;
>> -
>> -       __asm__ __volatile__ ("rdtsc" :
>> -                    "=a" (tsc.lo_32),
>> -                    "=d" (tsc.hi_32) : : "memory");
>> -
>> -       return tsc.tsc_64;
>> -}
>> -
>> -
>> -#elif defined __OCTEON__
>> -
>> -uint64_t odp_time_cycles(void)
>> -{
>> -       #define CVMX_TMP_STR(x) CVMX_TMP_STR2(x)
>> -       #define CVMX_TMP_STR2(x) #x
>> -       uint64_t cycle;
>> -
>> -       __asm__ __volatile__ ("rdhwr %[rt],$" CVMX_TMP_STR(31) :
>> -                          [rt] "=d" (cycle) : : "memory");
>> -
>> -       return cycle;
>> -}
>> -
>> -#else
>> -
>> -#include <time.h>
>> -#include <stdlib.h>
>>   #include <odp_debug_internal.h>
>>   +#define GIGA 1000000000
>> +
>>   uint64_t odp_time_cycles(void)
>>   {
>>         struct timespec time;
>> @@ -74,8 +35,6 @@ uint64_t odp_time_cycles(void)
>>         return cycles;
>>   }
>>   -#endif
>> -
>>   uint64_t odp_time_diff_cycles(uint64_t t1, uint64_t t2)
>>   {
>>         if (odp_likely(t2 > t1))
>>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
Bill Fischofer March 6, 2015, 4:42 p.m. UTC | #3
I agree with Mike.  Now that we have an official LNG performance
implementation, linux-generic should focus on being a simple and clean
functional implementation that elaborates the intended behavior of the ODP
APIs without being overly concerned with performance.  That doesn't mean we
don't do straightforward functional optimizations that are algorithmic in
nature, just that there shouldn't be any platform-specific tuning paths in
linux-generic.

On Fri, Mar 6, 2015 at 10:35 AM, Mike Holmes <mike.holmes@linaro.org> wrote:

> The DPDK implementation will take the X86 optimizations, presumably Cavium
> already have Octeon acceleration in their own ODP implementation.
>
> The linux generic implementation should be a model for the behavior of
> ODP, it happens to run in Linux but that should not be confused with it
> being used in any real application.
>
> If we fill it with optimizations it will be harder to maintain as a pure
> model.
>
> Mike
>
> On 6 March 2015 at 11:26, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
>
>> Where should be that implementations placed if we want other platform to
>> reuse them?
>>
>> Maxim.
>>
>>
>> On 03/06/15 19:22, Mike Holmes wrote:
>>
>>> Linux-generic should be a reference implementation and does not need
>>> archetecture specific acceleration.
>>>
>>> Signed-off-by: Mike Holmes <mike.holmes@linaro.org>
>>> ---
>>>   platform/linux-generic/odp_time.c | 47 +++---------------------------
>>> ---------
>>>   1 file changed, 3 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/platform/linux-generic/odp_time.c
>>> b/platform/linux-generic/odp_time.c
>>> index 31d6656..6f40361 100644
>>> --- a/platform/linux-generic/odp_time.c
>>> +++ b/platform/linux-generic/odp_time.c
>>> @@ -6,52 +6,13 @@
>>>     #define _POSIX_C_SOURCE 200809L
>>>   +#include <time.h>
>>>   #include <odp/time.h>
>>> -#include <odp/hints.h>
>>>   #include <odp/system_info.h>
>>> -
>>> -#define GIGA 1000000000
>>> -
>>> -#if defined __x86_64__ || defined __i386__
>>> -
>>> -uint64_t odp_time_cycles(void)
>>> -{
>>> -       union {
>>> -               uint64_t tsc_64;
>>> -               struct {
>>> -                       uint32_t lo_32;
>>> -                       uint32_t hi_32;
>>> -               };
>>> -       } tsc;
>>> -
>>> -       __asm__ __volatile__ ("rdtsc" :
>>> -                    "=a" (tsc.lo_32),
>>> -                    "=d" (tsc.hi_32) : : "memory");
>>> -
>>> -       return tsc.tsc_64;
>>> -}
>>> -
>>> -
>>> -#elif defined __OCTEON__
>>> -
>>> -uint64_t odp_time_cycles(void)
>>> -{
>>> -       #define CVMX_TMP_STR(x) CVMX_TMP_STR2(x)
>>> -       #define CVMX_TMP_STR2(x) #x
>>> -       uint64_t cycle;
>>> -
>>> -       __asm__ __volatile__ ("rdhwr %[rt],$" CVMX_TMP_STR(31) :
>>> -                          [rt] "=d" (cycle) : : "memory");
>>> -
>>> -       return cycle;
>>> -}
>>> -
>>> -#else
>>> -
>>> -#include <time.h>
>>> -#include <stdlib.h>
>>>   #include <odp_debug_internal.h>
>>>   +#define GIGA 1000000000
>>> +
>>>   uint64_t odp_time_cycles(void)
>>>   {
>>>         struct timespec time;
>>> @@ -74,8 +35,6 @@ uint64_t odp_time_cycles(void)
>>>         return cycles;
>>>   }
>>>   -#endif
>>> -
>>>   uint64_t odp_time_diff_cycles(uint64_t t1, uint64_t t2)
>>>   {
>>>         if (odp_likely(t2 > t1))
>>>
>>
>>
>> _______________________________________________
>> lng-odp mailing list
>> lng-odp@lists.linaro.org
>> http://lists.linaro.org/mailman/listinfo/lng-odp
>>
>
>
>
> --
> Mike Holmes
> Technical Manager - Linaro Networking Group
> Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM SoCs
>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
>
Mike Holmes March 9, 2015, 12:50 p.m. UTC | #4
On 9 March 2015 at 08:45, Savolainen, Petri (Nokia - FI/Espoo) <
petri.savolainen@nokia.com> wrote:

>  Performance is not the main concern here, but cycle count measurement
> accuracy. E.g. Intel rdtsc cycle counter read latency is few cycles where
> as clock_gettime() seems to take over a thousand. System call latencies
> also vary since those may launch other kernel services, which adds jitter
> to cycle count measurements.
>
>
>
> Could we organize arch specific stuff better (no #ifdefs, but
> configure/build mechanism) and continue to support e.g. rdtsc.
>

We had originally proposed an arch directory to capture architecture
differences that should be common to all X86, ARM etc regardless of the
platform and its specific accelerators.
If this were at the top in odp/arch then I would imaging they would be
reusable implementations of API  functions.

I think this should be a topic for the ARCH call in 10 mins.


>
>
> -Petri
>
>
>
>
>
> *From:* lng-odp-bounces@lists.linaro.org [mailto:
> lng-odp-bounces@lists.linaro.org] *On Behalf Of *ext Bill Fischofer
> *Sent:* Friday, March 06, 2015 6:42 PM
> *To:* Mike Holmes
> *Cc:* lng-odp
> *Subject:* Re: [lng-odp] [PATCH] linux-generic: time: remove platform
> specific acceleration
>
>
>
> I agree with Mike.  Now that we have an official LNG performance
> implementation, linux-generic should focus on being a simple and clean
> functional implementation that elaborates the intended behavior of the ODP
> APIs without being overly concerned with performance.  That doesn't mean we
> don't do straightforward functional optimizations that are algorithmic in
> nature, just that there shouldn't be any platform-specific tuning paths in
> linux-generic.
>
>
>
> On Fri, Mar 6, 2015 at 10:35 AM, Mike Holmes <mike.holmes@linaro.org>
> wrote:
>
> The DPDK implementation will take the X86 optimizations, presumably Cavium
> already have Octeon acceleration in their own ODP implementation.
>
>
>
> The linux generic implementation should be a model for the behavior of
> ODP, it happens to run in Linux but that should not be confused with it
> being used in any real application.
>
>
>
> If we fill it with optimizations it will be harder to maintain as a pure
> model.
>
>
>
> Mike
>
>
>
> On 6 March 2015 at 11:26, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
>
> Where should be that implementations placed if we want other platform to
> reuse them?
>
> Maxim.
>
>
>
> On 03/06/15 19:22, Mike Holmes wrote:
>
> Linux-generic should be a reference implementation and does not need
> archetecture specific acceleration.
>
> Signed-off-by: Mike Holmes <mike.holmes@linaro.org>
> ---
>   platform/linux-generic/odp_time.c | 47
> +++------------------------------------
>   1 file changed, 3 insertions(+), 44 deletions(-)
>
> diff --git a/platform/linux-generic/odp_time.c
> b/platform/linux-generic/odp_time.c
> index 31d6656..6f40361 100644
> --- a/platform/linux-generic/odp_time.c
> +++ b/platform/linux-generic/odp_time.c
> @@ -6,52 +6,13 @@
>     #define _POSIX_C_SOURCE 200809L
>   +#include <time.h>
>   #include <odp/time.h>
> -#include <odp/hints.h>
>   #include <odp/system_info.h>
> -
> -#define GIGA 1000000000
> -
> -#if defined __x86_64__ || defined __i386__
> -
> -uint64_t odp_time_cycles(void)
> -{
> -       union {
> -               uint64_t tsc_64;
> -               struct {
> -                       uint32_t lo_32;
> -                       uint32_t hi_32;
> -               };
> -       } tsc;
> -
> -       __asm__ __volatile__ ("rdtsc" :
> -                    "=a" (tsc.lo_32),
> -                    "=d" (tsc.hi_32) : : "memory");
> -
> -       return tsc.tsc_64;
> -}
> -
> -
> -#elif defined __OCTEON__
> -
> -uint64_t odp_time_cycles(void)
> -{
> -       #define CVMX_TMP_STR(x) CVMX_TMP_STR2(x)
> -       #define CVMX_TMP_STR2(x) #x
> -       uint64_t cycle;
> -
> -       __asm__ __volatile__ ("rdhwr %[rt],$" CVMX_TMP_STR(31) :
> -                          [rt] "=d" (cycle) : : "memory");
> -
> -       return cycle;
> -}
> -
> -#else
> -
> -#include <time.h>
> -#include <stdlib.h>
>   #include <odp_debug_internal.h>
>   +#define GIGA 1000000000
> +
>   uint64_t odp_time_cycles(void)
>   {
>         struct timespec time;
> @@ -74,8 +35,6 @@ uint64_t odp_time_cycles(void)
>         return cycles;
>   }
>   -#endif
> -
>   uint64_t odp_time_diff_cycles(uint64_t t1, uint64_t t2)
>   {
>         if (odp_likely(t2 > t1))
>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
>
>
>
>
> --
>
> Mike Holmes
>
> Technical Manager - Linaro Networking Group
>
> Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM SoCs
>
>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
>
>
>
Bill Fischofer March 9, 2015, 1:03 p.m. UTC | #5
I'm not sure what the benefit would be for this. ODP and Linux take a
different approach to optimization.  Linux tries to be a single
implementation optimized for multiple platforms, hence the use of arch
directories.  ODP, however is a single *specification* optimized across
multiple implementations.  odp-dpdk is intended to be the optimized
implementation for x86, just as the implementations provided by Cavium, TI,
etc. are designed to be the optimized implementations for their respective
platforms.  As such, linux-generic has no need for platform-specific
optimizations, however organized.

On Mon, Mar 9, 2015 at 7:50 AM, Mike Holmes <mike.holmes@linaro.org> wrote:

>
>
> On 9 March 2015 at 08:45, Savolainen, Petri (Nokia - FI/Espoo) <
> petri.savolainen@nokia.com> wrote:
>
>>  Performance is not the main concern here, but cycle count measurement
>> accuracy. E.g. Intel rdtsc cycle counter read latency is few cycles where
>> as clock_gettime() seems to take over a thousand. System call latencies
>> also vary since those may launch other kernel services, which adds jitter
>> to cycle count measurements.
>>
>>
>>
>> Could we organize arch specific stuff better (no #ifdefs, but
>> configure/build mechanism) and continue to support e.g. rdtsc.
>>
>
> We had originally proposed an arch directory to capture architecture
> differences that should be common to all X86, ARM etc regardless of the
> platform and its specific accelerators.
> If this were at the top in odp/arch then I would imaging they would be
> reusable implementations of API  functions.
>
> I think this should be a topic for the ARCH call in 10 mins.
>
>
>>
>>
>> -Petri
>>
>>
>>
>>
>>
>> *From:* lng-odp-bounces@lists.linaro.org [mailto:
>> lng-odp-bounces@lists.linaro.org] *On Behalf Of *ext Bill Fischofer
>> *Sent:* Friday, March 06, 2015 6:42 PM
>> *To:* Mike Holmes
>> *Cc:* lng-odp
>> *Subject:* Re: [lng-odp] [PATCH] linux-generic: time: remove platform
>> specific acceleration
>>
>>
>>
>> I agree with Mike.  Now that we have an official LNG performance
>> implementation, linux-generic should focus on being a simple and clean
>> functional implementation that elaborates the intended behavior of the ODP
>> APIs without being overly concerned with performance.  That doesn't mean we
>> don't do straightforward functional optimizations that are algorithmic in
>> nature, just that there shouldn't be any platform-specific tuning paths in
>> linux-generic.
>>
>>
>>
>> On Fri, Mar 6, 2015 at 10:35 AM, Mike Holmes <mike.holmes@linaro.org>
>> wrote:
>>
>> The DPDK implementation will take the X86 optimizations, presumably
>> Cavium already have Octeon acceleration in their own ODP implementation.
>>
>>
>>
>> The linux generic implementation should be a model for the behavior of
>> ODP, it happens to run in Linux but that should not be confused with it
>> being used in any real application.
>>
>>
>>
>> If we fill it with optimizations it will be harder to maintain as a pure
>> model.
>>
>>
>>
>> Mike
>>
>>
>>
>> On 6 March 2015 at 11:26, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
>>
>> Where should be that implementations placed if we want other platform to
>> reuse them?
>>
>> Maxim.
>>
>>
>>
>> On 03/06/15 19:22, Mike Holmes wrote:
>>
>> Linux-generic should be a reference implementation and does not need
>> archetecture specific acceleration.
>>
>> Signed-off-by: Mike Holmes <mike.holmes@linaro.org>
>> ---
>>   platform/linux-generic/odp_time.c | 47
>> +++------------------------------------
>>   1 file changed, 3 insertions(+), 44 deletions(-)
>>
>> diff --git a/platform/linux-generic/odp_time.c
>> b/platform/linux-generic/odp_time.c
>> index 31d6656..6f40361 100644
>> --- a/platform/linux-generic/odp_time.c
>> +++ b/platform/linux-generic/odp_time.c
>> @@ -6,52 +6,13 @@
>>     #define _POSIX_C_SOURCE 200809L
>>   +#include <time.h>
>>   #include <odp/time.h>
>> -#include <odp/hints.h>
>>   #include <odp/system_info.h>
>> -
>> -#define GIGA 1000000000
>> -
>> -#if defined __x86_64__ || defined __i386__
>> -
>> -uint64_t odp_time_cycles(void)
>> -{
>> -       union {
>> -               uint64_t tsc_64;
>> -               struct {
>> -                       uint32_t lo_32;
>> -                       uint32_t hi_32;
>> -               };
>> -       } tsc;
>> -
>> -       __asm__ __volatile__ ("rdtsc" :
>> -                    "=a" (tsc.lo_32),
>> -                    "=d" (tsc.hi_32) : : "memory");
>> -
>> -       return tsc.tsc_64;
>> -}
>> -
>> -
>> -#elif defined __OCTEON__
>> -
>> -uint64_t odp_time_cycles(void)
>> -{
>> -       #define CVMX_TMP_STR(x) CVMX_TMP_STR2(x)
>> -       #define CVMX_TMP_STR2(x) #x
>> -       uint64_t cycle;
>> -
>> -       __asm__ __volatile__ ("rdhwr %[rt],$" CVMX_TMP_STR(31) :
>> -                          [rt] "=d" (cycle) : : "memory");
>> -
>> -       return cycle;
>> -}
>> -
>> -#else
>> -
>> -#include <time.h>
>> -#include <stdlib.h>
>>   #include <odp_debug_internal.h>
>>   +#define GIGA 1000000000
>> +
>>   uint64_t odp_time_cycles(void)
>>   {
>>         struct timespec time;
>> @@ -74,8 +35,6 @@ uint64_t odp_time_cycles(void)
>>         return cycles;
>>   }
>>   -#endif
>> -
>>   uint64_t odp_time_diff_cycles(uint64_t t1, uint64_t t2)
>>   {
>>         if (odp_likely(t2 > t1))
>>
>>
>>
>> _______________________________________________
>> lng-odp mailing list
>> lng-odp@lists.linaro.org
>> http://lists.linaro.org/mailman/listinfo/lng-odp
>>
>>
>>
>>
>>
>> --
>>
>> Mike Holmes
>>
>> Technical Manager - Linaro Networking Group
>>
>> Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM SoCs
>>
>>
>>
>>
>> _______________________________________________
>> lng-odp mailing list
>> lng-odp@lists.linaro.org
>> http://lists.linaro.org/mailman/listinfo/lng-odp
>>
>>
>>
>
>
>
> --
> Mike Holmes
> Technical Manager - Linaro Networking Group
> Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM SoCs
>
>
>
Mike Holmes March 9, 2015, 1:30 p.m. UTC | #6
Call concluded that we generate a platfomr/linux-generic/arch for this case
because it is important to retain the low jitter / overhead characteristics
for the test case duration metrics that use this API. In general we will
avoid arch specifics in linux-generic.

This way we remove the #ifdefs in one file with configuration time switches
to either use sys calls or an arch dependent lowest overhead implementation
which is contained in its own arch/xxxx c file similar to Linux.

On 9 March 2015 at 09:03, Bill Fischofer <bill.fischofer@linaro.org> wrote:

> I'm not sure what the benefit would be for this. ODP and Linux take a
> different approach to optimization.  Linux tries to be a single
> implementation optimized for multiple platforms, hence the use of arch
> directories.  ODP, however is a single *specification* optimized across
> multiple implementations.  odp-dpdk is intended to be the optimized
> implementation for x86, just as the implementations provided by Cavium, TI,
> etc. are designed to be the optimized implementations for their respective
> platforms.  As such, linux-generic has no need for platform-specific
> optimizations, however organized.
>
> On Mon, Mar 9, 2015 at 7:50 AM, Mike Holmes <mike.holmes@linaro.org>
> wrote:
>
>>
>>
>> On 9 March 2015 at 08:45, Savolainen, Petri (Nokia - FI/Espoo) <
>> petri.savolainen@nokia.com> wrote:
>>
>>>  Performance is not the main concern here, but cycle count measurement
>>> accuracy. E.g. Intel rdtsc cycle counter read latency is few cycles where
>>> as clock_gettime() seems to take over a thousand. System call latencies
>>> also vary since those may launch other kernel services, which adds jitter
>>> to cycle count measurements.
>>>
>>>
>>>
>>> Could we organize arch specific stuff better (no #ifdefs, but
>>> configure/build mechanism) and continue to support e.g. rdtsc.
>>>
>>
>> We had originally proposed an arch directory to capture architecture
>> differences that should be common to all X86, ARM etc regardless of the
>> platform and its specific accelerators.
>> If this were at the top in odp/arch then I would imaging they would be
>> reusable implementations of API  functions.
>>
>> I think this should be a topic for the ARCH call in 10 mins.
>>
>>
>>>
>>>
>>> -Petri
>>>
>>>
>>>
>>>
>>>
>>> *From:* lng-odp-bounces@lists.linaro.org [mailto:
>>> lng-odp-bounces@lists.linaro.org] *On Behalf Of *ext Bill Fischofer
>>> *Sent:* Friday, March 06, 2015 6:42 PM
>>> *To:* Mike Holmes
>>> *Cc:* lng-odp
>>> *Subject:* Re: [lng-odp] [PATCH] linux-generic: time: remove platform
>>> specific acceleration
>>>
>>>
>>>
>>> I agree with Mike.  Now that we have an official LNG performance
>>> implementation, linux-generic should focus on being a simple and clean
>>> functional implementation that elaborates the intended behavior of the ODP
>>> APIs without being overly concerned with performance.  That doesn't mean we
>>> don't do straightforward functional optimizations that are algorithmic in
>>> nature, just that there shouldn't be any platform-specific tuning paths in
>>> linux-generic.
>>>
>>>
>>>
>>> On Fri, Mar 6, 2015 at 10:35 AM, Mike Holmes <mike.holmes@linaro.org>
>>> wrote:
>>>
>>> The DPDK implementation will take the X86 optimizations, presumably
>>> Cavium already have Octeon acceleration in their own ODP implementation.
>>>
>>>
>>>
>>> The linux generic implementation should be a model for the behavior of
>>> ODP, it happens to run in Linux but that should not be confused with it
>>> being used in any real application.
>>>
>>>
>>>
>>> If we fill it with optimizations it will be harder to maintain as a pure
>>> model.
>>>
>>>
>>>
>>> Mike
>>>
>>>
>>>
>>> On 6 March 2015 at 11:26, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
>>>
>>> Where should be that implementations placed if we want other platform to
>>> reuse them?
>>>
>>> Maxim.
>>>
>>>
>>>
>>> On 03/06/15 19:22, Mike Holmes wrote:
>>>
>>> Linux-generic should be a reference implementation and does not need
>>> archetecture specific acceleration.
>>>
>>> Signed-off-by: Mike Holmes <mike.holmes@linaro.org>
>>> ---
>>>   platform/linux-generic/odp_time.c | 47
>>> +++------------------------------------
>>>   1 file changed, 3 insertions(+), 44 deletions(-)
>>>
>>> diff --git a/platform/linux-generic/odp_time.c
>>> b/platform/linux-generic/odp_time.c
>>> index 31d6656..6f40361 100644
>>> --- a/platform/linux-generic/odp_time.c
>>> +++ b/platform/linux-generic/odp_time.c
>>> @@ -6,52 +6,13 @@
>>>     #define _POSIX_C_SOURCE 200809L
>>>   +#include <time.h>
>>>   #include <odp/time.h>
>>> -#include <odp/hints.h>
>>>   #include <odp/system_info.h>
>>> -
>>> -#define GIGA 1000000000
>>> -
>>> -#if defined __x86_64__ || defined __i386__
>>> -
>>> -uint64_t odp_time_cycles(void)
>>> -{
>>> -       union {
>>> -               uint64_t tsc_64;
>>> -               struct {
>>> -                       uint32_t lo_32;
>>> -                       uint32_t hi_32;
>>> -               };
>>> -       } tsc;
>>> -
>>> -       __asm__ __volatile__ ("rdtsc" :
>>> -                    "=a" (tsc.lo_32),
>>> -                    "=d" (tsc.hi_32) : : "memory");
>>> -
>>> -       return tsc.tsc_64;
>>> -}
>>> -
>>> -
>>> -#elif defined __OCTEON__
>>> -
>>> -uint64_t odp_time_cycles(void)
>>> -{
>>> -       #define CVMX_TMP_STR(x) CVMX_TMP_STR2(x)
>>> -       #define CVMX_TMP_STR2(x) #x
>>> -       uint64_t cycle;
>>> -
>>> -       __asm__ __volatile__ ("rdhwr %[rt],$" CVMX_TMP_STR(31) :
>>> -                          [rt] "=d" (cycle) : : "memory");
>>> -
>>> -       return cycle;
>>> -}
>>> -
>>> -#else
>>> -
>>> -#include <time.h>
>>> -#include <stdlib.h>
>>>   #include <odp_debug_internal.h>
>>>   +#define GIGA 1000000000
>>> +
>>>   uint64_t odp_time_cycles(void)
>>>   {
>>>         struct timespec time;
>>> @@ -74,8 +35,6 @@ uint64_t odp_time_cycles(void)
>>>         return cycles;
>>>   }
>>>   -#endif
>>> -
>>>   uint64_t odp_time_diff_cycles(uint64_t t1, uint64_t t2)
>>>   {
>>>         if (odp_likely(t2 > t1))
>>>
>>>
>>>
>>> _______________________________________________
>>> lng-odp mailing list
>>> lng-odp@lists.linaro.org
>>> http://lists.linaro.org/mailman/listinfo/lng-odp
>>>
>>>
>>>
>>>
>>>
>>> --
>>>
>>> Mike Holmes
>>>
>>> Technical Manager - Linaro Networking Group
>>>
>>> Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM
>>> SoCs
>>>
>>>
>>>
>>>
>>> _______________________________________________
>>> lng-odp mailing list
>>> lng-odp@lists.linaro.org
>>> http://lists.linaro.org/mailman/listinfo/lng-odp
>>>
>>>
>>>
>>
>>
>>
>> --
>> Mike Holmes
>> Technical Manager - Linaro Networking Group
>> Linaro.org <http://www.linaro.org/> *│ *Open source software for ARM SoCs
>>
>>
>>
>
Maxim Uvarov March 10, 2015, 9:40 a.m. UTC | #7
On 03/09/15 16:30, Mike Holmes wrote:
> Call concluded that we generate a platfomr/linux-generic/arch for this 
> case because it is important to retain the low jitter / overhead 
> characteristics for the test case duration metrics that use this API. 
> In general we will avoid arch specifics in linux-generic.
>
OK.

> This way we remove the #ifdefs in one file with configuration time 
> switches to either use sys calls or an arch dependent lowest overhead 
> implementation which is contained in its own arch/xxxx c file similar 
> to Linux.
>
> On 9 March 2015 at 09:03, Bill Fischofer <bill.fischofer@linaro.org 
> <mailto:bill.fischofer@linaro.org>> wrote:
>
>     I'm not sure what the benefit would be for this. ODP and Linux
>     take a different approach to optimization.  Linux tries to be a
>     single implementation optimized for multiple platforms, hence the
>     use of arch directories.  ODP, however is a single
>     /specification/ optimized across multiple implementations.
>      odp-dpdk is intended to be the optimized implementation for x86,
>     just as the implementations provided by Cavium, TI, etc. are
>     designed to be the optimized implementations for their respective
>     platforms.  As such, linux-generic has no need for
>     platform-specific optimizations, however organized.
>
>     On Mon, Mar 9, 2015 at 7:50 AM, Mike Holmes
>     <mike.holmes@linaro.org <mailto:mike.holmes@linaro.org>> wrote:
>
>
>
>         On 9 March 2015 at 08:45, Savolainen, Petri (Nokia - FI/Espoo)
>         <petri.savolainen@nokia.com
>         <mailto:petri.savolainen@nokia.com>> wrote:
>
>             Performance is not the main concern here, but cycle count
>             measurement accuracy. E.g. Intel rdtsc cycle counter read
>             latency is few cycles where as clock_gettime() seems to
>             take over a thousand. System call latencies also vary
>             since those may launch other kernel services, which adds
>             jitter to cycle count measurements.
>
>             Could we organize arch specific stuff better (no #ifdefs,
>             but configure/build mechanism) and continue to support
>             e.g. rdtsc.
>
>
>         We had originally proposed an arch directory to capture
>         architecture differences that should be common to all X86, ARM
>         etc regardless of the platform and its specific accelerators.
>         If this were at the top in odp/arch then I would imaging they
>         would be reusable implementations of API  functions.
>
>         I think this should be a topic for the ARCH call in 10 mins.
>
>             -Petri
>
>             *From:*lng-odp-bounces@lists.linaro.org
>             <mailto:lng-odp-bounces@lists.linaro.org>
>             [mailto:lng-odp-bounces@lists.linaro.org
>             <mailto:lng-odp-bounces@lists.linaro.org>] *On Behalf Of
>             *ext Bill Fischofer
>             *Sent:* Friday, March 06, 2015 6:42 PM
>             *To:* Mike Holmes
>             *Cc:* lng-odp
>             *Subject:* Re: [lng-odp] [PATCH] linux-generic: time:
>             remove platform specific acceleration
>
>             I agree with Mike.  Now that we have an official LNG
>             performance implementation, linux-generic should focus on
>             being a simple and clean functional implementation that
>             elaborates the intended behavior of the ODP APIs without
>             being overly concerned with performance.  That doesn't
>             mean we don't do straightforward functional optimizations
>             that are algorithmic in nature, just that there shouldn't
>             be any platform-specific tuning paths in linux-generic.
>
>             On Fri, Mar 6, 2015 at 10:35 AM, Mike Holmes
>             <mike.holmes@linaro.org <mailto:mike.holmes@linaro.org>>
>             wrote:
>
>             The DPDK implementation will take the X86 optimizations,
>             presumably Cavium already have Octeon acceleration in
>             their own ODP implementation.
>
>             The linux generic implementation should be a model for the
>             behavior of ODP, it happens to run in Linux but that
>             should not be confused with it being used in any real
>             application.
>
>             If we fill it with optimizations it will be harder to
>             maintain as a pure model.
>
>             Mike
>
>             On 6 March 2015 at 11:26, Maxim Uvarov
>             <maxim.uvarov@linaro.org <mailto:maxim.uvarov@linaro.org>>
>             wrote:
>
>             Where should be that implementations placed if we want
>             other platform to reuse them?
>
>             Maxim.
>
>
>
>             On 03/06/15 19:22, Mike Holmes wrote:
>
>             Linux-generic should be a reference implementation and
>             does not need
>             archetecture specific acceleration.
>
>             Signed-off-by: Mike Holmes <mike.holmes@linaro.org
>             <mailto:mike.holmes@linaro.org>>
>             ---
>             platform/linux-generic/odp_time.c | 47
>             +++------------------------------------
>               1 file changed, 3 insertions(+), 44 deletions(-)
>
>             diff --git a/platform/linux-generic/odp_time.c
>             b/platform/linux-generic/odp_time.c
>             index 31d6656..6f40361 100644
>             --- a/platform/linux-generic/odp_time.c
>             +++ b/platform/linux-generic/odp_time.c
>             @@ -6,52 +6,13 @@
>                 #define _POSIX_C_SOURCE 200809L
>               +#include <time.h>
>               #include <odp/time.h>
>             -#include <odp/hints.h>
>               #include <odp/system_info.h>
>             -
>             -#define GIGA 1000000000
>             -
>             -#if defined __x86_64__ || defined __i386__
>             -
>             -uint64_t odp_time_cycles(void)
>             -{
>             -       union {
>             -  uint64_t tsc_64;
>             -  struct {
>             -  uint32_t lo_32;
>             -  uint32_t hi_32;
>             -  };
>             -       } tsc;
>             -
>             -  __asm__ __volatile__ ("rdtsc" :
>             -       "=a" (tsc.lo_32),
>             -       "=d" (tsc.hi_32) : : "memory");
>             -
>             -       return tsc.tsc_64;
>             -}
>             -
>             -
>             -#elif defined __OCTEON__
>             -
>             -uint64_t odp_time_cycles(void)
>             -{
>             -  #define CVMX_TMP_STR(x) CVMX_TMP_STR2(x)
>             -  #define CVMX_TMP_STR2(x) #x
>             -  uint64_t cycle;
>             -
>             -  __asm__ __volatile__ ("rdhwr %[rt],$" CVMX_TMP_STR(31) :
>             - [rt] "=d" (cycle) : : "memory");
>             -
>             -       return cycle;
>             -}
>             -
>             -#else
>             -
>             -#include <time.h>
>             -#include <stdlib.h>
>               #include <odp_debug_internal.h>
>               +#define GIGA 1000000000
>             +
>               uint64_t odp_time_cycles(void)
>               {
>                     struct timespec time;
>             @@ -74,8 +35,6 @@ uint64_t odp_time_cycles(void)
>                     return cycles;
>               }
>               -#endif
>             -
>               uint64_t odp_time_diff_cycles(uint64_t t1, uint64_t t2)
>               {
>                     if (odp_likely(t2 > t1))
>
>             _______________________________________________
>             lng-odp mailing list
>             lng-odp@lists.linaro.org <mailto:lng-odp@lists.linaro.org>
>             http://lists.linaro.org/mailman/listinfo/lng-odp
>
>
>
>             -- 
>
>             Mike Holmes
>
>             Technical Manager - Linaro Networking Group
>
>             Linaro.org <http://www.linaro.org/>***│ *Open source
>             software for ARM SoCs
>
>
>             _______________________________________________
>             lng-odp mailing list
>             lng-odp@lists.linaro.org <mailto:lng-odp@lists.linaro.org>
>             http://lists.linaro.org/mailman/listinfo/lng-odp
>
>
>
>
>         -- 
>         Mike Holmes
>         Technical Manager - Linaro Networking Group
>         Linaro.org <http://www.linaro.org/>***│ *Open source software
>         for ARM SoCs
>
>
>
>
>
> -- 
> Mike Holmes
> Technical Manager - Linaro Networking Group
> Linaro.org <http://www.linaro.org/>***│ *Open source software for ARM SoCs
>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> http://lists.linaro.org/mailman/listinfo/lng-odp
diff mbox

Patch

diff --git a/platform/linux-generic/odp_time.c b/platform/linux-generic/odp_time.c
index 31d6656..6f40361 100644
--- a/platform/linux-generic/odp_time.c
+++ b/platform/linux-generic/odp_time.c
@@ -6,52 +6,13 @@ 
 
 #define _POSIX_C_SOURCE 200809L
 
+#include <time.h>
 #include <odp/time.h>
-#include <odp/hints.h>
 #include <odp/system_info.h>
-
-#define GIGA 1000000000
-
-#if defined __x86_64__ || defined __i386__
-
-uint64_t odp_time_cycles(void)
-{
-	union {
-		uint64_t tsc_64;
-		struct {
-			uint32_t lo_32;
-			uint32_t hi_32;
-		};
-	} tsc;
-
-	__asm__ __volatile__ ("rdtsc" :
-		     "=a" (tsc.lo_32),
-		     "=d" (tsc.hi_32) : : "memory");
-
-	return tsc.tsc_64;
-}
-
-
-#elif defined __OCTEON__
-
-uint64_t odp_time_cycles(void)
-{
-	#define CVMX_TMP_STR(x) CVMX_TMP_STR2(x)
-	#define CVMX_TMP_STR2(x) #x
-	uint64_t cycle;
-
-	__asm__ __volatile__ ("rdhwr %[rt],$" CVMX_TMP_STR(31) :
-			   [rt] "=d" (cycle) : : "memory");
-
-	return cycle;
-}
-
-#else
-
-#include <time.h>
-#include <stdlib.h>
 #include <odp_debug_internal.h>
 
+#define GIGA 1000000000
+
 uint64_t odp_time_cycles(void)
 {
 	struct timespec time;
@@ -74,8 +35,6 @@  uint64_t odp_time_cycles(void)
 	return cycles;
 }
 
-#endif
-
 uint64_t odp_time_diff_cycles(uint64_t t1, uint64_t t2)
 {
 	if (odp_likely(t2 > t1))