[1/1] helpers: linux: add application defined counters

Message ID 1441105781-23174-1-git-send-email-alexandru.badicioiu@linaro.org
State New
Headers show

Commit Message

Alexandru Badicioiu Sept. 1, 2015, 11:09 a.m.
From: Alexandru Badicioiu <alexandru.badicioiu@linaro.org>

This patch provides the applications with helpers to create, update
and read counters associated with application defined objects - e.g.
IPSec security associations in a multi-threading scenario where
multiple threads use the same object.
Each worker thread must create a local version of a given counter -
e.g. IPSec SA request bytes/packets with odph_counter32/64_create_local()
function and update its local counter with odph_counter32/64_set/add/sub
functions.
A control thread can use odph_counter32/64_read_global() to retrieve
the total value of the counter by summing all local values.

Signed-off-by: Alexandru Badicioiu <alexandru.badicioiu@linaro.org>
---
 helper/include/odp/helper/linux.h |  114 +++++++++++++++++++++++++++++++++
 helper/linux.c                    |  128 +++++++++++++++++++++++++++++++++++++
 2 files changed, 242 insertions(+), 0 deletions(-)

Comments

Ola Liljedahl Sept. 8, 2015, 12:57 p.m. | #1
On 1 September 2015 at 13:09, <alexandru.badicioiu@linaro.org> wrote:

> From: Alexandru Badicioiu <alexandru.badicioiu@linaro.org>
>
> This patch provides the applications with helpers to create, update
> and read counters associated with application defined objects - e.g.
> IPSec security associations in a multi-threading scenario where
> multiple threads use the same object.
>
I think this is useful functionality. But I fear it might not really help
on targets that do not support atomic (64-bit) reads and writes.

Why ODP helpers and not proper ODP?

We also want an API for counters (statistics) that can be offloaded to HW,
some SoC's do that. It might be useful if the API could be used for both HW
counter and your per-thread counter implementations (together with an
implementation using e.g. ARM8.1 atomics).



> Each worker thread must create a local version of a given counter -
> e.g. IPSec SA request bytes/packets with odph_counter32/64_create_local()
> function and update its local counter with odph_counter32/64_set/add/sub
> functions.
> A control thread can use odph_counter32/64_read_global() to retrieve
> the total value of the counter by summing all local values.
>
> Signed-off-by: Alexandru Badicioiu <alexandru.badicioiu@linaro.org>
> ---
>  helper/include/odp/helper/linux.h |  114 +++++++++++++++++++++++++++++++++
>  helper/linux.c                    |  128
> +++++++++++++++++++++++++++++++++++++
>  2 files changed, 242 insertions(+), 0 deletions(-)
>
> diff --git a/helper/include/odp/helper/linux.h
> b/helper/include/odp/helper/linux.h
> index ce61fdf..3c7ab15 100644
> --- a/helper/include/odp/helper/linux.h
> +++ b/helper/include/odp/helper/linux.h
> @@ -31,6 +31,7 @@ extern "C" {
>  typedef struct {
>         void *(*start_routine) (void *); /**< The function to run */
>         void *arg; /**< The functions arguemnts */
> +       void **counters_tbl; /**< Counters table address */
>  } odp_start_args_t;
>
>  /** Linux pthread state information */
> @@ -40,8 +41,18 @@ typedef struct {
>         int            cpu;    /**< CPU ID */
>         /** Saved starting args for join to later free */
>         odp_start_args_t *start_args;
> +       void *counters_tbl;    /**< Counters table for this thread */
>  } odph_linux_pthread_t;
>
> +/** Thread local counter size */
> +typedef enum odph_counter_size {
> +       ODPH_COUNTER_SIZE_32BIT,
> +       ODPH_COUNTER_SIZE_64BIT,
> +} odph_counter_size_t;
> +
> +#define ODPH_COUNTER_INVALID    (-1)
> +/** Thread local counter handle */
> +typedef int32_t odph_counter_t;
>
>  /** Linux process state information */
>  typedef struct {
> @@ -77,6 +88,109 @@ int odph_linux_pthread_create(odph_linux_pthread_t
> *thread_tbl,
>   */
>  void odph_linux_pthread_join(odph_linux_pthread_t *thread_tbl, int num);
>
> +/**
> + * Creates a 32bit thread local counter
> + * Each thread in the thread table must create a
> + * counter for a shared object to which the counter
> + * is associated to - e.g. an IPSec Security Association.
> + *
> + * Returns the counter handle
> + *
> + * @return Counter handle
> + */
> +odph_counter_t odph_counter32_create_local(void);
> +
> +/**
> + * Creates a 64bit thread local counter
> + *
> + * Returns the counter handle
> + *
> + * @return Counter handle
> + */
> +odph_counter_t odph_counter64_create_local(void);
> +
> +/**
> + * Get counter size
> + *
> + * Return the size of a counter
> + *
> + * @param Counter handle
> + * @return Counter size
> + *
> + */
> +odph_counter_size_t
> +odph_counter_size(odph_counter_t counter);
> +
> +/**
> + * Set a 32bit counter to a given value
> + *
> + * @param Counter handle
> + * @param Set value
> + *
> + */
> +void odph_counter32_set(odph_counter_t counter, uint32_t val);
> +
> +/**
> + * Set a 64bit thread local counter to a given value
> + *
> + * @param Counter handle
> + * @param Set value
> + *
> + */
> +void odph_counter64_set(odph_counter_t counter, uint64_t val);
> +
> +/**
> + * Add a value to a 32bit thread local counter
> + *
> + * @param Counter handle
> + * @param Value to add
> + *
> + */
> +void odph_counter32_add(odph_counter_t counter, unsigned int val);
>
uint32_t val?


> +
> +/**
> + * Add a value to a 64bit thread local counter
> + *
> + * @param Counter handle
> + * @param Value to add
> + *
> + */
> +void odph_counter64_add(odph_counter_t counter, unsigned int val);
>
uint64_t val?


> +
> +/**
> + * Subtract a value from a 32bit thread local counter
> + *
> + * @param Counter handle
> + * @param Value to substract
> + *
> + */
> +void odph_counter32_sub(odph_counter_t counter, unsigned int val);
> +
> +/**
> + * Subtract a value from a 64bit thread local counter
> + *
> + * @param Counter handle
> + * @param Value to substract
> + *
> + */
> +void odph_counter64_sub(odph_counter_t counter, unsigned int val);
> +
> +/**
> + * Return global value of a counter by summing thread local values
> + * Caller has to make sure that all threads created the local
> + * counter prior calling this function.
> + *
> + * @param Thread array
> + * @param Number of threads in the array
> + * @param Counter handle
> + *
> + * @return Counter value
> + *
> + */
> +uint64_t
> +odph_counter_read_global(odph_linux_pthread_t *thread_tbl, int num,
> +                        odph_counter_t counter);
> +
>
>  /**
>   * Fork a process
> diff --git a/helper/linux.c b/helper/linux.c
> index 3d3b6b8..1953d75 100644
> --- a/helper/linux.c
> +++ b/helper/linux.c
> @@ -22,16 +22,45 @@
>  #include <odp/system_info.h>
>  #include "odph_debug.h"
>
> +#define MAX_COUNTERS_TBL_SIZE  64
> +
> +typedef union odph_counters {
> +       uint32_t __u32[MAX_COUNTERS_TBL_SIZE];
> +       uint64_t __u64[MAX_COUNTERS_TBL_SIZE / 2];
> +} odph_counters_t;
> +
> +typedef union odph_counter_bits {
> +       odph_counter_t handle;
> +       union {
> +               int32_t s32;
> +               struct {
> +                       uint16_t index;
> +                       uint16_t size;
>
Perhaps use only 1 bit for the size; 0 => 32 bits, 1 => 64 bits?


> +               };
> +       };
> +} odph_counter_bits_t;
> +
> +/* Thread local array for local counters storage */
> +static __thread odph_counters_t counters_tbl;
> +
> +/* Table index (32bit) where next counter is allocated */
> +static __thread uint16_t next_idx;
> +
>  static void *odp_run_start_routine(void *arg)
>  {
>         odp_start_args_t *start_args = arg;
>
> +       void **_counters_tbl;
>         /* ODP thread local init */
>         if (odp_init_local(ODP_THREAD_WORKER)) {
>                 ODPH_ERR("Local init failed\n");
>                 return NULL;
>         }
>
> +       /* Store thread local counters table address */
> +       _counters_tbl = start_args->counters_tbl;
> +       *_counters_tbl = &counters_tbl;
> +
>         void *ret_ptr = start_args->start_routine(start_args->arg);
>         int ret = odp_term_local();
>         if (ret < 0)
> @@ -87,6 +116,8 @@ int odph_linux_pthread_create(odph_linux_pthread_t
> *thread_tbl,
>
>                 thread_tbl[i].start_args->start_routine = start_routine;
>                 thread_tbl[i].start_args->arg           = arg;
> +               thread_tbl[i].start_args->counters_tbl  =
> +                                           &thread_tbl[i].counters_tbl;
>
>                 ret = pthread_create(&thread_tbl[i].thread,
> &thread_tbl[i].attr,
>                                odp_run_start_routine,
> thread_tbl[i].start_args);
> @@ -120,6 +151,103 @@ void odph_linux_pthread_join(odph_linux_pthread_t
> *thread_tbl, int num)
>         }
>  }
>
> +odph_counter_t odph_counter32_create_local(void)
> +{
> +       odph_counter_bits_t handle;
> +       odph_counter_t ret = ODPH_COUNTER_INVALID;
> +
> +       if (next_idx >= MAX_COUNTERS_TBL_SIZE)
> +               return ret;
> +       handle.index = next_idx;
> +       next_idx += 1;
> +       handle.size = ODPH_COUNTER_SIZE_32BIT;
> +       return handle.handle;
> +}
> +
> +odph_counter_t odph_counter64_create_local(void)
> +{
> +       odph_counter_bits_t handle;
> +       odph_counter_t ret = ODPH_COUNTER_INVALID;
> +
> +       if (next_idx >= MAX_COUNTERS_TBL_SIZE)
> +               return ret;
> +       handle.index = next_idx;
> +       next_idx += 2;
> +       handle.size = ODPH_COUNTER_SIZE_64BIT;
> +       return handle.handle;
> +}
> +
> +odph_counter_size_t odph_counter_size(odph_counter_t counter)
> +{
> +       return ((odph_counter_bits_t)counter).size;
> +}
> +
> +inline void odph_counter32_set(odph_counter_t counter, uint32_t val)
> +{
> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
> +
> +       counters_tbl.__u32[idx] = val;
> +}
> +
> +inline void odph_counter64_set(odph_counter_t counter, uint64_t val)
> +{
> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
> +
> +       counters_tbl.__u64[idx] = val;
>
I think this operation must be atomic. Otherwise a reader in another thread
might see a torn write (one word old and one word new).


> +}
> +
> +inline void odph_counter32_add(odph_counter_t counter, unsigned int val)
> +{
> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
> +
> +       counters_tbl.__u32[idx] += val;
> +}
> +
> +inline void odph_counter64_add(odph_counter_t counter, unsigned int val)
> +{
> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
> +
> +       counters_tbl.__u64[idx] += val;
>
Again a need for an atomic update (the store itself, not the whole add
operation).

+}
> +
> +inline void odph_counter32_sub(odph_counter_t counter, unsigned int val)
> +{
> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
> +
> +       counters_tbl.__u32[idx] -= val;
> +}
> +
> +inline void odph_counter64_sub(odph_counter_t counter, unsigned int val)
> +{
> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
> +
> +       counters_tbl.__u64[idx] -= val;
>
Ditto.


> +}
> +
> +uint64_t
> +odph_counter_read_global(odph_linux_pthread_t *thread_tbl, int num,
> +                        odph_counter_t counter)
> +{
> +       uint64_t val = 0;
> +       uint32_t *ptr32;
> +       uint64_t *ptr64;
> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
> +       int i;
> +
> +       for (i = 0; i < num; i++) {
> +               odph_counters_t *counters_tbl = thread_tbl[i].counters_tbl;
> +
> +               switch (odph_counter_size(counter)) {
> +               case ODPH_COUNTER_SIZE_32BIT:
> +                       val += counters_tbl->__u32[idx];
> +                       break;
> +               case ODPH_COUNTER_SIZE_64BIT:
> +                       val += counters_tbl->__u64[idx];
>
Need atomic read here.
Perhaps this can be emulated by an *ordered* sequence of 32-bit reads (use
data dependencies):
1) Read MSW
2) Read LSW
3) Re-read MSW
4) Check first and second MSW, if different re-start loop.
5) Return MSW << 32 | LSW.
The 64-bit counter must be updated (written to) in a well-defined order
(write LSW then MSW), can this be enforced?


> +                       break;
> +               }
> +       }
> +       return val;
> +}
>
>  int odph_linux_process_fork_n(odph_linux_process_t *proc_tbl,
>                               const odp_cpumask_t *mask_in)
> --
> 1.7.3.4
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> https://lists.linaro.org/mailman/listinfo/lng-odp
>
Alexandru Badicioiu Sept. 8, 2015, 1:29 p.m. | #2
On 8 September 2015 at 15:57, Ola Liljedahl <ola.liljedahl@linaro.org>
wrote:

> On 1 September 2015 at 13:09, <alexandru.badicioiu@linaro.org> wrote:
>
>> From: Alexandru Badicioiu <alexandru.badicioiu@linaro.org>
>>
>> This patch provides the applications with helpers to create, update
>> and read counters associated with application defined objects - e.g.
>> IPSec security associations in a multi-threading scenario where
>> multiple threads use the same object.
>>
> I think this is useful functionality. But I fear it might not really help
> on targets that do not support atomic (64-bit) reads and writes.
>
[Alex] Those platforms are likely 32-bit platforms and they can use 32-bit
counters. These counters can be useful for debugging purposes, rather than
providing statistics over longer period of time.

>
> Why ODP helpers and not proper ODP?
>
[Alex] By application defined counters I meant software counters. These
likely won't have different implementations on different platforms.

>
> We also want an API for counters (statistics) that can be offloaded to HW,
> some SoC's do that. It might be useful if the API could be used for both HW
> counter and your per-thread counter implementations (together with an
> implementation using e.g. ARM8.1 atomics).
> [Alex] Pktio counters is this kind of API. Others may be proposed too but
> this requires agreement between platforms.
>
>
>> Each worker thread must create a local version of a given counter -
>> e.g. IPSec SA request bytes/packets with odph_counter32/64_create_local()
>> function and update its local counter with odph_counter32/64_set/add/sub
>> functions.
>> A control thread can use odph_counter32/64_read_global() to retrieve
>> the total value of the counter by summing all local values.
>>
>> Signed-off-by: Alexandru Badicioiu <alexandru.badicioiu@linaro.org>
>> ---
>>  helper/include/odp/helper/linux.h |  114
>> +++++++++++++++++++++++++++++++++
>>  helper/linux.c                    |  128
>> +++++++++++++++++++++++++++++++++++++
>>  2 files changed, 242 insertions(+), 0 deletions(-)
>>
>> diff --git a/helper/include/odp/helper/linux.h
>> b/helper/include/odp/helper/linux.h
>> index ce61fdf..3c7ab15 100644
>> --- a/helper/include/odp/helper/linux.h
>> +++ b/helper/include/odp/helper/linux.h
>> @@ -31,6 +31,7 @@ extern "C" {
>>  typedef struct {
>>         void *(*start_routine) (void *); /**< The function to run */
>>         void *arg; /**< The functions arguemnts */
>> +       void **counters_tbl; /**< Counters table address */
>>  } odp_start_args_t;
>>
>>  /** Linux pthread state information */
>> @@ -40,8 +41,18 @@ typedef struct {
>>         int            cpu;    /**< CPU ID */
>>         /** Saved starting args for join to later free */
>>         odp_start_args_t *start_args;
>> +       void *counters_tbl;    /**< Counters table for this thread */
>>  } odph_linux_pthread_t;
>>
>> +/** Thread local counter size */
>> +typedef enum odph_counter_size {
>> +       ODPH_COUNTER_SIZE_32BIT,
>> +       ODPH_COUNTER_SIZE_64BIT,
>> +} odph_counter_size_t;
>> +
>> +#define ODPH_COUNTER_INVALID    (-1)
>> +/** Thread local counter handle */
>> +typedef int32_t odph_counter_t;
>>
>>  /** Linux process state information */
>>  typedef struct {
>> @@ -77,6 +88,109 @@ int odph_linux_pthread_create(odph_linux_pthread_t
>> *thread_tbl,
>>   */
>>  void odph_linux_pthread_join(odph_linux_pthread_t *thread_tbl, int num);
>>
>> +/**
>> + * Creates a 32bit thread local counter
>> + * Each thread in the thread table must create a
>> + * counter for a shared object to which the counter
>> + * is associated to - e.g. an IPSec Security Association.
>> + *
>> + * Returns the counter handle
>> + *
>> + * @return Counter handle
>> + */
>> +odph_counter_t odph_counter32_create_local(void);
>> +
>> +/**
>> + * Creates a 64bit thread local counter
>> + *
>> + * Returns the counter handle
>> + *
>> + * @return Counter handle
>> + */
>> +odph_counter_t odph_counter64_create_local(void);
>> +
>> +/**
>> + * Get counter size
>> + *
>> + * Return the size of a counter
>> + *
>> + * @param Counter handle
>> + * @return Counter size
>> + *
>> + */
>> +odph_counter_size_t
>> +odph_counter_size(odph_counter_t counter);
>> +
>> +/**
>> + * Set a 32bit counter to a given value
>> + *
>> + * @param Counter handle
>> + * @param Set value
>> + *
>> + */
>> +void odph_counter32_set(odph_counter_t counter, uint32_t val);
>> +
>> +/**
>> + * Set a 64bit thread local counter to a given value
>> + *
>> + * @param Counter handle
>> + * @param Set value
>> + *
>> + */
>> +void odph_counter64_set(odph_counter_t counter, uint64_t val);
>> +
>> +/**
>> + * Add a value to a 32bit thread local counter
>> + *
>> + * @param Counter handle
>> + * @param Value to add
>> + *
>> + */
>> +void odph_counter32_add(odph_counter_t counter, unsigned int val);
>>
> uint32_t val?
>
>
>> +
>> +/**
>> + * Add a value to a 64bit thread local counter
>> + *
>> + * @param Counter handle
>> + * @param Value to add
>> + *
>> + */
>> +void odph_counter64_add(odph_counter_t counter, unsigned int val);
>>
> uint64_t val?
>
>
>> +
>> +/**
>> + * Subtract a value from a 32bit thread local counter
>> + *
>> + * @param Counter handle
>> + * @param Value to substract
>> + *
>> + */
>> +void odph_counter32_sub(odph_counter_t counter, unsigned int val);
>> +
>> +/**
>> + * Subtract a value from a 64bit thread local counter
>> + *
>> + * @param Counter handle
>> + * @param Value to substract
>> + *
>> + */
>> +void odph_counter64_sub(odph_counter_t counter, unsigned int val);
>> +
>> +/**
>> + * Return global value of a counter by summing thread local values
>> + * Caller has to make sure that all threads created the local
>> + * counter prior calling this function.
>> + *
>> + * @param Thread array
>> + * @param Number of threads in the array
>> + * @param Counter handle
>> + *
>> + * @return Counter value
>> + *
>> + */
>> +uint64_t
>> +odph_counter_read_global(odph_linux_pthread_t *thread_tbl, int num,
>> +                        odph_counter_t counter);
>> +
>>
>>  /**
>>   * Fork a process
>> diff --git a/helper/linux.c b/helper/linux.c
>> index 3d3b6b8..1953d75 100644
>> --- a/helper/linux.c
>> +++ b/helper/linux.c
>> @@ -22,16 +22,45 @@
>>  #include <odp/system_info.h>
>>  #include "odph_debug.h"
>>
>> +#define MAX_COUNTERS_TBL_SIZE  64
>> +
>> +typedef union odph_counters {
>> +       uint32_t __u32[MAX_COUNTERS_TBL_SIZE];
>> +       uint64_t __u64[MAX_COUNTERS_TBL_SIZE / 2];
>> +} odph_counters_t;
>> +
>> +typedef union odph_counter_bits {
>> +       odph_counter_t handle;
>> +       union {
>> +               int32_t s32;
>> +               struct {
>> +                       uint16_t index;
>> +                       uint16_t size;
>>
> Perhaps use only 1 bit for the size; 0 => 32 bits, 1 => 64 bits?
>
>
>> +               };
>> +       };
>> +} odph_counter_bits_t;
>> +
>> +/* Thread local array for local counters storage */
>> +static __thread odph_counters_t counters_tbl;
>> +
>> +/* Table index (32bit) where next counter is allocated */
>> +static __thread uint16_t next_idx;
>> +
>>  static void *odp_run_start_routine(void *arg)
>>  {
>>         odp_start_args_t *start_args = arg;
>>
>> +       void **_counters_tbl;
>>         /* ODP thread local init */
>>         if (odp_init_local(ODP_THREAD_WORKER)) {
>>                 ODPH_ERR("Local init failed\n");
>>                 return NULL;
>>         }
>>
>> +       /* Store thread local counters table address */
>> +       _counters_tbl = start_args->counters_tbl;
>> +       *_counters_tbl = &counters_tbl;
>> +
>>         void *ret_ptr = start_args->start_routine(start_args->arg);
>>         int ret = odp_term_local();
>>         if (ret < 0)
>> @@ -87,6 +116,8 @@ int odph_linux_pthread_create(odph_linux_pthread_t
>> *thread_tbl,
>>
>>                 thread_tbl[i].start_args->start_routine = start_routine;
>>                 thread_tbl[i].start_args->arg           = arg;
>> +               thread_tbl[i].start_args->counters_tbl  =
>> +                                           &thread_tbl[i].counters_tbl;
>>
>>                 ret = pthread_create(&thread_tbl[i].thread,
>> &thread_tbl[i].attr,
>>                                odp_run_start_routine,
>> thread_tbl[i].start_args);
>> @@ -120,6 +151,103 @@ void odph_linux_pthread_join(odph_linux_pthread_t
>> *thread_tbl, int num)
>>         }
>>  }
>>
>> +odph_counter_t odph_counter32_create_local(void)
>> +{
>> +       odph_counter_bits_t handle;
>> +       odph_counter_t ret = ODPH_COUNTER_INVALID;
>> +
>> +       if (next_idx >= MAX_COUNTERS_TBL_SIZE)
>> +               return ret;
>> +       handle.index = next_idx;
>> +       next_idx += 1;
>> +       handle.size = ODPH_COUNTER_SIZE_32BIT;
>> +       return handle.handle;
>> +}
>> +
>> +odph_counter_t odph_counter64_create_local(void)
>> +{
>> +       odph_counter_bits_t handle;
>> +       odph_counter_t ret = ODPH_COUNTER_INVALID;
>> +
>> +       if (next_idx >= MAX_COUNTERS_TBL_SIZE)
>> +               return ret;
>> +       handle.index = next_idx;
>> +       next_idx += 2;
>> +       handle.size = ODPH_COUNTER_SIZE_64BIT;
>> +       return handle.handle;
>> +}
>> +
>> +odph_counter_size_t odph_counter_size(odph_counter_t counter)
>> +{
>> +       return ((odph_counter_bits_t)counter).size;
>> +}
>> +
>> +inline void odph_counter32_set(odph_counter_t counter, uint32_t val)
>> +{
>> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>> +
>> +       counters_tbl.__u32[idx] = val;
>> +}
>> +
>> +inline void odph_counter64_set(odph_counter_t counter, uint64_t val)
>> +{
>> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>> +
>> +       counters_tbl.__u64[idx] = val;
>>
> I think this operation must be atomic. Otherwise a reader in another
> thread might see a torn write (one word old and one word new).
>  [Alex] At the beginning I thought the same way - to use atomics but
> there's a performance penalty that comes with them. Read operation is
> usually performed in a loop and eventually a control thread will read the
> correct value when a given counter stops incrementing for an amount of
> time, but otherwise it can see wrong values. Atomics would be required for
> example when IPSec SA expire due to volume (packets/bytes).
>



> +}
>> +
>> +inline void odph_counter32_add(odph_counter_t counter, unsigned int val)
>> +{
>> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>> +
>> +       counters_tbl.__u32[idx] += val;
>> +}
>> +
>> +inline void odph_counter64_add(odph_counter_t counter, unsigned int val)
>> +{
>> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>> +
>> +       counters_tbl.__u64[idx] += val;
>>
> Again a need for an atomic update (the store itself, not the whole add
> operation).
>
> +}
>> +
>> +inline void odph_counter32_sub(odph_counter_t counter, unsigned int val)
>> +{
>> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>> +
>> +       counters_tbl.__u32[idx] -= val;
>> +}
>> +
>> +inline void odph_counter64_sub(odph_counter_t counter, unsigned int val)
>> +{
>> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>> +
>> +       counters_tbl.__u64[idx] -= val;
>>
> Ditto.
>
>
>> +}
>> +
>> +uint64_t
>> +odph_counter_read_global(odph_linux_pthread_t *thread_tbl, int num,
>> +                        odph_counter_t counter)
>> +{
>> +       uint64_t val = 0;
>> +       uint32_t *ptr32;
>> +       uint64_t *ptr64;
>> +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>> +       int i;
>> +
>> +       for (i = 0; i < num; i++) {
>> +               odph_counters_t *counters_tbl =
>> thread_tbl[i].counters_tbl;
>> +
>> +               switch (odph_counter_size(counter)) {
>> +               case ODPH_COUNTER_SIZE_32BIT:
>> +                       val += counters_tbl->__u32[idx];
>> +                       break;
>> +               case ODPH_COUNTER_SIZE_64BIT:
>> +                       val += counters_tbl->__u64[idx];
>>
> Need atomic read here.
> Perhaps this can be emulated by an *ordered* sequence of 32-bit reads (use
> data dependencies):
> 1) Read MSW
> 2) Read LSW
> 3) Re-read MSW
> 4) Check first and second MSW, if different re-start loop.
> 5) Return MSW << 32 | LSW.
> The 64-bit counter must be updated (written to) in a well-defined order
> (write LSW then MSW), can this be enforced?
>
[Alex]

>
>
>> +                       break;
>> +               }
>> +       }
>> +       return val;
>> +}
>>
>>  int odph_linux_process_fork_n(odph_linux_process_t *proc_tbl,
>>                               const odp_cpumask_t *mask_in)
>> --
>> 1.7.3.4
>>
>> _______________________________________________
>> lng-odp mailing list
>> lng-odp@lists.linaro.org
>> https://lists.linaro.org/mailman/listinfo/lng-odp
>>
>
>
Maxim Uvarov Oct. 15, 2015, 6:53 p.m. | #3
On 09/08/2015 16:29, Alexandru Badicioiu wrote:
>
> On 8 September 2015 at 15:57, Ola Liljedahl <ola.liljedahl@linaro.org 
> <mailto:ola.liljedahl@linaro.org>> wrote:
>
>     On 1 September 2015 at 13:09, <alexandru.badicioiu@linaro.org
>     <mailto:alexandru.badicioiu@linaro.org>> wrote:
>
>         From: Alexandru Badicioiu <alexandru.badicioiu@linaro.org
>         <mailto:alexandru.badicioiu@linaro.org>>
>
>         This patch provides the applications with helpers to create,
>         update
>         and read counters associated with application defined objects
>         - e.g.
>         IPSec security associations in a multi-threading scenario where
>         multiple threads use the same object.
>
>     I think this is useful functionality. But I fear it might not
>     really help on targets that do not support atomic (64-bit) reads
>     and writes.
>
> [Alex] Those platforms are likely 32-bit platforms and they can use 
> 32-bit counters. These counters can be useful for debugging purposes, 
> rather than providing statistics over longer period of time.
>
>
>     Why ODP helpers and not proper ODP?
>
> [Alex] By application defined counters I meant software counters. 
> These likely won't have different implementations on different platforms.
>
>
>     We also want an API for counters (statistics) that can be
>     offloaded to HW, some SoC's do that. It might be useful if the API
>     could be used for both HW counter and your per-thread counter
>     implementations (together with an implementation using e.g. ARM8.1
>     atomics).
>     [Alex] Pktio counters is this kind of API. Others may be proposed
>     too but this requires agreement between platforms.
>

Yes, agree. For pktio it's needed to define exact counter size returned 
to app + it's more related to pktio hw, so there should be it's own 
pktio_cnt_add/sub().

Maxim.

>         Each worker thread must create a local version of a given
>         counter -
>         e.g. IPSec SA request bytes/packets with
>         odph_counter32/64_create_local()
>         function and update its local counter with
>         odph_counter32/64_set/add/sub
>         functions.
>         A control thread can use odph_counter32/64_read_global() to
>         retrieve
>         the total value of the counter by summing all local values.
>
>         Signed-off-by: Alexandru Badicioiu
>         <alexandru.badicioiu@linaro.org
>         <mailto:alexandru.badicioiu@linaro.org>>
>         ---
>          helper/include/odp/helper/linux.h |  114
>         +++++++++++++++++++++++++++++++++
>          helper/linux.c                    |  128
>         +++++++++++++++++++++++++++++++++++++
>          2 files changed, 242 insertions(+), 0 deletions(-)
>
>         diff --git a/helper/include/odp/helper/linux.h
>         b/helper/include/odp/helper/linux.h
>         index ce61fdf..3c7ab15 100644
>         --- a/helper/include/odp/helper/linux.h
>         +++ b/helper/include/odp/helper/linux.h
>         @@ -31,6 +31,7 @@ extern "C" {
>          typedef struct {
>                 void *(*start_routine) (void *); /**< The function to
>         run */
>                 void *arg; /**< The functions arguemnts */
>         +       void **counters_tbl; /**< Counters table address */
>          } odp_start_args_t;
>
>          /** Linux pthread state information */
>         @@ -40,8 +41,18 @@ typedef struct {
>                 int            cpu;    /**< CPU ID */
>                 /** Saved starting args for join to later free */
>                 odp_start_args_t *start_args;
>         +       void *counters_tbl;    /**< Counters table for this
>         thread */
>          } odph_linux_pthread_t;
>
>         +/** Thread local counter size */
>         +typedef enum odph_counter_size {
>         +       ODPH_COUNTER_SIZE_32BIT,
>         +       ODPH_COUNTER_SIZE_64BIT,
>         +} odph_counter_size_t;
>         +
>         +#define ODPH_COUNTER_INVALID    (-1)
>         +/** Thread local counter handle */
>         +typedef int32_t odph_counter_t;
>
>          /** Linux process state information */
>          typedef struct {
>         @@ -77,6 +88,109 @@ int
>         odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
>           */
>          void odph_linux_pthread_join(odph_linux_pthread_t
>         *thread_tbl, int num);
>
>         +/**
>         + * Creates a 32bit thread local counter
>         + * Each thread in the thread table must create a
>         + * counter for a shared object to which the counter
>         + * is associated to - e.g. an IPSec Security Association.
>         + *
>         + * Returns the counter handle
>         + *
>         + * @return Counter handle
>         + */
>         +odph_counter_t odph_counter32_create_local(void);
>         +
>         +/**
>         + * Creates a 64bit thread local counter
>         + *
>         + * Returns the counter handle
>         + *
>         + * @return Counter handle
>         + */
>         +odph_counter_t odph_counter64_create_local(void);
>         +
>         +/**
>         + * Get counter size
>         + *
>         + * Return the size of a counter
>         + *
>         + * @param Counter handle
>         + * @return Counter size
>         + *
>         + */
>         +odph_counter_size_t
>         +odph_counter_size(odph_counter_t counter);
>         +
>         +/**
>         + * Set a 32bit counter to a given value
>         + *
>         + * @param Counter handle
>         + * @param Set value
>         + *
>         + */
>         +void odph_counter32_set(odph_counter_t counter, uint32_t val);
>         +
>         +/**
>         + * Set a 64bit thread local counter to a given value
>         + *
>         + * @param Counter handle
>         + * @param Set value
>         + *
>         + */
>         +void odph_counter64_set(odph_counter_t counter, uint64_t val);
>         +
>         +/**
>         + * Add a value to a 32bit thread local counter
>         + *
>         + * @param Counter handle
>         + * @param Value to add
>         + *
>         + */
>         +void odph_counter32_add(odph_counter_t counter, unsigned int
>         val);
>
>     uint32_t val?
>
>         +
>         +/**
>         + * Add a value to a 64bit thread local counter
>         + *
>         + * @param Counter handle
>         + * @param Value to add
>         + *
>         + */
>         +void odph_counter64_add(odph_counter_t counter, unsigned int
>         val);
>
>     uint64_t val?
>
>         +
>         +/**
>         + * Subtract a value from a 32bit thread local counter
>         + *
>         + * @param Counter handle
>         + * @param Value to substract
>         + *
>         + */
>         +void odph_counter32_sub(odph_counter_t counter, unsigned int
>         val);
>         +
>         +/**
>         + * Subtract a value from a 64bit thread local counter
>         + *
>         + * @param Counter handle
>         + * @param Value to substract
>         + *
>         + */
>         +void odph_counter64_sub(odph_counter_t counter, unsigned int
>         val);
>         +
>         +/**
>         + * Return global value of a counter by summing thread local
>         values
>         + * Caller has to make sure that all threads created the local
>         + * counter prior calling this function.
>         + *
>         + * @param Thread array
>         + * @param Number of threads in the array
>         + * @param Counter handle
>         + *
>         + * @return Counter value
>         + *
>         + */
>         +uint64_t
>         +odph_counter_read_global(odph_linux_pthread_t *thread_tbl,
>         int num,
>         +                        odph_counter_t counter);
>         +
>
>          /**
>           * Fork a process
>         diff --git a/helper/linux.c b/helper/linux.c
>         index 3d3b6b8..1953d75 100644
>         --- a/helper/linux.c
>         +++ b/helper/linux.c
>         @@ -22,16 +22,45 @@
>          #include <odp/system_info.h>
>          #include "odph_debug.h"
>
>         +#define MAX_COUNTERS_TBL_SIZE  64
>         +
>         +typedef union odph_counters {
>         +       uint32_t __u32[MAX_COUNTERS_TBL_SIZE];
>         +       uint64_t __u64[MAX_COUNTERS_TBL_SIZE / 2];
>         +} odph_counters_t;
>         +
>         +typedef union odph_counter_bits {
>         +       odph_counter_t handle;
>         +       union {
>         +               int32_t s32;
>         +               struct {
>         +                       uint16_t index;
>         +                       uint16_t size;
>
>     Perhaps use only 1 bit for the size; 0 => 32 bits, 1 => 64 bits?
>
>         +               };
>         +       };
>         +} odph_counter_bits_t;
>         +
>         +/* Thread local array for local counters storage */
>         +static __thread odph_counters_t counters_tbl;
>         +
>         +/* Table index (32bit) where next counter is allocated */
>         +static __thread uint16_t next_idx;
>         +
>          static void *odp_run_start_routine(void *arg)
>          {
>                 odp_start_args_t *start_args = arg;
>
>         +       void **_counters_tbl;
>                 /* ODP thread local init */
>                 if (odp_init_local(ODP_THREAD_WORKER)) {
>                         ODPH_ERR("Local init failed\n");
>                         return NULL;
>                 }
>
>         +       /* Store thread local counters table address */
>         +       _counters_tbl = start_args->counters_tbl;
>         +       *_counters_tbl = &counters_tbl;
>         +
>                 void *ret_ptr =
>         start_args->start_routine(start_args->arg);
>                 int ret = odp_term_local();
>                 if (ret < 0)
>         @@ -87,6 +116,8 @@ int
>         odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
>
>         thread_tbl[i].start_args->start_routine = start_routine;
>         thread_tbl[i].start_args->arg           = arg;
>         +  thread_tbl[i].start_args->counters_tbl  =
>         +  &thread_tbl[i].counters_tbl;
>
>                         ret = pthread_create(&thread_tbl[i].thread,
>         &thread_tbl[i].attr,
>          odp_run_start_routine, thread_tbl[i].start_args);
>         @@ -120,6 +151,103 @@ void
>         odph_linux_pthread_join(odph_linux_pthread_t *thread_tbl, int num)
>                 }
>          }
>
>         +odph_counter_t odph_counter32_create_local(void)
>         +{
>         +       odph_counter_bits_t handle;
>         +       odph_counter_t ret = ODPH_COUNTER_INVALID;
>         +
>         +       if (next_idx >= MAX_COUNTERS_TBL_SIZE)
>         +               return ret;
>         +       handle.index = next_idx;
>         +       next_idx += 1;
>         +       handle.size = ODPH_COUNTER_SIZE_32BIT;
>         +       return handle.handle;
>         +}
>         +
>         +odph_counter_t odph_counter64_create_local(void)
>         +{
>         +       odph_counter_bits_t handle;
>         +       odph_counter_t ret = ODPH_COUNTER_INVALID;
>         +
>         +       if (next_idx >= MAX_COUNTERS_TBL_SIZE)
>         +               return ret;
>         +       handle.index = next_idx;
>         +       next_idx += 2;
>         +       handle.size = ODPH_COUNTER_SIZE_64BIT;
>         +       return handle.handle;
>         +}
>         +
>         +odph_counter_size_t odph_counter_size(odph_counter_t counter)
>         +{
>         +       return ((odph_counter_bits_t)counter).size;
>         +}
>         +
>         +inline void odph_counter32_set(odph_counter_t counter,
>         uint32_t val)
>         +{
>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>         +
>         +       counters_tbl.__u32[idx] = val;
>         +}
>         +
>         +inline void odph_counter64_set(odph_counter_t counter,
>         uint64_t val)
>         +{
>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>         +
>         +       counters_tbl.__u64[idx] = val;
>
>     I think this operation must be atomic. Otherwise a reader in
>     another thread might see a torn write (one word old and one word new).
>      [Alex] At the beginning I thought the same way - to use atomics
>     but there's a performance penalty that comes with them. Read
>     operation is usually performed in a loop and eventually a control
>     thread will read the correct value when a given counter stops
>     incrementing for an amount of time, but otherwise it can see wrong
>     values. Atomics would be required for example when IPSec SA expire
>     due to volume (packets/bytes).
>
>         +}
>         +
>         +inline void odph_counter32_add(odph_counter_t counter,
>         unsigned int val)
>         +{
>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>         +
>         +       counters_tbl.__u32[idx] += val;
>         +}
>         +
>         +inline void odph_counter64_add(odph_counter_t counter,
>         unsigned int val)
>         +{
>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>         +
>         +       counters_tbl.__u64[idx] += val;
>
>     Again a need for an atomic update (the store itself, not the whole
>     add operation).
>
>         +}
>         +
>         +inline void odph_counter32_sub(odph_counter_t counter,
>         unsigned int val)
>         +{
>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>         +
>         +       counters_tbl.__u32[idx] -= val;
>         +}
>         +
>         +inline void odph_counter64_sub(odph_counter_t counter,
>         unsigned int val)
>         +{
>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>         +
>         +       counters_tbl.__u64[idx] -= val;
>
>     Ditto.
>
>         +}
>         +
>         +uint64_t
>         +odph_counter_read_global(odph_linux_pthread_t *thread_tbl,
>         int num,
>         +                        odph_counter_t counter)
>         +{
>         +       uint64_t val = 0;
>         +       uint32_t *ptr32;
>         +       uint64_t *ptr64;
>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>         +       int i;
>         +
>         +       for (i = 0; i < num; i++) {
>         +               odph_counters_t *counters_tbl =
>         thread_tbl[i].counters_tbl;
>         +
>         +               switch (odph_counter_size(counter)) {
>         +               case ODPH_COUNTER_SIZE_32BIT:
>         +                       val += counters_tbl->__u32[idx];
>         +                       break;
>         +               case ODPH_COUNTER_SIZE_64BIT:
>         +                       val += counters_tbl->__u64[idx];
>
>     Need atomic read here.
>     Perhaps this can be emulated by an *ordered* sequence of 32-bit
>     reads (use data dependencies):
>     1) Read MSW
>     2) Read LSW
>     3) Re-read MSW
>     4) Check first and second MSW, if different re-start loop.
>     5) Return MSW << 32 | LSW.
>     The 64-bit counter must be updated (written to) in a well-defined
>     order (write LSW then MSW), can this be enforced?
>
> [Alex]
>
>
>         +                       break;
>         +               }
>         +       }
>         +       return val;
>         +}
>
>          int odph_linux_process_fork_n(odph_linux_process_t *proc_tbl,
>                                       const odp_cpumask_t *mask_in)
>         --
>         1.7.3.4
>
>         _______________________________________________
>         lng-odp mailing list
>         lng-odp@lists.linaro.org <mailto:lng-odp@lists.linaro.org>
>         https://lists.linaro.org/mailman/listinfo/lng-odp
>
>
>
>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> https://lists.linaro.org/mailman/listinfo/lng-odp
Ola Liljedahl Oct. 15, 2015, 7:48 p.m. | #4
On 15 October 2015 at 14:53, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

>
>
> On 09/08/2015 16:29, Alexandru Badicioiu wrote:
>
>>
>> On 8 September 2015 at 15:57, Ola Liljedahl <ola.liljedahl@linaro.org
>> <mailto:ola.liljedahl@linaro.org>> wrote:
>>
>>     On 1 September 2015 at 13:09, <alexandru.badicioiu@linaro.org
>>     <mailto:alexandru.badicioiu@linaro.org>> wrote:
>>
>>         From: Alexandru Badicioiu <alexandru.badicioiu@linaro.org
>>         <mailto:alexandru.badicioiu@linaro.org>>
>>
>>         This patch provides the applications with helpers to create,
>>         update
>>         and read counters associated with application defined objects
>>         - e.g.
>>         IPSec security associations in a multi-threading scenario where
>>         multiple threads use the same object.
>>
>>     I think this is useful functionality. But I fear it might not
>>     really help on targets that do not support atomic (64-bit) reads
>>     and writes.
>>
>> [Alex] Those platforms are likely 32-bit platforms and they can use
>> 32-bit counters. These counters can be useful for debugging purposes,
>> rather than providing statistics over longer period of time.
>>
>>
>>     Why ODP helpers and not proper ODP?
>>
>> [Alex] By application defined counters I meant software counters. These
>> likely won't have different implementations on different platforms.
>>
>>
>>     We also want an API for counters (statistics) that can be
>>     offloaded to HW, some SoC's do that. It might be useful if the API
>>     could be used for both HW counter and your per-thread counter
>>     implementations (together with an implementation using e.g. ARM8.1
>>     atomics).
>>     [Alex] Pktio counters is this kind of API. Others may be proposed
>>     too but this requires agreement between platforms.
>>
>>
> Yes, agree. For pktio it's needed to define exact counter size returned to
> app + it's more related to pktio hw, so there should be it's own
> pktio_cnt_add/sub().
>
Is this a public ODP API or are these functions internal? When and by whom
would those calls be used?

The pktio interface will maintain its own counters as packets are received
and transmitted. Either the HW does some or all of this or SW has to update
the counters. Existing ODP atomics can be used for that.


>
> Maxim.
>
>         Each worker thread must create a local version of a given
>>         counter -
>>         e.g. IPSec SA request bytes/packets with
>>         odph_counter32/64_create_local()
>>         function and update its local counter with
>>         odph_counter32/64_set/add/sub
>>         functions.
>>         A control thread can use odph_counter32/64_read_global() to
>>         retrieve
>>         the total value of the counter by summing all local values.
>>
>>         Signed-off-by: Alexandru Badicioiu
>>         <alexandru.badicioiu@linaro.org
>>         <mailto:alexandru.badicioiu@linaro.org>>
>>         ---
>>          helper/include/odp/helper/linux.h |  114
>>         +++++++++++++++++++++++++++++++++
>>          helper/linux.c                    |  128
>>         +++++++++++++++++++++++++++++++++++++
>>          2 files changed, 242 insertions(+), 0 deletions(-)
>>
>>         diff --git a/helper/include/odp/helper/linux.h
>>         b/helper/include/odp/helper/linux.h
>>         index ce61fdf..3c7ab15 100644
>>         --- a/helper/include/odp/helper/linux.h
>>         +++ b/helper/include/odp/helper/linux.h
>>         @@ -31,6 +31,7 @@ extern "C" {
>>          typedef struct {
>>                 void *(*start_routine) (void *); /**< The function to
>>         run */
>>                 void *arg; /**< The functions arguemnts */
>>         +       void **counters_tbl; /**< Counters table address */
>>          } odp_start_args_t;
>>
>>          /** Linux pthread state information */
>>         @@ -40,8 +41,18 @@ typedef struct {
>>                 int            cpu;    /**< CPU ID */
>>                 /** Saved starting args for join to later free */
>>                 odp_start_args_t *start_args;
>>         +       void *counters_tbl;    /**< Counters table for this
>>         thread */
>>          } odph_linux_pthread_t;
>>
>>         +/** Thread local counter size */
>>         +typedef enum odph_counter_size {
>>         +       ODPH_COUNTER_SIZE_32BIT,
>>         +       ODPH_COUNTER_SIZE_64BIT,
>>         +} odph_counter_size_t;
>>         +
>>         +#define ODPH_COUNTER_INVALID    (-1)
>>         +/** Thread local counter handle */
>>         +typedef int32_t odph_counter_t;
>>
>>          /** Linux process state information */
>>          typedef struct {
>>         @@ -77,6 +88,109 @@ int
>>         odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
>>           */
>>          void odph_linux_pthread_join(odph_linux_pthread_t
>>         *thread_tbl, int num);
>>
>>         +/**
>>         + * Creates a 32bit thread local counter
>>         + * Each thread in the thread table must create a
>>         + * counter for a shared object to which the counter
>>         + * is associated to - e.g. an IPSec Security Association.
>>         + *
>>         + * Returns the counter handle
>>         + *
>>         + * @return Counter handle
>>         + */
>>         +odph_counter_t odph_counter32_create_local(void);
>>         +
>>         +/**
>>         + * Creates a 64bit thread local counter
>>         + *
>>         + * Returns the counter handle
>>         + *
>>         + * @return Counter handle
>>         + */
>>         +odph_counter_t odph_counter64_create_local(void);
>>         +
>>         +/**
>>         + * Get counter size
>>         + *
>>         + * Return the size of a counter
>>         + *
>>         + * @param Counter handle
>>         + * @return Counter size
>>         + *
>>         + */
>>         +odph_counter_size_t
>>         +odph_counter_size(odph_counter_t counter);
>>         +
>>         +/**
>>         + * Set a 32bit counter to a given value
>>         + *
>>         + * @param Counter handle
>>         + * @param Set value
>>         + *
>>         + */
>>         +void odph_counter32_set(odph_counter_t counter, uint32_t val);
>>         +
>>         +/**
>>         + * Set a 64bit thread local counter to a given value
>>         + *
>>         + * @param Counter handle
>>         + * @param Set value
>>         + *
>>         + */
>>         +void odph_counter64_set(odph_counter_t counter, uint64_t val);
>>         +
>>         +/**
>>         + * Add a value to a 32bit thread local counter
>>         + *
>>         + * @param Counter handle
>>         + * @param Value to add
>>         + *
>>         + */
>>         +void odph_counter32_add(odph_counter_t counter, unsigned int
>>         val);
>>
>>     uint32_t val?
>>
>>         +
>>         +/**
>>         + * Add a value to a 64bit thread local counter
>>         + *
>>         + * @param Counter handle
>>         + * @param Value to add
>>         + *
>>         + */
>>         +void odph_counter64_add(odph_counter_t counter, unsigned int
>>         val);
>>
>>     uint64_t val?
>>
>>         +
>>         +/**
>>         + * Subtract a value from a 32bit thread local counter
>>         + *
>>         + * @param Counter handle
>>         + * @param Value to substract
>>         + *
>>         + */
>>         +void odph_counter32_sub(odph_counter_t counter, unsigned int
>>         val);
>>         +
>>         +/**
>>         + * Subtract a value from a 64bit thread local counter
>>         + *
>>         + * @param Counter handle
>>         + * @param Value to substract
>>         + *
>>         + */
>>         +void odph_counter64_sub(odph_counter_t counter, unsigned int
>>         val);
>>         +
>>         +/**
>>         + * Return global value of a counter by summing thread local
>>         values
>>         + * Caller has to make sure that all threads created the local
>>         + * counter prior calling this function.
>>         + *
>>         + * @param Thread array
>>         + * @param Number of threads in the array
>>         + * @param Counter handle
>>         + *
>>         + * @return Counter value
>>         + *
>>         + */
>>         +uint64_t
>>         +odph_counter_read_global(odph_linux_pthread_t *thread_tbl,
>>         int num,
>>         +                        odph_counter_t counter);
>>         +
>>
>>          /**
>>           * Fork a process
>>         diff --git a/helper/linux.c b/helper/linux.c
>>         index 3d3b6b8..1953d75 100644
>>         --- a/helper/linux.c
>>         +++ b/helper/linux.c
>>         @@ -22,16 +22,45 @@
>>          #include <odp/system_info.h>
>>          #include "odph_debug.h"
>>
>>         +#define MAX_COUNTERS_TBL_SIZE  64
>>         +
>>         +typedef union odph_counters {
>>         +       uint32_t __u32[MAX_COUNTERS_TBL_SIZE];
>>         +       uint64_t __u64[MAX_COUNTERS_TBL_SIZE / 2];
>>         +} odph_counters_t;
>>         +
>>         +typedef union odph_counter_bits {
>>         +       odph_counter_t handle;
>>         +       union {
>>         +               int32_t s32;
>>         +               struct {
>>         +                       uint16_t index;
>>         +                       uint16_t size;
>>
>>     Perhaps use only 1 bit for the size; 0 => 32 bits, 1 => 64 bits?
>>
>>         +               };
>>         +       };
>>         +} odph_counter_bits_t;
>>         +
>>         +/* Thread local array for local counters storage */
>>         +static __thread odph_counters_t counters_tbl;
>>         +
>>         +/* Table index (32bit) where next counter is allocated */
>>         +static __thread uint16_t next_idx;
>>         +
>>          static void *odp_run_start_routine(void *arg)
>>          {
>>                 odp_start_args_t *start_args = arg;
>>
>>         +       void **_counters_tbl;
>>                 /* ODP thread local init */
>>                 if (odp_init_local(ODP_THREAD_WORKER)) {
>>                         ODPH_ERR("Local init failed\n");
>>                         return NULL;
>>                 }
>>
>>         +       /* Store thread local counters table address */
>>         +       _counters_tbl = start_args->counters_tbl;
>>         +       *_counters_tbl = &counters_tbl;
>>         +
>>                 void *ret_ptr =
>>         start_args->start_routine(start_args->arg);
>>                 int ret = odp_term_local();
>>                 if (ret < 0)
>>         @@ -87,6 +116,8 @@ int
>>         odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
>>
>>         thread_tbl[i].start_args->start_routine = start_routine;
>>         thread_tbl[i].start_args->arg           = arg;
>>         +  thread_tbl[i].start_args->counters_tbl  =
>>         +  &thread_tbl[i].counters_tbl;
>>
>>                         ret = pthread_create(&thread_tbl[i].thread,
>>         &thread_tbl[i].attr,
>>          odp_run_start_routine, thread_tbl[i].start_args);
>>         @@ -120,6 +151,103 @@ void
>>         odph_linux_pthread_join(odph_linux_pthread_t *thread_tbl, int num)
>>                 }
>>          }
>>
>>         +odph_counter_t odph_counter32_create_local(void)
>>         +{
>>         +       odph_counter_bits_t handle;
>>         +       odph_counter_t ret = ODPH_COUNTER_INVALID;
>>         +
>>         +       if (next_idx >= MAX_COUNTERS_TBL_SIZE)
>>         +               return ret;
>>         +       handle.index = next_idx;
>>         +       next_idx += 1;
>>         +       handle.size = ODPH_COUNTER_SIZE_32BIT;
>>         +       return handle.handle;
>>         +}
>>         +
>>         +odph_counter_t odph_counter64_create_local(void)
>>         +{
>>         +       odph_counter_bits_t handle;
>>         +       odph_counter_t ret = ODPH_COUNTER_INVALID;
>>         +
>>         +       if (next_idx >= MAX_COUNTERS_TBL_SIZE)
>>         +               return ret;
>>         +       handle.index = next_idx;
>>         +       next_idx += 2;
>>         +       handle.size = ODPH_COUNTER_SIZE_64BIT;
>>         +       return handle.handle;
>>         +}
>>         +
>>         +odph_counter_size_t odph_counter_size(odph_counter_t counter)
>>         +{
>>         +       return ((odph_counter_bits_t)counter).size;
>>         +}
>>         +
>>         +inline void odph_counter32_set(odph_counter_t counter,
>>         uint32_t val)
>>         +{
>>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>>         +
>>         +       counters_tbl.__u32[idx] = val;
>>         +}
>>         +
>>         +inline void odph_counter64_set(odph_counter_t counter,
>>         uint64_t val)
>>         +{
>>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>>         +
>>         +       counters_tbl.__u64[idx] = val;
>>
>>     I think this operation must be atomic. Otherwise a reader in
>>     another thread might see a torn write (one word old and one word new).
>>      [Alex] At the beginning I thought the same way - to use atomics
>>     but there's a performance penalty that comes with them. Read
>>     operation is usually performed in a loop and eventually a control
>>     thread will read the correct value when a given counter stops
>>     incrementing for an amount of time, but otherwise it can see wrong
>>     values. Atomics would be required for example when IPSec SA expire
>>     due to volume (packets/bytes).
>>
>>         +}
>>         +
>>         +inline void odph_counter32_add(odph_counter_t counter,
>>         unsigned int val)
>>         +{
>>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>>         +
>>         +       counters_tbl.__u32[idx] += val;
>>         +}
>>         +
>>         +inline void odph_counter64_add(odph_counter_t counter,
>>         unsigned int val)
>>         +{
>>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>>         +
>>         +       counters_tbl.__u64[idx] += val;
>>
>>     Again a need for an atomic update (the store itself, not the whole
>>     add operation).
>>
>>         +}
>>         +
>>         +inline void odph_counter32_sub(odph_counter_t counter,
>>         unsigned int val)
>>         +{
>>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>>         +
>>         +       counters_tbl.__u32[idx] -= val;
>>         +}
>>         +
>>         +inline void odph_counter64_sub(odph_counter_t counter,
>>         unsigned int val)
>>         +{
>>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>>         +
>>         +       counters_tbl.__u64[idx] -= val;
>>
>>     Ditto.
>>
>>         +}
>>         +
>>         +uint64_t
>>         +odph_counter_read_global(odph_linux_pthread_t *thread_tbl,
>>         int num,
>>         +                        odph_counter_t counter)
>>         +{
>>         +       uint64_t val = 0;
>>         +       uint32_t *ptr32;
>>         +       uint64_t *ptr64;
>>         +       uint16_t idx = ((odph_counter_bits_t)counter).index;
>>         +       int i;
>>         +
>>         +       for (i = 0; i < num; i++) {
>>         +               odph_counters_t *counters_tbl =
>>         thread_tbl[i].counters_tbl;
>>         +
>>         +               switch (odph_counter_size(counter)) {
>>         +               case ODPH_COUNTER_SIZE_32BIT:
>>         +                       val += counters_tbl->__u32[idx];
>>         +                       break;
>>         +               case ODPH_COUNTER_SIZE_64BIT:
>>         +                       val += counters_tbl->__u64[idx];
>>
>>     Need atomic read here.
>>     Perhaps this can be emulated by an *ordered* sequence of 32-bit
>>     reads (use data dependencies):
>>     1) Read MSW
>>     2) Read LSW
>>     3) Re-read MSW
>>     4) Check first and second MSW, if different re-start loop.
>>     5) Return MSW << 32 | LSW.
>>     The 64-bit counter must be updated (written to) in a well-defined
>>     order (write LSW then MSW), can this be enforced?
>>
>> [Alex]
>>
>>
>>         +                       break;
>>         +               }
>>         +       }
>>         +       return val;
>>         +}
>>
>>          int odph_linux_process_fork_n(odph_linux_process_t *proc_tbl,
>>                                       const odp_cpumask_t *mask_in)
>>         --
>>         1.7.3.4
>>
>>         _______________________________________________
>>         lng-odp mailing list
>>         lng-odp@lists.linaro.org <mailto:lng-odp@lists.linaro.org>
>>         https://lists.linaro.org/mailman/listinfo/lng-odp
>>
>>
>>
>>
>>
>> _______________________________________________
>> lng-odp mailing list
>> lng-odp@lists.linaro.org
>> https://lists.linaro.org/mailman/listinfo/lng-odp
>>
>
> _______________________________________________
> lng-odp mailing list
> lng-odp@lists.linaro.org
> https://lists.linaro.org/mailman/listinfo/lng-odp
>

Patch

diff --git a/helper/include/odp/helper/linux.h b/helper/include/odp/helper/linux.h
index ce61fdf..3c7ab15 100644
--- a/helper/include/odp/helper/linux.h
+++ b/helper/include/odp/helper/linux.h
@@ -31,6 +31,7 @@  extern "C" {
 typedef struct {
 	void *(*start_routine) (void *); /**< The function to run */
 	void *arg; /**< The functions arguemnts */
+	void **counters_tbl; /**< Counters table address */
 } odp_start_args_t;
 
 /** Linux pthread state information */
@@ -40,8 +41,18 @@  typedef struct {
 	int            cpu;    /**< CPU ID */
 	/** Saved starting args for join to later free */
 	odp_start_args_t *start_args;
+	void *counters_tbl;    /**< Counters table for this thread */
 } odph_linux_pthread_t;
 
+/** Thread local counter size */
+typedef enum odph_counter_size {
+	ODPH_COUNTER_SIZE_32BIT,
+	ODPH_COUNTER_SIZE_64BIT,
+} odph_counter_size_t;
+
+#define ODPH_COUNTER_INVALID    (-1)
+/** Thread local counter handle */
+typedef int32_t odph_counter_t;
 
 /** Linux process state information */
 typedef struct {
@@ -77,6 +88,109 @@  int odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
  */
 void odph_linux_pthread_join(odph_linux_pthread_t *thread_tbl, int num);
 
+/**
+ * Creates a 32bit thread local counter
+ * Each thread in the thread table must create a
+ * counter for a shared object to which the counter
+ * is associated to - e.g. an IPSec Security Association.
+ *
+ * Returns the counter handle
+ *
+ * @return Counter handle
+ */
+odph_counter_t odph_counter32_create_local(void);
+
+/**
+ * Creates a 64bit thread local counter
+ *
+ * Returns the counter handle
+ *
+ * @return Counter handle
+ */
+odph_counter_t odph_counter64_create_local(void);
+
+/**
+ * Get counter size
+ *
+ * Return the size of a counter
+ *
+ * @param Counter handle
+ * @return Counter size
+ *
+ */
+odph_counter_size_t
+odph_counter_size(odph_counter_t counter);
+
+/**
+ * Set a 32bit counter to a given value
+ *
+ * @param Counter handle
+ * @param Set value
+ *
+ */
+void odph_counter32_set(odph_counter_t counter, uint32_t val);
+
+/**
+ * Set a 64bit thread local counter to a given value
+ *
+ * @param Counter handle
+ * @param Set value
+ *
+ */
+void odph_counter64_set(odph_counter_t counter, uint64_t val);
+
+/**
+ * Add a value to a 32bit thread local counter
+ *
+ * @param Counter handle
+ * @param Value to add
+ *
+ */
+void odph_counter32_add(odph_counter_t counter, unsigned int val);
+
+/**
+ * Add a value to a 64bit thread local counter
+ *
+ * @param Counter handle
+ * @param Value to add
+ *
+ */
+void odph_counter64_add(odph_counter_t counter, unsigned int val);
+
+/**
+ * Subtract a value from a 32bit thread local counter
+ *
+ * @param Counter handle
+ * @param Value to substract
+ *
+ */
+void odph_counter32_sub(odph_counter_t counter, unsigned int val);
+
+/**
+ * Subtract a value from a 64bit thread local counter
+ *
+ * @param Counter handle
+ * @param Value to substract
+ *
+ */
+void odph_counter64_sub(odph_counter_t counter, unsigned int val);
+
+/**
+ * Return global value of a counter by summing thread local values
+ * Caller has to make sure that all threads created the local
+ * counter prior calling this function.
+ *
+ * @param Thread array
+ * @param Number of threads in the array
+ * @param Counter handle
+ *
+ * @return Counter value
+ *
+ */
+uint64_t
+odph_counter_read_global(odph_linux_pthread_t *thread_tbl, int num,
+			 odph_counter_t counter);
+
 
 /**
  * Fork a process
diff --git a/helper/linux.c b/helper/linux.c
index 3d3b6b8..1953d75 100644
--- a/helper/linux.c
+++ b/helper/linux.c
@@ -22,16 +22,45 @@ 
 #include <odp/system_info.h>
 #include "odph_debug.h"
 
+#define MAX_COUNTERS_TBL_SIZE	64
+
+typedef union odph_counters {
+	uint32_t __u32[MAX_COUNTERS_TBL_SIZE];
+	uint64_t __u64[MAX_COUNTERS_TBL_SIZE / 2];
+} odph_counters_t;
+
+typedef union odph_counter_bits {
+	odph_counter_t handle;
+	union {
+		int32_t s32;
+		struct {
+			uint16_t index;
+			uint16_t size;
+		};
+	};
+} odph_counter_bits_t;
+
+/* Thread local array for local counters storage */
+static __thread odph_counters_t counters_tbl;
+
+/* Table index (32bit) where next counter is allocated */
+static __thread uint16_t next_idx;
+
 static void *odp_run_start_routine(void *arg)
 {
 	odp_start_args_t *start_args = arg;
 
+	void **_counters_tbl;
 	/* ODP thread local init */
 	if (odp_init_local(ODP_THREAD_WORKER)) {
 		ODPH_ERR("Local init failed\n");
 		return NULL;
 	}
 
+	/* Store thread local counters table address */
+	_counters_tbl = start_args->counters_tbl;
+	*_counters_tbl = &counters_tbl;
+
 	void *ret_ptr = start_args->start_routine(start_args->arg);
 	int ret = odp_term_local();
 	if (ret < 0)
@@ -87,6 +116,8 @@  int odph_linux_pthread_create(odph_linux_pthread_t *thread_tbl,
 
 		thread_tbl[i].start_args->start_routine = start_routine;
 		thread_tbl[i].start_args->arg           = arg;
+		thread_tbl[i].start_args->counters_tbl	=
+					    &thread_tbl[i].counters_tbl;
 
 		ret = pthread_create(&thread_tbl[i].thread, &thread_tbl[i].attr,
 			       odp_run_start_routine, thread_tbl[i].start_args);
@@ -120,6 +151,103 @@  void odph_linux_pthread_join(odph_linux_pthread_t *thread_tbl, int num)
 	}
 }
 
+odph_counter_t odph_counter32_create_local(void)
+{
+	odph_counter_bits_t handle;
+	odph_counter_t ret = ODPH_COUNTER_INVALID;
+
+	if (next_idx >= MAX_COUNTERS_TBL_SIZE)
+		return ret;
+	handle.index = next_idx;
+	next_idx += 1;
+	handle.size = ODPH_COUNTER_SIZE_32BIT;
+	return handle.handle;
+}
+
+odph_counter_t odph_counter64_create_local(void)
+{
+	odph_counter_bits_t handle;
+	odph_counter_t ret = ODPH_COUNTER_INVALID;
+
+	if (next_idx >= MAX_COUNTERS_TBL_SIZE)
+		return ret;
+	handle.index = next_idx;
+	next_idx += 2;
+	handle.size = ODPH_COUNTER_SIZE_64BIT;
+	return handle.handle;
+}
+
+odph_counter_size_t odph_counter_size(odph_counter_t counter)
+{
+	return ((odph_counter_bits_t)counter).size;
+}
+
+inline void odph_counter32_set(odph_counter_t counter, uint32_t val)
+{
+	uint16_t idx = ((odph_counter_bits_t)counter).index;
+
+	counters_tbl.__u32[idx] = val;
+}
+
+inline void odph_counter64_set(odph_counter_t counter, uint64_t val)
+{
+	uint16_t idx = ((odph_counter_bits_t)counter).index;
+
+	counters_tbl.__u64[idx] = val;
+}
+
+inline void odph_counter32_add(odph_counter_t counter, unsigned int val)
+{
+	uint16_t idx = ((odph_counter_bits_t)counter).index;
+
+	counters_tbl.__u32[idx] += val;
+}
+
+inline void odph_counter64_add(odph_counter_t counter, unsigned int val)
+{
+	uint16_t idx = ((odph_counter_bits_t)counter).index;
+
+	counters_tbl.__u64[idx] += val;
+}
+
+inline void odph_counter32_sub(odph_counter_t counter, unsigned int val)
+{
+	uint16_t idx = ((odph_counter_bits_t)counter).index;
+
+	counters_tbl.__u32[idx] -= val;
+}
+
+inline void odph_counter64_sub(odph_counter_t counter, unsigned int val)
+{
+	uint16_t idx = ((odph_counter_bits_t)counter).index;
+
+	counters_tbl.__u64[idx] -= val;
+}
+
+uint64_t
+odph_counter_read_global(odph_linux_pthread_t *thread_tbl, int num,
+			 odph_counter_t counter)
+{
+	uint64_t val = 0;
+	uint32_t *ptr32;
+	uint64_t *ptr64;
+	uint16_t idx = ((odph_counter_bits_t)counter).index;
+	int i;
+
+	for (i = 0; i < num; i++) {
+		odph_counters_t *counters_tbl = thread_tbl[i].counters_tbl;
+
+		switch (odph_counter_size(counter)) {
+		case ODPH_COUNTER_SIZE_32BIT:
+			val += counters_tbl->__u32[idx];
+			break;
+		case ODPH_COUNTER_SIZE_64BIT:
+			val += counters_tbl->__u64[idx];
+			break;
+		}
+	}
+	return val;
+}
 
 int odph_linux_process_fork_n(odph_linux_process_t *proc_tbl,
 			      const odp_cpumask_t *mask_in)