[v3,2/3] lib/rcu: add resource reclamation APIs

Message ID	20191001062917.35578-3-honnappa.nagarahalli@arm.com
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of dev-bounces@dpdk.org designates 92.243.14.124 as permitted sender) client-ip=92.243.14.124; From: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com> To: honnappa.nagarahalli@arm.com, konstantin.ananyev@intel.com, stephen@networkplumber.org, paulmck@linux.ibm.com Cc: yipeng1.wang@intel.com, vladimir.medvedkin@intel.com, ruifeng.wang@arm.com, dharmik.thakkar@arm.com, dev@dpdk.org, nd@arm.com Date: Tue, 1 Oct 2019 01:29:16 -0500 Message-Id: <20191001062917.35578-3-honnappa.nagarahalli@arm.com> In-Reply-To: <20191001062917.35578-1-honnappa.nagarahalli@arm.com> References: <20190906094534.36060-1-ruifeng.wang@arm.com> <20191001062917.35578-1-honnappa.nagarahalli@arm.com> Subject: [dpdk-dev] [PATCH v3 2/3] lib/rcu: add resource reclamation APIs Precedence: list Errors-To: dev-bounces@dpdk.org Sender: "dev" <dev-bounces@dpdk.org>
Series	Add RCU reclamation APIs \| expand [v3,0/3] Add RCU reclamation APIs [v3,1/3] lib/ring: add peek API [v3,2/3] lib/rcu: add resource reclamation APIs [v3,3/3] doc/rcu: add RCU integration design details

Honnappa Nagarahalli Oct. 1, 2019, 6:29 a.m. UTC

Add resource reclamation APIs to make it simple for applications
and libraries to integrate rte_rcu library.

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

---
 app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-
 lib/librte_rcu/meson.build         |   2 +
 lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++
 lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++
 lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++
 lib/librte_rcu/rte_rcu_version.map |   4 +
 lib/meson.build                    |   6 +-
 7 files changed, 700 insertions(+), 3 deletions(-)
 create mode 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

-- 
2.17.1

Ananyev, Konstantin Oct. 2, 2019, 5:39 p.m. UTC | #1

Hi Honnappa,

 
> Add resource reclamation APIs to make it simple for applications

> and libraries to integrate rte_rcu library.

> 

> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> ---

>  app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

>  lib/librte_rcu/meson.build         |   2 +

>  lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

>  lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

>  lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

>  lib/librte_rcu/rte_rcu_version.map |   4 +

>  lib/meson.build                    |   6 +-

>  7 files changed, 700 insertions(+), 3 deletions(-)

>  create mode 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

> 

> diff --git a/lib/librte_rcu/rte_rcu_qsbr.c b/lib/librte_rcu/rte_rcu_qsbr.c

> index ce7f93dd3..76814f50b 100644

> --- a/lib/librte_rcu/rte_rcu_qsbr.c

> +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> @@ -21,6 +21,7 @@

>  #include <rte_errno.h>

> 

>  #include "rte_rcu_qsbr.h"

> +#include "rte_rcu_qsbr_pvt.h"

> 

>  /* Get the memory size of QSBR variable */

>  size_t

> @@ -267,6 +268,190 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)

>  	return 0;

>  }

> 

> +/* Create a queue used to store the data structure elements that can

> + * be freed later. This queue is referred to as 'defer queue'.

> + */

> +struct rte_rcu_qsbr_dq *

> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params)

> +{

> +	struct rte_rcu_qsbr_dq *dq;

> +	uint32_t qs_fifo_size;

> +

> +	if (params == NULL || params->f == NULL ||

> +		params->v == NULL || params->name == NULL ||

> +		params->size == 0 || params->esize == 0 ||

> +		(params->esize % 8 != 0)) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return NULL;

> +	}

> +

> +	dq = rte_zmalloc(NULL,

> +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> +		RTE_CACHE_LINE_SIZE);

> +	if (dq == NULL) {

> +		rte_errno = ENOMEM;

> +

> +		return NULL;

> +	}

> +

> +	/* round up qs_fifo_size to next power of two that is not less than

> +	 * max_size.

> +	 */

> +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> +					* params->size) + 1);

> +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> +					SOCKET_ID_ANY, 0);


If it is not going to be MT safe, then why not create the ring with
the (RING_F_SP_ENQ | RING_F_SC_DEQ) flags set?
Though I think it could be changed to allow MT-safe multiple
enqueue/single dequeue, see below.

> +	if (dq->r == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): defer queue create failed\n", __func__);

> +		rte_free(dq);

> +		return NULL;

> +	}

> +

> +	dq->v = params->v;

> +	dq->size = params->size;

> +	dq->esize = params->esize;

> +	dq->f = params->f;

> +	dq->p = params->p;

> +

> +	return dq;

> +}

> +

> +/* Enqueue one resource to the defer queue to free after the grace

> + * period is over.

> + */

> +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e)

> +{

> +	uint64_t token;

> +	uint64_t *tmp;

> +	uint32_t i;

> +	uint32_t cur_size, free_size;

> +

> +	if (dq == NULL || e == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;


Why not just return -EINVAL straight away?
I think there is not much point in setting rte_errno in that function at all;
the return value alone should do.

> +	}

> +

> +	/* Start the grace period */

> +	token = rte_rcu_qsbr_start(dq->v);

> +

> +	/* Reclaim resources if the queue is 1/8th full. This helps

> +	 * the queue from growing too large and allows time for reader

> +	 * threads to report their quiescent state.

> +	 */

> +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);


It would probably be a bit easier if you just stored (elt size + token size) / 8 in dq->esize.

> +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {


Why make this threshold value hard-coded?
Why not either put it into the create parameters, or just return a special return value
to indicate that the threshold has been reached?
Or even return the number of filled/free entries on success, so the caller can decide
whether to reclaim or not based on that information on their own?

> +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> +			"%s(): Triggering reclamation\n", __func__);

> +		rte_rcu_qsbr_dq_reclaim(dq);

> +	}

> +

> +	/* Check if there is space for atleast for 1 resource */

> +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> +	if (!free_size) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Defer queue is full\n", __func__);

> +		rte_errno = ENOSPC;

> +		return 1;

> +	}

> +

> +	/* Enqueue the resource */

> +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> +

> +	/* The resource to enqueue needs to be a multiple of 64b

> +	 * due to the limitation of the rte_ring implementation.

> +	 */

> +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);



That whole construction above looks a bit clumsy and error prone...
I suppose just:

const uint32_t nb_elt =  dq->elt_size/8 + 1;
uint32_t free, n;
...
n = rte_ring_enqueue_bulk(dq->r, e, nb_elt, &free);
if (n == 0)
  return -ENOSPC;
return free;

That way I think you can have MT-safe version of that function.

> +

> +	return 0;

> +}

> +

> +/* Reclaim resources from the defer queue. */

> +int

> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq)

> +{

> +	uint32_t max_cnt;

> +	uint32_t cnt;

> +	void *token;

> +	uint64_t *tmp;

> +	uint32_t i;

> +

> +	if (dq == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;


Same story as above - I think rte_errno is excessive in this function.
Just return value should be enough.


> +	}

> +

> +	/* Anything to reclaim? */

> +	if (rte_ring_count(dq->r) == 0)

> +		return 0;


Not sure you need that, see below.

> +

> +	/* Reclaim at the max 1/16th the total number of entries. */

> +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;


Again, why not make max_cnt a configurable create() parameter?
Or even a parameter of that function?

> +	cnt = 0;

> +

> +	/* Check reader threads quiescent state and reclaim resources */

> +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> +			== 1)) {



> +		(void)rte_ring_sc_dequeue(dq->r, &token);

> +		/* The resource to dequeue needs to be a multiple of 64b

> +		 * due to the limitation of the rte_ring implementation.

> +		 */

> +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> +			i++, tmp++)

> +			(void)rte_ring_sc_dequeue(dq->r,

> +					(void *)(uintptr_t)tmp);


Again, I believe there is no need for such a construct with multiple dequeue calls.
Just:

const uint32_t nb_elt =  dq->elt_size/8 + 1;
uint32_t n;
uintptr_t elt[nb_elt];
...
n = rte_ring_dequeue_bulk(dq->r, elt, nb_elt, NULL);
if (n != 0) {dq->f(dq->p, elt);}

Seems enough.
Again in that case you can have enqueue/reclaim running in
different threads simultaneously, plus you don't need dq->e at all. 

> +		dq->f(dq->p, dq->e);

> +

> +		cnt++;

> +	}

> +

> +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> +

> +	if (cnt == 0) {

> +		/* No resources were reclaimed */

> +		rte_errno = EAGAIN;

> +		return 1;

> +	}

> +

> +	return 0;


I'd suggest returning cnt on success.

> +}

> +

> +/* Delete a defer queue. */

> +int

> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq)

> +{

> +	if (dq == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;

> +	}

> +

> +	/* Reclaim all the resources */

> +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> +		/* Error number is already set by the reclaim API */

> +		return 1;


How do you know that you have reclaimed everything?

> +

> +	rte_ring_free(dq->r);

> +	rte_free(dq);

> +

> +	return 0;

> +}

> +

>  int rte_rcu_log_type;

> 

>  RTE_INIT(rte_rcu_register)

> diff --git a/lib/librte_rcu/rte_rcu_qsbr.h b/lib/librte_rcu/rte_rcu_qsbr.h

> index c80f15c00..185d4b50a 100644

> --- a/lib/librte_rcu/rte_rcu_qsbr.h

> +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> @@ -34,6 +34,7 @@ extern "C" {

>  #include <rte_lcore.h>

>  #include <rte_debug.h>

>  #include <rte_atomic.h>

> +#include <rte_ring.h>

> 

>  extern int rte_rcu_log_type;

> 

> @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

>  	 */

>  } __rte_cache_aligned;

> 

> +/**

> + * Call back function called to free the resources.

> + *

> + * @param p

> + *   Pointer provided while creating the defer queue

> + * @param e

> + *   Pointer to the resource data stored on the defer queue

> + *

> + * @return

> + *   None

> + */

> +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);


Style nit - usually in DPDK we have typedef newtype_t ...
Though I am not sure you need a new typedef at all - just
a function pointer inside the struct seems enough.

> +

> +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> +

> +/**

> + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> + */

> +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> +

> +/**

> + *  Reclaim at the max 1/16th the total number of resources.

> + */

> +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4



As I said above, I don't think these thresholds need to be hardcoded.
In any case, there does not seem to be much point in putting them in the public header file.

> +

> +/**

> + * Parameters used when creating the defer queue.

> + */

> +struct rte_rcu_qsbr_dq_parameters {

> +	const char *name;

> +	/**< Name of the queue. */

> +	uint32_t size;

> +	/**< Number of entries in queue. Typically, this will be

> +	 *   the same as the maximum number of entries supported in the

> +	 *   lock free data structure.

> +	 *   Data structures with unbounded number of entries is not

> +	 *   supported currently.

> +	 */

> +	uint32_t esize;

> +	/**< Size (in bytes) of each element in the defer queue.

> +	 *   This has to be multiple of 8B as the rte_ring APIs

> +	 *   support 8B element sizes only.

> +	 */

> +	rte_rcu_qsbr_free_resource f;

> +	/**< Function to call to free the resource. */

> +	void *p;


Style nit again - I like short names myself, but that seems a bit extreme... :)
Might be at least:
void (*reclaim)(void *, void *);
void * reclaim_data;
?

> +	/**< Pointer passed to the free function. Typically, this is the

> +	 *   pointer to the data structure to which the resource to free

> +	 *   belongs. This can be NULL.

> +	 */

> +	struct rte_rcu_qsbr *v;


Does it need to be inside that struct?
Might be better:
rte_rcu_qsbr_dq_create(struct rte_rcu_qsbr *v, const struct rte_rcu_qsbr_dq_parameters *params);

Another alternative: make both reclaim() and enqueue() take v as a parameter.

> +	/**< RCU QSBR variable to use for this defer queue */

> +};

> +

> +/* RTE defer queue structure.

> + * This structure holds the defer queue. The defer queue is used to

> + * hold the deleted entries from the data structure that are not

> + * yet freed.

> + */

> +struct rte_rcu_qsbr_dq;

> +

>  /**

>   * @warning

>   * @b EXPERIMENTAL: this API may change without prior notice

> @@ -648,6 +710,113 @@ __rte_experimental

>  int

>  rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);

> 

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Create a queue used to store the data structure elements that can

> + * be freed later. This queue is referred to as 'defer queue'.

> + *

> + * @param params

> + *   Parameters to create a defer queue.

> + * @return

> + *   On success - Valid pointer to defer queue

> + *   On error - NULL

> + *   Possible rte_errno codes are:

> + *   - EINVAL - NULL parameters are passed

> + *   - ENOMEM - Not enough memory

> + */

> +__rte_experimental

> +struct rte_rcu_qsbr_dq *

> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Enqueue one resource to the defer queue and start the grace period.

> + * The resource will be freed later after at least one grace period

> + * is over.

> + *

> + * If the defer queue is full, it will attempt to reclaim resources.

> + * It will also reclaim resources at regular intervals to avoid

> + * the defer queue from growing too big.

> + *

> + * This API is not multi-thread safe. It is expected that the caller

> + * provides multi-thread safety by locking a mutex or some other means.

> + *

> + * A lock free multi-thread writer algorithm could achieve multi-thread

> + * safety by creating and using one defer queue per thread.

> + *

> + * @param dq

> + *   Defer queue to allocate an entry from.

> + * @param e

> + *   Pointer to resource data to copy to the defer queue. The size of

> + *   the data to copy is equal to the element size provided when the

> + *   defer queue was created.

> + * @return

> + *   On success - 0

> + *   On error - 1 with rte_errno set to

> + *   - EINVAL - NULL parameters are passed

> + *   - ENOSPC - Defer queue is full. This condition can not happen

> + *		if the defer queue size is equal (or larger) than the

> + *		number of elements in the data structure.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Reclaim resources from the defer queue.

> + *

> + * This API is not multi-thread safe. It is expected that the caller

> + * provides multi-thread safety by locking a mutex or some other means.

> + *

> + * A lock free multi-thread writer algorithm could achieve multi-thread

> + * safety by creating and using one defer queue per thread.

> + *

> + * @param dq

> + *   Defer queue to reclaim an entry from.

> + * @return

> + *   On successful reclamation of at least 1 resource - 0

> + *   On error - 1 with rte_errno set to

> + *   - EINVAL - NULL parameters are passed

> + *   - EAGAIN - None of the resources have completed at least 1 grace period,

> + *		try again.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Delete a defer queue.

> + *

> + * It tries to reclaim all the resources on the defer queue.

> + * If any of the resources have not completed the grace period

> + * the reclamation stops and returns immediately. The rest of

> + * the resources are not reclaimed and the defer queue is not

> + * freed.

> + *

> + * @param dq

> + *   Defer queue to delete.

> + * @return

> + *   On success - 0

> + *   On error - 1

> + *   Possible rte_errno codes are:

> + *   - EINVAL - NULL parameters are passed

> + *   - EAGAIN - Some of the resources have not completed at least 1 grace

> + *		period, try again.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> +

>  #ifdef __cplusplus

>  }

>  #endif

> diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> new file mode 100644

> index 000000000..2122bc36a

> --- /dev/null

> +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h


Again, a style suggestion: as it is not a public header, don't use the rte_ prefix for naming.
From my perspective, that makes it easier for the reader to realize which header is public and which is not.

> @@ -0,0 +1,46 @@

> +/* SPDX-License-Identifier: BSD-3-Clause

> + * Copyright (c) 2019 Arm Limited

> + */

> +

> +#ifndef _RTE_RCU_QSBR_PVT_H_

> +#define _RTE_RCU_QSBR_PVT_H_

> +

> +/**

> + * This file is private to the RCU library. It should not be included

> + * by the user of this library.

> + */

> +

> +#ifdef __cplusplus

> +extern "C" {

> +#endif

> +

> +#include "rte_rcu_qsbr.h"

> +

> +/* RTE defer queue structure.

> + * This structure holds the defer queue. The defer queue is used to

> + * hold the deleted entries from the data structure that are not

> + * yet freed.

> + */

> +struct rte_rcu_qsbr_dq {

> +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> +	uint32_t size;

> +	/**< Number of elements in the defer queue */

> +	uint32_t esize;

> +	/**< Size (in bytes) of data stored on the defer queue */

> +	rte_rcu_qsbr_free_resource f;

> +	/**< Function to call to free the resource. */

> +	void *p;

> +	/**< Pointer passed to the free function. Typically, this is the

> +	 *   pointer to the data structure to which the resource to free

> +	 *   belongs.

> +	 */

> +	char e[0];

> +	/**< Temporary storage to copy the defer queue element. */


Do you really need 'e' at all?
Can't it be just temporary stack variable?

> +};

> +

> +#ifdef __cplusplus

> +}

> +#endif

> +

> +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> diff --git a/lib/librte_rcu/rte_rcu_version.map b/lib/librte_rcu/rte_rcu_version.map

> index f8b9ef2ab..dfac88a37 100644

> --- a/lib/librte_rcu/rte_rcu_version.map

> +++ b/lib/librte_rcu/rte_rcu_version.map

> @@ -8,6 +8,10 @@ EXPERIMENTAL {

>  	rte_rcu_qsbr_synchronize;

>  	rte_rcu_qsbr_thread_register;

>  	rte_rcu_qsbr_thread_unregister;

> +	rte_rcu_qsbr_dq_create;

> +	rte_rcu_qsbr_dq_enqueue;

> +	rte_rcu_qsbr_dq_reclaim;

> +	rte_rcu_qsbr_dq_delete;

> 

>  	local: *;

>  };

> diff --git a/lib/meson.build b/lib/meson.build

> index e5ff83893..0e1be8407 100644

> --- a/lib/meson.build

> +++ b/lib/meson.build

> @@ -11,7 +11,9 @@

>  libraries = [

>  	'kvargs', # eal depends on kvargs

>  	'eal', # everything depends on eal

> -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> +	'ring',

> +	'rcu', # rcu depends on ring

> +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

>  	'cmdline',

>  	'metrics', # bitrate/latency stats depends on this

>  	'hash',    # efd depends on this

> @@ -22,7 +24,7 @@ libraries = [

>  	'gro', 'gso', 'ip_frag', 'jobstats',

>  	'kni', 'latencystats', 'lpm', 'member',

>  	'power', 'pdump', 'rawdev',

> -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> +	'reorder', 'sched', 'security', 'stack', 'vhost',

>  	# ipsec lib depends on net, crypto and security

>  	'ipsec',

>  	# add pkt framework libs which use other libs from above

> --

> 2.17.1

Ananyev, Konstantin Oct. 2, 2019, 6:50 p.m. UTC | #2

> +

> +/* Reclaim resources from the defer queue. */

> +int

> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq)

> +{

> +	uint32_t max_cnt;

> +	uint32_t cnt;

> +	void *token;

> +	uint64_t *tmp;

> +	uint32_t i;

> +

> +	if (dq == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;

> +	}

> +

> +	/* Anything to reclaim? */

> +	if (rte_ring_count(dq->r) == 0)

> +		return 0;

> +

> +	/* Reclaim at the max 1/16th the total number of entries. */

> +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> +	cnt = 0;

> +

> +	/* Check reader threads quiescent state and reclaim resources */

> +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)


One more thing I forgot to ask - how is this construct supposed to work on 32-bit machines?
peek() will return a 32-bit value, while qsbr_check() operates with 64-bit tokens...
As I understand it, in that case you need to peek() 2 elems.
That might work, but I still think it is better to introduce a serialized version of ring_dequeue().
See my other mail about rte_ring_peek().


> +			== 1)) {

> +		(void)rte_ring_sc_dequeue(dq->r, &token);

> +		/* The resource to dequeue needs to be a multiple of 64b

> +		 * due to the limitation of the rte_ring implementation.

> +		 */

> +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> +			i++, tmp++)

> +			(void)rte_ring_sc_dequeue(dq->r,

> +					(void *)(uintptr_t)tmp);

> +		dq->f(dq->p, dq->e);

> +

> +		cnt++;

> +	}

> +

> +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> +

> +	if (cnt == 0) {

> +		/* No resources were reclaimed */

> +		rte_errno = EAGAIN;

> +		return 1;

> +	}

> +

> +	return 0;

> +}

> +

> +/* Delete a defer queue. */

> +int

> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq)

> +{

> +	if (dq == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;

> +	}

> +

> +	/* Reclaim all the resources */

> +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> +		/* Error number is already set by the reclaim API */

> +		return 1;

> +

> +	rte_ring_free(dq->r);

> +	rte_free(dq);

> +

> +	return 0;

> +}

> +

>  int rte_rcu_log_type;

> 

>  RTE_INIT(rte_rcu_register)

> diff --git a/lib/librte_rcu/rte_rcu_qsbr.h b/lib/librte_rcu/rte_rcu_qsbr.h

> index c80f15c00..185d4b50a 100644

> --- a/lib/librte_rcu/rte_rcu_qsbr.h

> +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> @@ -34,6 +34,7 @@ extern "C" {

>  #include <rte_lcore.h>

>  #include <rte_debug.h>

>  #include <rte_atomic.h>

> +#include <rte_ring.h>

> 

>  extern int rte_rcu_log_type;

> 

> @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

>  	 */

>  } __rte_cache_aligned;

> 

> +/**

> + * Call back function called to free the resources.

> + *

> + * @param p

> + *   Pointer provided while creating the defer queue

> + * @param e

> + *   Pointer to the resource data stored on the defer queue

> + *

> + * @return

> + *   None

> + */

> +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> +

> +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> +

> +/**

> + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> + */

> +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> +

> +/**

> + *  Reclaim at the max 1/16th the total number of resources.

> + */

> +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> +

> +/**

> + * Parameters used when creating the defer queue.

> + */

> +struct rte_rcu_qsbr_dq_parameters {

> +	const char *name;

> +	/**< Name of the queue. */

> +	uint32_t size;

> +	/**< Number of entries in queue. Typically, this will be

> +	 *   the same as the maximum number of entries supported in the

> +	 *   lock free data structure.

> +	 *   Data structures with unbounded number of entries is not

> +	 *   supported currently.

> +	 */

> +	uint32_t esize;

> +	/**< Size (in bytes) of each element in the defer queue.

> +	 *   This has to be multiple of 8B as the rte_ring APIs

> +	 *   support 8B element sizes only.

> +	 */

> +	rte_rcu_qsbr_free_resource f;

> +	/**< Function to call to free the resource. */

> +	void *p;

> +	/**< Pointer passed to the free function. Typically, this is the

> +	 *   pointer to the data structure to which the resource to free

> +	 *   belongs. This can be NULL.

> +	 */

> +	struct rte_rcu_qsbr *v;

> +	/**< RCU QSBR variable to use for this defer queue */

> +};

> +

> +/* RTE defer queue structure.

> + * This structure holds the defer queue. The defer queue is used to

> + * hold the deleted entries from the data structure that are not

> + * yet freed.

> + */

> +struct rte_rcu_qsbr_dq;

> +

>  /**

>   * @warning

>   * @b EXPERIMENTAL: this API may change without prior notice

> @@ -648,6 +710,113 @@ __rte_experimental

>  int

>  rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);

> 

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Create a queue used to store the data structure elements that can

> + * be freed later. This queue is referred to as 'defer queue'.

> + *

> + * @param params

> + *   Parameters to create a defer queue.

> + * @return

> + *   On success - Valid pointer to defer queue

> + *   On error - NULL

> + *   Possible rte_errno codes are:

> + *   - EINVAL - NULL parameters are passed

> + *   - ENOMEM - Not enough memory

> + */

> +__rte_experimental

> +struct rte_rcu_qsbr_dq *

> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Enqueue one resource to the defer queue and start the grace period.

> + * The resource will be freed later after at least one grace period

> + * is over.

> + *

> + * If the defer queue is full, it will attempt to reclaim resources.

> + * It will also reclaim resources at regular intervals to avoid

> + * the defer queue from growing too big.

> + *

> + * This API is not multi-thread safe. It is expected that the caller

> + * provides multi-thread safety by locking a mutex or some other means.

> + *

> + * A lock free multi-thread writer algorithm could achieve multi-thread

> + * safety by creating and using one defer queue per thread.

> + *

> + * @param dq

> + *   Defer queue to allocate an entry from.

> + * @param e

> + *   Pointer to resource data to copy to the defer queue. The size of

> + *   the data to copy is equal to the element size provided when the

> + *   defer queue was created.

> + * @return

> + *   On success - 0

> + *   On error - 1 with rte_errno set to

> + *   - EINVAL - NULL parameters are passed

> + *   - ENOSPC - Defer queue is full. This condition can not happen

> + *		if the defer queue size is equal (or larger) than the

> + *		number of elements in the data structure.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Reclaim resources from the defer queue.

> + *

> + * This API is not multi-thread safe. It is expected that the caller

> + * provides multi-thread safety by locking a mutex or some other means.

> + *

> + * A lock free multi-thread writer algorithm could achieve multi-thread

> + * safety by creating and using one defer queue per thread.

> + *

> + * @param dq

> + *   Defer queue to reclaim an entry from.

> + * @return

> + *   On successful reclamation of at least 1 resource - 0

> + *   On error - 1 with rte_errno set to

> + *   - EINVAL - NULL parameters are passed

> + *   - EAGAIN - None of the resources have completed at least 1 grace period,

> + *		try again.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Delete a defer queue.

> + *

> + * It tries to reclaim all the resources on the defer queue.

> + * If any of the resources have not completed the grace period

> + * the reclamation stops and returns immediately. The rest of

> + * the resources are not reclaimed and the defer queue is not

> + * freed.

> + *

> + * @param dq

> + *   Defer queue to delete.

> + * @return

> + *   On success - 0

> + *   On error - 1

> + *   Possible rte_errno codes are:

> + *   - EINVAL - NULL parameters are passed

> + *   - EAGAIN - Some of the resources have not completed at least 1 grace

> + *		period, try again.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> +

>  #ifdef __cplusplus

>  }

>  #endif

> diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> new file mode 100644

> index 000000000..2122bc36a

> --- /dev/null

> +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> @@ -0,0 +1,46 @@

> +/* SPDX-License-Identifier: BSD-3-Clause

> + * Copyright (c) 2019 Arm Limited

> + */

> +

> +#ifndef _RTE_RCU_QSBR_PVT_H_

> +#define _RTE_RCU_QSBR_PVT_H_

> +

> +/**

> + * This file is private to the RCU library. It should not be included

> + * by the user of this library.

> + */

> +

> +#ifdef __cplusplus

> +extern "C" {

> +#endif

> +

> +#include "rte_rcu_qsbr.h"

> +

> +/* RTE defer queue structure.

> + * This structure holds the defer queue. The defer queue is used to

> + * hold the deleted entries from the data structure that are not

> + * yet freed.

> + */

> +struct rte_rcu_qsbr_dq {

> +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> +	uint32_t size;

> +	/**< Number of elements in the defer queue */

> +	uint32_t esize;

> +	/**< Size (in bytes) of data stored on the defer queue */

> +	rte_rcu_qsbr_free_resource f;

> +	/**< Function to call to free the resource. */

> +	void *p;

> +	/**< Pointer passed to the free function. Typically, this is the

> +	 *   pointer to the data structure to which the resource to free

> +	 *   belongs.

> +	 */

> +	char e[0];

> +	/**< Temporary storage to copy the defer queue element. */

> +};

> +

> +#ifdef __cplusplus

> +}

> +#endif

> +

> +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> diff --git a/lib/librte_rcu/rte_rcu_version.map b/lib/librte_rcu/rte_rcu_version.map

> index f8b9ef2ab..dfac88a37 100644

> --- a/lib/librte_rcu/rte_rcu_version.map

> +++ b/lib/librte_rcu/rte_rcu_version.map

> @@ -8,6 +8,10 @@ EXPERIMENTAL {

>  	rte_rcu_qsbr_synchronize;

>  	rte_rcu_qsbr_thread_register;

>  	rte_rcu_qsbr_thread_unregister;

> +	rte_rcu_qsbr_dq_create;

> +	rte_rcu_qsbr_dq_enqueue;

> +	rte_rcu_qsbr_dq_reclaim;

> +	rte_rcu_qsbr_dq_delete;

> 

>  	local: *;

>  };

> diff --git a/lib/meson.build b/lib/meson.build

> index e5ff83893..0e1be8407 100644

> --- a/lib/meson.build

> +++ b/lib/meson.build

> @@ -11,7 +11,9 @@

>  libraries = [

>  	'kvargs', # eal depends on kvargs

>  	'eal', # everything depends on eal

> -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> +	'ring',

> +	'rcu', # rcu depends on ring

> +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

>  	'cmdline',

>  	'metrics', # bitrate/latency stats depends on this

>  	'hash',    # efd depends on this

> @@ -22,7 +24,7 @@ libraries = [

>  	'gro', 'gso', 'ip_frag', 'jobstats',

>  	'kni', 'latencystats', 'lpm', 'member',

>  	'power', 'pdump', 'rawdev',

> -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> +	'reorder', 'sched', 'security', 'stack', 'vhost',

>  	# ipsec lib depends on net, crypto and security

>  	'ipsec',

>  	# add pkt framework libs which use other libs from above

> --

> 2.17.1

Honnappa Nagarahalli Oct. 3, 2019, 6:29 a.m. UTC | #3

> 

> Hi Honnappa,

Thanks Konstantin for the feedback.

> 

> 

> > Add resource reclamation APIs to make it simple for applications and

> > libraries to integrate rte_rcu library.

> >

> > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> > Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> > ---

> >  app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

> >  lib/librte_rcu/meson.build         |   2 +

> >  lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

> >  lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

> >  lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

> >  lib/librte_rcu/rte_rcu_version.map |   4 +

> >  lib/meson.build                    |   6 +-

> >  7 files changed, 700 insertions(+), 3 deletions(-)  create mode

> > 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

> >

> > diff --git a/lib/librte_rcu/rte_rcu_qsbr.c

> > b/lib/librte_rcu/rte_rcu_qsbr.c index ce7f93dd3..76814f50b 100644

> > --- a/lib/librte_rcu/rte_rcu_qsbr.c

> > +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> > @@ -21,6 +21,7 @@

> >  #include <rte_errno.h>

> >

> >  #include "rte_rcu_qsbr.h"

> > +#include "rte_rcu_qsbr_pvt.h"

> >

> >  /* Get the memory size of QSBR variable */  size_t @@ -267,6 +268,190

> > @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)

> >  	return 0;

> >  }

> >

> > +/* Create a queue used to store the data structure elements that can

> > + * be freed later. This queue is referred to as 'defer queue'.

> > + */

> > +struct rte_rcu_qsbr_dq *

> > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > +*params) {

> > +	struct rte_rcu_qsbr_dq *dq;

> > +	uint32_t qs_fifo_size;

> > +

> > +	if (params == NULL || params->f == NULL ||

> > +		params->v == NULL || params->name == NULL ||

> > +		params->size == 0 || params->esize == 0 ||

> > +		(params->esize % 8 != 0)) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return NULL;

> > +	}

> > +

> > +	dq = rte_zmalloc(NULL,

> > +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> > +		RTE_CACHE_LINE_SIZE);

> > +	if (dq == NULL) {

> > +		rte_errno = ENOMEM;

> > +

> > +		return NULL;

> > +	}

> > +

> > +	/* round up qs_fifo_size to next power of two that is not less than

> > +	 * max_size.

> > +	 */

> > +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> > +					* params->size) + 1);

> > +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> > +					SOCKET_ID_ANY, 0);

> 

> If it is going to be not MT safe, then why not to create the ring with

> (RING_F_SP_ENQ | RING_F_SC_DEQ) flags set?

Agree.

> Though I think it could be changed to allow MT safe multiple enqueue/single

> dequeue, see below.

The MT safe issue is due to reclaim code. The reclaim code has the following sequence:

rte_ring_peek
rte_rcu_qsbr_check
rte_ring_dequeue

This entire sequence needs to be atomic as the entry cannot be dequeued without knowing that the grace period for that entry is over. Note that due to optimizations in rte_rcu_qsbr_check API, this sequence should not be large in most cases. I do not have ideas on how to make this sequence lock-free.

If the writer is on the control plane, most use cases will use mutex locks for synchronization if they are multi-threaded. That lock should be enough to provide the thread safety for these APIs.

If the writer is multi-threaded and lock-free, then one should use a per-thread defer queue.

> 

> > +	if (dq->r == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): defer queue create failed\n", __func__);

> > +		rte_free(dq);

> > +		return NULL;

> > +	}

> > +

> > +	dq->v = params->v;

> > +	dq->size = params->size;

> > +	dq->esize = params->esize;

> > +	dq->f = params->f;

> > +	dq->p = params->p;

> > +

> > +	return dq;

> > +}

> > +

> > +/* Enqueue one resource to the defer queue to free after the grace

> > + * period is over.

> > + */

> > +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e) {

> > +	uint64_t token;

> > +	uint64_t *tmp;

> > +	uint32_t i;

> > +	uint32_t cur_size, free_size;

> > +

> > +	if (dq == NULL || e == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return 1;

> 

> Why just not to return -EINVAL straightway?

> I think there is no much point to set rte_errno in that function at all, just

> return value should do.

I am trying to keep these consistent with the existing APIs. They return 0 or 1 and set the rte_errno.

> 

> > +	}

> > +

> > +	/* Start the grace period */

> > +	token = rte_rcu_qsbr_start(dq->v);

> > +

> > +	/* Reclaim resources if the queue is 1/8th full. This helps

> > +	 * the queue from growing too large and allows time for reader

> > +	 * threads to report their quiescent state.

> > +	 */

> > +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

> 

> Probably would be a bit easier if you just store in dq->esize (elt size + token

> size) / 8.

Agree

> 

> > +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

> 

> Why to make this threshold value hard-coded?

> Why either not to put it into create parameter, or just return a special return

> value, to indicate that threshold is reached?

My thinking was to keep the programming interface easy to use. The more parameters there are, the more painful it is for the user. IMO, the constants chosen should be good enough for most cases. More advanced users could modify the constants. However, we could make these part of the parameters, but optional for the user. For example, if they are set to 0, the default values can be used.

> Or even return number of filled/free entries on success, so caller can decide

> to reclaim or not based on that information on his own?

This means more code on the user side. I think adding these to parameters seems like a better option.

> 

> > +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > +			"%s(): Triggering reclamation\n", __func__);

> > +		rte_rcu_qsbr_dq_reclaim(dq);

> > +	}

> > +

> > +	/* Check if there is space for atleast for 1 resource */

> > +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> > +	if (!free_size) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Defer queue is full\n", __func__);

> > +		rte_errno = ENOSPC;

> > +		return 1;

> > +	}

> > +

> > +	/* Enqueue the resource */

> > +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> > +

> > +	/* The resource to enqueue needs to be a multiple of 64b

> > +	 * due to the limitation of the rte_ring implementation.

> > +	 */

> > +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> > +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

> 

> 

> That whole construction above looks a bit clumsy and error prone...

> I suppose just:

> 

> const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t free, n; ...

> n = rte_ring_enqueue_bulk(dq->r, e, nb_elt, &free); if (n == 0)

Yes, bulk enqueue can be used. But note that once the flexible element size ring patch is done, this code will use that.

>   return -ENOSPC;

> return free;

> 

> That way I think you can have MT-safe version of that function.

Please see the description of MT safe issue above.

> 

> > +

> > +	return 0;

> > +}

> > +

> > +/* Reclaim resources from the defer queue. */ int

> > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

> > +	uint32_t max_cnt;

> > +	uint32_t cnt;

> > +	void *token;

> > +	uint64_t *tmp;

> > +	uint32_t i;

> > +

> > +	if (dq == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return 1;

> 

> Same story as above - I think rte_errno is excessive in this function.

> Just return value should be enough.

> 

> 

> > +	}

> > +

> > +	/* Anything to reclaim? */

> > +	if (rte_ring_count(dq->r) == 0)

> > +		return 0;

> 

> Not sure you need that, see below.

> 

> > +

> > +	/* Reclaim at the max 1/16th the total number of entries. */

> > +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> > +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> 

> Again why not to make max_cnt a configurable at create() parameter?

I think making this an optional parameter when creating the defer queue is a better option.

> Or even a parameter for that function?

> 

> > +	cnt = 0;

> > +

> > +	/* Check reader threads quiescent state and reclaim resources */

> > +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> > +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> > +			== 1)) {

> 

> 

> > +		(void)rte_ring_sc_dequeue(dq->r, &token);

> > +		/* The resource to dequeue needs to be a multiple of 64b

> > +		 * due to the limitation of the rte_ring implementation.

> > +		 */

> > +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> > +			i++, tmp++)

> > +			(void)rte_ring_sc_dequeue(dq->r,

> > +					(void *)(uintptr_t)tmp);

> 

> Again, no need for such constructs with multiple dequeuer I believe.

> Just:

> 

> const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t n; uintptr_t

> elt[nb_elt]; ...

> n = rte_ring_dequeue_bulk(dq->r, elt, nb_elt, NULL); if (n != 0) {dq->f(dq->p,

> elt);}

Agree on bulk API use.

> 

> Seems enough.

> Again in that case you can have enqueue/reclaim running in different threads

> simultaneously, plus you don't need dq->e at all.

Will check on dq->e

> 

> > +		dq->f(dq->p, dq->e);

> > +

> > +		cnt++;

> > +	}

> > +

> > +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> > +

> > +	if (cnt == 0) {

> > +		/* No resources were reclaimed */

> > +		rte_errno = EAGAIN;

> > +		return 1;

> > +	}

> > +

> > +	return 0;

> 

> I'd suggest to return cnt on success.

I am trying to keep the APIs simple. I do not see much use for 'cnt' as return value to the user. It exposes more details which I think are internal to the library.

> 

> > +}

> > +

> > +/* Delete a defer queue. */

> > +int

> > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

> > +	if (dq == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return 1;

> > +	}

> > +

> > +	/* Reclaim all the resources */

> > +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> > +		/* Error number is already set by the reclaim API */

> > +		return 1;

> 

> How do you know that you have reclaimed everything?

Good point, will come back with a different solution.

> 

> > +

> > +	rte_ring_free(dq->r);

> > +	rte_free(dq);

> > +

> > +	return 0;

> > +}

> > +

> >  int rte_rcu_log_type;

> >

> >  RTE_INIT(rte_rcu_register)

> > diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

> > b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a 100644

> > --- a/lib/librte_rcu/rte_rcu_qsbr.h

> > +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> > @@ -34,6 +34,7 @@ extern "C" {

> >  #include <rte_lcore.h>

> >  #include <rte_debug.h>

> >  #include <rte_atomic.h>

> > +#include <rte_ring.h>

> >

> >  extern int rte_rcu_log_type;

> >

> > @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

> >  	 */

> >  } __rte_cache_aligned;

> >

> > +/**

> > + * Call back function called to free the resources.

> > + *

> > + * @param p

> > + *   Pointer provided while creating the defer queue

> > + * @param e

> > + *   Pointer to the resource data stored on the defer queue

> > + *

> > + * @return

> > + *   None

> > + */

> > +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> 

> Stylish thing - usually in DPDK we have typedef newtype_t ...

> Though I am not sure you need a new typedef at all - just a function pointer

> inside the struct seems enough.

Other libraries (for ex: rte_hash) use this approach. I think it is better to keep it out of the structure to allow for better commenting.

> 

> > +

> > +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> > +

> > +/**

> > + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> > + */

> > +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> > +

> > +/**

> > + *  Reclaim at the max 1/16th the total number of resources.

> > + */

> > +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> 

> 

> As I said above, I don't think these thresholds need to be hardcoded.

> In any case, there seems not much point to put them in the public header file.

> 

> > +

> > +/**

> > + * Parameters used when creating the defer queue.

> > + */

> > +struct rte_rcu_qsbr_dq_parameters {

> > +	const char *name;

> > +	/**< Name of the queue. */

> > +	uint32_t size;

> > +	/**< Number of entries in queue. Typically, this will be

> > +	 *   the same as the maximum number of entries supported in the

> > +	 *   lock free data structure.

> > +	 *   Data structures with unbounded number of entries is not

> > +	 *   supported currently.

> > +	 */

> > +	uint32_t esize;

> > +	/**< Size (in bytes) of each element in the defer queue.

> > +	 *   This has to be multiple of 8B as the rte_ring APIs

> > +	 *   support 8B element sizes only.

> > +	 */

> > +	rte_rcu_qsbr_free_resource f;

> > +	/**< Function to call to free the resource. */

> > +	void *p;

> 

> Style nit again - I like short names myself, but that seems a bit extreme... :)

> Might be at least:

> void (*reclaim)(void *, void *);

Maybe 'free_fn'?

> void * reclaim_data;

> ?

This is the pointer to the data structure to free the resource into. For example, in the LPM data structure, it will be a pointer to the LPM. 'reclaim_data' does not convey the meaning correctly.

> 

> > +	/**< Pointer passed to the free function. Typically, this is the

> > +	 *   pointer to the data structure to which the resource to free

> > +	 *   belongs. This can be NULL.

> > +	 */

> > +	struct rte_rcu_qsbr *v;

> 

> Does it need to be inside that struct?

> Might be better:

> rte_rcu_qsbr_dq_create(struct rte_rcu_qsbr *v, const struct

> rte_rcu_qsbr_dq_parameters *params);

The API takes a parameter structure as input anyway, so why add another argument to the function? The QSBR variable is also just another parameter.

> 

> Another alternative: make both reclaim() and enqueue() to take v as a

> parameter.

But both of them need access to some of the parameters provided in rte_rcu_qsbr_dq_create API. We would end up passing 2 arguments to the functions.

> 

> > +	/**< RCU QSBR variable to use for this defer queue */ };

> > +

> > +/* RTE defer queue structure.

> > + * This structure holds the defer queue. The defer queue is used to

> > + * hold the deleted entries from the data structure that are not

> > + * yet freed.

> > + */

> > +struct rte_rcu_qsbr_dq;

> > +

> >  /**

> >   * @warning

> >   * @b EXPERIMENTAL: this API may change without prior notice @@

> > -648,6 +710,113 @@ __rte_experimental  int  rte_rcu_qsbr_dump(FILE *f,

> > struct rte_rcu_qsbr *v);

> >

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Create a queue used to store the data structure elements that can

> > + * be freed later. This queue is referred to as 'defer queue'.

> > + *

> > + * @param params

> > + *   Parameters to create a defer queue.

> > + * @return

> > + *   On success - Valid pointer to defer queue

> > + *   On error - NULL

> > + *   Possible rte_errno codes are:

> > + *   - EINVAL - NULL parameters are passed

> > + *   - ENOMEM - Not enough memory

> > + */

> > +__rte_experimental

> > +struct rte_rcu_qsbr_dq *

> > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > +*params);

> > +

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Enqueue one resource to the defer queue and start the grace period.

> > + * The resource will be freed later after at least one grace period

> > + * is over.

> > + *

> > + * If the defer queue is full, it will attempt to reclaim resources.

> > + * It will also reclaim resources at regular intervals to avoid

> > + * the defer queue from growing too big.

> > + *

> > + * This API is not multi-thread safe. It is expected that the caller

> > + * provides multi-thread safety by locking a mutex or some other means.

> > + *

> > + * A lock free multi-thread writer algorithm could achieve

> > +multi-thread

> > + * safety by creating and using one defer queue per thread.

> > + *

> > + * @param dq

> > + *   Defer queue to allocate an entry from.

> > + * @param e

> > + *   Pointer to resource data to copy to the defer queue. The size of

> > + *   the data to copy is equal to the element size provided when the

> > + *   defer queue was created.

> > + * @return

> > + *   On success - 0

> > + *   On error - 1 with rte_errno set to

> > + *   - EINVAL - NULL parameters are passed

> > + *   - ENOSPC - Defer queue is full. This condition can not happen

> > + *		if the defer queue size is equal (or larger) than the

> > + *		number of elements in the data structure.

> > + */

> > +__rte_experimental

> > +int

> > +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> > +

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Reclaim resources from the defer queue.

> > + *

> > + * This API is not multi-thread safe. It is expected that the caller

> > + * provides multi-thread safety by locking a mutex or some other means.

> > + *

> > + * A lock free multi-thread writer algorithm could achieve

> > +multi-thread

> > + * safety by creating and using one defer queue per thread.

> > + *

> > + * @param dq

> > + *   Defer queue to reclaim an entry from.

> > + * @return

> > + *   On successful reclamation of at least 1 resource - 0

> > + *   On error - 1 with rte_errno set to

> > + *   - EINVAL - NULL parameters are passed

> > + *   - EAGAIN - None of the resources have completed at least 1 grace

> period,

> > + *		try again.

> > + */

> > +__rte_experimental

> > +int

> > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> > +

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Delete a defer queue.

> > + *

> > + * It tries to reclaim all the resources on the defer queue.

> > + * If any of the resources have not completed the grace period

> > + * the reclamation stops and returns immediately. The rest of

> > + * the resources are not reclaimed and the defer queue is not

> > + * freed.

> > + *

> > + * @param dq

> > + *   Defer queue to delete.

> > + * @return

> > + *   On success - 0

> > + *   On error - 1

> > + *   Possible rte_errno codes are:

> > + *   - EINVAL - NULL parameters are passed

> > + *   - EAGAIN - Some of the resources have not completed at least 1 grace

> > + *		period, try again.

> > + */

> > +__rte_experimental

> > +int

> > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> > +

> >  #ifdef __cplusplus

> >  }

> >  #endif

> > diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > new file mode 100644

> > index 000000000..2122bc36a

> > --- /dev/null

> > +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> 

> Again style suggestion: as it is not public header - don't use rte_ prefix for

> naming.

> From my perspective - easier to realize for reader what is public header,

> what is not.

Looks like the guidelines are not defined very well. I see one private file with rte_ prefix. I see Stephen not using rte_ prefix. I do not have any preference. But, a consistent approach is required.

> 

> > @@ -0,0 +1,46 @@

> > +/* SPDX-License-Identifier: BSD-3-Clause

> > + * Copyright (c) 2019 Arm Limited

> > + */

> > +

> > +#ifndef _RTE_RCU_QSBR_PVT_H_

> > +#define _RTE_RCU_QSBR_PVT_H_

> > +

> > +/**

> > + * This file is private to the RCU library. It should not be included

> > + * by the user of this library.

> > + */

> > +

> > +#ifdef __cplusplus

> > +extern "C" {

> > +#endif

> > +

> > +#include "rte_rcu_qsbr.h"

> > +

> > +/* RTE defer queue structure.

> > + * This structure holds the defer queue. The defer queue is used to

> > + * hold the deleted entries from the data structure that are not

> > + * yet freed.

> > + */

> > +struct rte_rcu_qsbr_dq {

> > +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> > +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> > +	uint32_t size;

> > +	/**< Number of elements in the defer queue */

> > +	uint32_t esize;

> > +	/**< Size (in bytes) of data stored on the defer queue */

> > +	rte_rcu_qsbr_free_resource f;

> > +	/**< Function to call to free the resource. */

> > +	void *p;

> > +	/**< Pointer passed to the free function. Typically, this is the

> > +	 *   pointer to the data structure to which the resource to free

> > +	 *   belongs.

> > +	 */

> > +	char e[0];

> > +	/**< Temporary storage to copy the defer queue element. */

> 

> Do you really need 'e' at all?

> Can't it be just temporary stack variable?

Ok, will check.

> 

> > +};

> > +

> > +#ifdef __cplusplus

> > +}

> > +#endif

> > +

> > +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> > diff --git a/lib/librte_rcu/rte_rcu_version.map

> > b/lib/librte_rcu/rte_rcu_version.map

> > index f8b9ef2ab..dfac88a37 100644

> > --- a/lib/librte_rcu/rte_rcu_version.map

> > +++ b/lib/librte_rcu/rte_rcu_version.map

> > @@ -8,6 +8,10 @@ EXPERIMENTAL {

> >  	rte_rcu_qsbr_synchronize;

> >  	rte_rcu_qsbr_thread_register;

> >  	rte_rcu_qsbr_thread_unregister;

> > +	rte_rcu_qsbr_dq_create;

> > +	rte_rcu_qsbr_dq_enqueue;

> > +	rte_rcu_qsbr_dq_reclaim;

> > +	rte_rcu_qsbr_dq_delete;

> >

> >  	local: *;

> >  };

> > diff --git a/lib/meson.build b/lib/meson.build index

> > e5ff83893..0e1be8407 100644

> > --- a/lib/meson.build

> > +++ b/lib/meson.build

> > @@ -11,7 +11,9 @@

> >  libraries = [

> >  	'kvargs', # eal depends on kvargs

> >  	'eal', # everything depends on eal

> > -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > +	'ring',

> > +	'rcu', # rcu depends on ring

> > +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> >  	'cmdline',

> >  	'metrics', # bitrate/latency stats depends on this

> >  	'hash',    # efd depends on this

> > @@ -22,7 +24,7 @@ libraries = [

> >  	'gro', 'gso', 'ip_frag', 'jobstats',

> >  	'kni', 'latencystats', 'lpm', 'member',

> >  	'power', 'pdump', 'rawdev',

> > -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> > +	'reorder', 'sched', 'security', 'stack', 'vhost',

> >  	# ipsec lib depends on net, crypto and security

> >  	'ipsec',

> >  	# add pkt framework libs which use other libs from above

> > --

> > 2.17.1

Honnappa Nagarahalli Oct. 3, 2019, 6:42 a.m. UTC | #4

> 

> > +

> > +/* Reclaim resources from the defer queue. */ int

> > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

> > +	uint32_t max_cnt;

> > +	uint32_t cnt;

> > +	void *token;

> > +	uint64_t *tmp;

> > +	uint32_t i;

> > +

> > +	if (dq == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return 1;

> > +	}

> > +

> > +	/* Anything to reclaim? */

> > +	if (rte_ring_count(dq->r) == 0)

> > +		return 0;

> > +

> > +	/* Reclaim at the max 1/16th the total number of entries. */

> > +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> > +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> > +	cnt = 0;

> > +

> > +	/* Check reader threads quiescent state and reclaim resources */

> > +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> > +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> 

> One more thing I forgot to ask - how this construct supposed to work on 32

> bit machines?

> peek() will return 32-bit value, while  qsbr_check() operates with 64bit

> tokens...

> As I understand in that case you need to peek() 2 elems.

Yes, that is the intention. Ring APIs with desired element size will help address the 32b machines.

> Might work, but still think better to introduce serialize version of

> ring_dequeue() See my other mail about rte_ring_peek().

> 

> 

> > +			== 1)) {

> > +		(void)rte_ring_sc_dequeue(dq->r, &token);

> > +		/* The resource to dequeue needs to be a multiple of 64b

> > +		 * due to the limitation of the rte_ring implementation.

> > +		 */

> > +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> > +			i++, tmp++)

> > +			(void)rte_ring_sc_dequeue(dq->r,

> > +					(void *)(uintptr_t)tmp);

> > +		dq->f(dq->p, dq->e);

> > +

> > +		cnt++;

> > +	}

> > +

> > +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> > +

> > +	if (cnt == 0) {

> > +		/* No resources were reclaimed */

> > +		rte_errno = EAGAIN;

> > +		return 1;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +/* Delete a defer queue. */

> > +int

> > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

> > +	if (dq == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return 1;

> > +	}

> > +

> > +	/* Reclaim all the resources */

> > +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> > +		/* Error number is already set by the reclaim API */

> > +		return 1;

> > +

> > +	rte_ring_free(dq->r);

> > +	rte_free(dq);

> > +

> > +	return 0;

> > +}

> > +

> >  int rte_rcu_log_type;

> >

> >  RTE_INIT(rte_rcu_register)

> > diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

> > b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a 100644

> > --- a/lib/librte_rcu/rte_rcu_qsbr.h

> > +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> > @@ -34,6 +34,7 @@ extern "C" {

> >  #include <rte_lcore.h>

> >  #include <rte_debug.h>

> >  #include <rte_atomic.h>

> > +#include <rte_ring.h>

> >

> >  extern int rte_rcu_log_type;

> >

> > @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

> >  	 */

> >  } __rte_cache_aligned;

> >

> > +/**

> > + * Call back function called to free the resources.

> > + *

> > + * @param p

> > + *   Pointer provided while creating the defer queue

> > + * @param e

> > + *   Pointer to the resource data stored on the defer queue

> > + *

> > + * @return

> > + *   None

> > + */

> > +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> > +

> > +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> > +

> > +/**

> > + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> > + */

> > +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> > +

> > +/**

> > + *  Reclaim at the max 1/16th the total number of resources.

> > + */

> > +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> > +

> > +/**

> > + * Parameters used when creating the defer queue.

> > + */

> > +struct rte_rcu_qsbr_dq_parameters {

> > +	const char *name;

> > +	/**< Name of the queue. */

> > +	uint32_t size;

> > +	/**< Number of entries in queue. Typically, this will be

> > +	 *   the same as the maximum number of entries supported in the

> > +	 *   lock free data structure.

> > +	 *   Data structures with unbounded number of entries is not

> > +	 *   supported currently.

> > +	 */

> > +	uint32_t esize;

> > +	/**< Size (in bytes) of each element in the defer queue.

> > +	 *   This has to be multiple of 8B as the rte_ring APIs

> > +	 *   support 8B element sizes only.

> > +	 */

> > +	rte_rcu_qsbr_free_resource f;

> > +	/**< Function to call to free the resource. */

> > +	void *p;

> > +	/**< Pointer passed to the free function. Typically, this is the

> > +	 *   pointer to the data structure to which the resource to free

> > +	 *   belongs. This can be NULL.

> > +	 */

> > +	struct rte_rcu_qsbr *v;

> > +	/**< RCU QSBR variable to use for this defer queue */ };

> > +

> > +/* RTE defer queue structure.

> > + * This structure holds the defer queue. The defer queue is used to

> > + * hold the deleted entries from the data structure that are not

> > + * yet freed.

> > + */

> > +struct rte_rcu_qsbr_dq;

> > +

> >  /**

> >   * @warning

> >   * @b EXPERIMENTAL: this API may change without prior notice @@

> > -648,6 +710,113 @@ __rte_experimental  int  rte_rcu_qsbr_dump(FILE *f,

> > struct rte_rcu_qsbr *v);

> >

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Create a queue used to store the data structure elements that can

> > + * be freed later. This queue is referred to as 'defer queue'.

> > + *

> > + * @param params

> > + *   Parameters to create a defer queue.

> > + * @return

> > + *   On success - Valid pointer to defer queue

> > + *   On error - NULL

> > + *   Possible rte_errno codes are:

> > + *   - EINVAL - NULL parameters are passed

> > + *   - ENOMEM - Not enough memory

> > + */

> > +__rte_experimental

> > +struct rte_rcu_qsbr_dq *

> > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > +*params);

> > +

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Enqueue one resource to the defer queue and start the grace period.

> > + * The resource will be freed later after at least one grace period

> > + * is over.

> > + *

> > + * If the defer queue is full, it will attempt to reclaim resources.

> > + * It will also reclaim resources at regular intervals to avoid

> > + * the defer queue from growing too big.

> > + *

> > + * This API is not multi-thread safe. It is expected that the caller

> > + * provides multi-thread safety by locking a mutex or some other means.

> > + *

> > + * A lock free multi-thread writer algorithm could achieve

> > +multi-thread

> > + * safety by creating and using one defer queue per thread.

> > + *

> > + * @param dq

> > + *   Defer queue to allocate an entry from.

> > + * @param e

> > + *   Pointer to resource data to copy to the defer queue. The size of

> > + *   the data to copy is equal to the element size provided when the

> > + *   defer queue was created.

> > + * @return

> > + *   On success - 0

> > + *   On error - 1 with rte_errno set to

> > + *   - EINVAL - NULL parameters are passed

> > + *   - ENOSPC - Defer queue is full. This condition can not happen

> > + *		if the defer queue size is equal (or larger) than the

> > + *		number of elements in the data structure.

> > + */

> > +__rte_experimental

> > +int

> > +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> > +

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Reclaim resources from the defer queue.

> > + *

> > + * This API is not multi-thread safe. It is expected that the caller

> > + * provides multi-thread safety by locking a mutex or some other means.

> > + *

> > + * A lock free multi-thread writer algorithm could achieve

> > +multi-thread

> > + * safety by creating and using one defer queue per thread.

> > + *

> > + * @param dq

> > + *   Defer queue to reclaim an entry from.

> > + * @return

> > + *   On successful reclamation of at least 1 resource - 0

> > + *   On error - 1 with rte_errno set to

> > + *   - EINVAL - NULL parameters are passed

> > + *   - EAGAIN - None of the resources have completed at least 1 grace

> period,

> > + *		try again.

> > + */

> > +__rte_experimental

> > +int

> > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> > +

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Delete a defer queue.

> > + *

> > + * It tries to reclaim all the resources on the defer queue.

> > + * If any of the resources have not completed the grace period

> > + * the reclamation stops and returns immediately. The rest of

> > + * the resources are not reclaimed and the defer queue is not

> > + * freed.

> > + *

> > + * @param dq

> > + *   Defer queue to delete.

> > + * @return

> > + *   On success - 0

> > + *   On error - 1

> > + *   Possible rte_errno codes are:

> > + *   - EINVAL - NULL parameters are passed

> > + *   - EAGAIN - Some of the resources have not completed at least 1 grace

> > + *		period, try again.

> > + */

> > +__rte_experimental

> > +int

> > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> > +

> >  #ifdef __cplusplus

> >  }

> >  #endif

> > diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > new file mode 100644

> > index 000000000..2122bc36a

> > --- /dev/null

> > +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > @@ -0,0 +1,46 @@

> > +/* SPDX-License-Identifier: BSD-3-Clause

> > + * Copyright (c) 2019 Arm Limited

> > + */

> > +

> > +#ifndef _RTE_RCU_QSBR_PVT_H_

> > +#define _RTE_RCU_QSBR_PVT_H_

> > +

> > +/**

> > + * This file is private to the RCU library. It should not be included

> > + * by the user of this library.

> > + */

> > +

> > +#ifdef __cplusplus

> > +extern "C" {

> > +#endif

> > +

> > +#include "rte_rcu_qsbr.h"

> > +

> > +/* RTE defer queue structure.

> > + * This structure holds the defer queue. The defer queue is used to

> > + * hold the deleted entries from the data structure that are not

> > + * yet freed.

> > + */

> > +struct rte_rcu_qsbr_dq {

> > +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> > +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> > +	uint32_t size;

> > +	/**< Number of elements in the defer queue */

> > +	uint32_t esize;

> > +	/**< Size (in bytes) of data stored on the defer queue */

> > +	rte_rcu_qsbr_free_resource f;

> > +	/**< Function to call to free the resource. */

> > +	void *p;

> > +	/**< Pointer passed to the free function. Typically, this is the

> > +	 *   pointer to the data structure to which the resource to free

> > +	 *   belongs.

> > +	 */

> > +	char e[0];

> > +	/**< Temporary storage to copy the defer queue element. */ };

> > +

> > +#ifdef __cplusplus

> > +}

> > +#endif

> > +

> > +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> > diff --git a/lib/librte_rcu/rte_rcu_version.map

> > b/lib/librte_rcu/rte_rcu_version.map

> > index f8b9ef2ab..dfac88a37 100644

> > --- a/lib/librte_rcu/rte_rcu_version.map

> > +++ b/lib/librte_rcu/rte_rcu_version.map

> > @@ -8,6 +8,10 @@ EXPERIMENTAL {

> >  	rte_rcu_qsbr_synchronize;

> >  	rte_rcu_qsbr_thread_register;

> >  	rte_rcu_qsbr_thread_unregister;

> > +	rte_rcu_qsbr_dq_create;

> > +	rte_rcu_qsbr_dq_enqueue;

> > +	rte_rcu_qsbr_dq_reclaim;

> > +	rte_rcu_qsbr_dq_delete;

> >

> >  	local: *;

> >  };

> > diff --git a/lib/meson.build b/lib/meson.build index

> > e5ff83893..0e1be8407 100644

> > --- a/lib/meson.build

> > +++ b/lib/meson.build

> > @@ -11,7 +11,9 @@

> >  libraries = [

> >  	'kvargs', # eal depends on kvargs

> >  	'eal', # everything depends on eal

> > -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > +	'ring',

> > +	'rcu', # rcu depends on ring

> > +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> >  	'cmdline',

> >  	'metrics', # bitrate/latency stats depends on this

> >  	'hash',    # efd depends on this

> > @@ -22,7 +24,7 @@ libraries = [

> >  	'gro', 'gso', 'ip_frag', 'jobstats',

> >  	'kni', 'latencystats', 'lpm', 'member',

> >  	'power', 'pdump', 'rawdev',

> > -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> > +	'reorder', 'sched', 'security', 'stack', 'vhost',

> >  	# ipsec lib depends on net, crypto and security

> >  	'ipsec',

> >  	# add pkt framework libs which use other libs from above

> > --

> > 2.17.1

Ananyev, Konstantin Oct. 3, 2019, 11:52 a.m. UTC | #5

> -----Original Message-----

> From: Honnappa Nagarahalli [mailto:Honnappa.Nagarahalli@arm.com]

> Sent: Thursday, October 3, 2019 7:42 AM

> To: Ananyev, Konstantin <konstantin.ananyev@intel.com>; stephen@networkplumber.org; paulmck@linux.ibm.com

> Cc: Wang, Yipeng1 <yipeng1.wang@intel.com>; Medvedkin, Vladimir <vladimir.medvedkin@intel.com>; Ruifeng Wang (Arm Technology

> China) <Ruifeng.Wang@arm.com>; Dharmik Thakkar <Dharmik.Thakkar@arm.com>; Honnappa Nagarahalli

> <Honnappa.Nagarahalli@arm.com>; dev@dpdk.org; nd <nd@arm.com>; nd <nd@arm.com>

> Subject: RE: [PATCH v3 2/3] lib/rcu: add resource reclamation APIs

> 

> >

> > > +

> > > +/* Reclaim resources from the defer queue. */ int

> > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

> > > +	uint32_t max_cnt;

> > > +	uint32_t cnt;

> > > +	void *token;

> > > +	uint64_t *tmp;

> > > +	uint32_t i;

> > > +

> > > +	if (dq == NULL) {

> > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > +			"%s(): Invalid input parameter\n", __func__);

> > > +		rte_errno = EINVAL;

> > > +

> > > +		return 1;

> > > +	}

> > > +

> > > +	/* Anything to reclaim? */

> > > +	if (rte_ring_count(dq->r) == 0)

> > > +		return 0;

> > > +

> > > +	/* Reclaim at the max 1/16th the total number of entries. */

> > > +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> > > +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> > > +	cnt = 0;

> > > +

> > > +	/* Check reader threads quiescent state and reclaim resources */

> > > +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> > > +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> >

> > One more thing I forgot to ask - how this construct supposed to work on 32

> > bit machines?

> > peek() will return 32-bit value, while  qsbr_check() operates with 64bit

> > tokens...

> > As I understand in that case you need to peek() 2 elems.

> Yes, that is the intention. Ring APIs with desired element size will help address the 32b machines.


Or serialized dequeue :)

> 

> > Might work, but still think better to introduce serialize version of

> > ring_dequeue() See my other mail about re_ring_peek().

> >

> >

> > > +			== 1)) {

> > > +		(void)rte_ring_sc_dequeue(dq->r, &token);

> > > +		/* The resource to dequeue needs to be a multiple of 64b

> > > +		 * due to the limitation of the rte_ring implementation.

> > > +		 */

> > > +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> > > +			i++, tmp++)

> > > +			(void)rte_ring_sc_dequeue(dq->r,

> > > +					(void *)(uintptr_t)tmp);

> > > +		dq->f(dq->p, dq->e);

> > > +

> > > +		cnt++;

> > > +	}

> > > +

> > > +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > > +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> > > +

> > > +	if (cnt == 0) {

> > > +		/* No resources were reclaimed */

> > > +		rte_errno = EAGAIN;

> > > +		return 1;

> > > +	}

> > > +

> > > +	return 0;

> > > +}

> > > +

> > > +/* Delete a defer queue. */

> > > +int

> > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

> > > +	if (dq == NULL) {

> > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > +			"%s(): Invalid input parameter\n", __func__);

> > > +		rte_errno = EINVAL;

> > > +

> > > +		return 1;

> > > +	}

> > > +

> > > +	/* Reclaim all the resources */

> > > +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> > > +		/* Error number is already set by the reclaim API */

> > > +		return 1;

> > > +

> > > +	rte_ring_free(dq->r);

> > > +	rte_free(dq);

> > > +

> > > +	return 0;

> > > +}

> > > +

> > >  int rte_rcu_log_type;

> > >

> > >  RTE_INIT(rte_rcu_register)

> > > diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

> > > b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a 100644

> > > --- a/lib/librte_rcu/rte_rcu_qsbr.h

> > > +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> > > @@ -34,6 +34,7 @@ extern "C" {

> > >  #include <rte_lcore.h>

> > >  #include <rte_debug.h>

> > >  #include <rte_atomic.h>

> > > +#include <rte_ring.h>

> > >

> > >  extern int rte_rcu_log_type;

> > >

> > > @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

> > >  	 */

> > >  } __rte_cache_aligned;

> > >

> > > +/**

> > > + * Call back function called to free the resources.

> > > + *

> > > + * @param p

> > > + *   Pointer provided while creating the defer queue

> > > + * @param e

> > > + *   Pointer to the resource data stored on the defer queue

> > > + *

> > > + * @return

> > > + *   None

> > > + */

> > > +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> > > +

> > > +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> > > +

> > > +/**

> > > + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> > > + */

> > > +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> > > +

> > > +/**

> > > + *  Reclaim at the max 1/16th the total number of resources.

> > > + */

> > > +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> > > +

> > > +/**

> > > + * Parameters used when creating the defer queue.

> > > + */

> > > +struct rte_rcu_qsbr_dq_parameters {

> > > +	const char *name;

> > > +	/**< Name of the queue. */

> > > +	uint32_t size;

> > > +	/**< Number of entries in queue. Typically, this will be

> > > +	 *   the same as the maximum number of entries supported in the

> > > +	 *   lock free data structure.

> > > +	 *   Data structures with unbounded number of entries is not

> > > +	 *   supported currently.

> > > +	 */

> > > +	uint32_t esize;

> > > +	/**< Size (in bytes) of each element in the defer queue.

> > > +	 *   This has to be multiple of 8B as the rte_ring APIs

> > > +	 *   support 8B element sizes only.

> > > +	 */

> > > +	rte_rcu_qsbr_free_resource f;

> > > +	/**< Function to call to free the resource. */

> > > +	void *p;

> > > +	/**< Pointer passed to the free function. Typically, this is the

> > > +	 *   pointer to the data structure to which the resource to free

> > > +	 *   belongs. This can be NULL.

> > > +	 */

> > > +	struct rte_rcu_qsbr *v;

> > > +	/**< RCU QSBR variable to use for this defer queue */ };

> > > +

> > > +/* RTE defer queue structure.

> > > + * This structure holds the defer queue. The defer queue is used to

> > > + * hold the deleted entries from the data structure that are not

> > > + * yet freed.

> > > + */

> > > +struct rte_rcu_qsbr_dq;

> > > +

> > >  /**

> > >   * @warning

> > >   * @b EXPERIMENTAL: this API may change without prior notice @@

> > > -648,6 +710,113 @@ __rte_experimental  int  rte_rcu_qsbr_dump(FILE *f,

> > > struct rte_rcu_qsbr *v);

> > >

> > > +/**

> > > + * @warning

> > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > + *

> > > + * Create a queue used to store the data structure elements that can

> > > + * be freed later. This queue is referred to as 'defer queue'.

> > > + *

> > > + * @param params

> > > + *   Parameters to create a defer queue.

> > > + * @return

> > > + *   On success - Valid pointer to defer queue

> > > + *   On error - NULL

> > > + *   Possible rte_errno codes are:

> > > + *   - EINVAL - NULL parameters are passed

> > > + *   - ENOMEM - Not enough memory

> > > + */

> > > +__rte_experimental

> > > +struct rte_rcu_qsbr_dq *

> > > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > > +*params);

> > > +

> > > +/**

> > > + * @warning

> > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > + *

> > > + * Enqueue one resource to the defer queue and start the grace period.

> > > + * The resource will be freed later after at least one grace period

> > > + * is over.

> > > + *

> > > + * If the defer queue is full, it will attempt to reclaim resources.

> > > + * It will also reclaim resources at regular intervals to avoid

> > > + * the defer queue from growing too big.

> > > + *

> > > + * This API is not multi-thread safe. It is expected that the caller

> > > + * provides multi-thread safety by locking a mutex or some other means.

> > > + *

> > > + * A lock free multi-thread writer algorithm could achieve

> > > +multi-thread

> > > + * safety by creating and using one defer queue per thread.

> > > + *

> > > + * @param dq

> > > + *   Defer queue to allocate an entry from.

> > > + * @param e

> > > + *   Pointer to resource data to copy to the defer queue. The size of

> > > + *   the data to copy is equal to the element size provided when the

> > > + *   defer queue was created.

> > > + * @return

> > > + *   On success - 0

> > > + *   On error - 1 with rte_errno set to

> > > + *   - EINVAL - NULL parameters are passed

> > > + *   - ENOSPC - Defer queue is full. This condition can not happen

> > > + *		if the defer queue size is equal (or larger) than the

> > > + *		number of elements in the data structure.

> > > + */

> > > +__rte_experimental

> > > +int

> > > +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> > > +

> > > +/**

> > > + * @warning

> > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > + *

> > > + * Reclaim resources from the defer queue.

> > > + *

> > > + * This API is not multi-thread safe. It is expected that the caller

> > > + * provides multi-thread safety by locking a mutex or some other means.

> > > + *

> > > + * A lock free multi-thread writer algorithm could achieve

> > > +multi-thread

> > > + * safety by creating and using one defer queue per thread.

> > > + *

> > > + * @param dq

> > > + *   Defer queue to reclaim an entry from.

> > > + * @return

> > > + *   On successful reclamation of at least 1 resource - 0

> > > + *   On error - 1 with rte_errno set to

> > > + *   - EINVAL - NULL parameters are passed

> > > + *   - EAGAIN - None of the resources have completed at least 1 grace

> > period,

> > > + *		try again.

> > > + */

> > > +__rte_experimental

> > > +int

> > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> > > +

> > > +/**

> > > + * @warning

> > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > + *

> > > + * Delete a defer queue.

> > > + *

> > > + * It tries to reclaim all the resources on the defer queue.

> > > + * If any of the resources have not completed the grace period

> > > + * the reclamation stops and returns immediately. The rest of

> > > + * the resources are not reclaimed and the defer queue is not

> > > + * freed.

> > > + *

> > > + * @param dq

> > > + *   Defer queue to delete.

> > > + * @return

> > > + *   On success - 0

> > > + *   On error - 1

> > > + *   Possible rte_errno codes are:

> > > + *   - EINVAL - NULL parameters are passed

> > > + *   - EAGAIN - Some of the resources have not completed at least 1 grace

> > > + *		period, try again.

> > > + */

> > > +__rte_experimental

> > > +int

> > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> > > +

> > >  #ifdef __cplusplus

> > >  }

> > >  #endif

> > > diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > new file mode 100644

> > > index 000000000..2122bc36a

> > > --- /dev/null

> > > +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > @@ -0,0 +1,46 @@

> > > +/* SPDX-License-Identifier: BSD-3-Clause

> > > + * Copyright (c) 2019 Arm Limited

> > > + */

> > > +

> > > +#ifndef _RTE_RCU_QSBR_PVT_H_

> > > +#define _RTE_RCU_QSBR_PVT_H_

> > > +

> > > +/**

> > > + * This file is private to the RCU library. It should not be included

> > > + * by the user of this library.

> > > + */

> > > +

> > > +#ifdef __cplusplus

> > > +extern "C" {

> > > +#endif

> > > +

> > > +#include "rte_rcu_qsbr.h"

> > > +

> > > +/* RTE defer queue structure.

> > > + * This structure holds the defer queue. The defer queue is used to

> > > + * hold the deleted entries from the data structure that are not

> > > + * yet freed.

> > > + */

> > > +struct rte_rcu_qsbr_dq {

> > > +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> > > +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> > > +	uint32_t size;

> > > +	/**< Number of elements in the defer queue */

> > > +	uint32_t esize;

> > > +	/**< Size (in bytes) of data stored on the defer queue */

> > > +	rte_rcu_qsbr_free_resource f;

> > > +	/**< Function to call to free the resource. */

> > > +	void *p;

> > > +	/**< Pointer passed to the free function. Typically, this is the

> > > +	 *   pointer to the data structure to which the resource to free

> > > +	 *   belongs.

> > > +	 */

> > > +	char e[0];

> > > +	/**< Temporary storage to copy the defer queue element. */ };

> > > +

> > > +#ifdef __cplusplus

> > > +}

> > > +#endif

> > > +

> > > +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> > > diff --git a/lib/librte_rcu/rte_rcu_version.map

> > > b/lib/librte_rcu/rte_rcu_version.map

> > > index f8b9ef2ab..dfac88a37 100644

> > > --- a/lib/librte_rcu/rte_rcu_version.map

> > > +++ b/lib/librte_rcu/rte_rcu_version.map

> > > @@ -8,6 +8,10 @@ EXPERIMENTAL {

> > >  	rte_rcu_qsbr_synchronize;

> > >  	rte_rcu_qsbr_thread_register;

> > >  	rte_rcu_qsbr_thread_unregister;

> > > +	rte_rcu_qsbr_dq_create;

> > > +	rte_rcu_qsbr_dq_enqueue;

> > > +	rte_rcu_qsbr_dq_reclaim;

> > > +	rte_rcu_qsbr_dq_delete;

> > >

> > >  	local: *;

> > >  };

> > > diff --git a/lib/meson.build b/lib/meson.build index

> > > e5ff83893..0e1be8407 100644

> > > --- a/lib/meson.build

> > > +++ b/lib/meson.build

> > > @@ -11,7 +11,9 @@

> > >  libraries = [

> > >  	'kvargs', # eal depends on kvargs

> > >  	'eal', # everything depends on eal

> > > -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > > +	'ring',

> > > +	'rcu', # rcu depends on ring

> > > +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > >  	'cmdline',

> > >  	'metrics', # bitrate/latency stats depends on this

> > >  	'hash',    # efd depends on this

> > > @@ -22,7 +24,7 @@ libraries = [

> > >  	'gro', 'gso', 'ip_frag', 'jobstats',

> > >  	'kni', 'latencystats', 'lpm', 'member',

> > >  	'power', 'pdump', 'rawdev',

> > > -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> > > +	'reorder', 'sched', 'security', 'stack', 'vhost',

> > >  	# ipsec lib depends on net, crypto and security

> > >  	'ipsec',

> > >  	# add pkt framework libs which use other libs from above

> > > --

> > > 2.17.1

Ananyev, Konstantin Oct. 3, 2019, 12:26 p.m. UTC | #6

Hi Honnappa,

> > > Add resource reclamation APIs to make it simple for applications and

> > > libraries to integrate rte_rcu library.

> > >

> > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> > > Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> > > ---

> > >  app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

> > >  lib/librte_rcu/meson.build         |   2 +

> > >  lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

> > >  lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

> > >  lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

> > >  lib/librte_rcu/rte_rcu_version.map |   4 +

> > >  lib/meson.build                    |   6 +-

> > >  7 files changed, 700 insertions(+), 3 deletions(-)  create mode

> > > 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > >

> > > diff --git a/lib/librte_rcu/rte_rcu_qsbr.c

> > > b/lib/librte_rcu/rte_rcu_qsbr.c index ce7f93dd3..76814f50b 100644

> > > --- a/lib/librte_rcu/rte_rcu_qsbr.c

> > > +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> > > @@ -21,6 +21,7 @@

> > >  #include <rte_errno.h>

> > >

> > >  #include "rte_rcu_qsbr.h"

> > > +#include "rte_rcu_qsbr_pvt.h"

> > >

> > >  /* Get the memory size of QSBR variable */  size_t @@ -267,6 +268,190

> > > @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)

> > >  	return 0;

> > >  }

> > >

> > > +/* Create a queue used to store the data structure elements that can

> > > + * be freed later. This queue is referred to as 'defer queue'.

> > > + */

> > > +struct rte_rcu_qsbr_dq *

> > > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > > +*params) {

> > > +	struct rte_rcu_qsbr_dq *dq;

> > > +	uint32_t qs_fifo_size;

> > > +

> > > +	if (params == NULL || params->f == NULL ||

> > > +		params->v == NULL || params->name == NULL ||

> > > +		params->size == 0 || params->esize == 0 ||

> > > +		(params->esize % 8 != 0)) {

> > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > +			"%s(): Invalid input parameter\n", __func__);

> > > +		rte_errno = EINVAL;

> > > +

> > > +		return NULL;

> > > +	}

> > > +

> > > +	dq = rte_zmalloc(NULL,

> > > +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> > > +		RTE_CACHE_LINE_SIZE);

> > > +	if (dq == NULL) {

> > > +		rte_errno = ENOMEM;

> > > +

> > > +		return NULL;

> > > +	}

> > > +

> > > +	/* round up qs_fifo_size to next power of two that is not less than

> > > +	 * max_size.

> > > +	 */

> > > +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> > > +					* params->size) + 1);

> > > +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> > > +					SOCKET_ID_ANY, 0);

> >

> > If it is going to be not MT safe, then why not to create the ring with

> > (RING_F_SP_ENQ | RING_F_SC_DEQ) flags set?

> Agree.

> 

> > Though I think it could be changed to allow MT safe multiple enqeue/single

> > dequeue, see below.

> The MT safe issue is due to reclaim code. The reclaim code has the following sequence:

> 

> rte_ring_peek

> rte_rcu_qsbr_check

> rte_ring_dequeue

> 

> This entire sequence needs to be atomic as the entry cannot be dequeued without knowing that the grace period for that entry is over.


I understand that, though I believe it should at least be possible to support a multiple-enqueuer/single-dequeuer-and-reclaim mode.
With a serialized dequeue(), even multiple dequeuers should be possible.

> Note that due to optimizations in rte_rcu_qsbr_check API, this sequence should not be large in most cases. I do not have ideas on how to

> make this sequence lock-free.

> 

> If the writer is on the control plane, most use cases will use mutex locks for synchronization if they are multi-threaded. That lock should be

> enough to provide the thread safety for these APIs.


In that case, why do we need a ring at all?
For sure people can create their own queue quite easily with a mutex and TAILQ.
If performance is not an issue, they can even add a pthread_cond to it, and give the consumer
the ability to sleep/wake up on an empty/full queue.

> 

> If the writer is multi-threaded and lock-free, then one should use per thread defer queue.


If that's the only working model, then the question is: why do we need that API at all?
Just a simple array with a counter, or a linked list, should do for the majority of cases.

> 

> >

> > > +	if (dq->r == NULL) {

> > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > +			"%s(): defer queue create failed\n", __func__);

> > > +		rte_free(dq);

> > > +		return NULL;

> > > +	}

> > > +

> > > +	dq->v = params->v;

> > > +	dq->size = params->size;

> > > +	dq->esize = params->esize;

> > > +	dq->f = params->f;

> > > +	dq->p = params->p;

> > > +

> > > +	return dq;

> > > +}

> > > +

> > > +/* Enqueue one resource to the defer queue to free after the grace

> > > + * period is over.

> > > + */

> > > +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e) {

> > > +	uint64_t token;

> > > +	uint64_t *tmp;

> > > +	uint32_t i;

> > > +	uint32_t cur_size, free_size;

> > > +

> > > +	if (dq == NULL || e == NULL) {

> > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > +			"%s(): Invalid input parameter\n", __func__);

> > > +		rte_errno = EINVAL;

> > > +

> > > +		return 1;

> >

> > Why just not to return -EINVAL straightway?

> > I think there is no much point to set rte_errno in that function at all, just

> > return value should do.

> I am trying to keep these consistent with the existing APIs. They return 0 or 1 and set the rte_errno.


A lot of public DPDK API functions use the return value to return a status code
(0 or some positive number on success, negative errno values on failure);
I am not inventing anything new here.

> 

> >

> > > +	}

> > > +

> > > +	/* Start the grace period */

> > > +	token = rte_rcu_qsbr_start(dq->v);

> > > +

> > > +	/* Reclaim resources if the queue is 1/8th full. This helps

> > > +	 * the queue from growing too large and allows time for reader

> > > +	 * threads to report their quiescent state.

> > > +	 */

> > > +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

> >

> > Probably would be a bit easier if you just store in dq->esize (elt size + token

> > size) / 8.

> Agree

> 

> >

> > > +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

> >

> > Why to make this threshold value hard-coded?

> > Why either not to put it into create parameter, or just return a special return

> > value, to indicate that threshold is reached?

> My thinking was to keep the programming interface easy to use. The more the parameters, the more painful it is for the user. IMO, the

> constants chosen should be good enough for most cases. More advanced users could modify the constants. However, we could make these

> as part of the parameters, but make them optional for the user. For ex: if they set them to 0, default values can be used.

> 

> > Or even return number of filled/free entroes on success, so caller can decide

> > to reclaim or not based on that information on his own?

> This means more code on the user side. 


I personally think it really wouldn't be that big a problem for the user to pass an extra parameter to the function.
Again, what if the user doesn't want to reclaim() in the enqueue() thread at all?

> I think adding these to parameters seems like a better option.

> 

> >

> > > +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > > +			"%s(): Triggering reclamation\n", __func__);

> > > +		rte_rcu_qsbr_dq_reclaim(dq);

> > > +	}

> > > +

> > > +	/* Check if there is space for atleast for 1 resource */

> > > +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> > > +	if (!free_size) {

> > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > +			"%s(): Defer queue is full\n", __func__);

> > > +		rte_errno = ENOSPC;

> > > +		return 1;

> > > +	}

> > > +

> > > +	/* Enqueue the resource */

> > > +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> > > +

> > > +	/* The resource to enqueue needs to be a multiple of 64b

> > > +	 * due to the limitation of the rte_ring implementation.

> > > +	 */

> > > +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> > > +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

> >

> >

> > That whole construction above looks a bit clumsy and error prone...

> > I suppose just:

> >

> > const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t free, n; ...

> > n = rte_ring_enqueue_bulk(dq->r, e, nb_elt, &free); if (n == 0)

> Yes, bulk enqueue can be used. But note that once the flexible element size ring patch is done, this code will use that.


Well, when it is in the mainline and provides a better way,
for sure this code can be updated to use the new API (if it provides some improvements).
But as I understand, right now it is not there, while bulk enqueue/dequeue are.

> 

> >   return -ENOSPC;

> > return free;

> >

> > That way I think you can have MT-safe version of that function.

> Please see the description of MT safe issue above.

> 

> >

> > > +

> > > +	return 0;

> > > +}

> > > +

> > > +/* Reclaim resources from the defer queue. */ int

> > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

> > > +	uint32_t max_cnt;

> > > +	uint32_t cnt;

> > > +	void *token;

> > > +	uint64_t *tmp;

> > > +	uint32_t i;

> > > +

> > > +	if (dq == NULL) {

> > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > +			"%s(): Invalid input parameter\n", __func__);

> > > +		rte_errno = EINVAL;

> > > +

> > > +		return 1;

> >

> > Same story as above - I think rte_errno is excessive in this function.

> > Just return value should be enough.

> >

> >

> > > +	}

> > > +

> > > +	/* Anything to reclaim? */

> > > +	if (rte_ring_count(dq->r) == 0)

> > > +		return 0;

> >

> > Not sure you need that, see below.

> >

> > > +

> > > +	/* Reclaim at the max 1/16th the total number of entries. */

> > > +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> > > +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> >

> > Again why not to make max_cnt a configurable at create() parameter?

> I think making this as an optional parameter for creating defer queue is a better option.

> 

> > Or even a parameter for that function?

> >

> > > +	cnt = 0;

> > > +

> > > +	/* Check reader threads quiescent state and reclaim resources */

> > > +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> > > +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> > > +			== 1)) {

> >

> >

> > > +		(void)rte_ring_sc_dequeue(dq->r, &token);

> > > +		/* The resource to dequeue needs to be a multiple of 64b

> > > +		 * due to the limitation of the rte_ring implementation.

> > > +		 */

> > > +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> > > +			i++, tmp++)

> > > +			(void)rte_ring_sc_dequeue(dq->r,

> > > +					(void *)(uintptr_t)tmp);

> >

> > Again, no need for such constructs with multiple dequeuer I believe.

> > Just:

> >

> > const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t n; uintptr_t

> > elt[nb_elt]; ...

> > n = rte_ring_dequeue_bulk(dq->r, elt, nb_elt, NULL); if (n != 0) {dq->f(dq->p,

> > elt);}

> Agree on bulk API use.

> 

> >

> > Seems enough.

> > Again in that case you can have enqueue/reclaim running in different threads

> > simultaneously, plus you don't need dq->e at all.

> Will check on dq->e

> 

> >

> > > +		dq->f(dq->p, dq->e);

> > > +

> > > +		cnt++;

> > > +	}

> > > +

> > > +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > > +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> > > +

> > > +	if (cnt == 0) {

> > > +		/* No resources were reclaimed */

> > > +		rte_errno = EAGAIN;

> > > +		return 1;

> > > +	}

> > > +

> > > +	return 0;

> >

> > I'd suggest to return cnt on success.

> I am trying to keep the APIs simple. I do not see much use for 'cnt' as return value to the user. It exposes more details which I think are

> internal to the library.


Not sure what the hassle is in returning the number of completed reclamations?
If the user doesn't need that information, he simply wouldn't use it.
But it might be useful - he can decide whether he should make another attempt
at reclaim() immediately or whether it is ok to do something else.

> 

> >

> > > +}

> > > +

> > > +/* Delete a defer queue. */

> > > +int

> > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

> > > +	if (dq == NULL) {

> > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > +			"%s(): Invalid input parameter\n", __func__);

> > > +		rte_errno = EINVAL;

> > > +

> > > +		return 1;

> > > +	}

> > > +

> > > +	/* Reclaim all the resources */

> > > +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> > > +		/* Error number is already set by the reclaim API */

> > > +		return 1;

> >

> > How do you know that you have reclaimed everything?

> Good point, will come back with a different solution.

> 

> >

> > > +

> > > +	rte_ring_free(dq->r);

> > > +	rte_free(dq);

> > > +

> > > +	return 0;

> > > +}

> > > +

> > >  int rte_rcu_log_type;

> > >

> > >  RTE_INIT(rte_rcu_register)

> > > diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

> > > b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a 100644

> > > --- a/lib/librte_rcu/rte_rcu_qsbr.h

> > > +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> > > @@ -34,6 +34,7 @@ extern "C" {

> > >  #include <rte_lcore.h>

> > >  #include <rte_debug.h>

> > >  #include <rte_atomic.h>

> > > +#include <rte_ring.h>

> > >

> > >  extern int rte_rcu_log_type;

> > >

> > > @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

> > >  	 */

> > >  } __rte_cache_aligned;

> > >

> > > +/**

> > > + * Call back function called to free the resources.

> > > + *

> > > + * @param p

> > > + *   Pointer provided while creating the defer queue

> > > + * @param e

> > > + *   Pointer to the resource data stored on the defer queue

> > > + *

> > > + * @return

> > > + *   None

> > > + */

> > > +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> >

> > Stylish thing - usually in DPDK we have typedf newtype_t ...

> > Though I am not sure you need a new typedef at all - just a function pointer

> > inside the struct seems enough.

> Other libraries (for ex: rte_hash) use this approach. I think it is better to keep it out of the structure to allow for better commenting.


I am saying the majority of DPDK code uses the _t suffix for typedefs:
typedef void (*rte_rcu_qsbr_free_resource_t)(void *p, void *e);

> 

> >

> > > +

> > > +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> > > +

> > > +/**

> > > + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> > > + */

> > > +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> > > +

> > > +/**

> > > + *  Reclaim at the max 1/16th the total number of resources.

> > > + */

> > > +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> >

> >

> > As I said above, I don't think these thresholds need to be hardcoded.

> > In any case, there seems not much point to put them in the public header file.

> >

> > > +

> > > +/**

> > > + * Parameters used when creating the defer queue.

> > > + */

> > > +struct rte_rcu_qsbr_dq_parameters {

> > > +	const char *name;

> > > +	/**< Name of the queue. */

> > > +	uint32_t size;

> > > +	/**< Number of entries in queue. Typically, this will be

> > > +	 *   the same as the maximum number of entries supported in the

> > > +	 *   lock free data structure.

> > > +	 *   Data structures with unbounded number of entries is not

> > > +	 *   supported currently.

> > > +	 */

> > > +	uint32_t esize;

> > > +	/**< Size (in bytes) of each element in the defer queue.

> > > +	 *   This has to be multiple of 8B as the rte_ring APIs

> > > +	 *   support 8B element sizes only.

> > > +	 */

> > > +	rte_rcu_qsbr_free_resource f;

> > > +	/**< Function to call to free the resource. */

> > > +	void *p;

> >

> > Style nit again - I like short names myself, but that seems a bit extreme... :)

> > Might be at least:

> > void (*reclaim)(void *, void *);

> May be 'free_fn'?

> 

> > void * reclaim_data;

> > ?

> This is the pointer to the data structure to free the resource into. For ex: In LPM data structure, it will be pointer to LPM. 'reclaim_data'

> does not convey the meaning correctly.


Ok, please feel free to come up with your own names.
I just wanted to say that 'f' and 'p' are a bit extreme for a public API.

> 

> >

> > > +	/**< Pointer passed to the free function. Typically, this is the

> > > +	 *   pointer to the data structure to which the resource to free

> > > +	 *   belongs. This can be NULL.

> > > +	 */

> > > +	struct rte_rcu_qsbr *v;

> >

> > Does it need to be inside that struct?

> > Might be better:

> > rte_rcu_qsbr_dq_create(struct rte_rcu_qsbr *v, const struct

> > rte_rcu_qsbr_dq_parameters *params);

> The API takes a parameter structure as input anyway, why to add another argument to the function? QSBR variable is also another

> parameter.

> 

> >

> > Another alternative: make both reclaim() and enqueue() to take v as a

> > parameter.

> But both of them need access to some of the parameters provided in rte_rcu_qsbr_dq_create API. We would end up passing 2 arguments to

> the functions.


Purely a style thing.
From my perspective it just provides better visibility into what is going on in the code:
For QSBR var 'v' create a new deferred queue.
But no strong opinion here.

> 

> >

> > > +	/**< RCU QSBR variable to use for this defer queue */ };

> > > +

> > > +/* RTE defer queue structure.

> > > + * This structure holds the defer queue. The defer queue is used to

> > > + * hold the deleted entries from the data structure that are not

> > > + * yet freed.

> > > + */

> > > +struct rte_rcu_qsbr_dq;

> > > +

> > >  /**

> > >   * @warning

> > >   * @b EXPERIMENTAL: this API may change without prior notice @@

> > > -648,6 +710,113 @@ __rte_experimental  int  rte_rcu_qsbr_dump(FILE *f,

> > > struct rte_rcu_qsbr *v);

> > >

> > > +/**

> > > + * @warning

> > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > + *

> > > + * Create a queue used to store the data structure elements that can

> > > + * be freed later. This queue is referred to as 'defer queue'.

> > > + *

> > > + * @param params

> > > + *   Parameters to create a defer queue.

> > > + * @return

> > > + *   On success - Valid pointer to defer queue

> > > + *   On error - NULL

> > > + *   Possible rte_errno codes are:

> > > + *   - EINVAL - NULL parameters are passed

> > > + *   - ENOMEM - Not enough memory

> > > + */

> > > +__rte_experimental

> > > +struct rte_rcu_qsbr_dq *

> > > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > > +*params);

> > > +

> > > +/**

> > > + * @warning

> > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > + *

> > > + * Enqueue one resource to the defer queue and start the grace period.

> > > + * The resource will be freed later after at least one grace period

> > > + * is over.

> > > + *

> > > + * If the defer queue is full, it will attempt to reclaim resources.

> > > + * It will also reclaim resources at regular intervals to avoid

> > > + * the defer queue from growing too big.

> > > + *

> > > + * This API is not multi-thread safe. It is expected that the caller

> > > + * provides multi-thread safety by locking a mutex or some other means.

> > > + *

> > > + * A lock free multi-thread writer algorithm could achieve

> > > +multi-thread

> > > + * safety by creating and using one defer queue per thread.

> > > + *

> > > + * @param dq

> > > + *   Defer queue to allocate an entry from.

> > > + * @param e

> > > + *   Pointer to resource data to copy to the defer queue. The size of

> > > + *   the data to copy is equal to the element size provided when the

> > > + *   defer queue was created.

> > > + * @return

> > > + *   On success - 0

> > > + *   On error - 1 with rte_errno set to

> > > + *   - EINVAL - NULL parameters are passed

> > > + *   - ENOSPC - Defer queue is full. This condition can not happen

> > > + *		if the defer queue size is equal (or larger) than the

> > > + *		number of elements in the data structure.

> > > + */

> > > +__rte_experimental

> > > +int

> > > +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> > > +

> > > +/**

> > > + * @warning

> > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > + *

> > > + * Reclaim resources from the defer queue.

> > > + *

> > > + * This API is not multi-thread safe. It is expected that the caller

> > > + * provides multi-thread safety by locking a mutex or some other means.

> > > + *

> > > + * A lock free multi-thread writer algorithm could achieve

> > > +multi-thread

> > > + * safety by creating and using one defer queue per thread.

> > > + *

> > > + * @param dq

> > > + *   Defer queue to reclaim an entry from.

> > > + * @return

> > > + *   On successful reclamation of at least 1 resource - 0

> > > + *   On error - 1 with rte_errno set to

> > > + *   - EINVAL - NULL parameters are passed

> > > + *   - EAGAIN - None of the resources have completed at least 1 grace

> > period,

> > > + *		try again.

> > > + */

> > > +__rte_experimental

> > > +int

> > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> > > +

> > > +/**

> > > + * @warning

> > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > + *

> > > + * Delete a defer queue.

> > > + *

> > > + * It tries to reclaim all the resources on the defer queue.

> > > + * If any of the resources have not completed the grace period

> > > + * the reclamation stops and returns immediately. The rest of

> > > + * the resources are not reclaimed and the defer queue is not

> > > + * freed.

> > > + *

> > > + * @param dq

> > > + *   Defer queue to delete.

> > > + * @return

> > > + *   On success - 0

> > > + *   On error - 1

> > > + *   Possible rte_errno codes are:

> > > + *   - EINVAL - NULL parameters are passed

> > > + *   - EAGAIN - Some of the resources have not completed at least 1 grace

> > > + *		period, try again.

> > > + */

> > > +__rte_experimental

> > > +int

> > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> > > +

> > >  #ifdef __cplusplus

> > >  }

> > >  #endif

> > > diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > new file mode 100644

> > > index 000000000..2122bc36a

> > > --- /dev/null

> > > +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> >

> > Again style suggestion: as it is not public header - don't use rte_ prefix for

> > naming.

> > From my perspective - easier to relalize for reader what is public header,

> > what is not.

> Looks like the guidelines are not defined very well. I see one private file with rte_ prefix. I see Stephen not using rte_ prefix. I do not have

> any preference. But, a consistent approach is required.


That's just a suggestion.
For me (and I hope for others) it would be a bit easier.
When looking at the code for the first time I had to look at meson.build to check
whether it is a public header or not.
If the file doesn't have the 'rte_' prefix, I assume that it is an internal one straight away.
But, as you said, there are no exact guidelines here, so it is up to you to decide.

> 

> >

> > > @@ -0,0 +1,46 @@

> > > +/* SPDX-License-Identifier: BSD-3-Clause

> > > + * Copyright (c) 2019 Arm Limited

> > > + */

> > > +

> > > +#ifndef _RTE_RCU_QSBR_PVT_H_

> > > +#define _RTE_RCU_QSBR_PVT_H_

> > > +

> > > +/**

> > > + * This file is private to the RCU library. It should not be included

> > > + * by the user of this library.

> > > + */

> > > +

> > > +#ifdef __cplusplus

> > > +extern "C" {

> > > +#endif

> > > +

> > > +#include "rte_rcu_qsbr.h"

> > > +

> > > +/* RTE defer queue structure.

> > > + * This structure holds the defer queue. The defer queue is used to

> > > + * hold the deleted entries from the data structure that are not

> > > + * yet freed.

> > > + */

> > > +struct rte_rcu_qsbr_dq {

> > > +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> > > +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> > > +	uint32_t size;

> > > +	/**< Number of elements in the defer queue */

> > > +	uint32_t esize;

> > > +	/**< Size (in bytes) of data stored on the defer queue */

> > > +	rte_rcu_qsbr_free_resource f;

> > > +	/**< Function to call to free the resource. */

> > > +	void *p;

> > > +	/**< Pointer passed to the free function. Typically, this is the

> > > +	 *   pointer to the data structure to which the resource to free

> > > +	 *   belongs.

> > > +	 */

> > > +	char e[0];

> > > +	/**< Temporary storage to copy the defer queue element. */

> >

> > Do you really need 'e' at all?

> > Can't it be just temporary stack variable?

> Ok, will check.

> 

> >

> > > +};

> > > +

> > > +#ifdef __cplusplus

> > > +}

> > > +#endif

> > > +

> > > +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> > > diff --git a/lib/librte_rcu/rte_rcu_version.map

> > > b/lib/librte_rcu/rte_rcu_version.map

> > > index f8b9ef2ab..dfac88a37 100644

> > > --- a/lib/librte_rcu/rte_rcu_version.map

> > > +++ b/lib/librte_rcu/rte_rcu_version.map

> > > @@ -8,6 +8,10 @@ EXPERIMENTAL {

> > >  	rte_rcu_qsbr_synchronize;

> > >  	rte_rcu_qsbr_thread_register;

> > >  	rte_rcu_qsbr_thread_unregister;

> > > +	rte_rcu_qsbr_dq_create;

> > > +	rte_rcu_qsbr_dq_enqueue;

> > > +	rte_rcu_qsbr_dq_reclaim;

> > > +	rte_rcu_qsbr_dq_delete;

> > >

> > >  	local: *;

> > >  };

> > > diff --git a/lib/meson.build b/lib/meson.build index

> > > e5ff83893..0e1be8407 100644

> > > --- a/lib/meson.build

> > > +++ b/lib/meson.build

> > > @@ -11,7 +11,9 @@

> > >  libraries = [

> > >  	'kvargs', # eal depends on kvargs

> > >  	'eal', # everything depends on eal

> > > -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > > +	'ring',

> > > +	'rcu', # rcu depends on ring

> > > +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > >  	'cmdline',

> > >  	'metrics', # bitrate/latency stats depends on this

> > >  	'hash',    # efd depends on this

> > > @@ -22,7 +24,7 @@ libraries = [

> > >  	'gro', 'gso', 'ip_frag', 'jobstats',

> > >  	'kni', 'latencystats', 'lpm', 'member',

> > >  	'power', 'pdump', 'rawdev',

> > > -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> > > +	'reorder', 'sched', 'security', 'stack', 'vhost',

> > >  	# ipsec lib depends on net, crypto and security

> > >  	'ipsec',

> > >  	# add pkt framework libs which use other libs from above

> > > --

> > > 2.17.1

Honnappa Nagarahalli Oct. 4, 2019, 6:07 a.m. UTC | #7

> 

> Hi Honnappa,

> 

> > > > Add resource reclamation APIs to make it simple for applications

> > > > and libraries to integrate rte_rcu library.

> > > >

> > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> > > > Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> > > > ---

> > > >  app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

> > > >  lib/librte_rcu/meson.build         |   2 +

> > > >  lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

> > > >  lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

> > > >  lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

> > > >  lib/librte_rcu/rte_rcu_version.map |   4 +

> > > >  lib/meson.build                    |   6 +-

> > > >  7 files changed, 700 insertions(+), 3 deletions(-)  create mode

> > > > 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > >

> > > > diff --git a/lib/librte_rcu/rte_rcu_qsbr.c

> > > > b/lib/librte_rcu/rte_rcu_qsbr.c index ce7f93dd3..76814f50b 100644

> > > > --- a/lib/librte_rcu/rte_rcu_qsbr.c

> > > > +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> > > > @@ -21,6 +21,7 @@

> > > >  #include <rte_errno.h>

> > > >

> > > >  #include "rte_rcu_qsbr.h"

> > > > +#include "rte_rcu_qsbr_pvt.h"

> > > >

> > > >  /* Get the memory size of QSBR variable */  size_t @@ -267,6

> > > > +268,190 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)

> > > >  	return 0;

> > > >  }

> > > >

> > > > +/* Create a queue used to store the data structure elements that

> > > > +can

> > > > + * be freed later. This queue is referred to as 'defer queue'.

> > > > + */

> > > > +struct rte_rcu_qsbr_dq *

> > > > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > > > +*params) {

> > > > +	struct rte_rcu_qsbr_dq *dq;

> > > > +	uint32_t qs_fifo_size;

> > > > +

> > > > +	if (params == NULL || params->f == NULL ||

> > > > +		params->v == NULL || params->name == NULL ||

> > > > +		params->size == 0 || params->esize == 0 ||

> > > > +		(params->esize % 8 != 0)) {

> > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > +		rte_errno = EINVAL;

> > > > +

> > > > +		return NULL;

> > > > +	}

> > > > +

> > > > +	dq = rte_zmalloc(NULL,

> > > > +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> > > > +		RTE_CACHE_LINE_SIZE);

> > > > +	if (dq == NULL) {

> > > > +		rte_errno = ENOMEM;

> > > > +

> > > > +		return NULL;

> > > > +	}

> > > > +

> > > > +	/* round up qs_fifo_size to next power of two that is not less than

> > > > +	 * max_size.

> > > > +	 */

> > > > +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> > > > +					* params->size) + 1);

> > > > +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> > > > +					SOCKET_ID_ANY, 0);

> > >

> > > If it is going to be not MT safe, then why not to create the ring

> > > with (RING_F_SP_ENQ | RING_F_SC_DEQ) flags set?

> > Agree.

> >

> > > Though I think it could be changed to allow MT safe multiple

> > > enqeue/single dequeue, see below.

> > The MT safe issue is due to reclaim code. The reclaim code has the following

> sequence:

> >

> > rte_ring_peek

> > rte_rcu_qsbr_check

> > rte_ring_dequeue

> >

> > This entire sequence needs to be atomic as the entry cannot be dequeued

> without knowing that the grace period for that entry is over.

> 

> I understand that, though I believe at least it should be possible to support

> multiple-enqueue/single dequeuer and reclaim mode.

> With serialized dequeue() even multiple dequeue should be possible.

Agreed. Please see the response on the other thread.

> 

> > Note that due to optimizations in rte_rcu_qsbr_check API, this

> > sequence should not be large in most cases. I do not have ideas on how to

> make this sequence lock-free.

> >

> > If the writer is on the control plane, most use cases will use mutex

> > locks for synchronization if they are multi-threaded. That lock should be

> enough to provide the thread safety for these APIs.

> 

> In that is case, why do we need ring at all?

> For sure people can create their own queue quite easily with mutex and TAILQ.

> If performance is not an issue, they can even add pthread_cond to it, and have

> an ability for the consumer to sleep/wakeup on empty/full queue.

> 

> >

> > If the writer is multi-threaded and lock-free, then one should use per thread

> defer queue.

> 

> If that's the only working model, then the question is why do we need that API

> at all?

> Just simple array with counter or linked-list should do for majority of cases.

Please see the other thread.

> 

> >

> > >

> > > > +	if (dq->r == NULL) {

> > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > +			"%s(): defer queue create failed\n", __func__);

> > > > +		rte_free(dq);

> > > > +		return NULL;

> > > > +	}

> > > > +

> > > > +	dq->v = params->v;

> > > > +	dq->size = params->size;

> > > > +	dq->esize = params->esize;

> > > > +	dq->f = params->f;

> > > > +	dq->p = params->p;

> > > > +

> > > > +	return dq;

> > > > +}

> > > > +

> > > > +/* Enqueue one resource to the defer queue to free after the

> > > > +grace

> > > > + * period is over.

> > > > + */

> > > > +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e) {

> > > > +	uint64_t token;

> > > > +	uint64_t *tmp;

> > > > +	uint32_t i;

> > > > +	uint32_t cur_size, free_size;

> > > > +

> > > > +	if (dq == NULL || e == NULL) {

> > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > +		rte_errno = EINVAL;

> > > > +

> > > > +		return 1;

> > >

> > > Why just not to return -EINVAL straightway?

> > > I think there is no much point to set rte_errno in that function at

> > > all, just return value should do.

> > I am trying to keep these consistent with the existing APIs. They return 0 or 1

> and set the rte_errno.

> 

> A lot of public DPDK API functions do use return value to return status code (0,

> or some positive numbers of success, negative errno values on failure), I am

> not inventing anything new here.

Agree, you are not proposing a new thing here. Maybe I was not clear. I really do not have an opinion on how this should be done. But, I do have an opinion on consistency. These new APIs follow what has been done in the existing RCU APIs. I think we have 2 options here.
1) Either we change existing RCU APIs to get rid of rte_errno (is it an ABI change?) or
2) The new APIs follow what has been done in the existing RCU APIs.
I want to make sure we are consistent at least within RCU APIs.

> 

> >

> > >

> > > > +	}

> > > > +

> > > > +	/* Start the grace period */

> > > > +	token = rte_rcu_qsbr_start(dq->v);

> > > > +

> > > > +	/* Reclaim resources if the queue is 1/8th full. This helps

> > > > +	 * the queue from growing too large and allows time for reader

> > > > +	 * threads to report their quiescent state.

> > > > +	 */

> > > > +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

> > >

> > > Probably would be a bit easier if you just store in dq->esize (elt

> > > size + token

> > > size) / 8.

> > Agree

> >

> > >

> > > > +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

> > >

> > > Why to make this threshold value hard-coded?

> > > Why either not to put it into create parameter, or just return a

> > > special return value, to indicate that threshold is reached?

> > My thinking was to keep the programming interface easy to use. The

> > more the parameters, the more painful it is for the user. IMO, the

> > constants chosen should be good enough for most cases. More advanced

> users could modify the constants. However, we could make these as part of the

> parameters, but make them optional for the user. For ex: if they set them to 0,

> default values can be used.

> >

> > > Or even return number of filled/free entroes on success, so caller

> > > can decide to reclaim or not based on that information on his own?

> > This means more code on the user side.

> 

> I personally think it it really wouldn't be that big problem to the user to pass

> extra parameter to the function.

I will convert the 2 constants into optional parameters (user can set them to 0 to make the algorithm use default values)

> Again what if user doesn't want to reclaim() in enqueue() thread at all?

'enqueue' has to do reclamation if the defer queue is full. I do not think this is trivial.

In the current design, reclamation in enqueue is also done on regular basis (automatic triggering of reclamation when the queue reaches certain limit) to keep the queue from growing too large. This is required when we implement a dynamically adjusting defer queue. The current algorithm keeps the cost of reclamation spread across multiple calls and puts an upper bound on cycles for delete API by reclaiming a fixed number of entries.

This algorithm is proven to work in the LPM integration performance tests at a very low performance overhead (~1%). So, I do not know why a user would not want to use this. The 2 additional parameters should give the user more flexibility.

However, if the user wants his own algorithm, he can create one with the base APIs provided.

> 

> > I think adding these to parameters seems like a better option.

> >

> > >

> > > > +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > > > +			"%s(): Triggering reclamation\n", __func__);

> > > > +		rte_rcu_qsbr_dq_reclaim(dq);

> > > > +	}

> > > > +

> > > > +	/* Check if there is space for atleast for 1 resource */

> > > > +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> > > > +	if (!free_size) {

> > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > +			"%s(): Defer queue is full\n", __func__);

> > > > +		rte_errno = ENOSPC;

> > > > +		return 1;

> > > > +	}

> > > > +

> > > > +	/* Enqueue the resource */

> > > > +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> > > > +

> > > > +	/* The resource to enqueue needs to be a multiple of 64b

> > > > +	 * due to the limitation of the rte_ring implementation.

> > > > +	 */

> > > > +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> > > > +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

> > >

> > >

> > > That whole construction above looks a bit clumsy and error prone...

> > > I suppose just:

> > >

> > > const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t free, n; ...

> > > n = rte_ring_enqueue_bulk(dq->r, e, nb_elt, &free); if (n == 0)

> > Yes, bulk enqueue can be used. But note that once the flexible element size

> ring patch is done, this code will use that.

> 

> Well, when it will be in the mainline, and it would provide a better way, for sure

> this code can be updated to use new API (if it is provide some improvements).

> But as I udenrstand, right now it is not there, while bulk enqueue/dequeue are.

Apologies, I was not clear. I agree we can go with bulk APIs for now.

> 

> >

> > >   return -ENOSPC;

> > > return free;

> > >

> > > That way I think you can have MT-safe version of that function.

> > Please see the description of MT safe issue above.

> >

> > >

> > > > +

> > > > +	return 0;

> > > > +}

> > > > +

> > > > +/* Reclaim resources from the defer queue. */ int

> > > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

> > > > +	uint32_t max_cnt;

> > > > +	uint32_t cnt;

> > > > +	void *token;

> > > > +	uint64_t *tmp;

> > > > +	uint32_t i;

> > > > +

> > > > +	if (dq == NULL) {

> > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > +		rte_errno = EINVAL;

> > > > +

> > > > +		return 1;

> > >

> > > Same story as above - I think rte_errno is excessive in this function.

> > > Just return value should be enough.

> > >

> > >

> > > > +	}

> > > > +

> > > > +	/* Anything to reclaim? */

> > > > +	if (rte_ring_count(dq->r) == 0)

> > > > +		return 0;

> > >

> > > Not sure you need that, see below.

> > >

> > > > +

> > > > +	/* Reclaim at the max 1/16th the total number of entries. */

> > > > +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> > > > +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> > >

> > > Again why not to make max_cnt a configurable at create() parameter?

> > I think making this as an optional parameter for creating defer queue is a

> better option.

> >

> > > Or even a parameter for that function?

> > >

> > > > +	cnt = 0;

> > > > +

> > > > +	/* Check reader threads quiescent state and reclaim resources */

> > > > +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> > > > +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> > > > +			== 1)) {

> > >

> > >

> > > > +		(void)rte_ring_sc_dequeue(dq->r, &token);

> > > > +		/* The resource to dequeue needs to be a multiple of 64b

> > > > +		 * due to the limitation of the rte_ring implementation.

> > > > +		 */

> > > > +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> > > > +			i++, tmp++)

> > > > +			(void)rte_ring_sc_dequeue(dq->r,

> > > > +					(void *)(uintptr_t)tmp);

> > >

> > > Again, no need for such constructs with multiple dequeuer I believe.

> > > Just:

> > >

> > > const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t n; uintptr_t

> > > elt[nb_elt]; ...

> > > n = rte_ring_dequeue_bulk(dq->r, elt, nb_elt, NULL); if (n != 0)

> > > {dq->f(dq->p, elt);}

> > Agree on bulk API use.

> >

> > >

> > > Seems enough.

> > > Again in that case you can have enqueue/reclaim running in different

> > > threads simultaneously, plus you don't need dq->e at all.

> > Will check on dq->e

> >

> > >

> > > > +		dq->f(dq->p, dq->e);

> > > > +

> > > > +		cnt++;

> > > > +	}

> > > > +

> > > > +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > > > +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> > > > +

> > > > +	if (cnt == 0) {

> > > > +		/* No resources were reclaimed */

> > > > +		rte_errno = EAGAIN;

> > > > +		return 1;

> > > > +	}

> > > > +

> > > > +	return 0;

> > >

> > > I'd suggest to return cnt on success.

> > I am trying to keep the APIs simple. I do not see much use for 'cnt'

> > as return value to the user. It exposes more details which I think are internal

> to the library.

> 

> Not sure what is the hassle to return number of completed reclamaitions?

> If user doesn't need that information, he simply wouldn't use it.

> But might be it would be usefull - he can decide should he try another attempt

> of reclaim() immediately or is it ok to do something else.

There is no hassle to return that information.

As per the current design, user calls 'reclaim' when it is out of resources while adding an entry to the data structure. At that point the user wants to know if at least 1 resource was reclaimed because the user has to allocate 1 resource. He does not have a use for the number of resources reclaimed.

If this API returns 0, then the user can decide to repeat the call or return failure. But that decision depends on the length of the grace period which is under user's control.

> 

> >

> > >

> > > > +}

> > > > +

> > > > +/* Delete a defer queue. */

> > > > +int

> > > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

> > > > +	if (dq == NULL) {

> > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > +		rte_errno = EINVAL;

> > > > +

> > > > +		return 1;

> > > > +	}

> > > > +

> > > > +	/* Reclaim all the resources */

> > > > +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> > > > +		/* Error number is already set by the reclaim API */

> > > > +		return 1;

> > >

> > > How do you know that you have reclaimed everything?

> > Good point, will come back with a different solution.

> >

> > >

> > > > +

> > > > +	rte_ring_free(dq->r);

> > > > +	rte_free(dq);

> > > > +

> > > > +	return 0;

> > > > +}

> > > > +

> > > >  int rte_rcu_log_type;

> > > >

> > > >  RTE_INIT(rte_rcu_register)

> > > > diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

> > > > b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a 100644

> > > > --- a/lib/librte_rcu/rte_rcu_qsbr.h

> > > > +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> > > > @@ -34,6 +34,7 @@ extern "C" {

> > > >  #include <rte_lcore.h>

> > > >  #include <rte_debug.h>

> > > >  #include <rte_atomic.h>

> > > > +#include <rte_ring.h>

> > > >

> > > >  extern int rte_rcu_log_type;

> > > >

> > > > @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

> > > >  	 */

> > > >  } __rte_cache_aligned;

> > > >

> > > > +/**

> > > > + * Call back function called to free the resources.

> > > > + *

> > > > + * @param p

> > > > + *   Pointer provided while creating the defer queue

> > > > + * @param e

> > > > + *   Pointer to the resource data stored on the defer queue

> > > > + *

> > > > + * @return

> > > > + *   None

> > > > + */

> > > > +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> > >

> > > Stylish thing - usually in DPDK we have typedf newtype_t ...

> > > Though I am not sure you need a new typedef at all - just a function

> > > pointer inside the struct seems enough.

> > Other libraries (for ex: rte_hash) use this approach. I think it is better to keep

> it out of the structure to allow for better commenting.

> 

> I am saying majority of DPDK code use _t suffix for typedef:

> typedef void (*rte_rcu_qsbr_free_resource_t)(void *p, void *e);

Apologies, got it, will change.

> 

> >

> > >

> > > > +

> > > > +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> > > > +

> > > > +/**

> > > > + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> > > > + */

> > > > +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> > > > +

> > > > +/**

> > > > + *  Reclaim at the max 1/16th the total number of resources.

> > > > + */

> > > > +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> > >

> > >

> > > As I said above, I don't think these thresholds need to be hardcoded.

> > > In any case, there seems not much point to put them in the public header

> file.

> > >

> > > > +

> > > > +/**

> > > > + * Parameters used when creating the defer queue.

> > > > + */

> > > > +struct rte_rcu_qsbr_dq_parameters {

> > > > +	const char *name;

> > > > +	/**< Name of the queue. */

> > > > +	uint32_t size;

> > > > +	/**< Number of entries in queue. Typically, this will be

> > > > +	 *   the same as the maximum number of entries supported in the

> > > > +	 *   lock free data structure.

> > > > +	 *   Data structures with unbounded number of entries is not

> > > > +	 *   supported currently.

> > > > +	 */

> > > > +	uint32_t esize;

> > > > +	/**< Size (in bytes) of each element in the defer queue.

> > > > +	 *   This has to be multiple of 8B as the rte_ring APIs

> > > > +	 *   support 8B element sizes only.

> > > > +	 */

> > > > +	rte_rcu_qsbr_free_resource f;

> > > > +	/**< Function to call to free the resource. */

> > > > +	void *p;

> > >

> > > Style nit again - I like short names myself, but that seems a bit

> > > extreme... :) Might be at least:

> > > void (*reclaim)(void *, void *);

> > May be 'free_fn'?

> >

> > > void * reclaim_data;

> > > ?

> > This is the pointer to the data structure to free the resource into. For ex: In

> LPM data structure, it will be pointer to LPM. 'reclaim_data'

> > does not convey the meaning correctly.

> 

> Ok, please feel free to come up with your own names.

> I just wanted to say that 'f' and 'p' are a bit extreme for a public API.

ok, this is the hardest thing to do 😊

> 

> >

> > >

> > > > +	/**< Pointer passed to the free function. Typically, this is the

> > > > +	 *   pointer to the data structure to which the resource to free

> > > > +	 *   belongs. This can be NULL.

> > > > +	 */

> > > > +	struct rte_rcu_qsbr *v;

> > >

> > > Does it need to be inside that struct?

> > > Might be better:

> > > rte_rcu_qsbr_dq_create(struct rte_rcu_qsbr *v, const struct

> > > rte_rcu_qsbr_dq_parameters *params);

> > The API takes a parameter structure as input anyway, why to add

> > another argument to the function? QSBR variable is also another parameter.

> >

> > >

> > > Another alternative: make both reclaim() and enqueue() to take v as

> > > a parameter.

> > But both of them need access to some of the parameters provided in

> > rte_rcu_qsbr_dq_create API. We would end up passing 2 arguments to the

> functions.

> 

> Purely a style thing.

> From my perspective it just provides better visibility of what is going on in the code:

> For QSBR var 'v' create a new deferred queue.

> But no strong opinion here.

> 

> >

> > >

> > > > +	/**< RCU QSBR variable to use for this defer queue */ };

> > > > +

> > > > +/* RTE defer queue structure.

> > > > + * This structure holds the defer queue. The defer queue is used

> > > > +to

> > > > + * hold the deleted entries from the data structure that are not

> > > > + * yet freed.

> > > > + */

> > > > +struct rte_rcu_qsbr_dq;

> > > > +

> > > >  /**

> > > >   * @warning

> > > >   * @b EXPERIMENTAL: this API may change without prior notice @@

> > > > -648,6 +710,113 @@ __rte_experimental  int  rte_rcu_qsbr_dump(FILE

> > > > *f, struct rte_rcu_qsbr *v);

> > > >

> > > > +/**

> > > > + * @warning

> > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > + *

> > > > + * Create a queue used to store the data structure elements that

> > > > +can

> > > > + * be freed later. This queue is referred to as 'defer queue'.

> > > > + *

> > > > + * @param params

> > > > + *   Parameters to create a defer queue.

> > > > + * @return

> > > > + *   On success - Valid pointer to defer queue

> > > > + *   On error - NULL

> > > > + *   Possible rte_errno codes are:

> > > > + *   - EINVAL - NULL parameters are passed

> > > > + *   - ENOMEM - Not enough memory

> > > > + */

> > > > +__rte_experimental

> > > > +struct rte_rcu_qsbr_dq *

> > > > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > > > +*params);

> > > > +

> > > > +/**

> > > > + * @warning

> > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > + *

> > > > + * Enqueue one resource to the defer queue and start the grace period.

> > > > + * The resource will be freed later after at least one grace

> > > > +period

> > > > + * is over.

> > > > + *

> > > > + * If the defer queue is full, it will attempt to reclaim resources.

> > > > + * It will also reclaim resources at regular intervals to avoid

> > > > + * the defer queue from growing too big.

> > > > + *

> > > > + * This API is not multi-thread safe. It is expected that the

> > > > +caller

> > > > + * provides multi-thread safety by locking a mutex or some other means.

> > > > + *

> > > > + * A lock free multi-thread writer algorithm could achieve

> > > > +multi-thread

> > > > + * safety by creating and using one defer queue per thread.

> > > > + *

> > > > + * @param dq

> > > > + *   Defer queue to allocate an entry from.

> > > > + * @param e

> > > > + *   Pointer to resource data to copy to the defer queue. The size of

> > > > + *   the data to copy is equal to the element size provided when the

> > > > + *   defer queue was created.

> > > > + * @return

> > > > + *   On success - 0

> > > > + *   On error - 1 with rte_errno set to

> > > > + *   - EINVAL - NULL parameters are passed

> > > > + *   - ENOSPC - Defer queue is full. This condition can not happen

> > > > + *		if the defer queue size is equal (or larger) than the

> > > > + *		number of elements in the data structure.

> > > > + */

> > > > +__rte_experimental

> > > > +int

> > > > +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> > > > +

> > > > +/**

> > > > + * @warning

> > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > + *

> > > > + * Reclaim resources from the defer queue.

> > > > + *

> > > > + * This API is not multi-thread safe. It is expected that the

> > > > +caller

> > > > + * provides multi-thread safety by locking a mutex or some other means.

> > > > + *

> > > > + * A lock free multi-thread writer algorithm could achieve

> > > > +multi-thread

> > > > + * safety by creating and using one defer queue per thread.

> > > > + *

> > > > + * @param dq

> > > > + *   Defer queue to reclaim an entry from.

> > > > + * @return

> > > > + *   On successful reclamation of at least 1 resource - 0

> > > > + *   On error - 1 with rte_errno set to

> > > > + *   - EINVAL - NULL parameters are passed

> > > > + *   - EAGAIN - None of the resources have completed at least 1 grace

> > > period,

> > > > + *		try again.

> > > > + */

> > > > +__rte_experimental

> > > > +int

> > > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> > > > +

> > > > +/**

> > > > + * @warning

> > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > + *

> > > > + * Delete a defer queue.

> > > > + *

> > > > + * It tries to reclaim all the resources on the defer queue.

> > > > + * If any of the resources have not completed the grace period

> > > > + * the reclamation stops and returns immediately. The rest of

> > > > + * the resources are not reclaimed and the defer queue is not

> > > > + * freed.

> > > > + *

> > > > + * @param dq

> > > > + *   Defer queue to delete.

> > > > + * @return

> > > > + *   On success - 0

> > > > + *   On error - 1

> > > > + *   Possible rte_errno codes are:

> > > > + *   - EINVAL - NULL parameters are passed

> > > > + *   - EAGAIN - Some of the resources have not completed at least 1

> grace

> > > > + *		period, try again.

> > > > + */

> > > > +__rte_experimental

> > > > +int

> > > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> > > > +

> > > >  #ifdef __cplusplus

> > > >  }

> > > >  #endif

> > > > diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > > b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > > new file mode 100644

> > > > index 000000000..2122bc36a

> > > > --- /dev/null

> > > > +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > >

> > > Again style suggestion: as it is not public header - don't use rte_

> > > prefix for naming.

> > > From my perspective - it is easier for the reader to realize what is a public

> > > header and what is not.

> > Looks like the guidelines are not defined very well. I see one private

> > file with rte_ prefix. I see Stephen not using rte_ prefix. I do not have any

> preference. But, a consistent approach is required.

> 

> That's just a suggestion.

> For me (and I hope for others) it would be a bit easier.

> When looking at the code for the first time I had to look at meson.build to check whether

> it is a public header or not.

> If the file doesn't have the 'rte_' prefix, I assume that it is an internal one

> straight away.

> But, as you said, there are no exact guidelines here, so it is up to you to decide.

I think it makes sense to remove 'rte_' prefix. I will also change the file name to have '_private' suffix.
There are some inconsistencies in the existing code, will send a patch to correct them to follow this approach.

> 

> >

> > >

> > > > @@ -0,0 +1,46 @@

> > > > +/* SPDX-License-Identifier: BSD-3-Clause

> > > > + * Copyright (c) 2019 Arm Limited  */

> > > > +

> > > > +#ifndef _RTE_RCU_QSBR_PVT_H_

> > > > +#define _RTE_RCU_QSBR_PVT_H_

> > > > +

> > > > +/**

> > > > + * This file is private to the RCU library. It should not be

> > > > +included

> > > > + * by the user of this library.

> > > > + */

> > > > +

> > > > +#ifdef __cplusplus

> > > > +extern "C" {

> > > > +#endif

> > > > +

> > > > +#include "rte_rcu_qsbr.h"

> > > > +

> > > > +/* RTE defer queue structure.

> > > > + * This structure holds the defer queue. The defer queue is used

> > > > +to

> > > > + * hold the deleted entries from the data structure that are not

> > > > + * yet freed.

> > > > + */

> > > > +struct rte_rcu_qsbr_dq {

> > > > +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> > > > +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> > > > +	uint32_t size;

> > > > +	/**< Number of elements in the defer queue */

> > > > +	uint32_t esize;

> > > > +	/**< Size (in bytes) of data stored on the defer queue */

> > > > +	rte_rcu_qsbr_free_resource f;

> > > > +	/**< Function to call to free the resource. */

> > > > +	void *p;

> > > > +	/**< Pointer passed to the free function. Typically, this is the

> > > > +	 *   pointer to the data structure to which the resource to free

> > > > +	 *   belongs.

> > > > +	 */

> > > > +	char e[0];

> > > > +	/**< Temporary storage to copy the defer queue element. */

> > >

> > > Do you really need 'e' at all?

> > > Can't it be just temporary stack variable?

> > Ok, will check.

> >

> > >

> > > > +};

> > > > +

> > > > +#ifdef __cplusplus

> > > > +}

> > > > +#endif

> > > > +

> > > > +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> > > > diff --git a/lib/librte_rcu/rte_rcu_version.map

> > > > b/lib/librte_rcu/rte_rcu_version.map

> > > > index f8b9ef2ab..dfac88a37 100644

> > > > --- a/lib/librte_rcu/rte_rcu_version.map

> > > > +++ b/lib/librte_rcu/rte_rcu_version.map

> > > > @@ -8,6 +8,10 @@ EXPERIMENTAL {

> > > >  	rte_rcu_qsbr_synchronize;

> > > >  	rte_rcu_qsbr_thread_register;

> > > >  	rte_rcu_qsbr_thread_unregister;

> > > > +	rte_rcu_qsbr_dq_create;

> > > > +	rte_rcu_qsbr_dq_enqueue;

> > > > +	rte_rcu_qsbr_dq_reclaim;

> > > > +	rte_rcu_qsbr_dq_delete;

> > > >

> > > >  	local: *;

> > > >  };

> > > > diff --git a/lib/meson.build b/lib/meson.build index

> > > > e5ff83893..0e1be8407 100644

> > > > --- a/lib/meson.build

> > > > +++ b/lib/meson.build

> > > > @@ -11,7 +11,9 @@

> > > >  libraries = [

> > > >  	'kvargs', # eal depends on kvargs

> > > >  	'eal', # everything depends on eal

> > > > -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > > > +	'ring',

> > > > +	'rcu', # rcu depends on ring

> > > > +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > > >  	'cmdline',

> > > >  	'metrics', # bitrate/latency stats depends on this

> > > >  	'hash',    # efd depends on this

> > > > @@ -22,7 +24,7 @@ libraries = [

> > > >  	'gro', 'gso', 'ip_frag', 'jobstats',

> > > >  	'kni', 'latencystats', 'lpm', 'member',

> > > >  	'power', 'pdump', 'rawdev',

> > > > -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> > > > +	'reorder', 'sched', 'security', 'stack', 'vhost',

> > > >  	# ipsec lib depends on net, crypto and security

> > > >  	'ipsec',

> > > >  	# add pkt framework libs which use other libs from above

> > > > --

> > > > 2.17.1

Medvedkin, Vladimir Oct. 4, 2019, 7:01 p.m. UTC | #8

Hi Honnappa,

On 01/10/2019 07:29, Honnappa Nagarahalli wrote:
> Add resource reclamation APIs to make it simple for applications

> and libraries to integrate rte_rcu library.

>

> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> ---

>   app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

>   lib/librte_rcu/meson.build         |   2 +

>   lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

>   lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

>   lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

>   lib/librte_rcu/rte_rcu_version.map |   4 +

>   lib/meson.build                    |   6 +-

>   7 files changed, 700 insertions(+), 3 deletions(-)

>   create mode 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

There are compilation errors when building DPDK as a shared library.

I think you need something like:

--- a/lib/librte_rcu/Makefile
+++ b/lib/librte_rcu/Makefile
@@ -8,7 +8,7 @@ LIB = librte_rcu.a

  CFLAGS += -DALLOW_EXPERIMENTAL_API
  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
-LDLIBS += -lrte_eal
+LDLIBS += -lrte_eal -lrte_ring
>

> diff --git a/app/test/test_rcu_qsbr.c b/app/test/test_rcu_qsbr.c

> index d1b9e46a2..3a6815243 100644

> --- a/app/test/test_rcu_qsbr.c

> +++ b/app/test/test_rcu_qsbr.c

I think it's better to split the unit test patches from the library patches.
> @@ -1,8 +1,9 @@

>   /* SPDX-License-Identifier: BSD-3-Clause

> - * Copyright (c) 2018 Arm Limited

> + * Copyright (c) 2019 Arm Limited

>    */

>   

>   #include <stdio.h>

> +#include <string.h>

>   #include <rte_pause.h>

>   #include <rte_rcu_qsbr.h>

>   #include <rte_hash.h>

> @@ -33,6 +34,7 @@ static uint32_t *keys;

>   #define COUNTER_VALUE 4096

>   static uint32_t *hash_data[RTE_MAX_LCORE][TOTAL_ENTRY];

>   static uint8_t writer_done;

> +static uint8_t cb_failed;

>   

>   static struct rte_rcu_qsbr *t[RTE_MAX_LCORE];

>   struct rte_hash *h[RTE_MAX_LCORE];

> @@ -582,6 +584,269 @@ test_rcu_qsbr_thread_offline(void)

>   	return 0;

>   }

>   

> +static void

> +rte_rcu_qsbr_test_free_resource(void *p, void *e)

This function is not part of the DPDK API, so it's better to name it something like
test_rcu_qsbr_free_resource().
> +{

> +	if (p != NULL && e != NULL) {

> +		printf("%s: Test failed\n", __func__);

> +		cb_failed = 1;

> +	}

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_create: create a queue used to store the data structure

> + * elements that can be freed later. This queue is referred to as 'defer queue'.

> + */

> +static int

> +test_rcu_qsbr_dq_create(void)

> +{

> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> +	struct rte_rcu_qsbr_dq_parameters params;

> +	struct rte_rcu_qsbr_dq *dq;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_create()\n");

> +

> +	/* Pass invalid parameters */

> +	dq = rte_rcu_qsbr_dq_create(NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> +	params.name = rcu_dq_name;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	params.f = rte_rcu_qsbr_test_free_resource;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> +	params.v = t[0];

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	params.size = 1;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	params.esize = 3;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	/* Pass all valid parameters */

> +	params.esize = 16;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid params");

> +	rte_rcu_qsbr_dq_delete(dq);

> +

> +	return 0;

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer queue,

> + * to be freed later after atleast one grace period is over.

> + */

> +static int

> +test_rcu_qsbr_dq_enqueue(void)

> +{

> +	int ret;

> +	uint64_t r;

> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> +	struct rte_rcu_qsbr_dq_parameters params;

> +	struct rte_rcu_qsbr_dq *dq;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_enqueue()\n");

> +

> +	/* Create a queue with simple parameters */

> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> +	params.name = rcu_dq_name;

> +	params.f = rte_rcu_qsbr_test_free_resource;

> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> +	params.v = t[0];

> +	params.size = 1;

> +	params.esize = 16;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid params");

> +

> +	/* Pass invalid parameters */

> +	ret = rte_rcu_qsbr_dq_enqueue(NULL, NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid params");

> +

> +	ret = rte_rcu_qsbr_dq_enqueue(dq, NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid params");

> +

> +	ret = rte_rcu_qsbr_dq_enqueue(NULL, &r);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid params");

> +

> +	ret = rte_rcu_qsbr_dq_delete(dq);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 1), "dq delete valid params");

> +

> +	return 0;

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_reclaim: Reclaim resources from the defer queue.

> + */

> +static int

> +test_rcu_qsbr_dq_reclaim(void)

> +{

> +	int ret;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_reclaim()\n");

> +

> +	/* Pass invalid parameters */

> +	ret = rte_rcu_qsbr_dq_reclaim(NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq reclaim invalid params");

> +

> +	return 0;

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_delete: Delete a defer queue.

> + */

> +static int

> +test_rcu_qsbr_dq_delete(void)

> +{

> +	int ret;

> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> +	struct rte_rcu_qsbr_dq_parameters params;

> +	struct rte_rcu_qsbr_dq *dq;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_delete()\n");

> +

> +	/* Pass invalid parameters */

> +	ret = rte_rcu_qsbr_dq_delete(NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq delete invalid params");

> +

> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> +	params.name = rcu_dq_name;

> +	params.f = rte_rcu_qsbr_test_free_resource;

> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> +	params.v = t[0];

> +	params.size = 1;

> +	params.esize = 16;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid params");

> +	ret = rte_rcu_qsbr_dq_delete(dq);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid params");

> +

> +	return 0;

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer queue,

> + * to be freed later after atleast one grace period is over.

> + */

> +static int

> +test_rcu_qsbr_dq_functional(int32_t size, int32_t esize)

> +{

> +	int i, j, ret;

> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> +	struct rte_rcu_qsbr_dq_parameters params;

> +	struct rte_rcu_qsbr_dq *dq;

> +	uint64_t *e;

> +	uint64_t sc = 200;

> +	int max_entries;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_xxx functional tests()\n");

> +	printf("Size = %d, esize = %d\n", size, esize);

> +

> +	e = (uint64_t *)rte_zmalloc(NULL, esize, RTE_CACHE_LINE_SIZE);

> +	if (e == NULL)

> +		return 0;

> +	cb_failed = 0;

> +

> +	/* Initialize the RCU variable. No threads are registered */

> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> +

> +	/* Create a queue with simple parameters */

> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> +	params.name = rcu_dq_name;

> +	params.f = rte_rcu_qsbr_test_free_resource;

> +	params.v = t[0];

> +	params.size = size;

> +	params.esize = esize;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid params");

> +

> +	/* Given the size and esize, calculate the maximum number of entries

> +	 * that can be stored on the defer queue (look at the logic used

> +	 * in capacity calculation of rte_ring).

> +	 */

> +	max_entries = rte_align32pow2(((esize/8 + 1) * size) + 1);

> +	max_entries = (max_entries - 1)/(esize/8 + 1);

> +

> +	/* Enqueue few counters starting with the value 'sc' */

> +	/* The queue size will be rounded up to 2. The enqueue API also

> +	 * reclaims if the queue size is above certain limit. Since, there

> +	 * are no threads registered, reclamation succedes. Hence, it should

> +	 * be possible to enqueue more than the provided queue size.

> +	 */

> +	for (i = 0; i < 10; i++) {

> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> +			"dq enqueue functional");

> +		for (j = 0; j < esize/8; j++)

> +			e[j] = sc++;

> +	}

> +

> +	/* Register a thread on the RCU QSBR variable. Reclamation will not

> +	 * succeed. It should not be possible to enqueue more than the size

> +	 * number of resources.

> +	 */

> +	rte_rcu_qsbr_thread_register(t[0], 1);

> +	rte_rcu_qsbr_thread_online(t[0], 1);

> +

> +	for (i = 0; i < max_entries; i++) {

> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> +			"dq enqueue functional");

> +		for (j = 0; j < esize/8; j++)

> +			e[j] = sc++;

> +	}

> +

> +	/* Enqueue fails as queue is full */

> +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue functional");

> +

> +	/* Delete should fail as there are elements in defer queue which

> +	 * cannot be reclaimed.

> +	 */

> +	ret = rte_rcu_qsbr_dq_delete(dq);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq delete valid params");

> +

> +	/* Report quiescent state, enqueue should succeed */

> +	rte_rcu_qsbr_quiescent(t[0], 1);

> +	for (i = 0; i < max_entries; i++) {

> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> +			"dq enqueue functional");

> +		for (j = 0; j < esize/8; j++)

> +			e[j] = sc++;

> +	}

> +

> +	/* Queue is full */

> +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue functional");

> +

> +	/* Report quiescent state, delete should succeed */

> +	rte_rcu_qsbr_quiescent(t[0], 1);

> +	ret = rte_rcu_qsbr_dq_delete(dq);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid params");

> +

> +	/* Validate that call back function did not return any error */

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((cb_failed == 1), "CB failed");

> +

> +	rte_free(e);

> +	return 0;

> +}

> +

>   /*

>    * rte_rcu_qsbr_dump: Dump status of a single QS variable to a file

>    */

> @@ -1025,6 +1290,18 @@ test_rcu_qsbr_main(void)

>   	if (test_rcu_qsbr_thread_offline() < 0)

>   		goto test_fail;

>   

> +	if (test_rcu_qsbr_dq_create() < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_reclaim() < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_delete() < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_enqueue() < 0)

> +		goto test_fail;

> +

>   	printf("\nFunctional tests\n");

>   

>   	if (test_rcu_qsbr_sw_sv_3qs() < 0)

> @@ -1033,6 +1310,18 @@ test_rcu_qsbr_main(void)

>   	if (test_rcu_qsbr_mw_mv_mqs() < 0)

>   		goto test_fail;

>   

> +	if (test_rcu_qsbr_dq_functional(1, 8) < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_functional(2, 8) < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_functional(303, 16) < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_functional(7, 128) < 0)

> +		goto test_fail;

> +

>   	free_rcu();

>   

>   	printf("\n");

> diff --git a/lib/librte_rcu/meson.build b/lib/librte_rcu/meson.build

> index 62920ba02..e280b29c1 100644

> --- a/lib/librte_rcu/meson.build

> +++ b/lib/librte_rcu/meson.build

> @@ -10,3 +10,5 @@ headers = files('rte_rcu_qsbr.h')

>   if cc.get_id() == 'clang' and dpdk_conf.get('RTE_ARCH_64') == false

>   	ext_deps += cc.find_library('atomic')

>   endif

> +

> +deps += ['ring']

> diff --git a/lib/librte_rcu/rte_rcu_qsbr.c b/lib/librte_rcu/rte_rcu_qsbr.c

> index ce7f93dd3..76814f50b 100644

> --- a/lib/librte_rcu/rte_rcu_qsbr.c

> +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> @@ -21,6 +21,7 @@

>   #include <rte_errno.h>

>   

>   #include "rte_rcu_qsbr.h"

> +#include "rte_rcu_qsbr_pvt.h"

>   

>   /* Get the memory size of QSBR variable */

>   size_t

> @@ -267,6 +268,190 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)

>   	return 0;

>   }

>   

> +/* Create a queue used to store the data structure elements that can

> + * be freed later. This queue is referred to as 'defer queue'.

> + */

> +struct rte_rcu_qsbr_dq *

> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params)

> +{

> +	struct rte_rcu_qsbr_dq *dq;

> +	uint32_t qs_fifo_size;

> +

> +	if (params == NULL || params->f == NULL ||

> +		params->v == NULL || params->name == NULL ||

> +		params->size == 0 || params->esize == 0 ||

> +		(params->esize % 8 != 0)) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return NULL;

> +	}

> +

> +	dq = rte_zmalloc(NULL,

> +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> +		RTE_CACHE_LINE_SIZE);

> +	if (dq == NULL) {

> +		rte_errno = ENOMEM;

> +

> +		return NULL;

> +	}

> +

> +	/* round up qs_fifo_size to next power of two that is not less than

> +	 * max_size.

> +	 */

> +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> +					* params->size) + 1);

> +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> +					SOCKET_ID_ANY, 0);

> +	if (dq->r == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): defer queue create failed\n", __func__);

> +		rte_free(dq);

> +		return NULL;

> +	}

> +

> +	dq->v = params->v;

> +	dq->size = params->size;

> +	dq->esize = params->esize;

> +	dq->f = params->f;

> +	dq->p = params->p;

> +

> +	return dq;

> +}

> +

> +/* Enqueue one resource to the defer queue to free after the grace

> + * period is over.

> + */

> +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e)

> +{

> +	uint64_t token;

> +	uint64_t *tmp;

> +	uint32_t i;

> +	uint32_t cur_size, free_size;

> +

> +	if (dq == NULL || e == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;

> +	}

> +

> +	/* Start the grace period */

> +	token = rte_rcu_qsbr_start(dq->v);

> +

> +	/* Reclaim resources if the queue is 1/8th full. This helps

> +	 * the queue from growing too large and allows time for reader

> +	 * threads to report their quiescent state.

> +	 */

> +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

> +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

> +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> +			"%s(): Triggering reclamation\n", __func__);

> +		rte_rcu_qsbr_dq_reclaim(dq);

> +	}

> +

> +	/* Check if there is space for atleast for 1 resource */

> +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> +	if (!free_size) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Defer queue is full\n", __func__);

> +		rte_errno = ENOSPC;

> +		return 1;

> +	}

> +

> +	/* Enqueue the resource */

> +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> +

> +	/* The resource to enqueue needs to be a multiple of 64b

> +	 * due to the limitation of the rte_ring implementation.

> +	 */

> +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

> +

> +	return 0;

> +}

> +

> +/* Reclaim resources from the defer queue. */

> +int

> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq)

> +{

> +	uint32_t max_cnt;

> +	uint32_t cnt;

> +	void *token;

> +	uint64_t *tmp;

> +	uint32_t i;

> +

> +	if (dq == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;

> +	}

> +

> +	/* Anything to reclaim? */

> +	if (rte_ring_count(dq->r) == 0)

> +		return 0;

> +

> +	/* Reclaim at the max 1/16th the total number of entries. */

> +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> +	cnt = 0;

> +

> +	/* Check reader threads quiescent state and reclaim resources */

> +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> +			== 1)) {

> +		(void)rte_ring_sc_dequeue(dq->r, &token);

> +		/* The resource to dequeue needs to be a multiple of 64b

> +		 * due to the limitation of the rte_ring implementation.

> +		 */

> +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> +			i++, tmp++)

> +			(void)rte_ring_sc_dequeue(dq->r,

> +					(void *)(uintptr_t)tmp);

> +		dq->f(dq->p, dq->e);

> +

> +		cnt++;

> +	}

> +

> +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> +

> +	if (cnt == 0) {

> +		/* No resources were reclaimed */

> +		rte_errno = EAGAIN;

> +		return 1;

> +	}

> +

> +	return 0;

> +}

> +

> +/* Delete a defer queue. */

> +int

> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq)

> +{

> +	if (dq == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;

> +	}

> +

> +	/* Reclaim all the resources */

> +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> +		/* Error number is already set by the reclaim API */

> +		return 1;

There could be a potential problem here. rte_rcu_qsbr_dq_reclaim() reclaims
at most max_cnt entries, which is 1/16 of the possible enqueued entries, so
the rest won't be reclaimed.
> +

> +	rte_ring_free(dq->r);

> +	rte_free(dq);

> +

> +	return 0;

> +}

> +

>   int rte_rcu_log_type;

>   

>   RTE_INIT(rte_rcu_register)

> diff --git a/lib/librte_rcu/rte_rcu_qsbr.h b/lib/librte_rcu/rte_rcu_qsbr.h

> index c80f15c00..185d4b50a 100644

> --- a/lib/librte_rcu/rte_rcu_qsbr.h

> +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> @@ -34,6 +34,7 @@ extern "C" {

>   #include <rte_lcore.h>

>   #include <rte_debug.h>

>   #include <rte_atomic.h>

> +#include <rte_ring.h>

I think it's better to move this include into rte_rcu_qsbr.c
>   

>   extern int rte_rcu_log_type;

>   

> @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

>   	 */

>   } __rte_cache_aligned;

>   

> +/**

> + * Call back function called to free the resources.

> + *

> + * @param p

> + *   Pointer provided while creating the defer queue

> + * @param e

> + *   Pointer to the resource data stored on the defer queue

> + *

> + * @return

> + *   None

> + */

> +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> +

> +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

I don't see any usage of this macro in the rcu library (I see
you are using it in LPM).

char rcu_dq_name[RTE_RING_NAMESIZE];
is used instead in the tests.
+ See my comments for  [PATCH v3 1/3] lib/lpm: integrate RCU QSBR
> +

> +/**

> + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> + */

> +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> +

> +/**

> + *  Reclaim at the max 1/16th the total number of resources.

> + */

> +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

Those two defines could be moved into .c file.
> +

> +/**

> + * Parameters used when creating the defer queue.

> + */

> +struct rte_rcu_qsbr_dq_parameters {

> +	const char *name;

> +	/**< Name of the queue. */

> +	uint32_t size;

> +	/**< Number of entries in queue. Typically, this will be

> +	 *   the same as the maximum number of entries supported in the

> +	 *   lock free data structure.

> +	 *   Data structures with unbounded number of entries is not

> +	 *   supported currently.

> +	 */

> +	uint32_t esize;

> +	/**< Size (in bytes) of each element in the defer queue.

> +	 *   This has to be multiple of 8B as the rte_ring APIs

> +	 *   support 8B element sizes only.

> +	 */

> +	rte_rcu_qsbr_free_resource f;

> +	/**< Function to call to free the resource. */

> +	void *p;

> +	/**< Pointer passed to the free function. Typically, this is the

> +	 *   pointer to the data structure to which the resource to free

> +	 *   belongs. This can be NULL.

> +	 */

> +	struct rte_rcu_qsbr *v;

> +	/**< RCU QSBR variable to use for this defer queue */

> +};

> +

> +/* RTE defer queue structure.

> + * This structure holds the defer queue. The defer queue is used to

> + * hold the deleted entries from the data structure that are not

> + * yet freed.

> + */

> +struct rte_rcu_qsbr_dq;

> +

>   /**

>    * @warning

>    * @b EXPERIMENTAL: this API may change without prior notice

> @@ -648,6 +710,113 @@ __rte_experimental

>   int

>   rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);

>   

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Create a queue used to store the data structure elements that can

> + * be freed later. This queue is referred to as 'defer queue'.

> + *

> + * @param params

> + *   Parameters to create a defer queue.

> + * @return

> + *   On success - Valid pointer to defer queue

> + *   On error - NULL

> + *   Possible rte_errno codes are:

> + *   - EINVAL - NULL parameters are passed

> + *   - ENOMEM - Not enough memory

> + */

> +__rte_experimental

> +struct rte_rcu_qsbr_dq *

> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Enqueue one resource to the defer queue and start the grace period.

> + * The resource will be freed later after at least one grace period

> + * is over.

> + *

> + * If the defer queue is full, it will attempt to reclaim resources.

> + * It will also reclaim resources at regular intervals to avoid

> + * the defer queue from growing too big.

> + *

> + * This API is not multi-thread safe. It is expected that the caller

> + * provides multi-thread safety by locking a mutex or some other means.

> + *

> + * A lock free multi-thread writer algorithm could achieve multi-thread

> + * safety by creating and using one defer queue per thread.

> + *

> + * @param dq

> + *   Defer queue to allocate an entry from.

> + * @param e

> + *   Pointer to resource data to copy to the defer queue. The size of

> + *   the data to copy is equal to the element size provided when the

> + *   defer queue was created.

> + * @return

> + *   On success - 0

> + *   On error - 1 with rte_errno set to

> + *   - EINVAL - NULL parameters are passed

> + *   - ENOSPC - Defer queue is full. This condition can not happen

> + *		if the defer queue size is equal (or larger) than the

> + *		number of elements in the data structure.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Reclaim resources from the defer queue.

> + *

> + * This API is not multi-thread safe. It is expected that the caller

> + * provides multi-thread safety by locking a mutex or some other means.

> + *

> + * A lock free multi-thread writer algorithm could achieve multi-thread

> + * safety by creating and using one defer queue per thread.

> + *

> + * @param dq

> + *   Defer queue to reclaim an entry from.

> + * @return

> + *   On successful reclamation of at least 1 resource - 0

> + *   On error - 1 with rte_errno set to

> + *   - EINVAL - NULL parameters are passed

> + *   - EAGAIN - None of the resources have completed at least 1 grace period,

> + *		try again.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Delete a defer queue.

> + *

> + * It tries to reclaim all the resources on the defer queue.

> + * If any of the resources have not completed the grace period

> + * the reclamation stops and returns immediately. The rest of

> + * the resources are not reclaimed and the defer queue is not

> + * freed.

> + *

> + * @param dq

> + *   Defer queue to delete.

> + * @return

> + *   On success - 0

> + *   On error - 1

> + *   Possible rte_errno codes are:

> + *   - EINVAL - NULL parameters are passed

> + *   - EAGAIN - Some of the resources have not completed at least 1 grace

> + *		period, try again.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> +

>   #ifdef __cplusplus

>   }

>   #endif

> diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> new file mode 100644

> index 000000000..2122bc36a

> --- /dev/null

> +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> @@ -0,0 +1,46 @@

> +/* SPDX-License-Identifier: BSD-3-Clause

> + * Copyright (c) 2019 Arm Limited

> + */

> +

> +#ifndef _RTE_RCU_QSBR_PVT_H_

> +#define _RTE_RCU_QSBR_PVT_H_

> +

> +/**

> + * This file is private to the RCU library. It should not be included

> + * by the user of this library.

> + */

Why is this struct definition separated into a private .h? Maybe just
define it in rte_rcu_qsbr.c instead?
> +

> +#ifdef __cplusplus

> +extern "C" {

> +#endif

> +

> +#include "rte_rcu_qsbr.h"

> +

> +/* RTE defer queue structure.

> + * This structure holds the defer queue. The defer queue is used to

> + * hold the deleted entries from the data structure that are not

> + * yet freed.

> + */

> +struct rte_rcu_qsbr_dq {

> +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> +	uint32_t size;

> +	/**< Number of elements in the defer queue */

> +	uint32_t esize;

> +	/**< Size (in bytes) of data stored on the defer queue */

> +	rte_rcu_qsbr_free_resource f;

> +	/**< Function to call to free the resource. */

> +	void *p;

> +	/**< Pointer passed to the free function. Typically, this is the

> +	 *   pointer to the data structure to which the resource to free

> +	 *   belongs.

> +	 */

> +	char e[0];

> +	/**< Temporary storage to copy the defer queue element. */

> +};

> +

> +#ifdef __cplusplus

> +}

> +#endif

> +

> +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> diff --git a/lib/librte_rcu/rte_rcu_version.map b/lib/librte_rcu/rte_rcu_version.map

> index f8b9ef2ab..dfac88a37 100644

> --- a/lib/librte_rcu/rte_rcu_version.map

> +++ b/lib/librte_rcu/rte_rcu_version.map

> @@ -8,6 +8,10 @@ EXPERIMENTAL {

>   	rte_rcu_qsbr_synchronize;

>   	rte_rcu_qsbr_thread_register;

>   	rte_rcu_qsbr_thread_unregister;

> +	rte_rcu_qsbr_dq_create;

> +	rte_rcu_qsbr_dq_enqueue;

> +	rte_rcu_qsbr_dq_reclaim;

> +	rte_rcu_qsbr_dq_delete;

>   

>   	local: *;

>   };

> diff --git a/lib/meson.build b/lib/meson.build

> index e5ff83893..0e1be8407 100644

> --- a/lib/meson.build

> +++ b/lib/meson.build

> @@ -11,7 +11,9 @@

>   libraries = [

>   	'kvargs', # eal depends on kvargs

>   	'eal', # everything depends on eal

> -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> +	'ring',

> +	'rcu', # rcu depends on ring

> +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

>   	'cmdline',

>   	'metrics', # bitrate/latency stats depends on this

>   	'hash',    # efd depends on this

> @@ -22,7 +24,7 @@ libraries = [

>   	'gro', 'gso', 'ip_frag', 'jobstats',

>   	'kni', 'latencystats', 'lpm', 'member',

>   	'power', 'pdump', 'rawdev',

> -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> +	'reorder', 'sched', 'security', 'stack', 'vhost',

>   	# ipsec lib depends on net, crypto and security

>   	'ipsec',

>   	# add pkt framework libs which use other libs from above


-- 
Regards,
Vladimir

Ananyev, Konstantin Oct. 7, 2019, 10:46 a.m. UTC | #9

> > > > > Add resource reclamation APIs to make it simple for applications

> > > > > and libraries to integrate rte_rcu library.

> > > > >

> > > > > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> > > > > Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> > > > > ---

> > > > >  app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

> > > > >  lib/librte_rcu/meson.build         |   2 +

> > > > >  lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

> > > > >  lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

> > > > >  lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

> > > > >  lib/librte_rcu/rte_rcu_version.map |   4 +

> > > > >  lib/meson.build                    |   6 +-

> > > > >  7 files changed, 700 insertions(+), 3 deletions(-)  create mode

> > > > > 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > > >

> > > > > diff --git a/lib/librte_rcu/rte_rcu_qsbr.c

> > > > > b/lib/librte_rcu/rte_rcu_qsbr.c index ce7f93dd3..76814f50b 100644

> > > > > --- a/lib/librte_rcu/rte_rcu_qsbr.c

> > > > > +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> > > > > @@ -21,6 +21,7 @@

> > > > >  #include <rte_errno.h>

> > > > >

> > > > >  #include "rte_rcu_qsbr.h"

> > > > > +#include "rte_rcu_qsbr_pvt.h"

> > > > >

> > > > >  /* Get the memory size of QSBR variable */  size_t @@ -267,6

> > > > > +268,190 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)

> > > > >  	return 0;

> > > > >  }

> > > > >

> > > > > +/* Create a queue used to store the data structure elements that

> > > > > +can

> > > > > + * be freed later. This queue is referred to as 'defer queue'.

> > > > > + */

> > > > > +struct rte_rcu_qsbr_dq *

> > > > > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > > > > +*params) {

> > > > > +	struct rte_rcu_qsbr_dq *dq;

> > > > > +	uint32_t qs_fifo_size;

> > > > > +

> > > > > +	if (params == NULL || params->f == NULL ||

> > > > > +		params->v == NULL || params->name == NULL ||

> > > > > +		params->size == 0 || params->esize == 0 ||

> > > > > +		(params->esize % 8 != 0)) {

> > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > > +		rte_errno = EINVAL;

> > > > > +

> > > > > +		return NULL;

> > > > > +	}

> > > > > +

> > > > > +	dq = rte_zmalloc(NULL,

> > > > > +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> > > > > +		RTE_CACHE_LINE_SIZE);

> > > > > +	if (dq == NULL) {

> > > > > +		rte_errno = ENOMEM;

> > > > > +

> > > > > +		return NULL;

> > > > > +	}

> > > > > +

> > > > > +	/* round up qs_fifo_size to next power of two that is not less than

> > > > > +	 * max_size.

> > > > > +	 */

> > > > > +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> > > > > +					* params->size) + 1);

> > > > > +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> > > > > +					SOCKET_ID_ANY, 0);

> > > >

> > > > If it is going to be not MT safe, then why not to create the ring

> > > > with (RING_F_SP_ENQ | RING_F_SC_DEQ) flags set?

> > > Agree.

> > >

> > > > Though I think it could be changed to allow MT safe multiple

> > > > enqeue/single dequeue, see below.

> > > The MT safe issue is due to reclaim code. The reclaim code has the following

> > sequence:

> > >

> > > rte_ring_peek

> > > rte_rcu_qsbr_check

> > > rte_ring_dequeue

> > >

> > > This entire sequence needs to be atomic as the entry cannot be dequeued

> > without knowing that the grace period for that entry is over.

> >

> > I understand that, though I believe at least it should be possible to support

> > multiple-enqueue/single dequeuer and reclaim mode.

> > With serialized dequeue() even multiple dequeue should be possible.

> Agreed. Please see the response on the other thread.

> 

> >

> > > Note that due to optimizations in rte_rcu_qsbr_check API, this

> > > sequence should not be large in most cases. I do not have ideas on how to

> > make this sequence lock-free.

> > >

> > > If the writer is on the control plane, most use cases will use mutex

> > > locks for synchronization if they are multi-threaded. That lock should be

> > enough to provide the thread safety for these APIs.

> >

> > In that is case, why do we need ring at all?

> > For sure people can create their own queue quite easily with mutex and TAILQ.

> > If performance is not an issue, they can even add pthread_cond to it, and have

> > an ability for the consumer to sleep/wakeup on empty/full queue.

> >

> > >

> > > If the writer is multi-threaded and lock-free, then one should use per thread

> > defer queue.

> >

> > If that's the only working model, then the question is why do we need that API

> > at all?

> > Just simple array with counter or linked-list should do for majority of cases.

> Please see the other thread.

> 

> >

> > >

> > > >

> > > > > +	if (dq->r == NULL) {

> > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > +			"%s(): defer queue create failed\n", __func__);

> > > > > +		rte_free(dq);

> > > > > +		return NULL;

> > > > > +	}

> > > > > +

> > > > > +	dq->v = params->v;

> > > > > +	dq->size = params->size;

> > > > > +	dq->esize = params->esize;

> > > > > +	dq->f = params->f;

> > > > > +	dq->p = params->p;

> > > > > +

> > > > > +	return dq;

> > > > > +}

> > > > > +

> > > > > +/* Enqueue one resource to the defer queue to free after the

> > > > > +grace

> > > > > + * period is over.

> > > > > + */

> > > > > +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e) {

> > > > > +	uint64_t token;

> > > > > +	uint64_t *tmp;

> > > > > +	uint32_t i;

> > > > > +	uint32_t cur_size, free_size;

> > > > > +

> > > > > +	if (dq == NULL || e == NULL) {

> > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > > +		rte_errno = EINVAL;

> > > > > +

> > > > > +		return 1;

> > > >

> > > > Why just not to return -EINVAL straightway?

> > > > I think there is no much point to set rte_errno in that function at

> > > > all, just return value should do.

> > > I am trying to keep these consistent with the existing APIs. They return 0 or 1

> > and set the rte_errno.

> >

> > A lot of public DPDK API functions do use return value to return status code (0,

> > or some positive numbers of success, negative errno values on failure), I am

> > not inventing anything new here.

> Agree, you are not proposing a new thing here. May be I was not clear. I really do not have an opinion on how this should be done. But, I do

> have an opinion on consistency. These new APIs follow what has been done in the existing RCU APIs. I think we have 2 options here.

> 1) Either we change existing RCU APIs to get rid of rte_errno (is it an ABI change?) or

> 2) The new APIs follow what has been done in the existing RCU APIs.

> I want to make sure we are consistent at least within RCU APIs.


But as far as I can see, right now the rcu API sets rte_errno only for control-path functions
(get_memsize, init, register, unregister, dump).
None of the fast-path (inline) functions set/use it.
So from that perspective, that is consistent behavior, no?

> 

> >

> > >

> > > >

> > > > > +	}

> > > > > +

> > > > > +	/* Start the grace period */

> > > > > +	token = rte_rcu_qsbr_start(dq->v);

> > > > > +

> > > > > +	/* Reclaim resources if the queue is 1/8th full. This helps

> > > > > +	 * the queue from growing too large and allows time for reader

> > > > > +	 * threads to report their quiescent state.

> > > > > +	 */

> > > > > +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

> > > >

> > > > Probably would be a bit easier if you just store in dq->esize (elt

> > > > size + token

> > > > size) / 8.

> > > Agree

> > >

> > > >

> > > > > +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

> > > >

> > > > Why to make this threshold value hard-coded?

> > > > Why either not to put it into create parameter, or just return a

> > > > special return value, to indicate that threshold is reached?

> > > My thinking was to keep the programming interface easy to use. The

> > > more the parameters, the more painful it is for the user. IMO, the

> > > constants chosen should be good enough for most cases. More advanced

> > users could modify the constants. However, we could make these as part of the

> > parameters, but make them optional for the user. For ex: if they set them to 0,

> > default values can be used.

> > >

> > > > Or even return number of filled/free entroes on success, so caller

> > > > can decide to reclaim or not based on that information on his own?

> > > This means more code on the user side.

> >

> > I personally think it it really wouldn't be that big problem to the user to pass

> > extra parameter to the function.

> I will convert the 2 constants into optional parameters (user can set them to 0 to make the algorithm use default values)

> 

> > Again what if user doesn't want to reclaim() in enqueue() thread at all?

> 'enqueue' has to do reclamation if the defer queue is full. I do not think this is trivial.

> 

> In the current design, reclamation in enqueue is also done on regular basis (automatic triggering of reclamation when the queue reaches

> certain limit) to keep the queue from growing too large. This is required when we implement a dynamically adjusting defer queue. The

> current algorithm keeps the cost of reclamation spread across multiple calls and puts an upper bound on cycles for delete API by reclaiming

> a fixed number of entries.

> 

> This algorithm is proven to work in the LPM integration performance tests at a very low performance over head (~1%). So, I do not know

> why a user would not want to use this. 


Yeah, I looked at the LPM implementation and found one thing strange -
the defer_queue is hidden inside the LPM struct and all reclamations are done internally.
Yes, for sure it allows deferring and grouping the actual reclaim(), which hopefully will lead to better performance.
But why not allow the user to call reclaim() for it directly too?
That way the user might avoid (or minimize) doing reclaim() in LPM write() at all,
and, let's say, do it somewhere later in the same thread (when there are no other tasks to do),
or even leave it to some other house-keeping thread to do (a sort of garbage collector).
Or is such a mode not supported/planned?

> The 2 additional parameters should give the user more flexibility.


Ok, let's keep it as config params.
On second thought - I think you are right, it should be good enough.

> 

> However, if the user wants his own algorithm, he can create one with the base APIs provided.

> 

> >

> > > I think adding these to parameters seems like a better option.

> > >

> > > >

> > > > > +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > > > > +			"%s(): Triggering reclamation\n", __func__);

> > > > > +		rte_rcu_qsbr_dq_reclaim(dq);

> > > > > +	}

> > > > > +

> > > > > +	/* Check if there is space for atleast for 1 resource */

> > > > > +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> > > > > +	if (!free_size) {

> > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > +			"%s(): Defer queue is full\n", __func__);

> > > > > +		rte_errno = ENOSPC;

> > > > > +		return 1;

> > > > > +	}

> > > > > +

> > > > > +	/* Enqueue the resource */

> > > > > +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> > > > > +

> > > > > +	/* The resource to enqueue needs to be a multiple of 64b

> > > > > +	 * due to the limitation of the rte_ring implementation.

> > > > > +	 */

> > > > > +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> > > > > +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

> > > >

> > > >

> > > > That whole construction above looks a bit clumsy and error prone...

> > > > I suppose just:

> > > >

> > > > const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t free, n; ...

> > > > n = rte_ring_enqueue_bulk(dq->r, e, nb_elt, &free); if (n == 0)

> > > Yes, bulk enqueue can be used. But note that once the flexible element size

> > ring patch is done, this code will use that.

> >

> > Well, when it will be in the mainline, and it would provide a better way, for sure

> > this code can be updated to use new API (if it is provide some improvements).

> > But as I udenrstand, right now it is not there, while bulk enqueue/dequeue are.

> Apologies, I was not clear. I agree we can go with bulk APIs for now.

> 

> >

> > >

> > > >   return -ENOSPC;

> > > > return free;

> > > >

> > > > That way I think you can have MT-safe version of that function.

> > > Please see the description of MT safe issue above.

> > >

> > > >

> > > > > +

> > > > > +	return 0;

> > > > > +}

> > > > > +

> > > > > +/* Reclaim resources from the defer queue. */ int

> > > > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

> > > > > +	uint32_t max_cnt;

> > > > > +	uint32_t cnt;

> > > > > +	void *token;

> > > > > +	uint64_t *tmp;

> > > > > +	uint32_t i;

> > > > > +

> > > > > +	if (dq == NULL) {

> > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > > +		rte_errno = EINVAL;

> > > > > +

> > > > > +		return 1;

> > > >

> > > > Same story as above - I think rte_errno is excessive in this function.

> > > > Just return value should be enough.

> > > >

> > > >

> > > > > +	}

> > > > > +

> > > > > +	/* Anything to reclaim? */

> > > > > +	if (rte_ring_count(dq->r) == 0)

> > > > > +		return 0;

> > > >

> > > > Not sure you need that, see below.

> > > >

> > > > > +

> > > > > +	/* Reclaim at the max 1/16th the total number of entries. */

> > > > > +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> > > > > +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> > > >

> > > > Again why not to make max_cnt a configurable at create() parameter?

> > > I think making this as an optional parameter for creating defer queue is a

> > better option.

> > >

> > > > Or even a parameter for that function?

> > > >

> > > > > +	cnt = 0;

> > > > > +

> > > > > +	/* Check reader threads quiescent state and reclaim resources */

> > > > > +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> > > > > +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> > > > > +			== 1)) {

> > > >

> > > >

> > > > > +		(void)rte_ring_sc_dequeue(dq->r, &token);

> > > > > +		/* The resource to dequeue needs to be a multiple of 64b

> > > > > +		 * due to the limitation of the rte_ring implementation.

> > > > > +		 */

> > > > > +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> > > > > +			i++, tmp++)

> > > > > +			(void)rte_ring_sc_dequeue(dq->r,

> > > > > +					(void *)(uintptr_t)tmp);

> > > >

> > > > Again, no need for such constructs with multiple dequeuer I believe.

> > > > Just:

> > > >

> > > > const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t n; uintptr_t

> > > > elt[nb_elt]; ...

> > > > n = rte_ring_dequeue_bulk(dq->r, elt, nb_elt, NULL); if (n != 0)

> > > > {dq->f(dq->p, elt);}

> > > Agree on bulk API use.

> > >

> > > >

> > > > Seems enough.

> > > > Again in that case you can have enqueue/reclaim running in different

> > > > threads simultaneously, plus you don't need dq->e at all.

> > > Will check on dq->e

> > >

> > > >

> > > > > +		dq->f(dq->p, dq->e);

> > > > > +

> > > > > +		cnt++;

> > > > > +	}

> > > > > +

> > > > > +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > > > > +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> > > > > +

> > > > > +	if (cnt == 0) {

> > > > > +		/* No resources were reclaimed */

> > > > > +		rte_errno = EAGAIN;

> > > > > +		return 1;

> > > > > +	}

> > > > > +

> > > > > +	return 0;

> > > >

> > > > I'd suggest to return cnt on success.

> > > I am trying to keep the APIs simple. I do not see much use for 'cnt'

> > > as return value to the user. It exposes more details which I think are internal

> > to the library.

> >

> > Not sure what is the hassle to return number of completed reclamaitions?

> > If user doesn't need that information, he simply wouldn't use it.

> > But might be it would be usefull - he can decide should he try another attempt

> > of reclaim() immediately or is it ok to do something else.

> There is no hassle to return that information.

> 

> As per the current design, user calls 'reclaim' when it is out of resources while adding an entry to the data structure. At that point the user

> wants to know if at least 1 resource was reclaimed because the user has to allocate 1 resource. He does not have a use for the number of

> resources reclaimed.


Ok, but why can't the user decide to do reclaim in advance, let's say when he foresees that he will need a lot of allocations in the near future?
Or when there is some idle time? Or some combination of these things?
And he would like to free some extra resources in that case to minimize the number of reclaims in a future peak interval?

> 

> If this API returns 0, then the user can decide to repeat the call or return failure. But that decision depends on the length of the grace period

> which is under user's control.

> 

> >

> > >

> > > >

> > > > > +}

> > > > > +

> > > > > +/* Delete a defer queue. */

> > > > > +int

> > > > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

> > > > > +	if (dq == NULL) {

> > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > > +		rte_errno = EINVAL;

> > > > > +

> > > > > +		return 1;

> > > > > +	}

> > > > > +

> > > > > +	/* Reclaim all the resources */

> > > > > +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> > > > > +		/* Error number is already set by the reclaim API */

> > > > > +		return 1;

> > > >

> > > > How do you know that you have reclaimed everything?

> > > Good point, will come back with a different solution.

> > >

> > > >

> > > > > +

> > > > > +	rte_ring_free(dq->r);

> > > > > +	rte_free(dq);

> > > > > +

> > > > > +	return 0;

> > > > > +}

> > > > > +

> > > > >  int rte_rcu_log_type;

> > > > >

> > > > >  RTE_INIT(rte_rcu_register)

> > > > > diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

> > > > > b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a 100644

> > > > > --- a/lib/librte_rcu/rte_rcu_qsbr.h

> > > > > +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> > > > > @@ -34,6 +34,7 @@ extern "C" {

> > > > >  #include <rte_lcore.h>

> > > > >  #include <rte_debug.h>

> > > > >  #include <rte_atomic.h>

> > > > > +#include <rte_ring.h>

> > > > >

> > > > >  extern int rte_rcu_log_type;

> > > > >

> > > > > @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

> > > > >  	 */

> > > > >  } __rte_cache_aligned;

> > > > >

> > > > > +/**

> > > > > + * Call back function called to free the resources.

> > > > > + *

> > > > > + * @param p

> > > > > + *   Pointer provided while creating the defer queue

> > > > > + * @param e

> > > > > + *   Pointer to the resource data stored on the defer queue

> > > > > + *

> > > > > + * @return

> > > > > + *   None

> > > > > + */

> > > > > +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> > > >

> > > > Stylish thing - usually in DPDK we have typedf newtype_t ...

> > > > Though I am not sure you need a new typedef at all - just a function

> > > > pointer inside the struct seems enough.

> > > Other libraries (for ex: rte_hash) use this approach. I think it is better to keep

> > it out of the structure to allow for better commenting.

> >

> > I am saying majority of DPDK code use _t suffix for typedef:

> > typedef void (*rte_rcu_qsbr_free_resource_t)(void *p, void *e);

> Apologies, got it, will change.

> 

> >

> > >

> > > >

> > > > > +

> > > > > +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> > > > > +

> > > > > +/**

> > > > > + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> > > > > + */

> > > > > +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> > > > > +

> > > > > +/**

> > > > > + *  Reclaim at the max 1/16th the total number of resources.

> > > > > + */

> > > > > +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> > > >

> > > >

> > > > As I said above, I don't think these thresholds need to be hardcoded.

> > > > In any case, there seems not much point to put them in the public header

> > file.

> > > >

> > > > > +

> > > > > +/**

> > > > > + * Parameters used when creating the defer queue.

> > > > > + */

> > > > > +struct rte_rcu_qsbr_dq_parameters {

> > > > > +	const char *name;

> > > > > +	/**< Name of the queue. */

> > > > > +	uint32_t size;

> > > > > +	/**< Number of entries in queue. Typically, this will be

> > > > > +	 *   the same as the maximum number of entries supported in the

> > > > > +	 *   lock free data structure.

> > > > > +	 *   Data structures with unbounded number of entries is not

> > > > > +	 *   supported currently.

> > > > > +	 */

> > > > > +	uint32_t esize;

> > > > > +	/**< Size (in bytes) of each element in the defer queue.

> > > > > +	 *   This has to be multiple of 8B as the rte_ring APIs

> > > > > +	 *   support 8B element sizes only.

> > > > > +	 */

> > > > > +	rte_rcu_qsbr_free_resource f;

> > > > > +	/**< Function to call to free the resource. */

> > > > > +	void *p;

> > > >

> > > > Style nit again - I like short names myself, but that seems a bit

> > > > extreme... :) Might be at least:

> > > > void (*reclaim)(void *, void *);

> > > May be 'free_fn'?

> > >

> > > > void * reclaim_data;

> > > > ?

> > > This is the pointer to the data structure to free the resource into. For ex: In

> > LPM data structure, it will be pointer to LPM. 'reclaim_data'

> > > does not convey the meaning correctly.

> >

> > Ok, please free to comeup with your own names.

> > I just wanted to say that 'f' and 'p' are a bit an extreme for public API.

> ok, this is the hardest thing to do 😊

> 

> >

> > >

> > > >

> > > > > +	/**< Pointer passed to the free function. Typically, this is the

> > > > > +	 *   pointer to the data structure to which the resource to free

> > > > > +	 *   belongs. This can be NULL.

> > > > > +	 */

> > > > > +	struct rte_rcu_qsbr *v;

> > > >

> > > > Does it need to be inside that struct?

> > > > Might be better:

> > > > rte_rcu_qsbr_dq_create(struct rte_rcu_qsbr *v, const struct

> > > > rte_rcu_qsbr_dq_parameters *params);

> > > The API takes a parameter structure as input anyway, why to add

> > > another argument to the function? QSBR variable is also another parameter.

> > >

> > > >

> > > > Another alternative: make both reclaim() and enqueue() to take v as

> > > > a parameter.

> > > But both of them need access to some of the parameters provided in

> > > rte_rcu_qsbr_dq_create API. We would end up passing 2 arguments to the

> > functions.

> >

> > Pure stylish thing.

> > From my perspective it just provides better visibility what is going in the code:

> > For QSBR var 'v' create a new deferred queue.

> > But no strong opinion here.

> >

> > >

> > > >

> > > > > +	/**< RCU QSBR variable to use for this defer queue */ };

> > > > > +

> > > > > +/* RTE defer queue structure.

> > > > > + * This structure holds the defer queue. The defer queue is used

> > > > > +to

> > > > > + * hold the deleted entries from the data structure that are not

> > > > > + * yet freed.

> > > > > + */

> > > > > +struct rte_rcu_qsbr_dq;

> > > > > +

> > > > >  /**

> > > > >   * @warning

> > > > >   * @b EXPERIMENTAL: this API may change without prior notice @@

> > > > > -648,6 +710,113 @@ __rte_experimental  int  rte_rcu_qsbr_dump(FILE

> > > > > *f, struct rte_rcu_qsbr *v);

> > > > >

> > > > > +/**

> > > > > + * @warning

> > > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > > + *

> > > > > + * Create a queue used to store the data structure elements that

> > > > > +can

> > > > > + * be freed later. This queue is referred to as 'defer queue'.

> > > > > + *

> > > > > + * @param params

> > > > > + *   Parameters to create a defer queue.

> > > > > + * @return

> > > > > + *   On success - Valid pointer to defer queue

> > > > > + *   On error - NULL

> > > > > + *   Possible rte_errno codes are:

> > > > > + *   - EINVAL - NULL parameters are passed

> > > > > + *   - ENOMEM - Not enough memory

> > > > > + */

> > > > > +__rte_experimental

> > > > > +struct rte_rcu_qsbr_dq *

> > > > > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > > > > +*params);

> > > > > +

> > > > > +/**

> > > > > + * @warning

> > > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > > + *

> > > > > + * Enqueue one resource to the defer queue and start the grace period.

> > > > > + * The resource will be freed later after at least one grace

> > > > > +period

> > > > > + * is over.

> > > > > + *

> > > > > + * If the defer queue is full, it will attempt to reclaim resources.

> > > > > + * It will also reclaim resources at regular intervals to avoid

> > > > > + * the defer queue from growing too big.

> > > > > + *

> > > > > + * This API is not multi-thread safe. It is expected that the

> > > > > +caller

> > > > > + * provides multi-thread safety by locking a mutex or some other means.

> > > > > + *

> > > > > + * A lock free multi-thread writer algorithm could achieve

> > > > > +multi-thread

> > > > > + * safety by creating and using one defer queue per thread.

> > > > > + *

> > > > > + * @param dq

> > > > > + *   Defer queue to allocate an entry from.

> > > > > + * @param e

> > > > > + *   Pointer to resource data to copy to the defer queue. The size of

> > > > > + *   the data to copy is equal to the element size provided when the

> > > > > + *   defer queue was created.

> > > > > + * @return

> > > > > + *   On success - 0

> > > > > + *   On error - 1 with rte_errno set to

> > > > > + *   - EINVAL - NULL parameters are passed

> > > > > + *   - ENOSPC - Defer queue is full. This condition can not happen

> > > > > + *		if the defer queue size is equal (or larger) than the

> > > > > + *		number of elements in the data structure.

> > > > > + */

> > > > > +__rte_experimental

> > > > > +int

> > > > > +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> > > > > +

> > > > > +/**

> > > > > + * @warning

> > > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > > + *

> > > > > + * Reclaim resources from the defer queue.

> > > > > + *

> > > > > + * This API is not multi-thread safe. It is expected that the

> > > > > +caller

> > > > > + * provides multi-thread safety by locking a mutex or some other means.

> > > > > + *

> > > > > + * A lock free multi-thread writer algorithm could achieve

> > > > > +multi-thread

> > > > > + * safety by creating and using one defer queue per thread.

> > > > > + *

> > > > > + * @param dq

> > > > > + *   Defer queue to reclaim an entry from.

> > > > > + * @return

> > > > > + *   On successful reclamation of at least 1 resource - 0

> > > > > + *   On error - 1 with rte_errno set to

> > > > > + *   - EINVAL - NULL parameters are passed

> > > > > + *   - EAGAIN - None of the resources have completed at least 1 grace

> > > > period,

> > > > > + *		try again.

> > > > > + */

> > > > > +__rte_experimental

> > > > > +int

> > > > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> > > > > +

> > > > > +/**

> > > > > + * @warning

> > > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > > + *

> > > > > + * Delete a defer queue.

> > > > > + *

> > > > > + * It tries to reclaim all the resources on the defer queue.

> > > > > + * If any of the resources have not completed the grace period

> > > > > + * the reclamation stops and returns immediately. The rest of

> > > > > + * the resources are not reclaimed and the defer queue is not

> > > > > + * freed.

> > > > > + *

> > > > > + * @param dq

> > > > > + *   Defer queue to delete.

> > > > > + * @return

> > > > > + *   On success - 0

> > > > > + *   On error - 1

> > > > > + *   Possible rte_errno codes are:

> > > > > + *   - EINVAL - NULL parameters are passed

> > > > > + *   - EAGAIN - Some of the resources have not completed at least 1

> > grace

> > > > > + *		period, try again.

> > > > > + */

> > > > > +__rte_experimental

> > > > > +int

> > > > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> > > > > +

> > > > >  #ifdef __cplusplus

> > > > >  }

> > > > >  #endif

> > > > > diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > > > b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > > > new file mode 100644

> > > > > index 000000000..2122bc36a

> > > > > --- /dev/null

> > > > > +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > >

> > > > Again style suggestion: as it is not public header - don't use rte_

> > > > prefix for naming.

> > > > From my perspective - easier to realize for the reader what is a public

> > > > header, what is not.

> > > Looks like the guidelines are not defined very well. I see one private

> > > file with rte_ prefix. I see Stephen not using rte_ prefix. I do not have any

> > preference. But, a consistent approach is required.

> >

> > That's just a suggestion.

> > For me (and I hope for others) it would be a bit easier.

> > When looking at the code for the first time I had to look at meson.build to check is

> > it a public header or not.

> > If the file doesn't have 'rte_' prefix, I assume that it is an internal one

> > straightway.

> > But , as you said, there is no exact guidelines here, so up to you to decide.

> I think it makes sense to remove 'rte_' prefix. I will also change the file name to have '_private' suffix.

> There are some inconsistencies in the existing code, will send a patch to correct them to follow this approach.

> 

> >

> > >

> > > >

> > > > > @@ -0,0 +1,46 @@

> > > > > +/* SPDX-License-Identifier: BSD-3-Clause

> > > > > + * Copyright (c) 2019 Arm Limited  */

> > > > > +

> > > > > +#ifndef _RTE_RCU_QSBR_PVT_H_

> > > > > +#define _RTE_RCU_QSBR_PVT_H_

> > > > > +

> > > > > +/**

> > > > > + * This file is private to the RCU library. It should not be

> > > > > +included

> > > > > + * by the user of this library.

> > > > > + */

> > > > > +

> > > > > +#ifdef __cplusplus

> > > > > +extern "C" {

> > > > > +#endif

> > > > > +

> > > > > +#include "rte_rcu_qsbr.h"

> > > > > +

> > > > > +/* RTE defer queue structure.

> > > > > + * This structure holds the defer queue. The defer queue is used

> > > > > +to

> > > > > + * hold the deleted entries from the data structure that are not

> > > > > + * yet freed.

> > > > > + */

> > > > > +struct rte_rcu_qsbr_dq {

> > > > > +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> > > > > +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> > > > > +	uint32_t size;

> > > > > +	/**< Number of elements in the defer queue */

> > > > > +	uint32_t esize;

> > > > > +	/**< Size (in bytes) of data stored on the defer queue */

> > > > > +	rte_rcu_qsbr_free_resource f;

> > > > > +	/**< Function to call to free the resource. */

> > > > > +	void *p;

> > > > > +	/**< Pointer passed to the free function. Typically, this is the

> > > > > +	 *   pointer to the data structure to which the resource to free

> > > > > +	 *   belongs.

> > > > > +	 */

> > > > > +	char e[0];

> > > > > +	/**< Temporary storage to copy the defer queue element. */

> > > >

> > > > Do you really need 'e' at all?

> > > > Can't it be just temporary stack variable?

> > > Ok, will check.

> > >

> > > >

> > > > > +};

> > > > > +

> > > > > +#ifdef __cplusplus

> > > > > +}

> > > > > +#endif

> > > > > +

> > > > > +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> > > > > diff --git a/lib/librte_rcu/rte_rcu_version.map

> > > > > b/lib/librte_rcu/rte_rcu_version.map

> > > > > index f8b9ef2ab..dfac88a37 100644

> > > > > --- a/lib/librte_rcu/rte_rcu_version.map

> > > > > +++ b/lib/librte_rcu/rte_rcu_version.map

> > > > > @@ -8,6 +8,10 @@ EXPERIMENTAL {

> > > > >  	rte_rcu_qsbr_synchronize;

> > > > >  	rte_rcu_qsbr_thread_register;

> > > > >  	rte_rcu_qsbr_thread_unregister;

> > > > > +	rte_rcu_qsbr_dq_create;

> > > > > +	rte_rcu_qsbr_dq_enqueue;

> > > > > +	rte_rcu_qsbr_dq_reclaim;

> > > > > +	rte_rcu_qsbr_dq_delete;

> > > > >

> > > > >  	local: *;

> > > > >  };

> > > > > diff --git a/lib/meson.build b/lib/meson.build index

> > > > > e5ff83893..0e1be8407 100644

> > > > > --- a/lib/meson.build

> > > > > +++ b/lib/meson.build

> > > > > @@ -11,7 +11,9 @@

> > > > >  libraries = [

> > > > >  	'kvargs', # eal depends on kvargs

> > > > >  	'eal', # everything depends on eal

> > > > > -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > > > > +	'ring',

> > > > > +	'rcu', # rcu depends on ring

> > > > > +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > > > >  	'cmdline',

> > > > >  	'metrics', # bitrate/latency stats depends on this

> > > > >  	'hash',    # efd depends on this

> > > > > @@ -22,7 +24,7 @@ libraries = [

> > > > >  	'gro', 'gso', 'ip_frag', 'jobstats',

> > > > >  	'kni', 'latencystats', 'lpm', 'member',

> > > > >  	'power', 'pdump', 'rawdev',

> > > > > -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> > > > > +	'reorder', 'sched', 'security', 'stack', 'vhost',

> > > > >  	# ipsec lib depends on net, crypto and security

> > > > >  	'ipsec',

> > > > >  	# add pkt framework libs which use other libs from above

> > > > > --

> > > > > 2.17.1

Medvedkin, Vladimir Oct. 7, 2019, 1:11 p.m. UTC | #10

Hi Honnappa,

On 01/10/2019 07:29, Honnappa Nagarahalli wrote:
> Add resource reclamation APIs to make it simple for applications

> and libraries to integrate rte_rcu library.

>

> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> ---

>   app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

>   lib/librte_rcu/meson.build         |   2 +

>   lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

>   lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

>   lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

>   lib/librte_rcu/rte_rcu_version.map |   4 +

>   lib/meson.build                    |   6 +-

>   7 files changed, 700 insertions(+), 3 deletions(-)

>   create mode 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

>

> diff --git a/app/test/test_rcu_qsbr.c b/app/test/test_rcu_qsbr.c

> index d1b9e46a2..3a6815243 100644

> --- a/app/test/test_rcu_qsbr.c

> +++ b/app/test/test_rcu_qsbr.c

> @@ -1,8 +1,9 @@

>   /* SPDX-License-Identifier: BSD-3-Clause

> - * Copyright (c) 2018 Arm Limited

> + * Copyright (c) 2019 Arm Limited

>    */

>   

>   #include <stdio.h>

> +#include <string.h>

>   #include <rte_pause.h>

>   #include <rte_rcu_qsbr.h>

>   #include <rte_hash.h>

> @@ -33,6 +34,7 @@ static uint32_t *keys;

>   #define COUNTER_VALUE 4096

>   static uint32_t *hash_data[RTE_MAX_LCORE][TOTAL_ENTRY];

>   static uint8_t writer_done;

> +static uint8_t cb_failed;

>   

>   static struct rte_rcu_qsbr *t[RTE_MAX_LCORE];

>   struct rte_hash *h[RTE_MAX_LCORE];

> @@ -582,6 +584,269 @@ test_rcu_qsbr_thread_offline(void)

>   	return 0;

>   }

>   

> +static void

> +rte_rcu_qsbr_test_free_resource(void *p, void *e)

> +{

> +	if (p != NULL && e != NULL) {

> +		printf("%s: Test failed\n", __func__);

> +		cb_failed = 1;

> +	}

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_create: create a queue used to store the data structure

> + * elements that can be freed later. This queue is referred to as 'defer queue'.

> + */

> +static int

> +test_rcu_qsbr_dq_create(void)

> +{

> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> +	struct rte_rcu_qsbr_dq_parameters params;

> +	struct rte_rcu_qsbr_dq *dq;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_create()\n");

> +

> +	/* Pass invalid parameters */

> +	dq = rte_rcu_qsbr_dq_create(NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> +	params.name = rcu_dq_name;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	params.f = rte_rcu_qsbr_test_free_resource;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> +	params.v = t[0];

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	params.size = 1;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	params.esize = 3;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid params");

> +

> +	/* Pass all valid parameters */

> +	params.esize = 16;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid params");

> +	rte_rcu_qsbr_dq_delete(dq);

> +

> +	return 0;

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer queue,

> + * to be freed later after atleast one grace period is over.

> + */

> +static int

> +test_rcu_qsbr_dq_enqueue(void)

> +{

> +	int ret;

> +	uint64_t r;

> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> +	struct rte_rcu_qsbr_dq_parameters params;

> +	struct rte_rcu_qsbr_dq *dq;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_enqueue()\n");

> +

> +	/* Create a queue with simple parameters */

> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> +	params.name = rcu_dq_name;

> +	params.f = rte_rcu_qsbr_test_free_resource;

> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> +	params.v = t[0];

> +	params.size = 1;

> +	params.esize = 16;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid params");

> +

> +	/* Pass invalid parameters */

> +	ret = rte_rcu_qsbr_dq_enqueue(NULL, NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid params");

> +

> +	ret = rte_rcu_qsbr_dq_enqueue(dq, NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid params");

> +

> +	ret = rte_rcu_qsbr_dq_enqueue(NULL, &r);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid params");

> +

> +	ret = rte_rcu_qsbr_dq_delete(dq);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 1), "dq delete valid params");

> +

> +	return 0;

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_reclaim: Reclaim resources from the defer queue.

> + */

> +static int

> +test_rcu_qsbr_dq_reclaim(void)

> +{

> +	int ret;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_reclaim()\n");

> +

> +	/* Pass invalid parameters */

> +	ret = rte_rcu_qsbr_dq_reclaim(NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq reclaim invalid params");

> +

> +	return 0;

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_delete: Delete a defer queue.

> + */

> +static int

> +test_rcu_qsbr_dq_delete(void)

> +{

> +	int ret;

> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> +	struct rte_rcu_qsbr_dq_parameters params;

> +	struct rte_rcu_qsbr_dq *dq;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_delete()\n");

> +

> +	/* Pass invalid parameters */

> +	ret = rte_rcu_qsbr_dq_delete(NULL);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq delete invalid params");

> +

> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> +	params.name = rcu_dq_name;

> +	params.f = rte_rcu_qsbr_test_free_resource;

> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> +	params.v = t[0];

> +	params.size = 1;

> +	params.esize = 16;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid params");

> +	ret = rte_rcu_qsbr_dq_delete(dq);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid params");

> +

> +	return 0;

> +}

> +

> +/*

> + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer queue,

> + * to be freed later after atleast one grace period is over.

> + */

> +static int

> +test_rcu_qsbr_dq_functional(int32_t size, int32_t esize)

> +{

> +	int i, j, ret;

> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> +	struct rte_rcu_qsbr_dq_parameters params;

> +	struct rte_rcu_qsbr_dq *dq;

> +	uint64_t *e;

> +	uint64_t sc = 200;

> +	int max_entries;

> +

> +	printf("\nTest rte_rcu_qsbr_dq_xxx functional tests()\n");

> +	printf("Size = %d, esize = %d\n", size, esize);

> +

> +	e = (uint64_t *)rte_zmalloc(NULL, esize, RTE_CACHE_LINE_SIZE);

> +	if (e == NULL)

> +		return 0;

> +	cb_failed = 0;

> +

> +	/* Initialize the RCU variable. No threads are registered */

> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> +

> +	/* Create a queue with simple parameters */

> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> +	params.name = rcu_dq_name;

> +	params.f = rte_rcu_qsbr_test_free_resource;

> +	params.v = t[0];

> +	params.size = size;

> +	params.esize = esize;

> +	dq = rte_rcu_qsbr_dq_create(&params);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid params");

> +

> +	/* Given the size and esize, calculate the maximum number of entries

> +	 * that can be stored on the defer queue (look at the logic used

> +	 * in capacity calculation of rte_ring).

> +	 */

> +	max_entries = rte_align32pow2(((esize/8 + 1) * size) + 1);

> +	max_entries = (max_entries - 1)/(esize/8 + 1);

> +

> +	/* Enqueue few counters starting with the value 'sc' */

> +	/* The queue size will be rounded up to 2. The enqueue API also

> +	 * reclaims if the queue size is above certain limit. Since, there

> +	 * are no threads registered, reclamation succedes. Hence, it should

> +	 * be possible to enqueue more than the provided queue size.

> +	 */

> +	for (i = 0; i < 10; i++) {

> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> +			"dq enqueue functional");

> +		for (j = 0; j < esize/8; j++)

> +			e[j] = sc++;

> +	}

> +

> +	/* Register a thread on the RCU QSBR variable. Reclamation will not

> +	 * succeed. It should not be possible to enqueue more than the size

> +	 * number of resources.

> +	 */

> +	rte_rcu_qsbr_thread_register(t[0], 1);

> +	rte_rcu_qsbr_thread_online(t[0], 1);

> +

> +	for (i = 0; i < max_entries; i++) {

> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> +			"dq enqueue functional");

> +		for (j = 0; j < esize/8; j++)

> +			e[j] = sc++;

> +	}

> +

> +	/* Enqueue fails as queue is full */

> +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue functional");

> +

> +	/* Delete should fail as there are elements in defer queue which

> +	 * cannot be reclaimed.

> +	 */

> +	ret = rte_rcu_qsbr_dq_delete(dq);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq delete valid params");

> +

> +	/* Report quiescent state, enqueue should succeed */

> +	rte_rcu_qsbr_quiescent(t[0], 1);

> +	for (i = 0; i < max_entries; i++) {

> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> +			"dq enqueue functional");

> +		for (j = 0; j < esize/8; j++)

> +			e[j] = sc++;

> +	}

> +

> +	/* Queue is full */

> +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue functional");

> +

> +	/* Report quiescent state, delete should succeed */

> +	rte_rcu_qsbr_quiescent(t[0], 1);

> +	ret = rte_rcu_qsbr_dq_delete(dq);

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid params");

> +

> +	/* Validate that call back function did not return any error */

> +	TEST_RCU_QSBR_RETURN_IF_ERROR((cb_failed == 1), "CB failed");

> +

> +	rte_free(e);

> +	return 0;

> +}

> +

>   /*

>    * rte_rcu_qsbr_dump: Dump status of a single QS variable to a file

>    */

> @@ -1025,6 +1290,18 @@ test_rcu_qsbr_main(void)

>   	if (test_rcu_qsbr_thread_offline() < 0)

>   		goto test_fail;

>   

> +	if (test_rcu_qsbr_dq_create() < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_reclaim() < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_delete() < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_enqueue() < 0)

> +		goto test_fail;

> +

>   	printf("\nFunctional tests\n");

>   

>   	if (test_rcu_qsbr_sw_sv_3qs() < 0)

> @@ -1033,6 +1310,18 @@ test_rcu_qsbr_main(void)

>   	if (test_rcu_qsbr_mw_mv_mqs() < 0)

>   		goto test_fail;

>   

> +	if (test_rcu_qsbr_dq_functional(1, 8) < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_functional(2, 8) < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_functional(303, 16) < 0)

> +		goto test_fail;

> +

> +	if (test_rcu_qsbr_dq_functional(7, 128) < 0)

> +		goto test_fail;

> +

>   	free_rcu();

>   

>   	printf("\n");

> diff --git a/lib/librte_rcu/meson.build b/lib/librte_rcu/meson.build

> index 62920ba02..e280b29c1 100644

> --- a/lib/librte_rcu/meson.build

> +++ b/lib/librte_rcu/meson.build

> @@ -10,3 +10,5 @@ headers = files('rte_rcu_qsbr.h')

>   if cc.get_id() == 'clang' and dpdk_conf.get('RTE_ARCH_64') == false

>   	ext_deps += cc.find_library('atomic')

>   endif

> +

> +deps += ['ring']

> diff --git a/lib/librte_rcu/rte_rcu_qsbr.c b/lib/librte_rcu/rte_rcu_qsbr.c

> index ce7f93dd3..76814f50b 100644

> --- a/lib/librte_rcu/rte_rcu_qsbr.c

> +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> @@ -21,6 +21,7 @@

>   #include <rte_errno.h>

>   

>   #include "rte_rcu_qsbr.h"

> +#include "rte_rcu_qsbr_pvt.h"

>   

>   /* Get the memory size of QSBR variable */

>   size_t

> @@ -267,6 +268,190 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)

>   	return 0;

>   }

>   

> +/* Create a queue used to store the data structure elements that can

> + * be freed later. This queue is referred to as 'defer queue'.

> + */

> +struct rte_rcu_qsbr_dq *

> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params)

> +{

> +	struct rte_rcu_qsbr_dq *dq;

> +	uint32_t qs_fifo_size;

> +

> +	if (params == NULL || params->f == NULL ||

> +		params->v == NULL || params->name == NULL ||

> +		params->size == 0 || params->esize == 0 ||

> +		(params->esize % 8 != 0)) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return NULL;

> +	}

> +

> +	dq = rte_zmalloc(NULL,

> +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> +		RTE_CACHE_LINE_SIZE);

> +	if (dq == NULL) {

> +		rte_errno = ENOMEM;

> +

> +		return NULL;

> +	}

> +

> +	/* round up qs_fifo_size to next power of two that is not less than

> +	 * max_size.

> +	 */

> +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> +					* params->size) + 1);

> +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> +					SOCKET_ID_ANY, 0);

> +	if (dq->r == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): defer queue create failed\n", __func__);

> +		rte_free(dq);

> +		return NULL;

> +	}

> +

> +	dq->v = params->v;

> +	dq->size = params->size;

> +	dq->esize = params->esize;

> +	dq->f = params->f;

> +	dq->p = params->p;

> +

> +	return dq;

> +}

> +

> +/* Enqueue one resource to the defer queue to free after the grace

> + * period is over.

> + */

> +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e)

> +{

> +	uint64_t token;

> +	uint64_t *tmp;

> +	uint32_t i;

> +	uint32_t cur_size, free_size;

> +

> +	if (dq == NULL || e == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;

> +	}

> +

> +	/* Start the grace period */

> +	token = rte_rcu_qsbr_start(dq->v);

> +

> +	/* Reclaim resources if the queue is 1/8th full. This helps

> +	 * the queue from growing too large and allows time for reader

> +	 * threads to report their quiescent state.

> +	 */

> +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

> +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

> +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> +			"%s(): Triggering reclamation\n", __func__);

> +		rte_rcu_qsbr_dq_reclaim(dq);

> +	}


There are two problems I see:

1. rte_rcu_qsbr_dq_reclaim() reclaims at most 1/16 of the defer queue, while
it is triggered once the queue is 1/8 full. This means that there will always
be about 1/16 of non-reclaimed entries left in the queue.

2. The number of entries to reclaim depends on dq->size. So,
rte_rcu_qsbr_dq_reclaim() could take a lot of cycles. For the LPM library
this means that rte_lpm_delete() sometimes takes a long time.

So, my suggestions here would be

- trigger rte_rcu_qsbr_dq_reclaim() with every enqueue

- reclaim a small number of entries (could be configurable at creation time)

- provide API to trigger reclaim from the application manually.

> +

> +	/* Check if there is space for atleast for 1 resource */

> +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> +	if (!free_size) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Defer queue is full\n", __func__);

> +		rte_errno = ENOSPC;

> +		return 1;

> +	}

> +

> +	/* Enqueue the resource */

> +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> +

> +	/* The resource to enqueue needs to be a multiple of 64b

> +	 * due to the limitation of the rte_ring implementation.

> +	 */

> +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

> +

> +	return 0;

> +}

> +

> +/* Reclaim resources from the defer queue. */

> +int

> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq)

> +{

> +	uint32_t max_cnt;

> +	uint32_t cnt;

> +	void *token;

> +	uint64_t *tmp;

> +	uint32_t i;

> +

> +	if (dq == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;

> +	}

> +

> +	/* Anything to reclaim? */

> +	if (rte_ring_count(dq->r) == 0)

> +		return 0;

> +

> +	/* Reclaim at the max 1/16th the total number of entries. */

> +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> +	cnt = 0;

> +

> +	/* Check reader threads quiescent state and reclaim resources */

> +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> +			== 1)) {

> +		(void)rte_ring_sc_dequeue(dq->r, &token);

> +		/* The resource to dequeue needs to be a multiple of 64b

> +		 * due to the limitation of the rte_ring implementation.

> +		 */

> +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> +			i++, tmp++)

> +			(void)rte_ring_sc_dequeue(dq->r,

> +					(void *)(uintptr_t)tmp);

> +		dq->f(dq->p, dq->e);

> +

> +		cnt++;

> +	}

> +

> +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> +

> +	if (cnt == 0) {

> +		/* No resources were reclaimed */

> +		rte_errno = EAGAIN;

> +		return 1;

> +	}

> +

> +	return 0;

> +}

> +

> +/* Delete a defer queue. */

> +int

> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq)

> +{

> +	if (dq == NULL) {

> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> +			"%s(): Invalid input parameter\n", __func__);

> +		rte_errno = EINVAL;

> +

> +		return 1;

> +	}

> +

> +	/* Reclaim all the resources */

> +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> +		/* Error number is already set by the reclaim API */

> +		return 1;

> +

> +	rte_ring_free(dq->r);

> +	rte_free(dq);

> +

> +	return 0;

> +}

> +

>   int rte_rcu_log_type;

>   

>   RTE_INIT(rte_rcu_register)

> diff --git a/lib/librte_rcu/rte_rcu_qsbr.h b/lib/librte_rcu/rte_rcu_qsbr.h

> index c80f15c00..185d4b50a 100644

> --- a/lib/librte_rcu/rte_rcu_qsbr.h

> +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> @@ -34,6 +34,7 @@ extern "C" {

>   #include <rte_lcore.h>

>   #include <rte_debug.h>

>   #include <rte_atomic.h>

> +#include <rte_ring.h>

>   

>   extern int rte_rcu_log_type;

>   

> @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

>   	 */

>   } __rte_cache_aligned;

>   

> +/**

> + * Call back function called to free the resources.

> + *

> + * @param p

> + *   Pointer provided while creating the defer queue

> + * @param e

> + *   Pointer to the resource data stored on the defer queue

> + *

> + * @return

> + *   None

> + */

> +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> +

> +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> +

> +/**

> + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> + */

> +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> +

> +/**

> + *  Reclaim at the max 1/16th the total number of resources.

> + */

> +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> +

> +/**

> + * Parameters used when creating the defer queue.

> + */

> +struct rte_rcu_qsbr_dq_parameters {

> +	const char *name;

> +	/**< Name of the queue. */

> +	uint32_t size;

> +	/**< Number of entries in queue. Typically, this will be

> +	 *   the same as the maximum number of entries supported in the

> +	 *   lock free data structure.

> +	 *   Data structures with unbounded number of entries is not

> +	 *   supported currently.

> +	 */

> +	uint32_t esize;

> +	/**< Size (in bytes) of each element in the defer queue.

> +	 *   This has to be multiple of 8B as the rte_ring APIs

> +	 *   support 8B element sizes only.

> +	 */

> +	rte_rcu_qsbr_free_resource f;

> +	/**< Function to call to free the resource. */

> +	void *p;

> +	/**< Pointer passed to the free function. Typically, this is the

> +	 *   pointer to the data structure to which the resource to free

> +	 *   belongs. This can be NULL.

> +	 */

> +	struct rte_rcu_qsbr *v;

> +	/**< RCU QSBR variable to use for this defer queue */

> +};

> +

> +/* RTE defer queue structure.

> + * This structure holds the defer queue. The defer queue is used to

> + * hold the deleted entries from the data structure that are not

> + * yet freed.

> + */

> +struct rte_rcu_qsbr_dq;

> +

>   /**

>    * @warning

>    * @b EXPERIMENTAL: this API may change without prior notice

> @@ -648,6 +710,113 @@ __rte_experimental

>   int

>   rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);

>   

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Create a queue used to store the data structure elements that can

> + * be freed later. This queue is referred to as 'defer queue'.

> + *

> + * @param params

> + *   Parameters to create a defer queue.

> + * @return

> + *   On success - Valid pointer to defer queue

> + *   On error - NULL

> + *   Possible rte_errno codes are:

> + *   - EINVAL - NULL parameters are passed

> + *   - ENOMEM - Not enough memory

> + */

> +__rte_experimental

> +struct rte_rcu_qsbr_dq *

> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Enqueue one resource to the defer queue and start the grace period.

> + * The resource will be freed later after at least one grace period

> + * is over.

> + *

> + * If the defer queue is full, it will attempt to reclaim resources.

> + * It will also reclaim resources at regular intervals to avoid

> + * the defer queue from growing too big.

> + *

> + * This API is not multi-thread safe. It is expected that the caller

> + * provides multi-thread safety by locking a mutex or some other means.

> + *

> + * A lock free multi-thread writer algorithm could achieve multi-thread

> + * safety by creating and using one defer queue per thread.

> + *

> + * @param dq

> + *   Defer queue to allocate an entry from.

> + * @param e

> + *   Pointer to resource data to copy to the defer queue. The size of

> + *   the data to copy is equal to the element size provided when the

> + *   defer queue was created.

> + * @return

> + *   On success - 0

> + *   On error - 1 with rte_errno set to

> + *   - EINVAL - NULL parameters are passed

> + *   - ENOSPC - Defer queue is full. This condition can not happen

> + *		if the defer queue size is equal (or larger) than the

> + *		number of elements in the data structure.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Reclaim resources from the defer queue.

> + *

> + * This API is not multi-thread safe. It is expected that the caller

> + * provides multi-thread safety by locking a mutex or some other means.

> + *

> + * A lock free multi-thread writer algorithm could achieve multi-thread

> + * safety by creating and using one defer queue per thread.

> + *

> + * @param dq

> + *   Defer queue to reclaim an entry from.

> + * @return

> + *   On successful reclamation of at least 1 resource - 0

> + *   On error - 1 with rte_errno set to

> + *   - EINVAL - NULL parameters are passed

> + *   - EAGAIN - None of the resources have completed at least 1 grace period,

> + *		try again.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> +

> +/**

> + * @warning

> + * @b EXPERIMENTAL: this API may change without prior notice

> + *

> + * Delete a defer queue.

> + *

> + * It tries to reclaim all the resources on the defer queue.

> + * If any of the resources have not completed the grace period

> + * the reclamation stops and returns immediately. The rest of

> + * the resources are not reclaimed and the defer queue is not

> + * freed.

> + *

> + * @param dq

> + *   Defer queue to delete.

> + * @return

> + *   On success - 0

> + *   On error - 1

> + *   Possible rte_errno codes are:

> + *   - EINVAL - NULL parameters are passed

> + *   - EAGAIN - Some of the resources have not completed at least 1 grace

> + *		period, try again.

> + */

> +__rte_experimental

> +int

> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> +

>   #ifdef __cplusplus

>   }

>   #endif

> diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> new file mode 100644

> index 000000000..2122bc36a

> --- /dev/null

> +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> @@ -0,0 +1,46 @@

> +/* SPDX-License-Identifier: BSD-3-Clause

> + * Copyright (c) 2019 Arm Limited

> + */

> +

> +#ifndef _RTE_RCU_QSBR_PVT_H_

> +#define _RTE_RCU_QSBR_PVT_H_

> +

> +/**

> + * This file is private to the RCU library. It should not be included

> + * by the user of this library.

> + */

> +

> +#ifdef __cplusplus

> +extern "C" {

> +#endif

> +

> +#include "rte_rcu_qsbr.h"

> +

> +/* RTE defer queue structure.

> + * This structure holds the defer queue. The defer queue is used to

> + * hold the deleted entries from the data structure that are not

> + * yet freed.

> + */

> +struct rte_rcu_qsbr_dq {

> +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> +	uint32_t size;

> +	/**< Number of elements in the defer queue */

> +	uint32_t esize;

> +	/**< Size (in bytes) of data stored on the defer queue */

> +	rte_rcu_qsbr_free_resource f;

> +	/**< Function to call to free the resource. */

> +	void *p;

> +	/**< Pointer passed to the free function. Typically, this is the

> +	 *   pointer to the data structure to which the resource to free

> +	 *   belongs.

> +	 */

> +	char e[0];

> +	/**< Temporary storage to copy the defer queue element. */

> +};

> +

> +#ifdef __cplusplus

> +}

> +#endif

> +

> +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> diff --git a/lib/librte_rcu/rte_rcu_version.map b/lib/librte_rcu/rte_rcu_version.map

> index f8b9ef2ab..dfac88a37 100644

> --- a/lib/librte_rcu/rte_rcu_version.map

> +++ b/lib/librte_rcu/rte_rcu_version.map

> @@ -8,6 +8,10 @@ EXPERIMENTAL {

>   	rte_rcu_qsbr_synchronize;

>   	rte_rcu_qsbr_thread_register;

>   	rte_rcu_qsbr_thread_unregister;

> +	rte_rcu_qsbr_dq_create;

> +	rte_rcu_qsbr_dq_enqueue;

> +	rte_rcu_qsbr_dq_reclaim;

> +	rte_rcu_qsbr_dq_delete;

>   

>   	local: *;

>   };

> diff --git a/lib/meson.build b/lib/meson.build

> index e5ff83893..0e1be8407 100644

> --- a/lib/meson.build

> +++ b/lib/meson.build

> @@ -11,7 +11,9 @@

>   libraries = [

>   	'kvargs', # eal depends on kvargs

>   	'eal', # everything depends on eal

> -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> +	'ring',

> +	'rcu', # rcu depends on ring

> +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

>   	'cmdline',

>   	'metrics', # bitrate/latency stats depends on this

>   	'hash',    # efd depends on this

> @@ -22,7 +24,7 @@ libraries = [

>   	'gro', 'gso', 'ip_frag', 'jobstats',

>   	'kni', 'latencystats', 'lpm', 'member',

>   	'power', 'pdump', 'rawdev',

> -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> +	'reorder', 'sched', 'security', 'stack', 'vhost',

>   	# ipsec lib depends on net, crypto and security

>   	'ipsec',

>   	# add pkt framework libs which use other libs from above


-- 
Regards,
Vladimir

Honnappa Nagarahalli Oct. 13, 2019, 3:02 a.m. UTC | #11

Hi Vladimir,
	Apologies for the delayed response; I had to run a few experiments.

<snip>

> 

> Hi Honnappa,

> 

> On 01/10/2019 07:29, Honnappa Nagarahalli wrote:

> > Add resource reclamation APIs to make it simple for applications and

> > libraries to integrate rte_rcu library.

> >

> > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> > Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> > ---

> >   app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

> >   lib/librte_rcu/meson.build         |   2 +

> >   lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

> >   lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

> >   lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

> >   lib/librte_rcu/rte_rcu_version.map |   4 +

> >   lib/meson.build                    |   6 +-

> >   7 files changed, 700 insertions(+), 3 deletions(-)

> >   create mode 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

> >

> > diff --git a/app/test/test_rcu_qsbr.c b/app/test/test_rcu_qsbr.c index

> > d1b9e46a2..3a6815243 100644

> > --- a/app/test/test_rcu_qsbr.c

> > +++ b/app/test/test_rcu_qsbr.c

> > @@ -1,8 +1,9 @@

> >   /* SPDX-License-Identifier: BSD-3-Clause

> > - * Copyright (c) 2018 Arm Limited

> > + * Copyright (c) 2019 Arm Limited

> >    */

> >

> >   #include <stdio.h>

> > +#include <string.h>

> >   #include <rte_pause.h>

> >   #include <rte_rcu_qsbr.h>

> >   #include <rte_hash.h>

> > @@ -33,6 +34,7 @@ static uint32_t *keys;

> >   #define COUNTER_VALUE 4096

> >   static uint32_t *hash_data[RTE_MAX_LCORE][TOTAL_ENTRY];

> >   static uint8_t writer_done;

> > +static uint8_t cb_failed;

> >

> >   static struct rte_rcu_qsbr *t[RTE_MAX_LCORE];

> >   struct rte_hash *h[RTE_MAX_LCORE];

> > @@ -582,6 +584,269 @@ test_rcu_qsbr_thread_offline(void)

> >   	return 0;

> >   }

> >

> > +static void

> > +rte_rcu_qsbr_test_free_resource(void *p, void *e) {

> > +	if (p != NULL && e != NULL) {

> > +		printf("%s: Test failed\n", __func__);

> > +		cb_failed = 1;

> > +	}

> > +}

> > +

> > +/*

> > + * rte_rcu_qsbr_dq_create: create a queue used to store the data

> > +structure

> > + * elements that can be freed later. This queue is referred to as 'defer

> queue'.

> > + */

> > +static int

> > +test_rcu_qsbr_dq_create(void)

> > +{

> > +	char rcu_dq_name[RTE_RING_NAMESIZE];

> > +	struct rte_rcu_qsbr_dq_parameters params;

> > +	struct rte_rcu_qsbr_dq *dq;

> > +

> > +	printf("\nTest rte_rcu_qsbr_dq_create()\n");

> > +

> > +	/* Pass invalid parameters */

> > +	dq = rte_rcu_qsbr_dq_create(NULL);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> > +params");

> > +

> > +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> > +params");

> > +

> > +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> > +	params.name = rcu_dq_name;

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> > +params");

> > +

> > +	params.f = rte_rcu_qsbr_test_free_resource;

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> > +params");

> > +

> > +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> > +	params.v = t[0];

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> > +params");

> > +

> > +	params.size = 1;

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> > +params");

> > +

> > +	params.esize = 3;

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> > +params");

> > +

> > +	/* Pass all valid parameters */

> > +	params.esize = 16;

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

> params");

> > +	rte_rcu_qsbr_dq_delete(dq);

> > +

> > +	return 0;

> > +}

> > +

> > +/*

> > + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer queue,

> > + * to be freed later after atleast one grace period is over.

> > + */

> > +static int

> > +test_rcu_qsbr_dq_enqueue(void)

> > +{

> > +	int ret;

> > +	uint64_t r;

> > +	char rcu_dq_name[RTE_RING_NAMESIZE];

> > +	struct rte_rcu_qsbr_dq_parameters params;

> > +	struct rte_rcu_qsbr_dq *dq;

> > +

> > +	printf("\nTest rte_rcu_qsbr_dq_enqueue()\n");

> > +

> > +	/* Create a queue with simple parameters */

> > +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> > +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> > +	params.name = rcu_dq_name;

> > +	params.f = rte_rcu_qsbr_test_free_resource;

> > +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> > +	params.v = t[0];

> > +	params.size = 1;

> > +	params.esize = 16;

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

> > +params");

> > +

> > +	/* Pass invalid parameters */

> > +	ret = rte_rcu_qsbr_dq_enqueue(NULL, NULL);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid

> > +params");

> > +

> > +	ret = rte_rcu_qsbr_dq_enqueue(dq, NULL);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid

> > +params");

> > +

> > +	ret = rte_rcu_qsbr_dq_enqueue(NULL, &r);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid

> > +params");

> > +

> > +	ret = rte_rcu_qsbr_dq_delete(dq);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 1), "dq delete valid

> params");

> > +

> > +	return 0;

> > +}

> > +

> > +/*

> > + * rte_rcu_qsbr_dq_reclaim: Reclaim resources from the defer queue.

> > + */

> > +static int

> > +test_rcu_qsbr_dq_reclaim(void)

> > +{

> > +	int ret;

> > +

> > +	printf("\nTest rte_rcu_qsbr_dq_reclaim()\n");

> > +

> > +	/* Pass invalid parameters */

> > +	ret = rte_rcu_qsbr_dq_reclaim(NULL);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq reclaim invalid

> > +params");

> > +

> > +	return 0;

> > +}

> > +

> > +/*

> > + * rte_rcu_qsbr_dq_delete: Delete a defer queue.

> > + */

> > +static int

> > +test_rcu_qsbr_dq_delete(void)

> > +{

> > +	int ret;

> > +	char rcu_dq_name[RTE_RING_NAMESIZE];

> > +	struct rte_rcu_qsbr_dq_parameters params;

> > +	struct rte_rcu_qsbr_dq *dq;

> > +

> > +	printf("\nTest rte_rcu_qsbr_dq_delete()\n");

> > +

> > +	/* Pass invalid parameters */

> > +	ret = rte_rcu_qsbr_dq_delete(NULL);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq delete invalid

> > +params");

> > +

> > +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> > +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> > +	params.name = rcu_dq_name;

> > +	params.f = rte_rcu_qsbr_test_free_resource;

> > +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> > +	params.v = t[0];

> > +	params.size = 1;

> > +	params.esize = 16;

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

> params");

> > +	ret = rte_rcu_qsbr_dq_delete(dq);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid

> params");

> > +

> > +	return 0;

> > +}

> > +

> > +/*

> > + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer queue,

> > + * to be freed later after atleast one grace period is over.

> > + */

> > +static int

> > +test_rcu_qsbr_dq_functional(int32_t size, int32_t esize) {

> > +	int i, j, ret;

> > +	char rcu_dq_name[RTE_RING_NAMESIZE];

> > +	struct rte_rcu_qsbr_dq_parameters params;

> > +	struct rte_rcu_qsbr_dq *dq;

> > +	uint64_t *e;

> > +	uint64_t sc = 200;

> > +	int max_entries;

> > +

> > +	printf("\nTest rte_rcu_qsbr_dq_xxx functional tests()\n");

> > +	printf("Size = %d, esize = %d\n", size, esize);

> > +

> > +	e = (uint64_t *)rte_zmalloc(NULL, esize, RTE_CACHE_LINE_SIZE);

> > +	if (e == NULL)

> > +		return 0;

> > +	cb_failed = 0;

> > +

> > +	/* Initialize the RCU variable. No threads are registered */

> > +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> > +

> > +	/* Create a queue with simple parameters */

> > +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> > +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> > +	params.name = rcu_dq_name;

> > +	params.f = rte_rcu_qsbr_test_free_resource;

> > +	params.v = t[0];

> > +	params.size = size;

> > +	params.esize = esize;

> > +	dq = rte_rcu_qsbr_dq_create(&params);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

> > +params");

> > +

> > +	/* Given the size and esize, calculate the maximum number of entries

> > +	 * that can be stored on the defer queue (look at the logic used

> > +	 * in capacity calculation of rte_ring).

> > +	 */

> > +	max_entries = rte_align32pow2(((esize/8 + 1) * size) + 1);

> > +	max_entries = (max_entries - 1)/(esize/8 + 1);

> > +

> > +	/* Enqueue few counters starting with the value 'sc' */

> > +	/* The queue size will be rounded up to 2. The enqueue API also

> > +	 * reclaims if the queue size is above certain limit. Since, there

> > +	 * are no threads registered, reclamation succeeds. Hence, it should

> > +	 * be possible to enqueue more than the provided queue size.

> > +	 */

> > +	for (i = 0; i < 10; i++) {

> > +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> > +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> > +			"dq enqueue functional");

> > +		for (j = 0; j < esize/8; j++)

> > +			e[j] = sc++;

> > +	}

> > +

> > +	/* Register a thread on the RCU QSBR variable. Reclamation will not

> > +	 * succeed. It should not be possible to enqueue more than the size

> > +	 * number of resources.

> > +	 */

> > +	rte_rcu_qsbr_thread_register(t[0], 1);

> > +	rte_rcu_qsbr_thread_online(t[0], 1);

> > +

> > +	for (i = 0; i < max_entries; i++) {

> > +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> > +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> > +			"dq enqueue functional");

> > +		for (j = 0; j < esize/8; j++)

> > +			e[j] = sc++;

> > +	}

> > +

> > +	/* Enqueue fails as queue is full */

> > +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue

> functional");

> > +

> > +	/* Delete should fail as there are elements in defer queue which

> > +	 * cannot be reclaimed.

> > +	 */

> > +	ret = rte_rcu_qsbr_dq_delete(dq);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq delete valid

> params");

> > +

> > +	/* Report quiescent state, enqueue should succeed */

> > +	rte_rcu_qsbr_quiescent(t[0], 1);

> > +	for (i = 0; i < max_entries; i++) {

> > +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> > +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> > +			"dq enqueue functional");

> > +		for (j = 0; j < esize/8; j++)

> > +			e[j] = sc++;

> > +	}

> > +

> > +	/* Queue is full */

> > +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue

> functional");

> > +

> > +	/* Report quiescent state, delete should succeed */

> > +	rte_rcu_qsbr_quiescent(t[0], 1);

> > +	ret = rte_rcu_qsbr_dq_delete(dq);

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid

> params");

> > +

> > +	/* Validate that call back function did not return any error */

> > +	TEST_RCU_QSBR_RETURN_IF_ERROR((cb_failed == 1), "CB failed");

> > +

> > +	rte_free(e);

> > +	return 0;

> > +}

> > +

> >   /*

> >    * rte_rcu_qsbr_dump: Dump status of a single QS variable to a file

> >    */

> > @@ -1025,6 +1290,18 @@ test_rcu_qsbr_main(void)

> >   	if (test_rcu_qsbr_thread_offline() < 0)

> >   		goto test_fail;

> >

> > +	if (test_rcu_qsbr_dq_create() < 0)

> > +		goto test_fail;

> > +

> > +	if (test_rcu_qsbr_dq_reclaim() < 0)

> > +		goto test_fail;

> > +

> > +	if (test_rcu_qsbr_dq_delete() < 0)

> > +		goto test_fail;

> > +

> > +	if (test_rcu_qsbr_dq_enqueue() < 0)

> > +		goto test_fail;

> > +

> >   	printf("\nFunctional tests\n");

> >

> >   	if (test_rcu_qsbr_sw_sv_3qs() < 0)

> > @@ -1033,6 +1310,18 @@ test_rcu_qsbr_main(void)

> >   	if (test_rcu_qsbr_mw_mv_mqs() < 0)

> >   		goto test_fail;

> >

> > +	if (test_rcu_qsbr_dq_functional(1, 8) < 0)

> > +		goto test_fail;

> > +

> > +	if (test_rcu_qsbr_dq_functional(2, 8) < 0)

> > +		goto test_fail;

> > +

> > +	if (test_rcu_qsbr_dq_functional(303, 16) < 0)

> > +		goto test_fail;

> > +

> > +	if (test_rcu_qsbr_dq_functional(7, 128) < 0)

> > +		goto test_fail;

> > +

> >   	free_rcu();

> >

> >   	printf("\n");

> > diff --git a/lib/librte_rcu/meson.build b/lib/librte_rcu/meson.build

> > index 62920ba02..e280b29c1 100644

> > --- a/lib/librte_rcu/meson.build

> > +++ b/lib/librte_rcu/meson.build

> > @@ -10,3 +10,5 @@ headers = files('rte_rcu_qsbr.h')

> >   if cc.get_id() == 'clang' and dpdk_conf.get('RTE_ARCH_64') == false

> >   	ext_deps += cc.find_library('atomic')

> >   endif

> > +

> > +deps += ['ring']

> > diff --git a/lib/librte_rcu/rte_rcu_qsbr.c

> > b/lib/librte_rcu/rte_rcu_qsbr.c index ce7f93dd3..76814f50b 100644

> > --- a/lib/librte_rcu/rte_rcu_qsbr.c

> > +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> > @@ -21,6 +21,7 @@

> >   #include <rte_errno.h>

> >

> >   #include "rte_rcu_qsbr.h"

> > +#include "rte_rcu_qsbr_pvt.h"

> >

> >   /* Get the memory size of QSBR variable */

> >   size_t

> > @@ -267,6 +268,190 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr

> *v)

> >   	return 0;

> >   }

> >

> > +/* Create a queue used to store the data structure elements that can

> > + * be freed later. This queue is referred to as 'defer queue'.

> > + */

> > +struct rte_rcu_qsbr_dq *

> > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > +*params) {

> > +	struct rte_rcu_qsbr_dq *dq;

> > +	uint32_t qs_fifo_size;

> > +

> > +	if (params == NULL || params->f == NULL ||

> > +		params->v == NULL || params->name == NULL ||

> > +		params->size == 0 || params->esize == 0 ||

> > +		(params->esize % 8 != 0)) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return NULL;

> > +	}

> > +

> > +	dq = rte_zmalloc(NULL,

> > +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> > +		RTE_CACHE_LINE_SIZE);

> > +	if (dq == NULL) {

> > +		rte_errno = ENOMEM;

> > +

> > +		return NULL;

> > +	}

> > +

> > +	/* round up qs_fifo_size to next power of two that is not less than

> > +	 * max_size.

> > +	 */

> > +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> > +					* params->size) + 1);

> > +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> > +					SOCKET_ID_ANY, 0);

> > +	if (dq->r == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): defer queue create failed\n", __func__);

> > +		rte_free(dq);

> > +		return NULL;

> > +	}

> > +

> > +	dq->v = params->v;

> > +	dq->size = params->size;

> > +	dq->esize = params->esize;

> > +	dq->f = params->f;

> > +	dq->p = params->p;

> > +

> > +	return dq;

> > +}

> > +

> > +/* Enqueue one resource to the defer queue to free after the grace

> > + * period is over.

> > + */

> > +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e) {

> > +	uint64_t token;

> > +	uint64_t *tmp;

> > +	uint32_t i;

> > +	uint32_t cur_size, free_size;

> > +

> > +	if (dq == NULL || e == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return 1;

> > +	}

> > +

> > +	/* Start the grace period */

> > +	token = rte_rcu_qsbr_start(dq->v);

> > +

> > +	/* Reclaim resources if the queue is 1/8th full. This helps

> > +	 * the queue from growing too large and allows time for reader

> > +	 * threads to report their quiescent state.

> > +	 */

> > +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

> > +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

> > +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > +			"%s(): Triggering reclamation\n", __func__);

> > +		rte_rcu_qsbr_dq_reclaim(dq);

> > +	}

> 

> There are two problems I see:

> 

> 1. rte_rcu_qsbr_dq_reclaim() reclaims only 1/16 of the defer queue while it

> triggers on 1/8. This means that there will always be 1/16 of non reclaimed

> entries in the queue.

There will be 'at least' 1/16 non-reclaimed entries. It could be more depending on the length of the grace period and the rate of deletion.
The trigger of 1/8 is used to give sufficient time for the readers to report their quiescent state. 1/16 is used to spread the load of reclamation across multiple calls and provide a upper bound on the cycles consumed.

> 

> 2. Number of entries to reclaim depend on dq->size. So,

> rte_rcu_qsbr_dq_reclaim() could take a lot of cycles. For LPM library this

That is true. It depends on dq->size (number of tbl8 groups). However, note that there is patch [1] which provides batch reclamation kind of behavior which reduces the cycles consumed by reclamation significantly.

[1] https://patches.dpdk.org/patch/58960/

> means that rte_lpm_delete() sometimes takes a long time.

Agree, sometimes takes additional time. It is good to spread it over multiple calls.

> 

> So, my suggestions here would be

> 

> - trigger rte_rcu_qsbr_dq_reclaim() with every enqueue

Given that the LPM APIs are mainly for control plane, I would think that, the next time LPM API is called, the readers have completed the grace period. But if there are frequent updates, we might end up with empty reclaims which will waste cycles. IMO, this trigger should happen after at least few entries are in the queue. 

> 

> - reclaim small amount of entries (could be configurable of creation time)

Agree. I would keep it a smaller than the trigger amount knowing that the elements added right before the trigger might not have completed the grace period.

> 

> - provide API to trigger reclaim from the application manually.

IMO, this will add additional complexity to the application. I agree that there will be special needs for some applications. I think those applications might have to implement their own methods using the base RCU APIs.
Instead, as agreed in other threads, I suggest we expose the parameters (when to trigger and how much to reclaim) to the application as optional configurable parameters. i.e. if the application does not provide we can use default values. I think this should provide enough flexibility to the application.

> 

> > +

> > +	/* Check if there is space for at least 1 resource */

> > +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> > +	if (!free_size) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Defer queue is full\n", __func__);

> > +		rte_errno = ENOSPC;

> > +		return 1;

> > +	}

> > +

> > +	/* Enqueue the resource */

> > +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> > +

> > +	/* The resource to enqueue needs to be a multiple of 64b

> > +	 * due to the limitation of the rte_ring implementation.

> > +	 */

> > +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> > +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

> > +

> > +	return 0;

> > +}

> > +

> > +/* Reclaim resources from the defer queue. */ int

> > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

> > +	uint32_t max_cnt;

> > +	uint32_t cnt;

> > +	void *token;

> > +	uint64_t *tmp;

> > +	uint32_t i;

> > +

> > +	if (dq == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return 1;

> > +	}

> > +

> > +	/* Anything to reclaim? */

> > +	if (rte_ring_count(dq->r) == 0)

> > +		return 0;

> > +

> > +	/* Reclaim at the max 1/16th the total number of entries. */

> > +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> > +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> > +	cnt = 0;

> > +

> > +	/* Check reader threads quiescent state and reclaim resources */

> > +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> > +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> > +			== 1)) {

> > +		(void)rte_ring_sc_dequeue(dq->r, &token);

> > +		/* The resource to dequeue needs to be a multiple of 64b

> > +		 * due to the limitation of the rte_ring implementation.

> > +		 */

> > +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> > +			i++, tmp++)

> > +			(void)rte_ring_sc_dequeue(dq->r,

> > +					(void *)(uintptr_t)tmp);

> > +		dq->f(dq->p, dq->e);

> > +

> > +		cnt++;

> > +	}

> > +

> > +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> > +

> > +	if (cnt == 0) {

> > +		/* No resources were reclaimed */

> > +		rte_errno = EAGAIN;

> > +		return 1;

> > +	}

> > +

> > +	return 0;

> > +}

> > +

> > +/* Delete a defer queue. */

> > +int

> > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

> > +	if (dq == NULL) {

> > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > +			"%s(): Invalid input parameter\n", __func__);

> > +		rte_errno = EINVAL;

> > +

> > +		return 1;

> > +	}

> > +

> > +	/* Reclaim all the resources */

> > +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> > +		/* Error number is already set by the reclaim API */

> > +		return 1;

> > +

> > +	rte_ring_free(dq->r);

> > +	rte_free(dq);

> > +

> > +	return 0;

> > +}

> > +

> >   int rte_rcu_log_type;

> >

> >   RTE_INIT(rte_rcu_register)

> > diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

> > b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a 100644

> > --- a/lib/librte_rcu/rte_rcu_qsbr.h

> > +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> > @@ -34,6 +34,7 @@ extern "C" {

> >   #include <rte_lcore.h>

> >   #include <rte_debug.h>

> >   #include <rte_atomic.h>

> > +#include <rte_ring.h>

> >

> >   extern int rte_rcu_log_type;

> >

> > @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

> >   	 */

> >   } __rte_cache_aligned;

> >

> > +/**

> > + * Call back function called to free the resources.

> > + *

> > + * @param p

> > + *   Pointer provided while creating the defer queue

> > + * @param e

> > + *   Pointer to the resource data stored on the defer queue

> > + *

> > + * @return

> > + *   None

> > + */

> > +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> > +

> > +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> > +

> > +/**

> > + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> > + */

> > +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> > +

> > +/**

> > + *  Reclaim at the max 1/16th the total number of resources.

> > + */

> > +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> > +

> > +/**

> > + * Parameters used when creating the defer queue.

> > + */

> > +struct rte_rcu_qsbr_dq_parameters {

> > +	const char *name;

> > +	/**< Name of the queue. */

> > +	uint32_t size;

> > +	/**< Number of entries in queue. Typically, this will be

> > +	 *   the same as the maximum number of entries supported in the

> > +	 *   lock free data structure.

> > +	 *   Data structures with unbounded number of entries is not

> > +	 *   supported currently.

> > +	 */

> > +	uint32_t esize;

> > +	/**< Size (in bytes) of each element in the defer queue.

> > +	 *   This has to be multiple of 8B as the rte_ring APIs

> > +	 *   support 8B element sizes only.

> > +	 */

> > +	rte_rcu_qsbr_free_resource f;

> > +	/**< Function to call to free the resource. */

> > +	void *p;

> > +	/**< Pointer passed to the free function. Typically, this is the

> > +	 *   pointer to the data structure to which the resource to free

> > +	 *   belongs. This can be NULL.

> > +	 */

> > +	struct rte_rcu_qsbr *v;

> > +	/**< RCU QSBR variable to use for this defer queue */ };

> > +

> > +/* RTE defer queue structure.

> > + * This structure holds the defer queue. The defer queue is used to

> > + * hold the deleted entries from the data structure that are not

> > + * yet freed.

> > + */

> > +struct rte_rcu_qsbr_dq;

> > +

> >   /**

> >    * @warning

> >    * @b EXPERIMENTAL: this API may change without prior notice @@

> > -648,6 +710,113 @@ __rte_experimental

> >   int

> >   rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);

> >

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Create a queue used to store the data structure elements that can

> > + * be freed later. This queue is referred to as 'defer queue'.

> > + *

> > + * @param params

> > + *   Parameters to create a defer queue.

> > + * @return

> > + *   On success - Valid pointer to defer queue

> > + *   On error - NULL

> > + *   Possible rte_errno codes are:

> > + *   - EINVAL - NULL parameters are passed

> > + *   - ENOMEM - Not enough memory

> > + */

> > +__rte_experimental

> > +struct rte_rcu_qsbr_dq *

> > +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> > +*params);

> > +

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Enqueue one resource to the defer queue and start the grace period.

> > + * The resource will be freed later after at least one grace period

> > + * is over.

> > + *

> > + * If the defer queue is full, it will attempt to reclaim resources.

> > + * It will also reclaim resources at regular intervals to avoid

> > + * the defer queue from growing too big.

> > + *

> > + * This API is not multi-thread safe. It is expected that the caller

> > + * provides multi-thread safety by locking a mutex or some other means.

> > + *

> > + * A lock free multi-thread writer algorithm could achieve

> > +multi-thread

> > + * safety by creating and using one defer queue per thread.

> > + *

> > + * @param dq

> > + *   Defer queue to allocate an entry from.

> > + * @param e

> > + *   Pointer to resource data to copy to the defer queue. The size of

> > + *   the data to copy is equal to the element size provided when the

> > + *   defer queue was created.

> > + * @return

> > + *   On success - 0

> > + *   On error - 1 with rte_errno set to

> > + *   - EINVAL - NULL parameters are passed

> > + *   - ENOSPC - Defer queue is full. This condition can not happen

> > + *		if the defer queue size is equal (or larger) than the

> > + *		number of elements in the data structure.

> > + */

> > +__rte_experimental

> > +int

> > +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> > +

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Reclaim resources from the defer queue.

> > + *

> > + * This API is not multi-thread safe. It is expected that the caller

> > + * provides multi-thread safety by locking a mutex or some other means.

> > + *

> > + * A lock free multi-thread writer algorithm could achieve

> > +multi-thread

> > + * safety by creating and using one defer queue per thread.

> > + *

> > + * @param dq

> > + *   Defer queue to reclaim an entry from.

> > + * @return

> > + *   On successful reclamation of at least 1 resource - 0

> > + *   On error - 1 with rte_errno set to

> > + *   - EINVAL - NULL parameters are passed

> > + *   - EAGAIN - None of the resources have completed at least 1 grace

> period,

> > + *		try again.

> > + */

> > +__rte_experimental

> > +int

> > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> > +

> > +/**

> > + * @warning

> > + * @b EXPERIMENTAL: this API may change without prior notice

> > + *

> > + * Delete a defer queue.

> > + *

> > + * It tries to reclaim all the resources on the defer queue.

> > + * If any of the resources have not completed the grace period

> > + * the reclamation stops and returns immediately. The rest of

> > + * the resources are not reclaimed and the defer queue is not

> > + * freed.

> > + *

> > + * @param dq

> > + *   Defer queue to delete.

> > + * @return

> > + *   On success - 0

> > + *   On error - 1

> > + *   Possible rte_errno codes are:

> > + *   - EINVAL - NULL parameters are passed

> > + *   - EAGAIN - Some of the resources have not completed at least 1 grace

> > + *		period, try again.

> > + */

> > +__rte_experimental

> > +int

> > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> > +

> >   #ifdef __cplusplus

> >   }

> >   #endif

> > diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > new file mode 100644

> > index 000000000..2122bc36a

> > --- /dev/null

> > +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > @@ -0,0 +1,46 @@

> > +/* SPDX-License-Identifier: BSD-3-Clause

> > + * Copyright (c) 2019 Arm Limited

> > + */

> > +

> > +#ifndef _RTE_RCU_QSBR_PVT_H_

> > +#define _RTE_RCU_QSBR_PVT_H_

> > +

> > +/**

> > + * This file is private to the RCU library. It should not be included

> > + * by the user of this library.

> > + */

> > +

> > +#ifdef __cplusplus

> > +extern "C" {

> > +#endif

> > +

> > +#include "rte_rcu_qsbr.h"

> > +

> > +/* RTE defer queue structure.

> > + * This structure holds the defer queue. The defer queue is used to

> > + * hold the deleted entries from the data structure that are not

> > + * yet freed.

> > + */

> > +struct rte_rcu_qsbr_dq {

> > +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> > +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> > +	uint32_t size;

> > +	/**< Number of elements in the defer queue */

> > +	uint32_t esize;

> > +	/**< Size (in bytes) of data stored on the defer queue */

> > +	rte_rcu_qsbr_free_resource f;

> > +	/**< Function to call to free the resource. */

> > +	void *p;

> > +	/**< Pointer passed to the free function. Typically, this is the

> > +	 *   pointer to the data structure to which the resource to free

> > +	 *   belongs.

> > +	 */

> > +	char e[0];

> > +	/**< Temporary storage to copy the defer queue element. */ };

> > +

> > +#ifdef __cplusplus

> > +}

> > +#endif

> > +

> > +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> > diff --git a/lib/librte_rcu/rte_rcu_version.map

> > b/lib/librte_rcu/rte_rcu_version.map

> > index f8b9ef2ab..dfac88a37 100644

> > --- a/lib/librte_rcu/rte_rcu_version.map

> > +++ b/lib/librte_rcu/rte_rcu_version.map

> > @@ -8,6 +8,10 @@ EXPERIMENTAL {

> >   	rte_rcu_qsbr_synchronize;

> >   	rte_rcu_qsbr_thread_register;

> >   	rte_rcu_qsbr_thread_unregister;

> > +	rte_rcu_qsbr_dq_create;

> > +	rte_rcu_qsbr_dq_enqueue;

> > +	rte_rcu_qsbr_dq_reclaim;

> > +	rte_rcu_qsbr_dq_delete;

> >

> >   	local: *;

> >   };

> > diff --git a/lib/meson.build b/lib/meson.build index

> > e5ff83893..0e1be8407 100644

> > --- a/lib/meson.build

> > +++ b/lib/meson.build

> > @@ -11,7 +11,9 @@

> >   libraries = [

> >   	'kvargs', # eal depends on kvargs

> >   	'eal', # everything depends on eal

> > -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > +	'ring',

> > +	'rcu', # rcu depends on ring

> > +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> >   	'cmdline',

> >   	'metrics', # bitrate/latency stats depends on this

> >   	'hash',    # efd depends on this

> > @@ -22,7 +24,7 @@ libraries = [

> >   	'gro', 'gso', 'ip_frag', 'jobstats',

> >   	'kni', 'latencystats', 'lpm', 'member',

> >   	'power', 'pdump', 'rawdev',

> > -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> > +	'reorder', 'sched', 'security', 'stack', 'vhost',

> >   	# ipsec lib depends on net, crypto and security

> >   	'ipsec',

> >   	# add pkt framework libs which use other libs from above

> 

> --

> Regards,

> Vladimir

Honnappa Nagarahalli Oct. 13, 2019, 4:35 a.m. UTC | #12

<snip>

> > > > > > Add resource reclamation APIs to make it simple for

> > > > > > applications and libraries to integrate rte_rcu library.

> > > > > >

> > > > > > Signed-off-by: Honnappa Nagarahalli

> > > > > > <honnappa.nagarahalli@arm.com>

> > > > > > Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> > > > > > ---

> > > > > >  app/test/test_rcu_qsbr.c           | 291

> ++++++++++++++++++++++++++++-

> > > > > >  lib/librte_rcu/meson.build         |   2 +

> > > > > >  lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

> > > > > >  lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

> > > > > >  lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

> > > > > >  lib/librte_rcu/rte_rcu_version.map |   4 +

> > > > > >  lib/meson.build                    |   6 +-

> > > > > >  7 files changed, 700 insertions(+), 3 deletions(-)  create

> > > > > > mode

> > > > > > 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > > > >

> > > > > > diff --git a/lib/librte_rcu/rte_rcu_qsbr.c

> > > > > > b/lib/librte_rcu/rte_rcu_qsbr.c index ce7f93dd3..76814f50b

> > > > > > 100644

> > > > > > --- a/lib/librte_rcu/rte_rcu_qsbr.c

> > > > > > +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> > > > > > @@ -21,6 +21,7 @@

> > > > > >  #include <rte_errno.h>

> > > > > >

> > > > > >  #include "rte_rcu_qsbr.h"

> > > > > > +#include "rte_rcu_qsbr_pvt.h"

> > > > > >

> > > > > >  /* Get the memory size of QSBR variable */  size_t @@ -267,6

> > > > > > +268,190 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)

> > > > > >  	return 0;

> > > > > >  }

> > > > > >

> > > > > > +/* Create a queue used to store the data structure elements

> > > > > > +that can

> > > > > > + * be freed later. This queue is referred to as 'defer queue'.

> > > > > > + */

> > > > > > +struct rte_rcu_qsbr_dq *

> > > > > > +rte_rcu_qsbr_dq_create(const struct

> > > > > > +rte_rcu_qsbr_dq_parameters

> > > > > > +*params) {

> > > > > > +	struct rte_rcu_qsbr_dq *dq;

> > > > > > +	uint32_t qs_fifo_size;

> > > > > > +

> > > > > > +	if (params == NULL || params->f == NULL ||

> > > > > > +		params->v == NULL || params->name == NULL ||

> > > > > > +		params->size == 0 || params->esize == 0 ||

> > > > > > +		(params->esize % 8 != 0)) {

> > > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > > > +		rte_errno = EINVAL;

> > > > > > +

> > > > > > +		return NULL;

> > > > > > +	}

> > > > > > +

> > > > > > +	dq = rte_zmalloc(NULL,

> > > > > > +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> > > > > > +		RTE_CACHE_LINE_SIZE);

> > > > > > +	if (dq == NULL) {

> > > > > > +		rte_errno = ENOMEM;

> > > > > > +

> > > > > > +		return NULL;

> > > > > > +	}

> > > > > > +

> > > > > > +	/* round up qs_fifo_size to next power of two that is not less

> than

> > > > > > +	 * max_size.

> > > > > > +	 */

> > > > > > +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> > > > > > +					* params->size) + 1);

> > > > > > +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> > > > > > +					SOCKET_ID_ANY, 0);

> > > > >

> > > > > If it is going to be not MT safe, then why not to create the

> > > > > ring with (RING_F_SP_ENQ | RING_F_SC_DEQ) flags set?

> > > > Agree.

> > > >

> > > > > Though I think it could be changed to allow MT safe multiple

> > > > > enqueue/single dequeue, see below.

> > > > The MT safe issue is due to reclaim code. The reclaim code has the

> > > > following

> > > sequence:

> > > >

> > > > rte_ring_peek

> > > > rte_rcu_qsbr_check

> > > > rte_ring_dequeue

> > > >

> > > > This entire sequence needs to be atomic as the entry cannot be

> > > > dequeued

> > > without knowing that the grace period for that entry is over.

> > >

> > > I understand that, though I believe at least it should be possible

> > > to support multiple-enqueue/single dequeuer and reclaim mode.

> > > With serialized dequeue() even multiple dequeue should be possible.

> > Agreed. Please see the response on the other thread.

> >

> > >

> > > > Note that due to optimizations in rte_rcu_qsbr_check API, this

> > > > sequence should not be large in most cases. I do not have ideas on

> > > > how to

> > > make this sequence lock-free.

> > > >

> > > > If the writer is on the control plane, most use cases will use

> > > > mutex locks for synchronization if they are multi-threaded. That

> > > > lock should be

> > > enough to provide the thread safety for these APIs.

> > >

> > > In that is case, why do we need ring at all?

> > > For sure people can create their own queue quite easily with mutex and

> TAILQ.

> > > If performance is not an issue, they can even add pthread_cond to

> > > it, and have an ability for the consumer to sleep/wakeup on empty/full

> queue.

> > >

> > > >

> > > > If the writer is multi-threaded and lock-free, then one should use

> > > > per thread

> > > defer queue.

> > >

> > > If that's the only working model, then the question is why do we

> > > need that API at all?

> > > Just simple array with counter or linked-list should do for majority of

> cases.

> > Please see the other thread.

> >

> > >

> > > >

> > > > >

> > > > > > +	if (dq->r == NULL) {

> > > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > > +			"%s(): defer queue create failed\n",

> __func__);

> > > > > > +		rte_free(dq);

> > > > > > +		return NULL;

> > > > > > +	}

> > > > > > +

> > > > > > +	dq->v = params->v;

> > > > > > +	dq->size = params->size;

> > > > > > +	dq->esize = params->esize;

> > > > > > +	dq->f = params->f;

> > > > > > +	dq->p = params->p;

> > > > > > +

> > > > > > +	return dq;

> > > > > > +}

> > > > > > +

> > > > > > +/* Enqueue one resource to the defer queue to free after the

> > > > > > +grace

> > > > > > + * period is over.

> > > > > > + */

> > > > > > +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e)

> {

> > > > > > +	uint64_t token;

> > > > > > +	uint64_t *tmp;

> > > > > > +	uint32_t i;

> > > > > > +	uint32_t cur_size, free_size;

> > > > > > +

> > > > > > +	if (dq == NULL || e == NULL) {

> > > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > > > +		rte_errno = EINVAL;

> > > > > > +

> > > > > > +		return 1;

> > > > >

> > > > > Why just not to return -EINVAL straightway?

> > > > > I think there is no much point to set rte_errno in that function

> > > > > at all, just return value should do.

> > > > I am trying to keep these consistent with the existing APIs. They

> > > > return 0 or 1

> > > and set the rte_errno.

> > >

> > > A lot of public DPDK API functions do use return value to return

> > > status code (0, or some positive numbers of success, negative errno

> > > values on failure), I am not inventing anything new here.

> > Agree, you are not proposing a new thing here. May be I was not clear.

> > I really do not have an opinion on how this should be done. But, I do have

> an opinion on consistency. These new APIs follow what has been done in the

> existing RCU APIs. I think we have 2 options here.

> > 1) Either we change existing RCU APIs to get rid of rte_errno (is it

> > an ABI change?) or

> > 2) The new APIs follow what has been done in the existing RCU APIs.

> > I want to make sure we are consistent at least within RCU APIs.

> 

> But as I can see right now rcu API sets rte_errno only for control-path

> functions (get_memsize, init, register, unregister, dump).

> All fast-path (inline) function don't set/use it.

> So from perspective that is consistent behavior, no?

Agree. I am treating this as a control plane function mainly (hence it is a non-inline function as well).

> 

> >

> > >

> > > >

> > > > >

> > > > > > +	}

> > > > > > +

> > > > > > +	/* Start the grace period */

> > > > > > +	token = rte_rcu_qsbr_start(dq->v);

> > > > > > +

> > > > > > +	/* Reclaim resources if the queue is 1/8th full. This helps

> > > > > > +	 * the queue from growing too large and allows time for

> reader

> > > > > > +	 * threads to report their quiescent state.

> > > > > > +	 */

> > > > > > +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

> > > > >

> > > > > Probably would be a bit easier if you just store in dq->esize

> > > > > (elt size + token

> > > > > size) / 8.

> > > > Agree

> > > >

> > > > >

> > > > > > +	if (cur_size > (dq->size >>

> > > > > > +RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

> > > > >

> > > > > Why to make this threshold value hard-coded?

> > > > > Why either not to put it into create parameter, or just return a

> > > > > special return value, to indicate that threshold is reached?

> > > > My thinking was to keep the programming interface easy to use. The

> > > > more the parameters, the more painful it is for the user. IMO, the

> > > > constants chosen should be good enough for most cases. More

> > > > advanced

> > > users could modify the constants. However, we could make these as

> > > part of the parameters, but make them optional for the user. For ex:

> > > if they set them to 0, default values can be used.

> > > >

> > > > > Or even return number of filled/free entries on success, so

> > > > > caller can decide to reclaim or not based on that information on his

> own?

> > > > This means more code on the user side.

> > >

> > > I personally think it really wouldn't be that big a problem for the

> > > user to pass extra parameter to the function.

> > I will convert the 2 constants into optional parameters (user can set

> > them to 0 to make the algorithm use default values)

> >

> > > Again what if user doesn't want to reclaim() in enqueue() thread at all?

> > 'enqueue' has to do reclamation if the defer queue is full. I do not think this

> is trivial.

> >

> > In the current design, reclamation in enqueue is also done on regular

> > basis (automatic triggering of reclamation when the queue reaches

> > certain limit) to keep the queue from growing too large. This is

> > required when we implement a dynamically adjusting defer queue. The

> current algorithm keeps the cost of reclamation spread across multiple calls

> and puts an upper bound on cycles for delete API by reclaiming a fixed

> number of entries.

> >

> > This algorithm is proven to work in the LPM integration performance

> > tests at a very low performance over head (~1%). So, I do not know why a

> user would not want to use this.

> 

> Yeh, I looked at LPM implementation and one thing I found strange -

> defer_queue is hidden inside LPM struct and all reclamations are done

> internally.

> Yes for sure it allows to defer and group actual reclaim(), which hopefully will

> lead to better performance.

> But why not to allow user to call reclaim() for it directly too?

> In that way user might avoid/(minimize) doing reclaim() in LPM write() at all.

> And let say do it somewhere later in the same thread (when no other tasks to

> do), or even leave it to some other house-keeping thread to do (sort of

> garbage collector).

> Or such mode is not supported/planned?

The goal of integrating the RCU defer APIs with libraries is to take away from the writer the complexity of adopting lock-free algorithms. I am looking to address the most common use cases. There will be use cases which are not very common, and I think those should be addressed by the application using the base RCU APIs. Let us discuss this more in the other thread, where you have similar questions.

> 

> > The 2 additional parameters should give the user more flexibility.

> 

> Ok, let's keep it as config params.

> After another thought - I think you are right, it should be good enough.

> 

> >

> > However, if the user wants his own algorithm, he can create one with the

> base APIs provided.

> >

> > >

> > > > I think adding these to parameters seems like a better option.

> > > >

> > > > >

> > > > > > +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > > > > > +			"%s(): Triggering reclamation\n", __func__);

> > > > > > +		rte_rcu_qsbr_dq_reclaim(dq);

> > > > > > +	}

> > > > > > +

> > > > > > +	/* Check if there is space for atleast for 1 resource */

> > > > > > +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> > > > > > +	if (!free_size) {

> > > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > > +			"%s(): Defer queue is full\n", __func__);

> > > > > > +		rte_errno = ENOSPC;

> > > > > > +		return 1;

> > > > > > +	}

> > > > > > +

> > > > > > +	/* Enqueue the resource */

> > > > > > +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> > > > > > +

> > > > > > +	/* The resource to enqueue needs to be a multiple of 64b

> > > > > > +	 * due to the limitation of the rte_ring implementation.

> > > > > > +	 */

> > > > > > +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> > > > > > +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

> > > > >

> > > > >

> > > > > That whole construction above looks a bit clumsy and error prone...

> > > > > I suppose just:

> > > > >

> > > > > const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t free, n; ...

> > > > > n = rte_ring_enqueue_bulk(dq->r, e, nb_elt, &free); if (n == 0)

> > > > Yes, bulk enqueue can be used. But note that once the flexible

> > > > element size

> > > ring patch is done, this code will use that.

> > >

> > > Well, when it will be in the mainline, and it would provide a better

> > > way, for sure this code can be updated to use new API (if it is provide

> some improvements).

> > > But as I understand, right now it is not there, while bulk

> enqueue/dequeue are.

> > Apologies, I was not clear. I agree we can go with bulk APIs for now.

> >

> > >

> > > >

> > > > >   return -ENOSPC;

> > > > > return free;

> > > > >

> > > > > That way I think you can have MT-safe version of that function.

> > > > Please see the description of MT safe issue above.

> > > >

> > > > >

> > > > > > +

> > > > > > +	return 0;

> > > > > > +}

> > > > > > +

> > > > > > +/* Reclaim resources from the defer queue. */ int

> > > > > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

> > > > > > +	uint32_t max_cnt;

> > > > > > +	uint32_t cnt;

> > > > > > +	void *token;

> > > > > > +	uint64_t *tmp;

> > > > > > +	uint32_t i;

> > > > > > +

> > > > > > +	if (dq == NULL) {

> > > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > > > +		rte_errno = EINVAL;

> > > > > > +

> > > > > > +		return 1;

> > > > >

> > > > > Same story as above - I think rte_errno is excessive in this function.

> > > > > Just return value should be enough.

> > > > >

> > > > >

> > > > > > +	}

> > > > > > +

> > > > > > +	/* Anything to reclaim? */

> > > > > > +	if (rte_ring_count(dq->r) == 0)

> > > > > > +		return 0;

> > > > >

> > > > > Not sure you need that, see below.

> > > > >

> > > > > > +

> > > > > > +	/* Reclaim at the max 1/16th the total number of entries. */

> > > > > > +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> > > > > > +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> > > > >

> > > > > Again why not to make max_cnt a configurable at create() parameter?

> > > > I think making this as an optional parameter for creating defer

> > > > queue is a

> > > better option.

> > > >

> > > > > Or even a parameter for that function?

> > > > >

> > > > > > +	cnt = 0;

> > > > > > +

> > > > > > +	/* Check reader threads quiescent state and reclaim

> resources */

> > > > > > +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) ==

> 0) &&

> > > > > > +		(rte_rcu_qsbr_check(dq->v,

> (uint64_t)((uintptr_t)token), false)

> > > > > > +			== 1)) {

> > > > >

> > > > >

> > > > > > +		(void)rte_ring_sc_dequeue(dq->r, &token);

> > > > > > +		/* The resource to dequeue needs to be a multiple of

> 64b

> > > > > > +		 * due to the limitation of the rte_ring

> implementation.

> > > > > > +		 */

> > > > > > +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> > > > > > +			i++, tmp++)

> > > > > > +			(void)rte_ring_sc_dequeue(dq->r,

> > > > > > +					(void *)(uintptr_t)tmp);

> > > > >

> > > > > Again, no need for such constructs with multiple dequeuer I believe.

> > > > > Just:

> > > > >

> > > > > const uint32_t nb_elt =  dq->elt_size/8 + 1; uint32_t n;

> > > > > uintptr_t elt[nb_elt]; ...

> > > > > n = rte_ring_dequeue_bulk(dq->r, elt, nb_elt, NULL); if (n != 0)

> > > > > {dq->f(dq->p, elt);}

> > > > Agree on bulk API use.

> > > >

> > > > >

> > > > > Seems enough.

> > > > > Again in that case you can have enqueue/reclaim running in

> > > > > different threads simultaneously, plus you don't need dq->e at all.

> > > > Will check on dq->e

> > > >

> > > > >

> > > > > > +		dq->f(dq->p, dq->e);

> > > > > > +

> > > > > > +		cnt++;

> > > > > > +	}

> > > > > > +

> > > > > > +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> > > > > > +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> > > > > > +

> > > > > > +	if (cnt == 0) {

> > > > > > +		/* No resources were reclaimed */

> > > > > > +		rte_errno = EAGAIN;

> > > > > > +		return 1;

> > > > > > +	}

> > > > > > +

> > > > > > +	return 0;

> > > > >

> > > > > I'd suggest to return cnt on success.

> > > > I am trying to keep the APIs simple. I do not see much use for 'cnt'

> > > > as return value to the user. It exposes more details which I think

> > > > are internal

> > > to the library.

> > >

> > > Not sure what is the hassle to return number of completed reclamations?

> > > If user doesn't need that information, he simply wouldn't use it.

> > > But might be it would be useful - he can decide should he try

> > > another attempt of reclaim() immediately or is it ok to do something else.

> > There is no hassle to return that information.

> >

> > As per the current design, user calls 'reclaim' when it is out of

> > resources while adding an entry to the data structure. At that point

> > the user wants to know if at least 1 resource was reclaimed because the

> user has to allocate 1 resource. He does not have a use for the number of

> resources reclaimed.

> 

> Ok, but why user can't decide to do reclaim in advance, let say when he

> foresee that he would need a lot of allocations in nearest future?

> Or when there is some idle time? Or some combination of these things?

> At he would like to free some extra resources in that case to minimize

> number of reclaims in future peak interval?

If the user has free time he can call the reclaim API. By making the parameters configurable, he should be able to control how much he can reclaim.
If the user wants to make sure that he has enough free resources for the future, he should be able to do so by knowing how many free resources are currently available in his data structure.
But, I do not see it as a problem to return the number of resources reclaimed. I will add that.

> 

> >

> > If this API returns 0, then the user can decide to repeat the call or

> > return failure. But that decision depends on the length of the grace period

> which is under user's control.

> >

> > >

> > > >

> > > > >

> > > > > > +}

> > > > > > +

> > > > > > +/* Delete a defer queue. */

> > > > > > +int

> > > > > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

> > > > > > +	if (dq == NULL) {

> > > > > > +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> > > > > > +			"%s(): Invalid input parameter\n", __func__);

> > > > > > +		rte_errno = EINVAL;

> > > > > > +

> > > > > > +		return 1;

> > > > > > +	}

> > > > > > +

> > > > > > +	/* Reclaim all the resources */

> > > > > > +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> > > > > > +		/* Error number is already set by the reclaim API */

> > > > > > +		return 1;

> > > > >

> > > > > How do you know that you have reclaimed everything?

> > > > Good point, will come back with a different solution.

> > > >

> > > > >

> > > > > > +

> > > > > > +	rte_ring_free(dq->r);

> > > > > > +	rte_free(dq);

> > > > > > +

> > > > > > +	return 0;

> > > > > > +}

> > > > > > +

> > > > > >  int rte_rcu_log_type;

> > > > > >

> > > > > >  RTE_INIT(rte_rcu_register)

> > > > > > diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

> > > > > > b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a

> > > > > > 100644

> > > > > > --- a/lib/librte_rcu/rte_rcu_qsbr.h

> > > > > > +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> > > > > > @@ -34,6 +34,7 @@ extern "C" {  #include <rte_lcore.h>

> > > > > > #include <rte_debug.h>  #include <rte_atomic.h>

> > > > > > +#include <rte_ring.h>

> > > > > >

> > > > > >  extern int rte_rcu_log_type;

> > > > > >

> > > > > > @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

> > > > > >  	 */

> > > > > >  } __rte_cache_aligned;

> > > > > >

> > > > > > +/**

> > > > > > + * Call back function called to free the resources.

> > > > > > + *

> > > > > > + * @param p

> > > > > > + *   Pointer provided while creating the defer queue

> > > > > > + * @param e

> > > > > > + *   Pointer to the resource data stored on the defer queue

> > > > > > + *

> > > > > > + * @return

> > > > > > + *   None

> > > > > > + */

> > > > > > +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> > > > >

> > > > > Stylish thing - usually in DPDK we have typedef newtype_t ...

> > > > > Though I am not sure you need a new typedef at all - just a

> > > > > function pointer inside the struct seems enough.

> > > > Other libraries (for ex: rte_hash) use this approach. I think it

> > > > is better to keep

> > > it out of the structure to allow for better commenting.

> > >

> > > I am saying majority of DPDK code use _t suffix for typedef:

> > > typedef void (*rte_rcu_qsbr_free_resource_t)(void *p, void *e);

> > Apologies, got it, will change.

> >

> > >

> > > >

> > > > >

> > > > > > +

> > > > > > +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> > > > > > +

> > > > > > +/**

> > > > > > + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> > > > > > + */

> > > > > > +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> > > > > > +

> > > > > > +/**

> > > > > > + *  Reclaim at the max 1/16th the total number of resources.

> > > > > > + */

> > > > > > +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> > > > >

> > > > >

> > > > > As I said above, I don't think these thresholds need to be hardcoded.

> > > > > In any case, there seems not much point to put them in the

> > > > > public header

> > > file.

> > > > >

> > > > > > +

> > > > > > +/**

> > > > > > + * Parameters used when creating the defer queue.

> > > > > > + */

> > > > > > +struct rte_rcu_qsbr_dq_parameters {

> > > > > > +	const char *name;

> > > > > > +	/**< Name of the queue. */

> > > > > > +	uint32_t size;

> > > > > > +	/**< Number of entries in queue. Typically, this will be

> > > > > > +	 *   the same as the maximum number of entries supported in

> the

> > > > > > +	 *   lock free data structure.

> > > > > > +	 *   Data structures with unbounded number of entries is not

> > > > > > +	 *   supported currently.

> > > > > > +	 */

> > > > > > +	uint32_t esize;

> > > > > > +	/**< Size (in bytes) of each element in the defer queue.

> > > > > > +	 *   This has to be multiple of 8B as the rte_ring APIs

> > > > > > +	 *   support 8B element sizes only.

> > > > > > +	 */

> > > > > > +	rte_rcu_qsbr_free_resource f;

> > > > > > +	/**< Function to call to free the resource. */

> > > > > > +	void *p;

> > > > >

> > > > > Style nit again - I like short names myself, but that seems a

> > > > > bit extreme... :) Might be at least:

> > > > > void (*reclaim)(void *, void *);

> > > > May be 'free_fn'?

> > > >

> > > > > void * reclaim_data;

> > > > > ?

> > > > This is the pointer to the data structure to free the resource

> > > > into. For ex: In

> > > LPM data structure, it will be pointer to LPM. 'reclaim_data'

> > > > does not convey the meaning correctly.

> > >

> > > Ok, please feel free to come up with your own names.

> > > I just wanted to say that 'f' and 'p' are a bit an extreme for public API.

> > ok, this is the hardest thing to do 😊

> >

> > >

> > > >

> > > > >

> > > > > > +	/**< Pointer passed to the free function. Typically, this is the

> > > > > > +	 *   pointer to the data structure to which the resource to

> free

> > > > > > +	 *   belongs. This can be NULL.

> > > > > > +	 */

> > > > > > +	struct rte_rcu_qsbr *v;

> > > > >

> > > > > Does it need to be inside that struct?

> > > > > Might be better:

> > > > > rte_rcu_qsbr_dq_create(struct rte_rcu_qsbr *v, const struct

> > > > > rte_rcu_qsbr_dq_parameters *params);

> > > > The API takes a parameter structure as input anyway, why to add

> > > > another argument to the function? QSBR variable is also another

> parameter.

> > > >

> > > > >

> > > > > Another alternative: make both reclaim() and enqueue() to take v

> > > > > as a parameter.

> > > > But both of them need access to some of the parameters provided in

> > > > rte_rcu_qsbr_dq_create API. We would end up passing 2 arguments to

> > > > the

> > > functions.

> > >

> > > Pure stylish thing.

> > > From my perspective it just provides better visibility what is going in the

> code:

> > > For QSBR var 'v' create a new deferred queue.

> > > But no strong opinion here.

> > >

> > > >

> > > > >

> > > > > > +	/**< RCU QSBR variable to use for this defer queue */ };

> > > > > > +

> > > > > > +/* RTE defer queue structure.

> > > > > > + * This structure holds the defer queue. The defer queue is

> > > > > > +used to

> > > > > > + * hold the deleted entries from the data structure that are

> > > > > > +not

> > > > > > + * yet freed.

> > > > > > + */

> > > > > > +struct rte_rcu_qsbr_dq;

> > > > > > +

> > > > > >  /**

> > > > > >   * @warning

> > > > > >   * @b EXPERIMENTAL: this API may change without prior notice

> > > > > > @@

> > > > > > -648,6 +710,113 @@ __rte_experimental  int

> > > > > > rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);

> > > > > >

> > > > > > +/**

> > > > > > + * @warning

> > > > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > > > + *

> > > > > > + * Create a queue used to store the data structure elements

> > > > > > +that can

> > > > > > + * be freed later. This queue is referred to as 'defer queue'.

> > > > > > + *

> > > > > > + * @param params

> > > > > > + *   Parameters to create a defer queue.

> > > > > > + * @return

> > > > > > + *   On success - Valid pointer to defer queue

> > > > > > + *   On error - NULL

> > > > > > + *   Possible rte_errno codes are:

> > > > > > + *   - EINVAL - NULL parameters are passed

> > > > > > + *   - ENOMEM - Not enough memory

> > > > > > + */

> > > > > > +__rte_experimental

> > > > > > +struct rte_rcu_qsbr_dq *

> > > > > > +rte_rcu_qsbr_dq_create(const struct

> > > > > > +rte_rcu_qsbr_dq_parameters *params);

> > > > > > +

> > > > > > +/**

> > > > > > + * @warning

> > > > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > > > + *

> > > > > > + * Enqueue one resource to the defer queue and start the grace

> period.

> > > > > > + * The resource will be freed later after at least one grace

> > > > > > +period

> > > > > > + * is over.

> > > > > > + *

> > > > > > + * If the defer queue is full, it will attempt to reclaim resources.

> > > > > > + * It will also reclaim resources at regular intervals to

> > > > > > +avoid

> > > > > > + * the defer queue from growing too big.

> > > > > > + *

> > > > > > + * This API is not multi-thread safe. It is expected that the

> > > > > > +caller

> > > > > > + * provides multi-thread safety by locking a mutex or some other

> means.

> > > > > > + *

> > > > > > + * A lock free multi-thread writer algorithm could achieve

> > > > > > +multi-thread

> > > > > > + * safety by creating and using one defer queue per thread.

> > > > > > + *

> > > > > > + * @param dq

> > > > > > + *   Defer queue to allocate an entry from.

> > > > > > + * @param e

> > > > > > + *   Pointer to resource data to copy to the defer queue. The size of

> > > > > > + *   the data to copy is equal to the element size provided when the

> > > > > > + *   defer queue was created.

> > > > > > + * @return

> > > > > > + *   On success - 0

> > > > > > + *   On error - 1 with rte_errno set to

> > > > > > + *   - EINVAL - NULL parameters are passed

> > > > > > + *   - ENOSPC - Defer queue is full. This condition can not happen

> > > > > > + *		if the defer queue size is equal (or larger) than the

> > > > > > + *		number of elements in the data structure.

> > > > > > + */

> > > > > > +__rte_experimental

> > > > > > +int

> > > > > > +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> > > > > > +

> > > > > > +/**

> > > > > > + * @warning

> > > > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > > > + *

> > > > > > + * Reclaim resources from the defer queue.

> > > > > > + *

> > > > > > + * This API is not multi-thread safe. It is expected that the

> > > > > > +caller

> > > > > > + * provides multi-thread safety by locking a mutex or some other

> means.

> > > > > > + *

> > > > > > + * A lock free multi-thread writer algorithm could achieve

> > > > > > +multi-thread

> > > > > > + * safety by creating and using one defer queue per thread.

> > > > > > + *

> > > > > > + * @param dq

> > > > > > + *   Defer queue to reclaim an entry from.

> > > > > > + * @return

> > > > > > + *   On successful reclamation of at least 1 resource - 0

> > > > > > + *   On error - 1 with rte_errno set to

> > > > > > + *   - EINVAL - NULL parameters are passed

> > > > > > + *   - EAGAIN - None of the resources have completed at least 1

> grace

> > > > > period,

> > > > > > + *		try again.

> > > > > > + */

> > > > > > +__rte_experimental

> > > > > > +int

> > > > > > +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> > > > > > +

> > > > > > +/**

> > > > > > + * @warning

> > > > > > + * @b EXPERIMENTAL: this API may change without prior notice

> > > > > > + *

> > > > > > + * Delete a defer queue.

> > > > > > + *

> > > > > > + * It tries to reclaim all the resources on the defer queue.

> > > > > > + * If any of the resources have not completed the grace

> > > > > > +period

> > > > > > + * the reclamation stops and returns immediately. The rest of

> > > > > > + * the resources are not reclaimed and the defer queue is not

> > > > > > + * freed.

> > > > > > + *

> > > > > > + * @param dq

> > > > > > + *   Defer queue to delete.

> > > > > > + * @return

> > > > > > + *   On success - 0

> > > > > > + *   On error - 1

> > > > > > + *   Possible rte_errno codes are:

> > > > > > + *   - EINVAL - NULL parameters are passed

> > > > > > + *   - EAGAIN - Some of the resources have not completed at least 1

> > > grace

> > > > > > + *		period, try again.

> > > > > > + */

> > > > > > +__rte_experimental

> > > > > > +int

> > > > > > +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> > > > > > +

> > > > > >  #ifdef __cplusplus

> > > > > >  }

> > > > > >  #endif

> > > > > > diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > > > > b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > > > > new file mode 100644

> > > > > > index 000000000..2122bc36a

> > > > > > --- /dev/null

> > > > > > +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> > > > >

> > > > > Again style suggestion: as it is not public header - don't use

> > > > > rte_ prefix for naming.

> > > > > From my perspective - easier to realize for reader what is

> > > > > public header, what is not.

> > > > Looks like the guidelines are not defined very well. I see one

> > > > private file with rte_ prefix. I see Stephen not using rte_

> > > > prefix. I do not have any

> > > preference. But, a consistent approach is required.

> > >

> > > That's just a suggestion.

> > > For me (and I hope for others) it would be a bit easier.

> > > When looking at the code for the first time I had to look at

> > > meson.build to check is it a public header or not.

> > > If the file doesn't have 'rte_' prefix, I assume that it is an

> > > internal one straightway.

> > > But, as you said, there are no exact guidelines here, so it is up to you to decide.

> > I think it makes sense to remove 'rte_' prefix. I will also change the file

> name to have '_private' suffix.

> > There are some inconsistencies in the existing code, will send a patch to

> correct them to follow this approach.

> >

> > >

> > > >

> > > > >

> > > > > > @@ -0,0 +1,46 @@

> > > > > > +/* SPDX-License-Identifier: BSD-3-Clause

> > > > > > + * Copyright (c) 2019 Arm Limited  */

> > > > > > +

> > > > > > +#ifndef _RTE_RCU_QSBR_PVT_H_

> > > > > > +#define _RTE_RCU_QSBR_PVT_H_

> > > > > > +

> > > > > > +/**

> > > > > > + * This file is private to the RCU library. It should not be

> > > > > > +included

> > > > > > + * by the user of this library.

> > > > > > + */

> > > > > > +

> > > > > > +#ifdef __cplusplus

> > > > > > +extern "C" {

> > > > > > +#endif

> > > > > > +

> > > > > > +#include "rte_rcu_qsbr.h"

> > > > > > +

> > > > > > +/* RTE defer queue structure.

> > > > > > + * This structure holds the defer queue. The defer queue is

> > > > > > +used to

> > > > > > + * hold the deleted entries from the data structure that are

> > > > > > +not

> > > > > > + * yet freed.

> > > > > > + */

> > > > > > +struct rte_rcu_qsbr_dq {

> > > > > > +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this

> queue.*/

> > > > > > +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> > > > > > +	uint32_t size;

> > > > > > +	/**< Number of elements in the defer queue */

> > > > > > +	uint32_t esize;

> > > > > > +	/**< Size (in bytes) of data stored on the defer queue */

> > > > > > +	rte_rcu_qsbr_free_resource f;

> > > > > > +	/**< Function to call to free the resource. */

> > > > > > +	void *p;

> > > > > > +	/**< Pointer passed to the free function. Typically, this is the

> > > > > > +	 *   pointer to the data structure to which the resource to

> free

> > > > > > +	 *   belongs.

> > > > > > +	 */

> > > > > > +	char e[0];

> > > > > > +	/**< Temporary storage to copy the defer queue element. */

> > > > >

> > > > > Do you really need 'e' at all?

> > > > > Can't it be just temporary stack variable?

> > > > Ok, will check.

> > > >

> > > > >

> > > > > > +};

> > > > > > +

> > > > > > +#ifdef __cplusplus

> > > > > > +}

> > > > > > +#endif

> > > > > > +

> > > > > > +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> > > > > > diff --git a/lib/librte_rcu/rte_rcu_version.map

> > > > > > b/lib/librte_rcu/rte_rcu_version.map

> > > > > > index f8b9ef2ab..dfac88a37 100644

> > > > > > --- a/lib/librte_rcu/rte_rcu_version.map

> > > > > > +++ b/lib/librte_rcu/rte_rcu_version.map

> > > > > > @@ -8,6 +8,10 @@ EXPERIMENTAL {

> > > > > >  	rte_rcu_qsbr_synchronize;

> > > > > >  	rte_rcu_qsbr_thread_register;

> > > > > >  	rte_rcu_qsbr_thread_unregister;

> > > > > > +	rte_rcu_qsbr_dq_create;

> > > > > > +	rte_rcu_qsbr_dq_enqueue;

> > > > > > +	rte_rcu_qsbr_dq_reclaim;

> > > > > > +	rte_rcu_qsbr_dq_delete;

> > > > > >

> > > > > >  	local: *;

> > > > > >  };

> > > > > > diff --git a/lib/meson.build b/lib/meson.build index

> > > > > > e5ff83893..0e1be8407 100644

> > > > > > --- a/lib/meson.build

> > > > > > +++ b/lib/meson.build

> > > > > > @@ -11,7 +11,9 @@

> > > > > >  libraries = [

> > > > > >  	'kvargs', # eal depends on kvargs

> > > > > >  	'eal', # everything depends on eal

> > > > > > -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > > > > > +	'ring',

> > > > > > +	'rcu', # rcu depends on ring

> > > > > > +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> > > > > >  	'cmdline',

> > > > > >  	'metrics', # bitrate/latency stats depends on this

> > > > > >  	'hash',    # efd depends on this

> > > > > > @@ -22,7 +24,7 @@ libraries = [

> > > > > >  	'gro', 'gso', 'ip_frag', 'jobstats',

> > > > > >  	'kni', 'latencystats', 'lpm', 'member',

> > > > > >  	'power', 'pdump', 'rawdev',

> > > > > > -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> > > > > > +	'reorder', 'sched', 'security', 'stack', 'vhost',

> > > > > >  	# ipsec lib depends on net, crypto and security

> > > > > >  	'ipsec',

> > > > > >  	# add pkt framework libs which use other libs from above

> > > > > > --

> > > > > > 2.17.1

Medvedkin, Vladimir Oct. 15, 2019, 4:48 p.m. UTC | #13

Hi Honnappa,

On 13/10/2019 04:02, Honnappa Nagarahalli wrote:
> Hi Vladimir,

> 	Apologies for the delayed response, I had to run few experiments.

>

> <snip>

>

>> Hi Honnappa,

>>

>> On 01/10/2019 07:29, Honnappa Nagarahalli wrote:

>>> Add resource reclamation APIs to make it simple for applications and

>>> libraries to integrate rte_rcu library.

>>>

>>> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

>>> Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

>>> ---

>>>    app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

>>>    lib/librte_rcu/meson.build         |   2 +

>>>    lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

>>>    lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

>>>    lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

>>>    lib/librte_rcu/rte_rcu_version.map |   4 +

>>>    lib/meson.build                    |   6 +-

>>>    7 files changed, 700 insertions(+), 3 deletions(-)

>>>    create mode 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

>>>

>>> diff --git a/app/test/test_rcu_qsbr.c b/app/test/test_rcu_qsbr.c index

>>> d1b9e46a2..3a6815243 100644

>>> --- a/app/test/test_rcu_qsbr.c

>>> +++ b/app/test/test_rcu_qsbr.c

>>> @@ -1,8 +1,9 @@

>>>    /* SPDX-License-Identifier: BSD-3-Clause

>>> - * Copyright (c) 2018 Arm Limited

>>> + * Copyright (c) 2019 Arm Limited

>>>     */

>>>

>>>    #include <stdio.h>

>>> +#include <string.h>

>>>    #include <rte_pause.h>

>>>    #include <rte_rcu_qsbr.h>

>>>    #include <rte_hash.h>

>>> @@ -33,6 +34,7 @@ static uint32_t *keys;

>>>    #define COUNTER_VALUE 4096

>>>    static uint32_t *hash_data[RTE_MAX_LCORE][TOTAL_ENTRY];

>>>    static uint8_t writer_done;

>>> +static uint8_t cb_failed;

>>>

>>>    static struct rte_rcu_qsbr *t[RTE_MAX_LCORE];

>>>    struct rte_hash *h[RTE_MAX_LCORE];

>>> @@ -582,6 +584,269 @@ test_rcu_qsbr_thread_offline(void)

>>>    	return 0;

>>>    }

>>>

>>> +static void

>>> +rte_rcu_qsbr_test_free_resource(void *p, void *e) {

>>> +	if (p != NULL && e != NULL) {

>>> +		printf("%s: Test failed\n", __func__);

>>> +		cb_failed = 1;

>>> +	}

>>> +}

>>> +

>>> +/*

>>> + * rte_rcu_qsbr_dq_create: create a queue used to store the data

>>> +structure

>>> + * elements that can be freed later. This queue is referred to as 'defer

>> queue'.

>>> + */

>>> +static int

>>> +test_rcu_qsbr_dq_create(void)

>>> +{

>>> +	char rcu_dq_name[RTE_RING_NAMESIZE];

>>> +	struct rte_rcu_qsbr_dq_parameters params;

>>> +	struct rte_rcu_qsbr_dq *dq;

>>> +

>>> +	printf("\nTest rte_rcu_qsbr_dq_create()\n");

>>> +

>>> +	/* Pass invalid parameters */

>>> +	dq = rte_rcu_qsbr_dq_create(NULL);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

>>> +params");

>>> +

>>> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

>>> +params");

>>> +

>>> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

>>> +	params.name = rcu_dq_name;

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

>>> +params");

>>> +

>>> +	params.f = rte_rcu_qsbr_test_free_resource;

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

>>> +params");

>>> +

>>> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

>>> +	params.v = t[0];

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

>>> +params");

>>> +

>>> +	params.size = 1;

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

>>> +params");

>>> +

>>> +	params.esize = 3;

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

>>> +params");

>>> +

>>> +	/* Pass all valid parameters */

>>> +	params.esize = 16;

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

>> params");

>>> +	rte_rcu_qsbr_dq_delete(dq);

>>> +

>>> +	return 0;

>>> +}

>>> +

>>> +/*

>>> + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer queue,

>>> + * to be freed later after atleast one grace period is over.

>>> + */

>>> +static int

>>> +test_rcu_qsbr_dq_enqueue(void)

>>> +{

>>> +	int ret;

>>> +	uint64_t r;

>>> +	char rcu_dq_name[RTE_RING_NAMESIZE];

>>> +	struct rte_rcu_qsbr_dq_parameters params;

>>> +	struct rte_rcu_qsbr_dq *dq;

>>> +

>>> +	printf("\nTest rte_rcu_qsbr_dq_enqueue()\n");

>>> +

>>> +	/* Create a queue with simple parameters */

>>> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

>>> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

>>> +	params.name = rcu_dq_name;

>>> +	params.f = rte_rcu_qsbr_test_free_resource;

>>> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

>>> +	params.v = t[0];

>>> +	params.size = 1;

>>> +	params.esize = 16;

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

>>> +params");

>>> +

>>> +	/* Pass invalid parameters */

>>> +	ret = rte_rcu_qsbr_dq_enqueue(NULL, NULL);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid

>>> +params");

>>> +

>>> +	ret = rte_rcu_qsbr_dq_enqueue(dq, NULL);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid

>>> +params");

>>> +

>>> +	ret = rte_rcu_qsbr_dq_enqueue(NULL, &r);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid

>>> +params");

>>> +

>>> +	ret = rte_rcu_qsbr_dq_delete(dq);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 1), "dq delete valid

>> params");

>>> +

>>> +	return 0;

>>> +}

>>> +

>>> +/*

>>> + * rte_rcu_qsbr_dq_reclaim: Reclaim resources from the defer queue.

>>> + */

>>> +static int

>>> +test_rcu_qsbr_dq_reclaim(void)

>>> +{

>>> +	int ret;

>>> +

>>> +	printf("\nTest rte_rcu_qsbr_dq_reclaim()\n");

>>> +

>>> +	/* Pass invalid parameters */

>>> +	ret = rte_rcu_qsbr_dq_reclaim(NULL);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq reclaim invalid

>>> +params");

>>> +

>>> +	return 0;

>>> +}

>>> +

>>> +/*

>>> + * rte_rcu_qsbr_dq_delete: Delete a defer queue.

>>> + */

>>> +static int

>>> +test_rcu_qsbr_dq_delete(void)

>>> +{

>>> +	int ret;

>>> +	char rcu_dq_name[RTE_RING_NAMESIZE];

>>> +	struct rte_rcu_qsbr_dq_parameters params;

>>> +	struct rte_rcu_qsbr_dq *dq;

>>> +

>>> +	printf("\nTest rte_rcu_qsbr_dq_delete()\n");

>>> +

>>> +	/* Pass invalid parameters */

>>> +	ret = rte_rcu_qsbr_dq_delete(NULL);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq delete invalid

>>> +params");

>>> +

>>> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

>>> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

>>> +	params.name = rcu_dq_name;

>>> +	params.f = rte_rcu_qsbr_test_free_resource;

>>> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

>>> +	params.v = t[0];

>>> +	params.size = 1;

>>> +	params.esize = 16;

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

>> params");

>>> +	ret = rte_rcu_qsbr_dq_delete(dq);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid

>> params");

>>> +

>>> +	return 0;

>>> +}

>>> +

>>> +/*

>>> + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer queue,

>>> + * to be freed later after atleast one grace period is over.

>>> + */

>>> +static int

>>> +test_rcu_qsbr_dq_functional(int32_t size, int32_t esize) {

>>> +	int i, j, ret;

>>> +	char rcu_dq_name[RTE_RING_NAMESIZE];

>>> +	struct rte_rcu_qsbr_dq_parameters params;

>>> +	struct rte_rcu_qsbr_dq *dq;

>>> +	uint64_t *e;

>>> +	uint64_t sc = 200;

>>> +	int max_entries;

>>> +

>>> +	printf("\nTest rte_rcu_qsbr_dq_xxx functional tests()\n");

>>> +	printf("Size = %d, esize = %d\n", size, esize);

>>> +

>>> +	e = (uint64_t *)rte_zmalloc(NULL, esize, RTE_CACHE_LINE_SIZE);

>>> +	if (e == NULL)

>>> +		return 0;

>>> +	cb_failed = 0;

>>> +

>>> +	/* Initialize the RCU variable. No threads are registered */

>>> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

>>> +

>>> +	/* Create a queue with simple parameters */

>>> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

>>> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

>>> +	params.name = rcu_dq_name;

>>> +	params.f = rte_rcu_qsbr_test_free_resource;

>>> +	params.v = t[0];

>>> +	params.size = size;

>>> +	params.esize = esize;

>>> +	dq = rte_rcu_qsbr_dq_create(&params);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

>>> +params");

>>> +

>>> +	/* Given the size and esize, calculate the maximum number of entries

>>> +	 * that can be stored on the defer queue (look at the logic used

>>> +	 * in capacity calculation of rte_ring).

>>> +	 */

>>> +	max_entries = rte_align32pow2(((esize/8 + 1) * size) + 1);

>>> +	max_entries = (max_entries - 1)/(esize/8 + 1);

>>> +

>>> +	/* Enqueue few counters starting with the value 'sc' */

>>> +	/* The queue size will be rounded up to 2. The enqueue API also

>>> +	 * reclaims if the queue size is above certain limit. Since, there

>>> +	 * are no threads registered, reclamation succedes. Hence, it should

>>> +	 * be possible to enqueue more than the provided queue size.

>>> +	 */

>>> +	for (i = 0; i < 10; i++) {

>>> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

>>> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

>>> +			"dq enqueue functional");

>>> +		for (j = 0; j < esize/8; j++)

>>> +			e[j] = sc++;

>>> +	}

>>> +

>>> +	/* Register a thread on the RCU QSBR variable. Reclamation will not

>>> +	 * succeed. It should not be possible to enqueue more than the size

>>> +	 * number of resources.

>>> +	 */

>>> +	rte_rcu_qsbr_thread_register(t[0], 1);

>>> +	rte_rcu_qsbr_thread_online(t[0], 1);

>>> +

>>> +	for (i = 0; i < max_entries; i++) {

>>> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

>>> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

>>> +			"dq enqueue functional");

>>> +		for (j = 0; j < esize/8; j++)

>>> +			e[j] = sc++;

>>> +	}

>>> +

>>> +	/* Enqueue fails as queue is full */

>>> +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue

>> functional");

>>> +

>>> +	/* Delete should fail as there are elements in defer queue which

>>> +	 * cannot be reclaimed.

>>> +	 */

>>> +	ret = rte_rcu_qsbr_dq_delete(dq);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq delete valid

>> params");

>>> +

>>> +	/* Report quiescent state, enqueue should succeed */

>>> +	rte_rcu_qsbr_quiescent(t[0], 1);

>>> +	for (i = 0; i < max_entries; i++) {

>>> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

>>> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

>>> +			"dq enqueue functional");

>>> +		for (j = 0; j < esize/8; j++)

>>> +			e[j] = sc++;

>>> +	}

>>> +

>>> +	/* Queue is full */

>>> +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue

>> functional");

>>> +

>>> +	/* Report quiescent state, delete should succeed */

>>> +	rte_rcu_qsbr_quiescent(t[0], 1);

>>> +	ret = rte_rcu_qsbr_dq_delete(dq);

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid

>> params");

>>> +

>>> +	/* Validate that call back function did not return any error */

>>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((cb_failed == 1), "CB failed");

>>> +

>>> +	rte_free(e);

>>> +	return 0;

>>> +}

>>> +

>>>    /*

>>>     * rte_rcu_qsbr_dump: Dump status of a single QS variable to a file

>>>     */

>>> @@ -1025,6 +1290,18 @@ test_rcu_qsbr_main(void)

>>>    	if (test_rcu_qsbr_thread_offline() < 0)

>>>    		goto test_fail;

>>>

>>> +	if (test_rcu_qsbr_dq_create() < 0)

>>> +		goto test_fail;

>>> +

>>> +	if (test_rcu_qsbr_dq_reclaim() < 0)

>>> +		goto test_fail;

>>> +

>>> +	if (test_rcu_qsbr_dq_delete() < 0)

>>> +		goto test_fail;

>>> +

>>> +	if (test_rcu_qsbr_dq_enqueue() < 0)

>>> +		goto test_fail;

>>> +

>>>    	printf("\nFunctional tests\n");

>>>

>>>    	if (test_rcu_qsbr_sw_sv_3qs() < 0)

>>> @@ -1033,6 +1310,18 @@ test_rcu_qsbr_main(void)

>>>    	if (test_rcu_qsbr_mw_mv_mqs() < 0)

>>>    		goto test_fail;

>>>

>>> +	if (test_rcu_qsbr_dq_functional(1, 8) < 0)

>>> +		goto test_fail;

>>> +

>>> +	if (test_rcu_qsbr_dq_functional(2, 8) < 0)

>>> +		goto test_fail;

>>> +

>>> +	if (test_rcu_qsbr_dq_functional(303, 16) < 0)

>>> +		goto test_fail;

>>> +

>>> +	if (test_rcu_qsbr_dq_functional(7, 128) < 0)

>>> +		goto test_fail;

>>> +

>>>    	free_rcu();

>>>

>>>    	printf("\n");

>>> diff --git a/lib/librte_rcu/meson.build b/lib/librte_rcu/meson.build

>>> index 62920ba02..e280b29c1 100644

>>> --- a/lib/librte_rcu/meson.build

>>> +++ b/lib/librte_rcu/meson.build

>>> @@ -10,3 +10,5 @@ headers = files('rte_rcu_qsbr.h')

>>>    if cc.get_id() == 'clang' and dpdk_conf.get('RTE_ARCH_64') == false

>>>    	ext_deps += cc.find_library('atomic')

>>>    endif

>>> +

>>> +deps += ['ring']

>>> diff --git a/lib/librte_rcu/rte_rcu_qsbr.c

>>> b/lib/librte_rcu/rte_rcu_qsbr.c index ce7f93dd3..76814f50b 100644

>>> --- a/lib/librte_rcu/rte_rcu_qsbr.c

>>> +++ b/lib/librte_rcu/rte_rcu_qsbr.c

>>> @@ -21,6 +21,7 @@

>>>    #include <rte_errno.h>

>>>

>>>    #include "rte_rcu_qsbr.h"

>>> +#include "rte_rcu_qsbr_pvt.h"

>>>

>>>    /* Get the memory size of QSBR variable */

>>>    size_t

>>> @@ -267,6 +268,190 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr

>> *v)

>>>    	return 0;

>>>    }

>>>

>>> +/* Create a queue used to store the data structure elements that can

>>> + * be freed later. This queue is referred to as 'defer queue'.

>>> + */

>>> +struct rte_rcu_qsbr_dq *

>>> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

>>> +*params) {

>>> +	struct rte_rcu_qsbr_dq *dq;

>>> +	uint32_t qs_fifo_size;

>>> +

>>> +	if (params == NULL || params->f == NULL ||

>>> +		params->v == NULL || params->name == NULL ||

>>> +		params->size == 0 || params->esize == 0 ||

>>> +		(params->esize % 8 != 0)) {

>>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

>>> +			"%s(): Invalid input parameter\n", __func__);

>>> +		rte_errno = EINVAL;

>>> +

>>> +		return NULL;

>>> +	}

>>> +

>>> +	dq = rte_zmalloc(NULL,

>>> +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

>>> +		RTE_CACHE_LINE_SIZE);

>>> +	if (dq == NULL) {

>>> +		rte_errno = ENOMEM;

>>> +

>>> +		return NULL;

>>> +	}

>>> +

>>> +	/* round up qs_fifo_size to next power of two that is not less than

>>> +	 * max_size.

>>> +	 */

>>> +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

>>> +					* params->size) + 1);

>>> +	dq->r = rte_ring_create(params->name, qs_fifo_size,

>>> +					SOCKET_ID_ANY, 0);

>>> +	if (dq->r == NULL) {

>>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

>>> +			"%s(): defer queue create failed\n", __func__);

>>> +		rte_free(dq);

>>> +		return NULL;

>>> +	}

>>> +

>>> +	dq->v = params->v;

>>> +	dq->size = params->size;

>>> +	dq->esize = params->esize;

>>> +	dq->f = params->f;

>>> +	dq->p = params->p;

>>> +

>>> +	return dq;

>>> +}

>>> +

>>> +/* Enqueue one resource to the defer queue to free after the grace

>>> + * period is over.

>>> + */

>>> +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e) {

>>> +	uint64_t token;

>>> +	uint64_t *tmp;

>>> +	uint32_t i;

>>> +	uint32_t cur_size, free_size;

>>> +

>>> +	if (dq == NULL || e == NULL) {

>>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

>>> +			"%s(): Invalid input parameter\n", __func__);

>>> +		rte_errno = EINVAL;

>>> +

>>> +		return 1;

>>> +	}

>>> +

>>> +	/* Start the grace period */

>>> +	token = rte_rcu_qsbr_start(dq->v);

>>> +

>>> +	/* Reclaim resources if the queue is 1/8th full. This helps

>>> +	 * the queue from growing too large and allows time for reader

>>> +	 * threads to report their quiescent state.

>>> +	 */

>>> +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

>>> +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

>>> +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

>>> +			"%s(): Triggering reclamation\n", __func__);

>>> +		rte_rcu_qsbr_dq_reclaim(dq);

>>> +	}

>> There are two problems I see:

>>

>> 1. rte_rcu_qsbr_dq_reclaim() reclaims only 1/16 of the defer queue while it

>> triggers on 1/8. This means that there will always be 1/16 of non reclaimed

>> entries in the queue.

> There will be 'at least' 1/16 non-reclaimed entries.

Correct, that's what I meant :)
>   It could be more depending on the length of the grace period and the rate of deletion.


Right, the number of entries to reclaim depends on:

- grace period which is application specific

- cost of delete operation which is library (algorithm) specific

- rate of deletion which depends on runtime.

So it is very hard to predict how big should be threshold to trigger 
reclamation and how many entries should it reclaim.

> The trigger of 1/8 is used to give sufficient time for the readers to report their quiescent state. 1/16 is used to spread the load of reclamation across multiple calls and provide a upper bound on the cycles consumed.


1/16 of max entries to reclaim within single call can cost a lot. 
Moreover, it could have an impact on the readers through massive cache 
evictions.

Consider a set of routes from test_lpm_perf.c. To install all routes you 
need to have at least 65k tbl8 entries (now it has 2k). So when 
reclaiming, besides the costs of rte_rcu_qsbr_check(), you'll need to 
rewrite 4k cache lines.

So 1/16 of max entries is relatively big and it's better to spread this 
load across multiple calls.

>

>> 2. Number of entries to reclaim depend on dq->size. So,

>> rte_rcu_qsbr_dq_reclaim() could take a lot of cycles. For LPM library this

> That is true. It depends on dq->size (number of tbl8 groups). However, note that there is patch [1] which provides batch reclamation kind of behavior which reduces the cycles consumed by reclamation significantly.

>

> [1] https://patches.dpdk.org/patch/58960/

>

>> means that rte_lpm_delete() sometimes takes a long time.

> Agree, sometimes takes additional time. It is good to spread it over multiple calls.

Right, with batch reclamation we have here classic throughput vs latency 
problem. Either reclaiming big number of entries relatively infrequently 
spreading the cost of readers quiescent state check or reclaiming small 
amount of entries more often spending more cycles in average. I'd prefer 
latency here because as I mentioned earlier huge batches could have an 
impact on readers and lead to big difference in cost of delete().
>

>> So, my suggestions here would be

>>

>> - trigger rte_rcu_qsbr_dq_reclaim() with every enqueue

> Given that the LPM APIs are mainly for control plane, I would think that, the next time LPM API is called, the readers have completed the grace period. But if there are frequent updates, we might end up with empty reclaims which will waste cycles. IMO, this trigger should happen after at least few entries are in the queue.

>

>> - reclaim small amount of entries (could be configurable of creation time)

> Agree. I would keep it a smaller than the trigger amount knowing that the elements added right before the trigger might not have completed the grace period.

>

>> - provide API to trigger reclaim from the application manually.

> IMO, this will add additional complexity to the application. I agree that there will be special needs for some applications. I think those applications might have to implement their own methods using the base RCU APIs.

> Instead, as agreed in other threads, I suggest we expose the parameters (when to trigger and how much to reclaim) to the application as optional configurable parameters. i.e. if the application does not provide we can use default values. I think this should provide enough flexibility to the application.


Agree.

Regarding default values, one strategy could be:

- if reported threshold isn't set (i.e. is equal 0) then call reclaim 
with every enqueue (i.e. threshold == 1)

- if max_entries_to_reclaim isn't set then reclaim as much as we can


>>> +

>>> +	/* Check if there is space for atleast for 1 resource */

>>> +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

>>> +	if (!free_size) {

>>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

>>> +			"%s(): Defer queue is full\n", __func__);

>>> +		rte_errno = ENOSPC;

>>> +		return 1;

>>> +	}

>>> +

>>> +	/* Enqueue the resource */

>>> +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

>>> +

>>> +	/* The resource to enqueue needs to be a multiple of 64b

>>> +	 * due to the limitation of the rte_ring implementation.

>>> +	 */

>>> +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

>>> +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

>>> +

>>> +	return 0;

>>> +}

>>> +

>>> +/* Reclaim resources from the defer queue. */ int

>>> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

>>> +	uint32_t max_cnt;

>>> +	uint32_t cnt;

>>> +	void *token;

>>> +	uint64_t *tmp;

>>> +	uint32_t i;

>>> +

>>> +	if (dq == NULL) {

>>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

>>> +			"%s(): Invalid input parameter\n", __func__);

>>> +		rte_errno = EINVAL;

>>> +

>>> +		return 1;

>>> +	}

>>> +

>>> +	/* Anything to reclaim? */

>>> +	if (rte_ring_count(dq->r) == 0)

>>> +		return 0;

>>> +

>>> +	/* Reclaim at the max 1/16th the total number of entries. */

>>> +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

>>> +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

>>> +	cnt = 0;

>>> +

>>> +	/* Check reader threads quiescent state and reclaim resources */

>>> +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

>>> +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

>>> +			== 1)) {

>>> +		(void)rte_ring_sc_dequeue(dq->r, &token);

>>> +		/* The resource to dequeue needs to be a multiple of 64b

>>> +		 * due to the limitation of the rte_ring implementation.

>>> +		 */

>>> +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

>>> +			i++, tmp++)

>>> +			(void)rte_ring_sc_dequeue(dq->r,

>>> +					(void *)(uintptr_t)tmp);

>>> +		dq->f(dq->p, dq->e);

>>> +

>>> +		cnt++;

>>> +	}

>>> +

>>> +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

>>> +		"%s(): Reclaimed %u resources\n", __func__, cnt);

>>> +

>>> +	if (cnt == 0) {

>>> +		/* No resources were reclaimed */

>>> +		rte_errno = EAGAIN;

>>> +		return 1;

>>> +	}

>>> +

>>> +	return 0;

>>> +}

>>> +

>>> +/* Delete a defer queue. */

>>> +int

>>> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

>>> +	if (dq == NULL) {

>>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

>>> +			"%s(): Invalid input parameter\n", __func__);

>>> +		rte_errno = EINVAL;

>>> +

>>> +		return 1;

>>> +	}

>>> +

>>> +	/* Reclaim all the resources */

>>> +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

>>> +		/* Error number is already set by the reclaim API */

>>> +		return 1;

>>> +

>>> +	rte_ring_free(dq->r);

>>> +	rte_free(dq);

>>> +

>>> +	return 0;

>>> +}

>>> +

>>>    int rte_rcu_log_type;

>>>

>>>    RTE_INIT(rte_rcu_register)

>>> diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

>>> b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a 100644

>>> --- a/lib/librte_rcu/rte_rcu_qsbr.h

>>> +++ b/lib/librte_rcu/rte_rcu_qsbr.h

>>> @@ -34,6 +34,7 @@ extern "C" {

>>>    #include <rte_lcore.h>

>>>    #include <rte_debug.h>

>>>    #include <rte_atomic.h>

>>> +#include <rte_ring.h>

>>>

>>>    extern int rte_rcu_log_type;

>>>

>>> @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

>>>    	 */

>>>    } __rte_cache_aligned;

>>>

>>> +/**

>>> + * Call back function called to free the resources.

>>> + *

>>> + * @param p

>>> + *   Pointer provided while creating the defer queue

>>> + * @param e

>>> + *   Pointer to the resource data stored on the defer queue

>>> + *

>>> + * @return

>>> + *   None

>>> + */

>>> +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

>>> +

>>> +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

>>> +

>>> +/**

>>> + *  Trigger automatic reclamation after 1/8th the defer queue is full.

>>> + */

>>> +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

>>> +

>>> +/**

>>> + *  Reclaim at the max 1/16th the total number of resources.

>>> + */

>>> +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

>>> +

>>> +/**

>>> + * Parameters used when creating the defer queue.

>>> + */

>>> +struct rte_rcu_qsbr_dq_parameters {

>>> +	const char *name;

>>> +	/**< Name of the queue. */

>>> +	uint32_t size;

>>> +	/**< Number of entries in queue. Typically, this will be

>>> +	 *   the same as the maximum number of entries supported in the

>>> +	 *   lock free data structure.

>>> +	 *   Data structures with unbounded number of entries is not

>>> +	 *   supported currently.

>>> +	 */

>>> +	uint32_t esize;

>>> +	/**< Size (in bytes) of each element in the defer queue.

>>> +	 *   This has to be multiple of 8B as the rte_ring APIs

>>> +	 *   support 8B element sizes only.

>>> +	 */

>>> +	rte_rcu_qsbr_free_resource f;

>>> +	/**< Function to call to free the resource. */

>>> +	void *p;

>>> +	/**< Pointer passed to the free function. Typically, this is the

>>> +	 *   pointer to the data structure to which the resource to free

>>> +	 *   belongs. This can be NULL.

>>> +	 */

>>> +	struct rte_rcu_qsbr *v;

>>> +	/**< RCU QSBR variable to use for this defer queue */ };

>>> +

>>> +/* RTE defer queue structure.

>>> + * This structure holds the defer queue. The defer queue is used to

>>> + * hold the deleted entries from the data structure that are not

>>> + * yet freed.

>>> + */

>>> +struct rte_rcu_qsbr_dq;

>>> +

>>>    /**

>>>     * @warning

>>>     * @b EXPERIMENTAL: this API may change without prior notice @@

>>> -648,6 +710,113 @@ __rte_experimental

>>>    int

>>>    rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);

>>>

>>> +/**

>>> + * @warning

>>> + * @b EXPERIMENTAL: this API may change without prior notice

>>> + *

>>> + * Create a queue used to store the data structure elements that can

>>> + * be freed later. This queue is referred to as 'defer queue'.

>>> + *

>>> + * @param params

>>> + *   Parameters to create a defer queue.

>>> + * @return

>>> + *   On success - Valid pointer to defer queue

>>> + *   On error - NULL

>>> + *   Possible rte_errno codes are:

>>> + *   - EINVAL - NULL parameters are passed

>>> + *   - ENOMEM - Not enough memory

>>> + */

>>> +__rte_experimental

>>> +struct rte_rcu_qsbr_dq *

>>> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

>>> +*params);

>>> +

>>> +/**

>>> + * @warning

>>> + * @b EXPERIMENTAL: this API may change without prior notice

>>> + *

>>> + * Enqueue one resource to the defer queue and start the grace period.

>>> + * The resource will be freed later after at least one grace period

>>> + * is over.

>>> + *

>>> + * If the defer queue is full, it will attempt to reclaim resources.

>>> + * It will also reclaim resources at regular intervals to avoid

>>> + * the defer queue from growing too big.

>>> + *

>>> + * This API is not multi-thread safe. It is expected that the caller

>>> + * provides multi-thread safety by locking a mutex or some other means.

>>> + *

>>> + * A lock free multi-thread writer algorithm could achieve

>>> +multi-thread

>>> + * safety by creating and using one defer queue per thread.

>>> + *

>>> + * @param dq

>>> + *   Defer queue to allocate an entry from.

>>> + * @param e

>>> + *   Pointer to resource data to copy to the defer queue. The size of

>>> + *   the data to copy is equal to the element size provided when the

>>> + *   defer queue was created.

>>> + * @return

>>> + *   On success - 0

>>> + *   On error - 1 with rte_errno set to

>>> + *   - EINVAL - NULL parameters are passed

>>> + *   - ENOSPC - Defer queue is full. This condition can not happen

>>> + *		if the defer queue size is equal (or larger) than the

>>> + *		number of elements in the data structure.

>>> + */

>>> +__rte_experimental

>>> +int

>>> +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

>>> +

>>> +/**

>>> + * @warning

>>> + * @b EXPERIMENTAL: this API may change without prior notice

>>> + *

>>> + * Reclaim resources from the defer queue.

>>> + *

>>> + * This API is not multi-thread safe. It is expected that the caller

>>> + * provides multi-thread safety by locking a mutex or some other means.

>>> + *

>>> + * A lock free multi-thread writer algorithm could achieve

>>> +multi-thread

>>> + * safety by creating and using one defer queue per thread.

>>> + *

>>> + * @param dq

>>> + *   Defer queue to reclaim an entry from.

>>> + * @return

>>> + *   On successful reclamation of at least 1 resource - 0

>>> + *   On error - 1 with rte_errno set to

>>> + *   - EINVAL - NULL parameters are passed

>>> + *   - EAGAIN - None of the resources have completed at least 1 grace

>> period,

>>> + *		try again.

>>> + */

>>> +__rte_experimental

>>> +int

>>> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

>>> +

>>> +/**

>>> + * @warning

>>> + * @b EXPERIMENTAL: this API may change without prior notice

>>> + *

>>> + * Delete a defer queue.

>>> + *

>>> + * It tries to reclaim all the resources on the defer queue.

>>> + * If any of the resources have not completed the grace period

>>> + * the reclamation stops and returns immediately. The rest of

>>> + * the resources are not reclaimed and the defer queue is not

>>> + * freed.

>>> + *

>>> + * @param dq

>>> + *   Defer queue to delete.

>>> + * @return

>>> + *   On success - 0

>>> + *   On error - 1

>>> + *   Possible rte_errno codes are:

>>> + *   - EINVAL - NULL parameters are passed

>>> + *   - EAGAIN - Some of the resources have not completed at least 1 grace

>>> + *		period, try again.

>>> + */

>>> +__rte_experimental

>>> +int

>>> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

>>> +

>>>    #ifdef __cplusplus

>>>    }

>>>    #endif

>>> diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

>>> b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

>>> new file mode 100644

>>> index 000000000..2122bc36a

>>> --- /dev/null

>>> +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

>>> @@ -0,0 +1,46 @@

>>> +/* SPDX-License-Identifier: BSD-3-Clause

>>> + * Copyright (c) 2019 Arm Limited

>>> + */

>>> +

>>> +#ifndef _RTE_RCU_QSBR_PVT_H_

>>> +#define _RTE_RCU_QSBR_PVT_H_

>>> +

>>> +/**

>>> + * This file is private to the RCU library. It should not be included

>>> + * by the user of this library.

>>> + */

>>> +

>>> +#ifdef __cplusplus

>>> +extern "C" {

>>> +#endif

>>> +

>>> +#include "rte_rcu_qsbr.h"

>>> +

>>> +/* RTE defer queue structure.

>>> + * This structure holds the defer queue. The defer queue is used to

>>> + * hold the deleted entries from the data structure that are not

>>> + * yet freed.

>>> + */

>>> +struct rte_rcu_qsbr_dq {

>>> +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

>>> +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

>>> +	uint32_t size;

>>> +	/**< Number of elements in the defer queue */

>>> +	uint32_t esize;

>>> +	/**< Size (in bytes) of data stored on the defer queue */

>>> +	rte_rcu_qsbr_free_resource f;

>>> +	/**< Function to call to free the resource. */

>>> +	void *p;

>>> +	/**< Pointer passed to the free function. Typically, this is the

>>> +	 *   pointer to the data structure to which the resource to free

>>> +	 *   belongs.

>>> +	 */

>>> +	char e[0];

>>> +	/**< Temporary storage to copy the defer queue element. */ };

>>> +

>>> +#ifdef __cplusplus

>>> +}

>>> +#endif

>>> +

>>> +#endif /* _RTE_RCU_QSBR_PVT_H_ */

>>> diff --git a/lib/librte_rcu/rte_rcu_version.map

>>> b/lib/librte_rcu/rte_rcu_version.map

>>> index f8b9ef2ab..dfac88a37 100644

>>> --- a/lib/librte_rcu/rte_rcu_version.map

>>> +++ b/lib/librte_rcu/rte_rcu_version.map

>>> @@ -8,6 +8,10 @@ EXPERIMENTAL {

>>>    	rte_rcu_qsbr_synchronize;

>>>    	rte_rcu_qsbr_thread_register;

>>>    	rte_rcu_qsbr_thread_unregister;

>>> +	rte_rcu_qsbr_dq_create;

>>> +	rte_rcu_qsbr_dq_enqueue;

>>> +	rte_rcu_qsbr_dq_reclaim;

>>> +	rte_rcu_qsbr_dq_delete;

>>>

>>>    	local: *;

>>>    };

>>> diff --git a/lib/meson.build b/lib/meson.build index

>>> e5ff83893..0e1be8407 100644

>>> --- a/lib/meson.build

>>> +++ b/lib/meson.build

>>> @@ -11,7 +11,9 @@

>>>    libraries = [

>>>    	'kvargs', # eal depends on kvargs

>>>    	'eal', # everything depends on eal

>>> -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

>>> +	'ring',

>>> +	'rcu', # rcu depends on ring

>>> +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

>>>    	'cmdline',

>>>    	'metrics', # bitrate/latency stats depends on this

>>>    	'hash',    # efd depends on this

>>> @@ -22,7 +24,7 @@ libraries = [

>>>    	'gro', 'gso', 'ip_frag', 'jobstats',

>>>    	'kni', 'latencystats', 'lpm', 'member',

>>>    	'power', 'pdump', 'rawdev',

>>> -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

>>> +	'reorder', 'sched', 'security', 'stack', 'vhost',

>>>    	# ipsec lib depends on net, crypto and security

>>>    	'ipsec',

>>>    	# add pkt framework libs which use other libs from above

>> --

>> Regards,

>> Vladimir


-- 
Regards,
Vladimir

Honnappa Nagarahalli Oct. 18, 2019, 3:47 a.m. UTC | #14

<snip>

> 

> Hi Honnappa,

> 

> On 13/10/2019 04:02, Honnappa Nagarahalli wrote:

> > Hi Vladimir,

> > 	Apologies for the delayed response, I had to run few experiments.

> >

> > <snip>

> >

> >> Hi Honnappa,

> >>

> >> On 01/10/2019 07:29, Honnappa Nagarahalli wrote:

> >>> Add resource reclamation APIs to make it simple for applications and

> >>> libraries to integrate rte_rcu library.

> >>>

> >>> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> >>> Reviewed-by: Ola Liljedhal <ola.liljedhal@arm.com>

> >>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>

> >>> ---

> >>>    app/test/test_rcu_qsbr.c           | 291 ++++++++++++++++++++++++++++-

> >>>    lib/librte_rcu/meson.build         |   2 +

> >>>    lib/librte_rcu/rte_rcu_qsbr.c      | 185 ++++++++++++++++++

> >>>    lib/librte_rcu/rte_rcu_qsbr.h      | 169 +++++++++++++++++

> >>>    lib/librte_rcu/rte_rcu_qsbr_pvt.h  |  46 +++++

> >>>    lib/librte_rcu/rte_rcu_version.map |   4 +

> >>>    lib/meson.build                    |   6 +-

> >>>    7 files changed, 700 insertions(+), 3 deletions(-)

> >>>    create mode 100644 lib/librte_rcu/rte_rcu_qsbr_pvt.h

> >>>

> >>> diff --git a/app/test/test_rcu_qsbr.c b/app/test/test_rcu_qsbr.c

> >>> index

> >>> d1b9e46a2..3a6815243 100644

> >>> --- a/app/test/test_rcu_qsbr.c

> >>> +++ b/app/test/test_rcu_qsbr.c

> >>> @@ -1,8 +1,9 @@

> >>>    /* SPDX-License-Identifier: BSD-3-Clause

> >>> - * Copyright (c) 2018 Arm Limited

> >>> + * Copyright (c) 2019 Arm Limited

> >>>     */

> >>>

> >>>    #include <stdio.h>

> >>> +#include <string.h>

> >>>    #include <rte_pause.h>

> >>>    #include <rte_rcu_qsbr.h>

> >>>    #include <rte_hash.h>

> >>> @@ -33,6 +34,7 @@ static uint32_t *keys;

> >>>    #define COUNTER_VALUE 4096

> >>>    static uint32_t *hash_data[RTE_MAX_LCORE][TOTAL_ENTRY];

> >>>    static uint8_t writer_done;

> >>> +static uint8_t cb_failed;

> >>>

> >>>    static struct rte_rcu_qsbr *t[RTE_MAX_LCORE];

> >>>    struct rte_hash *h[RTE_MAX_LCORE]; @@ -582,6 +584,269 @@

> >>> test_rcu_qsbr_thread_offline(void)

> >>>    	return 0;

> >>>    }

> >>>

> >>> +static void

> >>> +rte_rcu_qsbr_test_free_resource(void *p, void *e) {

> >>> +	if (p != NULL && e != NULL) {

> >>> +		printf("%s: Test failed\n", __func__);

> >>> +		cb_failed = 1;

> >>> +	}

> >>> +}

> >>> +

> >>> +/*

> >>> + * rte_rcu_qsbr_dq_create: create a queue used to store the data

> >>> +structure

> >>> + * elements that can be freed later. This queue is referred to as

> >>> +'defer

> >> queue'.

> >>> + */

> >>> +static int

> >>> +test_rcu_qsbr_dq_create(void)

> >>> +{

> >>> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> >>> +	struct rte_rcu_qsbr_dq_parameters params;

> >>> +	struct rte_rcu_qsbr_dq *dq;

> >>> +

> >>> +	printf("\nTest rte_rcu_qsbr_dq_create()\n");

> >>> +

> >>> +	/* Pass invalid parameters */

> >>> +	dq = rte_rcu_qsbr_dq_create(NULL);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> >>> +params");

> >>> +

> >>> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> >>> +params");

> >>> +

> >>> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> >>> +	params.name = rcu_dq_name;

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> >>> +params");

> >>> +

> >>> +	params.f = rte_rcu_qsbr_test_free_resource;

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> >>> +params");

> >>> +

> >>> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> >>> +	params.v = t[0];

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> >>> +params");

> >>> +

> >>> +	params.size = 1;

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> >>> +params");

> >>> +

> >>> +	params.esize = 3;

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq != NULL), "dq create invalid

> >>> +params");

> >>> +

> >>> +	/* Pass all valid parameters */

> >>> +	params.esize = 16;

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

> >> params");

> >>> +	rte_rcu_qsbr_dq_delete(dq);

> >>> +

> >>> +	return 0;

> >>> +}

> >>> +

> >>> +/*

> >>> + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer

> >>> +queue,

> >>> + * to be freed later after atleast one grace period is over.

> >>> + */

> >>> +static int

> >>> +test_rcu_qsbr_dq_enqueue(void)

> >>> +{

> >>> +	int ret;

> >>> +	uint64_t r;

> >>> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> >>> +	struct rte_rcu_qsbr_dq_parameters params;

> >>> +	struct rte_rcu_qsbr_dq *dq;

> >>> +

> >>> +	printf("\nTest rte_rcu_qsbr_dq_enqueue()\n");

> >>> +

> >>> +	/* Create a queue with simple parameters */

> >>> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> >>> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> >>> +	params.name = rcu_dq_name;

> >>> +	params.f = rte_rcu_qsbr_test_free_resource;

> >>> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> >>> +	params.v = t[0];

> >>> +	params.size = 1;

> >>> +	params.esize = 16;

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

> >>> +params");

> >>> +

> >>> +	/* Pass invalid parameters */

> >>> +	ret = rte_rcu_qsbr_dq_enqueue(NULL, NULL);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid

> >>> +params");

> >>> +

> >>> +	ret = rte_rcu_qsbr_dq_enqueue(dq, NULL);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid

> >>> +params");

> >>> +

> >>> +	ret = rte_rcu_qsbr_dq_enqueue(NULL, &r);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue invalid

> >>> +params");

> >>> +

> >>> +	ret = rte_rcu_qsbr_dq_delete(dq);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 1), "dq delete valid

> >> params");

> >>> +

> >>> +	return 0;

> >>> +}

> >>> +

> >>> +/*

> >>> + * rte_rcu_qsbr_dq_reclaim: Reclaim resources from the defer queue.

> >>> + */

> >>> +static int

> >>> +test_rcu_qsbr_dq_reclaim(void)

> >>> +{

> >>> +	int ret;

> >>> +

> >>> +	printf("\nTest rte_rcu_qsbr_dq_reclaim()\n");

> >>> +

> >>> +	/* Pass invalid parameters */

> >>> +	ret = rte_rcu_qsbr_dq_reclaim(NULL);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq reclaim invalid

> >>> +params");

> >>> +

> >>> +	return 0;

> >>> +}

> >>> +

> >>> +/*

> >>> + * rte_rcu_qsbr_dq_delete: Delete a defer queue.

> >>> + */

> >>> +static int

> >>> +test_rcu_qsbr_dq_delete(void)

> >>> +{

> >>> +	int ret;

> >>> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> >>> +	struct rte_rcu_qsbr_dq_parameters params;

> >>> +	struct rte_rcu_qsbr_dq *dq;

> >>> +

> >>> +	printf("\nTest rte_rcu_qsbr_dq_delete()\n");

> >>> +

> >>> +	/* Pass invalid parameters */

> >>> +	ret = rte_rcu_qsbr_dq_delete(NULL);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 1), "dq delete invalid

> >>> +params");

> >>> +

> >>> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> >>> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> >>> +	params.name = rcu_dq_name;

> >>> +	params.f = rte_rcu_qsbr_test_free_resource;

> >>> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> >>> +	params.v = t[0];

> >>> +	params.size = 1;

> >>> +	params.esize = 16;

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

> >> params");

> >>> +	ret = rte_rcu_qsbr_dq_delete(dq);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid

> >> params");

> >>> +

> >>> +	return 0;

> >>> +}

> >>> +

> >>> +/*

> >>> + * rte_rcu_qsbr_dq_enqueue: enqueue one resource to the defer

> >>> +queue,

> >>> + * to be freed later after atleast one grace period is over.

> >>> + */

> >>> +static int

> >>> +test_rcu_qsbr_dq_functional(int32_t size, int32_t esize) {

> >>> +	int i, j, ret;

> >>> +	char rcu_dq_name[RTE_RING_NAMESIZE];

> >>> +	struct rte_rcu_qsbr_dq_parameters params;

> >>> +	struct rte_rcu_qsbr_dq *dq;

> >>> +	uint64_t *e;

> >>> +	uint64_t sc = 200;

> >>> +	int max_entries;

> >>> +

> >>> +	printf("\nTest rte_rcu_qsbr_dq_xxx functional tests()\n");

> >>> +	printf("Size = %d, esize = %d\n", size, esize);

> >>> +

> >>> +	e = (uint64_t *)rte_zmalloc(NULL, esize, RTE_CACHE_LINE_SIZE);

> >>> +	if (e == NULL)

> >>> +		return 0;

> >>> +	cb_failed = 0;

> >>> +

> >>> +	/* Initialize the RCU variable. No threads are registered */

> >>> +	rte_rcu_qsbr_init(t[0], RTE_MAX_LCORE);

> >>> +

> >>> +	/* Create a queue with simple parameters */

> >>> +	memset(&params, 0, sizeof(struct rte_rcu_qsbr_dq_parameters));

> >>> +	snprintf(rcu_dq_name, sizeof(rcu_dq_name), "TEST_RCU");

> >>> +	params.name = rcu_dq_name;

> >>> +	params.f = rte_rcu_qsbr_test_free_resource;

> >>> +	params.v = t[0];

> >>> +	params.size = size;

> >>> +	params.esize = esize;

> >>> +	dq = rte_rcu_qsbr_dq_create(&params);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((dq == NULL), "dq create valid

> >>> +params");

> >>> +

> >>> +	/* Given the size and esize, calculate the maximum number of entries

> >>> +	 * that can be stored on the defer queue (look at the logic used

> >>> +	 * in capacity calculation of rte_ring).

> >>> +	 */

> >>> +	max_entries = rte_align32pow2(((esize/8 + 1) * size) + 1);

> >>> +	max_entries = (max_entries - 1)/(esize/8 + 1);

> >>> +

> >>> +	/* Enqueue few counters starting with the value 'sc' */

> >>> +	/* The queue size will be rounded up to 2. The enqueue API also

> >>> +	 * reclaims if the queue size is above certain limit. Since, there

> >>> +	 * are no threads registered, reclamation succedes. Hence, it should

> >>> +	 * be possible to enqueue more than the provided queue size.

> >>> +	 */

> >>> +	for (i = 0; i < 10; i++) {

> >>> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> >>> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> >>> +			"dq enqueue functional");

> >>> +		for (j = 0; j < esize/8; j++)

> >>> +			e[j] = sc++;

> >>> +	}

> >>> +

> >>> +	/* Register a thread on the RCU QSBR variable. Reclamation will not

> >>> +	 * succeed. It should not be possible to enqueue more than the size

> >>> +	 * number of resources.

> >>> +	 */

> >>> +	rte_rcu_qsbr_thread_register(t[0], 1);

> >>> +	rte_rcu_qsbr_thread_online(t[0], 1);

> >>> +

> >>> +	for (i = 0; i < max_entries; i++) {

> >>> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> >>> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> >>> +			"dq enqueue functional");

> >>> +		for (j = 0; j < esize/8; j++)

> >>> +			e[j] = sc++;

> >>> +	}

> >>> +

> >>> +	/* Enqueue fails as queue is full */

> >>> +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue

> >> functional");

> >>> +

> >>> +	/* Delete should fail as there are elements in defer queue which

> >>> +	 * cannot be reclaimed.

> >>> +	 */

> >>> +	ret = rte_rcu_qsbr_dq_delete(dq);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq delete valid

> >> params");

> >>> +

> >>> +	/* Report quiescent state, enqueue should succeed */

> >>> +	rte_rcu_qsbr_quiescent(t[0], 1);

> >>> +	for (i = 0; i < max_entries; i++) {

> >>> +		ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> >>> +		TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0),

> >>> +			"dq enqueue functional");

> >>> +		for (j = 0; j < esize/8; j++)

> >>> +			e[j] = sc++;

> >>> +	}

> >>> +

> >>> +	/* Queue is full */

> >>> +	ret = rte_rcu_qsbr_dq_enqueue(dq, e);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret == 0), "dq enqueue

> >> functional");

> >>> +

> >>> +	/* Report quiescent state, delete should succeed */

> >>> +	rte_rcu_qsbr_quiescent(t[0], 1);

> >>> +	ret = rte_rcu_qsbr_dq_delete(dq);

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((ret != 0), "dq delete valid

> >> params");

> >>> +

> >>> +	/* Validate that call back function did not return any error */

> >>> +	TEST_RCU_QSBR_RETURN_IF_ERROR((cb_failed == 1), "CB failed");

> >>> +

> >>> +	rte_free(e);

> >>> +	return 0;

> >>> +}

> >>> +

> >>>    /*

> >>>     * rte_rcu_qsbr_dump: Dump status of a single QS variable to a file

> >>>     */

> >>> @@ -1025,6 +1290,18 @@ test_rcu_qsbr_main(void)

> >>>    	if (test_rcu_qsbr_thread_offline() < 0)

> >>>    		goto test_fail;

> >>>

> >>> +	if (test_rcu_qsbr_dq_create() < 0)

> >>> +		goto test_fail;

> >>> +

> >>> +	if (test_rcu_qsbr_dq_reclaim() < 0)

> >>> +		goto test_fail;

> >>> +

> >>> +	if (test_rcu_qsbr_dq_delete() < 0)

> >>> +		goto test_fail;

> >>> +

> >>> +	if (test_rcu_qsbr_dq_enqueue() < 0)

> >>> +		goto test_fail;

> >>> +

> >>>    	printf("\nFunctional tests\n");

> >>>

> >>>    	if (test_rcu_qsbr_sw_sv_3qs() < 0) @@ -1033,6 +1310,18 @@

> >>> test_rcu_qsbr_main(void)

> >>>    	if (test_rcu_qsbr_mw_mv_mqs() < 0)

> >>>    		goto test_fail;

> >>>

> >>> +	if (test_rcu_qsbr_dq_functional(1, 8) < 0)

> >>> +		goto test_fail;

> >>> +

> >>> +	if (test_rcu_qsbr_dq_functional(2, 8) < 0)

> >>> +		goto test_fail;

> >>> +

> >>> +	if (test_rcu_qsbr_dq_functional(303, 16) < 0)

> >>> +		goto test_fail;

> >>> +

> >>> +	if (test_rcu_qsbr_dq_functional(7, 128) < 0)

> >>> +		goto test_fail;

> >>> +

> >>>    	free_rcu();

> >>>

> >>>    	printf("\n");

> >>> diff --git a/lib/librte_rcu/meson.build b/lib/librte_rcu/meson.build

> >>> index 62920ba02..e280b29c1 100644

> >>> --- a/lib/librte_rcu/meson.build

> >>> +++ b/lib/librte_rcu/meson.build

> >>> @@ -10,3 +10,5 @@ headers = files('rte_rcu_qsbr.h')

> >>>    if cc.get_id() == 'clang' and dpdk_conf.get('RTE_ARCH_64') == false

> >>>    	ext_deps += cc.find_library('atomic')

> >>>    endif

> >>> +

> >>> +deps += ['ring']

> >>> diff --git a/lib/librte_rcu/rte_rcu_qsbr.c

> >>> b/lib/librte_rcu/rte_rcu_qsbr.c index ce7f93dd3..76814f50b 100644

> >>> --- a/lib/librte_rcu/rte_rcu_qsbr.c

> >>> +++ b/lib/librte_rcu/rte_rcu_qsbr.c

> >>> @@ -21,6 +21,7 @@

> >>>    #include <rte_errno.h>

> >>>

> >>>    #include "rte_rcu_qsbr.h"

> >>> +#include "rte_rcu_qsbr_pvt.h"

> >>>

> >>>    /* Get the memory size of QSBR variable */

> >>>    size_t

> >>> @@ -267,6 +268,190 @@ rte_rcu_qsbr_dump(FILE *f, struct

> rte_rcu_qsbr

> >> *v)

> >>>    	return 0;

> >>>    }

> >>>

> >>> +/* Create a queue used to store the data structure elements that

> >>> +can

> >>> + * be freed later. This queue is referred to as 'defer queue'.

> >>> + */

> >>> +struct rte_rcu_qsbr_dq *

> >>> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> >>> +*params) {

> >>> +	struct rte_rcu_qsbr_dq *dq;

> >>> +	uint32_t qs_fifo_size;

> >>> +

> >>> +	if (params == NULL || params->f == NULL ||

> >>> +		params->v == NULL || params->name == NULL ||

> >>> +		params->size == 0 || params->esize == 0 ||

> >>> +		(params->esize % 8 != 0)) {

> >>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> >>> +			"%s(): Invalid input parameter\n", __func__);

> >>> +		rte_errno = EINVAL;

> >>> +

> >>> +		return NULL;

> >>> +	}

> >>> +

> >>> +	dq = rte_zmalloc(NULL,

> >>> +		(sizeof(struct rte_rcu_qsbr_dq) + params->esize),

> >>> +		RTE_CACHE_LINE_SIZE);

> >>> +	if (dq == NULL) {

> >>> +		rte_errno = ENOMEM;

> >>> +

> >>> +		return NULL;

> >>> +	}

> >>> +

> >>> +	/* round up qs_fifo_size to next power of two that is not less than

> >>> +	 * max_size.

> >>> +	 */

> >>> +	qs_fifo_size = rte_align32pow2((((params->esize/8) + 1)

> >>> +					* params->size) + 1);

> >>> +	dq->r = rte_ring_create(params->name, qs_fifo_size,

> >>> +					SOCKET_ID_ANY, 0);

> >>> +	if (dq->r == NULL) {

> >>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> >>> +			"%s(): defer queue create failed\n", __func__);

> >>> +		rte_free(dq);

> >>> +		return NULL;

> >>> +	}

> >>> +

> >>> +	dq->v = params->v;

> >>> +	dq->size = params->size;

> >>> +	dq->esize = params->esize;

> >>> +	dq->f = params->f;

> >>> +	dq->p = params->p;

> >>> +

> >>> +	return dq;

> >>> +}

> >>> +

> >>> +/* Enqueue one resource to the defer queue to free after the grace

> >>> + * period is over.

> >>> + */

> >>> +int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e) {

> >>> +	uint64_t token;

> >>> +	uint64_t *tmp;

> >>> +	uint32_t i;

> >>> +	uint32_t cur_size, free_size;

> >>> +

> >>> +	if (dq == NULL || e == NULL) {

> >>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> >>> +			"%s(): Invalid input parameter\n", __func__);

> >>> +		rte_errno = EINVAL;

> >>> +

> >>> +		return 1;

> >>> +	}

> >>> +

> >>> +	/* Start the grace period */

> >>> +	token = rte_rcu_qsbr_start(dq->v);

> >>> +

> >>> +	/* Reclaim resources if the queue is 1/8th full. This helps

> >>> +	 * the queue from growing too large and allows time for reader

> >>> +	 * threads to report their quiescent state.

> >>> +	 */

> >>> +	cur_size = rte_ring_count(dq->r) / (dq->esize/8 + 1);

> >>> +	if (cur_size > (dq->size >> RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT)) {

> >>> +		rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> >>> +			"%s(): Triggering reclamation\n", __func__);

> >>> +		rte_rcu_qsbr_dq_reclaim(dq);

> >>> +	}

> >> There are two problems I see:

> >>

> >> 1. rte_rcu_qsbr_dq_reclaim() reclaims only 1/16 of the defer queue

> >> while it triggers on 1/8. This means that there will always be 1/16

> >> of non reclaimed entries in the queue.

> > There will be 'at least' 1/16 non-reclaimed entries.

> Correct, that's what I meant :)

> >   It could be more depending on the length of the grace period and the rate

> of deletion.

> 

> Right, the number of entries to reclaim depends on:

> 

> - grace period which is application specific

> 

> - cost of delete operation which is library (algorithm) specific

> 

> - rate of deletion which depends on runtime.

> 

> So it is very hard to predict how big the threshold to trigger

> reclamation should be and how many entries it should reclaim.

> 

> > The trigger of 1/8 is used to give sufficient time for the readers to report

> their quiescent state. 1/16 is used to spread the load of reclamation across

> multiple calls and provide an upper bound on the cycles consumed.

> 

> 1/16 of max entries to reclaim within single call can cost a lot.

> Moreover, it could have an impact on the readers through massive cache

> evictions.

> 

> Consider a set of routes from test_lpm_perf.c. To install all routes you need

> to have at least 65k tbl8 entries (now it has 2k). So when reclaiming, besides

> the costs of rte_rcu_qsbr_check(), you'll need to rewrite 4k cache lines.

> 

> So 1/16 of max entries is relatively big and it's better to spread this load

> across multiple calls.

> 

> >

> >> 2. Number of entries to reclaim depend on dq->size. So,

> >> rte_rcu_qsbr_dq_reclaim() could take a lot of cycles. For LPM library

> >> this

> > That is true. It depends on dq->size (number of tbl8 groups). However, note

> that there is patch [1] which provides batch reclamation kind of behavior

> which reduces the cycles consumed by reclamation significantly.

> >

> > [1] https://patches.dpdk.org/patch/58960/

> >

> >> means that rte_lpm_delete() sometimes takes a long time.

> > Agree, sometimes takes additional time. It is good to spread it over multiple

> calls.

> Right, with batch reclamation we have the classic throughput vs latency

> problem here: either reclaim a big number of entries relatively infrequently,

> spreading the cost of the readers' quiescent state check, or reclaim a small

> number of entries more often, spending more cycles on average. I'd prefer

> latency here because, as I mentioned earlier, huge batches could have an

> impact on readers and lead to a big difference in the cost of delete().

> >

> >> So, my suggestions here would be

> >>

> >> - trigger rte_rcu_qsbr_dq_reclaim() with every enqueue

> > Given that the LPM APIs are mainly for control plane, I would think that,

> the next time LPM API is called, the readers have completed the grace period.

> But if there are frequent updates, we might end up with empty reclaims

> which will waste cycles. IMO, this trigger should happen after at least a few

> entries are in the queue.

> >

> >> - reclaim a small number of entries (could be configurable at creation

> >> time)

> > Agree. I would keep it smaller than the trigger amount, knowing that the

> elements added right before the trigger might not have completed the grace

> period.

> >

> >> - provide API to trigger reclaim from the application manually.

> > IMO, this will add additional complexity to the application. I agree that

> there will be special needs for some applications. I think those applications

> might have to implement their own methods using the base RCU APIs.

> > Instead, as agreed in other threads, I suggest we expose the parameters

> (when to trigger and how much to reclaim) to the application as optional

> configurable parameters. i.e. if the application does not provide we can use

> default values. I think this should provide enough flexibility to the application.

> 

> Agree.

> 

> Regarding default values, one strategy could be:

> 

> - if the reported threshold isn't set (i.e. is equal to 0) then call reclaim with every

> enqueue (i.e. threshold == 1)

> 

> - if max_entries_to_reclaim isn't set then reclaim as much as we can

> 

Ok, sounds good.

> 

> >>> +

> >>> +	/* Check if there is space for atleast for 1 resource */

> >>> +	free_size = rte_ring_free_count(dq->r) / (dq->esize/8 + 1);

> >>> +	if (!free_size) {

> >>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> >>> +			"%s(): Defer queue is full\n", __func__);

> >>> +		rte_errno = ENOSPC;

> >>> +		return 1;

> >>> +	}

> >>> +

> >>> +	/* Enqueue the resource */

> >>> +	rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)token);

> >>> +

> >>> +	/* The resource to enqueue needs to be a multiple of 64b

> >>> +	 * due to the limitation of the rte_ring implementation.

> >>> +	 */

> >>> +	for (i = 0, tmp = (uint64_t *)e; i < dq->esize/8; i++, tmp++)

> >>> +		rte_ring_sp_enqueue(dq->r, (void *)(uintptr_t)*tmp);

> >>> +

> >>> +	return 0;

> >>> +}

> >>> +

> >>> +/* Reclaim resources from the defer queue. */ int

> >>> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq) {

> >>> +	uint32_t max_cnt;

> >>> +	uint32_t cnt;

> >>> +	void *token;

> >>> +	uint64_t *tmp;

> >>> +	uint32_t i;

> >>> +

> >>> +	if (dq == NULL) {

> >>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> >>> +			"%s(): Invalid input parameter\n", __func__);

> >>> +		rte_errno = EINVAL;

> >>> +

> >>> +		return 1;

> >>> +	}

> >>> +

> >>> +	/* Anything to reclaim? */

> >>> +	if (rte_ring_count(dq->r) == 0)

> >>> +		return 0;

> >>> +

> >>> +	/* Reclaim at the max 1/16th the total number of entries. */

> >>> +	max_cnt = dq->size >> RTE_RCU_QSBR_MAX_RECLAIM_LIMIT;

> >>> +	max_cnt = (max_cnt == 0) ? dq->size : max_cnt;

> >>> +	cnt = 0;

> >>> +

> >>> +	/* Check reader threads quiescent state and reclaim resources */

> >>> +	while ((cnt < max_cnt) && (rte_ring_peek(dq->r, &token) == 0) &&

> >>> +		(rte_rcu_qsbr_check(dq->v, (uint64_t)((uintptr_t)token), false)

> >>> +			== 1)) {

> >>> +		(void)rte_ring_sc_dequeue(dq->r, &token);

> >>> +		/* The resource to dequeue needs to be a multiple of 64b

> >>> +		 * due to the limitation of the rte_ring implementation.

> >>> +		 */

> >>> +		for (i = 0, tmp = (uint64_t *)dq->e; i < dq->esize/8;

> >>> +			i++, tmp++)

> >>> +			(void)rte_ring_sc_dequeue(dq->r,

> >>> +					(void *)(uintptr_t)tmp);

> >>> +		dq->f(dq->p, dq->e);

> >>> +

> >>> +		cnt++;

> >>> +	}

> >>> +

> >>> +	rte_log(RTE_LOG_INFO, rte_rcu_log_type,

> >>> +		"%s(): Reclaimed %u resources\n", __func__, cnt);

> >>> +

> >>> +	if (cnt == 0) {

> >>> +		/* No resources were reclaimed */

> >>> +		rte_errno = EAGAIN;

> >>> +		return 1;

> >>> +	}

> >>> +

> >>> +	return 0;

> >>> +}

> >>> +

> >>> +/* Delete a defer queue. */

> >>> +int

> >>> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq) {

> >>> +	if (dq == NULL) {

> >>> +		rte_log(RTE_LOG_ERR, rte_rcu_log_type,

> >>> +			"%s(): Invalid input parameter\n", __func__);

> >>> +		rte_errno = EINVAL;

> >>> +

> >>> +		return 1;

> >>> +	}

> >>> +

> >>> +	/* Reclaim all the resources */

> >>> +	if (rte_rcu_qsbr_dq_reclaim(dq) != 0)

> >>> +		/* Error number is already set by the reclaim API */

> >>> +		return 1;

> >>> +

> >>> +	rte_ring_free(dq->r);

> >>> +	rte_free(dq);

> >>> +

> >>> +	return 0;

> >>> +}

> >>> +

> >>>    int rte_rcu_log_type;

> >>>

> >>>    RTE_INIT(rte_rcu_register)

> >>> diff --git a/lib/librte_rcu/rte_rcu_qsbr.h

> >>> b/lib/librte_rcu/rte_rcu_qsbr.h index c80f15c00..185d4b50a 100644

> >>> --- a/lib/librte_rcu/rte_rcu_qsbr.h

> >>> +++ b/lib/librte_rcu/rte_rcu_qsbr.h

> >>> @@ -34,6 +34,7 @@ extern "C" {

> >>>    #include <rte_lcore.h>

> >>>    #include <rte_debug.h>

> >>>    #include <rte_atomic.h>

> >>> +#include <rte_ring.h>

> >>>

> >>>    extern int rte_rcu_log_type;

> >>>

> >>> @@ -109,6 +110,67 @@ struct rte_rcu_qsbr {

> >>>    	 */

> >>>    } __rte_cache_aligned;

> >>>

> >>> +/**

> >>> + * Call back function called to free the resources.

> >>> + *

> >>> + * @param p

> >>> + *   Pointer provided while creating the defer queue

> >>> + * @param e

> >>> + *   Pointer to the resource data stored on the defer queue

> >>> + *

> >>> + * @return

> >>> + *   None

> >>> + */

> >>> +typedef void (*rte_rcu_qsbr_free_resource)(void *p, void *e);

> >>> +

> >>> +#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE

> >>> +

> >>> +/**

> >>> + *  Trigger automatic reclamation after 1/8th the defer queue is full.

> >>> + */

> >>> +#define RTE_RCU_QSBR_AUTO_RECLAIM_LIMIT 3

> >>> +

> >>> +/**

> >>> + *  Reclaim at the max 1/16th the total number of resources.

> >>> + */

> >>> +#define RTE_RCU_QSBR_MAX_RECLAIM_LIMIT 4

> >>> +

> >>> +/**

> >>> + * Parameters used when creating the defer queue.

> >>> + */

> >>> +struct rte_rcu_qsbr_dq_parameters {

> >>> +	const char *name;

> >>> +	/**< Name of the queue. */

> >>> +	uint32_t size;

> >>> +	/**< Number of entries in queue. Typically, this will be

> >>> +	 *   the same as the maximum number of entries supported in the

> >>> +	 *   lock free data structure.

> >>> +	 *   Data structures with unbounded number of entries is not

> >>> +	 *   supported currently.

> >>> +	 */

> >>> +	uint32_t esize;

> >>> +	/**< Size (in bytes) of each element in the defer queue.

> >>> +	 *   This has to be multiple of 8B as the rte_ring APIs

> >>> +	 *   support 8B element sizes only.

> >>> +	 */

> >>> +	rte_rcu_qsbr_free_resource f;

> >>> +	/**< Function to call to free the resource. */

> >>> +	void *p;

> >>> +	/**< Pointer passed to the free function. Typically, this is the

> >>> +	 *   pointer to the data structure to which the resource to free

> >>> +	 *   belongs. This can be NULL.

> >>> +	 */

> >>> +	struct rte_rcu_qsbr *v;

> >>> +	/**< RCU QSBR variable to use for this defer queue */ };

> >>> +

> >>> +/* RTE defer queue structure.

> >>> + * This structure holds the defer queue. The defer queue is used to

> >>> + * hold the deleted entries from the data structure that are not

> >>> + * yet freed.

> >>> + */

> >>> +struct rte_rcu_qsbr_dq;

> >>> +

> >>>    /**

> >>>     * @warning

> >>>     * @b EXPERIMENTAL: this API may change without prior notice @@

> >>> -648,6 +710,113 @@ __rte_experimental

> >>>    int

> >>>    rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);

> >>>

> >>> +/**

> >>> + * @warning

> >>> + * @b EXPERIMENTAL: this API may change without prior notice

> >>> + *

> >>> + * Create a queue used to store the data structure elements that

> >>> +can

> >>> + * be freed later. This queue is referred to as 'defer queue'.

> >>> + *

> >>> + * @param params

> >>> + *   Parameters to create a defer queue.

> >>> + * @return

> >>> + *   On success - Valid pointer to defer queue

> >>> + *   On error - NULL

> >>> + *   Possible rte_errno codes are:

> >>> + *   - EINVAL - NULL parameters are passed

> >>> + *   - ENOMEM - Not enough memory

> >>> + */

> >>> +__rte_experimental

> >>> +struct rte_rcu_qsbr_dq *

> >>> +rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters

> >>> +*params);

> >>> +

> >>> +/**

> >>> + * @warning

> >>> + * @b EXPERIMENTAL: this API may change without prior notice

> >>> + *

> >>> + * Enqueue one resource to the defer queue and start the grace period.

> >>> + * The resource will be freed later after at least one grace period

> >>> + * is over.

> >>> + *

> >>> + * If the defer queue is full, it will attempt to reclaim resources.

> >>> + * It will also reclaim resources at regular intervals to avoid

> >>> + * the defer queue from growing too big.

> >>> + *

> >>> + * This API is not multi-thread safe. It is expected that the

> >>> +caller

> >>> + * provides multi-thread safety by locking a mutex or some other means.

> >>> + *

> >>> + * A lock free multi-thread writer algorithm could achieve

> >>> +multi-thread

> >>> + * safety by creating and using one defer queue per thread.

> >>> + *

> >>> + * @param dq

> >>> + *   Defer queue to allocate an entry from.

> >>> + * @param e

> >>> + *   Pointer to resource data to copy to the defer queue. The size of

> >>> + *   the data to copy is equal to the element size provided when the

> >>> + *   defer queue was created.

> >>> + * @return

> >>> + *   On success - 0

> >>> + *   On error - 1 with rte_errno set to

> >>> + *   - EINVAL - NULL parameters are passed

> >>> + *   - ENOSPC - Defer queue is full. This condition can not happen

> >>> + *		if the defer queue size is equal (or larger) than the

> >>> + *		number of elements in the data structure.

> >>> + */

> >>> +__rte_experimental

> >>> +int

> >>> +rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);

> >>> +

> >>> +/**

> >>> + * @warning

> >>> + * @b EXPERIMENTAL: this API may change without prior notice

> >>> + *

> >>> + * Reclaim resources from the defer queue.

> >>> + *

> >>> + * This API is not multi-thread safe. It is expected that the

> >>> +caller

> >>> + * provides multi-thread safety by locking a mutex or some other means.

> >>> + *

> >>> + * A lock free multi-thread writer algorithm could achieve

> >>> +multi-thread

> >>> + * safety by creating and using one defer queue per thread.

> >>> + *

> >>> + * @param dq

> >>> + *   Defer queue to reclaim an entry from.

> >>> + * @return

> >>> + *   On successful reclamation of at least 1 resource - 0

> >>> + *   On error - 1 with rte_errno set to

> >>> + *   - EINVAL - NULL parameters are passed

> >>> + *   - EAGAIN - None of the resources have completed at least 1 grace

> >> period,

> >>> + *		try again.

> >>> + */

> >>> +__rte_experimental

> >>> +int

> >>> +rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq);

> >>> +

> >>> +/**

> >>> + * @warning

> >>> + * @b EXPERIMENTAL: this API may change without prior notice

> >>> + *

> >>> + * Delete a defer queue.

> >>> + *

> >>> + * It tries to reclaim all the resources on the defer queue.

> >>> + * If any of the resources have not completed the grace period

> >>> + * the reclamation stops and returns immediately. The rest of

> >>> + * the resources are not reclaimed and the defer queue is not

> >>> + * freed.

> >>> + *

> >>> + * @param dq

> >>> + *   Defer queue to delete.

> >>> + * @return

> >>> + *   On success - 0

> >>> + *   On error - 1

> >>> + *   Possible rte_errno codes are:

> >>> + *   - EINVAL - NULL parameters are passed

> >>> + *   - EAGAIN - Some of the resources have not completed at least 1

> grace

> >>> + *		period, try again.

> >>> + */

> >>> +__rte_experimental

> >>> +int

> >>> +rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);

> >>> +

> >>>    #ifdef __cplusplus

> >>>    }

> >>>    #endif

> >>> diff --git a/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> >>> b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> >>> new file mode 100644

> >>> index 000000000..2122bc36a

> >>> --- /dev/null

> >>> +++ b/lib/librte_rcu/rte_rcu_qsbr_pvt.h

> >>> @@ -0,0 +1,46 @@

> >>> +/* SPDX-License-Identifier: BSD-3-Clause

> >>> + * Copyright (c) 2019 Arm Limited

> >>> + */

> >>> +

> >>> +#ifndef _RTE_RCU_QSBR_PVT_H_

> >>> +#define _RTE_RCU_QSBR_PVT_H_

> >>> +

> >>> +/**

> >>> + * This file is private to the RCU library. It should not be

> >>> +included

> >>> + * by the user of this library.

> >>> + */

> >>> +

> >>> +#ifdef __cplusplus

> >>> +extern "C" {

> >>> +#endif

> >>> +

> >>> +#include "rte_rcu_qsbr.h"

> >>> +

> >>> +/* RTE defer queue structure.

> >>> + * This structure holds the defer queue. The defer queue is used to

> >>> + * hold the deleted entries from the data structure that are not

> >>> + * yet freed.

> >>> + */

> >>> +struct rte_rcu_qsbr_dq {

> >>> +	struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/

> >>> +	struct rte_ring *r;     /**< RCU QSBR defer queue. */

> >>> +	uint32_t size;

> >>> +	/**< Number of elements in the defer queue */

> >>> +	uint32_t esize;

> >>> +	/**< Size (in bytes) of data stored on the defer queue */

> >>> +	rte_rcu_qsbr_free_resource f;

> >>> +	/**< Function to call to free the resource. */

> >>> +	void *p;

> >>> +	/**< Pointer passed to the free function. Typically, this is the

> >>> +	 *   pointer to the data structure to which the resource to free

> >>> +	 *   belongs.

> >>> +	 */

> >>> +	char e[0];

> >>> +	/**< Temporary storage to copy the defer queue element. */ };

> >>> +

> >>> +#ifdef __cplusplus

> >>> +}

> >>> +#endif

> >>> +

> >>> +#endif /* _RTE_RCU_QSBR_PVT_H_ */

> >>> diff --git a/lib/librte_rcu/rte_rcu_version.map

> >>> b/lib/librte_rcu/rte_rcu_version.map

> >>> index f8b9ef2ab..dfac88a37 100644

> >>> --- a/lib/librte_rcu/rte_rcu_version.map

> >>> +++ b/lib/librte_rcu/rte_rcu_version.map

> >>> @@ -8,6 +8,10 @@ EXPERIMENTAL {

> >>>    	rte_rcu_qsbr_synchronize;

> >>>    	rte_rcu_qsbr_thread_register;

> >>>    	rte_rcu_qsbr_thread_unregister;

> >>> +	rte_rcu_qsbr_dq_create;

> >>> +	rte_rcu_qsbr_dq_enqueue;

> >>> +	rte_rcu_qsbr_dq_reclaim;

> >>> +	rte_rcu_qsbr_dq_delete;

> >>>

> >>>    	local: *;

> >>>    };

> >>> diff --git a/lib/meson.build b/lib/meson.build index

> >>> e5ff83893..0e1be8407 100644

> >>> --- a/lib/meson.build

> >>> +++ b/lib/meson.build

> >>> @@ -11,7 +11,9 @@

> >>>    libraries = [

> >>>    	'kvargs', # eal depends on kvargs

> >>>    	'eal', # everything depends on eal

> >>> -	'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> >>> +	'ring',

> >>> +	'rcu', # rcu depends on ring

> >>> +	'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core

> >>>    	'cmdline',

> >>>    	'metrics', # bitrate/latency stats depends on this

> >>>    	'hash',    # efd depends on this

> >>> @@ -22,7 +24,7 @@ libraries = [

> >>>    	'gro', 'gso', 'ip_frag', 'jobstats',

> >>>    	'kni', 'latencystats', 'lpm', 'member',

> >>>    	'power', 'pdump', 'rawdev',

> >>> -	'rcu', 'reorder', 'sched', 'security', 'stack', 'vhost',

> >>> +	'reorder', 'sched', 'security', 'stack', 'vhost',

> >>>    	# ipsec lib depends on net, crypto and security

> >>>    	'ipsec',

> >>>    	# add pkt framework libs which use other libs from above

> >> --

> >> Regards,

> >> Vladimir

> 

> --

> Regards,

> Vladimir

[v3,2/3] lib/rcu: add resource reclamation APIs

Commit Message

Comments

Patch