[1/3] hash: deprecate lock elision and read/write concurrency flags

Message ID 20181101045454.632-2-honnappa.nagarahalli@arm.com
State New
Headers show
Series
  • hash: deprecate lock elision and read/write concurrency flags
Related show

Commit Message

Honnappa Nagarahalli Nov. 1, 2018, 4:54 a.m.
Hash library should provide read/write concurrency by default
as most of the use cases require read/write concurrency. Hence
the flag RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is deprecated.
The library will decide if locking is required to provide
the concurrency based on other configuration flags.

If a lock is used to provide the read/write concurrency, best
possible locking mechanism available on the platform should
be used. Hence, the flag RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT
is deprecated. The library will use transactional memory
if the platform supports it.

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>

Reviewed-by: Gavin Hu <gavin.hu@arm.com>

---
 lib/librte_hash/rte_cuckoo_hash.c    | 319 ++++++++++++++++++++++++++-
 lib/librte_hash/rte_hash.h           |  22 +-
 lib/librte_hash/rte_hash_version.map |   7 +
 3 files changed, 345 insertions(+), 3 deletions(-)

-- 
2.17.1

Comments

Bruce Richardson Nov. 1, 2018, 9:45 a.m. | #1
On Wed, Oct 31, 2018 at 11:54:52PM -0500, Honnappa Nagarahalli wrote:
> Hash library should provide read/write concurrency by default

> as most of the use cases require read/write concurrency. Hence

> the flag RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is deprecated.

> The library will decide if locking is required to provide

> the concurrency based on other configuration flags.

> 

> If a lock is used to provide the read/write concurrency, best

> possible locking mechanism available on the platform should

> be used. Hence, the flag RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT

> is deprecated. The library will use transactional memory

> if the platform supports it.

> 

> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>

> Reviewed-by: Gavin Hu <gavin.hu@arm.com>

> ---

>  lib/librte_hash/rte_cuckoo_hash.c    | 319 ++++++++++++++++++++++++++-

>  lib/librte_hash/rte_hash.h           |  22 +-

>  lib/librte_hash/rte_hash_version.map |   7 +

>  3 files changed, 345 insertions(+), 3 deletions(-)

> 

> diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c

> index 5ddcccd87..a11de22be 100644

> --- a/lib/librte_hash/rte_cuckoo_hash.c

> +++ b/lib/librte_hash/rte_cuckoo_hash.c

> @@ -121,7 +121,7 @@ get_alt_bucket_index(const struct rte_hash *h,

>  }

>  

>  struct rte_hash *

> -rte_hash_create(const struct rte_hash_parameters *params)

> +rte_hash_create_v20(const struct rte_hash_parameters *params)

>  {

>  	struct rte_hash *h = NULL;

>  	struct rte_tailq_entry *te = NULL;

> @@ -446,6 +446,323 @@ rte_hash_create(const struct rte_hash_parameters *params)

>  	rte_free(tbl_chng_cnt);

>  	return NULL;

>  }

> +VERSION_SYMBOL(rte_hash_create, _v20, 2.0);

> +


To make reviewing this easier, can I ask if in V2 you can put the v18.11
function first, before the existing one. Hopefully that means that the "new"
function from git's point of view is the existing one, showing it as a
block add that we can pretty much skip reviewing. The benefit of this is that
the changes in the v1811 should then show as line-by-line diffs in the
patch, so we can easily review the changes made in the new case. It's hard
to spot them in the whole function below.

> +struct rte_hash *

> +rte_hash_create_v1811(const struct rte_hash_parameters *params)

> +{

> +	struct rte_hash *h = NULL;

> +	struct rte_tailq_entry *te = NULL;

> +	struct rte_hash_list *hash_list;

> +	struct rte_ring *r = NULL;

> +	struct rte_ring *r_ext = NULL;

> +	char hash_name[RTE_HASH_NAMESIZE];

> +	void *k = NULL;

> +	void *buckets = NULL;

> +	void *buckets_ext = NULL;

> +	char ring_name[RTE_RING_NAMESIZE];

> +	char ext_ring_name[RTE_RING_NAMESIZE];

> +	unsigned num_key_slots;

> +	unsigned i;

> +	unsigned int use_local_cache = 0;

> +	unsigned int ext_table_support = 0;

> +	unsigned int readwrite_concur_support = 1;

> +	unsigned int writer_takes_lock = 1;

> +	unsigned int no_free_on_del = 0;

> +	uint32_t *tbl_chng_cnt = NULL;

> +	unsigned int readwrite_concur_lf_support = 0;

> +

> +	rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;

> +

> +	hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);

> +

> +	if (params == NULL) {

> +		RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");

> +		return NULL;

> +	}

> +

> +	/* Check for valid parameters */

> +	if ((params->entries > RTE_HASH_ENTRIES_MAX) ||

> +			(params->entries < RTE_HASH_BUCKET_ENTRIES) ||

> +			(params->key_len == 0)) {

> +		rte_errno = EINVAL;

> +		RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");

> +		return NULL;

> +	}

> +

> +	/* Validate correct usage of extra options */

> +	if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&

> +	    (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {

> +		rte_errno = EINVAL;

> +		RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "

> +			"feature not supported with rw concurrency "

> +			"lock free\n");


Please don't split the error text across lines. If you put it on a line by
itself, hopefully checkpatch should not complain. If checkpatch does
complain about the long line, just ignore it!
[Yes, I know this is copied from original code, but since it appears as new
code in this patch, we should fix it]

> +		return NULL;

> +	}

> +

> +	/* Check extra flags field to check extra options. */

> +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)

> +		use_local_cache = 1;

> +

> +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)

> +		ext_table_support = 1;

> +

> +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)

> +		no_free_on_del = 1;

> +

> +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {

> +		/* Do not use lock for RW concurrency */

> +		readwrite_concur_support = 0;

> +		readwrite_concur_lf_support = 1;

> +		/* Enable not freeing internal memory/index on delete */

> +		no_free_on_del = 1;

> +	}

> +

> +	if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&

> +	    !(params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD))

> +		writer_takes_lock = 0;

> +

> +	/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */

> +	if (use_local_cache)

> +		/*

> +		 * Increase number of slots by total number of indices

> +		 * that can be stored in the lcore caches

> +		 * except for the first cache

> +		 */

> +		num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *

> +					(LCORE_CACHE_SIZE - 1) + 1;

> +	else

> +		num_key_slots = params->entries + 1;

> +

> +	snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);

> +	/* Create ring (Dummy slot index is not enqueued) */

> +	r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),

> +			params->socket_id, 0);

> +	if (r == NULL) {

> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");

> +		goto err;

> +	}

> +

> +	const uint32_t num_buckets = rte_align32pow2(params->entries) /

> +						RTE_HASH_BUCKET_ENTRIES;

> +

> +	/* Create ring for extendable buckets. */

> +	if (ext_table_support) {

> +		snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",

> +								params->name);

> +		r_ext = rte_ring_create(ext_ring_name,

> +				rte_align32pow2(num_buckets + 1),

> +				params->socket_id, 0);

> +

> +		if (r_ext == NULL) {

> +			RTE_LOG(ERR, HASH, "ext buckets memory allocation "

> +								"failed\n");

> +			goto err;

> +		}

> +	}

> +

> +	snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);

> +

> +	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);

> +

> +	/* guarantee there's no existing: this is normally already checked

> +	 * by ring creation above */

> +	TAILQ_FOREACH(te, hash_list, next) {

> +		h = (struct rte_hash *) te->data;

> +		if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)

> +			break;

> +	}

> +	h = NULL;

> +	if (te != NULL) {

> +		rte_errno = EEXIST;

> +		te = NULL;

> +		goto err_unlock;

> +	}

> +

> +	te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);

> +	if (te == NULL) {

> +		RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");

> +		goto err_unlock;

> +	}

> +

> +	h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),

> +					RTE_CACHE_LINE_SIZE, params->socket_id);

> +

> +	if (h == NULL) {

> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");

> +		goto err_unlock;

> +	}

> +

> +	buckets = rte_zmalloc_socket(NULL,

> +				num_buckets * sizeof(struct rte_hash_bucket),

> +				RTE_CACHE_LINE_SIZE, params->socket_id);

> +

> +	if (buckets == NULL) {

> +		RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");

> +		goto err_unlock;

> +	}

> +

> +	/* Allocate same number of extendable buckets */

> +	if (ext_table_support) {

> +		buckets_ext = rte_zmalloc_socket(NULL,

> +				num_buckets * sizeof(struct rte_hash_bucket),

> +				RTE_CACHE_LINE_SIZE, params->socket_id);

> +		if (buckets_ext == NULL) {

> +			RTE_LOG(ERR, HASH, "ext buckets memory allocation "

> +							"failed\n");

> +			goto err_unlock;

> +		}

> +		/* Populate ext bkt ring. We reserve 0 similar to the

> +		 * key-data slot, just in case in future we want to

> +		 * use bucket index for the linked list and 0 means NULL

> +		 * for next bucket

> +		 */

> +		for (i = 1; i <= num_buckets; i++)

> +			rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));

> +	}

> +

> +	const uint32_t key_entry_size =

> +		RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,

> +			  KEY_ALIGNMENT);

> +	const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;

> +

> +	k = rte_zmalloc_socket(NULL, key_tbl_size,

> +			RTE_CACHE_LINE_SIZE, params->socket_id);

> +

> +	if (k == NULL) {

> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");

> +		goto err_unlock;

> +	}

> +

> +	tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),

> +			RTE_CACHE_LINE_SIZE, params->socket_id);

> +

> +	if (tbl_chng_cnt == NULL) {

> +		RTE_LOG(ERR, HASH, "memory allocation failed\n");

> +		goto err_unlock;

> +	}

> +

> +/*

> + * If x86 architecture is used, select appropriate compare function,

> + * which may use x86 intrinsics, otherwise use memcmp

> + */

> +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)

> +	/* Select function to compare keys */

> +	switch (params->key_len) {

> +	case 16:

> +		h->cmp_jump_table_idx = KEY_16_BYTES;

> +		break;

> +	case 32:

> +		h->cmp_jump_table_idx = KEY_32_BYTES;

> +		break;

> +	case 48:

> +		h->cmp_jump_table_idx = KEY_48_BYTES;

> +		break;

> +	case 64:

> +		h->cmp_jump_table_idx = KEY_64_BYTES;

> +		break;

> +	case 80:

> +		h->cmp_jump_table_idx = KEY_80_BYTES;

> +		break;

> +	case 96:

> +		h->cmp_jump_table_idx = KEY_96_BYTES;

> +		break;

> +	case 112:

> +		h->cmp_jump_table_idx = KEY_112_BYTES;

> +		break;

> +	case 128:

> +		h->cmp_jump_table_idx = KEY_128_BYTES;

> +		break;

> +	default:

> +		/* If key is not multiple of 16, use generic memcmp */

> +		h->cmp_jump_table_idx = KEY_OTHER_BYTES;

> +	}

> +#else

> +	h->cmp_jump_table_idx = KEY_OTHER_BYTES;

> +#endif

> +

> +	if (use_local_cache) {

> +		h->local_free_slots = rte_zmalloc_socket(NULL,

> +				sizeof(struct lcore_cache) * RTE_MAX_LCORE,

> +				RTE_CACHE_LINE_SIZE, params->socket_id);

> +	}

> +

> +	/* Default hash function */

> +#if defined(RTE_ARCH_X86)

> +	default_hash_func = (rte_hash_function)rte_hash_crc;

> +#elif defined(RTE_ARCH_ARM64)

> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))

> +		default_hash_func = (rte_hash_function)rte_hash_crc;

> +#endif

> +	/* Setup hash context */

> +	snprintf(h->name, sizeof(h->name), "%s", params->name);

> +	h->entries = params->entries;

> +	h->key_len = params->key_len;

> +	h->key_entry_size = key_entry_size;

> +	h->hash_func_init_val = params->hash_func_init_val;

> +

> +	h->num_buckets = num_buckets;

> +	h->bucket_bitmask = h->num_buckets - 1;

> +	h->buckets = buckets;

> +	h->buckets_ext = buckets_ext;

> +	h->free_ext_bkts = r_ext;

> +	h->hash_func = (params->hash_func == NULL) ?

> +		default_hash_func : params->hash_func;

> +	h->key_store = k;

> +	h->free_slots = r;

> +	h->tbl_chng_cnt = tbl_chng_cnt;

> +	*h->tbl_chng_cnt = 0;

> +	h->hw_trans_mem_support = rte_tm_supported();

> +	h->use_local_cache = use_local_cache;

> +	h->readwrite_concur_support = readwrite_concur_support;

> +	h->ext_table_support = ext_table_support;

> +	h->writer_takes_lock = writer_takes_lock;

> +	h->no_free_on_del = no_free_on_del;

> +	h->readwrite_concur_lf_support = readwrite_concur_lf_support;

> +

> +#if defined(RTE_ARCH_X86)

> +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))

> +		h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;

> +	else

> +#endif

> +		h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;

> +

> +	if (h->writer_takes_lock) {

> +		h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),

> +						RTE_CACHE_LINE_SIZE);

> +		if (h->readwrite_lock == NULL)

> +			goto err_unlock;

> +

> +		rte_rwlock_init(h->readwrite_lock);

> +	}

> +

> +	/* Populate free slots ring. Entry zero is reserved for key misses. */

> +	for (i = 1; i < num_key_slots; i++)

> +		rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));

> +

> +	te->data = (void *) h;

> +	TAILQ_INSERT_TAIL(hash_list, te, next);

> +	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);

> +

> +	return h;

> +err_unlock:

> +	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);

> +err:

> +	rte_ring_free(r);

> +	rte_ring_free(r_ext);

> +	rte_free(te);

> +	rte_free(h);

> +	rte_free(buckets);

> +	rte_free(buckets_ext);

> +	rte_free(k);

> +	rte_free(tbl_chng_cnt);

> +	return NULL;

> +}

> +BIND_DEFAULT_SYMBOL(rte_hash_create, _v1811, 18.11);

> +MAP_STATIC_SYMBOL(

> +	struct rte_hash *rte_hash_create(

> +		const struct rte_hash_parameters *params),

> +		rte_hash_create_v1811);

>  

>  void

>  rte_hash_free(struct rte_hash *h)

> diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h

> index c93d1a137..93c7019ec 100644

> --- a/lib/librte_hash/rte_hash.h

> +++ b/lib/librte_hash/rte_hash.h

> @@ -30,13 +30,27 @@ extern "C" {

>  #define RTE_HASH_LOOKUP_BULK_MAX		64

>  #define RTE_HASH_LOOKUP_MULTI_MAX		RTE_HASH_LOOKUP_BULK_MAX

>  

> -/** Enable Hardware transactional memory support. */

> +/**

> + * @deprecated

> + * This define will be removed in the next release.

> + * If the target platform supports hardware transactional memory

> + * it will be used without user consent as it provides the best possible

> + * performance.

> + *

> + * Enable Hardware transactional memory support.

> + */

>  #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT	0x01

>  

>  /** Default behavior of insertion, single writer/multi writer */

>  #define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02

>  

> -/** Flag to support reader writer concurrency */

> +/**

> + * @deprecated

> + * This define will be removed in the next release.

> + * This library should be thread-safe by default.

> + *

> + * Flag to support reader writer concurrency

> + */

>  #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04

>  


Do we not need/want to add some new flags to disable these features, in
case there are cases where either RW concurrency, or transaction memory is
unwanted?

>  /** Flag to indicate the extendabe bucket table feature should be used */

> @@ -105,6 +119,10 @@ struct rte_hash;

>   */

>  struct rte_hash *

>  rte_hash_create(const struct rte_hash_parameters *params);

> +struct rte_hash *

> +rte_hash_create_v20(const struct rte_hash_parameters *params);

> +struct rte_hash *

> +rte_hash_create_v1811(const struct rte_hash_parameters *params);

>  

>  /**

>   * Set a new hash compare function other than the default one.

> diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map

> index 734ae28b0..c72469099 100644

> --- a/lib/librte_hash/rte_hash_version.map

> +++ b/lib/librte_hash/rte_hash_version.map

> @@ -54,6 +54,13 @@ DPDK_18.08 {

>  

>  } DPDK_16.07;

>  

> +DPDK_18.11 {

> +	global:

> +

> +	rte_hash_create;

> +

> +} DPDK_18.08;

> +

>  EXPERIMENTAL {

>  	global:

>  

> -- 

> 2.17.1

>
Bruce Richardson Nov. 1, 2018, 9:48 a.m. | #2
On Thu, Nov 01, 2018 at 09:45:31AM +0000, Bruce Richardson wrote:
> On Wed, Oct 31, 2018 at 11:54:52PM -0500, Honnappa Nagarahalli wrote:

> > Hash library should provide read/write concurrency by default

> > as most of the use cases require read/write concurrency. Hence

> > the flag RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY is deprecated.

> > The library will decide if locking is required to provide

> > the concurrency based on other configuration flags.

> > 

> > If a lock is used to provide the read/write concurrency, best

> > possible locking mechanism available on the platform should

> > be used. Hence, the flag RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT

> > is deprecated. The library will use transactional memory

> > if the platform supports it.

> > 

> > Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> > Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>

> > Reviewed-by: Gavin Hu <gavin.hu@arm.com>

> > ---

> >  lib/librte_hash/rte_cuckoo_hash.c    | 319 ++++++++++++++++++++++++++-

> >  lib/librte_hash/rte_hash.h           |  22 +-

> >  lib/librte_hash/rte_hash_version.map |   7 +

> >  3 files changed, 345 insertions(+), 3 deletions(-)

> > 

> > diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c

> > index 5ddcccd87..a11de22be 100644

> > --- a/lib/librte_hash/rte_cuckoo_hash.c

> > +++ b/lib/librte_hash/rte_cuckoo_hash.c

> > @@ -121,7 +121,7 @@ get_alt_bucket_index(const struct rte_hash *h,

> >  }

> >  

> >  struct rte_hash *

> > -rte_hash_create(const struct rte_hash_parameters *params)

> > +rte_hash_create_v20(const struct rte_hash_parameters *params)

> >  {

> >  	struct rte_hash *h = NULL;

> >  	struct rte_tailq_entry *te = NULL;

> > @@ -446,6 +446,323 @@ rte_hash_create(const struct rte_hash_parameters *params)

> >  	rte_free(tbl_chng_cnt);

> >  	return NULL;

> >  }

> > +VERSION_SYMBOL(rte_hash_create, _v20, 2.0);

> > +

> 

> To make reviewing this easier, can I ask if in V2 you can put the v18.11

> function first, before the existing one. Hopefully that means that the "new"

> function from git's point of view is the existing one, showing it as a

> block add that we can pretty much skip reviewing. The benefit of this is that

> the changes in the v1811 should then show as line-by-line diffs in the

> patch, so we can easily review the changes made in the new case. It's hard

> to spot them in the whole function below.

> 

> > +struct rte_hash *

> > +rte_hash_create_v1811(const struct rte_hash_parameters *params)

> > +{

> > +	struct rte_hash *h = NULL;

> > +	struct rte_tailq_entry *te = NULL;

> > +	struct rte_hash_list *hash_list;

> > +	struct rte_ring *r = NULL;

> > +	struct rte_ring *r_ext = NULL;

> > +	char hash_name[RTE_HASH_NAMESIZE];

> > +	void *k = NULL;

> > +	void *buckets = NULL;

> > +	void *buckets_ext = NULL;

> > +	char ring_name[RTE_RING_NAMESIZE];

> > +	char ext_ring_name[RTE_RING_NAMESIZE];

> > +	unsigned num_key_slots;

> > +	unsigned i;

> > +	unsigned int use_local_cache = 0;

> > +	unsigned int ext_table_support = 0;

> > +	unsigned int readwrite_concur_support = 1;

> > +	unsigned int writer_takes_lock = 1;

> > +	unsigned int no_free_on_del = 0;

> > +	uint32_t *tbl_chng_cnt = NULL;

> > +	unsigned int readwrite_concur_lf_support = 0;

> > +

> > +	rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;

> > +

> > +	hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);

> > +

> > +	if (params == NULL) {

> > +		RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");

> > +		return NULL;

> > +	}

> > +

> > +	/* Check for valid parameters */

> > +	if ((params->entries > RTE_HASH_ENTRIES_MAX) ||

> > +			(params->entries < RTE_HASH_BUCKET_ENTRIES) ||

> > +			(params->key_len == 0)) {

> > +		rte_errno = EINVAL;

> > +		RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");

> > +		return NULL;

> > +	}

> > +

> > +	/* Validate correct usage of extra options */

> > +	if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&

> > +	    (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {

> > +		rte_errno = EINVAL;

> > +		RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "

> > +			"feature not supported with rw concurrency "

> > +			"lock free\n");

> 

> Please don't split the error text across lines. If you put it on a line by

> itself, hopefully checkpatch should not complain. If checkpatch does

> complain about the long line, just ignore it!

> [Yes, I know this is copied from original code, but since it appears as new

> code in this patch, we should fix it]

> 

> > +		return NULL;

> > +	}

> > +

> > +	/* Check extra flags field to check extra options. */

> > +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)

> > +		use_local_cache = 1;

> > +

> > +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)

> > +		ext_table_support = 1;

> > +

> > +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)

> > +		no_free_on_del = 1;

> > +

> > +	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {

> > +		/* Do not use lock for RW concurrency */

> > +		readwrite_concur_support = 0;

> > +		readwrite_concur_lf_support = 1;

> > +		/* Enable not freeing internal memory/index on delete */

> > +		no_free_on_del = 1;

> > +	}

> > +

> > +	if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&

> > +	    !(params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD))

> > +		writer_takes_lock = 0;

> > +

> > +	/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */

> > +	if (use_local_cache)

> > +		/*

> > +		 * Increase number of slots by total number of indices

> > +		 * that can be stored in the lcore caches

> > +		 * except for the first cache

> > +		 */

> > +		num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *

> > +					(LCORE_CACHE_SIZE - 1) + 1;

> > +	else

> > +		num_key_slots = params->entries + 1;

> > +

> > +	snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);

> > +	/* Create ring (Dummy slot index is not enqueued) */

> > +	r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),

> > +			params->socket_id, 0);

> > +	if (r == NULL) {

> > +		RTE_LOG(ERR, HASH, "memory allocation failed\n");

> > +		goto err;

> > +	}

> > +

> > +	const uint32_t num_buckets = rte_align32pow2(params->entries) /

> > +						RTE_HASH_BUCKET_ENTRIES;

> > +

> > +	/* Create ring for extendable buckets. */

> > +	if (ext_table_support) {

> > +		snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",

> > +								params->name);

> > +		r_ext = rte_ring_create(ext_ring_name,

> > +				rte_align32pow2(num_buckets + 1),

> > +				params->socket_id, 0);

> > +

> > +		if (r_ext == NULL) {

> > +			RTE_LOG(ERR, HASH, "ext buckets memory allocation "

> > +								"failed\n");

> > +			goto err;

> > +		}

> > +	}

> > +

> > +	snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);

> > +

> > +	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);

> > +

> > +	/* guarantee there's no existing: this is normally already checked

> > +	 * by ring creation above */

> > +	TAILQ_FOREACH(te, hash_list, next) {

> > +		h = (struct rte_hash *) te->data;

> > +		if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)

> > +			break;

> > +	}

> > +	h = NULL;

> > +	if (te != NULL) {

> > +		rte_errno = EEXIST;

> > +		te = NULL;

> > +		goto err_unlock;

> > +	}

> > +

> > +	te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);

> > +	if (te == NULL) {

> > +		RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");

> > +		goto err_unlock;

> > +	}

> > +

> > +	h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),

> > +					RTE_CACHE_LINE_SIZE, params->socket_id);

> > +

> > +	if (h == NULL) {

> > +		RTE_LOG(ERR, HASH, "memory allocation failed\n");

> > +		goto err_unlock;

> > +	}

> > +

> > +	buckets = rte_zmalloc_socket(NULL,

> > +				num_buckets * sizeof(struct rte_hash_bucket),

> > +				RTE_CACHE_LINE_SIZE, params->socket_id);

> > +

> > +	if (buckets == NULL) {

> > +		RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");

> > +		goto err_unlock;

> > +	}

> > +

> > +	/* Allocate same number of extendable buckets */

> > +	if (ext_table_support) {

> > +		buckets_ext = rte_zmalloc_socket(NULL,

> > +				num_buckets * sizeof(struct rte_hash_bucket),

> > +				RTE_CACHE_LINE_SIZE, params->socket_id);

> > +		if (buckets_ext == NULL) {

> > +			RTE_LOG(ERR, HASH, "ext buckets memory allocation "

> > +							"failed\n");

> > +			goto err_unlock;

> > +		}

> > +		/* Populate ext bkt ring. We reserve 0 similar to the

> > +		 * key-data slot, just in case in future we want to

> > +		 * use bucket index for the linked list and 0 means NULL

> > +		 * for next bucket

> > +		 */

> > +		for (i = 1; i <= num_buckets; i++)

> > +			rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));

> > +	}

> > +

> > +	const uint32_t key_entry_size =

> > +		RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,

> > +			  KEY_ALIGNMENT);

> > +	const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;

> > +

> > +	k = rte_zmalloc_socket(NULL, key_tbl_size,

> > +			RTE_CACHE_LINE_SIZE, params->socket_id);

> > +

> > +	if (k == NULL) {

> > +		RTE_LOG(ERR, HASH, "memory allocation failed\n");

> > +		goto err_unlock;

> > +	}

> > +

> > +	tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),

> > +			RTE_CACHE_LINE_SIZE, params->socket_id);

> > +

> > +	if (tbl_chng_cnt == NULL) {

> > +		RTE_LOG(ERR, HASH, "memory allocation failed\n");

> > +		goto err_unlock;

> > +	}

> > +

> > +/*

> > + * If x86 architecture is used, select appropriate compare function,

> > + * which may use x86 intrinsics, otherwise use memcmp

> > + */

> > +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)

> > +	/* Select function to compare keys */

> > +	switch (params->key_len) {

> > +	case 16:

> > +		h->cmp_jump_table_idx = KEY_16_BYTES;

> > +		break;

> > +	case 32:

> > +		h->cmp_jump_table_idx = KEY_32_BYTES;

> > +		break;

> > +	case 48:

> > +		h->cmp_jump_table_idx = KEY_48_BYTES;

> > +		break;

> > +	case 64:

> > +		h->cmp_jump_table_idx = KEY_64_BYTES;

> > +		break;

> > +	case 80:

> > +		h->cmp_jump_table_idx = KEY_80_BYTES;

> > +		break;

> > +	case 96:

> > +		h->cmp_jump_table_idx = KEY_96_BYTES;

> > +		break;

> > +	case 112:

> > +		h->cmp_jump_table_idx = KEY_112_BYTES;

> > +		break;

> > +	case 128:

> > +		h->cmp_jump_table_idx = KEY_128_BYTES;

> > +		break;

> > +	default:

> > +		/* If key is not multiple of 16, use generic memcmp */

> > +		h->cmp_jump_table_idx = KEY_OTHER_BYTES;

> > +	}

> > +#else

> > +	h->cmp_jump_table_idx = KEY_OTHER_BYTES;

> > +#endif

> > +

> > +	if (use_local_cache) {

> > +		h->local_free_slots = rte_zmalloc_socket(NULL,

> > +				sizeof(struct lcore_cache) * RTE_MAX_LCORE,

> > +				RTE_CACHE_LINE_SIZE, params->socket_id);

> > +	}

> > +

> > +	/* Default hash function */

> > +#if defined(RTE_ARCH_X86)

> > +	default_hash_func = (rte_hash_function)rte_hash_crc;

> > +#elif defined(RTE_ARCH_ARM64)

> > +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))

> > +		default_hash_func = (rte_hash_function)rte_hash_crc;

> > +#endif

> > +	/* Setup hash context */

> > +	snprintf(h->name, sizeof(h->name), "%s", params->name);

> > +	h->entries = params->entries;

> > +	h->key_len = params->key_len;

> > +	h->key_entry_size = key_entry_size;

> > +	h->hash_func_init_val = params->hash_func_init_val;

> > +

> > +	h->num_buckets = num_buckets;

> > +	h->bucket_bitmask = h->num_buckets - 1;

> > +	h->buckets = buckets;

> > +	h->buckets_ext = buckets_ext;

> > +	h->free_ext_bkts = r_ext;

> > +	h->hash_func = (params->hash_func == NULL) ?

> > +		default_hash_func : params->hash_func;

> > +	h->key_store = k;

> > +	h->free_slots = r;

> > +	h->tbl_chng_cnt = tbl_chng_cnt;

> > +	*h->tbl_chng_cnt = 0;

> > +	h->hw_trans_mem_support = rte_tm_supported();

> > +	h->use_local_cache = use_local_cache;

> > +	h->readwrite_concur_support = readwrite_concur_support;

> > +	h->ext_table_support = ext_table_support;

> > +	h->writer_takes_lock = writer_takes_lock;

> > +	h->no_free_on_del = no_free_on_del;

> > +	h->readwrite_concur_lf_support = readwrite_concur_lf_support;

> > +

> > +#if defined(RTE_ARCH_X86)

> > +	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))

> > +		h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;

> > +	else

> > +#endif

> > +		h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;

> > +

> > +	if (h->writer_takes_lock) {

> > +		h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),

> > +						RTE_CACHE_LINE_SIZE);

> > +		if (h->readwrite_lock == NULL)

> > +			goto err_unlock;

> > +

> > +		rte_rwlock_init(h->readwrite_lock);

> > +	}

> > +

> > +	/* Populate free slots ring. Entry zero is reserved for key misses. */

> > +	for (i = 1; i < num_key_slots; i++)

> > +		rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));

> > +

> > +	te->data = (void *) h;

> > +	TAILQ_INSERT_TAIL(hash_list, te, next);

> > +	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);

> > +

> > +	return h;

> > +err_unlock:

> > +	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);

> > +err:

> > +	rte_ring_free(r);

> > +	rte_ring_free(r_ext);

> > +	rte_free(te);

> > +	rte_free(h);

> > +	rte_free(buckets);

> > +	rte_free(buckets_ext);

> > +	rte_free(k);

> > +	rte_free(tbl_chng_cnt);

> > +	return NULL;

> > +}

> > +BIND_DEFAULT_SYMBOL(rte_hash_create, _v1811, 18.11);

> > +MAP_STATIC_SYMBOL(

> > +	struct rte_hash *rte_hash_create(

> > +		const struct rte_hash_parameters *params),

> > +		rte_hash_create_v1811);

> >  

> >  void

> >  rte_hash_free(struct rte_hash *h)

> > diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h

> > index c93d1a137..93c7019ec 100644

> > --- a/lib/librte_hash/rte_hash.h

> > +++ b/lib/librte_hash/rte_hash.h

> > @@ -30,13 +30,27 @@ extern "C" {

> >  #define RTE_HASH_LOOKUP_BULK_MAX		64

> >  #define RTE_HASH_LOOKUP_MULTI_MAX		RTE_HASH_LOOKUP_BULK_MAX

> >  

> > -/** Enable Hardware transactional memory support. */

> > +/**

> > + * @deprecated

> > + * This define will be removed in the next release.

> > + * If the target platform supports hardware transactional memory

> > + * it will be used without user consent as it provides the best possible

> > + * performance.

> > + *

> > + * Enable Hardware transactional memory support.

> > + */

> >  #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT	0x01

> >  

> >  /** Default behavior of insertion, single writer/multi writer */

> >  #define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02

> >  

> > -/** Flag to support reader writer concurrency */

> > +/**

> > + * @deprecated

> > + * This define will be removed in the next release.

> > + * This library should be thread-safe by default.

> > + *

> > + * Flag to support reader writer concurrency

> > + */

> >  #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04

> >  

> 

> Do we not need/want to add some new flags to disable these features, in

> case there are cases where either RW concurrency, or transaction memory is

> unwanted?

> 

Commenting more on my own comment - I'm looking for suggestions here from
others. Perhaps we should apply the YAGNI principle and not add such
flags unless someone comes with a use-case that needs them. Having negative
flags where before we had positive ones could be confusing.
Honnappa Nagarahalli Nov. 1, 2018, 7:43 p.m. | #3
> >

> > diff --git a/lib/librte_hash/rte_cuckoo_hash.c

> > b/lib/librte_hash/rte_cuckoo_hash.c

> > index 5ddcccd87..a11de22be 100644

> > --- a/lib/librte_hash/rte_cuckoo_hash.c

> > +++ b/lib/librte_hash/rte_cuckoo_hash.c

> > @@ -121,7 +121,7 @@ get_alt_bucket_index(const struct rte_hash *h,  }

> >

> >  struct rte_hash *

> > -rte_hash_create(const struct rte_hash_parameters *params)

> > +rte_hash_create_v20(const struct rte_hash_parameters *params)

> >  {

> >  	struct rte_hash *h = NULL;

> >  	struct rte_tailq_entry *te = NULL;

> > @@ -446,6 +446,323 @@ rte_hash_create(const struct

> rte_hash_parameters *params)

> >  	rte_free(tbl_chng_cnt);

> >  	return NULL;

> >  }

> > +VERSION_SYMBOL(rte_hash_create, _v20, 2.0);

> > +

> 

> To make reviewing this easier, can I ask if in V2 you can put the v18.11

> function first, before the existing one. Hopefully that means that the "new"

> function from git's point of view is the existing one, showing it as a block

> add that we can pretty much skip reviewing. The benefit of this is that the

> changes in the v1811 should then show as line-by-line diffs in the patch, so

> we can easily review the changes made in the new case. It's hard to spot

> them in the whole function below.

> 

I agree it is painful to review. I tried what you suggested here, but it still shows the v18.11 function as a complete new block of code. I will create another commit in the series. The first commit will have just the exact duplication of the function (but renamed to v18.11). The next commit will have all the required changes for the v18.11 function and the deprecation-related changes. We can squash both of them once the review completes.
Let me know if there is a better solution.

> > +struct rte_hash *

> > +rte_hash_create_v1811(const struct rte_hash_parameters *params) {

> > +	struct rte_hash *h = NULL;

> > +	struct rte_tailq_entry *te = NULL;

> > +	struct rte_hash_list *hash_list;

> > +	struct rte_ring *r = NULL;

> > +	struct rte_ring *r_ext = NULL;

> > +	char hash_name[RTE_HASH_NAMESIZE];

> > +	void *k = NULL;

> > +	void *buckets = NULL;

> > +	void *buckets_ext = NULL;

> > +	char ring_name[RTE_RING_NAMESIZE];

> > +	char ext_ring_name[RTE_RING_NAMESIZE];

> > +	unsigned num_key_slots;

> > +	unsigned i;

> > +

> > +	/* Validate correct usage of extra options */

> > +	if ((params->extra_flag &

> RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&

> > +	    (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {

> > +		rte_errno = EINVAL;

> > +		RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "

> > +			"feature not supported with rw concurrency "

> > +			"lock free\n");

> 

> Please don't split the error text across lines. If you put it on a line by itself,

> hopefully checkpatch should not complain. If checkpatch does complain

> about the long line, just ignore it!

> [Yes, I know this is copied from original code, but since it appears as new

> code in this patch, we should fix it]

> 

Ok, I can do that. There are other checkpatch issues which I did not fix (thinking it was old code); I will fix those as well.

> > +		return NULL;

> > +	}

> > +

> > +	/* Check extra flags field to check extra options. */

> > +	if (params->extra_flag &

> RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)

> > +		use_local_cache = 1;

> > +

> >

> >  void

> >  rte_hash_free(struct rte_hash *h)

> > diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h

> > index c93d1a137..93c7019ec 100644

> > --- a/lib/librte_hash/rte_hash.h

> > +++ b/lib/librte_hash/rte_hash.h

> > @@ -30,13 +30,27 @@ extern "C" {

> >  #define RTE_HASH_LOOKUP_BULK_MAX		64

> >  #define RTE_HASH_LOOKUP_MULTI_MAX

> 	RTE_HASH_LOOKUP_BULK_MAX

> >

> > -/** Enable Hardware transactional memory support. */

> > +/**

> > + * @deprecated

> > + * This define will be removed in the next release.

> > + * If the target platform supports hardware transactional memory

> > + * it will be used without user consent as it provides the best

> > +possible

> > + * performance.

> > + *

> > + * Enable Hardware transactional memory support.

> > + */

> >  #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT	0x01

> >

> >  /** Default behavior of insertion, single writer/multi writer */

> > #define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02

> >

> > -/** Flag to support reader writer concurrency */

> > +/**

> > + * @deprecated

> > + * This define will be removed in the next release.

> > + * This library should be thread-safe by default.

> > + *

> > + * Flag to support reader writer concurrency  */

> >  #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04

> >

> 

> Do we not need/want to add some new flags to disable these features, in

> case there are cases where either RW concurrency, or transaction memory is

> unwanted?

> 

I cannot think of a case where RW concurrency is not required, except for the single-thread use case. In the single-thread use case, the existing flags provide the most optimal solution. Hopefully, this will be clear from the changes.
I am not sure if there is a case for not wanting transactional memory while using lock-based RW concurrency. We have a flag for non-lock-based RW concurrency, in which case transactional memory will not be used.
IMO, I do not see a need for additional flags.
 
> >  /** Flag to indicate the extendabe bucket table feature should be

> > used */ @@ -105,6 +119,10 @@ struct rte_hash;

> >   */

> >  struct rte_hash *

> >  rte_hash_create(const struct rte_hash_parameters *params);

> > +struct rte_hash *

> > +rte_hash_create_v20(const struct rte_hash_parameters *params); struct

> > +rte_hash * rte_hash_create_v1811(const struct rte_hash_parameters

> > +*params);

> >

> >  /**

> >   * Set a new hash compare function other than the default one.

> > diff --git a/lib/librte_hash/rte_hash_version.map

> > b/lib/librte_hash/rte_hash_version.map

> > index 734ae28b0..c72469099 100644

> > --- a/lib/librte_hash/rte_hash_version.map

> > +++ b/lib/librte_hash/rte_hash_version.map

> > @@ -54,6 +54,13 @@ DPDK_18.08 {

> >

> >  } DPDK_16.07;

> >

> > +DPDK_18.11 {

> > +	global:

> > +

> > +	rte_hash_create;

> > +

> > +} DPDK_18.08;

> > +

> >  EXPERIMENTAL {

> >  	global:

> >

> > --

> > 2.17.1

> >
Bruce Richardson Nov. 2, 2018, 11:11 a.m. | #4
On Thu, Nov 01, 2018 at 07:43:29PM +0000, Honnappa Nagarahalli wrote:
> > >

> > > diff --git a/lib/librte_hash/rte_cuckoo_hash.c

> > > b/lib/librte_hash/rte_cuckoo_hash.c

> > > index 5ddcccd87..a11de22be 100644

> > > --- a/lib/librte_hash/rte_cuckoo_hash.c

> > > +++ b/lib/librte_hash/rte_cuckoo_hash.c

> > > @@ -121,7 +121,7 @@ get_alt_bucket_index(const struct rte_hash *h,  }

> > >

> > >  struct rte_hash *

> > > -rte_hash_create(const struct rte_hash_parameters *params)

> > > +rte_hash_create_v20(const struct rte_hash_parameters *params)

> > >  {

> > >  	struct rte_hash *h = NULL;

> > >  	struct rte_tailq_entry *te = NULL;

> > > @@ -446,6 +446,323 @@ rte_hash_create(const struct

> > rte_hash_parameters *params)

> > >  	rte_free(tbl_chng_cnt);

> > >  	return NULL;

> > >  }

> > > +VERSION_SYMBOL(rte_hash_create, _v20, 2.0);

> > > +

> > 

> > To make reviewing this easier, can I ask if in V2 you can put the v18.11

> > function first, before the existing one. Hopefully that means that the "new"

> > function from git's point of view is the existing one, showing it as a block

> > add that we can pretty much skip reviewing. The benefit of this is that the

> > changes in the v1811 should then show as line-by-line diffs in the patch, so

> > we can easily review the changes made in the new case. It's hard to spot

> > them in the whole function below.

> > 

> I agree it is painful to review. I tried what you suggested here, it still shows v18.11 function as a complete new block of code. I will create another commit in the series. The first commit will have just the exact duplication of the function (but renamed to v18.11). The next commit will have all the required changes for the v18.11 function and deprecation related changes. We can squash both of them once the review completes.

> Let me know if there is a better solution.


Thanks. I'll leave the squashing up to Thomas to decide, because even on
later review e.g. if we ever need to bisect, having the commits separate
could help.

> 

> > > +struct rte_hash *

> > > +rte_hash_create_v1811(const struct rte_hash_parameters *params) {

> > > +	struct rte_hash *h = NULL;

> > > +	struct rte_tailq_entry *te = NULL;

> > > +	struct rte_hash_list *hash_list;

> > > +	struct rte_ring *r = NULL;

> > > +	struct rte_ring *r_ext = NULL;

> > > +	char hash_name[RTE_HASH_NAMESIZE];

> > > +	void *k = NULL;

> > > +	void *buckets = NULL;

> > > +	void *buckets_ext = NULL;

> > > +	char ring_name[RTE_RING_NAMESIZE];

> > > +	char ext_ring_name[RTE_RING_NAMESIZE];

> > > +	unsigned num_key_slots;

> > > +	unsigned i;

> > > +

> > > +	/* Validate correct usage of extra options */

> > > +	if ((params->extra_flag &

> > RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&

> > > +	    (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {

> > > +		rte_errno = EINVAL;

> > > +		RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "

> > > +			"feature not supported with rw concurrency "

> > > +			"lock free\n");

> > 

> > Please don't split the error text across lines. If you put it on a line by itself,

> > hopefully checkpatch should not complain. If checkpatch does complain

> > about the long line, just ignore it!

> > [Yes, I know this is copied from original code, but since it appears as new

> > code in this patch, we should fix it]

> > 

> Ok, I can do that. There are other check patch issues which I did not fix (thinking it is old code), will fix those as well.

> 

> > > +		return NULL;

> > > +	}

> > > +

> > > +	/* Check extra flags field to check extra options. */

> > > +	if (params->extra_flag &

> > RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)

> > > +		use_local_cache = 1;

> > > +

> > >

> > >  void

> > >  rte_hash_free(struct rte_hash *h)

> > > diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h

> > > index c93d1a137..93c7019ec 100644

> > > --- a/lib/librte_hash/rte_hash.h

> > > +++ b/lib/librte_hash/rte_hash.h

> > > @@ -30,13 +30,27 @@ extern "C" {

> > >  #define RTE_HASH_LOOKUP_BULK_MAX		64

> > >  #define RTE_HASH_LOOKUP_MULTI_MAX

> > 	RTE_HASH_LOOKUP_BULK_MAX

> > >

> > > -/** Enable Hardware transactional memory support. */

> > > +/**

> > > + * @deprecated

> > > + * This define will be removed in the next release.

> > > + * If the target platform supports hardware transactional memory

> > > + * it will be used without user consent as it provides the best

> > > +possible

> > > + * performance.

> > > + *

> > > + * Enable Hardware transactional memory support.

> > > + */

> > >  #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT	0x01

> > >

> > >  /** Default behavior of insertion, single writer/multi writer */

> > > #define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02

> > >

> > > -/** Flag to support reader writer concurrency */

> > > +/**

> > > + * @deprecated

> > > + * This define will be removed in the next release.

> > > + * This library should be thread-safe by default.

> > > + *

> > > + * Flag to support reader writer concurrency  */

> > >  #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04

> > >

> > 

> > Do we not need/want to add some new flags to disable these features, in

> > case there are cases where either RW concurrency, or transaction memory is

> > unwanted?

> > 

> I cannot think of a case where RW concurrency is not required, except for single thread use case. In the single thread use case, the existing flags provide the most optimal solution. Hopefully, it will be clear from the changes.

> I am not sure on if there is a case for no wanting transactional memory while using lock based RW concurrency. We have a flag for non-lock based RW concurrency in which case transactional memory will not be used.

> IMO, I do not see a need for additional flags.

>  


I think for testing purposes at least, we need an option to disable TSX.
When assessing performance-impacting changes to the hash, the developer
needs to evaluate the impact in both TSX and non-TSX scenarios.

/Bruce

Patch

diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c
index 5ddcccd87..a11de22be 100644
--- a/lib/librte_hash/rte_cuckoo_hash.c
+++ b/lib/librte_hash/rte_cuckoo_hash.c
@@ -121,7 +121,7 @@  get_alt_bucket_index(const struct rte_hash *h,
 }
 
 struct rte_hash *
-rte_hash_create(const struct rte_hash_parameters *params)
+rte_hash_create_v20(const struct rte_hash_parameters *params)
 {
 	struct rte_hash *h = NULL;
 	struct rte_tailq_entry *te = NULL;
@@ -446,6 +446,323 @@  rte_hash_create(const struct rte_hash_parameters *params)
 	rte_free(tbl_chng_cnt);
 	return NULL;
 }
+VERSION_SYMBOL(rte_hash_create, _v20, 2.0);
+
+struct rte_hash *
+rte_hash_create_v1811(const struct rte_hash_parameters *params)
+{
+	struct rte_hash *h = NULL;
+	struct rte_tailq_entry *te = NULL;
+	struct rte_hash_list *hash_list;
+	struct rte_ring *r = NULL;
+	struct rte_ring *r_ext = NULL;
+	char hash_name[RTE_HASH_NAMESIZE];
+	void *k = NULL;
+	void *buckets = NULL;
+	void *buckets_ext = NULL;
+	char ring_name[RTE_RING_NAMESIZE];
+	char ext_ring_name[RTE_RING_NAMESIZE];
+	unsigned num_key_slots;
+	unsigned i;
+	unsigned int use_local_cache = 0;
+	unsigned int ext_table_support = 0;
+	unsigned int readwrite_concur_support = 1;
+	unsigned int writer_takes_lock = 1;
+	unsigned int no_free_on_del = 0;
+	uint32_t *tbl_chng_cnt = NULL;
+	unsigned int readwrite_concur_lf_support = 0;
+
+	rte_hash_function default_hash_func = (rte_hash_function)rte_jhash;
+
+	hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list);
+
+	if (params == NULL) {
+		RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n");
+		return NULL;
+	}
+
+	/* Check for valid parameters */
+	if ((params->entries > RTE_HASH_ENTRIES_MAX) ||
+			(params->entries < RTE_HASH_BUCKET_ENTRIES) ||
+			(params->key_len == 0)) {
+		rte_errno = EINVAL;
+		RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n");
+		return NULL;
+	}
+
+	/* Validate correct usage of extra options */
+	if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
+	    (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)) {
+		rte_errno = EINVAL;
+		RTE_LOG(ERR, HASH, "rte_hash_create: extendable bucket "
+			"feature not supported with rw concurrency "
+			"lock free\n");
+		return NULL;
+	}
+
+	/* Check extra flags field to check extra options. */
+	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD)
+		use_local_cache = 1;
+
+	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_EXT_TABLE)
+		ext_table_support = 1;
+
+	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_NO_FREE_ON_DEL)
+		no_free_on_del = 1;
+
+	if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) {
+		/* Do not use lock for RW concurrency */
+		readwrite_concur_support = 0;
+		readwrite_concur_lf_support = 1;
+		/* Enable not freeing internal memory/index on delete */
+		no_free_on_del = 1;
+	}
+
+	if ((params->extra_flag & RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY_LF) &&
+	    !(params->extra_flag & RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD))
+		writer_takes_lock = 0;
+
+	/* Store all keys and leave the first entry as a dummy entry for lookup_bulk */
+	if (use_local_cache)
+		/*
+		 * Increase number of slots by total number of indices
+		 * that can be stored in the lcore caches
+		 * except for the first cache
+		 */
+		num_key_slots = params->entries + (RTE_MAX_LCORE - 1) *
+					(LCORE_CACHE_SIZE - 1) + 1;
+	else
+		num_key_slots = params->entries + 1;
+
+	snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name);
+	/* Create ring (Dummy slot index is not enqueued) */
+	r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots),
+			params->socket_id, 0);
+	if (r == NULL) {
+		RTE_LOG(ERR, HASH, "memory allocation failed\n");
+		goto err;
+	}
+
+	const uint32_t num_buckets = rte_align32pow2(params->entries) /
+						RTE_HASH_BUCKET_ENTRIES;
+
+	/* Create ring for extendable buckets. */
+	if (ext_table_support) {
+		snprintf(ext_ring_name, sizeof(ext_ring_name), "HT_EXT_%s",
+								params->name);
+		r_ext = rte_ring_create(ext_ring_name,
+				rte_align32pow2(num_buckets + 1),
+				params->socket_id, 0);
+
+		if (r_ext == NULL) {
+			RTE_LOG(ERR, HASH, "ext buckets memory allocation "
+								"failed\n");
+			goto err;
+		}
+	}
+
+	snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name);
+
+	rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK);
+
+	/* guarantee there's no existing: this is normally already checked
+	 * by ring creation above */
+	TAILQ_FOREACH(te, hash_list, next) {
+		h = (struct rte_hash *) te->data;
+		if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0)
+			break;
+	}
+	h = NULL;
+	if (te != NULL) {
+		rte_errno = EEXIST;
+		te = NULL;
+		goto err_unlock;
+	}
+
+	te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0);
+	if (te == NULL) {
+		RTE_LOG(ERR, HASH, "tailq entry allocation failed\n");
+		goto err_unlock;
+	}
+
+	h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash),
+					RTE_CACHE_LINE_SIZE, params->socket_id);
+
+	if (h == NULL) {
+		RTE_LOG(ERR, HASH, "memory allocation failed\n");
+		goto err_unlock;
+	}
+
+	buckets = rte_zmalloc_socket(NULL,
+				num_buckets * sizeof(struct rte_hash_bucket),
+				RTE_CACHE_LINE_SIZE, params->socket_id);
+
+	if (buckets == NULL) {
+		RTE_LOG(ERR, HASH, "buckets memory allocation failed\n");
+		goto err_unlock;
+	}
+
+	/* Allocate same number of extendable buckets */
+	if (ext_table_support) {
+		buckets_ext = rte_zmalloc_socket(NULL,
+				num_buckets * sizeof(struct rte_hash_bucket),
+				RTE_CACHE_LINE_SIZE, params->socket_id);
+		if (buckets_ext == NULL) {
+			RTE_LOG(ERR, HASH, "ext buckets memory allocation "
+							"failed\n");
+			goto err_unlock;
+		}
+		/* Populate ext bkt ring. We reserve 0 similar to the
+		 * key-data slot, just in case in future we want to
+		 * use bucket index for the linked list and 0 means NULL
+		 * for next bucket
+		 */
+		for (i = 1; i <= num_buckets; i++)
+			rte_ring_sp_enqueue(r_ext, (void *)((uintptr_t) i));
+	}
+
+	const uint32_t key_entry_size =
+		RTE_ALIGN(sizeof(struct rte_hash_key) + params->key_len,
+			  KEY_ALIGNMENT);
+	const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots;
+
+	k = rte_zmalloc_socket(NULL, key_tbl_size,
+			RTE_CACHE_LINE_SIZE, params->socket_id);
+
+	if (k == NULL) {
+		RTE_LOG(ERR, HASH, "memory allocation failed\n");
+		goto err_unlock;
+	}
+
+	tbl_chng_cnt = rte_zmalloc_socket(NULL, sizeof(uint32_t),
+			RTE_CACHE_LINE_SIZE, params->socket_id);
+
+	if (tbl_chng_cnt == NULL) {
+		RTE_LOG(ERR, HASH, "memory allocation failed\n");
+		goto err_unlock;
+	}
+
+/*
+ * If x86 architecture is used, select appropriate compare function,
+ * which may use x86 intrinsics, otherwise use memcmp
+ */
+#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64)
+	/* Select function to compare keys */
+	switch (params->key_len) {
+	case 16:
+		h->cmp_jump_table_idx = KEY_16_BYTES;
+		break;
+	case 32:
+		h->cmp_jump_table_idx = KEY_32_BYTES;
+		break;
+	case 48:
+		h->cmp_jump_table_idx = KEY_48_BYTES;
+		break;
+	case 64:
+		h->cmp_jump_table_idx = KEY_64_BYTES;
+		break;
+	case 80:
+		h->cmp_jump_table_idx = KEY_80_BYTES;
+		break;
+	case 96:
+		h->cmp_jump_table_idx = KEY_96_BYTES;
+		break;
+	case 112:
+		h->cmp_jump_table_idx = KEY_112_BYTES;
+		break;
+	case 128:
+		h->cmp_jump_table_idx = KEY_128_BYTES;
+		break;
+	default:
+		/* If key is not multiple of 16, use generic memcmp */
+		h->cmp_jump_table_idx = KEY_OTHER_BYTES;
+	}
+#else
+	h->cmp_jump_table_idx = KEY_OTHER_BYTES;
+#endif
+
+	if (use_local_cache) {
+		h->local_free_slots = rte_zmalloc_socket(NULL,
+				sizeof(struct lcore_cache) * RTE_MAX_LCORE,
+				RTE_CACHE_LINE_SIZE, params->socket_id);
+	}
+
+	/* Default hash function */
+#if defined(RTE_ARCH_X86)
+	default_hash_func = (rte_hash_function)rte_hash_crc;
+#elif defined(RTE_ARCH_ARM64)
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32))
+		default_hash_func = (rte_hash_function)rte_hash_crc;
+#endif
+	/* Setup hash context */
+	snprintf(h->name, sizeof(h->name), "%s", params->name);
+	h->entries = params->entries;
+	h->key_len = params->key_len;
+	h->key_entry_size = key_entry_size;
+	h->hash_func_init_val = params->hash_func_init_val;
+
+	h->num_buckets = num_buckets;
+	h->bucket_bitmask = h->num_buckets - 1;
+	h->buckets = buckets;
+	h->buckets_ext = buckets_ext;
+	h->free_ext_bkts = r_ext;
+	h->hash_func = (params->hash_func == NULL) ?
+		default_hash_func : params->hash_func;
+	h->key_store = k;
+	h->free_slots = r;
+	h->tbl_chng_cnt = tbl_chng_cnt;
+	*h->tbl_chng_cnt = 0;
+	h->hw_trans_mem_support = rte_tm_supported();
+	h->use_local_cache = use_local_cache;
+	h->readwrite_concur_support = readwrite_concur_support;
+	h->ext_table_support = ext_table_support;
+	h->writer_takes_lock = writer_takes_lock;
+	h->no_free_on_del = no_free_on_del;
+	h->readwrite_concur_lf_support = readwrite_concur_lf_support;
+
+#if defined(RTE_ARCH_X86)
+	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE2))
+		h->sig_cmp_fn = RTE_HASH_COMPARE_SSE;
+	else
+#endif
+		h->sig_cmp_fn = RTE_HASH_COMPARE_SCALAR;
+
+	if (h->writer_takes_lock) {
+		h->readwrite_lock = rte_malloc(NULL, sizeof(rte_rwlock_t),
+						RTE_CACHE_LINE_SIZE);
+		if (h->readwrite_lock == NULL)
+			goto err_unlock;
+
+		rte_rwlock_init(h->readwrite_lock);
+	}
+
+	/* Populate free slots ring. Entry zero is reserved for key misses. */
+	for (i = 1; i < num_key_slots; i++)
+		rte_ring_sp_enqueue(r, (void *)((uintptr_t) i));
+
+	te->data = (void *) h;
+	TAILQ_INSERT_TAIL(hash_list, te, next);
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+
+	return h;
+err_unlock:
+	rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK);
+err:
+	rte_ring_free(r);
+	rte_ring_free(r_ext);
+	rte_free(te);
+	rte_free(h);
+	rte_free(buckets);
+	rte_free(buckets_ext);
+	rte_free(k);
+	rte_free(tbl_chng_cnt);
+	return NULL;
+}
+BIND_DEFAULT_SYMBOL(rte_hash_create, _v1811, 18.11);
+MAP_STATIC_SYMBOL(
+	struct rte_hash *rte_hash_create(
+		const struct rte_hash_parameters *params),
+		rte_hash_create_v1811);
 
 void
 rte_hash_free(struct rte_hash *h)
diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h
index c93d1a137..93c7019ec 100644
--- a/lib/librte_hash/rte_hash.h
+++ b/lib/librte_hash/rte_hash.h
@@ -30,13 +30,27 @@  extern "C" {
 #define RTE_HASH_LOOKUP_BULK_MAX		64
 #define RTE_HASH_LOOKUP_MULTI_MAX		RTE_HASH_LOOKUP_BULK_MAX
 
-/** Enable Hardware transactional memory support. */
+/**
+ * @deprecated
+ * This define will be removed in the next release.
+ * If the target platform supports hardware transactional memory
+ * it will be used without user consent as it provides the best possible
+ * performance.
+ *
+ * Enable Hardware transactional memory support.
+ */
 #define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT	0x01
 
 /** Default behavior of insertion, single writer/multi writer */
 #define RTE_HASH_EXTRA_FLAGS_MULTI_WRITER_ADD 0x02
 
-/** Flag to support reader writer concurrency */
+/**
+ * @deprecated
+ * This define will be removed in the next release.
+ * This library should be thread-safe by default.
+ *
+ * Flag to support reader writer concurrency
+ */
 #define RTE_HASH_EXTRA_FLAGS_RW_CONCURRENCY 0x04
 
 /** Flag to indicate the extendabe bucket table feature should be used */
@@ -105,6 +119,10 @@  struct rte_hash;
  */
 struct rte_hash *
 rte_hash_create(const struct rte_hash_parameters *params);
+struct rte_hash *
+rte_hash_create_v20(const struct rte_hash_parameters *params);
+struct rte_hash *
+rte_hash_create_v1811(const struct rte_hash_parameters *params);
 
 /**
  * Set a new hash compare function other than the default one.
diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map
index 734ae28b0..c72469099 100644
--- a/lib/librte_hash/rte_hash_version.map
+++ b/lib/librte_hash/rte_hash_version.map
@@ -54,6 +54,13 @@  DPDK_18.08 {
 
 } DPDK_16.07;
 
+DPDK_18.11 {
+	global:
+
+	rte_hash_create;
+
+} DPDK_18.08;
+
 EXPERIMENTAL {
 	global: