[RFC,v6,6/6] lib/ring: improved copy function to copy ring elements

Message ID 20191021002300.26497-7-honnappa.nagarahalli@arm.com
State New
Headers show
Series
  • lib/ring: APIs to support custom element size
Related show

Commit Message

Honnappa Nagarahalli Oct. 21, 2019, 12:23 a.m.
Improved copy function to copy to/from ring elements.

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

---
 lib/librte_ring/rte_ring_elem.h | 165 ++++++++++++++++----------------
 1 file changed, 84 insertions(+), 81 deletions(-)

-- 
2.17.1

Comments

Olivier Matz Oct. 23, 2019, 10:05 a.m. | #1
On Sun, Oct 20, 2019 at 07:23:00PM -0500, Honnappa Nagarahalli wrote:
> Improved copy function to copy to/from ring elements.

> 

> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> Signed-off-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> ---

>  lib/librte_ring/rte_ring_elem.h | 165 ++++++++++++++++----------------

>  1 file changed, 84 insertions(+), 81 deletions(-)


(...)

> +static __rte_always_inline void

> +copy_elems(uint32_t du32[], const uint32_t su32[], uint32_t nr_num)

> +{

> +	uint32_t i;

> +

> +	for (i = 0; i < (nr_num & ~7); i += 8)

> +		memcpy(du32 + i, su32 + i, 8 * sizeof(uint32_t));

> +

> +	switch (nr_num & 7) {

> +	case 7: du32[nr_num - 7] = su32[nr_num - 7]; /* fallthrough */

> +	case 6: du32[nr_num - 6] = su32[nr_num - 6]; /* fallthrough */

> +	case 5: du32[nr_num - 5] = su32[nr_num - 5]; /* fallthrough */

> +	case 4: du32[nr_num - 4] = su32[nr_num - 4]; /* fallthrough */

> +	case 3: du32[nr_num - 3] = su32[nr_num - 3]; /* fallthrough */

> +	case 2: du32[nr_num - 2] = su32[nr_num - 2]; /* fallthrough */

> +	case 1: du32[nr_num - 1] = su32[nr_num - 1]; /* fallthrough */

> +	}

> +}


minor comment: I suggest src32 and dst32 instead of su32 and du32.

Patch

diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h
index 0ce5f2be7..80ec3c562 100644
--- a/lib/librte_ring/rte_ring_elem.h
+++ b/lib/librte_ring/rte_ring_elem.h
@@ -109,85 +109,88 @@  __rte_experimental
 struct rte_ring *rte_ring_create_elem(const char *name, unsigned int count,
 			unsigned int esize, int socket_id, unsigned int flags);
 
-#define ENQUEUE_PTRS_GEN(r, ring_start, prod_head, obj_table, esize, n) do { \
-	unsigned int i, j; \
-	const uint32_t size = (r)->size; \
-	uint32_t idx = prod_head & (r)->mask; \
-	uint32_t *ring = (uint32_t *)ring_start; \
-	uint32_t *obj = (uint32_t *)obj_table; \
-	uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
-	uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
-	uint32_t seg0 = size - idx; \
-	if (likely(n < seg0)) { \
-		for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
-						i += 8, nr_idx += 8) { \
-			memcpy(ring + nr_idx, obj + i, 8 * sizeof (uint32_t)); \
-		} \
-		switch (nr_n & 0x7) { \
-		case 7: \
-			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-		case 6: \
-			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-		case 5: \
-			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-		case 4: \
-			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-		case 3: \
-			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-		case 2: \
-			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-		case 1: \
-			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
-		} \
-	} else { \
-		uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
-		uint32_t nr_seg1 = nr_n - nr_seg0; \
-		for (i = 0; i < nr_seg0; i++, nr_idx++)\
-			ring[nr_idx] = obj[i]; \
-		for (j = 0; j < nr_seg1; i++, j++) \
-			ring[j] = obj[i]; \
-	} \
-} while (0)
-
-#define DEQUEUE_PTRS_GEN(r, ring_start, cons_head, obj_table, esize, n) do { \
-	unsigned int i, j; \
-	uint32_t idx = cons_head & (r)->mask; \
-	const uint32_t size = (r)->size; \
-	uint32_t *ring = (uint32_t *)ring_start; \
-	uint32_t *obj = (uint32_t *)obj_table; \
-	uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
-	uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
-	uint32_t seg0 = size - idx; \
-	if (likely(n < seg0)) { \
-		for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
-						i += 8, nr_idx += 8) { \
-			memcpy(obj + i, ring + nr_idx, 8 * sizeof (uint32_t)); \
-		} \
-		switch (nr_n & 0x7) { \
-		case 7: \
-			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-		case 6: \
-			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-		case 5: \
-			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-		case 4: \
-			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-		case 3: \
-			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-		case 2: \
-			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-		case 1: \
-			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
-		} \
-	} else { \
-		uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
-		uint32_t nr_seg1 = nr_n - nr_seg0; \
-		for (i = 0; i < nr_seg0; i++, nr_idx++)\
-			obj[i] = ring[nr_idx];\
-		for (j = 0; j < nr_seg1; i++, j++) \
-			obj[i] = ring[j]; \
-	} \
-} while (0)
+/**
+ * Copy nr_num 32-bit words from su32 to du32.
+ *
+ * Complete groups of eight words are moved with a fixed-size memcpy();
+ * the remaining zero to seven words are assigned one at a time, in
+ * ascending address order.
+ *
+ * @param du32
+ *   Destination array; nr_num words are written to it.
+ * @param su32
+ *   Source array; nr_num words are read from it.
+ * @param nr_num
+ *   Number of uint32_t words to copy.
+ */
+static __rte_always_inline void
+copy_elems(uint32_t du32[], const uint32_t su32[], uint32_t nr_num)
+{
+	uint32_t left = nr_num;
+
+	/* Bulk part: eight words per memcpy() call. */
+	while (left >= 8) {
+		memcpy(du32, su32, 8 * sizeof(uint32_t));
+		du32 += 8;
+		su32 += 8;
+		left -= 8;
+	}
+
+	/*
+	 * Tail part: du32/su32 now point just past the bulk part and "left"
+	 * (0..7) words remain. Entering at case k and falling through copies
+	 * offsets left - k, left - k + 1, ..., left - 1.
+	 */
+	switch (left) {
+	case 7:
+		du32[left - 7] = su32[left - 7]; /* fallthrough */
+	case 6:
+		du32[left - 6] = su32[left - 6]; /* fallthrough */
+	case 5:
+		du32[left - 5] = su32[left - 5]; /* fallthrough */
+	case 4:
+		du32[left - 4] = su32[left - 4]; /* fallthrough */
+	case 3:
+		du32[left - 3] = su32[left - 3]; /* fallthrough */
+	case 2:
+		du32[left - 2] = su32[left - 2]; /* fallthrough */
+	case 1:
+		du32[left - 1] = su32[left - 1];
+		break;
+	default:
+		break;
+	}
+}
+
+/**
+ * Copy num elements of esize bytes each from obj_table into the ring
+ * storage, starting at the slot selected by prod_head and continuing at
+ * the start of the storage when the end is reached.
+ *
+ * All offsets and lengths are converted to units of uint32_t words before
+ * they are handed to copy_elems().
+ *
+ * @param r
+ *   Ring descriptor; only r->size and r->mask are read here.
+ * @param ring_start
+ *   Address of the first slot of the ring storage.
+ * @param prod_head
+ *   Producer head index; masked with r->mask to obtain the first slot.
+ * @param obj_table
+ *   Source buffer holding the elements to copy.
+ * @param num
+ *   Number of elements to copy.
+ * @param esize
+ *   Size of one element in bytes. NOTE(review): presumably always a
+ *   multiple of sizeof(uint32_t); any remainder is dropped by the
+ *   division below - confirm against the callers.
+ */
+static __rte_always_inline void
+enqueue_elems(struct rte_ring *r, void *ring_start, uint32_t prod_head,
+		void *obj_table, uint32_t num, uint32_t esize)
+{
+	const uint32_t size = r->size;
+	/*
+	 * Words per element. Scaling counts by this factor, rather than
+	 * computing (count * esize) / sizeof(uint32_t), avoids a 32-bit
+	 * intermediate product in bytes that wraps as soon as the ring
+	 * storage reaches 4 GiB.
+	 */
+	const uint32_t scale = esize / sizeof(uint32_t);
+	const uint32_t idx = prod_head & r->mask;
+	/* Slots available between idx and the end of the ring storage. */
+	const uint32_t s0 = size - idx;
+	/* Normalize the number of elements to uint32_t words. */
+	const uint32_t nr_num = num * scale;
+	/* Normalize the start index to uint32_t words. */
+	const uint32_t nr_idx = idx * scale;
+
+	uint32_t *ring = (uint32_t *)ring_start;
+	uint32_t *du32 = ring + nr_idx;
+	const uint32_t *su32 = (const uint32_t *)obj_table;
+
+	if (num < s0) {
+		/* No wrap-around: one contiguous copy. */
+		copy_elems(du32, su32, nr_num);
+	} else {
+		/* Fill up to the end of the storage, then restart at slot 0. */
+		const uint32_t nr_s0 = s0 * scale;
+		const uint32_t nr_s1 = nr_num - nr_s0;
+
+		copy_elems(du32, su32, nr_s0);
+		copy_elems(ring, su32 + nr_s0, nr_s1);
+	}
+}
+
+/**
+ * Copy num elements of esize bytes each out of the ring storage into
+ * obj_table, starting at the slot selected by cons_head and continuing at
+ * the start of the storage when the end is reached.
+ *
+ * All offsets and lengths are converted to units of uint32_t words before
+ * they are handed to copy_elems().
+ *
+ * @param r
+ *   Ring descriptor; only r->size and r->mask are read here.
+ * @param ring_start
+ *   Address of the first slot of the ring storage.
+ * @param cons_head
+ *   Consumer head index; masked with r->mask to obtain the first slot.
+ * @param obj_table
+ *   Destination buffer that receives the copied elements.
+ * @param num
+ *   Number of elements to copy.
+ * @param esize
+ *   Size of one element in bytes. NOTE(review): presumably always a
+ *   multiple of sizeof(uint32_t); any remainder is dropped by the
+ *   division below - confirm against the callers.
+ */
+static __rte_always_inline void
+dequeue_elems(struct rte_ring *r, void *ring_start, uint32_t cons_head,
+		void *obj_table, uint32_t num, uint32_t esize)
+{
+	const uint32_t size = r->size;
+	/*
+	 * Words per element. Scaling counts by this factor, rather than
+	 * computing (count * esize) / sizeof(uint32_t), avoids a 32-bit
+	 * intermediate product in bytes that wraps as soon as the ring
+	 * storage reaches 4 GiB.
+	 */
+	const uint32_t scale = esize / sizeof(uint32_t);
+	const uint32_t idx = cons_head & r->mask;
+	/* Slots available between idx and the end of the ring storage. */
+	const uint32_t s0 = size - idx;
+	/* Normalize the number of elements to uint32_t words. */
+	const uint32_t nr_num = num * scale;
+	/* Normalize the start index to uint32_t words. */
+	const uint32_t nr_idx = idx * scale;
+
+	const uint32_t *ring = (const uint32_t *)ring_start;
+	const uint32_t *su32 = ring + nr_idx;
+	uint32_t *du32 = (uint32_t *)obj_table;
+
+	if (num < s0) {
+		/* No wrap-around: one contiguous copy. */
+		copy_elems(du32, su32, nr_num);
+	} else {
+		/* Drain up to the end of the storage, then restart at slot 0. */
+		const uint32_t nr_s0 = s0 * scale;
+		const uint32_t nr_s1 = nr_num - nr_s0;
+
+		copy_elems(du32, su32, nr_s0);
+		copy_elems(du32 + nr_s0, ring, nr_s1);
+	}
+}
 
 /* Between load and load. there might be cpu reorder in weak model
  * (powerpc/arm).
@@ -242,7 +245,7 @@  __rte_ring_do_enqueue_elem(struct rte_ring *r, void * const obj_table,
 	if (n == 0)
 		goto end;
 
-	ENQUEUE_PTRS_GEN(r, &r[1], prod_head, obj_table, esize, n);
+	enqueue_elems(r, &r[1], prod_head, obj_table, n, esize);
 
 	update_tail(&r->prod, prod_head, prod_next, is_sp, 1);
 end:
@@ -289,7 +292,7 @@  __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table,
 	if (n == 0)
 		goto end;
 
-	DEQUEUE_PTRS_GEN(r, &r[1], cons_head, obj_table, esize, n);
+	dequeue_elems(r, &r[1], cons_head, obj_table, n, esize);
 
 	update_tail(&r->cons, cons_head, cons_next, is_sc, 0);