[RFC,v6,5/6] lib/ring: copy ring elements using memcpy partially

Message ID 20191021002300.26497-6-honnappa.nagarahalli@arm.com
State New
Headers show
Series
  • lib/ring: APIs to support custom element size
Related show

Commit Message

Honnappa Nagarahalli Oct. 21, 2019, 12:22 a.m.
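Ring elements are now copied by a single generic routine: memcpy() is
used for each 32-byte chunk, and any remaining bytes are copied with
32-bit assignments. Because the copy no longer depends on per-size
(4/8/16 byte) macros, the check that restricted esize to those values
is removed from rte_ring_get_memsize_elem().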

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

---
 lib/librte_ring/rte_ring.c      |  10 --
 lib/librte_ring/rte_ring_elem.h | 229 +++++++-------------------------
 2 files changed, 49 insertions(+), 190 deletions(-)

-- 
2.17.1

Patch

diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c
index e95285259..0f7f4b598 100644
--- a/lib/librte_ring/rte_ring.c
+++ b/lib/librte_ring/rte_ring.c
@@ -51,16 +51,6 @@  rte_ring_get_memsize_elem(unsigned count, unsigned esize)
 {
 	ssize_t sz;
 
-	/* Supported esize values are 4/8/16.
-	 * Others can be added on need basis.
-	 */
-	if (esize != 4 && esize != 8 && esize != 16) {
-		RTE_LOG(ERR, RING,
-			"Unsupported esize value. Supported values are 4, 8 and 16\n");
-
-		return -EINVAL;
-	}
-
 	/* count must be a power of 2 */
 	if ((!POWEROF2(count)) || (count > RTE_RING_SZ_MASK )) {
 		RTE_LOG(ERR, RING,
diff --git a/lib/librte_ring/rte_ring_elem.h b/lib/librte_ring/rte_ring_elem.h
index 7e9914567..0ce5f2be7 100644
--- a/lib/librte_ring/rte_ring_elem.h
+++ b/lib/librte_ring/rte_ring_elem.h
@@ -24,6 +24,7 @@  extern "C" {
 #include <stdint.h>
 #include <sys/queue.h>
 #include <errno.h>
+#include <string.h>
 #include <rte_common.h>
 #include <rte_config.h>
 #include <rte_memory.h>
@@ -108,215 +109,83 @@  __rte_experimental
 struct rte_ring *rte_ring_create_elem(const char *name, unsigned int count,
 			unsigned int esize, int socket_id, unsigned int flags);
 
-/* the actual enqueue of pointers on the ring.
- * Placed here since identical code needed in both
- * single and multi producer enqueue functions.
- */
-#define ENQUEUE_PTRS_ELEM(r, ring_start, prod_head, obj_table, esize, n) do { \
-	if (esize == 4) \
-		ENQUEUE_PTRS_32(r, ring_start, prod_head, obj_table, n); \
-	else if (esize == 8) \
-		ENQUEUE_PTRS_64(r, ring_start, prod_head, obj_table, n); \
-	else if (esize == 16) \
-		ENQUEUE_PTRS_128(r, ring_start, prod_head, obj_table, n); \
-} while (0)
-
-#define ENQUEUE_PTRS_32(r, ring_start, prod_head, obj_table, n) do { \
-	unsigned int i; \
+#define ENQUEUE_PTRS_GEN(r, ring_start, prod_head, obj_table, esize, n) do { \
+	unsigned int i, j; \
 	const uint32_t size = (r)->size; \
 	uint32_t idx = prod_head & (r)->mask; \
 	uint32_t *ring = (uint32_t *)ring_start; \
 	uint32_t *obj = (uint32_t *)obj_table; \
-	if (likely(idx + n < size)) { \
-		for (i = 0; i < (n & ((~(uint32_t)0x7))); i += 8, idx += 8) { \
-			ring[idx] = obj[i]; \
-			ring[idx + 1] = obj[i + 1]; \
-			ring[idx + 2] = obj[i + 2]; \
-			ring[idx + 3] = obj[i + 3]; \
-			ring[idx + 4] = obj[i + 4]; \
-			ring[idx + 5] = obj[i + 5]; \
-			ring[idx + 6] = obj[i + 6]; \
-			ring[idx + 7] = obj[i + 7]; \
+	uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
+	uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
+	uint32_t seg0 = size - idx; \
+	if (likely(n < seg0)) { \
+		for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
+						i += 8, nr_idx += 8) { \
+			memcpy(ring + nr_idx, obj + i, 8 * sizeof (uint32_t)); \
 		} \
-		switch (n & 0x7) { \
+		switch (nr_n & 0x7) { \
 		case 7: \
-			ring[idx++] = obj[i++]; /* fallthrough */ \
+			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
 		case 6: \
-			ring[idx++] = obj[i++]; /* fallthrough */ \
+			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
 		case 5: \
-			ring[idx++] = obj[i++]; /* fallthrough */ \
+			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
 		case 4: \
-			ring[idx++] = obj[i++]; /* fallthrough */ \
+			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
 		case 3: \
-			ring[idx++] = obj[i++]; /* fallthrough */ \
+			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
 		case 2: \
-			ring[idx++] = obj[i++]; /* fallthrough */ \
-		case 1: \
-			ring[idx++] = obj[i++]; /* fallthrough */ \
-		} \
-	} else { \
-		for (i = 0; idx < size; i++, idx++)\
-			ring[idx] = obj[i]; \
-		for (idx = 0; i < n; i++, idx++) \
-			ring[idx] = obj[i]; \
-	} \
-} while (0)
-
-#define ENQUEUE_PTRS_64(r, ring_start, prod_head, obj_table, n) do { \
-	unsigned int i; \
-	const uint32_t size = (r)->size; \
-	uint32_t idx = prod_head & (r)->mask; \
-	uint64_t *ring = (uint64_t *)ring_start; \
-	uint64_t *obj = (uint64_t *)obj_table; \
-	if (likely(idx + n < size)) { \
-		for (i = 0; i < (n & ((~(uint32_t)0x3))); i += 4, idx += 4) { \
-			ring[idx] = obj[i]; \
-			ring[idx + 1] = obj[i + 1]; \
-			ring[idx + 2] = obj[i + 2]; \
-			ring[idx + 3] = obj[i + 3]; \
-		} \
-		switch (n & 0x3) { \
-		case 3: \
-			ring[idx++] = obj[i++]; /* fallthrough */ \
-		case 2: \
-			ring[idx++] = obj[i++]; /* fallthrough */ \
-		case 1: \
-			ring[idx++] = obj[i++]; \
-		} \
-	} else { \
-		for (i = 0; idx < size; i++, idx++)\
-			ring[idx] = obj[i]; \
-		for (idx = 0; i < n; i++, idx++) \
-			ring[idx] = obj[i]; \
-	} \
-} while (0)
-
-#define ENQUEUE_PTRS_128(r, ring_start, prod_head, obj_table, n) do { \
-	unsigned int i; \
-	const uint32_t size = (r)->size; \
-	uint32_t idx = prod_head & (r)->mask; \
-	__uint128_t *ring = (__uint128_t *)ring_start; \
-	__uint128_t *obj = (__uint128_t *)obj_table; \
-	if (likely(idx + n < size)) { \
-		for (i = 0; i < (n >> 1); i += 2, idx += 2) { \
-			ring[idx] = obj[i]; \
-			ring[idx + 1] = obj[i + 1]; \
-		} \
-		switch (n & 0x1) { \
+			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
 		case 1: \
-			ring[idx++] = obj[i++]; \
+			ring[nr_idx++] = obj[i++]; /* fallthrough */ \
 		} \
 	} else { \
-		for (i = 0; idx < size; i++, idx++)\
-			ring[idx] = obj[i]; \
-		for (idx = 0; i < n; i++, idx++) \
-			ring[idx] = obj[i]; \
+		uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
+		uint32_t nr_seg1 = nr_n - nr_seg0; \
+		for (i = 0; i < nr_seg0; i++, nr_idx++)\
+			ring[nr_idx] = obj[i]; \
+		for (j = 0; j < nr_seg1; i++, j++) \
+			ring[j] = obj[i]; \
 	} \
 } while (0)
 
-/* the actual copy of pointers on the ring to obj_table.
- * Placed here since identical code needed in both
- * single and multi consumer dequeue functions.
- */
-#define DEQUEUE_PTRS_ELEM(r, ring_start, cons_head, obj_table, esize, n) do { \
-	if (esize == 4) \
-		DEQUEUE_PTRS_32(r, ring_start, cons_head, obj_table, n); \
-	else if (esize == 8) \
-		DEQUEUE_PTRS_64(r, ring_start, cons_head, obj_table, n); \
-	else if (esize == 16) \
-		DEQUEUE_PTRS_128(r, ring_start, cons_head, obj_table, n); \
-} while (0)
-
-#define DEQUEUE_PTRS_32(r, ring_start, cons_head, obj_table, n) do { \
-	unsigned int i; \
+#define DEQUEUE_PTRS_GEN(r, ring_start, cons_head, obj_table, esize, n) do { \
+	unsigned int i, j; \
 	uint32_t idx = cons_head & (r)->mask; \
 	const uint32_t size = (r)->size; \
 	uint32_t *ring = (uint32_t *)ring_start; \
 	uint32_t *obj = (uint32_t *)obj_table; \
-	if (likely(idx + n < size)) { \
-		for (i = 0; i < (n & (~(uint32_t)0x7)); i += 8, idx += 8) {\
-			obj[i] = ring[idx]; \
-			obj[i + 1] = ring[idx + 1]; \
-			obj[i + 2] = ring[idx + 2]; \
-			obj[i + 3] = ring[idx + 3]; \
-			obj[i + 4] = ring[idx + 4]; \
-			obj[i + 5] = ring[idx + 5]; \
-			obj[i + 6] = ring[idx + 6]; \
-			obj[i + 7] = ring[idx + 7]; \
+	uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
+	uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
+	uint32_t seg0 = size - idx; \
+	if (likely(n < seg0)) { \
+		for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
+						i += 8, nr_idx += 8) { \
+			memcpy(obj + i, ring + nr_idx, 8 * sizeof (uint32_t)); \
 		} \
-		switch (n & 0x7) { \
+		switch (nr_n & 0x7) { \
 		case 7: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
+			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
 		case 6: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
+			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
 		case 5: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
+			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
 		case 4: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
+			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
 		case 3: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
+			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
 		case 2: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
-		case 1: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
-		} \
-	} else { \
-		for (i = 0; idx < size; i++, idx++) \
-			obj[i] = ring[idx]; \
-		for (idx = 0; i < n; i++, idx++) \
-			obj[i] = ring[idx]; \
-	} \
-} while (0)
-
-#define DEQUEUE_PTRS_64(r, ring_start, cons_head, obj_table, n) do { \
-	unsigned int i; \
-	uint32_t idx = cons_head & (r)->mask; \
-	const uint32_t size = (r)->size; \
-	uint64_t *ring = (uint64_t *)ring_start; \
-	uint64_t *obj = (uint64_t *)obj_table; \
-	if (likely(idx + n < size)) { \
-		for (i = 0; i < (n & (~(uint32_t)0x3)); i += 4, idx += 4) {\
-			obj[i] = ring[idx]; \
-			obj[i + 1] = ring[idx + 1]; \
-			obj[i + 2] = ring[idx + 2]; \
-			obj[i + 3] = ring[idx + 3]; \
-		} \
-		switch (n & 0x3) { \
-		case 3: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
-		case 2: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
-		case 1: \
-			obj[i++] = ring[idx++]; \
-		} \
-	} else { \
-		for (i = 0; idx < size; i++, idx++) \
-			obj[i] = ring[idx]; \
-		for (idx = 0; i < n; i++, idx++) \
-			obj[i] = ring[idx]; \
-	} \
-} while (0)
-
-#define DEQUEUE_PTRS_128(r, ring_start, cons_head, obj_table, n) do { \
-	unsigned int i; \
-	uint32_t idx = cons_head & (r)->mask; \
-	const uint32_t size = (r)->size; \
-	__uint128_t *ring = (__uint128_t *)ring_start; \
-	__uint128_t *obj = (__uint128_t *)obj_table; \
-	if (likely(idx + n < size)) { \
-		for (i = 0; i < (n >> 1); i += 2, idx += 2) { \
-			obj[i] = ring[idx]; \
-			obj[i + 1] = ring[idx + 1]; \
-		} \
-		switch (n & 0x1) { \
+			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
 		case 1: \
-			obj[i++] = ring[idx++]; /* fallthrough */ \
+			obj[i++] = ring[nr_idx++]; /* fallthrough */ \
 		} \
 	} else { \
-		for (i = 0; idx < size; i++, idx++) \
-			obj[i] = ring[idx]; \
-		for (idx = 0; i < n; i++, idx++) \
-			obj[i] = ring[idx]; \
+		uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
+		uint32_t nr_seg1 = nr_n - nr_seg0; \
+		for (i = 0; i < nr_seg0; i++, nr_idx++)\
+			obj[i] = ring[nr_idx];\
+		for (j = 0; j < nr_seg1; i++, j++) \
+			obj[i] = ring[j]; \
 	} \
 } while (0)
 
@@ -373,7 +242,7 @@  __rte_ring_do_enqueue_elem(struct rte_ring *r, void * const obj_table,
 	if (n == 0)
 		goto end;
 
-	ENQUEUE_PTRS_ELEM(r, &r[1], prod_head, obj_table, esize, n);
+	ENQUEUE_PTRS_GEN(r, &r[1], prod_head, obj_table, esize, n);
 
 	update_tail(&r->prod, prod_head, prod_next, is_sp, 1);
 end:
@@ -420,7 +289,7 @@  __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table,
 	if (n == 0)
 		goto end;
 
-	DEQUEUE_PTRS_ELEM(r, &r[1], cons_head, obj_table, esize, n);
+	DEQUEUE_PTRS_GEN(r, &r[1], cons_head, obj_table, esize, n);
 
 	update_tail(&r->cons, cons_head, cons_next, is_sc, 0);