diff mbox series

[v2,4/5] bus/fslmc: use cinh read for eqcr ci on ls1088 platform

Message ID 20190627093343.5171-5-hemant.agrawal@nxp.com
State Superseded
Headers show
Series FSLMC bus enchancements | expand

Commit Message

Hemant Agrawal June 27, 2019, 9:33 a.m. UTC
From: Nipun Gupta <nipun.gupta@nxp.com>


LS1088 platform CENA operation are causing issues
at high load. CINH (cache inhibited) mode is working
fine with minor performance impact.

This patch enables CINH mode selectively on LS1088 platform

Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>

---
 drivers/bus/fslmc/portal/dpaa2_hw_dpio.h      |   2 -
 drivers/bus/fslmc/portal/dpaa2_hw_pvt.h       |   5 -
 .../fslmc/qbman/include/fsl_qbman_portal.h    |   9 +
 drivers/bus/fslmc/qbman/qbman_portal.c        | 278 +++++++++++++++++-
 drivers/bus/fslmc/qbman/qbman_sys.h           |  22 +-
 5 files changed, 303 insertions(+), 13 deletions(-)

-- 
2.17.1
diff mbox series

Patch

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
index 17e7e4fad..c68495eaf 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.h
@@ -28,8 +28,6 @@  RTE_DECLARE_PER_LCORE(struct dpaa2_io_portal_t, _dpaa2_io);
 #define DPAA2_PER_LCORE_ETHRX_DPIO RTE_PER_LCORE(_dpaa2_io).ethrx_dpio_dev
 #define DPAA2_PER_LCORE_ETHRX_PORTAL DPAA2_PER_LCORE_ETHRX_DPIO->sw_portal
 
-/* Variable to store DPAA2 platform type */
-extern uint32_t dpaa2_svr_family;
 /* Variable to store DPAA2 DQRR size */
 extern uint8_t dpaa2_dqrr_size;
 /* Variable to store DPAA2 EQCR size */
diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
index 0cbde8a9b..92fc76211 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_pvt.h
@@ -23,11 +23,6 @@ 
 #define lower_32_bits(x) ((uint32_t)(x))
 #define upper_32_bits(x) ((uint32_t)(((x) >> 16) >> 16))
 
-#define SVR_LS1080A             0x87030000
-#define SVR_LS2080A             0x87010000
-#define SVR_LS2088A             0x87090000
-#define SVR_LX2160A             0x87360000
-
 #ifndef VLAN_TAG_SIZE
 #define VLAN_TAG_SIZE   4 /** < Vlan Header Length */
 #endif
diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
index 07b8a4372..107850d37 100644
--- a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
@@ -1,6 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014 Freescale Semiconductor, Inc.
+ * Copyright 2015-2019 NXP
  *
  */
 #ifndef _FSL_QBMAN_PORTAL_H
@@ -8,6 +9,14 @@ 
 
 #include <fsl_qbman_base.h>
 
+#define SVR_LS1080A	0x87030000
+#define SVR_LS2080A	0x87010000
+#define SVR_LS2088A	0x87090000
+#define SVR_LX2160A	0x87360000
+
+/* Variable to store DPAA2 platform type */
+extern uint32_t dpaa2_svr_family;
+
 /**
  * DOC - QBMan portal APIs to implement the following functions:
  * - Initialize and destroy Software portal object.
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c b/drivers/bus/fslmc/qbman/qbman_portal.c
index 20da8b921..e6066ce35 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.c
+++ b/drivers/bus/fslmc/qbman/qbman_portal.c
@@ -76,6 +76,10 @@  qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd);
 static int
+qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd);
+static int
 qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd);
@@ -87,6 +91,12 @@  qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
 		uint32_t *flags,
 		int num_frames);
 static int
+qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		uint32_t *flags,
+		int num_frames);
+static int
 qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd,
@@ -99,7 +109,12 @@  qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
 		struct qbman_fd **fd,
 		uint32_t *flags,
 		int num_frames);
-
+static int
+qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		struct qbman_fd **fd,
+		uint32_t *flags,
+		int num_frames);
 static int
 qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
@@ -113,6 +128,11 @@  qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
 		const struct qbman_fd *fd,
 		int num_frames);
 static int
+qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		int num_frames);
+static int
 qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd,
@@ -273,6 +293,17 @@  struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
 		qbman_swp_release_ptr = qbman_swp_release_mem_back;
 	}
 
+	if (dpaa2_svr_family == SVR_LS1080A) {
+		qbman_swp_enqueue_ring_mode_ptr =
+				qbman_swp_enqueue_ring_mode_cinh_direct;
+		qbman_swp_enqueue_multiple_ptr =
+				qbman_swp_enqueue_multiple_cinh_direct;
+		qbman_swp_enqueue_multiple_fd_ptr =
+				qbman_swp_enqueue_multiple_fd_cinh_direct;
+		qbman_swp_enqueue_multiple_desc_ptr =
+				qbman_swp_enqueue_multiple_desc_cinh_direct;
+	}
+
 	for (mask_size = p->eqcr.pi_ring_size; mask_size > 0; mask_size >>= 1)
 		p->eqcr.pi_ci_mask = (p->eqcr.pi_ci_mask<<1) + 1;
 	eqcr_pi = qbman_cinh_read(&p->sys, QBMAN_CINH_SWP_EQCR_PI);
@@ -700,6 +731,46 @@  static int qbman_swp_enqueue_ring_mode_direct(struct qbman_swp *s,
 	return 0;
 }
 
+static int qbman_swp_enqueue_ring_mode_cinh_direct(
+		struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd)
+{
+	uint32_t *p;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t eqcr_ci, full_mask, half_mask;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+				eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return -EBUSY;
+	}
+
+	p = qbman_cena_write_start_wo_shadow(&s->sys,
+			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
+	memcpy(&p[1], &cl[1], 28);
+	memcpy(&p[8], fd, sizeof(*fd));
+	lwsync();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	p[0] = cl[0] | s->eqcr.pi_vb;
+	qbman_cena_write_complete_wo_shadow(&s->sys,
+			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
+	s->eqcr.pi++;
+	s->eqcr.pi &= full_mask;
+	s->eqcr.available--;
+	if (!(s->eqcr.pi & half_mask))
+		s->eqcr.pi_vb ^= QB_VALID_BIT;
+
+	return 0;
+}
+
 static int qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
 						const struct qbman_eq_desc *d,
 						const struct qbman_fd *fd)
@@ -823,6 +894,76 @@  static int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
 	return num_enqueued;
 }
 
+static int qbman_swp_enqueue_multiple_cinh_direct(
+		struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		uint32_t *flags,
+		int num_frames)
+{
+	uint32_t *p = NULL;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+	uint64_t addr_cena;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+				eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
+
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		memcpy(&p[1], &cl[1], 28);
+		memcpy(&p[8], &fd[i], sizeof(*fd));
+		eqcr_pi++;
+	}
+
+	lwsync();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+			struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+			d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+		}
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+
+	/* Flush all the cacheline without load/store in between */
+	eqcr_pi = s->eqcr.pi;
+	addr_cena = (size_t)s->sys.addr_cena;
+	for (i = 0; i < num_enqueued; i++) {
+		dcbf(addr_cena +
+			QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		eqcr_pi++;
+	}
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	return num_enqueued;
+}
+
 static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
 					       const struct qbman_eq_desc *d,
 					       const struct qbman_fd *fd,
@@ -954,6 +1095,76 @@  static int qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
 	return num_enqueued;
 }
 
+static int qbman_swp_enqueue_multiple_fd_cinh_direct(
+		struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		struct qbman_fd **fd,
+		uint32_t *flags,
+		int num_frames)
+{
+	uint32_t *p = NULL;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+	uint64_t addr_cena;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+				eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
+
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		memcpy(&p[1], &cl[1], 28);
+		memcpy(&p[8], fd[i], sizeof(struct qbman_fd));
+		eqcr_pi++;
+	}
+
+	lwsync();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+			struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+			d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+		}
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+
+	/* Flush all the cacheline without load/store in between */
+	eqcr_pi = s->eqcr.pi;
+	addr_cena = (size_t)s->sys.addr_cena;
+	for (i = 0; i < num_enqueued; i++) {
+		dcbf(addr_cena +
+			QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		eqcr_pi++;
+	}
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	return num_enqueued;
+}
+
 static int qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
 						  const struct qbman_eq_desc *d,
 						  struct qbman_fd **fd,
@@ -1087,6 +1298,71 @@  static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
 	return num_enqueued;
 }
 
+static int qbman_swp_enqueue_multiple_desc_cinh_direct(
+		struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		int num_frames)
+{
+	uint32_t *p;
+	const uint32_t *cl;
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+	uint64_t addr_cena;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+					eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
+
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = qb_cl(&d[i]);
+		memcpy(&p[1], &cl[1], 28);
+		memcpy(&p[8], &fd[i], sizeof(*fd));
+		eqcr_pi++;
+	}
+
+	lwsync();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cena_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = qb_cl(&d[i]);
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+
+	/* Flush all the cacheline without load/store in between */
+	eqcr_pi = s->eqcr.pi;
+	addr_cena = (size_t)s->sys.addr_cena;
+	for (i = 0; i < num_enqueued; i++) {
+		dcbf(addr_cena +
+			QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		eqcr_pi++;
+	}
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	return num_enqueued;
+}
+
 static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 					const struct qbman_eq_desc *d,
 					const struct qbman_fd *fd,
diff --git a/drivers/bus/fslmc/qbman/qbman_sys.h b/drivers/bus/fslmc/qbman/qbman_sys.h
index 71f7a6782..e59fcfd54 100644
--- a/drivers/bus/fslmc/qbman/qbman_sys.h
+++ b/drivers/bus/fslmc/qbman/qbman_sys.h
@@ -381,6 +381,14 @@  static inline uint32_t qbman_set_swp_cfg(uint8_t max_fill, uint8_t wn,
 #define QMAN_REV_5000	0x05000000
 #define QMAN_REV_MASK	0xffff0000
 
+#define SVR_LS1080A	0x87030000
+#define SVR_LS2080A	0x87010000
+#define SVR_LS2088A	0x87090000
+#define SVR_LX2160A	0x87360000
+
+/* Variable to store DPAA2 platform type */
+extern uint32_t dpaa2_svr_family;
+
 static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 				     const struct qbman_swp_desc *d,
 				     uint8_t dqrr_size)
@@ -388,16 +396,17 @@  static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 	uint32_t reg;
 	int i;
 	int cena_region_size = 4*1024;
-
-	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
-			&& (d->cena_access_mode == qman_cena_fastest_access))
-		cena_region_size = 64*1024;
+	uint8_t est = 1;
 #ifdef RTE_ARCH_64
 	uint8_t wn = CENA_WRITE_ENABLE;
 #else
 	uint8_t wn = CINH_WRITE_ENABLE;
 #endif
 
+
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+			&& (d->cena_access_mode == qman_cena_fastest_access))
+		cena_region_size = 64*1024;
 	s->addr_cena = d->cena_bar;
 	s->addr_cinh = d->cinh_bar;
 	s->idx = (uint32_t)d->idx;
@@ -428,6 +437,9 @@  static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 			dccivac(s->addr_cena + i);
 	}
 
+	if (dpaa2_svr_family == SVR_LS1080A)
+		est = 0;
+
 	if (s->eqcr_mode == qman_eqcr_vb_array) {
 		reg = qbman_set_swp_cfg(dqrr_size, wn,
 					0, 3, 2, 3, 1, 1, 1, 1, 1, 1);
@@ -438,7 +450,7 @@  static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 						1, 3, 2, 0, 1, 1, 1, 1, 1, 1);
 		else
 			reg = qbman_set_swp_cfg(dqrr_size, wn,
-						1, 3, 2, 2, 1, 1, 1, 1, 1, 1);
+						est, 3, 2, 2, 1, 1, 1, 1, 1, 1);
 	}
 
 	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000