diff mbox series

[07/37] bus/fslmc: support portal migration

Message ID 20200527132326.1382-8-hemant.agrawal@nxp.com
State Superseded
Headers show
Series NXP DPAAx enhancements | expand

Commit Message

Hemant Agrawal May 27, 2020, 1:22 p.m. UTC
From: Nipun Gupta <nipun.gupta@nxp.com>


The patch adds support for portal migration by disabling stashing
for the portals which is used in the non-affined threads, or on
threads affined to multiple cores

Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>

---
 drivers/bus/fslmc/portal/dpaa2_hw_dpio.c      |  83 +----
 .../bus/fslmc/qbman/include/fsl_qbman_debug.h |   1 +
 .../fslmc/qbman/include/fsl_qbman_portal.h    |   8 +-
 drivers/bus/fslmc/qbman/qbman_portal.c        | 340 +++++++++++++++++-
 drivers/bus/fslmc/qbman/qbman_portal.h        |  19 +-
 drivers/bus/fslmc/qbman/qbman_sys.h           | 135 ++++++-
 6 files changed, 503 insertions(+), 83 deletions(-)

-- 
2.17.1
diff mbox series

Patch

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
index 5a12ff35d..97be76116 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -53,10 +53,6 @@  static uint32_t io_space_count;
 /* Variable to store DPAA2 platform type */
 uint32_t dpaa2_svr_family;
 
-/* Physical core id for lcores running on dpaa2. */
-/* DPAA2 only support 1 lcore to 1 phy cpu mapping */
-static unsigned int dpaa2_cpu[RTE_MAX_LCORE];
-
 /* Variable to store DPAA2 DQRR size */
 uint8_t dpaa2_dqrr_size;
 /* Variable to store DPAA2 EQCR size */
@@ -159,7 +155,7 @@  dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, int cpu_id)
 		return;
 	}
 
-	cpu_mask = cpu_mask << dpaa2_cpu[cpu_id];
+	cpu_mask = cpu_mask << cpu_id;
 	snprintf(command, COMMAND_LEN, "echo %X > /proc/irq/%s/smp_affinity",
 		 cpu_mask, token);
 	ret = system(command);
@@ -228,17 +224,9 @@  static void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev)
 #endif
 
 static int
-dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev)
+dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
 {
 	int sdest, ret;
-	int cpu_id;
-
-	/* Set the Stashing Destination */
-	cpu_id = dpaa2_get_core_id();
-	if (cpu_id < 0) {
-		DPAA2_BUS_ERR("Thread not affined to a single core");
-		return -1;
-	}
 
 	/* Set the STASH Destination depending on Current CPU ID.
 	 * Valid values of SDEST are 4,5,6,7. Where,
@@ -277,6 +265,7 @@  static void dpaa2_put_qbman_swp(struct dpaa2_dpio_dev *dpio_dev)
 static struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(void)
 {
 	struct dpaa2_dpio_dev *dpio_dev = NULL;
+	int cpu_id;
 	int ret;
 
 	/* Get DPIO dev handle from list using index */
@@ -292,11 +281,19 @@  static struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(void)
 	DPAA2_BUS_DEBUG("New Portal %p (%d) affined thread - %lu",
 			dpio_dev, dpio_dev->index, syscall(SYS_gettid));
 
-	ret = dpaa2_configure_stashing(dpio_dev);
-	if (ret) {
-		DPAA2_BUS_ERR("dpaa2_configure_stashing failed");
-		rte_atomic16_clear(&dpio_dev->ref_count);
-		return NULL;
+	/* Set the Stashing Destination */
+	cpu_id = dpaa2_get_core_id();
+	if (cpu_id < 0) {
+		DPAA2_BUS_WARN("Thread not affined to a single core");
+		if (dpaa2_svr_family != SVR_LX2160A)
+			qbman_swp_update(dpio_dev->sw_portal, 1);
+	} else {
+		ret = dpaa2_configure_stashing(dpio_dev, cpu_id);
+		if (ret) {
+			DPAA2_BUS_ERR("dpaa2_configure_stashing failed");
+			rte_atomic16_clear(&dpio_dev->ref_count);
+			return NULL;
+		}
 	}
 
 	ret = pthread_setspecific(dpaa2_portal_key, (void *)dpio_dev);
@@ -363,46 +360,6 @@  static void dpaa2_portal_finish(void *arg)
 	pthread_setspecific(dpaa2_portal_key, NULL);
 }
 
-/*
- * This checks for not supported lcore mappings as well as get the physical
- * cpuid for the lcore.
- * one lcore can only map to 1 cpu i.e. 1@10-14 not supported.
- * one cpu can be mapped to more than one lcores.
- */
-static int
-dpaa2_check_lcore_cpuset(void)
-{
-	unsigned int lcore_id, i;
-	int ret = 0;
-
-	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-		dpaa2_cpu[lcore_id] = 0xffffffff;
-
-	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-		rte_cpuset_t cpuset = rte_lcore_cpuset(lcore_id);
-
-		for (i = 0; i < CPU_SETSIZE; i++) {
-			if (!CPU_ISSET(i, &cpuset))
-				continue;
-			if (i >= RTE_MAX_LCORE) {
-				DPAA2_BUS_ERR("ERR:lcore map to core %u (>= %u) not supported",
-					i, RTE_MAX_LCORE);
-				ret = -1;
-				continue;
-			}
-			RTE_LOG(DEBUG, EAL, "lcore id = %u cpu=%u\n",
-				lcore_id, i);
-			if (dpaa2_cpu[lcore_id] != 0xffffffff) {
-				DPAA2_BUS_ERR("ERR:lcore map to multi-cpu not supported");
-				ret = -1;
-				continue;
-			}
-			dpaa2_cpu[lcore_id] = i;
-		}
-	}
-	return ret;
-}
-
 static int
 dpaa2_create_dpio_device(int vdev_fd,
 			 struct vfio_device_info *obj_info,
@@ -413,7 +370,6 @@  dpaa2_create_dpio_device(int vdev_fd,
 	struct qbman_swp_desc p_des;
 	struct dpio_attr attr;
 	int ret;
-	static int check_lcore_cpuset;
 
 	if (obj_info->num_regions < NUM_DPIO_REGIONS) {
 		DPAA2_BUS_ERR("Not sufficient number of DPIO regions");
@@ -433,13 +389,6 @@  dpaa2_create_dpio_device(int vdev_fd,
 	/* Using single portal  for all devices */
 	dpio_dev->mc_portal = dpaa2_get_mcp_ptr(MC_PORTAL_INDEX);
 
-	if (!check_lcore_cpuset) {
-		check_lcore_cpuset = 1;
-
-		if (dpaa2_check_lcore_cpuset() < 0)
-			goto err;
-	}
-
 	dpio_dev->dpio = rte_zmalloc(NULL, sizeof(struct fsl_mc_io),
 				     RTE_CACHE_LINE_SIZE);
 	if (!dpio_dev->dpio) {
diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_debug.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_debug.h
index 11267d439..54096e877 100644
--- a/drivers/bus/fslmc/qbman/include/fsl_qbman_debug.h
+++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_debug.h
@@ -1,5 +1,6 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright (C) 2015 Freescale Semiconductor, Inc.
+ * Copyright 2020 NXP
  */
 #ifndef _FSL_QBMAN_DEBUG_H
 #define _FSL_QBMAN_DEBUG_H
diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
index f820077d2..eb68c9cab 100644
--- a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014 Freescale Semiconductor, Inc.
- * Copyright 2015-2019 NXP
+ * Copyright 2015-2020 NXP
  *
  */
 #ifndef _FSL_QBMAN_PORTAL_H
@@ -44,6 +44,12 @@  extern uint32_t dpaa2_svr_family;
  */
 struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d);
 
+/**
+ * qbman_swp_update() - Update portal cacheability attributes.
+ * @p: the given qbman swp portal
+ */
+int qbman_swp_update(struct qbman_swp *p, int stash_off);
+
 /**
  * qbman_swp_finish() - Create and destroy a functional object representing
  * the given QBMan portal descriptor.
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c b/drivers/bus/fslmc/qbman/qbman_portal.c
index d7ff74c7a..57f50b0d8 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.c
+++ b/drivers/bus/fslmc/qbman/qbman_portal.c
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  *
  */
 
@@ -82,6 +82,10 @@  qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd);
 static int
+qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd);
+static int
 qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd);
@@ -99,6 +103,12 @@  qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
 		uint32_t *flags,
 		int num_frames);
 static int
+qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		uint32_t *flags,
+		int num_frames);
+static int
 qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd,
@@ -118,6 +128,12 @@  qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
 		uint32_t *flags,
 		int num_frames);
 static int
+qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		struct qbman_fd **fd,
+		uint32_t *flags,
+		int num_frames);
+static int
 qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		struct qbman_fd **fd,
@@ -135,6 +151,11 @@  qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
 		const struct qbman_fd *fd,
 		int num_frames);
 static int
+qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		int num_frames);
+static int
 qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd,
@@ -143,9 +164,12 @@  qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 static int
 qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
 static int
+qbman_swp_pull_cinh_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
+static int
 qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d);
 
 const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s);
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s);
 const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s);
 
 static int
@@ -153,6 +177,10 @@  qbman_swp_release_direct(struct qbman_swp *s,
 		const struct qbman_release_desc *d,
 		const uint64_t *buffers, unsigned int num_buffers);
 static int
+qbman_swp_release_cinh_direct(struct qbman_swp *s,
+		const struct qbman_release_desc *d,
+		const uint64_t *buffers, unsigned int num_buffers);
+static int
 qbman_swp_release_mem_back(struct qbman_swp *s,
 		const struct qbman_release_desc *d,
 		const uint64_t *buffers, unsigned int num_buffers);
@@ -327,6 +355,28 @@  struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
 	return p;
 }
 
+int qbman_swp_update(struct qbman_swp *p, int stash_off)
+{
+	const struct qbman_swp_desc *d = &p->desc;
+	struct qbman_swp_sys *s = &p->sys;
+	int ret;
+
+	/* Nothing needs to be done for QBMAN rev > 5000 with fast access */
+	if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+			&& (d->cena_access_mode == qman_cena_fastest_access))
+		return 0;
+
+	ret = qbman_swp_sys_update(s, d, p->dqrr.dqrr_size, stash_off);
+	if (ret) {
+		pr_err("qbman_swp_sys_init() failed %d\n", ret);
+		return ret;
+	}
+
+	p->stash_off = stash_off;
+
+	return 0;
+}
+
 void qbman_swp_finish(struct qbman_swp *p)
 {
 #ifdef QBMAN_CHECKING
@@ -462,6 +512,27 @@  void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
 #endif
 }
 
+void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
+{
+	uint8_t *v = cmd;
+#ifdef QBMAN_CHECKING
+	QBMAN_BUG_ON(!(p->mc.check != swp_mc_can_submit));
+#endif
+	/* TBD: "|=" is going to hurt performance. Need to move as many fields
+	 * out of word zero, and for those that remain, the "OR" needs to occur
+	 * at the caller side. This debug check helps to catch cases where the
+	 * caller wants to OR but has forgotten to do so.
+	 */
+	QBMAN_BUG_ON((*v & cmd_verb) != *v);
+	dma_wmb();
+	*v = cmd_verb | p->mc.valid_bit;
+	qbman_cinh_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+	clean(cmd);
+#ifdef QBMAN_CHECKING
+	p->mc.check = swp_mc_can_poll;
+#endif
+}
+
 void *qbman_swp_mc_result(struct qbman_swp *p)
 {
 	uint32_t *ret, verb;
@@ -500,6 +571,27 @@  void *qbman_swp_mc_result(struct qbman_swp *p)
 	return ret;
 }
 
+void *qbman_swp_mc_result_cinh(struct qbman_swp *p)
+{
+	uint32_t *ret, verb;
+#ifdef QBMAN_CHECKING
+	QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll);
+#endif
+	ret = qbman_cinh_read_shadow(&p->sys,
+			      QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+	/* Remove the valid-bit -
+	 * command completed iff the rest is non-zero
+	 */
+	verb = ret[0] & ~QB_VALID_BIT;
+	if (!verb)
+		return NULL;
+	p->mc.valid_bit ^= QB_VALID_BIT;
+#ifdef QBMAN_CHECKING
+	p->mc.check = swp_mc_can_start;
+#endif
+	return ret;
+}
+
 /***********/
 /* Enqueue */
 /***********/
@@ -640,6 +732,16 @@  static inline void qbman_write_eqcr_am_rt_register(struct qbman_swp *p,
 				     QMAN_RT_MODE);
 }
 
+static void memcpy_byte_by_byte(void *to, const void *from, size_t n)
+{
+	const uint8_t *src = from;
+	volatile uint8_t *dest = to;
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		dest[i] = src[i];
+}
+
 
 static int qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
 					       const struct qbman_eq_desc *d,
@@ -754,7 +856,7 @@  static int qbman_swp_enqueue_ring_mode_cinh_direct(
 			return -EBUSY;
 	}
 
-	p = qbman_cena_write_start_wo_shadow(&s->sys,
+	p = qbman_cinh_write_start_wo_shadow(&s->sys,
 			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
 	memcpy(&p[1], &cl[1], 28);
 	memcpy(&p[8], fd, sizeof(*fd));
@@ -762,8 +864,6 @@  static int qbman_swp_enqueue_ring_mode_cinh_direct(
 
 	/* Set the verb byte, have to substitute in the valid-bit */
 	p[0] = cl[0] | s->eqcr.pi_vb;
-	qbman_cena_write_complete_wo_shadow(&s->sys,
-			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
 	s->eqcr.pi++;
 	s->eqcr.pi &= full_mask;
 	s->eqcr.available--;
@@ -815,7 +915,10 @@  static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
 				       const struct qbman_eq_desc *d,
 				       const struct qbman_fd *fd)
 {
-	return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+	if (!s->stash_off)
+		return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+	else
+		return qbman_swp_enqueue_ring_mode_cinh_direct(s, d, fd);
 }
 
 int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
@@ -1025,7 +1128,12 @@  int qbman_swp_enqueue_multiple(struct qbman_swp *s,
 				      uint32_t *flags,
 				      int num_frames)
 {
-	return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags, num_frames);
+	if (!s->stash_off)
+		return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags,
+						num_frames);
+	else
+		return qbman_swp_enqueue_multiple_cinh_direct(s, d, fd, flags,
+						num_frames);
 }
 
 static int qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
@@ -1233,7 +1341,12 @@  int qbman_swp_enqueue_multiple_fd(struct qbman_swp *s,
 					 uint32_t *flags,
 					 int num_frames)
 {
-	return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags, num_frames);
+	if (!s->stash_off)
+		return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags,
+					num_frames);
+	else
+		return qbman_swp_enqueue_multiple_fd_cinh_direct(s, d, fd,
+					flags, num_frames);
 }
 
 static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
@@ -1426,7 +1539,13 @@  int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
 					   const struct qbman_fd *fd,
 					   int num_frames)
 {
-	return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd, num_frames);
+	if (!s->stash_off)
+		return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd,
+					num_frames);
+	else
+		return qbman_swp_enqueue_multiple_desc_cinh_direct(s, d, fd,
+					num_frames);
+
 }
 
 /*************************/
@@ -1574,6 +1693,30 @@  static int qbman_swp_pull_direct(struct qbman_swp *s,
 	return 0;
 }
 
+static int qbman_swp_pull_cinh_direct(struct qbman_swp *s,
+				 struct qbman_pull_desc *d)
+{
+	uint32_t *p;
+	uint32_t *cl = qb_cl(d);
+
+	if (!atomic_dec_and_test(&s->vdq.busy)) {
+		atomic_inc(&s->vdq.busy);
+		return -EBUSY;
+	}
+
+	d->pull.tok = s->sys.idx + 1;
+	s->vdq.storage = (void *)(size_t)d->pull.rsp_addr_virt;
+	p = qbman_cinh_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR);
+	memcpy_byte_by_byte(&p[1], &cl[1], 12);
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	lwsync();
+	p[0] = cl[0] | s->vdq.valid_bit;
+	s->vdq.valid_bit ^= QB_VALID_BIT;
+
+	return 0;
+}
+
 static int qbman_swp_pull_mem_back(struct qbman_swp *s,
 				   struct qbman_pull_desc *d)
 {
@@ -1601,7 +1744,10 @@  static int qbman_swp_pull_mem_back(struct qbman_swp *s,
 
 int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
 {
-	return qbman_swp_pull_ptr(s, d);
+	if (!s->stash_off)
+		return qbman_swp_pull_ptr(s, d);
+	else
+		return qbman_swp_pull_cinh_direct(s, d);
 }
 
 /****************/
@@ -1638,7 +1784,10 @@  void qbman_swp_prefetch_dqrr_next(struct qbman_swp *s)
  */
 const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
 {
-	return qbman_swp_dqrr_next_ptr(s);
+	if (!s->stash_off)
+		return qbman_swp_dqrr_next_ptr(s);
+	else
+		return qbman_swp_dqrr_next_cinh_direct(s);
 }
 
 const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
@@ -1718,6 +1867,81 @@  const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
 	return p;
 }
 
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s)
+{
+	uint32_t verb;
+	uint32_t response_verb;
+	uint32_t flags;
+	const struct qbman_result *p;
+
+	/* Before using valid-bit to detect if something is there, we have to
+	 * handle the case of the DQRR reset bug...
+	 */
+	if (s->dqrr.reset_bug) {
+		/* We pick up new entries by cache-inhibited producer index,
+		 * which means that a non-coherent mapping would require us to
+		 * invalidate and read *only* once that PI has indicated that
+		 * there's an entry here. The first trip around the DQRR ring
+		 * will be much less efficient than all subsequent trips around
+		 * it...
+		 */
+		uint8_t pi = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_DQPI) &
+			     QMAN_DQRR_PI_MASK;
+
+		/* there are new entries if pi != next_idx */
+		if (pi == s->dqrr.next_idx)
+			return NULL;
+
+		/* if next_idx is/was the last ring index, and 'pi' is
+		 * different, we can disable the workaround as all the ring
+		 * entries have now been DMA'd to so valid-bit checking is
+		 * repaired. Note: this logic needs to be based on next_idx
+		 * (which increments one at a time), rather than on pi (which
+		 * can burst and wrap-around between our snapshots of it).
+		 */
+		QBMAN_BUG_ON((s->dqrr.dqrr_size - 1) < 0);
+		if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1u)) {
+			pr_debug("DEBUG: next_idx=%d, pi=%d, clear reset bug\n",
+				 s->dqrr.next_idx, pi);
+			s->dqrr.reset_bug = 0;
+		}
+	}
+	p = qbman_cinh_read_wo_shadow(&s->sys,
+			QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+
+	verb = p->dq.verb;
+
+	/* If the valid-bit isn't of the expected polarity, nothing there. Note,
+	 * in the DQRR reset bug workaround, we shouldn't need to skip these
+	 * check, because we've already determined that a new entry is available
+	 * and we've invalidated the cacheline before reading it, so the
+	 * valid-bit behaviour is repaired and should tell us what we already
+	 * knew from reading PI.
+	 */
+	if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit)
+		return NULL;
+
+	/* There's something there. Move "next_idx" attention to the next ring
+	 * entry (and prefetch it) before returning what we found.
+	 */
+	s->dqrr.next_idx++;
+	if (s->dqrr.next_idx == s->dqrr.dqrr_size) {
+		s->dqrr.next_idx = 0;
+		s->dqrr.valid_bit ^= QB_VALID_BIT;
+	}
+	/* If this is the final response to a volatile dequeue command
+	 * indicate that the vdq is no longer busy
+	 */
+	flags = p->dq.stat;
+	response_verb = verb & QBMAN_RESPONSE_VERB_MASK;
+	if ((response_verb == QBMAN_RESULT_DQ) &&
+	    (flags & QBMAN_DQ_STAT_VOLATILE) &&
+	    (flags & QBMAN_DQ_STAT_EXPIRED))
+		atomic_inc(&s->vdq.busy);
+
+	return p;
+}
+
 const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s)
 {
 	uint32_t verb;
@@ -2096,6 +2320,37 @@  static int qbman_swp_release_direct(struct qbman_swp *s,
 	return 0;
 }
 
+static int qbman_swp_release_cinh_direct(struct qbman_swp *s,
+				    const struct qbman_release_desc *d,
+				    const uint64_t *buffers,
+				    unsigned int num_buffers)
+{
+	uint32_t *p;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR);
+
+	pr_debug("RAR=%08x\n", rar);
+	if (!RAR_SUCCESS(rar))
+		return -EBUSY;
+
+	QBMAN_BUG_ON(!num_buffers || (num_buffers > 7));
+
+	/* Start the release command */
+	p = qbman_cinh_write_start_wo_shadow(&s->sys,
+				     QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+
+	/* Copy the caller's buffer pointers to the command */
+	memcpy_byte_by_byte(&p[2], buffers, num_buffers * sizeof(uint64_t));
+
+	/* Set the verb byte, have to substitute in the valid-bit and the
+	 * number of buffers.
+	 */
+	lwsync();
+	p[0] = cl[0] | RAR_VB(rar) | num_buffers;
+
+	return 0;
+}
+
 static int qbman_swp_release_mem_back(struct qbman_swp *s,
 				      const struct qbman_release_desc *d,
 				      const uint64_t *buffers,
@@ -2134,7 +2389,11 @@  int qbman_swp_release(struct qbman_swp *s,
 			     const uint64_t *buffers,
 			     unsigned int num_buffers)
 {
-	return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+	if (!s->stash_off)
+		return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+	else
+		return qbman_swp_release_cinh_direct(s, d, buffers,
+						num_buffers);
 }
 
 /*******************/
@@ -2157,8 +2416,8 @@  struct qbman_acquire_rslt {
 	uint64_t buf[7];
 };
 
-int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
-		      unsigned int num_buffers)
+static int qbman_swp_acquire_direct(struct qbman_swp *s, uint16_t bpid,
+				uint64_t *buffers, unsigned int num_buffers)
 {
 	struct qbman_acquire_desc *p;
 	struct qbman_acquire_rslt *r;
@@ -2202,6 +2461,61 @@  int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
 	return (int)r->num;
 }
 
+static int qbman_swp_acquire_cinh_direct(struct qbman_swp *s, uint16_t bpid,
+			uint64_t *buffers, unsigned int num_buffers)
+{
+	struct qbman_acquire_desc *p;
+	struct qbman_acquire_rslt *r;
+
+	if (!num_buffers || (num_buffers > 7))
+		return -EINVAL;
+
+	/* Start the management command */
+	p = qbman_swp_mc_start(s);
+
+	if (!p)
+		return -EBUSY;
+
+	/* Encode the caller-provided attributes */
+	p->bpid = bpid;
+	p->num = num_buffers;
+
+	/* Complete the management command */
+	r = qbman_swp_mc_complete_cinh(s, p, QBMAN_MC_ACQUIRE);
+	if (!r) {
+		pr_err("qbman: acquire from BPID %d failed, no response\n",
+		       bpid);
+		return -EIO;
+	}
+
+	/* Decode the outcome */
+	QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_MC_ACQUIRE);
+
+	/* Determine success or failure */
+	if (r->rslt != QBMAN_MC_RSLT_OK) {
+		pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n",
+		       bpid, r->rslt);
+		return -EIO;
+	}
+
+	QBMAN_BUG_ON(r->num > num_buffers);
+
+	/* Copy the acquired buffers to the caller's array */
+	u64_from_le32_copy(buffers, &r->buf[0], r->num);
+
+	return (int)r->num;
+}
+
+int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
+		      unsigned int num_buffers)
+{
+	if (!s->stash_off)
+		return qbman_swp_acquire_direct(s, bpid, buffers, num_buffers);
+	else
+		return qbman_swp_acquire_cinh_direct(s, bpid, buffers,
+					num_buffers);
+}
+
 /*****************/
 /* FQ management */
 /*****************/
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.h b/drivers/bus/fslmc/qbman/qbman_portal.h
index 3aaacae52..1cf791830 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/qbman_portal.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  *
  */
 
@@ -102,6 +102,7 @@  struct qbman_swp {
 		uint32_t ci;
 		int available;
 	} eqcr;
+	uint8_t stash_off;
 };
 
 /* -------------------------- */
@@ -118,7 +119,9 @@  struct qbman_swp {
  */
 void *qbman_swp_mc_start(struct qbman_swp *p);
 void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb);
+void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t cmd_verb);
 void *qbman_swp_mc_result(struct qbman_swp *p);
+void *qbman_swp_mc_result_cinh(struct qbman_swp *p);
 
 /* Wraps up submit + poll-for-result */
 static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
@@ -135,6 +138,20 @@  static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
 	return cmd;
 }
 
+static inline void *qbman_swp_mc_complete_cinh(struct qbman_swp *swp, void *cmd,
+					  uint8_t cmd_verb)
+{
+	int loopvar = 1000;
+
+	qbman_swp_mc_submit_cinh(swp, cmd, cmd_verb);
+	do {
+		cmd = qbman_swp_mc_result_cinh(swp);
+	} while (!cmd && loopvar--);
+	QBMAN_BUG_ON(!loopvar);
+
+	return cmd;
+}
+
 /* ---------------------- */
 /* Descriptors/cachelines */
 /* ---------------------- */
diff --git a/drivers/bus/fslmc/qbman/qbman_sys.h b/drivers/bus/fslmc/qbman/qbman_sys.h
index 55449edf3..61f817c47 100644
--- a/drivers/bus/fslmc/qbman/qbman_sys.h
+++ b/drivers/bus/fslmc/qbman/qbman_sys.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2019 NXP
+ * Copyright 2019-2020 NXP
  */
 /* qbman_sys_decl.h and qbman_sys.h are the two platform-specific files in the
  * driver. They are only included via qbman_private.h, which is itself a
@@ -190,6 +190,34 @@  static inline void qbman_cinh_write(struct qbman_swp_sys *s, uint32_t offset,
 #endif
 }
 
+static inline void *qbman_cinh_write_start_wo_shadow(struct qbman_swp_sys *s,
+						     uint32_t offset)
+{
+#ifdef QBMAN_CINH_TRACE
+	pr_info("qbman_cinh_write_start(%p:%d:0x%03x)\n",
+		s->addr_cinh, s->idx, offset);
+#endif
+	QBMAN_BUG_ON(offset & 63);
+	return (s->addr_cinh + offset);
+}
+
+static inline void qbman_cinh_write_complete(struct qbman_swp_sys *s,
+					     uint32_t offset, void *cmd)
+{
+	const uint32_t *shadow = cmd;
+	int loop;
+#ifdef QBMAN_CINH_TRACE
+	pr_info("qbman_cinh_write_complete(%p:%d:0x%03x) %p\n",
+		s->addr_cinh, s->idx, offset, shadow);
+	hexdump(cmd, 64);
+#endif
+	for (loop = 15; loop >= 1; loop--)
+		__raw_writel(shadow[loop], s->addr_cinh +
+					 offset + loop * 4);
+	lwsync();
+	__raw_writel(shadow[0], s->addr_cinh + offset);
+}
+
 static inline uint32_t qbman_cinh_read(struct qbman_swp_sys *s, uint32_t offset)
 {
 	uint32_t reg = __raw_readl(s->addr_cinh + offset);
@@ -200,6 +228,35 @@  static inline uint32_t qbman_cinh_read(struct qbman_swp_sys *s, uint32_t offset)
 	return reg;
 }
 
+static inline void *qbman_cinh_read_shadow(struct qbman_swp_sys *s,
+					   uint32_t offset)
+{
+	uint32_t *shadow = (uint32_t *)(s->cena + offset);
+	unsigned int loop;
+#ifdef QBMAN_CINH_TRACE
+	pr_info(" %s (%p:%d:0x%03x) %p\n", __func__,
+		s->addr_cinh, s->idx, offset, shadow);
+#endif
+
+	for (loop = 0; loop < 16; loop++)
+		shadow[loop] = __raw_readl(s->addr_cinh + offset
+					+ loop * 4);
+#ifdef QBMAN_CINH_TRACE
+	hexdump(shadow, 64);
+#endif
+	return shadow;
+}
+
+static inline void *qbman_cinh_read_wo_shadow(struct qbman_swp_sys *s,
+					      uint32_t offset)
+{
+#ifdef QBMAN_CINH_TRACE
+	pr_info("qbman_cinh_read(%p:%d:0x%03x)\n",
+		s->addr_cinh, s->idx, offset);
+#endif
+	return s->addr_cinh + offset;
+}
+
 static inline void *qbman_cena_write_start(struct qbman_swp_sys *s,
 					   uint32_t offset)
 {
@@ -476,6 +533,82 @@  static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 	return 0;
 }
 
+static inline int qbman_swp_sys_update(struct qbman_swp_sys *s,
+				     const struct qbman_swp_desc *d,
+				     uint8_t dqrr_size,
+				     int stash_off)
+{
+	uint32_t reg;
+	int i;
+	int cena_region_size = 4*1024;
+	uint8_t est = 1;
+#ifdef RTE_ARCH_64
+	uint8_t wn = CENA_WRITE_ENABLE;
+#else
+	uint8_t wn = CINH_WRITE_ENABLE;
+#endif
+
+	if (stash_off)
+		wn = CINH_WRITE_ENABLE;
+
+	QBMAN_BUG_ON(d->idx < 0);
+#ifdef QBMAN_CHECKING
+	/* We should never be asked to initialise for a portal that isn't in
+	 * the power-on state. (Ie. don't forget to reset portals when they are
+	 * decommissioned!)
+	 */
+	reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
+	QBMAN_BUG_ON(reg);
+#endif
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+			&& (d->cena_access_mode == qman_cena_fastest_access))
+		memset(s->addr_cena, 0, cena_region_size);
+	else {
+		/* Invalidate the portal memory.
+		 * This ensures no stale cache lines
+		 */
+		for (i = 0; i < cena_region_size; i += 64)
+			dccivac(s->addr_cena + i);
+	}
+
+	if (dpaa2_svr_family == SVR_LS1080A)
+		est = 0;
+
+	if (s->eqcr_mode == qman_eqcr_vb_array) {
+		reg = qbman_set_swp_cfg(dqrr_size, wn,
+					0, 3, 2, 3, 1, 1, 1, 1, 1, 1);
+	} else {
+		if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 &&
+			    (d->cena_access_mode == qman_cena_fastest_access))
+			reg = qbman_set_swp_cfg(dqrr_size, wn,
+						1, 3, 2, 0, 1, 1, 1, 1, 1, 1);
+		else
+			reg = qbman_set_swp_cfg(dqrr_size, wn,
+						est, 3, 2, 2, 1, 1, 1, 1, 1, 1);
+	}
+
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+			&& (d->cena_access_mode == qman_cena_fastest_access))
+		reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */
+		       1 << SWP_CFG_VPM_SHIFT |  /* VDQCR read triggered mode */
+		       1 << SWP_CFG_CPM_SHIFT;   /* CR read triggered mode */
+
+	qbman_cinh_write(s, QBMAN_CINH_SWP_CFG, reg);
+	reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
+	if (!reg) {
+		pr_err("The portal %d is not enabled!\n", s->idx);
+		return -1;
+	}
+
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+			&& (d->cena_access_mode == qman_cena_fastest_access)) {
+		qbman_cinh_write(s, QBMAN_CINH_SWP_EQCR_PI, QMAN_RT_MODE);
+		qbman_cinh_write(s, QBMAN_CINH_SWP_RCR_PI, QMAN_RT_MODE);
+	}
+
+	return 0;
+}
+
 static inline void qbman_swp_sys_finish(struct qbman_swp_sys *s)
 {
 	free(s->cena);