diff mbox series

[07/16] bus/fslmc: support portal migration

Message ID 20200302145829.27808-8-hemant.agrawal@nxp.com
State New
Headers show
Series NXP DPAAx fixes and enhancements | expand

Commit Message

Hemant Agrawal March 2, 2020, 2:58 p.m. UTC
From: Nipun Gupta <nipun.gupta@nxp.com>


This patch adds support for portal migration by disabling stashing
for portals that are used by non-affined threads, or by
threads affined to multiple cores.

Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>

---
 drivers/bus/fslmc/portal/dpaa2_hw_dpio.c      |  83 +--
 .../fslmc/qbman/include/fsl_qbman_portal.h    |   8 +-
 drivers/bus/fslmc/qbman/qbman_portal.c        | 554 +++++++++++++++++-
 drivers/bus/fslmc/qbman/qbman_portal.h        |  19 +-
 drivers/bus/fslmc/qbman/qbman_sys.h           | 135 ++++-
 5 files changed, 717 insertions(+), 82 deletions(-)

-- 
2.17.1

Comments

Ferruh Yigit March 3, 2020, 5:43 p.m. UTC | #1
On 3/2/2020 2:58 PM, Hemant Agrawal wrote:
> From: Nipun Gupta <nipun.gupta@nxp.com>

> 

> The patch adds support for portal migration by disabling stashing

> for the portals which is used in the non-affined threads, or on

> threads affined to multiple cores

> 

> Signed-off-by: Nipun Gupta <nipun.gupta@nxp.com>


<...>

> @@ -754,7 +856,7 @@ static int qbman_swp_enqueue_ring_mode_cinh_direct(

>  			return -EBUSY;

>  	}

>  

> -	p = qbman_cena_write_start_wo_shadow(&s->sys,

> +	p = qbman_cinh_write_start_wo_shadow(&s->sys,

>  			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));

>  	memcpy(&p[1], &cl[1], 28);

>  	memcpy(&p[8], fd, sizeof(*fd));

> @@ -762,8 +864,44 @@ static int qbman_swp_enqueue_ring_mode_cinh_direct(

>  

>  	/* Set the verb byte, have to substitute in the valid-bit */

>  	p[0] = cl[0] | s->eqcr.pi_vb;

> -	qbman_cena_write_complete_wo_shadow(&s->sys,

> +	s->eqcr.pi++;

> +	s->eqcr.pi &= full_mask;

> +	s->eqcr.available--;

> +	if (!(s->eqcr.pi & half_mask))

> +		s->eqcr.pi_vb ^= QB_VALID_BIT;

> +

> +	return 0;

> +}

> +

> +static int qbman_swp_enqueue_ring_mode_cinh_direct(

> +		struct qbman_swp *s,

> +		const struct qbman_eq_desc *d,

> +		const struct qbman_fd *fd)

> +{


This patch adds a second 'qbman_swp_enqueue_ring_mode_cinh_direct()'
function; it may be a git artifact.

The duplicated functions seem to be removed later in the series, but this patch
is wrong as it stands and needs fixing. Please make sure each patch in the
series is functional and compiles fine.
diff mbox series

Patch

diff --git a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
index 054d45306..2102d2981 100644
--- a/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
+++ b/drivers/bus/fslmc/portal/dpaa2_hw_dpio.c
@@ -53,10 +53,6 @@  static uint32_t io_space_count;
 /* Variable to store DPAA2 platform type */
 uint32_t dpaa2_svr_family;
 
-/* Physical core id for lcores running on dpaa2. */
-/* DPAA2 only support 1 lcore to 1 phy cpu mapping */
-static unsigned int dpaa2_cpu[RTE_MAX_LCORE];
-
 /* Variable to store DPAA2 DQRR size */
 uint8_t dpaa2_dqrr_size;
 /* Variable to store DPAA2 EQCR size */
@@ -159,7 +155,7 @@  dpaa2_affine_dpio_intr_to_respective_core(int32_t dpio_id, int cpu_id)
 		return;
 	}
 
-	cpu_mask = cpu_mask << dpaa2_cpu[cpu_id];
+	cpu_mask = cpu_mask << cpu_id;
 	snprintf(command, COMMAND_LEN, "echo %X > /proc/irq/%s/smp_affinity",
 		 cpu_mask, token);
 	ret = system(command);
@@ -228,17 +224,9 @@  static void dpaa2_dpio_intr_deinit(struct dpaa2_dpio_dev *dpio_dev)
 #endif
 
 static int
-dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev)
+dpaa2_configure_stashing(struct dpaa2_dpio_dev *dpio_dev, int cpu_id)
 {
 	int sdest, ret;
-	int cpu_id;
-
-	/* Set the Stashing Destination */
-	cpu_id = dpaa2_get_core_id();
-	if (cpu_id < 0) {
-		DPAA2_BUS_ERR("Thread not affined to a single core");
-		return -1;
-	}
 
 	/* Set the STASH Destination depending on Current CPU ID.
 	 * Valid values of SDEST are 4,5,6,7. Where,
@@ -277,6 +265,7 @@  static void dpaa2_put_qbman_swp(struct dpaa2_dpio_dev *dpio_dev)
 static struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(void)
 {
 	struct dpaa2_dpio_dev *dpio_dev = NULL;
+	int cpu_id;
 	int ret;
 
 	/* Get DPIO dev handle from list using index */
@@ -292,11 +281,19 @@  static struct dpaa2_dpio_dev *dpaa2_get_qbman_swp(void)
 	DPAA2_BUS_DEBUG("New Portal %p (%d) affined thread - %lu",
 			dpio_dev, dpio_dev->index, syscall(SYS_gettid));
 
-	ret = dpaa2_configure_stashing(dpio_dev);
-	if (ret) {
-		DPAA2_BUS_ERR("dpaa2_configure_stashing failed");
-		rte_atomic16_clear(&dpio_dev->ref_count);
-		return NULL;
+	/* Set the Stashing Destination */
+	cpu_id = dpaa2_get_core_id();
+	if (cpu_id < 0) {
+		DPAA2_BUS_WARN("Thread not affined to a single core");
+		if (dpaa2_svr_family != SVR_LX2160A)
+			qbman_swp_update(dpio_dev->sw_portal, 1);
+	} else {
+		ret = dpaa2_configure_stashing(dpio_dev, cpu_id);
+		if (ret) {
+			DPAA2_BUS_ERR("dpaa2_configure_stashing failed");
+			rte_atomic16_clear(&dpio_dev->ref_count);
+			return NULL;
+		}
 	}
 
 	ret = pthread_setspecific(dpaa2_portal_key, (void *)dpio_dev);
@@ -363,46 +360,6 @@  static void dpaa2_portal_finish(void *arg)
 	pthread_setspecific(dpaa2_portal_key, NULL);
 }
 
-/*
- * This checks for not supported lcore mappings as well as get the physical
- * cpuid for the lcore.
- * one lcore can only map to 1 cpu i.e. 1@10-14 not supported.
- * one cpu can be mapped to more than one lcores.
- */
-static int
-dpaa2_check_lcore_cpuset(void)
-{
-	unsigned int lcore_id, i;
-	int ret = 0;
-
-	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
-		dpaa2_cpu[lcore_id] = 0xffffffff;
-
-	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
-		rte_cpuset_t cpuset = rte_lcore_cpuset(lcore_id);
-
-		for (i = 0; i < CPU_SETSIZE; i++) {
-			if (!CPU_ISSET(i, &cpuset))
-				continue;
-			if (i >= RTE_MAX_LCORE) {
-				DPAA2_BUS_ERR("ERR:lcore map to core %u (>= %u) not supported",
-					i, RTE_MAX_LCORE);
-				ret = -1;
-				continue;
-			}
-			RTE_LOG(DEBUG, EAL, "lcore id = %u cpu=%u\n",
-				lcore_id, i);
-			if (dpaa2_cpu[lcore_id] != 0xffffffff) {
-				DPAA2_BUS_ERR("ERR:lcore map to multi-cpu not supported");
-				ret = -1;
-				continue;
-			}
-			dpaa2_cpu[lcore_id] = i;
-		}
-	}
-	return ret;
-}
-
 static int
 dpaa2_create_dpio_device(int vdev_fd,
 			 struct vfio_device_info *obj_info,
@@ -413,7 +370,6 @@  dpaa2_create_dpio_device(int vdev_fd,
 	struct qbman_swp_desc p_des;
 	struct dpio_attr attr;
 	int ret;
-	static int check_lcore_cpuset;
 
 	if (obj_info->num_regions < NUM_DPIO_REGIONS) {
 		DPAA2_BUS_ERR("Not sufficient number of DPIO regions");
@@ -433,13 +389,6 @@  dpaa2_create_dpio_device(int vdev_fd,
 	/* Using single portal  for all devices */
 	dpio_dev->mc_portal = rte_mcp_ptr_list[MC_PORTAL_INDEX];
 
-	if (!check_lcore_cpuset) {
-		check_lcore_cpuset = 1;
-
-		if (dpaa2_check_lcore_cpuset() < 0)
-			goto err;
-	}
-
 	dpio_dev->dpio = rte_zmalloc(NULL, sizeof(struct fsl_mc_io),
 				     RTE_CACHE_LINE_SIZE);
 	if (!dpio_dev->dpio) {
diff --git a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
index 88f0a9968..0d6364d99 100644
--- a/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/include/fsl_qbman_portal.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014 Freescale Semiconductor, Inc.
- * Copyright 2015-2019 NXP
+ * Copyright 2015-2020 NXP
  *
  */
 #ifndef _FSL_QBMAN_PORTAL_H
@@ -43,6 +43,15 @@  extern uint32_t dpaa2_svr_family;
  */
 struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d);
 
+/**
+ * qbman_swp_update() - Update portal cacheability attributes.
+ * @p: the given qbman swp portal
+ * @stash_off: non-zero to make the portal use its cache-inhibited access paths
+ *
+ * Return 0 for success, or the error code of the failed portal update.
+ */
+int qbman_swp_update(struct qbman_swp *p, int stash_off);
+
 /**
  * qbman_swp_finish() - Create and destroy a functional object representing
  * the given QBMan portal descriptor.
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.c b/drivers/bus/fslmc/qbman/qbman_portal.c
index d4223bdc8..a06b88dd2 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.c
+++ b/drivers/bus/fslmc/qbman/qbman_portal.c
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  *
  */
 
@@ -82,6 +82,10 @@  qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd);
 static int
+qbman_swp_enqueue_ring_mode_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd);
+static int
 qbman_swp_enqueue_ring_mode_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd);
@@ -99,6 +103,12 @@  qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
 		uint32_t *flags,
 		int num_frames);
 static int
+qbman_swp_enqueue_multiple_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		uint32_t *flags,
+		int num_frames);
+static int
 qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd,
@@ -118,6 +128,12 @@  qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
 		uint32_t *flags,
 		int num_frames);
 static int
+qbman_swp_enqueue_multiple_fd_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		struct qbman_fd **fd,
+		uint32_t *flags,
+		int num_frames);
+static int
 qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		struct qbman_fd **fd,
@@ -135,6 +151,11 @@  qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
 		const struct qbman_fd *fd,
 		int num_frames);
 static int
+qbman_swp_enqueue_multiple_desc_cinh_direct(struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		int num_frames);
+static int
 qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 		const struct qbman_eq_desc *d,
 		const struct qbman_fd *fd,
@@ -143,9 +164,12 @@  qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 static int
 qbman_swp_pull_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
 static int
+qbman_swp_pull_cinh_direct(struct qbman_swp *s, struct qbman_pull_desc *d);
+static int
 qbman_swp_pull_mem_back(struct qbman_swp *s, struct qbman_pull_desc *d);
 
 const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s);
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s);
 const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s);
 
 static int
@@ -153,6 +177,10 @@  qbman_swp_release_direct(struct qbman_swp *s,
 		const struct qbman_release_desc *d,
 		const uint64_t *buffers, unsigned int num_buffers);
 static int
+qbman_swp_release_cinh_direct(struct qbman_swp *s,
+		const struct qbman_release_desc *d,
+		const uint64_t *buffers, unsigned int num_buffers);
+static int
 qbman_swp_release_mem_back(struct qbman_swp *s,
 		const struct qbman_release_desc *d,
 		const uint64_t *buffers, unsigned int num_buffers);
@@ -327,6 +355,35 @@  struct qbman_swp *qbman_swp_init(const struct qbman_swp_desc *d)
 	return p;
 }
 
+/* Re-program the portal through qbman_swp_sys_update() and record the
+ * requested stashing state in the software portal.
+ *
+ * Returns 0 on success (including the case where nothing has to be done),
+ * otherwise the error code reported by qbman_swp_sys_update().
+ */
+int qbman_swp_update(struct qbman_swp *p, int stash_off)
+{
+	const struct qbman_swp_desc *d = &p->desc;
+	struct qbman_swp_sys *s = &p->sys;
+	int ret;
+
+	/* Nothing needs to be done for QBMAN rev >= 5000 with fast access */
+	if ((qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+			&& (d->cena_access_mode == qman_cena_fastest_access))
+		return 0;
+
+	ret = qbman_swp_sys_update(s, d, p->dqrr.dqrr_size, stash_off);
+	if (ret) {
+		pr_err("qbman_swp_sys_update() failed %d\n", ret);
+		return ret;
+	}
+
+	/* Only latch the new state once the portal update has succeeded */
+	p->stash_off = stash_off;
+
+	return 0;
+}
+
 void qbman_swp_finish(struct qbman_swp *p)
 {
 #ifdef QBMAN_CHECKING
@@ -462,6 +512,31 @@  void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
 #endif
 }
 
+/* Cache-inhibited counterpart of qbman_swp_mc_submit(): stamps the verb and
+ * the current valid-bit into the first byte of the command, then writes the
+ * command at QBMAN_CENA_SWP_CR through qbman_cinh_write_complete().
+ */
+void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t cmd_verb)
+{
+	uint8_t *v = cmd;
+#ifdef QBMAN_CHECKING
+	QBMAN_BUG_ON(p->mc.check != swp_mc_can_submit);
+#endif
+	/* TBD: "|=" is going to hurt performance. Need to move as many fields
+	 * out of word zero, and for those that remain, the "OR" needs to occur
+	 * at the caller side. This debug check helps to catch cases where the
+	 * caller wants to OR but has forgotten to do so.
+	 */
+	QBMAN_BUG_ON((*v & cmd_verb) != *v);
+	dma_wmb();
+	*v = cmd_verb | p->mc.valid_bit;
+	qbman_cinh_write_complete(&p->sys, QBMAN_CENA_SWP_CR, cmd);
+	clean(cmd);
+#ifdef QBMAN_CHECKING
+	p->mc.check = swp_mc_can_poll;
+#endif
+}
+
 void *qbman_swp_mc_result(struct qbman_swp *p)
 {
 	uint32_t *ret, verb;
@@ -500,6 +571,27 @@  void *qbman_swp_mc_result(struct qbman_swp *p)
 	return ret;
 }
 
+void *qbman_swp_mc_result_cinh(struct qbman_swp *p)
+{
+	uint32_t *ret, verb;
+#ifdef QBMAN_CHECKING
+	QBMAN_BUG_ON(p->mc.check != swp_mc_can_poll);
+#endif
+	ret = qbman_cinh_read_shadow(&p->sys,
+			      QBMAN_CENA_SWP_RR(p->mc.valid_bit));
+	/* Remove the valid-bit -
+	 * command completed iff the rest is non-zero
+	 */
+	verb = ret[0] & ~QB_VALID_BIT;
+	if (!verb)
+		return NULL;
+	p->mc.valid_bit ^= QB_VALID_BIT;
+#ifdef QBMAN_CHECKING
+	p->mc.check = swp_mc_can_start;
+#endif
+	return ret;
+}
+
 /***********/
 /* Enqueue */
 /***********/
@@ -640,6 +732,16 @@  static inline void qbman_write_eqcr_am_rt_register(struct qbman_swp *p,
 				     QMAN_RT_MODE);
 }
 
+static void memcpy_byte_by_byte(void *to, const void *from, size_t n)
+{
+	const uint8_t *src = from;
+	volatile uint8_t *dest = to;
+	size_t i;
+
+	for (i = 0; i < n; i++)
+		dest[i] = src[i];
+}
+
 
 static int qbman_swp_enqueue_array_mode_direct(struct qbman_swp *s,
 					       const struct qbman_eq_desc *d,
@@ -754,7 +856,7 @@  static int qbman_swp_enqueue_ring_mode_cinh_direct(
 			return -EBUSY;
 	}
 
-	p = qbman_cena_write_start_wo_shadow(&s->sys,
+	p = qbman_cinh_write_start_wo_shadow(&s->sys,
 			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
 	memcpy(&p[1], &cl[1], 28);
 	memcpy(&p[8], fd, sizeof(*fd));
@@ -762,8 +864,44 @@  static int qbman_swp_enqueue_ring_mode_cinh_direct(
 
 	/* Set the verb byte, have to substitute in the valid-bit */
 	p[0] = cl[0] | s->eqcr.pi_vb;
-	qbman_cena_write_complete_wo_shadow(&s->sys,
+	s->eqcr.pi++;
+	s->eqcr.pi &= full_mask;
+	s->eqcr.available--;
+	if (!(s->eqcr.pi & half_mask))
+		s->eqcr.pi_vb ^= QB_VALID_BIT;
+
+	return 0;
+}
+
+static int qbman_swp_enqueue_ring_mode_cinh_direct(
+		struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd)
+{
+	uint32_t *p;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t eqcr_ci, full_mask, half_mask;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+				eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return -EBUSY;
+	}
+
+	p = qbman_cinh_write_start_wo_shadow(&s->sys,
 			QBMAN_CENA_SWP_EQCR(s->eqcr.pi & half_mask));
+	memcpy_byte_by_byte(&p[1], &cl[1], 28);
+	memcpy_byte_by_byte(&p[8], fd, sizeof(*fd));
+	lwsync();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	p[0] = cl[0] | s->eqcr.pi_vb;
 	s->eqcr.pi++;
 	s->eqcr.pi &= full_mask;
 	s->eqcr.available--;
@@ -815,7 +953,10 @@  static int qbman_swp_enqueue_ring_mode(struct qbman_swp *s,
 				       const struct qbman_eq_desc *d,
 				       const struct qbman_fd *fd)
 {
-	return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+	if (!s->stash_off)
+		return qbman_swp_enqueue_ring_mode_ptr(s, d, fd);
+	else
+		return qbman_swp_enqueue_ring_mode_cinh_direct(s, d, fd);
 }
 
 int qbman_swp_enqueue(struct qbman_swp *s, const struct qbman_eq_desc *d,
@@ -966,6 +1107,67 @@  static int qbman_swp_enqueue_multiple_cinh_direct(
 	return num_enqueued;
 }
 
+static int qbman_swp_enqueue_multiple_cinh_direct(
+		struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		uint32_t *flags,
+		int num_frames)
+{
+	uint32_t *p = NULL;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+				eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
+
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cinh_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		memcpy_byte_by_byte(&p[1], &cl[1], 28);
+		memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd));
+		eqcr_pi++;
+	}
+
+	lwsync();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cinh_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+			struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+			d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+		}
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	return num_enqueued;
+}
+
 static int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s,
 					       const struct qbman_eq_desc *d,
 					       const struct qbman_fd *fd,
@@ -1025,7 +1227,12 @@  inline int qbman_swp_enqueue_multiple(struct qbman_swp *s,
 				      uint32_t *flags,
 				      int num_frames)
 {
-	return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags, num_frames);
+	if (!s->stash_off)
+		return qbman_swp_enqueue_multiple_ptr(s, d, fd, flags,
+						num_frames);
+	else
+		return qbman_swp_enqueue_multiple_cinh_direct(s, d, fd, flags,
+						num_frames);
 }
 
 static int qbman_swp_enqueue_multiple_fd_direct(struct qbman_swp *s,
@@ -1167,6 +1374,67 @@  static int qbman_swp_enqueue_multiple_fd_cinh_direct(
 	return num_enqueued;
 }
 
+static int qbman_swp_enqueue_multiple_fd_cinh_direct(
+		struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		struct qbman_fd **fd,
+		uint32_t *flags,
+		int num_frames)
+{
+	uint32_t *p = NULL;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+				eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
+
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cinh_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		memcpy_byte_by_byte(&p[1], &cl[1], 28);
+		memcpy_byte_by_byte(&p[8], fd[i], sizeof(struct qbman_fd));
+		eqcr_pi++;
+	}
+
+	lwsync();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cinh_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		if (flags && (flags[i] & QBMAN_ENQUEUE_FLAG_DCA)) {
+			struct qbman_eq_desc *d = (struct qbman_eq_desc *)p;
+
+			d->eq.dca = (1 << QB_ENQUEUE_CMD_DCA_EN_SHIFT) |
+				((flags[i]) & QBMAN_EQCR_DCA_IDXMASK);
+		}
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	return num_enqueued;
+}
+
 static int qbman_swp_enqueue_multiple_fd_mem_back(struct qbman_swp *s,
 						  const struct qbman_eq_desc *d,
 						  struct qbman_fd **fd,
@@ -1233,7 +1501,12 @@  inline int qbman_swp_enqueue_multiple_fd(struct qbman_swp *s,
 					 uint32_t *flags,
 					 int num_frames)
 {
-	return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags, num_frames);
+	if (!s->stash_off)
+		return qbman_swp_enqueue_multiple_fd_ptr(s, d, fd, flags,
+					num_frames);
+	else
+		return qbman_swp_enqueue_multiple_fd_cinh_direct(s, d, fd,
+					flags, num_frames);
 }
 
 static int qbman_swp_enqueue_multiple_desc_direct(struct qbman_swp *s,
@@ -1365,6 +1638,62 @@  static int qbman_swp_enqueue_multiple_desc_cinh_direct(
 	return num_enqueued;
 }
 
+static int qbman_swp_enqueue_multiple_desc_cinh_direct(
+		struct qbman_swp *s,
+		const struct qbman_eq_desc *d,
+		const struct qbman_fd *fd,
+		int num_frames)
+{
+	uint32_t *p;
+	const uint32_t *cl;
+	uint32_t eqcr_ci, eqcr_pi, half_mask, full_mask;
+	int i, num_enqueued = 0;
+
+	half_mask = (s->eqcr.pi_ci_mask>>1);
+	full_mask = s->eqcr.pi_ci_mask;
+	if (!s->eqcr.available) {
+		eqcr_ci = s->eqcr.ci;
+		s->eqcr.ci = qbman_cinh_read(&s->sys,
+				QBMAN_CINH_SWP_EQCR_CI) & full_mask;
+		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
+					eqcr_ci, s->eqcr.ci);
+		if (!s->eqcr.available)
+			return 0;
+	}
+
+	eqcr_pi = s->eqcr.pi;
+	num_enqueued = (s->eqcr.available < num_frames) ?
+			s->eqcr.available : num_frames;
+	s->eqcr.available -= num_enqueued;
+	/* Fill in the EQCR ring */
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cinh_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = qb_cl(&d[i]);
+		memcpy_byte_by_byte(&p[1], &cl[1], 28);
+		memcpy_byte_by_byte(&p[8], &fd[i], sizeof(*fd));
+		eqcr_pi++;
+	}
+
+	lwsync();
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	eqcr_pi = s->eqcr.pi;
+	for (i = 0; i < num_enqueued; i++) {
+		p = qbman_cinh_write_start_wo_shadow(&s->sys,
+				QBMAN_CENA_SWP_EQCR(eqcr_pi & half_mask));
+		cl = qb_cl(&d[i]);
+		p[0] = cl[0] | s->eqcr.pi_vb;
+		eqcr_pi++;
+		if (!(eqcr_pi & half_mask))
+			s->eqcr.pi_vb ^= QB_VALID_BIT;
+	}
+
+	s->eqcr.pi = eqcr_pi & full_mask;
+
+	return num_enqueued;
+}
+
 static int qbman_swp_enqueue_multiple_desc_mem_back(struct qbman_swp *s,
 					const struct qbman_eq_desc *d,
 					const struct qbman_fd *fd,
@@ -1426,7 +1755,13 @@  inline int qbman_swp_enqueue_multiple_desc(struct qbman_swp *s,
 					   const struct qbman_fd *fd,
 					   int num_frames)
 {
-	return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd, num_frames);
+	if (!s->stash_off)
+		return qbman_swp_enqueue_multiple_desc_ptr(s, d, fd,
+					num_frames);
+	else
+		return qbman_swp_enqueue_multiple_desc_cinh_direct(s, d, fd,
+					num_frames);
+
 }
 
 /*************************/
@@ -1574,6 +1909,30 @@  static int qbman_swp_pull_direct(struct qbman_swp *s,
 	return 0;
 }
 
+static int qbman_swp_pull_cinh_direct(struct qbman_swp *s,
+				 struct qbman_pull_desc *d)
+{
+	uint32_t *p;
+	uint32_t *cl = qb_cl(d);
+
+	if (!atomic_dec_and_test(&s->vdq.busy)) {
+		atomic_inc(&s->vdq.busy);
+		return -EBUSY;
+	}
+
+	d->pull.tok = s->sys.idx + 1;
+	s->vdq.storage = (void *)(size_t)d->pull.rsp_addr_virt;
+	p = qbman_cinh_write_start_wo_shadow(&s->sys, QBMAN_CENA_SWP_VDQCR);
+	memcpy_byte_by_byte(&p[1], &cl[1], 12);
+
+	/* Set the verb byte, have to substitute in the valid-bit */
+	lwsync();
+	p[0] = cl[0] | s->vdq.valid_bit;
+	s->vdq.valid_bit ^= QB_VALID_BIT;
+
+	return 0;
+}
+
 static int qbman_swp_pull_mem_back(struct qbman_swp *s,
 				   struct qbman_pull_desc *d)
 {
@@ -1601,7 +1960,10 @@  static int qbman_swp_pull_mem_back(struct qbman_swp *s,
 
 inline int qbman_swp_pull(struct qbman_swp *s, struct qbman_pull_desc *d)
 {
-	return qbman_swp_pull_ptr(s, d);
+	if (!s->stash_off)
+		return qbman_swp_pull_ptr(s, d);
+	else
+		return qbman_swp_pull_cinh_direct(s, d);
 }
 
 /****************/
@@ -1638,7 +2000,10 @@  void qbman_swp_prefetch_dqrr_next(struct qbman_swp *s)
  */
 inline const struct qbman_result *qbman_swp_dqrr_next(struct qbman_swp *s)
 {
-	return qbman_swp_dqrr_next_ptr(s);
+	if (!s->stash_off)
+		return qbman_swp_dqrr_next_ptr(s);
+	else
+		return qbman_swp_dqrr_next_cinh_direct(s);
 }
 
 const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
@@ -1718,6 +2083,81 @@  const struct qbman_result *qbman_swp_dqrr_next_direct(struct qbman_swp *s)
 	return p;
 }
 
+const struct qbman_result *qbman_swp_dqrr_next_cinh_direct(struct qbman_swp *s)
+{
+	uint32_t verb;
+	uint32_t response_verb;
+	uint32_t flags;
+	const struct qbman_result *p;
+
+	/* Before using valid-bit to detect if something is there, we have to
+	 * handle the case of the DQRR reset bug...
+	 */
+	if (s->dqrr.reset_bug) {
+		/* We pick up new entries by cache-inhibited producer index,
+		 * which means that a non-coherent mapping would require us to
+		 * invalidate and read *only* once that PI has indicated that
+		 * there's an entry here. The first trip around the DQRR ring
+		 * will be much less efficient than all subsequent trips around
+		 * it...
+		 */
+		uint8_t pi = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_DQPI) &
+			     QMAN_DQRR_PI_MASK;
+
+		/* there are new entries if pi != next_idx */
+		if (pi == s->dqrr.next_idx)
+			return NULL;
+
+		/* if next_idx is/was the last ring index, and 'pi' is
+		 * different, we can disable the workaround as all the ring
+		 * entries have now been DMA'd to so valid-bit checking is
+		 * repaired. Note: this logic needs to be based on next_idx
+		 * (which increments one at a time), rather than on pi (which
+		 * can burst and wrap-around between our snapshots of it).
+		 */
+		QBMAN_BUG_ON((s->dqrr.dqrr_size - 1) < 0);
+		if (s->dqrr.next_idx == (s->dqrr.dqrr_size - 1u)) {
+			pr_debug("DEBUG: next_idx=%d, pi=%d, clear reset bug\n",
+				 s->dqrr.next_idx, pi);
+			s->dqrr.reset_bug = 0;
+		}
+	}
+	p = qbman_cinh_read_wo_shadow(&s->sys,
+			QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
+
+	verb = p->dq.verb;
+
+	/* If the valid-bit isn't of the expected polarity, nothing there. Note,
+	 * in the DQRR reset bug workaround, we shouldn't need to skip these
+	 * check, because we've already determined that a new entry is available
+	 * and we've invalidated the cacheline before reading it, so the
+	 * valid-bit behaviour is repaired and should tell us what we already
+	 * knew from reading PI.
+	 */
+	if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit)
+		return NULL;
+
+	/* There's something there. Move "next_idx" attention to the next ring
+	 * entry (and prefetch it) before returning what we found.
+	 */
+	s->dqrr.next_idx++;
+	if (s->dqrr.next_idx == s->dqrr.dqrr_size) {
+		s->dqrr.next_idx = 0;
+		s->dqrr.valid_bit ^= QB_VALID_BIT;
+	}
+	/* If this is the final response to a volatile dequeue command
+	 * indicate that the vdq is no longer busy
+	 */
+	flags = p->dq.stat;
+	response_verb = verb & QBMAN_RESPONSE_VERB_MASK;
+	if ((response_verb == QBMAN_RESULT_DQ) &&
+	    (flags & QBMAN_DQ_STAT_VOLATILE) &&
+	    (flags & QBMAN_DQ_STAT_EXPIRED))
+		atomic_inc(&s->vdq.busy);
+
+	return p;
+}
+
 const struct qbman_result *qbman_swp_dqrr_next_mem_back(struct qbman_swp *s)
 {
 	uint32_t verb;
@@ -2096,6 +2536,37 @@  static int qbman_swp_release_direct(struct qbman_swp *s,
 	return 0;
 }
 
+static int qbman_swp_release_cinh_direct(struct qbman_swp *s,
+				    const struct qbman_release_desc *d,
+				    const uint64_t *buffers,
+				    unsigned int num_buffers)
+{
+	uint32_t *p;
+	const uint32_t *cl = qb_cl(d);
+	uint32_t rar = qbman_cinh_read(&s->sys, QBMAN_CINH_SWP_RAR);
+
+	pr_debug("RAR=%08x\n", rar);
+	if (!RAR_SUCCESS(rar))
+		return -EBUSY;
+
+	QBMAN_BUG_ON(!num_buffers || (num_buffers > 7));
+
+	/* Start the release command */
+	p = qbman_cinh_write_start_wo_shadow(&s->sys,
+				     QBMAN_CENA_SWP_RCR(RAR_IDX(rar)));
+
+	/* Copy the caller's buffer pointers to the command */
+	memcpy_byte_by_byte(&p[2], buffers, num_buffers * sizeof(uint64_t));
+
+	/* Set the verb byte, have to substitute in the valid-bit and the
+	 * number of buffers.
+	 */
+	lwsync();
+	p[0] = cl[0] | RAR_VB(rar) | num_buffers;
+
+	return 0;
+}
+
 static int qbman_swp_release_mem_back(struct qbman_swp *s,
 				      const struct qbman_release_desc *d,
 				      const uint64_t *buffers,
@@ -2134,7 +2605,11 @@  inline int qbman_swp_release(struct qbman_swp *s,
 			     const uint64_t *buffers,
 			     unsigned int num_buffers)
 {
-	return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+	if (!s->stash_off)
+		return qbman_swp_release_ptr(s, d, buffers, num_buffers);
+	else
+		return qbman_swp_release_cinh_direct(s, d, buffers,
+						num_buffers);
 }
 
 /*******************/
@@ -2157,8 +2632,8 @@  struct qbman_acquire_rslt {
 	uint64_t buf[7];
 };
 
-int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
-		      unsigned int num_buffers)
+static int qbman_swp_acquire_direct(struct qbman_swp *s, uint16_t bpid,
+				uint64_t *buffers, unsigned int num_buffers)
 {
 	struct qbman_acquire_desc *p;
 	struct qbman_acquire_rslt *r;
@@ -2202,6 +2677,61 @@  int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
 	return (int)r->num;
 }
 
+static int qbman_swp_acquire_cinh_direct(struct qbman_swp *s, uint16_t bpid,
+			uint64_t *buffers, unsigned int num_buffers)
+{
+	struct qbman_acquire_desc *p;
+	struct qbman_acquire_rslt *r;
+
+	if (!num_buffers || (num_buffers > 7))
+		return -EINVAL;
+
+	/* Start the management command */
+	p = qbman_swp_mc_start(s);
+
+	if (!p)
+		return -EBUSY;
+
+	/* Encode the caller-provided attributes */
+	p->bpid = bpid;
+	p->num = num_buffers;
+
+	/* Complete the management command */
+	r = qbman_swp_mc_complete_cinh(s, p, QBMAN_MC_ACQUIRE);
+	if (!r) {
+		pr_err("qbman: acquire from BPID %d failed, no response\n",
+		       bpid);
+		return -EIO;
+	}
+
+	/* Decode the outcome */
+	QBMAN_BUG_ON((r->verb & QBMAN_RESPONSE_VERB_MASK) != QBMAN_MC_ACQUIRE);
+
+	/* Determine success or failure */
+	if (r->rslt != QBMAN_MC_RSLT_OK) {
+		pr_err("Acquire buffers from BPID 0x%x failed, code=0x%02x\n",
+		       bpid, r->rslt);
+		return -EIO;
+	}
+
+	QBMAN_BUG_ON(r->num > num_buffers);
+
+	/* Copy the acquired buffers to the caller's array */
+	u64_from_le32_copy(buffers, &r->buf[0], r->num);
+
+	return (int)r->num;
+}
+
+int qbman_swp_acquire(struct qbman_swp *s, uint16_t bpid, uint64_t *buffers,
+		      unsigned int num_buffers)
+{
+	if (!s->stash_off)
+		return qbman_swp_acquire_direct(s, bpid, buffers, num_buffers);
+	else
+		return qbman_swp_acquire_cinh_direct(s, bpid, buffers,
+					num_buffers);
+}
+
 /*****************/
 /* FQ management */
 /*****************/
diff --git a/drivers/bus/fslmc/qbman/qbman_portal.h b/drivers/bus/fslmc/qbman/qbman_portal.h
index 3aaacae52..1cf791830 100644
--- a/drivers/bus/fslmc/qbman/qbman_portal.h
+++ b/drivers/bus/fslmc/qbman/qbman_portal.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2018-2019 NXP
+ * Copyright 2018-2020 NXP
  *
  */
 
@@ -102,6 +102,7 @@  struct qbman_swp {
 		uint32_t ci;
 		int available;
 	} eqcr;
+	uint8_t stash_off;
 };
 
 /* -------------------------- */
@@ -118,7 +119,9 @@  struct qbman_swp {
  */
 void *qbman_swp_mc_start(struct qbman_swp *p);
 void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, uint8_t cmd_verb);
+void qbman_swp_mc_submit_cinh(struct qbman_swp *p, void *cmd, uint8_t cmd_verb);
 void *qbman_swp_mc_result(struct qbman_swp *p);
+void *qbman_swp_mc_result_cinh(struct qbman_swp *p);
 
 /* Wraps up submit + poll-for-result */
 static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
@@ -135,6 +138,20 @@  static inline void *qbman_swp_mc_complete(struct qbman_swp *swp, void *cmd,
 	return cmd;
 }
 
+/* Submit + poll-for-result via the _cinh helpers; NULL when no reply came */
+static inline void *qbman_swp_mc_complete_cinh(struct qbman_swp *swp, void *cmd,
+					  uint8_t cmd_verb)
+{
+	int loopvar = 1000; /* retries allowed after the first poll */
+
+	qbman_swp_mc_submit_cinh(swp, cmd, cmd_verb);
+	do {
+		cmd = qbman_swp_mc_result_cinh(swp);
+	} while (!cmd && loopvar--);
+	QBMAN_BUG_ON(!cmd); /* timeout leaves loopvar at -1, so test cmd */
+	return cmd;
+}
+
 /* ---------------------- */
 /* Descriptors/cachelines */
 /* ---------------------- */
diff --git a/drivers/bus/fslmc/qbman/qbman_sys.h b/drivers/bus/fslmc/qbman/qbman_sys.h
index 55449edf3..61f817c47 100644
--- a/drivers/bus/fslmc/qbman/qbman_sys.h
+++ b/drivers/bus/fslmc/qbman/qbman_sys.h
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (C) 2014-2016 Freescale Semiconductor, Inc.
- * Copyright 2019 NXP
+ * Copyright 2019-2020 NXP
  */
 /* qbman_sys_decl.h and qbman_sys.h are the two platform-specific files in the
  * driver. They are only included via qbman_private.h, which is itself a
@@ -190,6 +190,34 @@  static inline void qbman_cinh_write(struct qbman_swp_sys *s, uint32_t offset,
 #endif
 }
 
+static inline void *qbman_cinh_write_start_wo_shadow(struct qbman_swp_sys *s,
+						     uint32_t offset)
+{
+#ifdef QBMAN_CINH_TRACE
+	pr_info("qbman_cinh_write_start(%p:%d:0x%03x)\n",
+		s->addr_cinh, s->idx, offset);
+#endif
+	QBMAN_BUG_ON(offset & 63); /* offset must be a multiple of 64 */
+	return (s->addr_cinh + offset); /* address only; nothing is written */
+}
+
+static inline void qbman_cinh_write_complete(struct qbman_swp_sys *s,
+					     uint32_t offset, void *cmd)
+{
+	const uint32_t *shadow = cmd; /* cmd read as sixteen 32-bit words */
+	int loop;
+#ifdef QBMAN_CINH_TRACE
+	pr_info("qbman_cinh_write_complete(%p:%d:0x%03x) %p\n",
+		s->addr_cinh, s->idx, offset, shadow);
+	hexdump(cmd, 64);
+#endif
+	for (loop = 15; loop >= 1; loop--) /* words 15..1; word 0 last */
+		__raw_writel(shadow[loop], s->addr_cinh +
+					 offset + loop * 4);
+	lwsync(); /* presumably orders the writes above before word 0 */
+	__raw_writel(shadow[0], s->addr_cinh + offset); /* final write */
+}
+
 static inline uint32_t qbman_cinh_read(struct qbman_swp_sys *s, uint32_t offset)
 {
 	uint32_t reg = __raw_readl(s->addr_cinh + offset);
@@ -200,6 +228,35 @@  static inline uint32_t qbman_cinh_read(struct qbman_swp_sys *s, uint32_t offset)
 	return reg;
 }
 
+/* Copy the 64 bytes at CINH @offset into s->cena at the same offset. */
+static inline void *qbman_cinh_read_shadow(struct qbman_swp_sys *s,
+					   uint32_t offset)
+{
+	uint32_t *dst = (uint32_t *)(s->cena + offset);
+	unsigned int word;
+#ifdef QBMAN_CINH_TRACE
+	pr_info(" %s (%p:%d:0x%03x) %p\n", __func__,
+		s->addr_cinh, s->idx, offset, dst);
+#endif
+	/* Sixteen 32-bit reads, lowest address first */
+	for (word = 0; word < 16; word++)
+		dst[word] = __raw_readl(s->addr_cinh + offset + word * 4);
+#ifdef QBMAN_CINH_TRACE
+	hexdump(dst, 64);
+#endif
+	return dst;
+}
+
+static inline void *qbman_cinh_read_wo_shadow(struct qbman_swp_sys *s,
+					      uint32_t offset)
+{
+#ifdef QBMAN_CINH_TRACE
+	pr_info("qbman_cinh_read(%p:%d:0x%03x)\n",
+		s->addr_cinh, s->idx, offset);
+#endif
+	return s->addr_cinh + offset; /* address only; no data is copied */
+}
+
 static inline void *qbman_cena_write_start(struct qbman_swp_sys *s,
 					   uint32_t offset)
 {
@@ -476,6 +533,82 @@  static inline int qbman_swp_sys_init(struct qbman_swp_sys *s,
 	return 0;
 }
 
+static inline int qbman_swp_sys_update(struct qbman_swp_sys *s,
+				     const struct qbman_swp_desc *d,
+				     uint8_t dqrr_size,
+				     int stash_off)
+{
+	uint32_t reg; /* SWP_CFG value built below, then its read-back */
+	int i; /* byte offset while invalidating the CENA lines */
+	int cena_region_size = 4*1024; /* bytes of addr_cena cleared below */
+	uint8_t est = 1; /* passed to qbman_set_swp_cfg; 0 on LS1080A */
+#ifdef RTE_ARCH_64
+	uint8_t wn = CENA_WRITE_ENABLE;
+#else
+	uint8_t wn = CINH_WRITE_ENABLE;
+#endif
+
+	if (stash_off)
+		wn = CINH_WRITE_ENABLE; /* stash_off overrides the default */
+
+	QBMAN_BUG_ON(d->idx < 0);
+#ifdef QBMAN_CHECKING
+	/* We should never be asked to initialise for a portal that isn't in
+	 * the power-on state. (i.e. don't forget to reset portals when they
+	 * are decommissioned!)
+	 */
+	reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG);
+	QBMAN_BUG_ON(reg);
+#endif
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+			&& (d->cena_access_mode == qman_cena_fastest_access))
+		memset(s->addr_cena, 0, cena_region_size);
+	else {
+		/* Invalidate the portal memory.
+		 * This ensures no stale cache lines
+		 */
+		for (i = 0; i < cena_region_size; i += 64)
+			dccivac(s->addr_cena + i);
+	}
+
+	if (dpaa2_svr_family == SVR_LS1080A)
+		est = 0;
+
+	if (s->eqcr_mode == qman_eqcr_vb_array) {
+		reg = qbman_set_swp_cfg(dqrr_size, wn,
+					0, 3, 2, 3, 1, 1, 1, 1, 1, 1);
+	} else {
+		if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000 &&
+			    (d->cena_access_mode == qman_cena_fastest_access))
+			reg = qbman_set_swp_cfg(dqrr_size, wn,
+						1, 3, 2, 0, 1, 1, 1, 1, 1, 1);
+		else
+			reg = qbman_set_swp_cfg(dqrr_size, wn,
+						est, 3, 2, 2, 1, 1, 1, 1, 1, 1);
+	}
+
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+			&& (d->cena_access_mode == qman_cena_fastest_access))
+		reg |= 1 << SWP_CFG_CPBS_SHIFT | /* memory-backed mode */
+		       1 << SWP_CFG_VPM_SHIFT |  /* VDQCR read triggered mode */
+		       1 << SWP_CFG_CPM_SHIFT;   /* CR read triggered mode */
+
+	qbman_cinh_write(s, QBMAN_CINH_SWP_CFG, reg); /* apply the config */
+	reg = qbman_cinh_read(s, QBMAN_CINH_SWP_CFG); /* read it back */
+	if (!reg) { /* a zero read-back is reported as a failure */
+		pr_err("The portal %d is not enabled!\n", s->idx);
+		return -1;
+	}
+
+	if ((d->qman_version & QMAN_REV_MASK) >= QMAN_REV_5000
+			&& (d->cena_access_mode == qman_cena_fastest_access)) {
+		qbman_cinh_write(s, QBMAN_CINH_SWP_EQCR_PI, QMAN_RT_MODE);
+		qbman_cinh_write(s, QBMAN_CINH_SWP_RCR_PI, QMAN_RT_MODE);
+	}
+
+	return 0; /* config written and read back non-zero */
+}
+
 static inline void qbman_swp_sys_finish(struct qbman_swp_sys *s)
 {
 	free(s->cena);