[v2,09/29] net/dpaa: enable Tx queue taildrop

Message ID: 20200707092244.12791-10-hemant.agrawal@nxp.com
State: New
Series: NXP DPAAx enhancements

Commit Message

Hemant Agrawal July 7, 2020, 9:22 a.m. UTC
From: Gagandeep Singh <g.singh@nxp.com>


Enable congestion handling/tail drop for TX queues.
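
Tx tail drop is disabled by default. The per-queue CGR threshold is read
from the DPAA_TX_TAILDROP_THRESHOLD environment variable at device init;
leaving it unset, or setting it to 0, keeps Tx tail drop off and the
driver continues to use the regular dpaa_eth_queue_tx() burst function.
As an illustrative example (the application and the value are arbitrary),
running testpmd with a 512-frame Tx threshold would look like:

    DPAA_TX_TAILDROP_THRESHOLD=512 ./testpmd -c 0x3 -n 1 -- -i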

Signed-off-by: Gagandeep Singh <g.singh@nxp.com>

---
 drivers/bus/dpaa/base/qbman/qman.c        |  43 +++++++++
 drivers/bus/dpaa/include/fsl_qman.h       |  17 ++++
 drivers/bus/dpaa/rte_bus_dpaa_version.map |   2 +
 drivers/net/dpaa/dpaa_ethdev.c            | 111 ++++++++++++++++++++--
 drivers/net/dpaa/dpaa_ethdev.h            |   1 +
 drivers/net/dpaa/dpaa_rxtx.c              |  71 ++++++++++++++
 drivers/net/dpaa/dpaa_rxtx.h              |   3 +
 7 files changed, 242 insertions(+), 6 deletions(-)
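
For reviewers, a minimal sketch of how the new pieces fit together (all
names below are introduced by this patch; the snippet restates code from
the diff rather than adding to it): when a Tx FQ's CGR crosses its
threshold, further enqueues are rejected and returned to software as ERN
messages on the portal's message ring (MR). The PMD registers a free
callback once at probe time and drains the MR in its Tx slow path:

	/* at probe time (rte_dpaa_probe) */
	qman_ern_register_cb(dpaa_free_mbuf);

	/* Tx burst function selected when a Tx CGR is configured */
	uint16_t
	dpaa_eth_queue_tx_slow(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
	{
		/* free the mbufs of frames rejected by the CGR */
		qman_ern_poll_free();
		return dpaa_eth_queue_tx(q, bufs, nb_bufs);
	}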

-- 
2.17.1

Patch

diff --git a/drivers/bus/dpaa/base/qbman/qman.c b/drivers/bus/dpaa/base/qbman/qman.c
index b596e79c2..447c09177 100644
--- a/drivers/bus/dpaa/base/qbman/qman.c
+++ b/drivers/bus/dpaa/base/qbman/qman.c
@@ -40,6 +40,8 @@ 
 			spin_unlock(&__fq478->fqlock); \
 	} while (0)
 
+static qman_cb_free_mbuf qman_free_mbuf_cb;
+
 static inline void fq_set(struct qman_fq *fq, u32 mask)
 {
 	dpaa_set_bits(mask, &fq->flags);
@@ -790,6 +792,47 @@  static inline void fq_state_change(struct qman_portal *p, struct qman_fq *fq,
 	FQUNLOCK(fq);
 }
 
+void
+qman_ern_register_cb(qman_cb_free_mbuf cb)
+{
+	qman_free_mbuf_cb = cb;
+}
+
+
+void
+qman_ern_poll_free(void)
+{
+	struct qman_portal *p = get_affine_portal();
+	u8 verb, num = 0;
+	const struct qm_mr_entry *msg;
+	const struct qm_fd *fd;
+	struct qm_mr_entry swapped_msg;
+
+	qm_mr_pvb_update(&p->p);
+	msg = qm_mr_current(&p->p);
+
+	while (msg != NULL) {
+		swapped_msg = *msg;
+		hw_fd_to_cpu(&swapped_msg.ern.fd);
+		verb = msg->ern.verb & QM_MR_VERB_TYPE_MASK;
+		fd = &swapped_msg.ern.fd;
+
+		if (unlikely(verb & 0x20)) {
+			printf("HW ERN notification, nothing to do\n");
+		} else {
+			if ((fd->bpid & 0xff) != 0xff)
+				qman_free_mbuf_cb(fd);
+		}
+
+		num++;
+		qm_mr_next(&p->p);
+		qm_mr_pvb_update(&p->p);
+		msg = qm_mr_current(&p->p);
+	}
+
+	qm_mr_cci_consume(&p->p, num);
+}
+
 static u32 __poll_portal_slow(struct qman_portal *p, u32 is)
 {
 	const struct qm_mr_entry *msg;
diff --git a/drivers/bus/dpaa/include/fsl_qman.h b/drivers/bus/dpaa/include/fsl_qman.h
index 78b698f39..0d9cfc339 100644
--- a/drivers/bus/dpaa/include/fsl_qman.h
+++ b/drivers/bus/dpaa/include/fsl_qman.h
@@ -1158,6 +1158,10 @@  typedef void (*qman_cb_mr)(struct qman_portal *qm, struct qman_fq *fq,
 /* This callback type is used when handling DCP ERNs */
 typedef void (*qman_cb_dc_ern)(struct qman_portal *qm,
 				const struct qm_mr_entry *msg);
+
+/* This callback type is used to free the mbufs of ERN frames */
+typedef uint16_t (*qman_cb_free_mbuf)(const struct qm_fd *fd);
+
 /*
  * s/w-visible states. Ie. tentatively scheduled + truly scheduled + active +
  * held-active + held-suspended are just "sched". Things like "retired" will not
@@ -1808,6 +1812,19 @@  __rte_internal
 int qman_enqueue_multi(struct qman_fq *fq, const struct qm_fd *fd, u32 *flags,
 		       int frames_to_send);
 
+/**
+ * qman_ern_poll_free - Poll the MR and invoke the registered callback to
+ * free mbufs when SW ERNs are received.
+ */
+__rte_internal
+void qman_ern_poll_free(void);
+
+/**
+ * qman_ern_register_cb - Register a callback to free buffers of ERN frames.
+ */
+__rte_internal
+void qman_ern_register_cb(qman_cb_free_mbuf cb);
+
 /**
  * qman_enqueue_multi_fq - Enqueue multiple frames to their respective frame
  * queues.
diff --git a/drivers/bus/dpaa/rte_bus_dpaa_version.map b/drivers/bus/dpaa/rte_bus_dpaa_version.map
index 46d42f7d6..8069b05af 100644
--- a/drivers/bus/dpaa/rte_bus_dpaa_version.map
+++ b/drivers/bus/dpaa/rte_bus_dpaa_version.map
@@ -61,6 +61,8 @@  INTERNAL {
 	qman_enqueue;
 	qman_enqueue_multi;
 	qman_enqueue_multi_fq;
+	qman_ern_poll_free;
+	qman_ern_register_cb;
 	qman_fq_fqid;
 	qman_fq_portal_irqsource_add;
 	qman_fq_portal_irqsource_remove;
diff --git a/drivers/net/dpaa/dpaa_ethdev.c b/drivers/net/dpaa/dpaa_ethdev.c
index f1c9a7151..fd2c0c681 100644
--- a/drivers/net/dpaa/dpaa_ethdev.c
+++ b/drivers/net/dpaa/dpaa_ethdev.c
@@ -1,7 +1,7 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  *
  *   Copyright 2016 Freescale Semiconductor, Inc. All rights reserved.
- *   Copyright 2017-2019 NXP
+ *   Copyright 2017-2020 NXP
  *
  */
 /* System headers */
@@ -86,9 +86,12 @@  static int dpaa_push_mode_max_queue = DPAA_DEFAULT_PUSH_MODE_QUEUE;
 static int dpaa_push_queue_idx; /* Queue index which are in push mode*/
 
 
-/* Per FQ Taildrop in frame count */
+/* Per RX FQ Taildrop in frame count */
 static unsigned int td_threshold = CGR_RX_PERFQ_THRESH;
 
+/* Per TX FQ Taildrop in frame count, disabled by default */
+static unsigned int td_tx_threshold;
+
 struct rte_dpaa_xstats_name_off {
 	char name[RTE_ETH_XSTATS_NAME_SIZE];
 	uint32_t offset;
@@ -275,7 +278,11 @@  static int dpaa_eth_dev_start(struct rte_eth_dev *dev)
 	PMD_INIT_FUNC_TRACE();
 
 	/* Change tx callback to the real one */
-	dev->tx_pkt_burst = dpaa_eth_queue_tx;
+	if (dpaa_intf->cgr_tx)
+		dev->tx_pkt_burst = dpaa_eth_queue_tx_slow;
+	else
+		dev->tx_pkt_burst = dpaa_eth_queue_tx;
+
 	fman_if_enable_rx(dpaa_intf->fif);
 
 	return 0;
@@ -867,6 +874,7 @@  int dpaa_eth_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx,
 	DPAA_PMD_INFO("Tx queue setup for queue index: %d fq_id (0x%x)",
 			queue_idx, dpaa_intf->tx_queues[queue_idx].fqid);
 	dev->data->tx_queues[queue_idx] = &dpaa_intf->tx_queues[queue_idx];
+
 	return 0;
 }
 
@@ -1236,9 +1244,19 @@  static int dpaa_rx_queue_init(struct qman_fq *fq, struct qman_cgr *cgr_rx,
 
 /* Initialise a Tx FQ */
 static int dpaa_tx_queue_init(struct qman_fq *fq,
-			      struct fman_if *fman_intf)
+			      struct fman_if *fman_intf,
+			      struct qman_cgr *cgr_tx)
 {
 	struct qm_mcc_initfq opts = {0};
+	struct qm_mcc_initcgr cgr_opts = {
+		.we_mask = QM_CGR_WE_CS_THRES |
+				QM_CGR_WE_CSTD_EN |
+				QM_CGR_WE_MODE,
+		.cgr = {
+			.cstd_en = QM_CGR_EN,
+			.mode = QMAN_CGR_MODE_FRAME
+		}
+	};
 	int ret;
 
 	ret = qman_create_fq(0, QMAN_FQ_FLAG_DYNAMIC_FQID |
@@ -1257,6 +1275,27 @@  static int dpaa_tx_queue_init(struct qman_fq *fq,
 	opts.fqd.context_a.hi = 0x80000000 | fman_dealloc_bufs_mask_hi;
 	opts.fqd.context_a.lo = 0 | fman_dealloc_bufs_mask_lo;
 	DPAA_PMD_DEBUG("init tx fq %p, fqid 0x%x", fq, fq->fqid);
+
+	if (cgr_tx) {
+		/* Enable tail drop with cgr on this queue */
+		qm_cgr_cs_thres_set64(&cgr_opts.cgr.cs_thres,
+				      td_tx_threshold, 0);
+		cgr_tx->cb = NULL;
+		ret = qman_create_cgr(cgr_tx, QMAN_CGR_FLAG_USE_INIT,
+				      &cgr_opts);
+		if (ret) {
+			DPAA_PMD_WARN(
+				"tx taildrop init fail on tx fqid 0x%x(ret=%d)",
+				fq->fqid, ret);
+			goto without_cgr;
+		}
+		opts.we_mask |= QM_INITFQ_WE_CGID;
+		opts.fqd.cgid = cgr_tx->cgrid;
+		opts.fqd.fq_ctrl |= QM_FQCTRL_CGE;
+		DPAA_PMD_DEBUG("Tx FQ tail drop enabled, threshold = %u",
+				td_tx_threshold);
+	}
+without_cgr:
 	ret = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &opts);
 	if (ret)
 		DPAA_PMD_ERR("init tx fqid 0x%x failed %d", fq->fqid, ret);
@@ -1309,6 +1348,7 @@  dpaa_dev_init(struct rte_eth_dev *eth_dev)
 	struct fman_if *fman_intf;
 	struct fman_if_bpool *bp, *tmp_bp;
 	uint32_t cgrid[DPAA_MAX_NUM_PCD_QUEUES];
+	uint32_t cgrid_tx[MAX_DPAA_CORES];
 	char eth_buf[RTE_ETHER_ADDR_FMT_SIZE];
 
 	PMD_INIT_FUNC_TRACE();
@@ -1319,7 +1359,10 @@  dpaa_dev_init(struct rte_eth_dev *eth_dev)
 		eth_dev->dev_ops = &dpaa_devops;
 		/* Plugging of UCODE burst API not supported in Secondary */
 		eth_dev->rx_pkt_burst = dpaa_eth_queue_rx;
-		eth_dev->tx_pkt_burst = dpaa_eth_queue_tx;
+		if (dpaa_intf->cgr_tx)
+			eth_dev->tx_pkt_burst = dpaa_eth_queue_tx_slow;
+		else
+			eth_dev->tx_pkt_burst = dpaa_eth_queue_tx;
 #ifdef CONFIG_FSL_QMAN_FQ_LOOKUP
 		qman_set_fq_lookup_table(
 				dpaa_intf->rx_queues->qman_fq_lookup_table);
@@ -1366,6 +1409,21 @@  dpaa_dev_init(struct rte_eth_dev *eth_dev)
 		return -ENOMEM;
 	}
 
+	memset(cgrid, 0, sizeof(cgrid));
+	memset(cgrid_tx, 0, sizeof(cgrid_tx));
+
+	/* If DPAA_TX_TAILDROP_THRESHOLD is set in the environment, use it;
+	 * a value of 0 leaves Tx tail drop disabled.
+	 */
+	if (getenv("DPAA_TX_TAILDROP_THRESHOLD")) {
+		td_tx_threshold = atoi(getenv("DPAA_TX_TAILDROP_THRESHOLD"));
+		DPAA_PMD_DEBUG("Tail drop threshold env configured: %u",
+			       td_tx_threshold);
+		/* clamp out-of-range values to the default threshold */
+		if (td_tx_threshold > UINT16_MAX)
+			td_tx_threshold = CGR_RX_PERFQ_THRESH;
+	}
+
 	/* If congestion control is enabled globally*/
 	if (td_threshold) {
 		dpaa_intf->cgr_rx = rte_zmalloc(NULL,
@@ -1414,9 +1472,36 @@  dpaa_dev_init(struct rte_eth_dev *eth_dev)
 		goto free_rx;
 	}
 
+	/* If Tx congestion control is enabled globally */
+	if (td_tx_threshold) {
+		dpaa_intf->cgr_tx = rte_zmalloc(NULL,
+			sizeof(struct qman_cgr) * MAX_DPAA_CORES,
+			MAX_CACHELINE);
+		if (!dpaa_intf->cgr_tx) {
+			DPAA_PMD_ERR("Failed to alloc mem for cgr_tx");
+			ret = -ENOMEM;
+			goto free_rx;
+		}
+
+		ret = qman_alloc_cgrid_range(&cgrid_tx[0], MAX_DPAA_CORES,
+					     1, 0);
+		if (ret != MAX_DPAA_CORES) {
+			DPAA_PMD_WARN("insufficient CGRIDs available");
+			ret = -EINVAL;
+			goto free_rx;
+		}
+	} else {
+		dpaa_intf->cgr_tx = NULL;
+	}
+
+
 	for (loop = 0; loop < MAX_DPAA_CORES; loop++) {
+		if (dpaa_intf->cgr_tx)
+			dpaa_intf->cgr_tx[loop].cgrid = cgrid_tx[loop];
+
 		ret = dpaa_tx_queue_init(&dpaa_intf->tx_queues[loop],
-					 fman_intf);
+			fman_intf,
+			dpaa_intf->cgr_tx ? &dpaa_intf->cgr_tx[loop] : NULL);
 		if (ret)
 			goto free_tx;
 		dpaa_intf->tx_queues[loop].dpaa_intf = dpaa_intf;
@@ -1487,6 +1572,7 @@  dpaa_dev_init(struct rte_eth_dev *eth_dev)
 
 free_rx:
 	rte_free(dpaa_intf->cgr_rx);
+	rte_free(dpaa_intf->cgr_tx);
 	rte_free(dpaa_intf->rx_queues);
 	dpaa_intf->rx_queues = NULL;
 	dpaa_intf->nb_rx_queues = 0;
@@ -1527,6 +1613,17 @@  dpaa_dev_uninit(struct rte_eth_dev *dev)
 	rte_free(dpaa_intf->cgr_rx);
 	dpaa_intf->cgr_rx = NULL;
 
+	/* Release TX congestion Groups */
+	if (dpaa_intf->cgr_tx) {
+		for (loop = 0; loop < MAX_DPAA_CORES; loop++)
+			qman_delete_cgr(&dpaa_intf->cgr_tx[loop]);
+
+		qman_release_cgrid_range(dpaa_intf->cgr_tx[0].cgrid,
+					 MAX_DPAA_CORES);
+		rte_free(dpaa_intf->cgr_tx);
+		dpaa_intf->cgr_tx = NULL;
+	}
+
 	rte_free(dpaa_intf->rx_queues);
 	dpaa_intf->rx_queues = NULL;
 
@@ -1631,6 +1728,8 @@  rte_dpaa_probe(struct rte_dpaa_driver *dpaa_drv __rte_unused,
 	eth_dev->device = &dpaa_dev->device;
 	dpaa_dev->eth_dev = eth_dev;
 
+	qman_ern_register_cb(dpaa_free_mbuf);
+
 	/* Invoke PMD device initialization function */
 	diag = dpaa_dev_init(eth_dev);
 	if (diag == 0) {
diff --git a/drivers/net/dpaa/dpaa_ethdev.h b/drivers/net/dpaa/dpaa_ethdev.h
index 6a6477ac8..d4261f885 100644
--- a/drivers/net/dpaa/dpaa_ethdev.h
+++ b/drivers/net/dpaa/dpaa_ethdev.h
@@ -111,6 +111,7 @@  struct dpaa_if {
 	struct qman_fq *rx_queues;
 	struct qman_cgr *cgr_rx;
 	struct qman_fq *tx_queues;
+	struct qman_cgr *cgr_tx;
 	struct qman_fq debug_queues[2];
 	uint16_t nb_rx_queues;
 	uint16_t nb_tx_queues;
diff --git a/drivers/net/dpaa/dpaa_rxtx.c b/drivers/net/dpaa/dpaa_rxtx.c
index 3aeecb7d2..819cad7c6 100644
--- a/drivers/net/dpaa/dpaa_rxtx.c
+++ b/drivers/net/dpaa/dpaa_rxtx.c
@@ -398,6 +398,69 @@  dpaa_eth_fd_to_mbuf(const struct qm_fd *fd, uint32_t ifid)
 	return mbuf;
 }
 
+uint16_t
+dpaa_free_mbuf(const struct qm_fd *fd)
+{
+	struct rte_mbuf *mbuf;
+	struct dpaa_bp_info *bp_info;
+	uint8_t format;
+	void *ptr;
+
+	bp_info = DPAA_BPID_TO_POOL_INFO(fd->bpid);
+	format = (fd->opaque & DPAA_FD_FORMAT_MASK) >> DPAA_FD_FORMAT_SHIFT;
+	if (unlikely(format == qm_fd_sg)) {
+		struct rte_mbuf *first_seg, *prev_seg, *cur_seg, *temp;
+		struct qm_sg_entry *sgt, *sg_temp;
+		void *vaddr, *sg_vaddr;
+		int i = 0;
+		uint16_t fd_offset = fd->offset;
+
+		vaddr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
+		if (!vaddr) {
+			DPAA_PMD_ERR("unable to convert physical address");
+			return -1;
+		}
+		sgt = vaddr + fd_offset;
+		sg_temp = &sgt[i++];
+		hw_sg_to_cpu(sg_temp);
+		temp = (struct rte_mbuf *)
+			((char *)vaddr - bp_info->meta_data_size);
+		sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info,
+						qm_sg_entry_get64(sg_temp));
+
+		first_seg = (struct rte_mbuf *)((char *)sg_vaddr -
+						bp_info->meta_data_size);
+		first_seg->nb_segs = 1;
+		prev_seg = first_seg;
+		while (i < DPAA_SGT_MAX_ENTRIES) {
+			sg_temp = &sgt[i++];
+			hw_sg_to_cpu(sg_temp);
+			sg_vaddr = DPAA_MEMPOOL_PTOV(bp_info,
+						qm_sg_entry_get64(sg_temp));
+			cur_seg = (struct rte_mbuf *)((char *)sg_vaddr -
+						      bp_info->meta_data_size);
+			first_seg->nb_segs += 1;
+			prev_seg->next = cur_seg;
+			if (sg_temp->final) {
+				cur_seg->next = NULL;
+				break;
+			}
+			prev_seg = cur_seg;
+		}
+
+		rte_pktmbuf_free_seg(temp);
+		rte_pktmbuf_free_seg(first_seg);
+		return 0;
+	}
+
+	ptr = DPAA_MEMPOOL_PTOV(bp_info, qm_fd_addr(fd));
+	mbuf = (struct rte_mbuf *)((char *)ptr - bp_info->meta_data_size);
+
+	rte_pktmbuf_free(mbuf);
+
+	return 0;
+}
+
 /* Specific for LS1043 */
 void
 dpaa_rx_cb_no_prefetch(struct qman_fq **fq, struct qm_dqrr_entry **dqrr,
@@ -1011,6 +1074,14 @@  dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
 	return sent;
 }
 
+uint16_t
+dpaa_eth_queue_tx_slow(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
+{
+	qman_ern_poll_free();
+
+	return dpaa_eth_queue_tx(q, bufs, nb_bufs);
+}
+
 uint16_t dpaa_eth_tx_drop_all(void *q  __rte_unused,
 			      struct rte_mbuf **bufs __rte_unused,
 		uint16_t nb_bufs __rte_unused)
diff --git a/drivers/net/dpaa/dpaa_rxtx.h b/drivers/net/dpaa/dpaa_rxtx.h
index 4f896fba1..fe8eb6dc7 100644
--- a/drivers/net/dpaa/dpaa_rxtx.h
+++ b/drivers/net/dpaa/dpaa_rxtx.h
@@ -254,6 +254,8 @@  struct annotations_t {
 
 uint16_t dpaa_eth_queue_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs);
 
+uint16_t dpaa_eth_queue_tx_slow(void *q, struct rte_mbuf **bufs,
+				uint16_t nb_bufs);
 uint16_t dpaa_eth_queue_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs);
 
 uint16_t dpaa_eth_tx_drop_all(void *q  __rte_unused,
@@ -266,6 +268,7 @@  int dpaa_eth_mbuf_to_sg_fd(struct rte_mbuf *mbuf,
 			   struct qm_fd *fd,
 			   uint32_t bpid);
 
+uint16_t dpaa_free_mbuf(const struct qm_fd *fd);
 void dpaa_rx_cb(struct qman_fq **fq,
 		struct qm_dqrr_entry **dqrr, void **bufs, int num_bufs);
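
A closing note on the CGR configuration used in dpaa_tx_queue_init()
above (a restated sketch under the patch's own names, not extra code):
the CGR runs in frame-count mode, so DPAA_TX_TAILDROP_THRESHOLD is
measured in frames, and the congestion-state threshold is encoded by
qm_cgr_cs_thres_set64() as a mantissa/exponent pair, so large values
are subject to rounding.

	struct qm_mcc_initcgr cgr_opts = {
		.we_mask = QM_CGR_WE_CS_THRES |	/* write the CS threshold */
			   QM_CGR_WE_CSTD_EN |	/* write tail-drop enable */
			   QM_CGR_WE_MODE,	/* write the counting mode */
		.cgr = {
			.cstd_en = QM_CGR_EN,		/* tail drop on congestion */
			.mode = QMAN_CGR_MODE_FRAME	/* count frames, not bytes */
		}
	};

	/* encode the threshold; the final 0 selects round-down */
	qm_cgr_cs_thres_set64(&cgr_opts.cgr.cs_thres, td_tx_threshold, 0);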