[net-next,v6,07/14] octeontx2-pf: cn10k: Use LMTST lines for NPA/NIX operations

Message ID	20210211155834.31874-8-gakula@marvell.com
State	Accepted
Commit	4c236d5dc8b86222dc155cd68e7934624264150f
Headers	show Return-Path: <linux-crypto-owner@kernel.org> From: Geetha sowjanya <gakula@marvell.com> To: <netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>, <linux-crypto@vger.kernel.org> CC: <davem@davemloft.net>, <kuba@kernel.org>, <sgoutham@marvell.com>, <lcherian@marvell.com>, <hkelam@marvell.com>, <sbhatta@marvell.com>, <jerinj@marvell.com>, <bbrezillon@kernel.org>, <arno@natisbad.org>, <schalla@marvell.com>, Geetha sowjanya <gakula@marvell.com> Subject: [net-next v6 07/14] octeontx2-pf: cn10k: Use LMTST lines for NPA/NIX operations Date: Thu, 11 Feb 2021 21:28:27 +0530 Message-ID: <20210211155834.31874-8-gakula@marvell.com> In-Reply-To: <20210211155834.31874-1-gakula@marvell.com> References: <20210211155834.31874-1-gakula@marvell.com> MIME-Version: 1.0 Content-Type: text/plain Precedence: bulk
Series	Add Marvell CN10K support \| expand [net-next,v6,00/14] Add Marvell CN10K support [net-next,v6,01/14] octeontx2-af: cn10k: Add mbox support for CN10K platform [net-next,v6,02/14] octeontx2-pf: cn10k: Add mbox support for CN10K [net-next,v6,03/14] octeontx2-af: cn10k: Update NIX/NPA context structure [net-next,v6,04/14] octeontx2-af: cn10k: Update NIX and NPA context in debugfs [net-next,v6,05/14] octeontx2-pf: cn10k: Initialise NIX context [net-next,v6,06/14] octeontx2-pf: cn10k: Map LMTST region [net-next,v6,07/14] octeontx2-pf: cn10k: Use LMTST lines for NPA/NIX operations [net-next,v6,08/14] octeontx2-af: cn10k: Add RPM MAC support [net-next,v6,09/14] octeontx2-af: cn10k: Add support for programmable channels [net-next,v6,10/14] octeontx2-af: cn10K: Add MTU configuration [net-next,v6,11/14] octeontx2-pf: cn10k: Get max mtu supported from admin function [net-next,v6,12/14] octeontx2-af: cn10k: Add RPM LMAC pause frame support [net-next,v6,13/14] octeontx2-af: cn10k: Add RPM Rx/Tx stats support [net-next,v6,14/14] octeontx2-af: cn10k: MAC internal loopback support

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile index 29c82b94b2dc..745aa8a19499 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/Makefile +++ b/drivers/net/ethernet/marvell/octeontx2/nic/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_OCTEONTX2_PF) += rvu_nicpf.o obj-$(CONFIG_OCTEONTX2_VF) += rvu_nicvf.o rvu_nicpf-y := otx2_pf.o otx2_common.o otx2_txrx.o otx2_ethtool.o \ - otx2_ptp.o otx2_flows.o + otx2_ptp.o otx2_flows.o cn10k.o rvu_nicvf-y := otx2_vf.o ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c new file mode 100644 index 000000000000..d6ca809edaed --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTx2 RVU Physcial Function ethernet driver + * + * Copyright (C) 2020 Marvell. + */ + +#include "cn10k.h" +#include "otx2_reg.h" +#include "otx2_struct.h" + +static struct dev_hw_ops otx2_hw_ops = { + .sq_aq_init = otx2_sq_aq_init, + .sqe_flush = otx2_sqe_flush, + .aura_freeptr = otx2_aura_freeptr, + .refill_pool_ptrs = otx2_refill_pool_ptrs, +}; + +static struct dev_hw_ops cn10k_hw_ops = { + .sq_aq_init = cn10k_sq_aq_init, + .sqe_flush = cn10k_sqe_flush, + .aura_freeptr = cn10k_aura_freeptr, + .refill_pool_ptrs = cn10k_refill_pool_ptrs, +}; + +int cn10k_pf_lmtst_init(struct otx2_nic *pf) +{ + int size, num_lines; + u64 base; + + if (!test_bit(CN10K_LMTST, &pf->hw.cap_flag)) { + pf->hw_ops = &otx2_hw_ops; + return 0; + } + + pf->hw_ops = &cn10k_hw_ops; + base = pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM) + + (MBOX_SIZE * (pf->total_vfs + 1)); + + size = pci_resource_len(pf->pdev, PCI_MBOX_BAR_NUM) - + (MBOX_SIZE * (pf->total_vfs + 1)); + + pf->hw.lmt_base = ioremap(base, size); + + if (!pf->hw.lmt_base) { + dev_err(pf->dev, "Unable to map PF LMTST region\n"); + return -ENOMEM; + } + + /* FIXME: Get the num of LMTST lines from LMT table */ + pf->tot_lmt_lines = size / LMT_LINE_SIZE; + num_lines = (pf->tot_lmt_lines - NIX_LMTID_BASE) / + pf->hw.tx_queues; + /* Number of LMT lines per SQ queues */ + pf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines; + + pf->nix_lmt_size = pf->nix_lmt_lines * LMT_LINE_SIZE; + return 0; +} + +int cn10k_vf_lmtst_init(struct otx2_nic *vf) +{ + int size, num_lines; + + if (!test_bit(CN10K_LMTST, &vf->hw.cap_flag)) { + vf->hw_ops = &otx2_hw_ops; + return 0; + } + + vf->hw_ops = &cn10k_hw_ops; + size = pci_resource_len(vf->pdev, PCI_MBOX_BAR_NUM); + vf->hw.lmt_base = ioremap_wc(pci_resource_start(vf->pdev, + PCI_MBOX_BAR_NUM), + size); + if (!vf->hw.lmt_base) { + dev_err(vf->dev, "Unable to map VF LMTST region\n"); + return -ENOMEM; + } + + vf->tot_lmt_lines = size / LMT_LINE_SIZE; + /* LMTST lines per SQ */ + num_lines = (vf->tot_lmt_lines - NIX_LMTID_BASE) / + vf->hw.tx_queues; + vf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines; + vf->nix_lmt_size = vf->nix_lmt_lines * LMT_LINE_SIZE; + return 0; +} +EXPORT_SYMBOL(cn10k_vf_lmtst_init); + +int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) +{ + struct nix_cn10k_aq_enq_req *aq; + struct otx2_nic *pfvf = dev; + struct otx2_snd_queue *sq; + + sq = &pfvf->qset.sq[qidx]; + sq->lmt_addr = (__force u64 *)((u64)pfvf->hw.nix_lmt_base + + (qidx * pfvf->nix_lmt_size)); + + /* Get memory to put this msg */ + aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox); + if (!aq) + return -ENOMEM; + + aq->sq.cq = pfvf->hw.rx_queues + qidx; + aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */ + aq->sq.cq_ena = 1; + aq->sq.ena = 1; + /* Only one SMQ is allocated, map all SQ's to that SMQ */ + aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; + /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/ + aq->sq.smq_rr_weight = OTX2_MAX_MTU; + aq->sq.default_chan = pfvf->hw.tx_chan_base; + aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ + aq->sq.sqb_aura = sqb_aura; + aq->sq.sq_int_ena = NIX_SQINT_BITS; + aq->sq.qint_idx = 0; + /* Due pipelining impact minimum 2000 unused SQ CQE's + * need to maintain to avoid CQ overflow. + */ + aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt)); + + /* Fill AQ info */ + aq->qidx = qidx; + aq->ctype = NIX_AQ_CTYPE_SQ; + aq->op = NIX_AQ_INSTOP_INIT; + + return otx2_sync_mbox_msg(&pfvf->mbox); +} + +#define NPA_MAX_BURST 16 +void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) +{ + struct otx2_nic *pfvf = dev; + u64 ptrs[NPA_MAX_BURST]; + int num_ptrs = 1; + dma_addr_t bufptr; + + /* Refill pool with new buffers */ + while (cq->pool_ptrs) { + if (otx2_alloc_buffer(pfvf, cq, &bufptr)) { + if (num_ptrs--) + __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs, + num_ptrs, + cq->rbpool->lmt_addr); + break; + } + cq->pool_ptrs--; + ptrs[num_ptrs] = (u64)bufptr + OTX2_HEAD_ROOM; + num_ptrs++; + if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) { + __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs, + num_ptrs, + cq->rbpool->lmt_addr); + num_ptrs = 1; + } + } +} + +void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx) +{ + struct otx2_nic *pfvf = dev; + int lmt_id = NIX_LMTID_BASE + (qidx * pfvf->nix_lmt_lines); + u64 val = 0, tar_addr = 0; + + /* FIXME: val[0:10] LMT_ID. + * [12:15] no of LMTST - 1 in the burst. + * [19:63] data size of each LMTST in the burst except first. + */ + val = (lmt_id & 0x7FF); + /* Target address for LMTST flush tells HW how many 128bit + * words are present. + * tar_addr[6:4] size of first LMTST - 1 in units of 128b. + */ + tar_addr |= sq->io_addr | (((size / 16) - 1) & 0x7) << 4; + dma_wmb(); + memcpy(sq->lmt_addr, sq->sqe_base, size); + cn10k_lmt_flush(val, tar_addr); + + sq->head++; + sq->head &= (sq->sqe_cnt - 1); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h new file mode 100644 index 000000000000..e0bc595cbb78 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTx2 RVU Ethernet driver + * + * Copyright (C) 2020 Marvell. + */ + +#ifndef CN10K_H +#define CN10K_H + +#include "otx2_common.h" + +void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); +void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx); +int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); +int cn10k_pf_lmtst_init(struct otx2_nic *pf); +int cn10k_vf_lmtst_init(struct otx2_nic *vf); +#endif /* CN10K_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index dbbdc3453f1a..2779802eed84 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -15,6 +15,7 @@ #include "otx2_reg.h" #include "otx2_common.h" #include "otx2_struct.h" +#include "cn10k.h" static void otx2_nix_rq_op_stats(struct queue_stats *stats, struct otx2_nic *pfvf, int qidx) @@ -526,6 +527,26 @@ static int otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool, return ret; } +int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, + dma_addr_t *dma) +{ + if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, dma))) { + struct refill_work *work; + struct delayed_work *dwork; + + work = &pfvf->refill_wrk[cq->cq_idx]; + dwork = &work->pool_refill_work; + /* Schedule a task if no other task is running */ + if (!cq->refill_task_sched) { + cq->refill_task_sched = true; + schedule_delayed_work(dwork, + msecs_to_jiffies(100)); + } + return -ENOMEM; + } + return 0; +} + void otx2_tx_timeout(struct net_device *netdev, unsigned int txq) { struct otx2_nic *pfvf = netdev_priv(netdev); @@ -728,9 +749,6 @@ void otx2_sqb_flush(struct otx2_nic *pfvf) #define RQ_PASS_LVL_AURA (255 - ((95 * 256) / 100)) /* RED when 95% is full */ #define RQ_DROP_LVL_AURA (255 - ((99 * 256) / 100)) /* Drop when 99% is full */ -/* Send skid of 2000 packets required for CQ size of 4K CQEs. */ -#define SEND_CQ_SKID 2000 - static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura) { struct otx2_qset *qset = &pfvf->qset; @@ -764,45 +782,14 @@ static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura) return otx2_sync_mbox_msg(&pfvf->mbox); } -static int cn10k_sq_aq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) -{ - struct nix_cn10k_aq_enq_req *aq; - - /* Get memory to put this msg */ - aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox); - if (!aq) - return -ENOMEM; - - aq->sq.cq = pfvf->hw.rx_queues + qidx; - aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */ - aq->sq.cq_ena = 1; - aq->sq.ena = 1; - /* Only one SMQ is allocated, map all SQ's to that SMQ */ - aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0]; - /* FIXME: set based on NIX_AF_DWRR_RPM_MTU*/ - aq->sq.smq_rr_weight = OTX2_MAX_MTU; - aq->sq.default_chan = pfvf->hw.tx_chan_base; - aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */ - aq->sq.sqb_aura = sqb_aura; - aq->sq.sq_int_ena = NIX_SQINT_BITS; - aq->sq.qint_idx = 0; - /* Due pipelining impact minimum 2000 unused SQ CQE's - * need to maintain to avoid CQ overflow. - */ - aq->sq.cq_limit = ((SEND_CQ_SKID * 256) / (pfvf->qset.sqe_cnt)); - - /* Fill AQ info */ - aq->qidx = qidx; - aq->ctype = NIX_AQ_CTYPE_SQ; - aq->op = NIX_AQ_INSTOP_INIT; - - return otx2_sync_mbox_msg(&pfvf->mbox); -} - -static int otx2_sq_aq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) +int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) { + struct otx2_nic *pfvf = dev; + struct otx2_snd_queue *sq; struct nix_aq_enq_req *aq; + sq = &pfvf->qset.sq[qidx]; + sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx)); /* Get memory to put this msg */ aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox); if (!aq) @@ -873,16 +860,12 @@ static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) sq->sqe_thresh = ((sq->num_sqbs * sq->sqe_per_sqb) * 10) / 100; sq->aura_id = sqb_aura; sq->aura_fc_addr = pool->fc_addr->base; - sq->lmt_addr = (__force u64 *)(pfvf->reg_base + LMT_LF_LMTLINEX(qidx)); sq->io_addr = (__force u64)otx2_get_regaddr(pfvf, NIX_LF_OP_SENDX(0)); sq->stats.bytes = 0; sq->stats.pkts = 0; - if (is_dev_otx2(pfvf->pdev)) - return otx2_sq_aq_init(pfvf, qidx, sqb_aura); - else - return cn10k_sq_aq_init(pfvf, qidx, sqb_aura); + return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); } @@ -987,7 +970,7 @@ static void otx2_pool_refill_task(struct work_struct *work) } return; } - otx2_aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); + pfvf->hw_ops->aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM); cq->pool_ptrs--; } cq->refill_task_sched = false; @@ -1231,6 +1214,11 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, pool->rbsize = buf_size; + /* Set LMTST addr for NPA batch free */ + if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) + pool->lmt_addr = (__force u64 *)((u64)pfvf->hw.npa_lmt_base + + (pool_id * LMT_LINE_SIZE)); + /* Initialize this pool's context via AF */ aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); if (!aq) { @@ -1319,7 +1307,7 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf) for (ptr = 0; ptr < num_sqbs; ptr++) { if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) return -ENOMEM; - otx2_aura_freeptr(pfvf, pool_id, bufptr); + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, bufptr); sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr; } } @@ -1369,8 +1357,8 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf) for (ptr = 0; ptr < num_ptrs; ptr++) { if (otx2_alloc_rbuf(pfvf, pool, &bufptr)) return -ENOMEM; - otx2_aura_freeptr(pfvf, pool_id, - bufptr + OTX2_HEAD_ROOM); + pfvf->hw_ops->aura_freeptr(pfvf, pool_id, + bufptr + OTX2_HEAD_ROOM); } } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 9ac9b420dd95..51aaa6ae0fd3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -50,6 +50,9 @@ enum arua_mapped_qtypes { #define NIX_LF_ERR_VEC 0x81 #define NIX_LF_POISON_VEC 0x82 +/* Send skid of 2000 packets required for CQ size of 4K CQEs. */ +#define SEND_CQ_SKID 2000 + /* RSS configuration */ struct otx2_rss_ctx { u8 ind_tbl[MAX_RSS_INDIR_TBL_SIZE]; @@ -275,9 +278,18 @@ struct otx2_flow_config { struct list_head flow_list; }; +struct dev_hw_ops { + int (*sq_aq_init)(void *dev, u16 qidx, u16 sqb_aura); + void (*sqe_flush)(void *dev, struct otx2_snd_queue *sq, + int size, int qidx); + void (*refill_pool_ptrs)(void *dev, struct otx2_cq_queue *cq); + void (*aura_freeptr)(void *dev, int aura, u64 buf); +}; + struct otx2_nic { void __iomem *reg_base; struct net_device *netdev; + struct dev_hw_ops *hw_ops; void *iommu_domain; u16 max_frs; u16 rbsize; /* Receive buffer size */ @@ -507,10 +519,51 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr) } #else -#define otx2_write128(lo, hi, addr) +#define otx2_write128(lo, hi, addr) writeq((hi) | (lo), addr) #define otx2_atomic64_add(incr, ptr) ({ *ptr += incr; }) #endif +static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, + u64 *ptrs, u64 num_ptrs, + u64 *lmt_addr) +{ + u64 size = 0, count_eot = 0; + u64 tar_addr, val = 0; + + tar_addr = (__force u64)otx2_get_regaddr(pfvf, NPA_LF_AURA_BATCH_FREE0); + /* LMTID is same as AURA Id */ + val = (aura & 0x7FF) | BIT_ULL(63); + /* Set if [127:64] of last 128bit word has a valid pointer */ + count_eot = (num_ptrs % 2) ? 0ULL : 1ULL; + /* Set AURA ID to free pointer */ + ptrs[0] = (count_eot << 32) | (aura & 0xFFFFF); + /* Target address for LMTST flush tells HW how many 128bit + * words are valid from NPA_LF_AURA_BATCH_FREE0. + * + * tar_addr[6:4] is LMTST size-1 in units of 128b. + */ + if (num_ptrs > 2) { + size = (sizeof(u64) * num_ptrs) / 16; + if (!count_eot) + size++; + tar_addr |= ((size - 1) & 0x7) << 4; + } + memcpy(lmt_addr, ptrs, sizeof(u64) * num_ptrs); + /* Perform LMTST flush */ + cn10k_lmt_flush(val, tar_addr); +} + +static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) +{ + struct otx2_nic *pfvf = dev; + struct otx2_pool *pool; + u64 ptrs[2]; + + pool = &pfvf->qset.pool[aura]; + ptrs[1] = buf; + __cn10k_aura_freeptr(pfvf, aura, ptrs, 2, pool->lmt_addr); +} + /* Alloc pointer from pool/aura */ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) { @@ -522,11 +575,12 @@ static inline u64 otx2_aura_allocptr(struct otx2_nic *pfvf, int aura) } /* Free pointer to a pool/aura */ -static inline void otx2_aura_freeptr(struct otx2_nic *pfvf, - int aura, u64 buf) +static inline void otx2_aura_freeptr(void *dev, int aura, u64 buf) { - otx2_write128(buf, (u64)aura | BIT_ULL(63), - otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0)); + struct otx2_nic *pfvf = dev; + void __iomem *addr = otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0); + + otx2_write128(buf, (u64)aura | BIT_ULL(63), addr); } static inline int otx2_get_pool_idx(struct otx2_nic *pfvf, int type, int idx) @@ -681,6 +735,10 @@ void otx2_ctx_disable(struct mbox *mbox, int type, bool npa); int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable); void otx2_cleanup_rx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); void otx2_cleanup_tx_cqes(struct otx2_nic *pfvf, struct otx2_cq_queue *cq); +int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); +int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura); +int otx2_alloc_buffer(struct otx2_nic *pfvf, struct otx2_cq_queue *cq, + dma_addr_t *dma); /* RSS configuration APIs*/ int otx2_rss_init(struct otx2_nic *pfvf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index afafaec88c8b..f87cfcfc2832 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -22,6 +22,7 @@ #include "otx2_txrx.h" #include "otx2_struct.h" #include "otx2_ptp.h" +#include "cn10k.h" #include <rvu_trace.h> #define DRV_NAME "rvu_nicpf" @@ -46,39 +47,6 @@ enum { static int otx2_config_hw_tx_tstamp(struct otx2_nic *pfvf, bool enable); static int otx2_config_hw_rx_tstamp(struct otx2_nic *pfvf, bool enable); -static int cn10k_lmtst_init(struct otx2_nic *pf) -{ - int size, num_lines; - u64 base; - - if (!test_bit(CN10K_LMTST, &pf->hw.cap_flag)) - return 0; - - base = pci_resource_start(pf->pdev, PCI_MBOX_BAR_NUM) + - (MBOX_SIZE * (pf->total_vfs + 1)); - - size = pci_resource_len(pf->pdev, PCI_MBOX_BAR_NUM) - - (MBOX_SIZE * (pf->total_vfs + 1)); - - pf->hw.lmt_base = ioremap(base, size); - - if (!pf->hw.lmt_base) { - dev_err(pf->dev, "Unable to map PF LMTST region\n"); - return -ENOMEM; - } - - /* FIXME: Get the num of LMTST lines from LMT table */ - pf->tot_lmt_lines = size / LMT_LINE_SIZE; - num_lines = (pf->tot_lmt_lines - NIX_LMTID_BASE) / - pf->hw.tx_queues; - /* Number of LMT lines per SQ queues */ - pf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines; - - pf->nix_lmt_size = pf->nix_lmt_lines * LMT_LINE_SIZE; - - return 0; -} - static int otx2_change_mtu(struct net_device *netdev, int new_mtu) { bool if_up = netif_running(netdev); @@ -2404,7 +2372,7 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_detach_rsrc; - err = cn10k_lmtst_init(pf); + err = cn10k_pf_lmtst_init(pf); if (err) goto err_detach_rsrc; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h index 1e052d76a580..21b811c6ee0f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_reg.h @@ -94,6 +94,7 @@ #define NPA_LF_QINTX_INT_W1S(a) (NPA_LFBASE | 0x318 | (a) << 12) #define NPA_LF_QINTX_ENA_W1S(a) (NPA_LFBASE | 0x320 | (a) << 12) #define NPA_LF_QINTX_ENA_W1C(a) (NPA_LFBASE | 0x330 | (a) << 12) +#define NPA_LF_AURA_BATCH_FREE0 (NPA_LFBASE | 0x400) /* NIX LF registers */ #define NIX_LFBASE (BLKTYPE_NIX << RVU_FUNC_BLKADDR_SHIFT) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c index 68f80e75c8c7..59a7bd88d907 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c @@ -17,6 +17,7 @@ #include "otx2_struct.h" #include "otx2_txrx.h" #include "otx2_ptp.h" +#include "cn10k.h" #define CQE_ADDR(CQ, idx) ((CQ)->cqe_base + ((CQ)->cqe_size * (idx))) @@ -199,7 +200,8 @@ static void otx2_free_rcv_seg(struct otx2_nic *pfvf, struct nix_cqe_rx_s *cqe, sg = (struct nix_rx_sg_s *)start; seg_addr = &sg->seg_addr; for (seg = 0; seg < sg->segs; seg++, seg_addr++) - otx2_aura_freeptr(pfvf, qidx, *seg_addr & ~0x07ULL); + pfvf->hw_ops->aura_freeptr(pfvf, qidx, + *seg_addr & ~0x07ULL); start += sizeof(*sg); } } @@ -304,7 +306,6 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf, { struct nix_cqe_rx_s *cqe; int processed_cqe = 0; - dma_addr_t bufptr; while (likely(processed_cqe < budget)) { cqe = (struct nix_cqe_rx_s *)CQE_ADDR(cq, cq->cq_head); @@ -330,28 +331,23 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf, if (unlikely(!cq->pool_ptrs)) return 0; - /* Refill pool with new buffers */ + pfvf->hw_ops->refill_pool_ptrs(pfvf, cq); + + return processed_cqe; +} + +void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) +{ + struct otx2_nic *pfvf = dev; + dma_addr_t bufptr; + while (cq->pool_ptrs) { - if (unlikely(__otx2_alloc_rbuf(pfvf, cq->rbpool, &bufptr))) { - struct refill_work *work; - struct delayed_work *dwork; - - work = &pfvf->refill_wrk[cq->cq_idx]; - dwork = &work->pool_refill_work; - /* Schedule a task if no other task is running */ - if (!cq->refill_task_sched) { - cq->refill_task_sched = true; - schedule_delayed_work(dwork, - msecs_to_jiffies(100)); - } + if (otx2_alloc_buffer(pfvf, cq, &bufptr)) break; - } otx2_aura_freeptr(pfvf, cq->cq_idx, bufptr + OTX2_HEAD_ROOM); cq->pool_ptrs--; } - - return processed_cqe; } static int otx2_tx_napi_handler(struct otx2_nic *pfvf, @@ -438,7 +434,8 @@ int otx2_napi_handler(struct napi_struct *napi, int budget) return workdone; } -static void otx2_sqe_flush(struct otx2_snd_queue *sq, int size) +void otx2_sqe_flush(void *dev, struct otx2_snd_queue *sq, + int size, int qidx) { u64 status; @@ -796,7 +793,7 @@ static void otx2_sq_append_tso(struct otx2_nic *pfvf, struct otx2_snd_queue *sq, sqe_hdr->sizem1 = (offset / 16) - 1; /* Flush SQE to HW */ - otx2_sqe_flush(sq, offset); + pfvf->hw_ops->sqe_flush(pfvf, sq, offset, qidx); } } @@ -915,7 +912,7 @@ bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, netdev_tx_sent_queue(txq, skb->len); /* Flush SQE to HW */ - otx2_sqe_flush(sq, offset); + pfvf->hw_ops->sqe_flush(pfvf, sq, offset, qidx); return true; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index 73af15685657..d2b26b3357f3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -114,6 +114,7 @@ struct otx2_cq_poll { struct otx2_pool { struct qmem *stack; struct qmem *fc_addr; + u64 *lmt_addr; u16 rbsize; }; @@ -156,4 +157,10 @@ static inline u64 otx2_iova_to_phys(void *iommu_domain, dma_addr_t dma_addr) int otx2_napi_handler(struct napi_struct *napi, int budget); bool otx2_sq_append_skb(struct net_device *netdev, struct otx2_snd_queue *sq, struct sk_buff *skb, u16 qidx); +void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, + int size, int qidx); +void otx2_sqe_flush(void *dev, struct otx2_snd_queue *sq, + int size, int qidx); +void otx2_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); +void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq); #endif /* OTX2_TXRX_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 9ed850b75d59..31e03253e612 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -7,6 +7,7 @@ #include "otx2_common.h" #include "otx2_reg.h" +#include "cn10k.h" #define DRV_NAME "rvu_nicvf" #define DRV_STRING "Marvell RVU NIC Virtual Function Driver" @@ -27,31 +28,6 @@ enum { RVU_VF_INT_VEC_MBOX = 0x0, }; -static int cn10k_lmtst_init(struct otx2_nic *vf) -{ - int size, num_lines; - - if (!test_bit(CN10K_LMTST, &vf->hw.cap_flag)) - return 0; - - size = pci_resource_len(vf->pdev, PCI_MBOX_BAR_NUM); - vf->hw.lmt_base = ioremap_wc(pci_resource_start(vf->pdev, - PCI_MBOX_BAR_NUM), - size); - if (!vf->hw.lmt_base) { - dev_err(vf->dev, "Unable to map VF LMTST region\n"); - return -ENOMEM; - } - - vf->tot_lmt_lines = size / LMT_LINE_SIZE; - /* LMTST lines per SQ */ - num_lines = (vf->tot_lmt_lines - NIX_LMTID_BASE) / - vf->hw.tx_queues; - vf->nix_lmt_lines = num_lines > 32 ? 32 : num_lines; - vf->nix_lmt_size = vf->nix_lmt_lines * LMT_LINE_SIZE; - return 0; -} - static void otx2vf_process_vfaf_mbox_msg(struct otx2_nic *vf, struct mbox_msghdr *msg) { @@ -585,7 +561,7 @@ static int otx2vf_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (err) goto err_detach_rsrc; - err = cn10k_lmtst_init(vf); + err = cn10k_vf_lmtst_init(vf); if (err) goto err_detach_rsrc; diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h index ae2279fe830a..28c04d918f0f 100644 --- a/include/linux/soc/marvell/octeontx2/asm.h +++ b/include/linux/soc/marvell/octeontx2/asm.h @@ -22,8 +22,16 @@ : [rs]"r" (ioaddr)); \ (result); \ }) +#define cn10k_lmt_flush(val, addr) \ +({ \ + __asm__ volatile(".cpu generic+lse\n" \ + "steor %x[rf],[%[rs]]" \ + : [rf]"+r"(val) \ + : [rs]"r"(addr)); \ +}) #else #define otx2_lmt_flush(ioaddr) ({ 0; }) +#define cn10k_lmt_flush(val, addr) ({ addr = val; }) #endif #endif /* __SOC_OTX2_ASM_H */

[net-next,v6,07/14] octeontx2-pf: cn10k: Use LMTST lines for NPA/NIX operations

Commit Message

Patch