From patchwork Thu Apr 6 08:33:21 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jianbo Liu X-Patchwork-Id: 96911 Delivered-To: patch@linaro.org Received: by 10.140.89.233 with SMTP id v96csp626666qgd; Thu, 6 Apr 2017 01:33:35 -0700 (PDT) X-Received: by 10.28.223.213 with SMTP id w204mr4581176wmg.57.1491467615462; Thu, 06 Apr 2017 01:33:35 -0700 (PDT) Return-Path: Received: from dpdk.org (dpdk.org. [92.243.14.124]) by mx.google.com with ESMTP id p71si28086293wma.64.2017.04.06.01.33.35; Thu, 06 Apr 2017 01:33:35 -0700 (PDT) Received-SPF: pass (google.com: domain of dev-bounces@dpdk.org designates 92.243.14.124 as permitted sender) client-ip=92.243.14.124; Authentication-Results: mx.google.com; spf=pass (google.com: domain of dev-bounces@dpdk.org designates 92.243.14.124 as permitted sender) smtp.mailfrom=dev-bounces@dpdk.org; dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=linaro.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id 8369D29C7; Thu, 6 Apr 2017 10:33:33 +0200 (CEST) Received: from foss.arm.com (foss.arm.com [217.140.101.70]) by dpdk.org (Postfix) with ESMTP id CA47B1E20 for ; Thu, 6 Apr 2017 10:33:31 +0200 (CEST) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.72.51.249]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 82AD32B; Thu, 6 Apr 2017 01:33:30 -0700 (PDT) Received: from localhost.localdomain.com (usa-sjc-imap-foss1.foss.arm.com [10.72.51.249]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id A59F53F220; Thu, 6 Apr 2017 01:33:29 -0700 (PDT) From: Jianbo Liu To: dev@dpdk.org, bruce.richardson@intel.com, qi.z.zhang@intel.com, jerin.jacob@caviumnetworks.com Cc: Jianbo Liu Date: Thu, 6 Apr 2017 16:33:21 +0800 Message-Id: <1491467601-29946-1-git-send-email-jianbo.liu@linaro.org> X-Mailer: git-send-email 1.8.3.1 Subject: [dpdk-dev] [PATCH] net/i40e: sync the changes for vector PMD between x86 and arm64 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Porting two changes from x86 SSE implematation. net/i40e: fix checksum flag in x86 vector Rx net/i40e: eliminate mbuf write on rearm Signed-off-by: Jianbo Liu --- drivers/net/i40e/i40e_rxtx_vec_neon.c | 68 +++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 26 deletions(-) -- 1.8.3.1 diff --git a/drivers/net/i40e/i40e_rxtx_vec_neon.c b/drivers/net/i40e/i40e_rxtx_vec_neon.c index 902fb1f..ca6b1f4 100644 --- a/drivers/net/i40e/i40e_rxtx_vec_neon.c +++ b/drivers/net/i40e/i40e_rxtx_vec_neon.c @@ -57,7 +57,6 @@ uint64x2_t dma_addr0, dma_addr1; uint64x2_t zero = vdupq_n_u64(0); uint64_t paddr; - uint8x8_t p; rxdp = rxq->rx_ring + rxq->rxrearm_start; @@ -77,27 +76,17 @@ return; } - p = vld1_u8((uint8_t *)&rxq->mbuf_initializer); - /* Initialize the mbufs in vector, process 2 mbufs in one loop */ for (i = 0; i < RTE_I40E_RXQ_REARM_THRESH; i += 2, rxep += 2) { mb0 = rxep[0].mbuf; mb1 = rxep[1].mbuf; - /* Flush mbuf with pkt template. - * Data to be rearmed is 6 bytes long. - * Though, RX will overwrite ol_flags that are coming next - * anyway. So overwrite whole 8 bytes with one load: - * 6 bytes of rearm_data plus first 2 bytes of ol_flags. - */ - vst1_u8((uint8_t *)&mb0->rearm_data, p); paddr = mb0->buf_physaddr + RTE_PKTMBUF_HEADROOM; dma_addr0 = vdupq_n_u64(paddr); /* flush desc with pa dma_addr */ vst1q_u64((uint64_t *)&rxdp++->read, dma_addr0); - vst1_u8((uint8_t *)&mb1->rearm_data, p); paddr = mb1->buf_physaddr + RTE_PKTMBUF_HEADROOM; dma_addr1 = vdupq_n_u64(paddr); vst1q_u64((uint64_t *)&rxdp++->read, dma_addr1); @@ -117,9 +106,12 @@ } static inline void -desc_to_olflags_v(uint64x2_t descs[4], struct rte_mbuf **rx_pkts) +desc_to_olflags_v(struct i40e_rx_queue *rxq, uint64x2_t descs[4], + struct rte_mbuf **rx_pkts) { uint32x4_t vlan0, vlan1, rss, l3_l4e; + const uint64x2_t mbuf_init = {rxq->mbuf_initializer, 0}; + uint64x2_t rearm0, rearm1, rearm2, rearm3; /* mask everything except RSS, flow director and VLAN flags * bit2 is for VLAN tag, bit11 for flow director indication @@ -128,6 +120,20 @@ const uint32x4_t rss_vlan_msk = { 0x1c03804, 0x1c03804, 0x1c03804, 0x1c03804}; + const uint32x4_t cksum_mask = { + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD}; + /* map rss and vlan type to rss hash and vlan flag */ const uint8x16_t vlan_flags = { 0, 0, 0, 0, @@ -142,14 +148,16 @@ 0, 0, 0, 0}; const uint8x16_t l3_l4e_flags = { - 0, - PKT_RX_IP_CKSUM_BAD, - PKT_RX_L4_CKSUM_BAD, - PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, - PKT_RX_EIP_CKSUM_BAD, - PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, - PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD, - PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1, + PKT_RX_IP_CKSUM_BAD >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD | + PKT_RX_L4_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, 0, 0, 0, 0, 0, 0, 0, 0}; vlan0 = vzipq_u32(vreinterpretq_u32_u64(descs[0]), @@ -169,15 +177,23 @@ l3_l4e = vshrq_n_u32(vlan1, 22); l3_l4e = vreinterpretq_u32_u8(vqtbl1q_u8(l3_l4e_flags, vreinterpretq_u8_u32(l3_l4e))); - + /* then we shift left 1 bit */ + l3_l4e = vshlq_n_u32(l3_l4e, 1); + /* we need to mask out the reduntant bits */ + l3_l4e = vandq_u32(l3_l4e, cksum_mask); vlan0 = vorrq_u32(vlan0, rss); vlan0 = vorrq_u32(vlan0, l3_l4e); - rx_pkts[0]->ol_flags = vgetq_lane_u32(vlan0, 0); - rx_pkts[1]->ol_flags = vgetq_lane_u32(vlan0, 1); - rx_pkts[2]->ol_flags = vgetq_lane_u32(vlan0, 2); - rx_pkts[3]->ol_flags = vgetq_lane_u32(vlan0, 3); + rearm0 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 0), mbuf_init, 1); + rearm1 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 1), mbuf_init, 1); + rearm2 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 2), mbuf_init, 1); + rearm3 = vsetq_lane_u64(vgetq_lane_u32(vlan0, 3), mbuf_init, 1); + + vst1q_u64((uint64_t *)&rx_pkts[0]->rearm_data, rearm0); + vst1q_u64((uint64_t *)&rx_pkts[1]->rearm_data, rearm1); + vst1q_u64((uint64_t *)&rx_pkts[2]->rearm_data, rearm2); + vst1q_u64((uint64_t *)&rx_pkts[3]->rearm_data, rearm3); } #define PKTLEN_SHIFT 10 @@ -348,7 +364,7 @@ sterr_tmp2.val[1]).val[0]; stat = vgetq_lane_u64(vreinterpretq_u64_u16(staterr), 0); - desc_to_olflags_v(descs, &rx_pkts[pos]); + desc_to_olflags_v(rxq, descs, &rx_pkts[pos]); /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);