From patchwork Sat Jan 16 02:59:23 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xuan Zhuo X-Patchwork-Id: 365004 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI, SPF_HELO_NONE, SPF_PASS, UNPARSEABLE_RELAY, USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 0736FC433DB for ; Sat, 16 Jan 2021 03:01:02 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id AC8662395A for ; Sat, 16 Jan 2021 03:01:01 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729458AbhAPDAo (ORCPT ); Fri, 15 Jan 2021 22:00:44 -0500 Received: from out30-131.freemail.mail.aliyun.com ([115.124.30.131]:45633 "EHLO out30-131.freemail.mail.aliyun.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1729094AbhAPDAn (ORCPT ); Fri, 15 Jan 2021 22:00:43 -0500 X-Alimail-AntiSpam: AC=PASS; BC=-1|-1; BR=01201311R171e4; CH=green; DM=||false|; DS=||; FP=0|-1|-1|-1|0|-1|-1|-1; HT=e01e04426; MF=xuanzhuo@linux.alibaba.com; NM=1; PH=DS; RN=19; SR=0; TI=SMTPD_---0ULr8-ZI_1610765968; Received: from localhost(mailfrom:xuanzhuo@linux.alibaba.com fp:SMTPD_---0ULr8-ZI_1610765968) by smtp.aliyun-inc.com(127.0.0.1); Sat, 16 Jan 2021 10:59:29 +0800 From: Xuan Zhuo To: netdev@vger.kernel.org Cc: "Michael S. Tsirkin" , Jason Wang , "David S. 
Miller" , Jakub Kicinski , =?utf-8?b?QmrDtnJuIFTDtnBl?= =?utf-8?q?l?= , Magnus Karlsson , Jonathan Lemon , Alexei Starovoitov , Daniel Borkmann , Jesper Dangaard Brouer , John Fastabend , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , KP Singh , virtualization@lists.linux-foundation.org, bpf@vger.kernel.org Subject: [PATCH net-next v2 2/7] virtio-net, xsk: distinguish XDP_TX and XSK XMIT ctx Date: Sat, 16 Jan 2021 10:59:23 +0800 Message-Id: <27006309ce40fe3f5375b44d4afaae39ed550855.1610765285.git.xuanzhuo@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: References: In-Reply-To: References: Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org If xsk is supported, a new type of ptr will be recovered during the process of freeing the old ptrs. In order to distinguish between ctx sent by XDP_TX and ctx sent by xsk, a struct is added here to tell these two situations apart. virtnet_xdp_type.type is used to distinguish the different ctx, and virtnet_xdp_type.offset is used to record the offset between the "true ctx" and virtnet_xdp_type. The newly added virtnet_xsk_hdr will be used for xsk.
Signed-off-by: Xuan Zhuo --- drivers/net/virtio_net.c | 75 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index ba8e637..e707c31 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -94,6 +94,22 @@ struct virtnet_rq_stats { u64 kicks; }; +enum { + XDP_TYPE_XSK, + XDP_TYPE_TX, +}; + +struct virtnet_xdp_type { + int offset:24; + unsigned type:8; +}; + +struct virtnet_xsk_hdr { + struct virtnet_xdp_type type; + struct virtio_net_hdr_mrg_rxbuf hdr; + u32 len; +}; + #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) @@ -251,14 +267,19 @@ static bool is_xdp_frame(void *ptr) return (unsigned long)ptr & VIRTIO_XDP_FLAG; } -static void *xdp_to_ptr(struct xdp_frame *ptr) +static void *xdp_to_ptr(struct virtnet_xdp_type *ptr) { return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); } -static struct xdp_frame *ptr_to_xdp(void *ptr) +static struct virtnet_xdp_type *ptr_to_xtype(void *ptr) +{ + return (struct virtnet_xdp_type *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); +} + +static void *xtype_get_ptr(struct virtnet_xdp_type *xdptype) { - return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); + return (char *)xdptype + xdptype->offset; } /* Converting between virtqueue no. and kernel tx/rx queue no. @@ -459,11 +480,16 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; + struct virtnet_xdp_type *xdptype; int err; - if (unlikely(xdpf->headroom < vi->hdr_len)) + if (unlikely(xdpf->headroom < vi->hdr_len + sizeof(*xdptype))) return -EOVERFLOW; + xdptype = (struct virtnet_xdp_type *)(xdpf + 1); + xdptype->offset = (char *)xdpf - (char *)xdptype; + xdptype->type = XDP_TYPE_TX; + /* Make room for virtqueue hdr (also change xdpf->headroom?) 
*/ xdpf->data -= vi->hdr_len; /* Zero header and leave csum up to XDP layers */ @@ -473,7 +499,7 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, sg_init_one(sq->sg, xdpf->data, xdpf->len); - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), + err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdptype), GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */ @@ -523,8 +549,11 @@ static int virtnet_xdp_xmit(struct net_device *dev, /* Free up any pending old buffers before queueing new ones. */ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { if (likely(is_xdp_frame(ptr))) { - struct xdp_frame *frame = ptr_to_xdp(ptr); + struct virtnet_xdp_type *xtype; + struct xdp_frame *frame; + xtype = ptr_to_xtype(ptr); + frame = xtype_get_ptr(xtype); bytes += frame->len; xdp_return_frame(frame); } else { @@ -1373,24 +1402,34 @@ static int virtnet_receive(struct receive_queue *rq, int budget, static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { - unsigned int len; unsigned int packets = 0; unsigned int bytes = 0; - void *ptr; + unsigned int len; + struct virtnet_xdp_type *xtype; + struct xdp_frame *frame; + struct virtnet_xsk_hdr *xskhdr; + struct sk_buff *skb; + void *ptr; while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { if (likely(!is_xdp_frame(ptr))) { - struct sk_buff *skb = ptr; + skb = ptr; pr_debug("Sent skb %p\n", skb); bytes += skb->len; napi_consume_skb(skb, in_napi); } else { - struct xdp_frame *frame = ptr_to_xdp(ptr); + xtype = ptr_to_xtype(ptr); - bytes += frame->len; - xdp_return_frame(frame); + if (xtype->type == XDP_TYPE_XSK) { + xskhdr = (struct virtnet_xsk_hdr *)xtype; + bytes += xskhdr->len; + } else { + frame = xtype_get_ptr(xtype); + xdp_return_frame(frame); + bytes += frame->len; + } } packets++; } @@ -2659,10 +2698,16 @@ static void free_unused_bufs(struct virtnet_info *vi) for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf 
= virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_frame(buf)) + if (!is_xdp_frame(buf)) { dev_kfree_skb(buf); - else - xdp_return_frame(ptr_to_xdp(buf)); + } else { + struct virtnet_xdp_type *xtype; + + xtype = ptr_to_xtype(buf); + + if (xtype->type != XDP_TYPE_XSK) + xdp_return_frame(xtype_get_ptr(xtype)); + } } } From patchwork Tue Jan 5 09:11:41 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xuan Zhuo X-Patchwork-Id: 357403 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI, SPF_HELO_NONE, SPF_PASS, UNPARSEABLE_RELAY, USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id AD09CC433E9 for ; Tue, 5 Jan 2021 09:13:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id 8486E22AAA for ; Tue, 5 Jan 2021 09:13:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727770AbhAEJMf (ORCPT ); Tue, 5 Jan 2021 04:12:35 -0500 Received: from out30-132.freemail.mail.aliyun.com ([115.124.30.132]:54094 "EHLO out30-132.freemail.mail.aliyun.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726677AbhAEJMd (ORCPT ); Tue, 5 Jan 2021 04:12:33 -0500 X-Alimail-AntiSpam: AC=PASS; BC=-1|-1; BR=01201311R141e4; CH=green; DM=||false|; DS=||; FP=0|-1|-1|-1|0|-1|-1|-1; HT=e01e01424; MF=xuanzhuo@linux.alibaba.com; NM=1; PH=DS; RN=22; SR=0; TI=SMTPD_---0UKog-tc_1609837907; Received: from localhost(mailfrom:xuanzhuo@linux.alibaba.com fp:SMTPD_---0UKog-tc_1609837907) by smtp.aliyun-inc.com(127.0.0.1); Tue, 05 Jan 2021 17:11:47 +0800 From: Xuan Zhuo To: 
netdev@vger.kernel.org Cc: dust.li@linux.alibaba.com, tonylu@linux.alibaba.com, "Michael S. Tsirkin" , Jason Wang , "David S. Miller" , Jakub Kicinski , =?utf-8?b?QmrDtnJuIFTDtnBl?= =?utf-8?q?l?= , Magnus Karlsson , Jonathan Lemon , Alexei Starovoitov , Daniel Borkmann , Jesper Dangaard Brouer , John Fastabend , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , KP Singh , virtualization@lists.linux-foundation.org (open list:VIRTIO CORE AND NET DRIVERS), linux-kernel@vger.kernel.org (open list), bpf@vger.kernel.org (open list:XDP SOCKETS (AF_XDP)) Subject: [PATCH netdev 3/5] virtio-net, xsk: distinguish XDP_TX and XSK XMIT ctx Date: Tue, 5 Jan 2021 17:11:41 +0800 Message-Id: X-Mailer: git-send-email 1.8.3.1 In-Reply-To: References: In-Reply-To: References: Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org If xsk is supported, a new type of ptr will be recovered during the process of freeing the old ptrs. In order to distinguish between ctx sent by XDP_TX and ctx sent by xsk, a struct is added here to tell these two situations apart. virtnet_xdp_type.type is used to distinguish the different ctx, and virtnet_xdp_type.offset is used to record the offset between the "true ctx" and virtnet_xdp_type. The newly added virtnet_xsk_hdr will be used for xsk.
Signed-off-by: Xuan Zhuo --- drivers/net/virtio_net.c | 77 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 62 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f2349b8..df38a9f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -94,6 +94,22 @@ struct virtnet_rq_stats { u64 kicks; }; +enum { + XDP_TYPE_XSK, + XDP_TYPE_TX, +}; + +struct virtnet_xdp_type { + int offset:24; + unsigned type:8; +}; + +struct virtnet_xsk_hdr { + struct virtnet_xdp_type type; + struct virtio_net_hdr_mrg_rxbuf hdr; + u32 len; +}; + #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) @@ -252,14 +268,19 @@ static bool is_xdp_frame(void *ptr) return (unsigned long)ptr & VIRTIO_XDP_FLAG; } -static void *xdp_to_ptr(struct xdp_frame *ptr) +static void *xdp_to_ptr(struct virtnet_xdp_type *ptr) { return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); } -static struct xdp_frame *ptr_to_xdp(void *ptr) +static struct virtnet_xdp_type *ptr_to_xtype(void *ptr) { - return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); + return (struct virtnet_xdp_type *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); +} + +static void *xtype_got_ptr(struct virtnet_xdp_type *xdptype) +{ + return (char *)xdptype + xdptype->offset; } /* Converting between virtqueue no. and kernel tx/rx queue no. @@ -460,11 +481,16 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, struct xdp_frame *xdpf) { struct virtio_net_hdr_mrg_rxbuf *hdr; + struct virtnet_xdp_type *xdptype; int err; - if (unlikely(xdpf->headroom < vi->hdr_len)) + if (unlikely(xdpf->headroom < vi->hdr_len + sizeof(*xdptype))) return -EOVERFLOW; + xdptype = (struct virtnet_xdp_type *)(xdpf + 1); + xdptype->offset = (char *)xdpf - (char *)xdptype; + xdptype->type = XDP_TYPE_TX; + /* Make room for virtqueue hdr (also change xdpf->headroom?) 
*/ xdpf->data -= vi->hdr_len; /* Zero header and leave csum up to XDP layers */ @@ -474,7 +500,7 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, sg_init_one(sq->sg, xdpf->data, xdpf->len); - err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), + err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdptype), GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */ @@ -544,8 +570,11 @@ static int virtnet_xdp_xmit(struct net_device *dev, /* Free up any pending old buffers before queueing new ones. */ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { if (likely(is_xdp_frame(ptr))) { - struct xdp_frame *frame = ptr_to_xdp(ptr); + struct virtnet_xdp_type *xtype; + struct xdp_frame *frame; + xtype = ptr_to_xtype(ptr); + frame = xtype_got_ptr(xtype); bytes += frame->len; xdp_return_frame(frame); } else { @@ -1395,24 +1424,34 @@ static int virtnet_receive(struct receive_queue *rq, int budget, static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { - unsigned int len; unsigned int packets = 0; unsigned int bytes = 0; - void *ptr; + unsigned int len; + struct virtnet_xdp_type *xtype; + struct xdp_frame *frame; + struct virtnet_xsk_hdr *xskhdr; + struct sk_buff *skb; + void *ptr; while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { if (likely(!is_xdp_frame(ptr))) { - struct sk_buff *skb = ptr; + skb = ptr; pr_debug("Sent skb %p\n", skb); bytes += skb->len; napi_consume_skb(skb, in_napi); } else { - struct xdp_frame *frame = ptr_to_xdp(ptr); + xtype = ptr_to_xtype(ptr); - bytes += frame->len; - xdp_return_frame(frame); + if (xtype->type == XDP_TYPE_XSK) { + xskhdr = (struct virtnet_xsk_hdr *)xtype; + bytes += xskhdr->len; + } else { + frame = xtype_got_ptr(xtype); + xdp_return_frame(frame); + bytes += frame->len; + } } packets++; } @@ -2675,14 +2714,22 @@ static void free_unused_bufs(struct virtnet_info *vi) { void *buf; int i; + struct send_queue *sq; for (i = 0; i < vi->max_queue_pairs; i++) { 
struct virtqueue *vq = vi->sq[i].vq; + sq = vi->sq + i; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_frame(buf)) + if (!is_xdp_frame(buf)) { dev_kfree_skb(buf); - else - xdp_return_frame(ptr_to_xdp(buf)); + } else { + struct virtnet_xdp_type *xtype; + + xtype = ptr_to_xtype(buf); + + if (xtype->type != XDP_TYPE_XSK) + xdp_return_frame(xtype_got_ptr(xtype)); + } } } From patchwork Tue Jan 5 09:11:43 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xuan Zhuo X-Patchwork-Id: 357405 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI, SPF_HELO_NONE, SPF_PASS, UNPARSEABLE_RELAY, USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id 24223C4332D for ; Tue, 5 Jan 2021 09:12:48 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id E6D60229EF for ; Tue, 5 Jan 2021 09:12:47 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1727870AbhAEJMj (ORCPT ); Tue, 5 Jan 2021 04:12:39 -0500 Received: from out30-42.freemail.mail.aliyun.com ([115.124.30.42]:58402 "EHLO out30-42.freemail.mail.aliyun.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1727686AbhAEJMg (ORCPT ); Tue, 5 Jan 2021 04:12:36 -0500 X-Alimail-AntiSpam: AC=PASS; BC=-1|-1; BR=01201311R151e4; CH=green; DM=||false|; DS=||; FP=0|-1|-1|-1|0|-1|-1|-1; HT=alimailimapcm10staff010182156082; MF=xuanzhuo@linux.alibaba.com; NM=1; PH=DS; RN=22; SR=0; TI=SMTPD_---0UKoFTZy_1609837909; Received: from localhost(mailfrom:xuanzhuo@linux.alibaba.com fp:SMTPD_---0UKoFTZy_1609837909) 
by smtp.aliyun-inc.com(127.0.0.1); Tue, 05 Jan 2021 17:11:50 +0800 From: Xuan Zhuo To: netdev@vger.kernel.org Cc: dust.li@linux.alibaba.com, tonylu@linux.alibaba.com, "Michael S. Tsirkin" , Jason Wang , "David S. Miller" , Jakub Kicinski , =?utf-8?b?QmrDtnJuIFTDtnBl?= =?utf-8?q?l?= , Magnus Karlsson , Jonathan Lemon , Alexei Starovoitov , Daniel Borkmann , Jesper Dangaard Brouer , John Fastabend , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , KP Singh , virtualization@lists.linux-foundation.org (open list:VIRTIO CORE AND NET DRIVERS), linux-kernel@vger.kernel.org (open list), bpf@vger.kernel.org (open list:XDP SOCKETS (AF_XDP)) Subject: [PATCH netdev 5/5] virtio-net, xsk: virtio-net support xsk zero copy tx Date: Tue, 5 Jan 2021 17:11:43 +0800 Message-Id: <65b5d0af6c4ed878cbcfa53c925d9dcbb09ecc55.1609837120.git.xuanzhuo@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: References: In-Reply-To: References: Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org Make virtio-net support XDP sockets (xsk). The module param "napi_tx" must be enabled to use this feature. In fact, various virtio implementations have some problems: 1. The tx interrupt may be lost. 2. The tx interrupt may have a relatively large delay. This brings us to several questions: 1. If wakeup triggers a tx interrupt or directly starts a napi on the current cpu, there will be a delay in sending packets. 2. When the tx ring is full, the tx interrupt may be lost or delayed, resulting in untimely recovery. I chose to send part of the data directly during wakeup. If the sending has not been completed, a napi is started to complete the subsequent sending work. Since the possible delay or loss of the tx interrupt occurs when the tx ring is full, I added a timer to solve this problem. The performance of udp sending based on virtio-net + xsk is 6 times that of an ordinary kernel udp send.
* xsk_check_timeout: when the dev full or all xsk.hdr used, start timer to check the xsk.hdr is avail. the unit is us. * xsk_num_max: the xsk.hdr max num * xsk_num_percent: the max hdr num be the percent of the virtio ring size. The real xsk hdr num will the min of xsk_num_max and the percent of the num of virtio ring * xsk_budget: the budget for xsk run Signed-off-by: Xuan Zhuo Reported-by: kernel test robot Reported-by: Dan Carpenter --- drivers/net/virtio_net.c | 437 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 434 insertions(+), 3 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e744dce..76319e7 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -22,10 +22,21 @@ #include #include #include +#include static int napi_weight = NAPI_POLL_WEIGHT; module_param(napi_weight, int, 0444); +static int xsk_check_timeout = 100; +static int xsk_num_max = 1024; +static int xsk_num_percent = 80; +static int xsk_budget = 128; + +module_param(xsk_check_timeout, int, 0644); +module_param(xsk_num_max, int, 0644); +module_param(xsk_num_percent, int, 0644); +module_param(xsk_budget, int, 0644); + static bool csum = true, gso = true, napi_tx = true; module_param(csum, bool, 0444); module_param(gso, bool, 0444); @@ -110,6 +121,9 @@ struct virtnet_xsk_hdr { u32 len; }; +#define VIRTNET_STATE_XSK_WAKEUP BIT(0) +#define VIRTNET_STATE_XSK_TIMER BIT(1) + #define VIRTNET_SQ_STAT(m) offsetof(struct virtnet_sq_stats, m) #define VIRTNET_RQ_STAT(m) offsetof(struct virtnet_rq_stats, m) @@ -149,6 +163,32 @@ struct send_queue { struct virtnet_sq_stats stats; struct napi_struct napi; + + struct { + struct xsk_buff_pool __rcu *pool; + struct virtnet_xsk_hdr __rcu *hdr; + + unsigned long state; + u64 hdr_con; + u64 hdr_pro; + u64 hdr_n; + struct xdp_desc last_desc; + bool wait_slot; + /* tx interrupt issues + * 1. that may be lost + * 2. that too slow, 200/s or delay 10ms + * + * timer for: + * 1. 
recycle the desc.(no check for performance, see below) + * 2. check the nic ring is avali. when nic ring is full + * + * Here, the regular check is performed for dev full. The + * application layer must ensure that the number of cq is + * sufficient, otherwise there may be insufficient cq in use. + * + */ + struct hrtimer timer; + } xsk; }; /* Internal representation of a receive virtqueue */ @@ -267,6 +307,8 @@ static void __free_old_xmit_ptr(struct send_queue *sq, bool in_napi, bool xsk_wakeup, unsigned int *_packets, unsigned int *_bytes); static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi); +static int virtnet_xsk_run(struct send_queue *sq, + struct xsk_buff_pool *pool, int budget); static bool is_xdp_frame(void *ptr) { @@ -1439,6 +1481,40 @@ static int virtnet_receive(struct receive_queue *rq, int budget, return stats.packets; } +static void virt_xsk_complete(struct send_queue *sq, u32 num, bool xsk_wakeup) +{ + struct xsk_buff_pool *pool; + int n; + + rcu_read_lock(); + + WRITE_ONCE(sq->xsk.hdr_pro, sq->xsk.hdr_pro + num); + + pool = rcu_dereference(sq->xsk.pool); + if (!pool) { + if (sq->xsk.hdr_pro - sq->xsk.hdr_con == sq->xsk.hdr_n) { + kfree(sq->xsk.hdr); + rcu_assign_pointer(sq->xsk.hdr, NULL); + } + rcu_read_unlock(); + return; + } + + xsk_tx_completed(pool, num); + + rcu_read_unlock(); + + if (!xsk_wakeup || !sq->xsk.wait_slot) + return; + + n = sq->xsk.hdr_pro - sq->xsk.hdr_con; + + if (n > sq->xsk.hdr_n / 2) { + sq->xsk.wait_slot = false; + virtqueue_napi_schedule(&sq->napi, sq->vq); + } +} + static void __free_old_xmit_ptr(struct send_queue *sq, bool in_napi, bool xsk_wakeup, unsigned int *_packets, unsigned int *_bytes) @@ -1446,6 +1522,7 @@ static void __free_old_xmit_ptr(struct send_queue *sq, bool in_napi, unsigned int packets = 0; unsigned int bytes = 0; unsigned int len; + u64 xsknum = 0; struct virtnet_xdp_type *xtype; struct xdp_frame *frame; struct virtnet_xsk_hdr *xskhdr; @@ -1466,6 +1543,7 @@ static void 
__free_old_xmit_ptr(struct send_queue *sq, bool in_napi, if (xtype->type == XDP_TYPE_XSK) { xskhdr = (struct virtnet_xsk_hdr *)xtype; bytes += xskhdr->len; + xsknum += 1; } else { frame = xtype_got_ptr(xtype); xdp_return_frame(frame); @@ -1475,6 +1553,9 @@ static void __free_old_xmit_ptr(struct send_queue *sq, bool in_napi, packets++; } + if (xsknum) + virt_xsk_complete(sq, xsknum, xsk_wakeup); + *_packets = packets; *_bytes = bytes; } @@ -1595,6 +1676,8 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) struct virtnet_info *vi = sq->vq->vdev->priv; unsigned int index = vq2txq(sq->vq); struct netdev_queue *txq; + struct xsk_buff_pool *pool; + int work = 0; if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { /* We don't need to enable cb for XDP */ @@ -1604,15 +1687,26 @@ static int virtnet_poll_tx(struct napi_struct *napi, int budget) txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); - free_old_xmit_skbs(sq, true); + + rcu_read_lock(); + pool = rcu_dereference(sq->xsk.pool); + if (pool) { + work = virtnet_xsk_run(sq, pool, budget); + rcu_read_unlock(); + } else { + rcu_read_unlock(); + free_old_xmit_skbs(sq, true); + } + __netif_tx_unlock(txq); - virtqueue_napi_complete(napi, sq->vq, 0); + if (work < budget) + virtqueue_napi_complete(napi, sq->vq, 0); if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS) netif_tx_wake_queue(txq); - return 0; + return work; } static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) @@ -2560,16 +2654,346 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, return err; } +static enum hrtimer_restart virtnet_xsk_timeout(struct hrtimer *timer) +{ + struct send_queue *sq; + + sq = container_of(timer, struct send_queue, xsk.timer); + + clear_bit(VIRTNET_STATE_XSK_TIMER, &sq->xsk.state); + + virtqueue_napi_schedule(&sq->napi, sq->vq); + + return HRTIMER_NORESTART; +} + +static int virtnet_xsk_pool_enable(struct net_device *dev, + struct xsk_buff_pool *pool, + u16 
qid) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct send_queue *sq = &vi->sq[qid]; + struct virtnet_xsk_hdr *hdr; + int n, ret = 0; + + if (qid >= dev->real_num_rx_queues || qid >= dev->real_num_tx_queues) + return -EINVAL; + + if (qid >= vi->curr_queue_pairs) + return -EINVAL; + + rcu_read_lock(); + + ret = -EBUSY; + if (rcu_dereference(sq->xsk.pool)) + goto end; + + /* check last xsk wait for hdr been free */ + if (rcu_dereference(sq->xsk.hdr)) + goto end; + + n = virtqueue_get_vring_size(sq->vq); + n = min(xsk_num_max, n * (xsk_num_percent % 100) / 100); + + ret = -ENOMEM; + hdr = kcalloc(n, sizeof(struct virtnet_xsk_hdr), GFP_ATOMIC); + if (!hdr) + goto end; + + memset(&sq->xsk, 0, sizeof(sq->xsk)); + + sq->xsk.hdr_pro = n; + sq->xsk.hdr_n = n; + + hrtimer_init(&sq->xsk.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + sq->xsk.timer.function = virtnet_xsk_timeout; + + rcu_assign_pointer(sq->xsk.pool, pool); + rcu_assign_pointer(sq->xsk.hdr, hdr); + + ret = 0; +end: + rcu_read_unlock(); + + return ret; +} + +static int virtnet_xsk_pool_disable(struct net_device *dev, u16 qid) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct send_queue *sq = &vi->sq[qid]; + + if (qid >= dev->real_num_rx_queues || qid >= dev->real_num_tx_queues) + return -EINVAL; + + if (qid >= vi->curr_queue_pairs) + return -EINVAL; + + rcu_assign_pointer(sq->xsk.pool, NULL); + + hrtimer_cancel(&sq->xsk.timer); + + synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. 
*/ + + if (sq->xsk.hdr_pro - sq->xsk.hdr_con == sq->xsk.hdr_n) { + kfree(sq->xsk.hdr); + rcu_assign_pointer(sq->xsk.hdr, NULL); + synchronize_rcu(); + } + + return 0; +} + static int virtnet_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: return virtnet_xdp_set(dev, xdp->prog, xdp->extack); + case XDP_SETUP_XSK_POOL: + xdp->xsk.need_dma = false; + if (xdp->xsk.pool) + return virtnet_xsk_pool_enable(dev, xdp->xsk.pool, + xdp->xsk.queue_id); + else + return virtnet_xsk_pool_disable(dev, xdp->xsk.queue_id); default: return -EINVAL; } } +static int virtnet_xsk_xmit(struct send_queue *sq, struct xsk_buff_pool *pool, + struct xdp_desc *desc) +{ + struct virtnet_info *vi = sq->vq->vdev->priv; + void *data, *ptr; + struct page *page; + struct virtnet_xsk_hdr *xskhdr; + u32 idx, offset, n, i, copy, copied; + u64 addr; + int err, m; + + addr = desc->addr; + + data = xsk_buff_raw_get_data(pool, addr); + offset = offset_in_page(data); + + /* one for hdr, one for the first page */ + n = 2; + m = desc->len - (PAGE_SIZE - offset); + if (m > 0) { + n += m >> PAGE_SHIFT; + if (m & PAGE_MASK) + ++n; + + n = min_t(u32, n, ARRAY_SIZE(sq->sg)); + } + + idx = sq->xsk.hdr_con % sq->xsk.hdr_n; + xskhdr = &sq->xsk.hdr[idx]; + + /* xskhdr->hdr has been memset to zero, so not need to clear again */ + + sg_init_table(sq->sg, n); + sg_set_buf(sq->sg, &xskhdr->hdr, vi->hdr_len); + + copied = 0; + for (i = 1; i < n; ++i) { + copy = min_t(int, desc->len - copied, PAGE_SIZE - offset); + + page = xsk_buff_raw_get_page(pool, addr + copied); + + sg_set_page(sq->sg + i, page, copy, offset); + copied += copy; + if (offset) + offset = 0; + } + + xskhdr->len = desc->len; + ptr = xdp_to_ptr(&xskhdr->type); + + err = virtqueue_add_outbuf(sq->vq, sq->sg, n, ptr, GFP_ATOMIC); + if (unlikely(err)) + sq->xsk.last_desc = *desc; + else + sq->xsk.hdr_con++; + + return err; +} + +static bool virtnet_xsk_dev_is_full(struct send_queue *sq) +{ + if (sq->vq->num_free 
< 2 + MAX_SKB_FRAGS) + return true; + + if (sq->xsk.hdr_con == sq->xsk.hdr_pro) + return true; + + return false; +} + +static int virtnet_xsk_xmit_zc(struct send_queue *sq, + struct xsk_buff_pool *pool, unsigned int budget) +{ + struct xdp_desc desc; + int err, packet = 0; + int ret = -EAGAIN; + + if (sq->xsk.last_desc.addr) { + err = virtnet_xsk_xmit(sq, pool, &sq->xsk.last_desc); + if (unlikely(err)) + return -EBUSY; + + ++packet; + sq->xsk.last_desc.addr = 0; + } + + while (budget-- > 0) { + if (virtnet_xsk_dev_is_full(sq)) { + ret = -EBUSY; + break; + } + + if (!xsk_tx_peek_desc(pool, &desc)) { + /* done */ + ret = 0; + break; + } + + err = virtnet_xsk_xmit(sq, pool, &desc); + if (unlikely(err)) { + ret = -EBUSY; + break; + } + + ++packet; + } + + if (packet) { + xsk_tx_release(pool); + + if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) { + u64_stats_update_begin(&sq->stats.syncp); + sq->stats.kicks++; + u64_stats_update_end(&sq->stats.syncp); + } + } + + return ret; +} + +static int virtnet_xsk_run(struct send_queue *sq, + struct xsk_buff_pool *pool, int budget) +{ + int err, ret = 0; + unsigned int _packets = 0; + unsigned int _bytes = 0; + + sq->xsk.wait_slot = false; + + if (test_and_clear_bit(VIRTNET_STATE_XSK_TIMER, &sq->xsk.state)) + hrtimer_try_to_cancel(&sq->xsk.timer); + + __free_old_xmit_ptr(sq, true, false, &_packets, &_bytes); + + err = virtnet_xsk_xmit_zc(sq, pool, xsk_budget); + if (!err) { + struct xdp_desc desc; + + clear_bit(VIRTNET_STATE_XSK_WAKEUP, &sq->xsk.state); + xsk_set_tx_need_wakeup(pool); + + /* Race breaker. If new is coming after last xmit + * but before flag change + */ + + if (!xsk_tx_peek_desc(pool, &desc)) + goto end; + + set_bit(VIRTNET_STATE_XSK_WAKEUP, &sq->xsk.state); + xsk_clear_tx_need_wakeup(pool); + + sq->xsk.last_desc = desc; + ret = budget; + goto end; + } + + xsk_clear_tx_need_wakeup(pool); + + if (err == -EAGAIN) { + ret = budget; + goto end; + } + + /* -EBUSY: wait tx ring avali. 
+ * by tx interrupt or rx interrupt or start_xmit or timer + */ + + __free_old_xmit_ptr(sq, true, false, &_packets, &_bytes); + + if (!virtnet_xsk_dev_is_full(sq)) { + ret = budget; + goto end; + } + + sq->xsk.wait_slot = true; + + if (xsk_check_timeout) { + hrtimer_start(&sq->xsk.timer, + ns_to_ktime(xsk_check_timeout * 1000), + HRTIMER_MODE_REL_PINNED); + + set_bit(VIRTNET_STATE_XSK_TIMER, &sq->xsk.state); + } + + virtnet_sq_stop_check(sq, true); + +end: + return ret; +} + +static int virtnet_xsk_wakeup(struct net_device *dev, u32 qid, u32 flag) +{ + struct virtnet_info *vi = netdev_priv(dev); + struct send_queue *sq; + struct xsk_buff_pool *pool; + struct netdev_queue *txq; + int work = 0; + + if (!netif_running(dev)) + return -ENETDOWN; + + if (qid >= vi->curr_queue_pairs) + return -EINVAL; + + sq = &vi->sq[qid]; + + rcu_read_lock(); + + pool = rcu_dereference(sq->xsk.pool); + if (!pool) + goto end; + + if (test_and_set_bit(VIRTNET_STATE_XSK_WAKEUP, &sq->xsk.state)) + goto end; + + txq = netdev_get_tx_queue(dev, qid); + + local_bh_disable(); + __netif_tx_lock(txq, raw_smp_processor_id()); + + work = virtnet_xsk_run(sq, pool, xsk_budget); + + __netif_tx_unlock(txq); + local_bh_enable(); + + if (work == xsk_budget) + virtqueue_napi_schedule(&sq->napi, sq->vq); + +end: + rcu_read_unlock(); + return 0; +} + static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, size_t len) { @@ -2624,6 +3048,7 @@ static int virtnet_set_features(struct net_device *dev, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, .ndo_bpf = virtnet_xdp, .ndo_xdp_xmit = virtnet_xdp_xmit, + .ndo_xsk_wakeup = virtnet_xsk_wakeup, .ndo_features_check = passthru_features_check, .ndo_get_phys_port_name = virtnet_get_phys_port_name, .ndo_set_features = virtnet_set_features, @@ -2722,6 +3147,7 @@ static void free_receive_page_frags(struct virtnet_info *vi) static void free_unused_bufs(struct virtnet_info *vi) { void *buf; + u32 n; int i; struct send_queue *sq; @@ -2740,6 +3166,11 @@ 
static void free_unused_bufs(struct virtnet_info *vi) xdp_return_frame(xtype_got_ptr(xtype)); } } + + n = sq->xsk.hdr_con + sq->xsk.hdr_n; + n -= sq->xsk.hdr_pro; + if (n) + virt_xsk_complete(sq, n, false); } for (i = 0; i < vi->max_queue_pairs; i++) { From patchwork Sat Jan 16 02:59:28 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xuan Zhuo X-Patchwork-Id: 365003 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-16.8 required=3.0 tests=BAYES_00, HEADER_FROM_DIFFERENT_DOMAINS,INCLUDES_CR_TRAILER,INCLUDES_PATCH, MAILING_LIST_MULTI, SPF_HELO_NONE, SPF_PASS, UNPARSEABLE_RELAY, USER_AGENT_GIT autolearn=unavailable autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id DE28CC433E6 for ; Sat, 16 Jan 2021 03:01:22 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by mail.kernel.org (Postfix) with ESMTP id A9E3C238E7 for ; Sat, 16 Jan 2021 03:01:22 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1729507AbhAPDBQ (ORCPT ); Fri, 15 Jan 2021 22:01:16 -0500 Received: from out30-45.freemail.mail.aliyun.com ([115.124.30.45]:50105 "EHLO out30-45.freemail.mail.aliyun.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1728644AbhAPDBQ (ORCPT ); Fri, 15 Jan 2021 22:01:16 -0500 X-Alimail-AntiSpam: AC=PASS; BC=-1|-1; BR=01201311R131e4; CH=green; DM=||false|; DS=||; FP=0|-1|-1|-1|0|-1|-1|-1; HT=e01e04394; MF=xuanzhuo@linux.alibaba.com; NM=1; PH=DS; RN=19; SR=0; TI=SMTPD_---0ULrQCaD_1610765971; Received: from localhost(mailfrom:xuanzhuo@linux.alibaba.com fp:SMTPD_---0ULrQCaD_1610765971) by smtp.aliyun-inc.com(127.0.0.1); Sat, 16 Jan 2021 10:59:31 +0800 From: Xuan Zhuo To: netdev@vger.kernel.org Cc: "Michael S. Tsirkin" , Jason Wang , "David S. 
Miller" , Jakub Kicinski , =?utf-8?b?QmrDtnJuIFTDtnBl?= =?utf-8?q?l?= , Magnus Karlsson , Jonathan Lemon , Alexei Starovoitov , Daniel Borkmann , Jesper Dangaard Brouer , John Fastabend , Andrii Nakryiko , Martin KaFai Lau , Song Liu , Yonghong Song , KP Singh , virtualization@lists.linux-foundation.org, bpf@vger.kernel.org Subject: [PATCH net-next v2 7/7] virtio-net, xsk: set xsk completed when packet sent done Date: Sat, 16 Jan 2021 10:59:28 +0800 Message-Id: <4949b7afe1420cfdedd890f77335fa9554f774cf.1610765285.git.xuanzhuo@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: References: In-Reply-To: References: Precedence: bulk List-ID: X-Mailing-List: netdev@vger.kernel.org When recycling packets that have been sent, call xsk_tx_completed to inform xsk which packets have been sent. If necessary, start napi to process the packets in the xsk queue. Signed-off-by: Xuan Zhuo --- drivers/net/virtio_net.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index e552c2d..d0d620b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1442,6 +1442,42 @@ static int virtnet_receive(struct receive_queue *rq, int budget, return stats.packets; } +static void virt_xsk_complete(struct send_queue *sq, u32 num, bool xsk_wakeup) +{ + struct xsk_buff_pool *pool; + struct virtnet_xsk_hdr *hdr = NULL; + int n; + /* Advance sq->xsk.hdr_pro by @num, report @num completions to the pool if one is attached, and when @xsk_wakeup is set possibly reschedule napi. */ + rcu_read_lock(); + + sq->xsk.hdr_pro += num; + + pool = rcu_dereference(sq->xsk.pool); + if (!pool) { /* no pool attached: detach and free sq->xsk.hdr once hdr_pro - hdr_con equals hdr_n */ + if (sq->xsk.hdr_pro - sq->xsk.hdr_con == sq->xsk.hdr_n) + hdr = rcu_replace_pointer(sq->xsk.hdr, hdr, true); + + rcu_read_unlock(); + + kfree(hdr); + return; + } + + xsk_tx_completed(pool, num); + + rcu_read_unlock(); + + if (!xsk_wakeup || !sq->xsk.wait_slot) /* only reschedule when asked to and wait_slot is set */ + return; + + n = sq->xsk.hdr_pro - sq->xsk.hdr_con; /* presumably the number of free hdr slots - TODO confirm */ + + if (n > sq->xsk.hdr_n / 2) { + sq->xsk.wait_slot = false; + virtqueue_napi_schedule(&sq->napi, sq->vq); + } +} + static void
__free_old_xmit_ptr(struct send_queue *sq, bool in_napi, bool xsk_wakeup, unsigned int *_packets, unsigned int *_bytes) @@ -1449,6 +1485,7 @@ static void __free_old_xmit_ptr(struct send_queue *sq, bool in_napi, unsigned int packets = 0; unsigned int bytes = 0; unsigned int len; + u64 xsknum = 0; /* counts XDP_TYPE_XSK buffers seen; NOTE(review): u64 here, but virt_xsk_complete() takes a u32 num */ struct virtnet_xdp_type *xtype; struct xdp_frame *frame; struct virtnet_xsk_hdr *xskhdr; @@ -1469,6 +1506,7 @@ static void __free_old_xmit_ptr(struct send_queue *sq, bool in_napi, if (xtype->type == XDP_TYPE_XSK) { xskhdr = (struct virtnet_xsk_hdr *)xtype; bytes += xskhdr->len; + xsknum += 1; } else { frame = xtype_get_ptr(xtype); xdp_return_frame(frame); @@ -1478,6 +1516,9 @@ static void __free_old_xmit_ptr(struct send_queue *sq, bool in_napi, packets++; } + if (xsknum) /* report the xsk buffers counted above in one call */ + virt_xsk_complete(sq, xsknum, xsk_wakeup); + *_packets = packets; *_bytes = bytes; } @@ -3044,10 +3085,13 @@ static void free_receive_page_frags(struct virtnet_info *vi) static void free_unused_bufs(struct virtnet_info *vi) { void *buf; + u32 n; int i; + struct send_queue *sq; for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; + sq = vi->sq + i; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { if (!is_xdp_frame(buf)) { dev_kfree_skb(buf); @@ -3060,6 +3104,11 @@ static void free_unused_bufs(struct virtnet_info *vi) xdp_return_frame(xtype_get_ptr(xtype)); } } + /* n = hdr_con + hdr_n - hdr_pro; a non-zero n is passed to virt_xsk_complete() with xsk_wakeup false */ + n = sq->xsk.hdr_con + sq->xsk.hdr_n; + n -= sq->xsk.hdr_pro; + if (n) + virt_xsk_complete(sq, n, false); } for (i = 0; i < vi->max_queue_pairs; i++) {