diff mbox series

[net-next] net/packet: fix TPACKET_V3 performance issue in case of TSO

Message ID 20200325140845.11840-1-yang_y_yi@163.com
State New
Headers show
Series [net-next] net/packet: fix TPACKET_V3 performance issue in case of TSO | expand

Commit Message

yang_y_yi@163.com March 25, 2020, 2:08 p.m. UTC
From: Yi Yang <yangyi01@inspur.com>

TPACKET_V3 performance is very poor when TSO is in use; it is even
worse than in the non-TSO case. On Linux kernels that set CONFIG_HZ to
1000, req.tp_retire_blk_tov = 1 helps a bit, but some Linux
distributions set CONFIG_HZ to 250, so req.tp_retire_blk_tov = 1
effectively means req.tp_retire_blk_tov = 4, which does not help at all.

This patch fixes the aforementioned performance issue: it boosts
throughput from 3.05 Gbps to 16.9 Gbps, a huge improvement. It
retires the current block as early as possible in the TSO case so
that the userspace application can consume it in time.

Signed-off-by: Yi Yang <yangyi01@inspur.com>
---
 net/packet/af_packet.c | 42 ++++++++++++++++++++++++++++++++----------
 1 file changed, 32 insertions(+), 10 deletions(-)
diff mbox series

Patch

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index e5b0986..cbe9052 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1005,7 +1005,8 @@  static void prb_fill_curr_block(char *curr,
 /* Assumes caller has the sk->rx_queue.lock */
 static void *__packet_lookup_frame_in_block(struct packet_sock *po,
 					    struct sk_buff *skb,
-					    unsigned int len
+					    unsigned int len,
+					    bool retire_cur_block
 					    )
 {
 	struct tpacket_kbdq_core *pkc;
@@ -1041,7 +1042,8 @@  static void *__packet_lookup_frame_in_block(struct packet_sock *po,
 	end = (char *)pbd + pkc->kblk_size;
 
 	/* first try the current block */
-	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
+	if (BLOCK_NUM_PKTS(pbd) == 0 ||
+	    (!retire_cur_block && curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end)) {
 		prb_fill_curr_block(curr, pkc, pbd, len);
 		return (void *)curr;
 	}
@@ -1066,7 +1068,8 @@  static void *__packet_lookup_frame_in_block(struct packet_sock *po,
 
 static void *packet_current_rx_frame(struct packet_sock *po,
 					    struct sk_buff *skb,
-					    int status, unsigned int len)
+					    int status, unsigned int len,
+					    bool retire_cur_block)
 {
 	char *curr = NULL;
 	switch (po->tp_version) {
@@ -1076,7 +1079,8 @@  static void *packet_current_rx_frame(struct packet_sock *po,
 					po->rx_ring.head, status);
 		return curr;
 	case TPACKET_V3:
-		return __packet_lookup_frame_in_block(po, skb, len);
+		return __packet_lookup_frame_in_block(po, skb, len,
+						      retire_cur_block);
 	default:
 		WARN(1, "TPACKET version not supported\n");
 		BUG();
@@ -2174,6 +2178,9 @@  static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 	__u32 ts_status;
 	bool is_drop_n_account = false;
 	bool do_vnet = false;
+	struct virtio_net_hdr vnet_hdr;
+	int vnet_hdr_ok = 0;
+	bool retire_cur_block = false;
 
 	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
 	 * We may add members to them until current aligned size without forcing
@@ -2269,17 +2276,32 @@  static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 			do_vnet = false;
 		}
 	}
+
+	if (do_vnet) {
+		vnet_hdr_ok = virtio_net_hdr_from_skb(skb, &vnet_hdr,
+						      vio_le(), true, 0);
+		/* Improve performance by retiring current block for
+		 * TPACKET_V3 in case of TSO.
+		 */
+		if (vnet_hdr_ok == 0) {
+			retire_cur_block = true;
+		}
+	}
+
 	spin_lock(&sk->sk_receive_queue.lock);
 	h.raw = packet_current_rx_frame(po, skb,
-					TP_STATUS_KERNEL, (macoff+snaplen));
+					TP_STATUS_KERNEL, (macoff+snaplen),
+					retire_cur_block);
 	if (!h.raw)
 		goto drop_n_account;
 
-	if (do_vnet &&
-	    virtio_net_hdr_from_skb(skb, h.raw + macoff -
-				    sizeof(struct virtio_net_hdr),
-				    vio_le(), true, 0))
-		goto drop_n_account;
+	if (do_vnet) {
+		if (vnet_hdr_ok != 0)
+			goto drop_n_account;
+		else
+			memcpy(h.raw + macoff - sizeof(struct virtio_net_hdr),
+			       &vnet_hdr, sizeof(vnet_hdr));
+	}
 
 	if (po->tp_version <= TPACKET_V2) {
 		packet_increment_rx_head(po, &po->rx_ring);