diff mbox series

[bpf-next,2/6] bpf, sockmap: On receive programs try to fast track SK_PASS ingress

Message ID 160221864872.12042.14533177764605980614.stgit@john-Precision-5820-Tower
State Superseded
Headers show
Series sockmap/sk_skb program memory acct fixes | expand

Commit Message

John Fastabend Oct. 9, 2020, 4:44 a.m. UTC
When we receive an skb and the ingress skb verdict program returns
SK_PASS we currently set the ingress flag and put it on the workqueue
so it can be turned into a sk_msg and put on the sk_msg ingress queue.
Then finally telling userspace with data_ready hook.

Here we observe that if the workqueue is empty then we can try to
convert into a sk_msg type and call data_ready directly without
bouncing through a workqueue. Its a common pattern to have a recv
verdict program for visibility that always returns SK_PASS. In this
case unless there is an ENOMEM error or we overrun the socket we
can avoid the workqueue completely only using it when we fall back
to error cases caused by memory pressure.

By doing this we eliminate another case where data may be dropped
if errors occur on memory limits in workqueue.

Fixes: 51199405f9672 ("bpf: skb_verdict, support SK_PASS on RX BPF path")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 net/core/skmsg.c |   17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

Comments

kernel test robot Oct. 9, 2020, 7:28 a.m. UTC | #1
Hi John,

I love your patch! Perhaps something to improve:

[auto build test WARNING on bpf-next/master]

url:    https://github.com/0day-ci/linux/commits/John-Fastabend/sockmap-sk_skb-program-memory-acct-fixes/20201009-124625
base:   https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master
config: x86_64-randconfig-a003-20201009 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 4cfc4025cc1433ca5ef1c526053fc9c4bfe64109)
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # install x86_64 cross compiling tool for clang build
        # apt-get install binutils-x86-64-linux-gnu
        # https://github.com/0day-ci/linux/commit/34f009aa63482e7bd76b64e4e3c698894e48ee00
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review John-Fastabend/sockmap-sk_skb-program-memory-acct-fixes/20201009-124625
        git checkout 34f009aa63482e7bd76b64e4e3c698894e48ee00
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

>> net/core/skmsg.c:795:7: warning: variable 'err' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
                   if (skb_queue_empty(&psock->ingress_skb)) {
                       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   net/core/skmsg.c:798:7: note: uninitialized use occurs here
                   if (err < 0) {
                       ^~~
   net/core/skmsg.c:795:3: note: remove the 'if' if its condition is always true
                   if (skb_queue_empty(&psock->ingress_skb)) {
                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   net/core/skmsg.c:776:9: note: initialize the variable 'err' to silence this warning
           int err;
                  ^
                   = 0
   1 warning generated.

vim +795 net/core/skmsg.c

   770	
   771	static void sk_psock_verdict_apply(struct sk_psock *psock,
   772					   struct sk_buff *skb, int verdict)
   773	{
   774		struct tcp_skb_cb *tcp;
   775		struct sock *sk_other;
   776		int err;
   777	
   778		switch (verdict) {
   779		case __SK_PASS:
   780			sk_other = psock->sk;
   781			if (sock_flag(sk_other, SOCK_DEAD) ||
   782			    !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
   783				goto out_free;
   784			}
   785	
   786			tcp = TCP_SKB_CB(skb);
   787			tcp->bpf.flags |= BPF_F_INGRESS;
   788	
   789			/* If the queue is empty then we can submit directly
   790			 * into the msg queue. If its not empty we have to
   791			 * queue work otherwise we may get OOO data. Otherwise,
   792			 * if sk_psock_skb_ingress errors will be handled by
   793			 * retrying later from workqueue.
   794			 */
 > 795			if (skb_queue_empty(&psock->ingress_skb)) {
   796				err = sk_psock_skb_ingress(psock, skb);
   797			}
   798			if (err < 0) {
   799				skb_queue_tail(&psock->ingress_skb, skb);
   800				schedule_work(&psock->work);
   801			}
   802			break;
   803		case __SK_REDIRECT:
   804			sk_psock_skb_redirect(skb);
   805			break;
   806		case __SK_DROP:
   807		default:
   808	out_free:
   809			kfree_skb(skb);
   810		}
   811	}
   812	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 040ae1d75b65..dabd25313a70 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -773,6 +773,7 @@  static void sk_psock_verdict_apply(struct sk_psock *psock,
 {
 	struct tcp_skb_cb *tcp;
 	struct sock *sk_other;
+	int err;
 
 	switch (verdict) {
 	case __SK_PASS:
@@ -784,8 +785,20 @@  static void sk_psock_verdict_apply(struct sk_psock *psock,
 
 		tcp = TCP_SKB_CB(skb);
 		tcp->bpf.flags |= BPF_F_INGRESS;
-		skb_queue_tail(&psock->ingress_skb, skb);
-		schedule_work(&psock->work);
+
+		/* If the queue is empty then we can submit directly
+		 * into the msg queue. If its not empty we have to
+		 * queue work otherwise we may get OOO data. Otherwise,
+		 * if sk_psock_skb_ingress errors will be handled by
+		 * retrying later from workqueue.
+		 */
+		if (skb_queue_empty(&psock->ingress_skb)) {
+			err = sk_psock_skb_ingress(psock, skb);
+		}
+		if (err < 0) {
+			skb_queue_tail(&psock->ingress_skb, skb);
+			schedule_work(&psock->work);
+		}
 		break;
 	case __SK_REDIRECT:
 		sk_psock_skb_redirect(skb);