diff mbox series

[v5,1/2] SQUASH: tcp/dcpp: Convert timewait timer into a delayed_work

Message ID 20240415113436.3261042-2-vschneid@redhat.com
State New
Headers show
Series tcp/dcpp: Un-pin tw_timer | expand

Commit Message

Valentin Schneider April 15, 2024, 11:34 a.m. UTC
!NOTE! this changes behaviour, as the tw_timer is TIMER_PINNED and delayed
works currently cannot behave as such. TIMER_PINNED timers are enqueued
onto the local base, whereas delayed_works have their timer enqueued on the
global base AND also check for HK_TYPE_TIMER isolation.

The split between this commit and the next is mainly there for ease of
reviewing. This commit should be squashed with the next one.

Signed-off-by: Valentin Schneider <vschneid@redhat.com>
---
 include/net/inet_timewait_sock.h                 |  2 +-
 net/ipv4/inet_diag.c                             |  2 +-
 net/ipv4/inet_timewait_sock.c                    | 16 +++++++++-------
 net/ipv4/tcp_ipv4.c                              |  2 +-
 net/ipv6/tcp_ipv6.c                              |  2 +-
 .../testing/selftests/bpf/progs/bpf_iter_tcp4.c  |  2 +-
 .../testing/selftests/bpf/progs/bpf_iter_tcp6.c  |  2 +-
 7 files changed, 15 insertions(+), 13 deletions(-)

Comments

kernel test robot April 26, 2024, 8:43 a.m. UTC | #1
Hello,

kernel test robot noticed "BUG:sleeping_function_called_from_invalid_context_at_kernel/workqueue.c" on:

commit: 1463958a05a90694cf63a6decf02983ef9a0b102 ("[PATCH v5 1/2] SQUASH: tcp/dcpp: Convert timewait timer into a delayed_work")
url: https://github.com/intel-lab-lkp/linux/commits/Valentin-Schneider/SQUASH-tcp-dcpp-Convert-timewait-timer-into-a-delayed_work/20240415-193744
base: https://git.kernel.org/cgit/linux/kernel/git/bpf/bpf-next.git master
patch link: https://lore.kernel.org/all/20240415113436.3261042-2-vschneid@redhat.com/
patch subject: [PATCH v5 1/2] SQUASH: tcp/dcpp: Convert timewait timer into a delayed_work

in testcase: ltp
version: ltp-x86_64-14c1f76-1_20240420
with following parameters:

	test: net_stress.appl-ssh



compiler: gcc-13
test machine: 8 threads 1 sockets Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz (Haswell) with 16G memory

(please refer to attached dmesg/kmsg for entire log/backtrace)



If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202404261608.7c346a06-oliver.sang@intel.com


[  157.135844][    C7] BUG: sleeping function called from invalid context at kernel/workqueue.c:2195
[  157.144790][    C7] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 3686, name: sshd
[  157.153204][    C7] preempt_count: 101, expected: 0
[  157.158116][    C7] RCU nest depth: 4, expected: 0
[  157.162939][    C7] CPU: 7 PID: 3686 Comm: sshd Tainted: G S                 6.9.0-rc1-00332-g1463958a05a9 #1
[  157.172905][    C7] Hardware name: Dell Inc. OptiPlex 9020/0DNKMN, BIOS A05 12/05/2013
[  157.180861][    C7] Call Trace:
[  157.184021][    C7]  <IRQ>
[ 157.186743][ C7] dump_stack_lvl (lib/dump_stack.c:117) 
[ 157.191131][ C7] __might_resched (kernel/sched/core.c:10198) 
[ 157.195780][ C7] ? __pfx___might_resched (kernel/sched/core.c:10152) 
[ 157.200951][ C7] __cancel_work_sync (include/linux/kernel.h:73 kernel/workqueue.c:2195 kernel/workqueue.c:4295) 
[ 157.205773][ C7] ? __pfx___cancel_work_sync (kernel/workqueue.c:4290) 
[ 157.211209][ C7] inet_twsk_deschedule_put (net/ipv4/inet_timewait_sock.c:222 (discriminator 1)) 
[ 157.216470][ C7] tcp_v4_rcv (net/ipv4/tcp_ipv4.c:2413) 
[ 157.220770][ C7] ? __pfx_tcp_v4_rcv (net/ipv4/tcp_ipv4.c:2163) 
[ 157.225501][ C7] ? __kernel_text_address (kernel/extable.c:79 (discriminator 1)) 
[ 157.230668][ C7] ? __pfx_raw_v4_input (net/ipv4/raw.c:165) 
[ 157.235574][ C7] ip_protocol_deliver_rcu (net/ipv4/ip_input.c:205 (discriminator 1)) 
[ 157.240830][ C7] ip_local_deliver_finish (include/linux/rcupdate.h:813 net/ipv4/ip_input.c:234) 
[ 157.246171][ C7] ? tcp_wfree (net/ipv4/tcp_output.c:1225) 
[ 157.250464][ C7] ip_local_deliver (include/linux/netfilter.h:314 include/linux/netfilter.h:308 net/ipv4/ip_input.c:254) 
[ 157.255190][ C7] ? __pfx_ip_local_deliver (net/ipv4/ip_input.c:243) 
[ 157.260442][ C7] ? ip_rcv_finish_core+0x1c0/0x10a0 
[ 157.266219][ C7] ip_rcv (include/net/dst.h:460 (discriminator 4) net/ipv4/ip_input.c:449 (discriminator 4) include/linux/netfilter.h:314 (discriminator 4) include/linux/netfilter.h:308 (discriminator 4) net/ipv4/ip_input.c:569 (discriminator 4)) 
[ 157.270078][ C7] ? __pfx_ip_rcv (net/ipv4/ip_input.c:562) 
[ 157.274459][ C7] ? do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) 
[ 157.279012][ C7] ? update_curr (kernel/sched/fair.c:1164 (discriminator 1)) 
[ 157.283395][ C7] ? update_load_avg (kernel/sched/fair.c:4411 kernel/sched/fair.c:4748) 
[ 157.288300][ C7] ? __pfx_ip_rcv (net/ipv4/ip_input.c:562) 
[ 157.292684][ C7] __netif_receive_skb_one_core (net/core/dev.c:5585 (discriminator 4)) 
[ 157.298462][ C7] ? __pfx___netif_receive_skb_one_core (net/core/dev.c:5578) 
[ 157.304760][ C7] ? _raw_spin_lock_irq (arch/x86/include/asm/atomic.h:115 (discriminator 4) include/linux/atomic/atomic-arch-fallback.h:2170 (discriminator 4) include/linux/atomic/atomic-instrumented.h:1302 (discriminator 4) include/asm-generic/qspinlock.h:111 (discriminator 4) include/linux/spinlock.h:187 (discriminator 4) include/linux/spinlock_api_smp.h:120 (discriminator 4) kernel/locking/spinlock.c:170 (discriminator 4)) 
[ 157.309663][ C7] process_backlog (include/linux/rcupdate.h:813 net/core/dev.c:6029) 
[ 157.314305][ C7] ? __pfx_trigger_load_balance (kernel/sched/fair.c:12435) 
[ 157.319904][ C7] __napi_poll (net/core/dev.c:6679) 
[ 157.324110][ C7] ? update_process_times (kernel/time/timer.c:2480) 
[ 157.329276][ C7] net_rx_action (net/core/dev.c:6750 net/core/dev.c:6864) 
[ 157.333748][ C7] ? __pfx_net_rx_action (net/core/dev.c:6828) 
[ 157.338734][ C7] ? __pfx_sched_clock_cpu (kernel/sched/clock.c:389) 
[ 157.343902][ C7] ? __pfx_sched_clock_cpu (kernel/sched/clock.c:389) 
[ 157.349070][ C7] __do_softirq (arch/x86/include/asm/jump_label.h:27 include/linux/jump_label.h:207 include/trace/events/irq.h:142 kernel/softirq.c:555) 
[ 157.353453][ C7] ? __pfx___do_softirq (kernel/softirq.c:512) 
[ 157.358361][ C7] ? irqtime_account_irq (kernel/sched/cputime.c:64 (discriminator 1)) 
[ 157.363449][ C7] do_softirq (kernel/softirq.c:455 (discriminator 32) kernel/softirq.c:442 (discriminator 32)) 
[  157.367488][    C7]  </IRQ>
[  157.370296][    C7]  <TASK>
[ 157.373107][ C7] __local_bh_enable_ip (kernel/softirq.c:382) 
[ 157.378015][ C7] __dev_queue_xmit (net/core/dev.c:4389) 
[ 157.382836][ C7] ? unwind_next_frame (arch/x86/kernel/unwind_orc.c:406 (discriminator 1) arch/x86/kernel/unwind_orc.c:648 (discriminator 1)) 
[ 157.388007][ C7] ? __pfx___dev_queue_xmit (net/core/dev.c:4270) 
[ 157.393263][ C7] ? kernel_text_address (kernel/extable.c:119 (discriminator 1) kernel/extable.c:94 (discriminator 1)) 
[ 157.398348][ C7] ? __kernel_text_address (kernel/extable.c:79 (discriminator 1)) 
[ 157.403516][ C7] ? unwind_get_return_address (arch/x86/kernel/unwind_orc.c:369 (discriminator 1)) 
[ 157.409035][ C7] ? arch_stack_walk (arch/x86/kernel/stacktrace.c:26) 
[ 157.413769][ C7] ? eth_header (net/ethernet/eth.c:100) 
[ 157.418065][ C7] ? neigh_resolve_output (include/linux/netdevice.h:3145 net/core/neighbour.c:1558 net/core/neighbour.c:1543) 
[ 157.423324][ C7] ip_finish_output2 (include/net/neighbour.h:542 (discriminator 2) net/ipv4/ip_output.c:235 (discriminator 2)) 
[ 157.428226][ C7] ? arch_stack_walk (arch/x86/kernel/stacktrace.c:24 (discriminator 1)) 
[ 157.432953][ C7] ? __pfx_ip_skb_dst_mtu (include/net/ip.h:496) 
[ 157.438041][ C7] ? __pfx_ip_finish_output2 (net/ipv4/ip_output.c:199) 
[ 157.443394][ C7] ? __ip_finish_output (include/linux/skbuff.h:1636 include/linux/skbuff.h:4958 net/ipv4/ip_output.c:307 net/ipv4/ip_output.c:295) 
[ 157.448381][ C7] ip_output (net/ipv4/ip_output.c:427) 
[ 157.452495][ C7] ? __pfx_ip_output (net/ipv4/ip_output.c:427) 
[ 157.457137][ C7] __ip_queue_xmit (net/ipv4/ip_output.c:535) 
[ 157.461862][ C7] ? __copy_skb_header (include/net/dst.h:290 net/core/skbuff.c:1528) 
[ 157.466765][ C7] ? __skb_clone (arch/x86/include/asm/atomic.h:53 include/linux/atomic/atomic-arch-fallback.h:992 include/linux/atomic/atomic-instrumented.h:436 net/core/skbuff.c:1599) 
[ 157.471229][ C7] __tcp_transmit_skb (net/ipv4/tcp_output.c:1462 (discriminator 4)) 
[ 157.476317][ C7] ? __pfx___tcp_transmit_skb (net/ipv4/tcp_output.c:1283) 
[ 157.481740][ C7] tcp_write_xmit (net/ipv4/tcp_output.c:2792 (discriminator 2)) 
[ 157.486379][ C7] ? skb_do_copy_data_nocache (include/linux/uio.h:204 include/linux/uio.h:211 include/net/sock.h:2238) 
[ 157.491977][ C7] ? __pfx_skb_do_copy_data_nocache (include/net/sock.h:2229) 
[ 157.497923][ C7] ? skb_page_frag_refill (arch/x86/include/asm/atomic.h:23 include/linux/atomic/atomic-arch-fallback.h:457 include/linux/atomic/atomic-instrumented.h:33 include/linux/page_ref.h:67 net/core/sock.c:2908) 
[ 157.503083][ C7] __tcp_push_pending_frames (net/ipv4/tcp_output.c:2977 (discriminator 2)) 
[ 157.508505][ C7] tcp_sendmsg_locked (net/ipv4/tcp.c:1310) 
[ 157.513578][ C7] ? __pfx_chacha_block_generic (lib/crypto/chacha.c:77) 
[ 157.519173][ C7] ? __pfx_tcp_sendmsg_locked (net/ipv4/tcp.c:1040) 
[ 157.524594][ C7] ? _raw_spin_lock_bh (arch/x86/include/asm/atomic.h:115 (discriminator 4) include/linux/atomic/atomic-arch-fallback.h:2170 (discriminator 4) include/linux/atomic/atomic-instrumented.h:1302 (discriminator 4) include/asm-generic/qspinlock.h:111 (discriminator 4) include/linux/spinlock.h:187 (discriminator 4) include/linux/spinlock_api_smp.h:127 (discriminator 4) kernel/locking/spinlock.c:178 (discriminator 4)) 
[ 157.529405][ C7] ? __pfx__raw_spin_lock_bh (kernel/locking/spinlock.c:177) 
[ 157.534736][ C7] tcp_sendmsg (net/ipv4/tcp.c:1343) 
[ 157.538850][ C7] sock_write_iter (net/socket.c:730 (discriminator 1) net/socket.c:745 (discriminator 1) net/socket.c:1160 (discriminator 1)) 
[ 157.543486][ C7] ? __pfx_sock_write_iter (net/socket.c:1144) 
[ 157.548644][ C7] ? rw_verify_area (fs/read_write.c:377) 
[ 157.553279][ C7] vfs_write (include/linux/fs.h:2108 fs/read_write.c:497 fs/read_write.c:590) 
[ 157.557391][ C7] ? __pfx_vfs_write (fs/read_write.c:571) 
[ 157.562026][ C7] ? __pfx___might_resched (kernel/sched/core.c:10152) 
[ 157.567189][ C7] ksys_write (fs/read_write.c:643) 
[ 157.571389][ C7] ? __pfx_ksys_write (fs/read_write.c:633) 
[ 157.576112][ C7] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) 
[ 157.580486][ C7] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:129) 
[  157.586254][    C7] RIP: 0033:0x7f568f9de240
[ 157.590542][ C7] Code: 40 00 48 8b 15 c1 9b 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 80 3d a1 23 0e 00 00 74 17 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 48 89
All code
========
   0:	40 00 48 8b          	add    %cl,-0x75(%rax)
   4:	15 c1 9b 0d 00       	adc    $0xd9bc1,%eax
   9:	f7 d8                	neg    %eax
   b:	64 89 02             	mov    %eax,%fs:(%rdx)
   e:	48 c7 c0 ff ff ff ff 	mov    $0xffffffffffffffff,%rax
  15:	eb b7                	jmp    0xffffffffffffffce
  17:	0f 1f 00             	nopl   (%rax)
  1a:	80 3d a1 23 0e 00 00 	cmpb   $0x0,0xe23a1(%rip)        # 0xe23c2
  21:	74 17                	je     0x3a
  23:	b8 01 00 00 00       	mov    $0x1,%eax
  28:	0f 05                	syscall 
  2a:*	48 3d 00 f0 ff ff    	cmp    $0xfffffffffffff000,%rax		<-- trapping instruction
  30:	77 58                	ja     0x8a
  32:	c3                   	retq   
  33:	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)
  3a:	48 83 ec 28          	sub    $0x28,%rsp
  3e:	48                   	rex.W
  3f:	89                   	.byte 0x89

Code starting with the faulting instruction
===========================================
   0:	48 3d 00 f0 ff ff    	cmp    $0xfffffffffffff000,%rax
   6:	77 58                	ja     0x60
   8:	c3                   	retq   
   9:	0f 1f 80 00 00 00 00 	nopl   0x0(%rax)
  10:	48 83 ec 28          	sub    $0x28,%rsp
  14:	48                   	rex.W
  15:	89                   	.byte 0x89


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20240426/202404261608.7c346a06-oliver.sang@intel.com
diff mbox series

Patch

diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index f28da08a37b4e..c4d64f1f8d415 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -72,7 +72,7 @@  struct inet_timewait_sock {
 				tw_tos		: 8;
 	u32			tw_txhash;
 	u32			tw_priority;
-	struct timer_list	tw_timer;
+	struct delayed_work     tw_expiry_work;
 	struct inet_bind_bucket	*tw_tb;
 	struct inet_bind2_bucket	*tw_tb2;
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 7adace541fe29..ab11b688f1eeb 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -444,7 +444,7 @@  static int inet_twsk_diag_fill(struct sock *sk,
 
 	r->idiag_state	      = tw->tw_substate;
 	r->idiag_timer	      = 3;
-	tmo = tw->tw_timer.expires - jiffies;
+	tmo = tw->tw_expiry_work.timer.expires - jiffies;
 	r->idiag_expires      = jiffies_delta_to_msecs(tmo);
 	r->idiag_rqueue	      = 0;
 	r->idiag_wqueue	      = 0;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index e8de45d34d56a..7a2dcbaa1a61e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -150,11 +150,13 @@  void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(inet_twsk_hashdance);
 
-static void tw_timer_handler(struct timer_list *t)
+static void tw_expiry_workfn(struct work_struct *work)
 {
-	struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer);
-
+	struct inet_timewait_sock *tw = container_of(
+		work, struct inet_timewait_sock, tw_expiry_work.work);
+	local_bh_disable();
 	inet_twsk_kill(tw);
+	local_bh_enable();
 }
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
@@ -192,7 +194,7 @@  struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
 		tw->tw_prot	    = sk->sk_prot_creator;
 		atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
 		twsk_net_set(tw, sock_net(sk));
-		timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED);
+		INIT_DELAYED_WORK(&tw->tw_expiry_work, tw_expiry_workfn);
 		/*
 		 * Because we use RCU lookups, we should not set tw_refcnt
 		 * to a non null value before everything is setup for this
@@ -217,7 +219,7 @@  EXPORT_SYMBOL_GPL(inet_twsk_alloc);
  */
 void inet_twsk_deschedule_put(struct inet_timewait_sock *tw)
 {
-	if (del_timer_sync(&tw->tw_timer))
+	if (cancel_delayed_work_sync(&tw->tw_expiry_work))
 		inet_twsk_kill(tw);
 	inet_twsk_put(tw);
 }
@@ -255,10 +257,10 @@  void __inet_twsk_schedule(struct inet_timewait_sock *tw, int timeo, bool rearm)
 
 		__NET_INC_STATS(twsk_net(tw), kill ? LINUX_MIB_TIMEWAITKILLED :
 						     LINUX_MIB_TIMEWAITED);
-		BUG_ON(mod_timer(&tw->tw_timer, jiffies + timeo));
+		BUG_ON(!queue_delayed_work(system_unbound_wq, &tw->tw_expiry_work, timeo));
 		refcount_inc(&tw->tw_dr->tw_refcount);
 	} else {
-		mod_timer_pending(&tw->tw_timer, jiffies + timeo);
+		mod_delayed_work(system_unbound_wq, &tw->tw_expiry_work, timeo);
 	}
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_schedule);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a22ee58387518..4b106f017a81f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2920,7 +2920,7 @@  static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
 			       struct seq_file *f, int i)
 {
-	long delta = tw->tw_timer.expires - jiffies;
+	long delta = tw->tw_expiry_work.timer.expires - jiffies;
 	__be32 dest, src;
 	__u16 destp, srcp;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 3f4cba49e9ee6..58cd12fdc91a5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2242,7 +2242,7 @@  static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 static void get_timewait6_sock(struct seq_file *seq,
 			       struct inet_timewait_sock *tw, int i)
 {
-	long delta = tw->tw_timer.expires - jiffies;
+	long delta = tw->tw_expiry_work.timer.expires - jiffies;
 	const struct in6_addr *dest, *src;
 	__u16 destp, srcp;
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index 92267abb462fc..a429f4eb9939c 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -152,7 +152,7 @@  static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
 	__be32 dest, src;
 	long delta;
 
-	delta = tw->tw_timer.expires - bpf_jiffies64();
+	delta = tw->tw_expiry_work.timer.expires - bpf_jiffies64();
 	dest = tw->tw_daddr;
 	src  = tw->tw_rcv_saddr;
 	destp = bpf_ntohs(tw->tw_dport);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index 943f7bba180e7..795bb34c95f72 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -157,7 +157,7 @@  static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
 	__u16 destp, srcp;
 	long delta;
 
-	delta = tw->tw_timer.expires - bpf_jiffies64();
+	delta = tw->tw_expiry_work.timer.expires - bpf_jiffies64();
 	dest = &tw->tw_v6_daddr;
 	src  = &tw->tw_v6_rcv_saddr;
 	destp = bpf_ntohs(tw->tw_dport);