diff mbox series

[1/5] sched,signal,ptrace: Rework TASK_TRACED, TASK_STOPPED state

Message ID 20220412114853.782838521@infradead.org
State New
Headers show
Series ptrace-vs-PREEMPT_RT and freezer rewrite | expand

Commit Message

Peter Zijlstra April 12, 2022, 11:44 a.m. UTC
Currently ptrace_stop() / do_signal_stop() rely on the special states
TASK_TRACED and TASK_STOPPED, respectively, to keep unique state. That is,
this state exists only in task->__state and nowhere else.

There are two spots of bother with this:

 - PREEMPT_RT has task->saved_state which complicates matters,
   meaning task_is_{traced,stopped}() needs to check an additional
   variable.

 - An alternative freezer implementation that itself relies on a
   special TASK state would lose TASK_TRACED/TASK_STOPPED, which would
   result in misbehaviour.

As such, add additional state to task->jobctl to track this state
outside of task->__state.

NOTE: this doesn't actually fix anything yet, just adds extra state.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 include/linux/sched.h        |    8 +++-----
 include/linux/sched/jobctl.h |    8 ++++++++
 include/linux/sched/signal.h |   15 ++++++++++++++-
 kernel/ptrace.c              |   18 ++++++++++++++----
 kernel/signal.c              |    9 ++++++---
 5 files changed, 45 insertions(+), 13 deletions(-)

Comments

Oleg Nesterov April 13, 2022, 1:29 p.m. UTC | #1
On 04/12, Peter Zijlstra wrote:
>
> @@ -475,8 +483,10 @@ static int ptrace_attach(struct task_str
>  	 * in and out of STOPPED are protected by siglock.
>  	 */
>  	if (task_is_stopped(task) &&
> -	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
> +	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) {
> +		task->jobctl &= ~JOBCTL_STOPPED;
>  		signal_wake_up_state(task, __TASK_STOPPED);

OK, but just for record before I forget...

It seems that we can s/JOBCTL_STOPPED/JOBCTL_TRACED/ instead, and kill the
nasty wait_on_bit(JOBCTL_TRAPPING_BIT) along with JOBCTL_TRAPPING_BIT. Sure,
this doesn't belong to this series.

Oleg.
Peter Zijlstra April 13, 2022, 4:47 p.m. UTC | #2
On Wed, Apr 13, 2022 at 03:29:22PM +0200, Oleg Nesterov wrote:
> On 04/12, Peter Zijlstra wrote:
> >
> > @@ -475,8 +483,10 @@ static int ptrace_attach(struct task_str
> >  	 * in and out of STOPPED are protected by siglock.
> >  	 */
> >  	if (task_is_stopped(task) &&
> > -	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
> > +	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) {
> > +		task->jobctl &= ~JOBCTL_STOPPED;
> >  		signal_wake_up_state(task, __TASK_STOPPED);
> 
> OK, but just for record before I forget...
> 
> It seems that we can s/JOBCTL_STOPPED/JOBCTL_TRACED/ instead, and kill the
> nasty wait_on_bit(JOBCTL_TRAPPING_BIT) along with JOBCTL_TRAPPING_BIT. Sure,
> this doesn't belong to this series.

I'm afraid I didn't look hard enough at that part to really understand
it, but some cleanup around there sounds lovely.
diff mbox series

Patch

--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -118,11 +118,9 @@  struct task_group;
 
 #define task_is_running(task)		(READ_ONCE((task)->__state) == TASK_RUNNING)
 
-#define task_is_traced(task)		((READ_ONCE(task->__state) & __TASK_TRACED) != 0)
-
-#define task_is_stopped(task)		((READ_ONCE(task->__state) & __TASK_STOPPED) != 0)
-
-#define task_is_stopped_or_traced(task)	((READ_ONCE(task->__state) & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+#define task_is_traced(task)		((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0)
+#define task_is_stopped(task)		((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0)
+#define task_is_stopped_or_traced(task)	((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0)
 
 /*
  * Special states are those that do not use the normal wait-loop pattern. See
--- a/include/linux/sched/jobctl.h
+++ b/include/linux/sched/jobctl.h
@@ -20,6 +20,10 @@  struct task_struct;
 #define JOBCTL_LISTENING_BIT	22	/* ptracer is listening for events */
 #define JOBCTL_TRAP_FREEZE_BIT	23	/* trap for cgroup freezer */
 
+#define JOBCTL_STOPPED_BIT	24
+#define JOBCTL_TRACED_BIT	25
+#define JOBCTL_TRACED_FROZEN_BIT 26
+
 #define JOBCTL_STOP_DEQUEUED	(1UL << JOBCTL_STOP_DEQUEUED_BIT)
 #define JOBCTL_STOP_PENDING	(1UL << JOBCTL_STOP_PENDING_BIT)
 #define JOBCTL_STOP_CONSUME	(1UL << JOBCTL_STOP_CONSUME_BIT)
@@ -29,6 +33,10 @@  struct task_struct;
 #define JOBCTL_LISTENING	(1UL << JOBCTL_LISTENING_BIT)
 #define JOBCTL_TRAP_FREEZE	(1UL << JOBCTL_TRAP_FREEZE_BIT)
 
+#define JOBCTL_STOPPED		(1UL << JOBCTL_STOPPED_BIT)
+#define JOBCTL_TRACED		(1UL << JOBCTL_TRACED_BIT)
+#define JOBCTL_TRACED_FROZEN	(1UL << JOBCTL_TRACED_FROZEN_BIT)
+
 #define JOBCTL_TRAP_MASK	(JOBCTL_TRAP_STOP | JOBCTL_TRAP_NOTIFY)
 #define JOBCTL_PENDING_MASK	(JOBCTL_STOP_PENDING | JOBCTL_TRAP_MASK)
 
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -294,8 +294,10 @@  static inline int kernel_dequeue_signal(
 static inline void kernel_signal_stop(void)
 {
 	spin_lock_irq(&current->sighand->siglock);
-	if (current->jobctl & JOBCTL_STOP_DEQUEUED)
+	if (current->jobctl & JOBCTL_STOP_DEQUEUED) {
+		current->jobctl |= JOBCTL_STOPPED;
 		set_special_state(TASK_STOPPED);
+	}
 	spin_unlock_irq(&current->sighand->siglock);
 
 	schedule();
@@ -437,10 +439,21 @@  extern void signal_wake_up_state(struct
 
 static inline void signal_wake_up(struct task_struct *t, bool resume)
 {
+	lockdep_assert_held(&t->sighand->siglock);
+
+	if (resume && !(t->jobctl & JOBCTL_TRACED_FROZEN))
+		t->jobctl &= ~(JOBCTL_STOPPED | JOBCTL_TRACED);
+
 	signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
 }
+
 static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
 {
+	lockdep_assert_held(&t->sighand->siglock);
+
+	if (resume)
+		t->jobctl &= ~JOBCTL_TRACED;
+
 	signal_wake_up_state(t, resume ? __TASK_TRACED : 0);
 }
 
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -185,7 +185,12 @@  static bool looks_like_a_spurious_pid(st
 	return true;
 }
 
-/* Ensure that nothing can wake it up, even SIGKILL */
+/*
+ * Ensure that nothing can wake it up, even SIGKILL
+ *
+ * A task is switched to this state while a ptrace operation is in progress;
+ * such that the ptrace operation is uninterruptible.
+ */
 static bool ptrace_freeze_traced(struct task_struct *task)
 {
 	bool ret = false;
@@ -197,6 +202,7 @@  static bool ptrace_freeze_traced(struct
 	spin_lock_irq(&task->sighand->siglock);
 	if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
 	    !__fatal_signal_pending(task)) {
+		task->jobctl |= JOBCTL_TRACED_FROZEN;
 		WRITE_ONCE(task->__state, __TASK_TRACED);
 		ret = true;
 	}
@@ -218,9 +224,11 @@  static void ptrace_unfreeze_traced(struc
 	 */
 	spin_lock_irq(&task->sighand->siglock);
 	if (READ_ONCE(task->__state) == __TASK_TRACED) {
-		if (__fatal_signal_pending(task))
+		task->jobctl &= ~JOBCTL_TRACED_FROZEN;
+		if (__fatal_signal_pending(task)) {
+			task->jobctl &= ~JOBCTL_TRACED;
 			wake_up_state(task, __TASK_TRACED);
-		else
+		} else
 			WRITE_ONCE(task->__state, TASK_TRACED);
 	}
 	spin_unlock_irq(&task->sighand->siglock);
@@ -475,8 +483,10 @@  static int ptrace_attach(struct task_str
 	 * in and out of STOPPED are protected by siglock.
 	 */
 	if (task_is_stopped(task) &&
-	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING))
+	    task_set_jobctl_pending(task, JOBCTL_TRAP_STOP | JOBCTL_TRAPPING)) {
+		task->jobctl &= ~JOBCTL_STOPPED;
 		signal_wake_up_state(task, __TASK_STOPPED);
+	}
 
 	spin_unlock(&task->sighand->siglock);
 
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -884,7 +884,7 @@  static int check_kill_permission(int sig
 static void ptrace_trap_notify(struct task_struct *t)
 {
 	WARN_ON_ONCE(!(t->ptrace & PT_SEIZED));
-	assert_spin_locked(&t->sighand->siglock);
+	lockdep_assert_held(&t->sighand->siglock);
 
 	task_set_jobctl_pending(t, JOBCTL_TRAP_NOTIFY);
 	ptrace_signal_wake_up(t, t->jobctl & JOBCTL_LISTENING);
@@ -930,9 +930,10 @@  static bool prepare_signal(int sig, stru
 		for_each_thread(p, t) {
 			flush_sigqueue_mask(&flush, &t->pending);
 			task_clear_jobctl_pending(t, JOBCTL_STOP_PENDING);
-			if (likely(!(t->ptrace & PT_SEIZED)))
+			if (likely(!(t->ptrace & PT_SEIZED))) {
+				t->jobctl &= ~JOBCTL_STOPPED;
 				wake_up_state(t, __TASK_STOPPED);
-			else
+			} else
 				ptrace_trap_notify(t);
 		}
 
@@ -2219,6 +2220,7 @@  static int ptrace_stop(int exit_code, in
 	 * schedule() will not sleep if there is a pending signal that
 	 * can awaken the task.
 	 */
+	current->jobctl |= JOBCTL_TRACED;
 	set_special_state(TASK_TRACED);
 
 	/*
@@ -2460,6 +2462,7 @@  static bool do_signal_stop(int signr)
 		if (task_participate_group_stop(current))
 			notify = CLD_STOPPED;
 
+		current->jobctl |= JOBCTL_STOPPED;
 		set_special_state(TASK_STOPPED);
 		spin_unlock_irq(&current->sighand->siglock);