locking/spinlock/debug: snapshot lock fields

Message ID 20171023122910.28376-1-mark.rutland@arm.com
State New
Headers show
Series
  • locking/spinlock/debug: snapshot lock fields
Related show

Commit Message

Mark Rutland Oct. 23, 2017, 12:29 p.m.
Currently, the lock debug code doesn't use {READ,WRITE}_ONCE() to access
lock fields, and thus may observe torn values given a suitably unhelpful
compiler. These could result in false positives and/or false negatives
for some sanity checks.

Further, as we don't snapshot the values of various fields, these might
change between the time they are sanity checked and the time they are
logged, making debugging difficult.

This patch ensures that lock fields are accessed with
{READ,WRITE}_ONCE(), and uses a snapshot of the lock to ensure that
logged values are the same as those used for the sanity checks.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
---
 kernel/locking/spinlock_debug.c | 73 ++++++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 23 deletions(-)

Hi Peter,

As mentioned at ELCE, I put this together while trying to track down an (as
yet) inexplicable spinlock recursion bug. Luckily, it seems that my compiler
wasn't actively out to get me, and this didn't help, but it might save someone
a few hours of painful debugging in future.

Mark.

-- 
2.11.0

Comments

Peter Zijlstra Oct. 23, 2017, 1:46 p.m. | #1
On Mon, Oct 23, 2017 at 01:29:10PM +0100, Mark Rutland wrote:
> Currently, the lock debug code doesn't use {READ,WRITE}_ONCE() to access

> lock fields, and thus may observe torn values given a suitably unhelpful

> compiler. These could result in false positives and/or false negatives

> for some sanity checks.

> 

> Further, as we don't snapshot the values of various fields, these might

> change between the time they are sanity checked and the time they are

> logged, making debugging difficult.

> 

> This patch ensures that lock fields are accessed with

> {READ,WRITE}_ONCE(), and uses a snapshot of the lock to ensure that

> logged values are the same as those used for the sanity checks.

> 

> Signed-off-by: Mark Rutland <mark.rutland@arm.com>

> Cc: Peter Zijlstra <peterz@infradead.org>

> Cc: Ingo Molnar <mingo@redhat.com>


ACK, I'll try and remember to look at this if there's anything further
we can remove from this file.

> +static inline raw_spinlock_t

> +debug_spin_lock_snapshot(raw_spinlock_t *lockp)

> +{

> +	raw_spinlock_t lock;

> +

> +	lock.raw_lock = READ_ONCE(lockp->raw_lock);


I think you want to make that an smp_load_acquire(), such that we
preserve the release-acquire relation for the lock.

Since this happens _before_ the regular lock, but debug_spin_unlock()
also happens before the regular unlock. So the unlock release is still
placed right. But we've messed up our acquire.

> +	lock.magic = READ_ONCE(lockp->magic);

> +	lock.owner_cpu = READ_ONCE(lockp->owner_cpu);

> +	lock.owner = READ_ONCE(lockp->owner);

> +

> +	return lock;

> +}
kernel test robot Oct. 23, 2017, 8:22 p.m. | #2
Hi Mark,

[auto build test WARNING on tip/locking/core]
[also build test WARNING on v4.14-rc6 next-20171018]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Mark-Rutland/locking-spinlock-debug-snapshot-lock-fields/20171024-035933
config: i386-randconfig-x009-201743 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from include/linux/linkage.h:4:0,
                    from include/linux/preempt.h:9,
                    from include/linux/spinlock.h:50,
                    from kernel/locking/spinlock_debug.c:9:
   kernel/locking/spinlock_debug.c: In function 'debug_spin_unlock':
   kernel/locking/spinlock_debug.c:124:14: error: implicit declaration of function 'arch_spin_value_unlocked' [-Werror=implicit-function-declaration]
     SPIN_BUG_ON(arch_spin_value_unlocked(lock.raw_lock), lockp, lock,
                 ^
   include/linux/compiler.h:156:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> kernel/locking/spinlock_debug.c:81:2: note: in expansion of macro 'if'

     if (unlikely(cond)) spin_bug(lockp, lock, msg)
     ^~
   include/linux/compiler.h:146:24: note: in expansion of macro '__branch_check__'
    #  define unlikely(x) (__branch_check__(x, 0, __builtin_constant_p(x)))
                           ^~~~~~~~~~~~~~~~
   kernel/locking/spinlock_debug.c:81:6: note: in expansion of macro 'unlikely'
     if (unlikely(cond)) spin_bug(lockp, lock, msg)
         ^~~~~~~~
   kernel/locking/spinlock_debug.c:124:2: note: in expansion of macro 'SPIN_BUG_ON'
     SPIN_BUG_ON(arch_spin_value_unlocked(lock.raw_lock), lockp, lock,
     ^~~~~~~~~~~
   kernel/locking/spinlock_debug.c: In function 'do_raw_spin_trylock':
   kernel/locking/spinlock_debug.c:154:49: error: macro "SPIN_BUG_ON" requires 4 arguments, but only 3 given
     SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
                                                    ^
   kernel/locking/spinlock_debug.c:154:2: error: 'SPIN_BUG_ON' undeclared (first use in this function)
     SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
     ^~~~~~~~~~~
   kernel/locking/spinlock_debug.c:154:2: note: each undeclared identifier is reported only once for each function it appears in
   cc1: some warnings being treated as errors

vim +/if +81 kernel/locking/spinlock_debug.c

   > 9	#include <linux/spinlock.h>

    10	#include <linux/nmi.h>
    11	#include <linux/interrupt.h>
    12	#include <linux/debug_locks.h>
    13	#include <linux/delay.h>
    14	#include <linux/export.h>
    15	
    16	void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
    17				  struct lock_class_key *key)
    18	{
    19	#ifdef CONFIG_DEBUG_LOCK_ALLOC
    20		/*
    21		 * Make sure we are not reinitializing a held lock:
    22		 */
    23		debug_check_no_locks_freed((void *)lock, sizeof(*lock));
    24		lockdep_init_map(&lock->dep_map, name, key, 0);
    25	#endif
    26		lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
    27		lock->magic = SPINLOCK_MAGIC;
    28		lock->owner = SPINLOCK_OWNER_INIT;
    29		lock->owner_cpu = -1;
    30	}
    31	
    32	EXPORT_SYMBOL(__raw_spin_lock_init);
    33	
    34	void __rwlock_init(rwlock_t *lock, const char *name,
    35			   struct lock_class_key *key)
    36	{
    37	#ifdef CONFIG_DEBUG_LOCK_ALLOC
    38		/*
    39		 * Make sure we are not reinitializing a held lock:
    40		 */
    41		debug_check_no_locks_freed((void *)lock, sizeof(*lock));
    42		lockdep_init_map(&lock->dep_map, name, key, 0);
    43	#endif
    44		lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
    45		lock->magic = RWLOCK_MAGIC;
    46		lock->owner = SPINLOCK_OWNER_INIT;
    47		lock->owner_cpu = -1;
    48	}
    49	
    50	EXPORT_SYMBOL(__rwlock_init);
    51	
    52	static void spin_dump(raw_spinlock_t *lockp, raw_spinlock_t lock,
    53			      const char *msg)
    54	{
    55		struct task_struct *owner = NULL;
    56	
    57		if (lock.owner && lock.owner != SPINLOCK_OWNER_INIT)
    58			owner = lock.owner;
    59		printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
    60			msg, raw_smp_processor_id(),
    61			current->comm, task_pid_nr(current));
    62		printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
    63				".owner_cpu: %d\n",
    64			lockp, lock.magic,
    65			owner ? owner->comm : "<none>",
    66			owner ? task_pid_nr(owner) : -1,
    67			lock.owner_cpu);
    68		dump_stack();
    69	}
    70	
    71	static void spin_bug(raw_spinlock_t *lockp, raw_spinlock_t lock,
    72			     const char *msg)
    73	{
    74		if (!debug_locks_off())
    75			return;
    76	
    77		spin_dump(lockp, lock, msg);
    78	}
    79	
    80	#define SPIN_BUG_ON(cond, lockp, lock, msg) \
  > 81		if (unlikely(cond)) spin_bug(lockp, lock, msg)

    82	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot Oct. 23, 2017, 8:26 p.m. | #3
Hi Mark,

[auto build test WARNING on tip/locking/core]
[also build test WARNING on v4.14-rc6 next-20171018]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Mark-Rutland/locking-spinlock-debug-snapshot-lock-fields/20171024-035933
config: i386-randconfig-x003-201743 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All warnings (new ones prefixed by >>):

   In file included from include/linux/linkage.h:4:0,
                    from include/linux/preempt.h:9,
                    from include/linux/spinlock.h:50,
                    from kernel/locking/spinlock_debug.c:9:
   kernel/locking/spinlock_debug.c: In function 'debug_spin_unlock':
   kernel/locking/spinlock_debug.c:124:14: error: implicit declaration of function 'arch_spin_value_unlocked' [-Werror=implicit-function-declaration]
     SPIN_BUG_ON(arch_spin_value_unlocked(lock.raw_lock), lockp, lock,
                 ^
   include/linux/compiler.h:131:34: note: in definition of macro '__branch_check__'
       ______r = __builtin_expect(!!(x), expect); \
                                     ^
>> kernel/locking/spinlock_debug.c:81:6: note: in expansion of macro 'unlikely'

     if (unlikely(cond)) spin_bug(lockp, lock, msg)
         ^~~~~~~~
   kernel/locking/spinlock_debug.c:124:2: note: in expansion of macro 'SPIN_BUG_ON'
     SPIN_BUG_ON(arch_spin_value_unlocked(lock.raw_lock), lockp, lock,
     ^~~~~~~~~~~
   kernel/locking/spinlock_debug.c: In function 'do_raw_spin_trylock':
   kernel/locking/spinlock_debug.c:154:49: error: macro "SPIN_BUG_ON" requires 4 arguments, but only 3 given
     SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
                                                    ^
   kernel/locking/spinlock_debug.c:154:2: error: 'SPIN_BUG_ON' undeclared (first use in this function)
     SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
     ^~~~~~~~~~~
   kernel/locking/spinlock_debug.c:154:2: note: each undeclared identifier is reported only once for each function it appears in
   cc1: some warnings being treated as errors

vim +/unlikely +81 kernel/locking/spinlock_debug.c

   > 9	#include <linux/spinlock.h>

    10	#include <linux/nmi.h>
    11	#include <linux/interrupt.h>
    12	#include <linux/debug_locks.h>
    13	#include <linux/delay.h>
    14	#include <linux/export.h>
    15	
    16	void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
    17				  struct lock_class_key *key)
    18	{
    19	#ifdef CONFIG_DEBUG_LOCK_ALLOC
    20		/*
    21		 * Make sure we are not reinitializing a held lock:
    22		 */
    23		debug_check_no_locks_freed((void *)lock, sizeof(*lock));
    24		lockdep_init_map(&lock->dep_map, name, key, 0);
    25	#endif
    26		lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
    27		lock->magic = SPINLOCK_MAGIC;
    28		lock->owner = SPINLOCK_OWNER_INIT;
    29		lock->owner_cpu = -1;
    30	}
    31	
    32	EXPORT_SYMBOL(__raw_spin_lock_init);
    33	
    34	void __rwlock_init(rwlock_t *lock, const char *name,
    35			   struct lock_class_key *key)
    36	{
    37	#ifdef CONFIG_DEBUG_LOCK_ALLOC
    38		/*
    39		 * Make sure we are not reinitializing a held lock:
    40		 */
    41		debug_check_no_locks_freed((void *)lock, sizeof(*lock));
    42		lockdep_init_map(&lock->dep_map, name, key, 0);
    43	#endif
    44		lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
    45		lock->magic = RWLOCK_MAGIC;
    46		lock->owner = SPINLOCK_OWNER_INIT;
    47		lock->owner_cpu = -1;
    48	}
    49	
    50	EXPORT_SYMBOL(__rwlock_init);
    51	
    52	static void spin_dump(raw_spinlock_t *lockp, raw_spinlock_t lock,
    53			      const char *msg)
    54	{
    55		struct task_struct *owner = NULL;
    56	
    57		if (lock.owner && lock.owner != SPINLOCK_OWNER_INIT)
    58			owner = lock.owner;
    59		printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
    60			msg, raw_smp_processor_id(),
    61			current->comm, task_pid_nr(current));
    62		printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
    63				".owner_cpu: %d\n",
    64			lockp, lock.magic,
    65			owner ? owner->comm : "<none>",
    66			owner ? task_pid_nr(owner) : -1,
    67			lock.owner_cpu);
    68		dump_stack();
    69	}
    70	
    71	static void spin_bug(raw_spinlock_t *lockp, raw_spinlock_t lock,
    72			     const char *msg)
    73	{
    74		if (!debug_locks_off())
    75			return;
    76	
    77		spin_dump(lockp, lock, msg);
    78	}
    79	
    80	#define SPIN_BUG_ON(cond, lockp, lock, msg) \
  > 81		if (unlikely(cond)) spin_bug(lockp, lock, msg)

    82	

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

Patch

diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 9aa0fccd5d43..be0f0e5a7097 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -49,58 +49,85 @@  void __rwlock_init(rwlock_t *lock, const char *name,
 
 EXPORT_SYMBOL(__rwlock_init);
 
-static void spin_dump(raw_spinlock_t *lock, const char *msg)
+static void spin_dump(raw_spinlock_t *lockp, raw_spinlock_t lock,
+		      const char *msg)
 {
 	struct task_struct *owner = NULL;
 
-	if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
-		owner = lock->owner;
+	if (lock.owner && lock.owner != SPINLOCK_OWNER_INIT)
+		owner = lock.owner;
 	printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
 		msg, raw_smp_processor_id(),
 		current->comm, task_pid_nr(current));
 	printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
 			".owner_cpu: %d\n",
-		lock, lock->magic,
+		lockp, lock.magic,
 		owner ? owner->comm : "<none>",
 		owner ? task_pid_nr(owner) : -1,
-		lock->owner_cpu);
+		lock.owner_cpu);
 	dump_stack();
 }
 
-static void spin_bug(raw_spinlock_t *lock, const char *msg)
+static void spin_bug(raw_spinlock_t *lockp, raw_spinlock_t lock,
+		     const char *msg)
 {
 	if (!debug_locks_off())
 		return;
 
-	spin_dump(lock, msg);
+	spin_dump(lockp, lock, msg);
 }
 
-#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
+#define SPIN_BUG_ON(cond, lockp, lock, msg) \
+	if (unlikely(cond)) spin_bug(lockp, lock, msg)
+
+/*
+ * Copy fields from a lock, ensuring that each field is read without tearing.
+ * We cannot read the whole lock atomically, so this isn't a snapshot of the
+ * whole lock at an instant in time. However, it is a stable snapshot of each
+ * field that won't change under our feet.
+ */
+static inline raw_spinlock_t
+debug_spin_lock_snapshot(raw_spinlock_t *lockp)
+{
+	raw_spinlock_t lock;
+
+	lock.raw_lock = READ_ONCE(lockp->raw_lock);
+	lock.magic = READ_ONCE(lockp->magic);
+	lock.owner_cpu = READ_ONCE(lockp->owner_cpu);
+	lock.owner = READ_ONCE(lockp->owner);
+
+	return lock;
+}
 
 static inline void
-debug_spin_lock_before(raw_spinlock_t *lock)
+debug_spin_lock_before(raw_spinlock_t *lockp)
 {
-	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
-	SPIN_BUG_ON(lock->owner == current, lock, "recursion");
-	SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
-							lock, "cpu recursion");
+	raw_spinlock_t lock = debug_spin_lock_snapshot(lockp);
+
+	SPIN_BUG_ON(lock.magic != SPINLOCK_MAGIC, lockp, lock, "bad magic");
+	SPIN_BUG_ON(lock.owner == current, lockp, lock, "recursion");
+	SPIN_BUG_ON(lock.owner_cpu == raw_smp_processor_id(), lockp, lock,
+		    "cpu recursion");
 }
 
 static inline void debug_spin_lock_after(raw_spinlock_t *lock)
 {
-	lock->owner_cpu = raw_smp_processor_id();
-	lock->owner = current;
+	WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
+	WRITE_ONCE(lock->owner, current);
 }
 
-static inline void debug_spin_unlock(raw_spinlock_t *lock)
+static inline void debug_spin_unlock(raw_spinlock_t *lockp)
 {
-	SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
-	SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
-	SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
-	SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
-							lock, "wrong CPU");
-	lock->owner = SPINLOCK_OWNER_INIT;
-	lock->owner_cpu = -1;
+	raw_spinlock_t lock = debug_spin_lock_snapshot(lockp);
+
+	SPIN_BUG_ON(lock.magic != SPINLOCK_MAGIC, lockp, lock, "bad magic");
+	SPIN_BUG_ON(arch_spin_value_unlocked(lock.raw_lock), lockp, lock,
+		    "already unlocked");
+	SPIN_BUG_ON(lock.owner != current, lockp, lock, "wrong owner");
+	SPIN_BUG_ON(lock.owner_cpu != raw_smp_processor_id(), lockp, lock,
+		    "wrong CPU");
+	WRITE_ONCE(lockp->owner, SPINLOCK_OWNER_INIT);
+	WRITE_ONCE(lockp->owner_cpu, -1);
 }
 
 /*