
v2 mm/slub: restore/expand unfreeze_partials() local exclusion scope

Message ID 476d147ab6eec386a1e8b8e11cb09708377f8c3e.camel@gmx.de
State New
Series v2 mm/slub: restore/expand unfreeze_partials() local exclusion scope

Commit Message

Mike Galbraith July 17, 2021, 2:58 p.m. UTC
On Thu, 2021-07-15 at 18:34 +0200, Mike Galbraith wrote:
> Greetings crickets,
>
> Methinks the problem is the hole these patches opened only for RT.
>
> static void put_cpu_partial(struct kmem_cache *s, struct page *page,
> int drain)
> {
> #ifdef CONFIG_SLUB_CPU_PARTIAL
> 	struct page *oldpage;
> 	int pages;
> 	int pobjects;
>
> 	slub_get_cpu_ptr(s->cpu_slab);
>         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Bah, I'm tired of waiting to see what if anything mm folks do about
this little bugger, so I'm gonna step on it my damn self and be done
with it.  Fly or die little patchlet.

mm/slub: restore/expand unfreeze_partials() local exclusion scope

2180da7ea70a ("mm, slub: use migrate_disable() on PREEMPT_RT") replaced
preempt_disable() in put_cpu_partial() with migrate_disable(), which when
combined with ___slab_alloc() having become preemptible, leads to
kmem_cache_free()/kfree() blowing through ___slab_alloc() unimpeded,
and vice versa, resulting in PREEMPT_RT-exclusive explosions in both
paths while stress testing with both SLUB_CPU_PARTIAL/MEMCG enabled,
___slab_alloc() during allocation (duh), and __unfreeze_partials()
during free, both while accessing an unmapped page->freelist.

Serialize put_cpu_partial()/unfreeze_partials() on cpu_slab->lock to
ensure that alloc/free paths cannot pluck cpu_slab->partial out from
underneath each other unconstrained.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Fixes: 2180da7ea70a ("mm, slub: use migrate_disable() on PREEMPT_RT")
---
 mm/slub.c |   23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)
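
The shape of the fix, for readers skimming past the full diff in the Patch
section below (illustration only; the helper name is made up, the real hunks
wrap the existing unfreeze_partials() call sites):

/*
 * Sketch: take the same per-CPU local lock that the allocation slow path
 * (___slab_alloc()) holds before draining cpu_slab->partial, so neither
 * path can yank the partial list out from under the other on PREEMPT_RT.
 */
static void drain_cpu_partials_sketch(struct kmem_cache *s)
{
	local_lock(&s->cpu_slab->lock);		/* excludes ___slab_alloc() on this CPU */
	unfreeze_partials(s);			/* cpu_slab->partial is stable here */
	local_unlock(&s->cpu_slab->lock);
}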

Comments

Vlastimil Babka July 18, 2021, 7:58 a.m. UTC | #1
On 7/17/21 4:58 PM, Mike Galbraith wrote:
> On Thu, 2021-07-15 at 18:34 +0200, Mike Galbraith wrote:
>> Greetings crickets,
>>
>> Methinks the problem is the hole these patches opened only for RT.
>>
>> static void put_cpu_partial(struct kmem_cache *s, struct page *page,
>> int drain)
>> {
>> #ifdef CONFIG_SLUB_CPU_PARTIAL
>> 	struct page *oldpage;
>> 	int pages;
>> 	int pobjects;
>>
>> 	slub_get_cpu_ptr(s->cpu_slab);
>>         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> Bah, I'm tired of waiting to see what if anything mm folks do about
> this little bugger, so I'm gonna step on it my damn self and be done
> with it.  Fly or die little patchlet.

Sorry, Mike. Got some badly timed sickness. Will check ASAP.

> mm/slub: restore/expand unfreeze_partials() local exclusion scope
>
> 2180da7ea70a ("mm, slub: use migrate_disable() on PREEMPT_RT") replaced
> preempt_disable() in put_cpu_partial() with migrate_disable(), which when
> combined with ___slab_alloc() having become preemptible, leads to
> kmem_cache_free()/kfree() blowing through ___slab_alloc() unimpeded,
> and vice versa, resulting in PREEMPT_RT-exclusive explosions in both
> paths while stress testing with both SLUB_CPU_PARTIAL/MEMCG enabled,
> ___slab_alloc() during allocation (duh), and __unfreeze_partials()
> during free, both while accessing an unmapped page->freelist.
>
> Serialize put_cpu_partial()/unfreeze_partials() on cpu_slab->lock to
> ensure that alloc/free paths cannot pluck cpu_slab->partial out from
> underneath each other unconstrained.
>
> Signed-off-by: Mike Galbraith <efault@gmx.de>
> Fixes: 2180da7ea70a ("mm, slub: use migrate_disable() on PREEMPT_RT")
> ---
>  mm/slub.c |   23 ++++++++++++++++++++++-
>  1 file changed, 22 insertions(+), 1 deletion(-)
>
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -2418,6 +2418,17 @@ static void __unfreeze_partials(struct k
>  	if (n)
>  		spin_unlock_irqrestore(&n->list_lock, flags);
>
> +	/*
> +	 * If we got here via __slab_free() -> put_cpu_partial(),
> +	 * memcg_free_page_obj_cgroups() ->kfree() may send us
> +	 * back to __slab_free() -> put_cpu_partial() for an
> +	 * unfortunate second encounter with cpu_slab->lock.
> +	 */
> +	if (IS_ENABLED(CONFIG_PREEMPT_RT) && memcg_kmem_enabled()) {
> +		lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
> +		local_unlock(&s->cpu_slab->lock);
> +	}
> +
>  	while (discard_page) {
>  		page = discard_page;
>  		discard_page = discard_page->next;
> @@ -2426,6 +2437,9 @@ static void __unfreeze_partials(struct k
>  		discard_slab(s, page);
>  		stat(s, FREE_SLAB);
>  	}
> +
> +	if (IS_ENABLED(CONFIG_PREEMPT_RT) && memcg_kmem_enabled())
> +		local_lock(&s->cpu_slab->lock);
>  }
>
>  /*
> @@ -2497,7 +2511,9 @@ static void put_cpu_partial(struct kmem_
>  				 * partial array is full. Move the existing
>  				 * set to the per node partial list.
>  				 */
> +				local_lock(&s->cpu_slab->lock);
>  				unfreeze_partials(s);
> +				local_unlock(&s->cpu_slab->lock);
>  				oldpage = NULL;
>  				pobjects = 0;
>  				pages = 0;
> @@ -2579,7 +2595,9 @@ static void flush_cpu_slab(struct work_s
>  	if (c->page)
>  		flush_slab(s, c, true);
>
> +	local_lock(&s->cpu_slab->lock);
>  	unfreeze_partials(s);
> +	local_unlock(&s->cpu_slab->lock);
>  }
>
>  static bool has_cpu_slab(int cpu, struct kmem_cache *s)
> @@ -2632,8 +2650,11 @@ static int slub_cpu_dead(unsigned int cp
>  	struct kmem_cache *s;
>
>  	mutex_lock(&slab_mutex);
> -	list_for_each_entry(s, &slab_caches, list)
> +	list_for_each_entry(s, &slab_caches, list) {
> +		local_lock(&s->cpu_slab->lock);
>  		__flush_cpu_slab(s, cpu);
> +		local_unlock(&s->cpu_slab->lock);
> +	}
>  	mutex_unlock(&slab_mutex);
>  	return 0;
>  }
>
Mike Galbraith July 18, 2021, 8:11 a.m. UTC | #2
On Sun, 2021-07-18 at 09:58 +0200, Vlastimil Babka wrote:
> On 7/17/21 4:58 PM, Mike Galbraith wrote:
> > On Thu, 2021-07-15 at 18:34 +0200, Mike Galbraith wrote:
> > > Greetings crickets,
> > >
> > > Methinks the problem is the hole these patches opened only for RT.
> > >
> > > static void put_cpu_partial(struct kmem_cache *s, struct page
> > > *page,
> > > int drain)
> > > {
> > > #ifdef CONFIG_SLUB_CPU_PARTIAL
> > > 	struct page *oldpage;
> > > 	int pages;
> > > 	int pobjects;
> > >
> > > 	slub_get_cpu_ptr(s->cpu_slab);
> > >         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> >
> > Bah, I'm tired of waiting to see what if anything mm folks do about
> > this little bugger, so I'm gonna step on it my damn self and be
> > done
> > with it.  Fly or die little patchlet.
>
> Sorry, Mike. Got some badly timed sickness. Will check ASAP.

Hey, no apology necessary.  I have no way of knowing what's going on
behind the scenes, but I also didn't like the notion of just walking
away from a busted thing, so given the silence, figured it's better to
post the working something I've got (wart and all) than to let the bug
go unchallenged.  (take that damn bug.. pokes it with sharp stick;)

	-Mike
Mike Galbraith July 18, 2021, 3:43 p.m. UTC | #3
It's moot, but for the record...

> @@ -2418,6 +2418,17 @@ static void __unfreeze_partials(struct k
>  	if (n)
>  		spin_unlock_irqrestore(&n->list_lock, flags);
>
> +	/*
> +	 * If we got here via __slab_free() -> put_cpu_partial(),
> +	 * memcg_free_page_obj_cgroups() ->kfree() may send us
> +	 * back to __slab_free() -> put_cpu_partial() for an
> +	 * unfortunate second encounter with cpu_slab->lock.
> +	 */
> +	if (IS_ENABLED(CONFIG_PREEMPT_RT) && memcg_kmem_enabled()) {
> +		lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));

...that assert needs to hide behind something less transparent.
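
A minimal sketch of one way to do that hiding, assuming the .lock member only
exists in the PREEMPT_RT definition of local_lock_t (the helper name is
invented for illustration; the !RT side compiles to nothing):

/* Keep local_lock_t internals out of !PREEMPT_RT builds. */
#ifdef CONFIG_PREEMPT_RT
#define slub_lockdep_assert_held(s)					\
	lockdep_assert_held(this_cpu_ptr(&(s)->cpu_slab->lock.lock))
#else
#define slub_lockdep_assert_held(s)	do { } while (0)
#endif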

	-Mike
Vlastimil Babka July 18, 2021, 9:19 p.m. UTC | #4
On 7/17/21 4:58 PM, Mike Galbraith wrote:
> On Thu, 2021-07-15 at 18:34 +0200, Mike Galbraith wrote:
>> Greetings crickets,
>>
>> Methinks the problem is the hole these patches opened only for RT.
>>
>> static void put_cpu_partial(struct kmem_cache *s, struct page *page,
>> int drain)
>> {
>> #ifdef CONFIG_SLUB_CPU_PARTIAL
>> 	struct page *oldpage;
>> 	int pages;
>> 	int pobjects;
>>
>> 	slub_get_cpu_ptr(s->cpu_slab);
>>         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
>
> Bah, I'm tired of waiting to see what if anything mm folks do about
> this little bugger, so I'm gonna step on it my damn self and be done
> with it.  Fly or die little patchlet.
>
> mm/slub: restore/expand unfreeze_partials() local exclusion scope
>
> 2180da7ea70a ("mm, slub: use migrate_disable() on PREEMPT_RT") replaced
> preempt_disable() in put_cpu_partial() with migrate_disable(), which when
> combined with ___slab_alloc() having become preemptible, leads to
> kmem_cache_free()/kfree() blowing through ___slab_alloc() unimpeded,
> and vice versa, resulting in PREEMPT_RT-exclusive explosions in both
> paths while stress testing with both SLUB_CPU_PARTIAL/MEMCG enabled,
> ___slab_alloc() during allocation (duh), and __unfreeze_partials()
> during free, both while accessing an unmapped page->freelist.
>
> Serialize put_cpu_partial()/unfreeze_partials() on cpu_slab->lock to

Hm, you mention put_cpu_partial() but your patch handles only the
unfreeze_partials() call from that function? If I understand the problem
correctly, all modifications of cpu_slab->partial have to be protected
on RT after the local_lock conversion, thus also the one that
put_cpu_partial() does by itself (via this_cpu_cmpxchg).

On the other hand the slub_cpu_dead() part should really be unnecessary,
as tglx pointed out.

How about the patch below? It also handles the recursion issue
differently, by not locking around __unfreeze_partials().
If that works, I can think of making it less ugly :/

----8<----
diff --git a/mm/slub.c b/mm/slub.c
index 581004a5aca9..1c7a41460941 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2437,6 +2437,9 @@ static void unfreeze_partials(struct kmem_cache *s)
 {
 	struct page *partial_page;
 
+#ifdef CONFIG_PREEMPT_RT
+	local_lock(&s->cpu_slab->lock);
+#endif
 	do {
 		partial_page = this_cpu_read(s->cpu_slab->partial);
 
@@ -2444,6 +2447,9 @@ static void unfreeze_partials(struct kmem_cache *s)
 		 this_cpu_cmpxchg(s->cpu_slab->partial, partial_page, NULL)
 				  != partial_page);
 
+#ifdef CONFIG_PREEMPT_RT
+	local_unlock(&s->cpu_slab->lock);
+#endif
 	if (partial_page)
 		__unfreeze_partials(s, partial_page);
 }
@@ -2482,7 +2488,11 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 	int pages;
 	int pobjects;
 
-	slub_get_cpu_ptr(s->cpu_slab);
+#ifndef CONFIG_PREEMPT_RT
+	get_cpu_ptr(s->cpu_slab);
+#else
+	local_lock(&s->cpu_slab->lock);
+#endif
 	do {
 		pages = 0;
 		pobjects = 0;
@@ -2496,7 +2506,15 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 				 * partial array is full. Move the existing
 				 * set to the per node partial list.
 				 */
+#ifndef CONFIG_PREEMPT_RT
 				unfreeze_partials(s);
+#else
+				this_cpu_write(s->cpu_slab->partial, NULL);
+				local_unlock(&s->cpu_slab->lock);
+				__unfreeze_partials(s, oldpage);
+				local_lock(&s->cpu_slab->lock);
+#endif
+
 				oldpage = NULL;
 				pobjects = 0;
 				pages = 0;
@@ -2513,7 +2531,11 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 
 	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
 								!= oldpage);
-	slub_put_cpu_ptr(s->cpu_slab);
+#ifndef CONFIG_PREEMPT_RT
+	put_cpu_ptr(s->cpu_slab);
+#else
+	local_unlock(&s->cpu_slab->lock);
+#endif
 #endif	/* CONFIG_SLUB_CPU_PARTIAL */
 }
Mike Galbraith July 19, 2021, 4:01 a.m. UTC | #5
(a pox on whomever broke Evolution text quoting yet again)

On Sun, 2021-07-18 at 23:19 +0200, Vlastimil Babka wrote:
>
> Hm, you mention put_cpu_partial() but your patch handles only the
> unfreeze_partials() call from that function?

Indeed, and that's because when I poked that slub_get_cpu_ptr(), it
poked me back by real deal deadlocking on boot, so I posted the
restore-previously-existing-exclusion-points bit with the unlock/relock
to appease lockdep (big hairy wart) instead.

> If I understand the problem
> correctly, all modifications of cpu_slab->partial have to be protected
> on RT after the local_lock conversion, thus also the one that
> put_cpu_partial() does by itself (via this_cpu_cmpxchg).

Yup, that's what I concluded too, but box disagreed by not booting when
I did that, while seemingly being perfectly fine with the one in
put_cpu_partial() itself.  I figured my slub_get_cpu_ptr() replacement
had failed due to list_lock/local_lock order woes that I didn't notice
until after box bricked on me, but despite the unlock/relock in this
patch, it still manages to be a -ENOBOOT.

	-Mike
Mike Galbraith July 19, 2021, 1:15 p.m. UTC | #6
On Mon, 2021-07-19 at 06:01 +0200, Mike Galbraith wrote:
>
> ... despite the unlock/relock in this patch, it still manages to be a
> -ENOBOOT.

The somewhat bent up half of your patch below has been chugging away
long enough for me to speculate that __slab_free() vs ___slab_alloc()
is the *only* player in the explosion varieties I've been seeing.

---
 mm/slub.c |   13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2484,6 +2484,9 @@ static void put_cpu_partial(struct kmem_
 	int pobjects;

 	slub_get_cpu_ptr(s->cpu_slab);
+	/* __slab_free() vs ___slab_alloc() critical sections */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && drain)
+		local_lock(&s->cpu_slab->lock);
 	do {
 		pages = 0;
 		pobjects = 0;
@@ -2497,7 +2500,13 @@ static void put_cpu_partial(struct kmem_
 				 * partial array is full. Move the existing
 				 * set to the per node partial list.
 				 */
-				unfreeze_partials(s);
+				this_cpu_write(s->cpu_slab->partial, NULL);
+				if (IS_ENABLED(CONFIG_PREEMPT_RT) && drain)
+					local_unlock(&s->cpu_slab->lock);
+				__unfreeze_partials(s, oldpage);
+				if (IS_ENABLED(CONFIG_PREEMPT_RT) && drain)
+					local_lock(&s->cpu_slab->lock);
+
 				oldpage = NULL;
 				pobjects = 0;
 				pages = 0;
@@ -2514,6 +2523,8 @@ static void put_cpu_partial(struct kmem_

 	} while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
 								!= oldpage);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && drain)
+		local_unlock(&s->cpu_slab->lock);
 	slub_put_cpu_ptr(s->cpu_slab);
 #endif	/* CONFIG_SLUB_CPU_PARTIAL */
 }
kernel test robot July 20, 2021, 2:46 a.m. UTC | #7
Hi Mike,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on linux-rt-devel/linux-5.13.y-rt-testing]
[cannot apply to hnaz-linux-mm/master linux/master linus/master v5.14-rc2 next-20210719]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Mike-Galbraith/v2-mm-slub-restore-expand-unfreeze_partials-local-exclusion-scope/20210718-092133
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git linux-5.13.y-rt-testing
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build):
        # https://github.com/0day-ci/linux/commit/9398c1a5075cbaa4a610319cfceeab417d1f3e12
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Mike-Galbraith/v2-mm-slub-restore-expand-unfreeze_partials-local-exclusion-scope/20210718-092133
        git checkout 9398c1a5075cbaa4a610319cfceeab417d1f3e12
        # save the attached .config to linux build tree
        mkdir build_dir
        make W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/x86/include/asm/bug.h:84,
                    from include/linux/bug.h:5,
                    from include/linux/mmdebug.h:5,
                    from include/linux/mm.h:9,
                    from mm/slub.c:13:
   mm/slub.c: In function '__unfreeze_partials':
>> mm/slub.c:2428:54: error: 'local_lock_t' {aka 'struct <anonymous>'} has no member named 'lock'
    2428 |   lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
         |                                                      ^
   include/asm-generic/bug.h:119:25: note: in definition of macro 'WARN_ON'
     119 |  int __ret_warn_on = !!(condition);    \
         |                         ^~~~~~~~~
   include/linux/lockdep.h:311:4: note: in expansion of macro 'lockdep_is_held'
     311 |    lockdep_is_held(l) == LOCK_STATE_NOT_HELD); \
         |    ^~~~~~~~~~~~~~~
   mm/slub.c:2428:3: note: in expansion of macro 'lockdep_assert_held'
    2428 |   lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
         |   ^~~~~~~~~~~~~~~~~~~
   include/linux/percpu-defs.h:241:2: note: in expansion of macro '__verify_pcpu_ptr'
     241 |  __verify_pcpu_ptr(ptr);      \
         |  ^~~~~~~~~~~~~~~~~
   include/linux/percpu-defs.h:252:27: note: in expansion of macro 'raw_cpu_ptr'
     252 | #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr)
         |                           ^~~~~~~~~~~
   mm/slub.c:2428:23: note: in expansion of macro 'this_cpu_ptr'
    2428 |   lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
         |                       ^~~~~~~~~~~~
>> mm/slub.c:2428:54: error: 'local_lock_t' {aka 'struct <anonymous>'} has no member named 'lock'
    2428 |   lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
         |                                                      ^
   include/asm-generic/bug.h:119:25: note: in definition of macro 'WARN_ON'
     119 |  int __ret_warn_on = !!(condition);    \
         |                         ^~~~~~~~~
   include/linux/lockdep.h:311:4: note: in expansion of macro 'lockdep_is_held'
     311 |    lockdep_is_held(l) == LOCK_STATE_NOT_HELD); \
         |    ^~~~~~~~~~~~~~~
   mm/slub.c:2428:3: note: in expansion of macro 'lockdep_assert_held'
    2428 |   lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
         |   ^~~~~~~~~~~~~~~~~~~
   include/linux/percpu-defs.h:242:2: note: in expansion of macro 'arch_raw_cpu_ptr'
     242 |  arch_raw_cpu_ptr(ptr);      \
         |  ^~~~~~~~~~~~~~~~
   include/linux/percpu-defs.h:252:27: note: in expansion of macro 'raw_cpu_ptr'
     252 | #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr)
         |                           ^~~~~~~~~~~
   mm/slub.c:2428:23: note: in expansion of macro 'this_cpu_ptr'
    2428 |   lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
         |                       ^~~~~~~~~~~~
>> mm/slub.c:2428:54: error: 'local_lock_t' {aka 'struct <anonymous>'} has no member named 'lock'
    2428 |   lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
         |                                                      ^
   include/asm-generic/bug.h:119:25: note: in definition of macro 'WARN_ON'
     119 |  int __ret_warn_on = !!(condition);    \
         |                         ^~~~~~~~~
   include/linux/lockdep.h:311:4: note: in expansion of macro 'lockdep_is_held'
     311 |    lockdep_is_held(l) == LOCK_STATE_NOT_HELD); \
         |    ^~~~~~~~~~~~~~~
   mm/slub.c:2428:3: note: in expansion of macro 'lockdep_assert_held'
    2428 |   lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
         |   ^~~~~~~~~~~~~~~~~~~
   include/linux/percpu-defs.h:242:2: note: in expansion of macro 'arch_raw_cpu_ptr'
     242 |  arch_raw_cpu_ptr(ptr);      \
         |  ^~~~~~~~~~~~~~~~
   include/linux/percpu-defs.h:252:27: note: in expansion of macro 'raw_cpu_ptr'
     252 | #define this_cpu_ptr(ptr) raw_cpu_ptr(ptr)
         |                           ^~~~~~~~~~~
   mm/slub.c:2428:23: note: in expansion of macro 'this_cpu_ptr'
    2428 |   lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
         |                       ^~~~~~~~~~~~


vim +2428 mm/slub.c

  2369	
  2370	#ifdef CONFIG_SLUB_CPU_PARTIAL
  2371	static void __unfreeze_partials(struct kmem_cache *s, struct page *partial_page)
  2372	{
  2373		struct kmem_cache_node *n = NULL, *n2 = NULL;
  2374		struct page *page, *discard_page = NULL;
  2375		unsigned long flags = 0;
  2376	
  2377		while (partial_page) {
  2378			struct page new;
  2379			struct page old;
  2380	
  2381			page = partial_page;
  2382			partial_page = page->next;
  2383	
  2384			n2 = get_node(s, page_to_nid(page));
  2385			if (n != n2) {
  2386				if (n)
  2387					spin_unlock_irqrestore(&n->list_lock, flags);
  2388	
  2389				n = n2;
  2390				spin_lock_irqsave(&n->list_lock, flags);
  2391			}
  2392	
  2393			do {
  2394	
  2395				old.freelist = page->freelist;
  2396				old.counters = page->counters;
  2397				VM_BUG_ON(!old.frozen);
  2398	
  2399				new.counters = old.counters;
  2400				new.freelist = old.freelist;
  2401	
  2402				new.frozen = 0;
  2403	
  2404			} while (!__cmpxchg_double_slab(s, page,
  2405					old.freelist, old.counters,
  2406					new.freelist, new.counters,
  2407					"unfreezing slab"));
  2408	
  2409			if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) {
  2410				page->next = discard_page;
  2411				discard_page = page;
  2412			} else {
  2413				add_partial(n, page, DEACTIVATE_TO_TAIL);
  2414				stat(s, FREE_ADD_PARTIAL);
  2415			}
  2416		}
  2417	
  2418		if (n)
  2419			spin_unlock_irqrestore(&n->list_lock, flags);
  2420	
  2421		/*
  2422		 * If we got here via __slab_free() -> put_cpu_partial(),
  2423		 * memcg_free_page_obj_cgroups() ->kfree() may send us
  2424		 * back to __slab_free() -> put_cpu_partial() for an
  2425		 * unfortunate second encounter with cpu_slab->lock.
  2426		 */
  2427		if (IS_ENABLED(CONFIG_PREEMPT_RT) && memcg_kmem_enabled()) {
> 2428			lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
  2429			local_unlock(&s->cpu_slab->lock);
  2430		}
  2431	
  2432		while (discard_page) {
  2433			page = discard_page;
  2434			discard_page = discard_page->next;
  2435	
  2436			stat(s, DEACTIVATE_EMPTY);
  2437			discard_slab(s, page);
  2438			stat(s, FREE_SLAB);
  2439		}
  2440	
  2441		if (IS_ENABLED(CONFIG_PREEMPT_RT) && memcg_kmem_enabled())
  2442			local_lock(&s->cpu_slab->lock);
  2443	}
  2444	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

Patch

--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2418,6 +2418,17 @@  static void __unfreeze_partials(struct k
 	if (n)
 		spin_unlock_irqrestore(&n->list_lock, flags);

+	/*
+	 * If we got here via __slab_free() -> put_cpu_partial(),
+	 * memcg_free_page_obj_cgroups() ->kfree() may send us
+	 * back to __slab_free() -> put_cpu_partial() for an
+	 * unfortunate second encounter with cpu_slab->lock.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && memcg_kmem_enabled()) {
+		lockdep_assert_held(this_cpu_ptr(&s->cpu_slab->lock.lock));
+		local_unlock(&s->cpu_slab->lock);
+	}
+
 	while (discard_page) {
 		page = discard_page;
 		discard_page = discard_page->next;
@@ -2426,6 +2437,9 @@  static void __unfreeze_partials(struct k
 		discard_slab(s, page);
 		stat(s, FREE_SLAB);
 	}
+
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && memcg_kmem_enabled())
+		local_lock(&s->cpu_slab->lock);
 }

 /*
@@ -2497,7 +2511,9 @@  static void put_cpu_partial(struct kmem_
 				 * partial array is full. Move the existing
 				 * set to the per node partial list.
 				 */
+				local_lock(&s->cpu_slab->lock);
 				unfreeze_partials(s);
+				local_unlock(&s->cpu_slab->lock);
 				oldpage = NULL;
 				pobjects = 0;
 				pages = 0;
@@ -2579,7 +2595,9 @@  static void flush_cpu_slab(struct work_s
 	if (c->page)
 		flush_slab(s, c, true);

+	local_lock(&s->cpu_slab->lock);
 	unfreeze_partials(s);
+	local_unlock(&s->cpu_slab->lock);
 }

 static bool has_cpu_slab(int cpu, struct kmem_cache *s)
@@ -2632,8 +2650,11 @@  static int slub_cpu_dead(unsigned int cp
 	struct kmem_cache *s;

 	mutex_lock(&slab_mutex);
-	list_for_each_entry(s, &slab_caches, list)
+	list_for_each_entry(s, &slab_caches, list) {
+		local_lock(&s->cpu_slab->lock);
 		__flush_cpu_slab(s, cpu);
+		local_unlock(&s->cpu_slab->lock);
+	}
 	mutex_unlock(&slab_mutex);
 	return 0;
 }