--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -14,7 +14,7 @@
#include <linux/mnt_namespace.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
-#include <linux/delay.h>
+#include <linux/hrtimer.h>
#include <linux/security.h>
#include <linux/cred.h>
#include <linux/idr.h>
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -76,10 +76,4 @@ static inline void fsleep(unsigned long usecs)
msleep(DIV_ROUND_UP(usecs, 1000));
}
-#ifdef CONFIG_PREEMPT_RT
-extern void cpu_chill(void);
-#else
-# define cpu_chill() cpu_relax()
-#endif
-
#endif /* defined(_LINUX_DELAY_H) */
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -540,4 +540,10 @@ int hrtimers_dead_cpu(unsigned int cpu);
#define hrtimers_dead_cpu NULL
#endif
+#ifdef CONFIG_PREEMPT_RT
+extern void cpu_chill(void);
+#else
+# define cpu_chill() cpu_relax()
+#endif
+
#endif
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1972,8 +1972,8 @@ config SHUFFLE_PAGE_ALLOCATOR
Say Y if unsure.
config SLUB_CPU_PARTIAL
- default y
- depends on SLUB && SMP && !PREEMPT_RT
+ default y if !PREEMPT_RT
+ depends on SLUB && SMP
bool "SLUB per cpu partial cache"
help
Per cpu partial caches accelerate objects allocation and freeing
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -405,20 +405,20 @@ void task_join_group_stop(struct task_struct *task)
task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING);
}
-static inline struct sigqueue *get_task_cache(struct task_struct *t)
+static struct sigqueue *sigqueue_from_cache(struct task_struct *t)
{
struct sigqueue *q = t->sigqueue_cache;
- if (cmpxchg(&t->sigqueue_cache, q, NULL) != q)
- return NULL;
- return q;
+ if (q && cmpxchg(&t->sigqueue_cache, q, NULL) == q)
+ return q;
+ return NULL;
}
-static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
+static bool sigqueue_add_cache(struct task_struct *t, struct sigqueue *q)
{
- if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
- return 0;
- return 1;
+ if (!t->sigqueue_cache && cmpxchg(&t->sigqueue_cache, NULL, q) == NULL)
+ return true;
+ return false;
}
/*
@@ -428,7 +428,7 @@ static inline int put_task_cache(struct task_struct *t, struct sigqueue *q)
*/
static struct sigqueue *
__sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
- int override_rlimit, int fromslab)
+ int override_rlimit, bool fromslab)
{
struct sigqueue *q = NULL;
struct user_struct *user;
@@ -451,7 +451,7 @@ __sigqueue_do_alloc(int sig, struct task_struct *t, gfp_t flags,
if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) {
if (!fromslab)
- q = get_task_cache(t);
+ q = sigqueue_from_cache(t);
if (!q)
q = kmem_cache_alloc(sigqueue_cachep, flags);
} else {
@@ -474,7 +474,7 @@ static struct sigqueue *
__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags,
int override_rlimit)
{
- return __sigqueue_do_alloc(sig, t, flags, override_rlimit, 0);
+ return __sigqueue_do_alloc(sig, t, flags, override_rlimit, false);
}
static void __sigqueue_free(struct sigqueue *q)
@@ -486,7 +486,7 @@ static void __sigqueue_free(struct sigqueue *q)
kmem_cache_free(sigqueue_cachep, q);
}
-static void sigqueue_free_current(struct sigqueue *q)
+static void __sigqueue_cache_or_free(struct sigqueue *q)
{
struct user_struct *up;
@@ -494,11 +494,10 @@ static void sigqueue_free_current(struct sigqueue *q)
return;
up = q->user;
- if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) {
- if (atomic_dec_and_test(&up->sigpending))
- free_uid(up);
- } else
- __sigqueue_free(q);
+ if (atomic_dec_and_test(&up->sigpending))
+ free_uid(up);
+ if (!task_is_realtime(current) || !sigqueue_add_cache(current, q))
+ kmem_cache_free(sigqueue_cachep, q);
}
void flush_sigqueue(struct sigpending *queue)
@@ -523,7 +522,7 @@ void flush_task_sigqueue(struct task_struct *tsk)
flush_sigqueue(&tsk->pending);
- q = get_task_cache(tsk);
+ q = sigqueue_from_cache(tsk);
if (q)
kmem_cache_free(sigqueue_cachep, q);
}
@@ -652,7 +651,7 @@ static void collect_signal(int sig, struct sigpending *list, kernel_siginfo_t *i
(info->si_code == SI_TIMER) &&
(info->si_sys_private);
- sigqueue_free_current(first);
+ __sigqueue_cache_or_free(first);
} else {
/*
* Ok, it wasn't in the queue. This must be
@@ -1895,8 +1894,7 @@ EXPORT_SYMBOL(kill_pid);
*/
struct sigqueue *sigqueue_alloc(void)
{
- /* Preallocated sigqueue objects always from the slabcache ! */
- struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, 1);
+ struct sigqueue *q = __sigqueue_do_alloc(-1, current, GFP_KERNEL, 0, true);
if (q)
q->flags |= SIGQUEUE_PREALLOC;
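
Not part of the patch: a minimal user-space sketch of the single-slot claim/return pattern that sigqueue_from_cache() and sigqueue_add_cache() implement above, with C11 atomics standing in for the kernel's cmpxchg(). All names below are illustrative, not kernel API.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct obj { int payload; };

static _Atomic(struct obj *) cache_slot;    /* one cached object per owner */

/* Claim the cached object, or NULL if the slot is empty or raced away. */
static struct obj *cache_take(void)
{
    struct obj *o = atomic_load(&cache_slot);

    if (o && atomic_compare_exchange_strong(&cache_slot, &o, NULL))
        return o;
    return NULL;
}

/* Park an object in the empty slot; false means the caller must free it. */
static bool cache_put(struct obj *o)
{
    struct obj *expected = NULL;

    return !atomic_load(&cache_slot) &&
           atomic_compare_exchange_strong(&cache_slot, &expected, o);
}

int main(void)
{
    struct obj *o = malloc(sizeof(*o));

    if (o && !cache_put(o))     /* slot occupied: fall back to freeing */
        free(o);
    o = cache_take();           /* later: try the cache before malloc() */
    if (!o)
        o = malloc(sizeof(*o));
    free(o);
    return 0;
}
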
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt8
+-rt9
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -362,6 +362,8 @@ EXPORT_SYMBOL(nr_node_ids);
EXPORT_SYMBOL(nr_online_nodes);
#endif
+int page_group_by_mobility_disabled __read_mostly;
+
struct pa_lock {
local_lock_t l;
};
@@ -369,8 +371,6 @@ static DEFINE_PER_CPU(struct pa_lock, pa_lock) = {
.l = INIT_LOCAL_LOCK(l),
};
-int page_group_by_mobility_disabled __read_mostly;
-
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
* During boot we initialize deferred pages on-demand, as needed, but once
@@ -1352,7 +1352,7 @@ static inline void prefetch_buddy(struct page *page)
}
/*
- * Frees a number of pages which have been collected from the pcp lists.
+ * Frees a number of pages from the PCP lists
* Assumes all pages on list are in same zone, and of same order.
* count is the number of pages to free.
*
@@ -1362,55 +1362,15 @@ static inline void prefetch_buddy(struct page *page)
* And clear the zone's pages_scanned counter, to hold off the "all pages are
* pinned" detection logic.
*/
-static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
- bool zone_retry)
-{
- bool isolated_pageblocks;
- struct page *page, *tmp;
-
- spin_lock(&zone->lock);
- isolated_pageblocks = has_isolate_pageblock(zone);
-
- /*
- * Use safe version since after __free_one_page(),
- * page->lru.next will not point to original list.
- */
- list_for_each_entry_safe(page, tmp, head, lru) {
- int mt = get_pcppage_migratetype(page);
-
- if (page_zone(page) != zone) {
- /*
- * free_unref_page_list() sorts pages by zone. If we end
- * up with pages from a different NUMA nodes belonging
- * to the same ZONE index then we need to redo with the
- * correct ZONE pointer. Skip the page for now, redo it
- * on the next iteration.
- */
- WARN_ON_ONCE(zone_retry == false);
- if (zone_retry)
- continue;
- }
-
- /* MIGRATE_ISOLATE page should not go to pcplists */
- VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
- /* Pageblock could have been isolated meanwhile */
- if (unlikely(isolated_pageblocks))
- mt = get_pageblock_migratetype(page);
-
- list_del(&page->lru);
- __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
- trace_mm_page_pcpu_drain(page, 0, mt);
- }
- spin_unlock(&zone->lock);
-}
-
-static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
- struct list_head *dst)
+static void free_pcppages_bulk(struct zone *zone, int count,
+ struct per_cpu_pages *pcp)
{
int migratetype = 0;
int batch_free = 0;
int prefetch_nr = READ_ONCE(pcp->batch);
- struct page *page;
+ bool isolated_pageblocks;
+ struct page *page, *tmp;
+ LIST_HEAD(head);
/*
* Ensure proper count is passed which otherwise would stuck in the
@@ -1447,7 +1407,7 @@ static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
if (bulkfree_pcp_prepare(page))
continue;
- list_add_tail(&page->lru, dst);
+ list_add_tail(&page->lru, &head);
/*
* We are going to put the page back to the global
@@ -1464,6 +1424,26 @@ static void isolate_pcp_pages(int count, struct per_cpu_pages *pcp,
}
} while (--count && --batch_free && !list_empty(list));
}
+
+ spin_lock(&zone->lock);
+ isolated_pageblocks = has_isolate_pageblock(zone);
+
+ /*
+ * Use safe version since after __free_one_page(),
+ * page->lru.next will not point to original list.
+ */
+ list_for_each_entry_safe(page, tmp, &head, lru) {
+ int mt = get_pcppage_migratetype(page);
+ /* MIGRATE_ISOLATE page should not go to pcplists */
+ VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
+ /* Pageblock could have been isolated meanwhile */
+ if (unlikely(isolated_pageblocks))
+ mt = get_pageblock_migratetype(page);
+
+ __free_one_page(page, page_to_pfn(page), zone, 0, mt, FPI_NONE);
+ trace_mm_page_pcpu_drain(page, 0, mt);
+ }
+ spin_unlock(&zone->lock);
}
static void free_one_page(struct zone *zone,
@@ -2984,18 +2964,13 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
{
unsigned long flags;
int to_drain, batch;
- LIST_HEAD(dst);
local_lock_irqsave(&pa_lock.l, flags);
batch = READ_ONCE(pcp->batch);
to_drain = min(pcp->count, batch);
if (to_drain > 0)
- isolate_pcp_pages(to_drain, pcp, &dst);
-
+ free_pcppages_bulk(zone, to_drain, pcp);
local_unlock_irqrestore(&pa_lock.l, flags);
-
- if (to_drain > 0)
- free_pcppages_bulk(zone, &dst, false);
}
#endif
@@ -3011,21 +2986,14 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
unsigned long flags;
struct per_cpu_pageset *pset;
struct per_cpu_pages *pcp;
- LIST_HEAD(dst);
- int count;
local_lock_irqsave(&pa_lock.l, flags);
pset = per_cpu_ptr(zone->pageset, cpu);
pcp = &pset->pcp;
- count = pcp->count;
- if (count)
- isolate_pcp_pages(count, pcp, &dst);
-
+ if (pcp->count)
+ free_pcppages_bulk(zone, pcp->count, pcp);
local_unlock_irqrestore(&pa_lock.l, flags);
-
- if (count)
- free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -3245,8 +3213,7 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
return true;
}
-static void free_unref_page_commit(struct page *page, unsigned long pfn,
- struct list_head *dst)
+static void free_unref_page_commit(struct page *page, unsigned long pfn)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
@@ -3275,7 +3242,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
list_add(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
if (pcp->count >= READ_ONCE(pcp->high))
- isolate_pcp_pages(READ_ONCE(pcp->batch), pcp, dst);
+ free_pcppages_bulk(zone, READ_ONCE(pcp->batch), pcp);
}
/*
@@ -3285,17 +3252,13 @@ void free_unref_page(struct page *page)
{
unsigned long flags;
unsigned long pfn = page_to_pfn(page);
- struct zone *zone = page_zone(page);
- LIST_HEAD(dst);
if (!free_unref_page_prepare(page, pfn))
return;
local_lock_irqsave(&pa_lock.l, flags);
- free_unref_page_commit(page, pfn, &dst);
+ free_unref_page_commit(page, pfn);
local_unlock_irqrestore(&pa_lock.l, flags);
- if (!list_empty(&dst))
- free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -3306,11 +3269,6 @@ void free_unref_page_list(struct list_head *list)
struct page *page, *next;
unsigned long flags, pfn;
int batch_count = 0;
- struct list_head dsts[__MAX_NR_ZONES];
- int i;
-
- for (i = 0; i < __MAX_NR_ZONES; i++)
- INIT_LIST_HEAD(&dsts[i]);
/* Prepare pages for freeing */
list_for_each_entry_safe(page, next, list, lru) {
@@ -3323,12 +3281,10 @@ void free_unref_page_list(struct list_head *list)
local_lock_irqsave(&pa_lock.l, flags);
list_for_each_entry_safe(page, next, list, lru) {
unsigned long pfn = page_private(page);
- enum zone_type type;
set_page_private(page, 0);
trace_mm_page_free_batched(page);
- type = page_zonenum(page);
- free_unref_page_commit(page, pfn, &dsts[type]);
+ free_unref_page_commit(page, pfn);
/*
* Guard against excessive IRQ disabled times when we get
@@ -3341,21 +3297,6 @@ void free_unref_page_list(struct list_head *list)
}
}
local_unlock_irqrestore(&pa_lock.l, flags);
-
- for (i = 0; i < __MAX_NR_ZONES; ) {
- struct page *page;
- struct zone *zone;
-
- if (list_empty(&dsts[i])) {
- i++;
- continue;
- }
-
- page = list_first_entry(&dsts[i], struct page, lru);
- zone = page_zone(page);
-
- free_pcppages_bulk(zone, &dsts[i], true);
- }
}
/*
@@ -3552,6 +3493,7 @@ struct page *rmqueue(struct zone *preferred_zone,
* allocate greater than order-1 page units with __GFP_NOFAIL.
*/
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
+
local_lock_irqsave(&pa_lock.l, flags);
spin_lock(&zone->lock);
@@ -3574,6 +3516,7 @@ struct page *rmqueue(struct zone *preferred_zone,
spin_unlock(&zone->lock);
if (!page)
goto failed;
+
__mod_zone_freepage_state(zone, -(1 << order),
get_pcppage_migratetype(page));
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1499,12 +1499,6 @@ static bool freelist_corrupted(struct kmem_cache *s, struct page *page,
}
#endif /* CONFIG_SLUB_DEBUG */
-struct slub_free_list {
- raw_spinlock_t lock;
- struct list_head list;
-};
-static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
-
/*
* Hooks for other subsystems that check memory allocations. In a typical
* production configuration these hooks all should produce no code at all.
@@ -1860,16 +1854,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
__free_pages(page, order);
}
-static void free_delayed(struct list_head *h)
-{
- while (!list_empty(h)) {
- struct page *page = list_first_entry(h, struct page, lru);
-
- list_del(&page->lru);
- __free_slab(page->slab_cache, page);
- }
-}
-
static void rcu_free_slab(struct rcu_head *h)
{
struct page *page = container_of(h, struct page, rcu_head);
@@ -1881,22 +1865,33 @@ static void free_slab(struct kmem_cache *s, struct page *page)
{
if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) {
call_rcu(&page->rcu_head, rcu_free_slab);
- } else if (irqs_disabled()) {
- struct slub_free_list *f = this_cpu_ptr(&slub_free_list);
-
- raw_spin_lock(&f->lock);
- list_add(&page->lru, &f->list);
- raw_spin_unlock(&f->lock);
} else
__free_slab(s, page);
}
+static void discard_slab_delayed(struct kmem_cache *s, struct page *page,
+ struct list_head *delayed_free)
+{
+ dec_slabs_node(s, page_to_nid(page), page->objects);
+ list_add(&page->lru, delayed_free);
+}
+
static void discard_slab(struct kmem_cache *s, struct page *page)
{
dec_slabs_node(s, page_to_nid(page), page->objects);
free_slab(s, page);
}
+static void discard_delayed(struct list_head *l)
+{
+ while (!list_empty(l)) {
+ struct page *page = list_first_entry(l, struct page, lru);
+
+ list_del(&page->lru);
+ __free_slab(page->slab_cache, page);
+ }
+}
+
/*
* Management of partially allocated slabs.
*/
@@ -1970,15 +1965,16 @@ static inline void *acquire_slab(struct kmem_cache *s,
WARN_ON(!freelist);
return freelist;
}
-
-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
+static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain,
+ struct list_head *delayed_free);
static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
/*
* Try to allocate a partial slab from a specific node.
*/
static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
- struct kmem_cache_cpu *c, gfp_t flags)
+ struct kmem_cache_cpu *c, gfp_t flags,
+ struct list_head *delayed_free)
{
struct page *page, *page2;
void *object = NULL;
@@ -2011,7 +2007,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
stat(s, ALLOC_FROM_PARTIAL);
object = t;
} else {
- put_cpu_partial(s, page, 0);
+ put_cpu_partial(s, page, 0, delayed_free);
stat(s, CPU_PARTIAL_NODE);
}
if (!kmem_cache_has_cpu_partial(s)
@@ -2027,7 +2023,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
* Get a page from somewhere. Search in increasing NUMA distances.
*/
static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
- struct kmem_cache_cpu *c)
+ struct kmem_cache_cpu *c,
+ struct list_head *delayed_free)
{
#ifdef CONFIG_NUMA
struct zonelist *zonelist;
@@ -2069,7 +2066,7 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
if (n && cpuset_zone_allowed(zone, flags) &&
n->nr_partial > s->min_partial) {
- object = get_partial_node(s, n, c, flags);
+ object = get_partial_node(s, n, c, flags, delayed_free);
if (object) {
/*
* Don't check read_mems_allowed_retry()
@@ -2091,7 +2088,8 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags,
* Get a partial page, lock it and return it.
*/
static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
- struct kmem_cache_cpu *c)
+ struct kmem_cache_cpu *c,
+ struct list_head *delayed_free)
{
void *object;
int searchnode = node;
@@ -2099,11 +2097,12 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node,
if (node == NUMA_NO_NODE)
searchnode = numa_mem_id();
- object = get_partial_node(s, get_node(s, searchnode), c, flags);
+ object = get_partial_node(s, get_node(s, searchnode), c, flags,
+ delayed_free);
if (object || node != NUMA_NO_NODE)
return object;
- return get_any_partial(s, flags, c);
+ return get_any_partial(s, flags, c, delayed_free);
}
#ifdef CONFIG_PREEMPTION
@@ -2179,7 +2178,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
* Remove the cpu slab
*/
static void deactivate_slab(struct kmem_cache *s, struct page *page,
- void *freelist, struct kmem_cache_cpu *c)
+ void *freelist, struct kmem_cache_cpu *c,
+ struct list_head *delayed_free)
{
enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
struct kmem_cache_node *n = get_node(s, page_to_nid(page));
@@ -2316,7 +2316,7 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
stat(s, DEACTIVATE_FULL);
else if (m == M_FREE) {
stat(s, DEACTIVATE_EMPTY);
- discard_slab(s, page);
+ discard_slab_delayed(s, page, delayed_free);
stat(s, FREE_SLAB);
}
@@ -2331,8 +2331,8 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
* for the cpu using c (or some other guarantee must be there
* to guarantee no concurrent accesses).
*/
-static void unfreeze_partials(struct kmem_cache *s,
- struct kmem_cache_cpu *c)
+static void unfreeze_partials(struct kmem_cache *s, struct kmem_cache_cpu *c,
+ struct list_head *delayed_free)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct kmem_cache_node *n = NULL, *n2 = NULL;
@@ -2386,7 +2386,7 @@ static void unfreeze_partials(struct kmem_cache *s,
discard_page = discard_page->next;
stat(s, DEACTIVATE_EMPTY);
- discard_slab(s, page);
+ discard_slab_delayed(s, page, delayed_free);
stat(s, FREE_SLAB);
}
#endif /* CONFIG_SLUB_CPU_PARTIAL */
@@ -2399,7 +2399,8 @@ static void unfreeze_partials(struct kmem_cache *s,
* If we did not find a slot then simply move all the partials to the
* per node partial list.
*/
-static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
+static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain,
+ struct list_head *delayed_free)
{
#ifdef CONFIG_SLUB_CPU_PARTIAL
struct page *oldpage;
@@ -2416,21 +2417,15 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
pobjects = oldpage->pobjects;
pages = oldpage->pages;
if (drain && pobjects > slub_cpu_partial(s)) {
- struct slub_free_list *f;
unsigned long flags;
- LIST_HEAD(tofree);
/*
* partial array is full. Move the existing
* set to the per node partial list.
*/
local_irq_save(flags);
- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
- f = this_cpu_ptr(&slub_free_list);
- raw_spin_lock(&f->lock);
- list_splice_init(&f->list, &tofree);
- raw_spin_unlock(&f->lock);
+ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab),
+ delayed_free);
local_irq_restore(flags);
- free_delayed(&tofree);
oldpage = NULL;
pobjects = 0;
pages = 0;
@@ -2451,17 +2446,18 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
unsigned long flags;
local_irq_save(flags);
- unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+ unfreeze_partials(s, this_cpu_ptr(s->cpu_slab), delayed_free);
local_irq_restore(flags);
}
preempt_enable();
#endif /* CONFIG_SLUB_CPU_PARTIAL */
}
-static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
+static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c,
+ struct list_head *delayed_free)
{
stat(s, CPUSLAB_FLUSH);
- deactivate_slab(s, c->page, c->freelist, c);
+ deactivate_slab(s, c->page, c->freelist, c, delayed_free);
c->tid = next_tid(c->tid);
}
@@ -2471,46 +2467,81 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
*
* Called from IPI handler with interrupts disabled.
*/
-static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
+static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu,
+ struct list_head *delayed_free)
{
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
if (c->page)
- flush_slab(s, c);
+ flush_slab(s, c, delayed_free);
- unfreeze_partials(s, c);
+ unfreeze_partials(s, c, delayed_free);
}
-static void flush_cpu_slab(void *d)
-{
- struct kmem_cache *s = d;
+struct slub_flush_work {
+ struct work_struct work;
+ struct kmem_cache *s;
+ bool skip;
+};
- __flush_cpu_slab(s, smp_processor_id());
+static void flush_cpu_slab(struct work_struct *w)
+{
+ struct slub_flush_work *sfw;
+ LIST_HEAD(delayed_free);
+
+ sfw = container_of(w, struct slub_flush_work, work);
+
+ local_irq_disable();
+ __flush_cpu_slab(sfw->s, smp_processor_id(), &delayed_free);
+ local_irq_enable();
+
+ discard_delayed(&delayed_free);
}
-static bool has_cpu_slab(int cpu, void *info)
+static bool has_cpu_slab(int cpu, struct kmem_cache *s)
{
- struct kmem_cache *s = info;
struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
return c->page || slub_percpu_partial(c);
}
+static DEFINE_MUTEX(flush_lock);
+static DEFINE_PER_CPU(struct slub_flush_work, slub_flush);
+
+static void flush_all_locked(struct kmem_cache *s)
+{
+ struct slub_flush_work *sfw;
+ unsigned int cpu;
+
+ mutex_lock(&flush_lock);
+
+ for_each_online_cpu(cpu) {
+ sfw = &per_cpu(slub_flush, cpu);
+ if (!has_cpu_slab(cpu, s)) {
+ sfw->skip = true;
+ continue;
+ }
+ INIT_WORK(&sfw->work, flush_cpu_slab);
+ sfw->skip = false;
+ sfw->s = s;
+ schedule_work_on(cpu, &sfw->work);
+ }
+
+ for_each_online_cpu(cpu) {
+ sfw = &per_cpu(slub_flush, cpu);
+ if (sfw->skip)
+ continue;
+ flush_work(&sfw->work);
+ }
+
+ mutex_unlock(&flush_lock);
+}
+
static void flush_all(struct kmem_cache *s)
{
- LIST_HEAD(tofree);
- int cpu;
-
- on_each_cpu_cond(has_cpu_slab, flush_cpu_slab, s, 1);
- for_each_online_cpu(cpu) {
- struct slub_free_list *f;
-
- f = &per_cpu(slub_free_list, cpu);
- raw_spin_lock_irq(&f->lock);
- list_splice_init(&f->list, &tofree);
- raw_spin_unlock_irq(&f->lock);
- free_delayed(&tofree);
- }
+ cpus_read_lock();
+ flush_all_locked(s);
+ cpus_read_unlock();
}
/*
@@ -2521,13 +2552,15 @@ static int slub_cpu_dead(unsigned int cpu)
{
struct kmem_cache *s;
unsigned long flags;
+ LIST_HEAD(delayed_free);
mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_caches, list) {
local_irq_save(flags);
- __flush_cpu_slab(s, cpu);
+ __flush_cpu_slab(s, cpu, &delayed_free);
local_irq_restore(flags);
}
+ discard_delayed(&delayed_free);
mutex_unlock(&slab_mutex);
return 0;
}
@@ -2611,7 +2644,8 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
}
static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
- int node, struct kmem_cache_cpu **pc)
+ int node, struct kmem_cache_cpu **pc,
+ struct list_head *delayed_free)
{
void *freelist;
struct kmem_cache_cpu *c = *pc;
@@ -2619,7 +2653,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));
- freelist = get_partial(s, flags, node, c);
+ freelist = get_partial(s, flags, node, c, delayed_free);
if (freelist)
return freelist;
@@ -2628,7 +2662,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags,
if (page) {
c = raw_cpu_ptr(s->cpu_slab);
if (c->page)
- flush_slab(s, c);
+ flush_slab(s, c, delayed_free);
/*
* No other reference to the page yet so we can
@@ -2708,9 +2742,8 @@ static inline void *get_freelist(struct kmem_cache *s, struct page *page)
*/
static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
unsigned long addr, struct kmem_cache_cpu *c,
- struct list_head *to_free)
+ struct list_head *delayed_free)
{
- struct slub_free_list *f;
void *freelist;
struct page *page;
@@ -2739,7 +2772,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
goto redo;
} else {
stat(s, ALLOC_NODE_MISMATCH);
- deactivate_slab(s, page, c->freelist, c);
+ deactivate_slab(s, page, c->freelist, c, delayed_free);
goto new_slab;
}
}
@@ -2750,7 +2783,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
* information when the page leaves the per-cpu allocator
*/
if (unlikely(!pfmemalloc_match(page, gfpflags))) {
- deactivate_slab(s, page, c->freelist, c);
+ deactivate_slab(s, page, c->freelist, c, delayed_free);
goto new_slab;
}
@@ -2778,13 +2811,6 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
VM_BUG_ON(!c->page->frozen);
c->freelist = get_freepointer(s, freelist);
c->tid = next_tid(c->tid);
-
-out:
- f = this_cpu_ptr(&slub_free_list);
- raw_spin_lock(&f->lock);
- list_splice_init(&f->list, to_free);
- raw_spin_unlock(&f->lock);
-
return freelist;
new_slab:
@@ -2796,11 +2822,11 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
goto redo;
}
- freelist = new_slab_objects(s, gfpflags, node, &c);
+ freelist = new_slab_objects(s, gfpflags, node, &c, delayed_free);
if (unlikely(!freelist)) {
slab_out_of_memory(s, gfpflags, node);
- goto out;
+ return NULL;
}
page = c->page;
@@ -2812,8 +2838,8 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
!alloc_debug_processing(s, page, freelist, addr))
goto new_slab; /* Slab failed checks. Next slab needed */
- deactivate_slab(s, page, get_freepointer(s, freelist), c);
- goto out;
+ deactivate_slab(s, page, get_freepointer(s, freelist), c, delayed_free);
+ return freelist;
}
/*
@@ -2825,7 +2851,7 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
{
void *p;
unsigned long flags;
- LIST_HEAD(tofree);
+ LIST_HEAD(delayed_free);
local_irq_save(flags);
#ifdef CONFIG_PREEMPTION
@@ -2837,9 +2863,9 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
c = this_cpu_ptr(s->cpu_slab);
#endif
- p = ___slab_alloc(s, gfpflags, node, addr, c, &tofree);
+ p = ___slab_alloc(s, gfpflags, node, addr, c, &delayed_free);
local_irq_restore(flags);
- free_delayed(&tofree);
+ discard_delayed(&delayed_free);
return p;
}
@@ -3094,11 +3120,13 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
*/
stat(s, FREE_FROZEN);
} else if (new.frozen) {
+ LIST_HEAD(delayed_free);
/*
* If we just froze the page then put it onto the
* per cpu partial list.
*/
- put_cpu_partial(s, page, 1);
+ put_cpu_partial(s, page, 1, &delayed_free);
+ discard_delayed(&delayed_free);
stat(s, CPU_PARTIAL_FREE);
}
@@ -3340,9 +3368,9 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
void **p)
{
struct kmem_cache_cpu *c;
- LIST_HEAD(to_free);
int i;
struct obj_cgroup *objcg = NULL;
+ LIST_HEAD(delayed_free);
if (IS_ENABLED(CONFIG_PREEMPT_RT) && IS_ENABLED(CONFIG_DEBUG_ATOMIC_SLEEP))
WARN_ON_ONCE(!preemptible() &&
@@ -3378,7 +3406,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
* of re-populating per CPU c->freelist
*/
p[i] = ___slab_alloc(s, flags, NUMA_NO_NODE,
- _RET_IP_, c, &to_free);
+ _RET_IP_, c, &delayed_free);
if (unlikely(!p[i]))
goto error;
@@ -3393,7 +3421,8 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
}
c->tid = next_tid(c->tid);
local_irq_enable();
- free_delayed(&to_free);
+
+ discard_delayed(&delayed_free);
/* Clear memory outside IRQ disabled fastpath loop */
if (unlikely(slab_want_init_on_alloc(flags, s))) {
@@ -3408,7 +3437,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
return i;
error:
local_irq_enable();
- free_delayed(&to_free);
+ discard_delayed(&delayed_free);
slab_post_alloc_hook(s, objcg, flags, i, p);
__kmem_cache_free_bulk(s, i, p);
return 0;
@@ -3994,7 +4023,7 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
int node;
struct kmem_cache_node *n;
- flush_all(s);
+ flush_all_locked(s);
/* Attempt to free all objects */
for_each_kmem_cache_node(s, node, n) {
free_partial(s, n);
@@ -4234,7 +4263,7 @@ int __kmem_cache_shrink(struct kmem_cache *s)
unsigned long flags;
int ret = 0;
- flush_all(s);
+ flush_all_locked(s);
for_each_kmem_cache_node(s, node, n) {
INIT_LIST_HEAD(&discard);
for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
@@ -4418,6 +4447,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
int node;
struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
struct kmem_cache_node *n;
+ LIST_HEAD(delayed_free);
memcpy(s, static_cache, kmem_cache->object_size);
@@ -4426,7 +4456,8 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
* up. Even if it weren't true, IRQs are not up so we couldn't fire
* IPIs around.
*/
- __flush_cpu_slab(s, smp_processor_id());
+ __flush_cpu_slab(s, smp_processor_id(), &delayed_free);
+ discard_delayed(&delayed_free);
for_each_kmem_cache_node(s, node, n) {
struct page *p;
@@ -4446,12 +4477,6 @@ void __init kmem_cache_init(void)
{
static __initdata struct kmem_cache boot_kmem_cache,
boot_kmem_cache_node;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- raw_spin_lock_init(&per_cpu(slub_free_list, cpu).lock);
- INIT_LIST_HEAD(&per_cpu(slub_free_list, cpu).list);
- }
if (debug_guardpage_minorder())
slub_max_order = 0;
@@ -4712,6 +4737,9 @@ static int alloc_loc_track(struct loc_track *t, unsigned long max, gfp_t flags)
struct location *l;
int order;
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && flags == GFP_ATOMIC)
+ return 0;
+
order = get_order(sizeof(struct location) * max);
l = (void *)__get_free_pages(flags, order);
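
Not part of the patch: a user-space sketch of the dispatch-then-wait pattern behind flush_all_locked() and the reworked flush_cpu_slab() in the mm/slub.c hunks above. One flush job is queued per CPU that actually holds cached state, the caller waits only for the jobs it queued, and a mutex serializes concurrent flushers. POSIX threads stand in for the kernel workqueue; every name below is illustrative.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_CPUS 4

static pthread_mutex_t flush_lock = PTHREAD_MUTEX_INITIALIZER;

struct flush_job {
    pthread_t thread;   /* stands in for the per-CPU work_struct */
    bool skip;          /* this CPU had nothing cached, no job queued */
    int cpu;
};

static bool has_cpu_slab(int cpu)
{
    return cpu % 2 == 0;    /* stand-in for c->page || slub_percpu_partial(c) */
}

static void *flush_cpu_slab(void *arg)
{
    struct flush_job *job = arg;

    printf("flushing cpu %d\n", job->cpu);  /* the real work frees cached slabs */
    return NULL;
}

static void flush_all_locked(void)
{
    struct flush_job jobs[NR_CPUS];

    pthread_mutex_lock(&flush_lock);

    for (int cpu = 0; cpu < NR_CPUS; cpu++) {       /* dispatch phase */
        jobs[cpu].cpu = cpu;
        jobs[cpu].skip = !has_cpu_slab(cpu);
        if (jobs[cpu].skip)
            continue;
        pthread_create(&jobs[cpu].thread, NULL, flush_cpu_slab, &jobs[cpu]);
    }

    for (int cpu = 0; cpu < NR_CPUS; cpu++)         /* wait phase */
        if (!jobs[cpu].skip)
            pthread_join(jobs[cpu].thread, NULL);

    pthread_mutex_unlock(&flush_lock);
}

int main(void)
{
    flush_all_locked();
    return 0;
}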