[4/5,RFC,HACK] Add VOLATILE_LRU support to the VM

Message ID 1343346949-53715-5-git-send-email-john.stultz@linaro.org
State Superseded

Commit Message

John Stultz July 26, 2012, 11:55 p.m. UTC
---
 include/linux/fs.h         |    1 +
 include/linux/mm_inline.h  |    2 +
 include/linux/mmzone.h     |    1 +
 include/linux/page-flags.h |    3 +
 include/linux/swap.h       |    3 +
 mm/memcontrol.c            |    1 +
 mm/page_alloc.c            |    1 +
 mm/swap.c                  |   64 +++++++++++++
 mm/vmscan.c                |  207 ++++++++++++++++++++++++++++++++++++++++++-
 9 files changed, 282 insertions(+), 1 deletion(-)

Patch

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8fabb03..c6f3415 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -636,6 +636,7 @@  struct address_space_operations {
 	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,
 					unsigned long);
 	int (*error_remove_page)(struct address_space *, struct page *);
+	int (*purgepage)(struct page *page, struct writeback_control *wbc);
 };
 
 extern const struct address_space_operations empty_aops;
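
The new address_space operation is the hook through which the backing
filesystem discards a purged page's contents. No implementation is added in
this patch; a minimal, hypothetical sketch of one might look as follows (the
name example_purge_page and the use of the error flag to record the purge are
illustrative assumptions, not part of the series):

	static int example_purge_page(struct page *page,
				      struct writeback_control *wbc)
	{
		/* The data is volatile: discard it rather than write it back. */
		ClearPageUptodate(page);
		/* Hypothetically flag the purge so a later lookup can detect it. */
		SetPageError(page);
		return 0;
	}

	static const struct address_space_operations example_aops = {
		/* ...the usual readpage/writepage operations... */
		.purgepage	= example_purge_page,
	};
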
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 1397ccf..f78806c 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -91,6 +91,8 @@  static __always_inline enum lru_list page_lru(struct page *page)
 
 	if (PageUnevictable(page))
 		lru = LRU_UNEVICTABLE;
+	else if (PageIsVolatile(page))
+		lru = LRU_VOLATILE;
 	else {
 		lru = page_lru_base_type(page);
 		if (PageActive(page))
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 68c569f..96f08bb 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -162,6 +162,7 @@  enum lru_list {
 	LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
 	LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
 	LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
+	LRU_VOLATILE,
 	LRU_UNEVICTABLE,
 	NR_LRU_LISTS
 };
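
Note the placement: LRU_VOLATILE sits after LRU_ACTIVE_FILE, so it falls
outside the evictable-lru iterator already defined in this header, which
reads:

	#define for_each_evictable_lru(lru) \
		for (lru = 0; lru <= LRU_ACTIVE_FILE; lru++)

The regular reclaim loops therefore never scan the volatile list on their
own; the vmscan.c changes below drain it explicitly through
shrink_volatile_list().
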
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index c88d2a9..57800c8 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -108,6 +108,7 @@  enum pageflags {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	PG_compound_lock,
 #endif
+	PG_isvolatile,
 	__NR_PAGEFLAGS,
 
 	/* Filesystems */
@@ -201,6 +202,8 @@  PAGEFLAG(Dirty, dirty) TESTSCFLAG(Dirty, dirty) __CLEARPAGEFLAG(Dirty, dirty)
 PAGEFLAG(LRU, lru) __CLEARPAGEFLAG(LRU, lru)
 PAGEFLAG(Active, active) __CLEARPAGEFLAG(Active, active)
 	TESTCLEARFLAG(Active, active)
+PAGEFLAG(IsVolatile, isvolatile) __CLEARPAGEFLAG(IsVolatile, isvolatile)
+	TESTCLEARFLAG(IsVolatile, isvolatile)
 __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, checked)		/* Used by some filesystems */
 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
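
For reference, the PAGEFLAG()/TESTCLEARFLAG() lines above generate the
accessors used throughout the rest of this patch, roughly:

	PageIsVolatile(page)		/* test PG_isvolatile */
	SetPageIsVolatile(page)		/* set it atomically */
	ClearPageIsVolatile(page)	/* clear it atomically */
	__ClearPageIsVolatile(page)	/* non-atomic clear */
	TestClearPageIsVolatile(page)	/* test and clear */
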
diff --git a/include/linux/swap.h b/include/linux/swap.h
index c84ec68..eb12d53 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -236,6 +236,9 @@  extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_page(struct page *page);
 extern void swap_setup(void);
 
+extern void mark_volatile_page(struct page *page);
+extern void mark_nonvolatile_page(struct page *page);
+
 extern void add_page_to_unevictable_list(struct page *page);
 
 /**
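
A caller would pair these helpers with a walk over the pagecache range being
marked. A minimal, hypothetical sketch (the function name and the
find_get_page() loop are illustrative; the user-visible interface that would
drive it is outside this patch):

	static void example_mark_range_volatile(struct address_space *mapping,
						pgoff_t start, pgoff_t end)
	{
		pgoff_t index;

		for (index = start; index <= end; index++) {
			struct page *page = find_get_page(mapping, index);

			if (!page)
				continue;
			mark_volatile_page(page);
			page_cache_release(page);
		}
	}
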
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f72b5e5..98e1303 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4066,6 +4066,7 @@  static const char * const mem_cgroup_lru_names[] = {
 	"active_anon",
 	"inactive_file",
 	"active_file",
+	"volatile",
 	"unevictable",
 };
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4a4f921..cffe1b6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5975,6 +5975,7 @@  static const struct trace_print_flags pageflag_names[] = {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	{1UL << PG_compound_lock,	"compound_lock"	},
 #endif
+	{1UL << PG_isvolatile,		"volatile"	},
 };
 
 static void dump_page_flags(unsigned long flags)
diff --git a/mm/swap.c b/mm/swap.c
index 4e7e2ec..9491a9c 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -574,6 +574,70 @@  void deactivate_page(struct page *page)
 	}
 }
 
+/**
+ * mark_volatile_page - mark a page as volatile
+ * @page: page to mark
+ *
+ * Moves an evictable lru page onto the LRU_VOLATILE list, from which
+ * it may be purged outright rather than written back when the system
+ * is under memory pressure. The caller must hold a reference on the page.
+ */
+void mark_volatile_page(struct page *page)
+{
+	int lru;
+	bool active;
+	struct zone *zone = page_zone(page);
+	struct lruvec *lruvec;
+
+	if (!PageLRU(page))
+		return;
+
+	if (PageUnevictable(page))
+		return;
+
+	active = PageActive(page);
+	lru = page_lru_base_type(page);
+
+	spin_lock_irq(&zone->lru_lock);
+	lruvec = mem_cgroup_page_lruvec(page, zone);
+	del_page_from_lru_list(page, lruvec, lru + active);
+	SetPageIsVolatile(page);
+	ClearPageActive(page);
+	add_page_to_lru_list(page, lruvec, LRU_VOLATILE);
+	spin_unlock_irq(&zone->lru_lock);
+}
+
+/**
+ * mark_nonvolatile_page - move a volatile page back to the active lru
+ * @page: page to mark
+ *
+ * Takes a page off the LRU_VOLATILE list, clears its volatile flag
+ * and returns it to the appropriate active lru, making it once again
+ * subject to normal writeback and reclaim.
+ */
+void mark_nonvolatile_page(struct page *page)
+{
+	int lru;
+	struct zone *zone = page_zone(page);
+	struct lruvec *lruvec;
+
+	if (!PageLRU(page))
+		return;
+
+	if (!PageIsVolatile(page))
+		return;
+
+	lru = page_lru_base_type(page);
+
+	spin_lock_irq(&zone->lru_lock);
+	lruvec = mem_cgroup_page_lruvec(page, zone);
+	del_page_from_lru_list(page, lruvec, LRU_VOLATILE);
+	ClearPageIsVolatile(page);
+	SetPageActive(page);
+	add_page_to_lru_list(page, lruvec, lru + LRU_ACTIVE);
+	spin_unlock_irq(&zone->lru_lock);
+}
+
 void lru_add_drain(void)
 {
 	lru_add_drain_cpu(get_cpu());
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 66e4310..682f147 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -483,7 +483,11 @@  static int __remove_mapping(struct address_space *mapping, struct page *page)
 	if (!page_freeze_refs(page, 2))
 		goto cannot_free;
 	/* note: atomic_cmpxchg in page_freeze_refs provides the smp_rmb */
-	if (unlikely(PageDirty(page))) {
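+	/*
+	 * A dirty volatile page can still be freed: its contents may,
+	 * by definition, be purged rather than written back.
+	 */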
+	if (unlikely(PageDirty(page)) && !PageIsVolatile(page)) {
 		page_unfreeze_refs(page, 2);
 		goto cannot_free;
 	}
@@ -1190,6 +1194,200 @@  putback_inactive_pages(lruvec, struct list_head *page_list)
 	list_splice(&pages_to_free, page_list);
 }
 
+
+/*
+ * shrink_volatile_page_list() purges the volatile pages on @page_list
+ * and returns the number of pages reclaimed.
+ */
+static unsigned long shrink_volatile_page_list(struct list_head *page_list,
+				      struct zone *zone,
+				      struct scan_control *sc)
+{
+	LIST_HEAD(free_pages);
+	LIST_HEAD(ret_pages);
+	unsigned long nr_reclaimed = 0;
+	unsigned long nr_writeback = 0;
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_NONE,
+		.nr_to_write = SWAP_CLUSTER_MAX,
+		.range_start = 0,
+		.range_end = LLONG_MAX,
+		.for_reclaim = 1,
+	};
+
+	while (!list_empty(page_list)) {
+		enum page_references references;
+		struct address_space *mapping;
+		struct page *page;
+		int may_enter_fs;
+
+		cond_resched();
+
+		page = lru_to_page(page_list);
+		list_del(&page->lru);
+
+		if (!trylock_page(page))
+			goto keep;
+
+		VM_BUG_ON(PageActive(page));
+		VM_BUG_ON(page_zone(page) != zone);
+
+		if (unlikely(!page_evictable(page, NULL)))
+			goto keep_locked;
+
+		if (!sc->may_unmap && page_mapped(page))
+			goto keep_locked;
+
+		may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
+			(PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
+
+		if (PageWriteback(page)) {
+			nr_writeback++;
+			unlock_page(page);
+			goto keep;
+		}
+
+		references = page_check_references(page, sc);
+		switch (references) {
+		case PAGEREF_ACTIVATE:
+		case PAGEREF_KEEP:
+			goto keep_locked;
+		case PAGEREF_RECLAIM:
+		case PAGEREF_RECLAIM_CLEAN:
+			; /* try to reclaim the page below */
+		}
+
+		mapping = page_mapping(page);
+
+		/*
+		 * The page is mapped into the page tables of one or more
+		 * processes. Try to unmap it here.
+		 */
+		if (page_mapped(page) && mapping) {
+			switch (try_to_unmap(page, TTU_UNMAP)) {
+			case SWAP_FAIL:
+			case SWAP_AGAIN:
+			case SWAP_MLOCK:
+				goto keep_locked;
+			case SWAP_SUCCESS:
+				; /* try to free the page below */
+			}
+		}
+
+		if (PageDirty(page)) {
+			/*
+			 * Only kswapd can writeback filesystem pages to
+			 * avoid risk of stack overflow but do not writeback
+			 * avoid risk of stack overflow, but do not writeback
+			 */
+			if (page_is_file_cache(page) &&
+					(!current_is_kswapd() ||
+					 sc->priority >= DEF_PRIORITY - 2)) {
+				/*
+				 * Immediately reclaim when written back.
+				 * Similar in principle to deactivate_page()
+				 * except we already have the page isolated
+				 * and know it's dirty.
+				 */
+				SetPageReclaim(page);
+
+				goto keep_locked;
+			}
+
+			if (!mapping) {
+				/*
+				 * Some data journaling orphaned pages can have
+				 * page->mapping == NULL while being dirty with clean buffers.
+				 */
+				if (page_has_private(page)) {
+					if (try_to_free_buffers(page)) {
+						ClearPageDirty(page);
+						printk("%s: orphaned page\n", __func__);
+					}
+				}
+			}
+		}
+
+		/*
+		 * Only purge pages whose mapping provides a purgepage
+		 * method, and check for it before tearing the page out
+		 * of the pagecache, so that a page we cannot purge is
+		 * kept intact.
+		 */
+		if (!mapping || !mapping->a_ops || !mapping->a_ops->purgepage)
+			goto keep_locked;
+
+		if (!__remove_mapping(mapping, page))
+			goto keep_locked;
+
+		mapping->a_ops->purgepage(page, &wbc);
+
+		/*
+		 * At this point, we have no other references and there is
+		 * no way to pick any more up (removed from LRU, removed
+		 * from pagecache). Can use non-atomic bitops now (and
+		 * we obviously don't have to worry about waking up a process
+		 * waiting on the page lock, because there are no references).
+		 */
+		__clear_page_locked(page);
+
+		nr_reclaimed++;
+		/*
+		 * Is there a need to periodically free the free_pages list?
+		 * It would appear not, as the counts should remain low.
+		 */
+		VM_BUG_ON(PageActive(page));
+		list_add(&page->lru, &free_pages);
+		continue;
+
+keep_locked:
+		unlock_page(page);
+keep:
+		list_add(&page->lru, &ret_pages);
+		VM_BUG_ON(PageLRU(page) || PageUnevictable(page));
+	}
+
+	free_hot_cold_page_list(&free_pages, 1);
+
+	list_splice(&ret_pages, page_list);
+	return nr_reclaimed;
+}
+
+static noinline_for_stack unsigned long
+shrink_volatile_list(unsigned long nr_to_scan, struct lruvec *lruvec,
+		     struct scan_control *sc)
+{
+	LIST_HEAD(page_list);
+	unsigned long nr_scanned;
+	unsigned long nr_reclaimed = 0;
+	unsigned long nr_taken;
+	isolate_mode_t isolate_mode = 0;
+	struct zone *zone = lruvec_zone(lruvec);
+
+	lru_add_drain();
+
+	if (!sc->may_unmap)
+		isolate_mode |= ISOLATE_UNMAPPED;
+	if (!sc->may_writepage)
+		isolate_mode |= ISOLATE_CLEAN;
+
+	spin_lock_irq(&zone->lru_lock);
+	nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
+				     &nr_scanned, sc, isolate_mode, LRU_VOLATILE);
+	spin_unlock_irq(&zone->lru_lock);
+
+	if (nr_taken == 0)
+		goto done;
+
+	nr_reclaimed = shrink_volatile_page_list(&page_list, zone, sc);
+
+	spin_lock_irq(&zone->lru_lock);
+	putback_inactive_pages(lruvec, &page_list);
+	spin_unlock_irq(&zone->lru_lock);
+done:
+	return nr_reclaimed;
+}
+
 /*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
@@ -1776,6 +1974,13 @@  restart:
 	get_scan_count(lruvec, sc, nr);
 
 	blk_start_plug(&plug);
+
+	nr_to_scan = min_t(unsigned long,
+			   get_lru_size(lruvec, LRU_VOLATILE),
+			   SWAP_CLUSTER_MAX);
+	if (nr_to_scan)
+		shrink_volatile_list(nr_to_scan, lruvec, sc);
+
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
 					nr[LRU_INACTIVE_FILE]) {
 		for_each_evictable_lru(lru) {