diff mbox

[3/3] vrange: Add page purging logic

Message ID 1394750668-28654-3-git-send-email-john.stultz@linaro.org
State New
Headers show

Commit Message

John Stultz March 13, 2014, 10:44 p.m. UTC
This patch adds the hooks in the vmscan logic to discard volatile
pages and mark their pte as purged.

This is a simplified implementation that uses some of the logic from
Minchan's earlier efforts, so credit to Minchan for his work.

Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 include/linux/vrange.h |  2 ++
 mm/internal.h          |  2 --
 mm/memory.c            | 21 +++++++++++
 mm/rmap.c              |  5 +++
 mm/vmscan.c            | 12 +++++++
 mm/vrange.c            | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 137 insertions(+), 2 deletions(-)
diff mbox

Patch

diff --git a/include/linux/vrange.h b/include/linux/vrange.h
index c4a1616..b18551f 100644
--- a/include/linux/vrange.h
+++ b/include/linux/vrange.h
@@ -7,6 +7,8 @@ 
 #define VRANGE_NONVOLATILE 0
 #define VRANGE_VOLATILE 1
 
+extern int discard_vpage(struct page *page);
+
 static inline swp_entry_t swp_entry_mk_vrange_purged(void)
 {
 	return swp_entry(SWP_VRANGE_PURGED, 0);
diff --git a/mm/internal.h b/mm/internal.h
index 29e1e76..ea66bf9 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -225,10 +225,8 @@  static inline void mlock_migrate_page(struct page *newpage, struct page *page)
 
 extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern unsigned long vma_address(struct page *page,
 				 struct vm_area_struct *vma);
-#endif
 #else /* !CONFIG_MMU */
 static inline int mlocked_vma_newpage(struct vm_area_struct *v, struct page *p)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 22dfa61..7ea9712 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -60,6 +60,7 @@ 
 #include <linux/migrate.h>
 #include <linux/string.h>
 #include <linux/dma-debug.h>
+#include <linux/vrange.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -3643,6 +3644,8 @@  static int handle_pte_fault(struct mm_struct *mm,
 
 	entry = *pte;
 	if (!pte_present(entry)) {
+		swp_entry_t vrange_entry;
+retry:
 		if (pte_none(entry)) {
 			if (vma->vm_ops) {
 				if (likely(vma->vm_ops->fault))
@@ -3652,6 +3655,24 @@  static int handle_pte_fault(struct mm_struct *mm,
 			return do_anonymous_page(mm, vma, address,
 						 pte, pmd, flags);
 		}
+
+		vrange_entry = pte_to_swp_entry(entry);
+		if (unlikely(entry_is_vrange_purged(vrange_entry))) {
+			if (vma->vm_flags & VM_VOLATILE)
+				return VM_FAULT_SIGBUS;
+
+			/* zap pte */
+			ptl = pte_lockptr(mm, pmd);
+			spin_lock(ptl);
+			if (unlikely(!pte_same(*pte, entry)))
+				goto unlock;
+			flush_cache_page(vma, address, pte_pfn(*pte));
+			ptep_clear_flush(vma, address, pte);
+			pte_unmap_unlock(pte, ptl);
+			goto retry;
+		}
+
+
 		if (pte_file(entry))
 			return do_nonlinear_fault(mm, vma, address,
 					pte, pmd, flags, entry);
diff --git a/mm/rmap.c b/mm/rmap.c
index d9d4231..2b6f079 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -728,6 +728,11 @@  int page_referenced_one(struct page *page, struct vm_area_struct *vma,
 				referenced++;
 		}
 		pte_unmap_unlock(pte, ptl);
+		if (vma->vm_flags & VM_VOLATILE) {
+			pra->mapcount = 0;
+			pra->vm_flags |= VM_VOLATILE;
+			return SWAP_FAIL;
+		}
 	}
 
 	if (referenced) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a9c74b4..c5c0ee0 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -43,6 +43,7 @@ 
 #include <linux/sysctl.h>
 #include <linux/oom.h>
 #include <linux/prefetch.h>
+#include <linux/vrange.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -683,6 +684,7 @@  enum page_references {
 	PAGEREF_RECLAIM,
 	PAGEREF_RECLAIM_CLEAN,
 	PAGEREF_KEEP,
+	PAGEREF_DISCARD,
 	PAGEREF_ACTIVATE,
 };
 
@@ -703,6 +705,13 @@  static enum page_references page_check_references(struct page *page,
 	if (vm_flags & VM_LOCKED)
 		return PAGEREF_RECLAIM;
 
+	/*
+	 * If a volatile page reaches the LRU's tail, we discard the
+	 * page without considering recycling it.
+	 */
+	if (vm_flags & VM_VOLATILE)
+		return PAGEREF_DISCARD;
+
 	if (referenced_ptes) {
 		if (PageSwapBacked(page))
 			return PAGEREF_ACTIVATE;
@@ -930,6 +939,9 @@  static unsigned long shrink_page_list(struct list_head *page_list,
 		switch (references) {
 		case PAGEREF_ACTIVATE:
 			goto activate_locked;
+		case PAGEREF_DISCARD:
+			if (may_enter_fs && discard_vpage(page) == 0)
+				goto free_it;
 		case PAGEREF_KEEP:
 			goto keep_locked;
 		case PAGEREF_RECLAIM:
diff --git a/mm/vrange.c b/mm/vrange.c
index 73ef7ac..99d2091 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -207,3 +207,100 @@  SYSCALL_DEFINE4(vrange, unsigned long, start,
 out:
 	return ret;
 }
+
+static void try_to_discard_one(struct page *page, struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pte_t *pte;
+	pte_t pteval;
+	spinlock_t *ptl;
+	unsigned long addr;
+
+	VM_BUG_ON(!PageLocked(page));
+	/* Unmap @page from @vma and replace its pte with a purged marker. */
+	addr = vma_address(page, vma);
+	pte = page_check_address(page, mm, addr, &ptl, 0);
+	if (!pte)
+		return;
+
+	BUG_ON(vma->vm_flags & (VM_SPECIAL|VM_LOCKED|VM_MIXEDMAP|VM_HUGETLB));
+	/* NOTE(review): pteval is assigned below but never read afterwards. */
+	flush_cache_page(vma, addr, page_to_pfn(page));
+	pteval = ptep_clear_flush(vma, addr, pte);
+
+	update_hiwater_rss(mm);
+	if (PageAnon(page))
+		dec_mm_counter(mm, MM_ANONPAGES);
+	else
+		dec_mm_counter(mm, MM_FILEPAGES);
+
+	page_remove_rmap(page);
+	page_cache_release(page);
+	/* Purged marker; handle_pte_fault() tests for it on later faults. */
+	set_pte_at(mm, addr, pte,
+				swp_entry_to_pte(swp_entry_mk_vrange_purged()));
+
+	pte_unmap_unlock(pte, ptl);
+	mmu_notifier_invalidate_page(mm, addr);
+
+}
+
+/* Purge @page from its volatile vmas; -1 if the anon_vma lookup fails. */
+static int try_to_discard_anon_vpage(struct page *page)
+{
+	struct anon_vma *anon_vma;
+	struct anon_vma_chain *avc;
+	pgoff_t pgoff;
+
+	anon_vma = page_lock_anon_vma_read(page);
+	if (!anon_vma)
+		return -1;
+
+	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	/*
+	 * While iterating this loop, some processes may see the page as
+	 * purged while others still see it as not purged, because there is
+	 * no global lock between parent and child protecting against the
+	 * vrange system call during the walk. That is not a problem: the
+	 * page is not a *SHARED* page but a *COW* page, so parent and child
+	 * may see different data at any time. The worst case of this race
+	 * is that a page was purged but could not be discarded, which costs
+	 * an unnecessary page fault, but that is not severe.
+	 */
+	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
+		struct vm_area_struct *vma = avc->vma;
+
+		if (!(vma->vm_flags & VM_VOLATILE))
+			continue;
+		try_to_discard_one(page, vma);
+	}
+	page_unlock_anon_vma_read(anon_vma);
+	return 0;
+}
+
+/* Only anonymous pages are handled here; any other page yields -1. */
+static int try_to_discard_vpage(struct page *page)
+{
+	if (PageAnon(page))
+		return try_to_discard_anon_vpage(page);
+	return -1;
+}
+
+/* Return 0 (page unlocked) if discard and ref freeze succeed, else 1. */
+int discard_vpage(struct page *page)
+{
+	VM_BUG_ON(!PageLocked(page));
+	VM_BUG_ON(PageLRU(page));
+
+	if (!try_to_discard_vpage(page)) {
+		if (PageSwapCache(page))
+			try_to_free_swap(page);
+		/* presumably needs exactly one remaining ref -- confirm */
+		if (page_freeze_refs(page, 1)) {
+			unlock_page(page);
+			return 0;
+		}
+	}
+
+	return 1;
+}