
[RFC,V3,3/6] arm: mm: implement get_user_pages_fast

Message ID 1394631623-17883-4-git-send-email-steve.capper@linaro.org
State New

Commit Message

Steve Capper March 12, 2014, 1:40 p.m. UTC
An implementation of get_user_pages_fast for ARM. It is based loosely
on the PowerPC implementation. We disable interrupts in the walker to
prevent the call_rcu_sched pagetable freeing code from running under
us.

We also explicitly fire an IPI in the Transparent HugePage splitting
case to prevent splits from interfering with the fast_gup walker.
As THP splits are relatively rare, this should not have a noticeable
overhead.

Signed-off-by: Steve Capper <steve.capper@linaro.org>
---
 arch/arm/include/asm/pgtable-3level.h |   6 +
 arch/arm/mm/Makefile                  |   1 +
 arch/arm/mm/gup.c                     | 299 ++++++++++++++++++++++++++++++++++
 3 files changed, 306 insertions(+)
 create mode 100644 arch/arm/mm/gup.c

Comments

Peter Zijlstra March 12, 2014, 2:18 p.m. UTC | #1
On Wed, Mar 12, 2014 at 01:40:20PM +0000, Steve Capper wrote:
> +int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
> +			  struct page **pages)
> +{
> +	struct mm_struct *mm = current->mm;
> +	unsigned long addr, len, end;
> +	unsigned long next, flags;
> +	pgd_t *pgdp;
> +	int nr = 0;
> +
> +	start &= PAGE_MASK;
> +	addr = start;
> +	len = (unsigned long) nr_pages << PAGE_SHIFT;
> +	end = start + len;
> +
> +	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
> +					start, len)))
> +		return 0;
> +
> +	/*
> +	 * Disable interrupts, we use the nested form as we can already
> +	 * have interrupts disabled by get_futex_key.
> +	 *
> +	 * With interrupts disabled, we block page table pages from being
> +	 * freed from under us. See mmu_gather_tlb in asm-generic/tlb.h
> +	 * for more details.
> +	 */
> +
> +	local_irq_save(flags);
> +	pgdp = pgd_offset(mm, addr);
> +	do {
> +		next = pgd_addr_end(addr, end);
> +		if (pgd_none(*pgdp))
> +			break;
> +		else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr))
> +			break;
> +	} while (pgdp++, addr = next, addr != end);
> +	local_irq_restore(flags);
> +
> +	return nr;
> +}

Since you just went through the trouble of enabling RCU pagetable
freeing, you might also replace these local_irq_save/restore with
rcu_read_{,un}lock().

Typically rcu_read_lock() is faster than disabling interrupts; but I've
no clue about ARM.
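Concretely, the substitution being suggested would look roughly like the
fragment below. This is an untested sketch: it assumes nothing else in the
walk relies on IRQs being masked, and by itself it does not serialise
against the IPI used for THP splitting (the point raised in the reply that
follows).

	/*
	 * Sketch of the suggested change: rely on an RCU read-side critical
	 * section, rather than disabled IRQs, to keep page table pages from
	 * being freed under the walker.
	 */
	rcu_read_lock();
	pgdp = pgd_offset(mm, addr);
	do {
		next = pgd_addr_end(addr, end);
		if (pgd_none(*pgdp))
			break;
		else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr))
			break;
	} while (pgdp++, addr = next, addr != end);
	rcu_read_unlock();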
Steve Capper March 12, 2014, 4:20 p.m. UTC | #2
On 12 March 2014 14:18, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Mar 12, 2014 at 01:40:20PM +0000, Steve Capper wrote:
>> +int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
>> +                       struct page **pages)
>> +{
>> +     struct mm_struct *mm = current->mm;
>> +     unsigned long addr, len, end;
>> +     unsigned long next, flags;
>> +     pgd_t *pgdp;
>> +     int nr = 0;
>> +
>> +     start &= PAGE_MASK;
>> +     addr = start;
>> +     len = (unsigned long) nr_pages << PAGE_SHIFT;
>> +     end = start + len;
>> +
>> +     if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
>> +                                     start, len)))
>> +             return 0;
>> +
>> +     /*
>> +      * Disable interrupts, we use the nested form as we can already
>> +      * have interrupts disabled by get_futex_key.
>> +      *
>> +      * With interrupts disabled, we block page table pages from being
>> +      * freed from under us. See mmu_gather_tlb in asm-generic/tlb.h
>> +      * for more details.
>> +      */
>> +
>> +     local_irq_save(flags);
>> +     pgdp = pgd_offset(mm, addr);
>> +     do {
>> +             next = pgd_addr_end(addr, end);
>> +             if (pgd_none(*pgdp))
>> +                     break;
>> +             else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr))
>> +                     break;
>> +     } while (pgdp++, addr = next, addr != end);
>> +     local_irq_restore(flags);
>> +
>> +     return nr;
>> +}
>
> Since you just went through the trouble of enabling RCU pagetable
> freeing, you might also replace these local_irq_save/restore with
> rcu_read_{,un}lock().

Hi Peter,
This critical section also needs to block the THP splitting code. At
the moment an IPI is broadcast in pmdp_splitting_flush. I'm not sure
how to adapt that to block on an rcu_read_lock, I'll have a think.

Cheers,
Peter Zijlstra March 12, 2014, 4:30 p.m. UTC | #3
On Wed, Mar 12, 2014 at 04:20:15PM +0000, Steve Capper wrote:
> On 12 March 2014 14:18, Peter Zijlstra <peterz@infradead.org> wrote:
> > Since you just went through the trouble of enabling RCU pagetable
> > freeing, you might also replace these local_irq_save/restore with
> > rcu_read_{,un}lock().
> 
> Hi Peter,
> This critical section also needs to block the THP splitting code. At
> the moment an IPI is broadcast in pmdp_splitting_flush. I'm not sure
> how to adapt that to block on an rcu_read_lock, I'll have a think.

Ah, I've not looked at THP much at all.

Would it be sufficient to make sure to fail the pmd get_page()
equivalent early enough?
Peter Zijlstra March 12, 2014, 4:32 p.m. UTC | #4
On Wed, Mar 12, 2014 at 01:40:20PM +0000, Steve Capper wrote:
> +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
> +			  pmd_t *pmdp)
> +{
> +	pmd_t pmd = pmd_mksplitting(*pmdp);
> +	VM_BUG_ON(address & ~PMD_MASK);
> +	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
> +
> +	/* dummy IPI to serialise against fast_gup */
> +	smp_call_function(thp_splitting_flush_sync, NULL, 1);
> +}

do you really need to IPI the entire machine? Wouldn't the mm's TLB
invalidate mask be sufficient?
Steve Capper March 12, 2014, 4:41 p.m. UTC | #5
On 12 March 2014 16:32, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Mar 12, 2014 at 01:40:20PM +0000, Steve Capper wrote:
>> +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
>> +                       pmd_t *pmdp)
>> +{
>> +     pmd_t pmd = pmd_mksplitting(*pmdp);
>> +     VM_BUG_ON(address & ~PMD_MASK);
>> +     set_pmd_at(vma->vm_mm, address, pmdp, pmd);
>> +
>> +     /* dummy IPI to serialise against fast_gup */
>> +     smp_call_function(thp_splitting_flush_sync, NULL, 1);
>> +}
>
> do you really need to IPI the entire machine? Wouldn't the mm's TLB
> invalidate mask be sufficient?

Thank you! Yes, that would be a much better idea. I'll correct this.
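The narrower IPI would look roughly like the sketch below. This is a sketch
only: smp_call_function_many() does not run the function on the calling CPU,
which should be fine here because the thread performing the split cannot
itself be inside fast_gup at the same time.

/* Sketch: limit the serialising IPI to CPUs in the mm's cpumask
 * instead of broadcasting to every CPU in the machine. */
void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
			  pmd_t *pmdp)
{
	pmd_t pmd = pmd_mksplitting(*pmdp);
	VM_BUG_ON(address & ~PMD_MASK);
	set_pmd_at(vma->vm_mm, address, pmdp, pmd);

	/* dummy IPI to serialise against fast_gup, limited to this mm */
	smp_call_function_many(mm_cpumask(vma->vm_mm),
			       thp_splitting_flush_sync, NULL, 1);
}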
Steve Capper March 12, 2014, 4:42 p.m. UTC | #6
On 12 March 2014 16:30, Peter Zijlstra <peterz@infradead.org> wrote:
> On Wed, Mar 12, 2014 at 04:20:15PM +0000, Steve Capper wrote:
>> On 12 March 2014 14:18, Peter Zijlstra <peterz@infradead.org> wrote:
>> > Since you just went through the trouble of enabling RCU pagetable
>> > freeing, you might also replace these local_irq_save/restore with
>> > rcu_read_{,un}lock().
>>
>> Hi Peter,
>> This critical section also needs to block the THP splitting code. At
>> the moment an IPI is broadcast in pmdp_splitting_flush. I'm not sure
>> how to adapt that to block on an rcu_read_lock, I'll have a think.
>
> Ah, I've not looked at THP much at all.
>
> Would it be sufficient to make sure to fail the pmd get_page()
> equivalent early enough?

I don't think that will be enough, as we haven't locked anything. I'll
refine the IPI as per your suggestion.
Peter Zijlstra March 12, 2014, 5:11 p.m. UTC | #7
On Wed, Mar 12, 2014 at 04:55:11PM +0000, Will Deacon wrote:
> On Wed, Mar 12, 2014 at 04:32:00PM +0000, Peter Zijlstra wrote:
> > On Wed, Mar 12, 2014 at 01:40:20PM +0000, Steve Capper wrote:
> > > +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
> > > +			  pmd_t *pmdp)
> > > +{
> > > +	pmd_t pmd = pmd_mksplitting(*pmdp);
> > > +	VM_BUG_ON(address & ~PMD_MASK);
> > > +	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
> > > +
> > > +	/* dummy IPI to serialise against fast_gup */
> > > +	smp_call_function(thp_splitting_flush_sync, NULL, 1);
> > > +}
> > 
> > do you really need to IPI the entire machine? Wouldn't the mm's TLB
> > invalidate mask be sufficient?
> 
> Are you thinking of using mm_cpumask(vma->vm_mm)? That's rarely cleared on
> ARM, so it tends to identify everywhere the task has ever run, regardless of
> TLB state. The reason is that the mask is also used for cache flushing
> (which is further overloaded for VIVT and VIPT w/ software maintenance
> broadcast).
> 
> I had a patch improving this a bit (below) but I didn't manage to see any
> significant improvements so I didn't pursue it further. What we probably want
> to try is nuking the mask on a h/w broadcast TLBI operation with ARMv7, but
> it will mean adding horrible checks to tlbflush.h

Ah this is because you have context tagged TLBs so your context switch
doesn't locally flush TLBs and therefore you cannot keep track of this?

Too much x86 in my head I suppose.
Catalin Marinas March 12, 2014, 5:15 p.m. UTC | #8
On Wed, Mar 12, 2014 at 01:40:20PM +0000, Steve Capper wrote:
> An implementation of get_user_pages_fast for ARM. It is based loosely
> on the PowerPC implementation. We disable interrupts in the walker to
> prevent the call_rcu_sched pagetable freeing code from running under
> us.
> 
> We also explicitly fire an IPI in the Transparent HugePage splitting
> case to prevent splits from interfering with the fast_gup walker.
> As THP splits are relatively rare, this should not have a noticeable
> overhead.
> 
> Signed-off-by: Steve Capper <steve.capper@linaro.org>
> ---
>  arch/arm/include/asm/pgtable-3level.h |   6 +
>  arch/arm/mm/Makefile                  |   1 +
>  arch/arm/mm/gup.c                     | 299 ++++++++++++++++++++++++++++++++++
>  3 files changed, 306 insertions(+)
>  create mode 100644 arch/arm/mm/gup.c

Is there anything specific to ARM in this gup.c file? Could we make it
more generic like mm/gup.c?
Steve Capper March 13, 2014, 8:03 a.m. UTC | #9
On 12 March 2014 17:15, Catalin Marinas <catalin.marinas@arm.com> wrote:
> On Wed, Mar 12, 2014 at 01:40:20PM +0000, Steve Capper wrote:
>> An implementation of get_user_pages_fast for ARM. It is based loosely
>> on the PowerPC implementation. We disable interrupts in the walker to
>> prevent the call_rcu_sched pagetable freeing code from running under
>> us.
>>
>> We also explicitly fire an IPI in the Transparent HugePage splitting
>> case to prevent splits from interfering with the fast_gup walker.
>> As THP splits are relatively rare, this should not have a noticeable
>> overhead.
>>
>> Signed-off-by: Steve Capper <steve.capper@linaro.org>
>> ---
>>  arch/arm/include/asm/pgtable-3level.h |   6 +
>>  arch/arm/mm/Makefile                  |   1 +
>>  arch/arm/mm/gup.c                     | 299 ++++++++++++++++++++++++++++++++++
>>  3 files changed, 306 insertions(+)
>>  create mode 100644 arch/arm/mm/gup.c
>
> Is there anything specific to ARM in this gup.c file? Could we make it
> more generic like mm/gup.c?

Hi Catalin,
The arm and arm64 cases assume that we can read the ptes atomically,
that TLB hardware broadcasts can occur (so we have to use the
page_cache_get_speculative logic), and that hugetlb pages are
equivalent in pte layout to THPs.

Also, I took a quick look at the other architectures, and a summary of
what I found can be found in this post:
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-March/239326.html

Cheers,
Steve Capper March 13, 2014, 8:24 a.m. UTC | #10
On 12 March 2014 16:55, Will Deacon <will.deacon@arm.com> wrote:
> On Wed, Mar 12, 2014 at 04:32:00PM +0000, Peter Zijlstra wrote:
>> On Wed, Mar 12, 2014 at 01:40:20PM +0000, Steve Capper wrote:
>> > +void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
>> > +                     pmd_t *pmdp)
>> > +{
>> > +   pmd_t pmd = pmd_mksplitting(*pmdp);
>> > +   VM_BUG_ON(address & ~PMD_MASK);
>> > +   set_pmd_at(vma->vm_mm, address, pmdp, pmd);
>> > +
>> > +   /* dummy IPI to serialise against fast_gup */
>> > +   smp_call_function(thp_splitting_flush_sync, NULL, 1);
>> > +}
>>
>> do you really need to IPI the entire machine? Wouldn't the mm's TLB
>> invalidate mask be sufficient?

Hey Will,

>
> Are you thinking of using mm_cpumask(vma->vm_mm)? That's rarely cleared on
> ARM, so it tends to identify everywhere the task has ever run, regardless of
> TLB state. The reason is that the mask is also used for cache flushing
> (which is further overloaded for VIVT and VIPT w/ software maintenance
> broadcast).

For the THP splitting case, I want a cpu mask to represent any cpu
that is touching the address space belonging to the THP. That way, the
IPI will block on any fast_gups taking place that contain the THP.

>
> I had a patch improving this a bit (below) but I didn't manage to see any
> significant improvements so I didn't pursue it further. What we probably want
> to try is nuking the mask on a h/w broadcast TLBI operation with ARMv7, but
> it will mean adding horrible checks to tlbflush.h

Thanks! I'm still waking up, will have a think about this.

Cheers,
Peter Zijlstra March 14, 2014, 11:47 a.m. UTC | #11
On Wed, Mar 12, 2014 at 06:11:26PM +0100, Peter Zijlstra wrote:
> Ah this is because you have context tagged TLBs so your context switch
> doesn't locally flush TLBs and therefore you cannot keep track of this?
> 
> Too much x86 in my head I suppose.

Something you could consider is something like:

typedef struct {
	...
+	unsigned long tlb_flush_count;
} mm_context_t;

struct thread_info {
	...
+	unsigned long tlb_flush_count;
};


void flush_tlb*() {
	ACCESS_ONCE(mm->context.tlb_flush_count)++;

	...
}

void switch_to(prev, next) {
	...

	if (prev->mm != next->mm &&
	    next->mm->context.tlb_flush_count !=
	    task_thread_info(next)->tlb_flush_count) {
		task_thread_info(next)->tlb_flush_count =
			next->mm->context.tlb_flush_count;
		local_tlb_flush(next->mm);
	}
}

That way you don't have to IPI cpus that don't currently run tasks of
that mm because the next time they get scheduled the switch_to() bit
will flush their mm for you.

And thus you can keep a tight tlb invalidate mask.

Now I'm not at all sure this is beneficial for ARM, just a thought.

Also I suppose one should think about the case where the counter
wrapped. The easy way out there is to unconditionally flush the entire
machine in flush_tlb*() when that happens.

Patch

diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h
index b286ba9..fdc4a4f 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -226,6 +226,12 @@  static inline pte_t pte_mkspecial(pte_t pte)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_trans_huge(pmd)	(pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
 #define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
+
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp);
+#endif
 #endif
 
 #define PMD_BIT_FUNC(fn,op) \
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 7f39ce2..a2c4e87 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -7,6 +7,7 @@  obj-y				:= dma-mapping.o extable.o fault.o init.o \
 
 obj-$(CONFIG_MMU)		+= fault-armv.o flush.o idmap.o ioremap.o \
 				   mmap.o pgd.o mmu.o
+obj-$(CONFIG_ARM_LPAE)		+= gup.o
 
 ifneq ($(CONFIG_MMU),y)
 obj-y				+= nommu.o
diff --git a/arch/arm/mm/gup.c b/arch/arm/mm/gup.c
new file mode 100644
index 0000000..715ab0d
--- /dev/null
+++ b/arch/arm/mm/gup.c
@@ -0,0 +1,299 @@ 
+/*
+ * arch/arm/mm/gup.c
+ *
+ * Copyright (C) 2014 Linaro Ltd.
+ *
+ * Based on arch/powerpc/mm/gup.c which is:
+ * Copyright (C) 2008 Nick Piggin
+ * Copyright (C) 2008 Novell Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/rwsem.h>
+#include <linux/hugetlb.h>
+#include <asm/pgtable.h>
+
+static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
+			 int write, struct page **pages, int *nr)
+{
+	pte_t *ptep, *ptem;
+	int ret = 0;
+
+	ptem = ptep = pte_offset_map(&pmd, addr);
+	do {
+		pte_t pte = ACCESS_ONCE(*ptep);
+		struct page *page;
+
+		if (!pte_valid_user(pte) || pte_special(pte)
+			|| (write && !pte_write(pte)))
+			goto pte_unmap;
+
+		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+		page = pte_page(pte);
+
+		if (!page_cache_get_speculative(page))
+			goto pte_unmap;
+
+		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+			put_page(page);
+			goto pte_unmap;
+		}
+
+		pages[*nr] = page;
+		(*nr)++;
+
+	} while (ptep++, addr += PAGE_SIZE, addr != end);
+
+	ret = 1;
+
+pte_unmap:
+	pte_unmap(ptem);
+	return ret;
+}
+
+static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	struct page *head, *page, *tail;
+	int refs;
+
+	if (!pmd_present(orig) || (write && !pmd_write(orig)))
+		return 0;
+
+	refs = 0;
+	head = pmd_page(orig);
+	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+	tail = page;
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+		return 0;
+	}
+
+	/*
+	 * Any tail pages need their mapcount reference taken before we
+	 * return. (This allows the THP code to bump their ref count when
+	 * they are split into base pages).
+	 */
+	while (refs--) {
+		if (PageTail(tail))
+			get_huge_page_tail(tail);
+		tail++;
+	}
+
+	return 1;
+}
+
+static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
+		unsigned long end, int write, struct page **pages, int *nr)
+{
+	struct page *head, *page, *tail;
+	pmd_t origpmd = __pmd(pud_val(orig));
+	int refs;
+
+	if (!pmd_present(origpmd) || (write && !pmd_write(origpmd)))
+		return 0;
+
+	refs = 0;
+	head = pmd_page(origpmd);
+	page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+	tail = page;
+	do {
+		VM_BUG_ON(compound_head(page) != head);
+		pages[*nr] = page;
+		(*nr)++;
+		page++;
+		refs++;
+	} while (addr += PAGE_SIZE, addr != end);
+
+	if (!page_cache_add_speculative(head, refs)) {
+		*nr -= refs;
+		return 0;
+	}
+
+	if (unlikely(pud_val(orig) != pud_val(*pudp))) {
+		*nr -= refs;
+		while (refs--)
+			put_page(head);
+		return 0;
+	}
+
+	while (refs--) {
+		if (PageTail(tail))
+			get_huge_page_tail(tail);
+		tail++;
+	}
+
+	return 1;
+}
+
+static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pmd_t *pmdp;
+
+	pmdp = pmd_offset(&pud, addr);
+	do {
+		pmd_t pmd = ACCESS_ONCE(*pmdp);
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+			return 0;
+
+		if (unlikely(pmd_thp_or_huge(pmd))) {
+			if (!gup_huge_pmd(pmd, pmdp, addr, next, write,
+				pages, nr))
+				return 0;
+		} else {
+			if (!gup_pte_range(pmd, addr, next, write, pages, nr))
+				return 0;
+		}
+	} while (pmdp++, addr = next, addr != end);
+
+	return 1;
+}
+
+static int gup_pud_range(pgd_t *pgdp, unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pud_t *pudp;
+
+	pudp = pud_offset(pgdp, addr);
+	do {
+		pud_t pud = ACCESS_ONCE(*pudp);
+		next = pud_addr_end(addr, end);
+		if (pud_none(pud))
+			return 0;
+		if (pud_huge(pud)) {
+			if (!gup_huge_pud(pud, pudp, addr, next, write,
+					pages, nr))
+				return 0;
+		} else if (!gup_pmd_range(pud, addr, next, write, pages, nr))
+			return 0;
+	} while (pudp++, addr = next, addr != end);
+
+	return 1;
+}
+
+/*
+ * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
+ * back to the regular GUP.
+ */
+int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			  struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long addr, len, end;
+	unsigned long next, flags;
+	pgd_t *pgdp;
+	int nr = 0;
+
+	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					start, len)))
+		return 0;
+
+	/*
+	 * Disable interrupts, we use the nested form as we can already
+	 * have interrupts disabled by get_futex_key.
+	 *
+	 * With interrupts disabled, we block page table pages from being
+	 * freed from under us. See mmu_gather_tlb in asm-generic/tlb.h
+	 * for more details.
+	 */
+
+	local_irq_save(flags);
+	pgdp = pgd_offset(mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(*pgdp))
+			break;
+		else if (!gup_pud_range(pgdp, addr, next, write, pages, &nr))
+			break;
+	} while (pgdp++, addr = next, addr != end);
+	local_irq_restore(flags);
+
+	return nr;
+}
+
+int get_user_pages_fast(unsigned long start, int nr_pages, int write,
+			struct page **pages)
+{
+	struct mm_struct *mm = current->mm;
+	int nr, ret;
+
+	start &= PAGE_MASK;
+	nr = __get_user_pages_fast(start, nr_pages, write, pages);
+	ret = nr;
+
+	if (nr < nr_pages) {
+		/* Try to get the remaining pages with get_user_pages */
+		start += nr << PAGE_SHIFT;
+		pages += nr;
+
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm, start,
+				     nr_pages - nr, write, 0, pages, NULL);
+		up_read(&mm->mmap_sem);
+
+		/* Have to be a bit careful with return values */
+		if (nr > 0) {
+			if (ret < 0)
+				ret = nr;
+			else
+				ret += nr;
+		}
+	}
+
+	return ret;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#ifdef CONFIG_HAVE_RCU_TABLE_FREE
+static void thp_splitting_flush_sync(void *arg)
+{
+}
+
+void pmdp_splitting_flush(struct vm_area_struct *vma, unsigned long address,
+			  pmd_t *pmdp)
+{
+	pmd_t pmd = pmd_mksplitting(*pmdp);
+	VM_BUG_ON(address & ~PMD_MASK);
+	set_pmd_at(vma->vm_mm, address, pmdp, pmd);
+
+	/* dummy IPI to serialise against fast_gup */
+	smp_call_function(thp_splitting_flush_sync, NULL, 1);
+}
+#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */