diff mbox series

[1/3] mm: numa: avoid waiting on freed migrated pages

Message ID 1496771916-28203-2-git-send-email-will.deacon@arm.com
State Superseded
Headers show
Series mm: huge pages: Misc fixes for issues found during fuzzing | expand

Commit Message

Will Deacon June 6, 2017, 5:58 p.m. UTC
From: Mark Rutland <mark.rutland@arm.com>


In do_huge_pmd_numa_page(), we attempt to handle a migrating thp pmd by
waiting until the pmd is unlocked before we return and retry. However,
we can race with migrate_misplaced_transhuge_page():

// do_huge_pmd_numa_page                // migrate_misplaced_transhuge_page()
// Holds 0 refs on page                 // Holds 2 refs on page

vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
/* ... */
if (pmd_trans_migrating(*vmf->pmd)) {
        page = pmd_page(*vmf->pmd);
        spin_unlock(vmf->ptl);
                                        ptl = pmd_lock(mm, pmd);
                                        if (page_count(page) != 2) {
                                                /* roll back */
                                        }
                                        /* ... */
                                        mlock_migrate_page(new_page, page);
                                        /* ... */
                                        spin_unlock(ptl);
                                        put_page(page);
                                        put_page(page); // page freed here
        wait_on_page_locked(page);
        goto out;
}

This can result in the freed page having its waiters flag set
unexpectedly, which trips the PAGE_FLAGS_CHECK_AT_PREP checks in the
page alloc/free functions. This has been observed on arm64 KVM guests.

We can avoid this by having do_huge_pmd_numa_page() take a reference on
the page before dropping the pmd lock, mirroring what we do in
__migration_entry_wait().

When we hit the race, migrate_misplaced_transhuge_page() will see the
reference and abort the migration, as it may do today in other cases.

Acked-by: Steve Capper <steve.capper@arm.com>

Signed-off-by: Mark Rutland <mark.rutland@arm.com>

Signed-off-by: Will Deacon <will.deacon@arm.com>

---
 mm/huge_memory.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

-- 
2.1.4

Comments

Vlastimil Babka June 8, 2017, 9:04 a.m. UTC | #1
On 06/06/2017 07:58 PM, Will Deacon wrote:
> From: Mark Rutland <mark.rutland@arm.com>

> 

> In do_huge_pmd_numa_page(), we attempt to handle a migrating thp pmd by

> waiting until the pmd is unlocked before we return and retry. However,

> we can race with migrate_misplaced_transhuge_page():

> 

> // do_huge_pmd_numa_page                // migrate_misplaced_transhuge_page()

> // Holds 0 refs on page                 // Holds 2 refs on page

> 

> vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);

> /* ... */

> if (pmd_trans_migrating(*vmf->pmd)) {

>         page = pmd_page(*vmf->pmd);

>         spin_unlock(vmf->ptl);

>                                         ptl = pmd_lock(mm, pmd);

>                                         if (page_count(page) != 2) {

>                                                 /* roll back */

>                                         }

>                                         /* ... */

>                                         mlock_migrate_page(new_page, page);

>                                         /* ... */

>                                         spin_unlock(ptl);

>                                         put_page(page);

>                                         put_page(page); // page freed here

>         wait_on_page_locked(page);

>         goto out;

> }

> 

> This can result in the freed page having its waiters flag set

> unexpectedly, which trips the PAGE_FLAGS_CHECK_AT_PREP checks in the

> page alloc/free functions. This has been observed on arm64 KVM guests.

> 

> We can avoid this by having do_huge_pmd_numa_page() take a reference on

> the page before dropping the pmd lock, mirroring what we do in

> __migration_entry_wait().

> 

> When we hit the race, migrate_misplaced_transhuge_page() will see the

> reference and abort the migration, as it may do today in other cases.

> 

> Acked-by: Steve Capper <steve.capper@arm.com>

> Signed-off-by: Mark Rutland <mark.rutland@arm.com>

> Signed-off-by: Will Deacon <will.deacon@arm.com>


Nice catch! Stable candidate? Fixes: the commit that added waiters flag?
Assuming it was harmless before that?

Acked-by: Vlastimil Babka <vbabka@suse.cz>


> ---

>  mm/huge_memory.c | 8 +++++++-

>  1 file changed, 7 insertions(+), 1 deletion(-)

> 

> diff --git a/mm/huge_memory.c b/mm/huge_memory.c

> index a84909cf20d3..88c6167f194d 100644

> --- a/mm/huge_memory.c

> +++ b/mm/huge_memory.c

> @@ -1426,8 +1426,11 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)

>  	 */

>  	if (unlikely(pmd_trans_migrating(*vmf->pmd))) {

>  		page = pmd_page(*vmf->pmd);

> +		if (!get_page_unless_zero(page))

> +			goto out_unlock;

>  		spin_unlock(vmf->ptl);

>  		wait_on_page_locked(page);

> +		put_page(page);

>  		goto out;

>  	}

>  

> @@ -1459,9 +1462,12 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)

>  

>  	/* Migration could have started since the pmd_trans_migrating check */

>  	if (!page_locked) {

> +		page_nid = -1;

> +		if (!get_page_unless_zero(page))

> +			goto out_unlock;

>  		spin_unlock(vmf->ptl);

>  		wait_on_page_locked(page);

> -		page_nid = -1;

> +		put_page(page);

>  		goto out;

>  	}

>  

>
Kirill A. Shutemov June 8, 2017, 10:27 a.m. UTC | #2
On Tue, Jun 06, 2017 at 06:58:34PM +0100, Will Deacon wrote:
> From: Mark Rutland <mark.rutland@arm.com>

> 

> In do_huge_pmd_numa_page(), we attempt to handle a migrating thp pmd by

> waiting until the pmd is unlocked before we return and retry. However,

> we can race with migrate_misplaced_transhuge_page():

> 

> // do_huge_pmd_numa_page                // migrate_misplaced_transhuge_page()

> // Holds 0 refs on page                 // Holds 2 refs on page

> 

> vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);

> /* ... */

> if (pmd_trans_migrating(*vmf->pmd)) {

>         page = pmd_page(*vmf->pmd);

>         spin_unlock(vmf->ptl);

>                                         ptl = pmd_lock(mm, pmd);

>                                         if (page_count(page) != 2) {

>                                                 /* roll back */

>                                         }

>                                         /* ... */

>                                         mlock_migrate_page(new_page, page);

>                                         /* ... */

>                                         spin_unlock(ptl);

>                                         put_page(page);

>                                         put_page(page); // page freed here

>         wait_on_page_locked(page);

>         goto out;

> }

> 

> This can result in the freed page having its waiters flag set

> unexpectedly, which trips the PAGE_FLAGS_CHECK_AT_PREP checks in the

> page alloc/free functions. This has been observed on arm64 KVM guests.

> 

> We can avoid this by having do_huge_pmd_numa_page() take a reference on

> the page before dropping the pmd lock, mirroring what we do in

> __migration_entry_wait().

> 

> When we hit the race, migrate_misplaced_transhuge_page() will see the

> reference and abort the migration, as it may do today in other cases.

> 

> Acked-by: Steve Capper <steve.capper@arm.com>

> Signed-off-by: Mark Rutland <mark.rutland@arm.com>

> Signed-off-by: Will Deacon <will.deacon@arm.com>


Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>


-- 
 Kirill A. Shutemov
Mark Rutland June 8, 2017, 10:31 a.m. UTC | #3
On Thu, Jun 08, 2017 at 11:04:05AM +0200, Vlastimil Babka wrote:
> On 06/06/2017 07:58 PM, Will Deacon wrote:

> > From: Mark Rutland <mark.rutland@arm.com>

> > 

> > In do_huge_pmd_numa_page(), we attempt to handle a migrating thp pmd by

> > waiting until the pmd is unlocked before we return and retry. However,

> > we can race with migrate_misplaced_transhuge_page():

> > 

> > // do_huge_pmd_numa_page                // migrate_misplaced_transhuge_page()

> > // Holds 0 refs on page                 // Holds 2 refs on page

> > 

> > vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);

> > /* ... */

> > if (pmd_trans_migrating(*vmf->pmd)) {

> >         page = pmd_page(*vmf->pmd);

> >         spin_unlock(vmf->ptl);

> >                                         ptl = pmd_lock(mm, pmd);

> >                                         if (page_count(page) != 2) {

> >                                                 /* roll back */

> >                                         }

> >                                         /* ... */

> >                                         mlock_migrate_page(new_page, page);

> >                                         /* ... */

> >                                         spin_unlock(ptl);

> >                                         put_page(page);

> >                                         put_page(page); // page freed here

> >         wait_on_page_locked(page);

> >         goto out;

> > }

> > 

> > This can result in the freed page having its waiters flag set

> > unexpectedly, which trips the PAGE_FLAGS_CHECK_AT_PREP checks in the

> > page alloc/free functions. This has been observed on arm64 KVM guests.

> > 

> > We can avoid this by having do_huge_pmd_numa_page() take a reference on

> > the page before dropping the pmd lock, mirroring what we do in

> > __migration_entry_wait().

> > 

> > When we hit the race, migrate_misplaced_transhuge_page() will see the

> > reference and abort the migration, as it may do today in other cases.

> > 

> > Acked-by: Steve Capper <steve.capper@arm.com>

> > Signed-off-by: Mark Rutland <mark.rutland@arm.com>

> > Signed-off-by: Will Deacon <will.deacon@arm.com>

> 

> Nice catch! Stable candidate?


I think so, given I can hit this in practice.

> Fixes: the commit that added waiters flag?


I think we need:

Fixes: b8916634b77bffb2 ("mm: Prevent parallel splits during THP migration")

... which introduced the potential for the huge page to be freed (and
potentially reallocated) before we wait on it. The waiters flag issue is
a result of this, rather than the underlying issue.

> Assuming it was harmless before that?


I'm not entirely sure. I suspect that there are other issues that might
result, e.g. if the page were reallocated before we wait on it.

> Acked-by: Vlastimil Babka <vbabka@suse.cz>


Cheers!

Mark.

> > ---

> >  mm/huge_memory.c | 8 +++++++-

> >  1 file changed, 7 insertions(+), 1 deletion(-)

> > 

> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c

> > index a84909cf20d3..88c6167f194d 100644

> > --- a/mm/huge_memory.c

> > +++ b/mm/huge_memory.c

> > @@ -1426,8 +1426,11 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)

> >  	 */

> >  	if (unlikely(pmd_trans_migrating(*vmf->pmd))) {

> >  		page = pmd_page(*vmf->pmd);

> > +		if (!get_page_unless_zero(page))

> > +			goto out_unlock;

> >  		spin_unlock(vmf->ptl);

> >  		wait_on_page_locked(page);

> > +		put_page(page);

> >  		goto out;

> >  	}

> >  

> > @@ -1459,9 +1462,12 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)

> >  

> >  	/* Migration could have started since the pmd_trans_migrating check */

> >  	if (!page_locked) {

> > +		page_nid = -1;

> > +		if (!get_page_unless_zero(page))

> > +			goto out_unlock;

> >  		spin_unlock(vmf->ptl);

> >  		wait_on_page_locked(page);

> > -		page_nid = -1;

> > +		put_page(page);

> >  		goto out;

> >  	}

> >  

> > 

>
diff mbox series

Patch

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index a84909cf20d3..88c6167f194d 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1426,8 +1426,11 @@  int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 	 */
 	if (unlikely(pmd_trans_migrating(*vmf->pmd))) {
 		page = pmd_page(*vmf->pmd);
+		if (!get_page_unless_zero(page))
+			goto out_unlock;
 		spin_unlock(vmf->ptl);
 		wait_on_page_locked(page);
+		put_page(page);
 		goto out;
 	}
 
@@ -1459,9 +1462,12 @@  int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
 
 	/* Migration could have started since the pmd_trans_migrating check */
 	if (!page_locked) {
+		page_nid = -1;
+		if (!get_page_unless_zero(page))
+			goto out_unlock;
 		spin_unlock(vmf->ptl);
 		wait_on_page_locked(page);
-		page_nid = -1;
+		put_page(page);
 		goto out;
 	}