Message ID | 20220913195508.3511038-2-opendmb@gmail.com |
---|---|
State | New |
Headers | show |
Series | [01/21] mm/page_isolation: protect cma from isolate_single_pageblock | expand |
On 13 Sep 2022, at 15:54, Doug Berger wrote: > The function set_migratetype_isolate() has special handling for > pageblocks of MIGRATE_CMA type that protects them from being > isolated for MIGRATE_MOVABLE requests. > > Since isolate_single_pageblock() doesn't receive the migratetype > argument of start_isolate_page_range() it used the migratetype > of the pageblock instead of the requested migratetype which > defeats this MIGRATE_CMA check. > > This allows an attempt to create a gigantic page within a CMA > region to change the migratetype of the first and last pageblocks > from MIGRATE_CMA to MIGRATE_MOVABLE when they are restored after > failure, which corrupts the CMA region. > > The calls to (un)set_migratetype_isolate() for the first and last > pageblocks of the start_isolate_page_range() are moved back into > that function to allow access to its migratetype argument and make > it easier to see how all of the pageblocks in the range are > isolated. > > Fixes: b2c9e2fbba32 ("mm: make alloc_contig_range work at pageblock granularity") > Signed-off-by: Doug Berger <opendmb@gmail.com> > --- > mm/page_isolation.c | 75 +++++++++++++++++++++------------------------ > 1 file changed, 35 insertions(+), 40 deletions(-) Thanks for the fix. Why not just pass migratetype into isolate_single_pageblock() and use it when set_migratetype_isolate() is used? That would have much fewer changes. What is the reason of pulling skip isolation logic out? Ultimately, I would like to make MIGRATE_ISOLATE a separate bit, so that migratetype will not be overwritten during page isolation. Then, set_migratetype_isolate() and start_isolate_page_range() will not have migratetype to set in error recovery any more. That is on my TODO. > > diff --git a/mm/page_isolation.c b/mm/page_isolation.c > index 9d73dc38e3d7..8e16aa22cb61 100644 > --- a/mm/page_isolation.c > +++ b/mm/page_isolation.c > @@ -286,8 +286,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) > * @flags: isolation flags > * @gfp_flags: GFP flags used for migrating pages > * @isolate_before: isolate the pageblock before the boundary_pfn > - * @skip_isolation: the flag to skip the pageblock isolation in second > - * isolate_single_pageblock() > * > * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one > * pageblock. When not all pageblocks within a page are isolated at the same > @@ -302,9 +300,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) > * the in-use page then splitting the free page. > */ > static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, > - gfp_t gfp_flags, bool isolate_before, bool skip_isolation) > + gfp_t gfp_flags, bool isolate_before) > { > - unsigned char saved_mt; > unsigned long start_pfn; > unsigned long isolate_pageblock; > unsigned long pfn; > @@ -328,18 +325,6 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, > start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES), > zone->zone_start_pfn); > > - saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); > - > - if (skip_isolation) > - VM_BUG_ON(!is_migrate_isolate(saved_mt)); > - else { > - ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags, > - isolate_pageblock, isolate_pageblock + pageblock_nr_pages); > - > - if (ret) > - return ret; > - } > - > /* > * Bail out early when the to-be-isolated pageblock does not form > * a free or in-use page across boundary_pfn: > @@ -428,7 +413,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, > ret = set_migratetype_isolate(page, page_mt, > flags, head_pfn, head_pfn + nr_pages); > if (ret) > - goto failed; > + return ret; > } > > ret = __alloc_contig_migrate_range(&cc, head_pfn, > @@ -443,7 +428,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, > unset_migratetype_isolate(page, page_mt); > > if (ret) > - goto failed; > + return -EBUSY; > /* > * reset pfn to the head of the free page, so > * that the free page handling code above can split > @@ -459,24 +444,19 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, > while (!PageBuddy(pfn_to_page(outer_pfn))) { > /* stop if we cannot find the free page */ > if (++order >= MAX_ORDER) > - goto failed; > + return -EBUSY; > outer_pfn &= ~0UL << order; > } > pfn = outer_pfn; > continue; > } else > #endif > - goto failed; > + return -EBUSY; > } > > pfn++; > } > return 0; > -failed: > - /* restore the original migratetype */ > - if (!skip_isolation) > - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt); > - return -EBUSY; > } > > /** > @@ -534,21 +514,30 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages); > unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages); > int ret; > - bool skip_isolation = false; > > /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */ > - ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation); > + ret = set_migratetype_isolate(pfn_to_page(isolate_start), migratetype, > + flags, isolate_start, isolate_start + pageblock_nr_pages); > if (ret) > return ret; > - > - if (isolate_start == isolate_end - pageblock_nr_pages) > - skip_isolation = true; > + ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false); > + if (ret) > + goto unset_start_block; > > /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */ > - ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation); > + pfn = isolate_end - pageblock_nr_pages; > + if (isolate_start != pfn) { > + ret = set_migratetype_isolate(pfn_to_page(pfn), migratetype, > + flags, pfn, pfn + pageblock_nr_pages); > + if (ret) > + goto unset_start_block; > + } > + ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true); > if (ret) { > - unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); > - return ret; > + if (isolate_start != pfn) > + goto unset_end_block; > + else > + goto unset_start_block; > } > > /* skip isolated pageblocks at the beginning and end */ > @@ -557,15 +546,21 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > pfn += pageblock_nr_pages) { > page = __first_valid_page(pfn, pageblock_nr_pages); > if (page && set_migratetype_isolate(page, migratetype, flags, > - start_pfn, end_pfn)) { > - undo_isolate_page_range(isolate_start, pfn, migratetype); > - unset_migratetype_isolate( > - pfn_to_page(isolate_end - pageblock_nr_pages), > - migratetype); > - return -EBUSY; > - } > + start_pfn, end_pfn)) > + goto unset_isolated_blocks; > } > return 0; > + > +unset_isolated_blocks: > + ret = -EBUSY; > + undo_isolate_page_range(isolate_start + pageblock_nr_pages, pfn, > + migratetype); > +unset_end_block: > + unset_migratetype_isolate(pfn_to_page(isolate_end - pageblock_nr_pages), > + migratetype); > +unset_start_block: > + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); > + return ret; > } > > /* > -- > 2.25.1 -- Best Regards, Yan, Zi
On 9/13/2022 6:09 PM, Zi Yan wrote: > On 13 Sep 2022, at 20:59, Doug Berger wrote: > >> On 9/13/2022 5:02 PM, Zi Yan wrote: >>> On 13 Sep 2022, at 15:54, Doug Berger wrote: >>> >>>> The function set_migratetype_isolate() has special handling for >>>> pageblocks of MIGRATE_CMA type that protects them from being >>>> isolated for MIGRATE_MOVABLE requests. >>>> >>>> Since isolate_single_pageblock() doesn't receive the migratetype >>>> argument of start_isolate_page_range() it used the migratetype >>>> of the pageblock instead of the requested migratetype which >>>> defeats this MIGRATE_CMA check. >>>> >>>> This allows an attempt to create a gigantic page within a CMA >>>> region to change the migratetype of the first and last pageblocks >>>> from MIGRATE_CMA to MIGRATE_MOVABLE when they are restored after >>>> failure, which corrupts the CMA region. >>>> >>>> The calls to (un)set_migratetype_isolate() for the first and last >>>> pageblocks of the start_isolate_page_range() are moved back into >>>> that function to allow access to its migratetype argument and make >>>> it easier to see how all of the pageblocks in the range are >>>> isolated. >>>> >>>> Fixes: b2c9e2fbba32 ("mm: make alloc_contig_range work at pageblock granularity") >>>> Signed-off-by: Doug Berger <opendmb@gmail.com> >>>> --- >>>> mm/page_isolation.c | 75 +++++++++++++++++++++------------------------ >>>> 1 file changed, 35 insertions(+), 40 deletions(-) >>> >>> Thanks for the fix. >> Thanks for the review. >> >>> >>> Why not just pass migratetype into isolate_single_pageblock() and use >>> it when set_migratetype_isolate() is used? That would have much >>> fewer changes. What is the reason of pulling skip isolation logic out? >> I found the skip_isolation logic confusing and thought that setting and restoring the migratetype within the same function and consolidating the error recovery paths also within that function was easier to understand and less prone to accidental breakage. >> >> In particular, setting MIGRATE_ISOLATE in isolate_single_pageblock() and having to remember to unset it in start_isolate_page_range() differently on different error paths was troublesome for me. > > Wouldn't this work as well? > > diff --git a/mm/page_isolation.c b/mm/page_isolation.c > index c1307d1bea81..a312cabd0d95 100644 > --- a/mm/page_isolation.c > +++ b/mm/page_isolation.c > @@ -288,6 +288,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) > * @isolate_before: isolate the pageblock before the boundary_pfn > * @skip_isolation: the flag to skip the pageblock isolation in second > * isolate_single_pageblock() > + * @migratetype: Migrate type to set in error recovery. > * > * Free and in-use pages can be as big as MAX_ORDER and contain more than one > * pageblock. When not all pageblocks within a page are isolated at the same > @@ -302,9 +303,9 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) > * the in-use page then splitting the free page. > */ > static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, > - gfp_t gfp_flags, bool isolate_before, bool skip_isolation) > + gfp_t gfp_flags, bool isolate_before, bool skip_isolation, > + int migratetype) > { > - unsigned char saved_mt; > unsigned long start_pfn; > unsigned long isolate_pageblock; > unsigned long pfn; > @@ -328,12 +329,10 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, > start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES), > zone->zone_start_pfn); > > - saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); > - > if (skip_isolation) > - VM_BUG_ON(!is_migrate_isolate(saved_mt)); > + VM_BUG_ON(!is_migrate_isolate(get_pageblock_migratetype(pfn_to_page(isolate_pageblock)))); > else { > - ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags, > + ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype, flags, > isolate_pageblock, isolate_pageblock + pageblock_nr_pages); > > if (ret) > @@ -475,7 +474,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, > failed: > /* restore the original migratetype */ > if (!skip_isolation) > - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt); > + unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype); > return -EBUSY; > } > > @@ -537,7 +536,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > bool skip_isolation = false; > > /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */ > - ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation); > + ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, > + skip_isolation, migratetype); > if (ret) > return ret; > > @@ -545,7 +545,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, > skip_isolation = true; > > /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */ > - ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation); > + ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, > + skip_isolation, migratetype); > if (ret) { > unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); > return ret; > I would expect this to work as well, but it is not my preference. >> >> It could certainly be done differently, but this was my preference. > > A smaller patch can make review easier, right? It certainly can. Especially when it is for code that you are familiar with ;). I am happy to have you submit a patch to fix this issue and submit it to stable for backporting. Fixing the issue is what's important to me. > >>> >>> Ultimately, I would like to make MIGRATE_ISOLATE a separate bit, >>> so that migratetype will not be overwritten during page isolation. >>> Then, set_migratetype_isolate() and start_isolate_page_range() >>> will not have migratetype to set in error recovery any more. >>> That is on my TODO. >>> >>>> >>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c >>>> index 9d73dc38e3d7..8e16aa22cb61 100644 >>>> --- a/mm/page_isolation.c >>>> +++ b/mm/page_isolation.c >>>> @@ -286,8 +286,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) >>>> * @flags: isolation flags >>>> * @gfp_flags: GFP flags used for migrating pages >>>> * @isolate_before: isolate the pageblock before the boundary_pfn >>>> - * @skip_isolation: the flag to skip the pageblock isolation in second >>>> - * isolate_single_pageblock() >>>> * >>>> * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one >>>> * pageblock. When not all pageblocks within a page are isolated at the same >>>> @@ -302,9 +300,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) >>>> * the in-use page then splitting the free page. >>>> */ >>>> static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>> - gfp_t gfp_flags, bool isolate_before, bool skip_isolation) >>>> + gfp_t gfp_flags, bool isolate_before) >>>> { >>>> - unsigned char saved_mt; >>>> unsigned long start_pfn; >>>> unsigned long isolate_pageblock; >>>> unsigned long pfn; >>>> @@ -328,18 +325,6 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>> start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES), >>>> zone->zone_start_pfn); >>>> >>>> - saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); >>>> - >>>> - if (skip_isolation) >>>> - VM_BUG_ON(!is_migrate_isolate(saved_mt)); >>>> - else { >>>> - ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags, >>>> - isolate_pageblock, isolate_pageblock + pageblock_nr_pages); >>>> - >>>> - if (ret) >>>> - return ret; >>>> - } >>>> - >>>> /* >>>> * Bail out early when the to-be-isolated pageblock does not form >>>> * a free or in-use page across boundary_pfn: >>>> @@ -428,7 +413,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>> ret = set_migratetype_isolate(page, page_mt, >>>> flags, head_pfn, head_pfn + nr_pages); >>>> if (ret) >>>> - goto failed; >>>> + return ret; >>>> } >>>> >>>> ret = __alloc_contig_migrate_range(&cc, head_pfn, >>>> @@ -443,7 +428,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>> unset_migratetype_isolate(page, page_mt); >>>> >>>> if (ret) >>>> - goto failed; >>>> + return -EBUSY; >>>> /* >>>> * reset pfn to the head of the free page, so >>>> * that the free page handling code above can split >>>> @@ -459,24 +444,19 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>> while (!PageBuddy(pfn_to_page(outer_pfn))) { >>>> /* stop if we cannot find the free page */ >>>> if (++order >= MAX_ORDER) >>>> - goto failed; >>>> + return -EBUSY; >>>> outer_pfn &= ~0UL << order; >>>> } >>>> pfn = outer_pfn; >>>> continue; >>>> } else >>>> #endif >>>> - goto failed; >>>> + return -EBUSY; >>>> } >>>> >>>> pfn++; >>>> } >>>> return 0; >>>> -failed: >>>> - /* restore the original migratetype */ >>>> - if (!skip_isolation) >>>> - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt); >>>> - return -EBUSY; >>>> } >>>> >>>> /** >>>> @@ -534,21 +514,30 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, >>>> unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages); >>>> unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages); >>>> int ret; >>>> - bool skip_isolation = false; >>>> >>>> /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */ >>>> - ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation); >>>> + ret = set_migratetype_isolate(pfn_to_page(isolate_start), migratetype, >>>> + flags, isolate_start, isolate_start + pageblock_nr_pages); >>>> if (ret) >>>> return ret; >>>> - >>>> - if (isolate_start == isolate_end - pageblock_nr_pages) >>>> - skip_isolation = true; >>>> + ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false); >>>> + if (ret) >>>> + goto unset_start_block; >>>> >>>> /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */ >>>> - ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation); >>>> + pfn = isolate_end - pageblock_nr_pages; >>>> + if (isolate_start != pfn) { >>>> + ret = set_migratetype_isolate(pfn_to_page(pfn), migratetype, >>>> + flags, pfn, pfn + pageblock_nr_pages); >>>> + if (ret) >>>> + goto unset_start_block; >>>> + } >>>> + ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true); >>>> if (ret) { >>>> - unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); >>>> - return ret; >>>> + if (isolate_start != pfn) >>>> + goto unset_end_block; >>>> + else >>>> + goto unset_start_block; >>>> } >>>> >>>> /* skip isolated pageblocks at the beginning and end */ >>>> @@ -557,15 +546,21 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, >>>> pfn += pageblock_nr_pages) { >>>> page = __first_valid_page(pfn, pageblock_nr_pages); >>>> if (page && set_migratetype_isolate(page, migratetype, flags, >>>> - start_pfn, end_pfn)) { >>>> - undo_isolate_page_range(isolate_start, pfn, migratetype); >>>> - unset_migratetype_isolate( >>>> - pfn_to_page(isolate_end - pageblock_nr_pages), >>>> - migratetype); >>>> - return -EBUSY; >>>> - } >>>> + start_pfn, end_pfn)) >>>> + goto unset_isolated_blocks; >>>> } >>>> return 0; >>>> + >>>> +unset_isolated_blocks: >>>> + ret = -EBUSY; >>>> + undo_isolate_page_range(isolate_start + pageblock_nr_pages, pfn, >>>> + migratetype); >>>> +unset_end_block: >>>> + unset_migratetype_isolate(pfn_to_page(isolate_end - pageblock_nr_pages), >>>> + migratetype); >>>> +unset_start_block: >>>> + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); >>>> + return ret; >>>> } >>>> >>>> /* >>>> -- >>>> 2.25.1 >>> >>> >>> -- >>> Best Regards, >>> Yan, Zi > > > -- > Best Regards, > Yan, Zi Thanks for your efforts to get alloc_contig_range to work at pageblock granularity! -Doug
On 9/13/2022 6:53 PM, Zi Yan wrote: > On 13 Sep 2022, at 21:47, Doug Berger wrote: > >> On 9/13/2022 6:09 PM, Zi Yan wrote: >>> On 13 Sep 2022, at 20:59, Doug Berger wrote: >>> >>>> On 9/13/2022 5:02 PM, Zi Yan wrote: >>>>> On 13 Sep 2022, at 15:54, Doug Berger wrote: >>>>> >>>>>> The function set_migratetype_isolate() has special handling for >>>>>> pageblocks of MIGRATE_CMA type that protects them from being >>>>>> isolated for MIGRATE_MOVABLE requests. >>>>>> >>>>>> Since isolate_single_pageblock() doesn't receive the migratetype >>>>>> argument of start_isolate_page_range() it used the migratetype >>>>>> of the pageblock instead of the requested migratetype which >>>>>> defeats this MIGRATE_CMA check. >>>>>> >>>>>> This allows an attempt to create a gigantic page within a CMA >>>>>> region to change the migratetype of the first and last pageblocks >>>>>> from MIGRATE_CMA to MIGRATE_MOVABLE when they are restored after >>>>>> failure, which corrupts the CMA region. >>>>>> >>>>>> The calls to (un)set_migratetype_isolate() for the first and last >>>>>> pageblocks of the start_isolate_page_range() are moved back into >>>>>> that function to allow access to its migratetype argument and make >>>>>> it easier to see how all of the pageblocks in the range are >>>>>> isolated. >>>>>> >>>>>> Fixes: b2c9e2fbba32 ("mm: make alloc_contig_range work at pageblock granularity") >>>>>> Signed-off-by: Doug Berger <opendmb@gmail.com> >>>>>> --- >>>>>> mm/page_isolation.c | 75 +++++++++++++++++++++------------------------ >>>>>> 1 file changed, 35 insertions(+), 40 deletions(-) >>>>> >>>>> Thanks for the fix. >>>> Thanks for the review. >>>> >>>>> >>>>> Why not just pass migratetype into isolate_single_pageblock() and use >>>>> it when set_migratetype_isolate() is used? That would have much >>>>> fewer changes. What is the reason of pulling skip isolation logic out? >>>> I found the skip_isolation logic confusing and thought that setting and restoring the migratetype within the same function and consolidating the error recovery paths also within that function was easier to understand and less prone to accidental breakage. >>>> >>>> In particular, setting MIGRATE_ISOLATE in isolate_single_pageblock() and having to remember to unset it in start_isolate_page_range() differently on different error paths was troublesome for me. >>> >>> Wouldn't this work as well? >>> >>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c >>> index c1307d1bea81..a312cabd0d95 100644 >>> --- a/mm/page_isolation.c >>> +++ b/mm/page_isolation.c >>> @@ -288,6 +288,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) >>> * @isolate_before: isolate the pageblock before the boundary_pfn >>> * @skip_isolation: the flag to skip the pageblock isolation in second >>> * isolate_single_pageblock() >>> + * @migratetype: Migrate type to set in error recovery. >>> * >>> * Free and in-use pages can be as big as MAX_ORDER and contain more than one >>> * pageblock. When not all pageblocks within a page are isolated at the same >>> @@ -302,9 +303,9 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) >>> * the in-use page then splitting the free page. >>> */ >>> static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>> - gfp_t gfp_flags, bool isolate_before, bool skip_isolation) >>> + gfp_t gfp_flags, bool isolate_before, bool skip_isolation, >>> + int migratetype) >>> { >>> - unsigned char saved_mt; >>> unsigned long start_pfn; >>> unsigned long isolate_pageblock; >>> unsigned long pfn; >>> @@ -328,12 +329,10 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>> start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES), >>> zone->zone_start_pfn); >>> >>> - saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); >>> - >>> if (skip_isolation) >>> - VM_BUG_ON(!is_migrate_isolate(saved_mt)); >>> + VM_BUG_ON(!is_migrate_isolate(get_pageblock_migratetype(pfn_to_page(isolate_pageblock)))); >>> else { >>> - ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags, >>> + ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype, flags, >>> isolate_pageblock, isolate_pageblock + pageblock_nr_pages); >>> >>> if (ret) >>> @@ -475,7 +474,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>> failed: >>> /* restore the original migratetype */ >>> if (!skip_isolation) >>> - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt); >>> + unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype); >>> return -EBUSY; >>> } >>> >>> @@ -537,7 +536,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, >>> bool skip_isolation = false; >>> >>> /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */ >>> - ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation); >>> + ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, >>> + skip_isolation, migratetype); >>> if (ret) >>> return ret; >>> >>> @@ -545,7 +545,8 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, >>> skip_isolation = true; >>> >>> /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */ >>> - ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation); >>> + ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, >>> + skip_isolation, migratetype); >>> if (ret) { >>> unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); >>> return ret; >>> >> I would expect this to work as well, but it is not my preference. >> >>>> >>>> It could certainly be done differently, but this was my preference. >>> >>> A smaller patch can make review easier, right? >> It certainly can. Especially when it is for code that you are familiar with ;). >> >> I am happy to have you submit a patch to fix this issue and submit it to stable for backporting. Fixing the issue is what's important to me. >> > > I can submit the above as a patch. Is there a visible userspace issue, so that we need to > backport it? Thanks. I did not observe symptoms of the issue, but I did observe the issue when allocating gigantic huge pages as part of the hugetlbfs on a system with CMA regions. My best guess is that it probably does not create a "functional" problem since the error would likely be cancelled out by subsequent CMA allocations restoring the pageblock migratetype. However, in the meantime the page allocator would handle free pages in those pageblocks without the MIGRATE_CMA qualifications which might impact driver performance. There might be other problems of which I am unaware. The issue currently only exists in the wild in v5.19, so it would be nice to get it backported there to nip it in the bud. > >>> >>>>> >>>>> Ultimately, I would like to make MIGRATE_ISOLATE a separate bit, >>>>> so that migratetype will not be overwritten during page isolation. >>>>> Then, set_migratetype_isolate() and start_isolate_page_range() >>>>> will not have migratetype to set in error recovery any more. >>>>> That is on my TODO. >>>>> >>>>>> >>>>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c >>>>>> index 9d73dc38e3d7..8e16aa22cb61 100644 >>>>>> --- a/mm/page_isolation.c >>>>>> +++ b/mm/page_isolation.c >>>>>> @@ -286,8 +286,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) >>>>>> * @flags: isolation flags >>>>>> * @gfp_flags: GFP flags used for migrating pages >>>>>> * @isolate_before: isolate the pageblock before the boundary_pfn >>>>>> - * @skip_isolation: the flag to skip the pageblock isolation in second >>>>>> - * isolate_single_pageblock() >>>>>> * >>>>>> * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one >>>>>> * pageblock. When not all pageblocks within a page are isolated at the same >>>>>> @@ -302,9 +300,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) >>>>>> * the in-use page then splitting the free page. >>>>>> */ >>>>>> static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>>>> - gfp_t gfp_flags, bool isolate_before, bool skip_isolation) >>>>>> + gfp_t gfp_flags, bool isolate_before) >>>>>> { >>>>>> - unsigned char saved_mt; >>>>>> unsigned long start_pfn; >>>>>> unsigned long isolate_pageblock; >>>>>> unsigned long pfn; >>>>>> @@ -328,18 +325,6 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>>>> start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES), >>>>>> zone->zone_start_pfn); >>>>>> >>>>>> - saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); >>>>>> - >>>>>> - if (skip_isolation) >>>>>> - VM_BUG_ON(!is_migrate_isolate(saved_mt)); >>>>>> - else { >>>>>> - ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags, >>>>>> - isolate_pageblock, isolate_pageblock + pageblock_nr_pages); >>>>>> - >>>>>> - if (ret) >>>>>> - return ret; >>>>>> - } >>>>>> - >>>>>> /* >>>>>> * Bail out early when the to-be-isolated pageblock does not form >>>>>> * a free or in-use page across boundary_pfn: >>>>>> @@ -428,7 +413,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>>>> ret = set_migratetype_isolate(page, page_mt, >>>>>> flags, head_pfn, head_pfn + nr_pages); >>>>>> if (ret) >>>>>> - goto failed; >>>>>> + return ret; >>>>>> } >>>>>> >>>>>> ret = __alloc_contig_migrate_range(&cc, head_pfn, >>>>>> @@ -443,7 +428,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>>>> unset_migratetype_isolate(page, page_mt); >>>>>> >>>>>> if (ret) >>>>>> - goto failed; >>>>>> + return -EBUSY; >>>>>> /* >>>>>> * reset pfn to the head of the free page, so >>>>>> * that the free page handling code above can split >>>>>> @@ -459,24 +444,19 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, >>>>>> while (!PageBuddy(pfn_to_page(outer_pfn))) { >>>>>> /* stop if we cannot find the free page */ >>>>>> if (++order >= MAX_ORDER) >>>>>> - goto failed; >>>>>> + return -EBUSY; >>>>>> outer_pfn &= ~0UL << order; >>>>>> } >>>>>> pfn = outer_pfn; >>>>>> continue; >>>>>> } else >>>>>> #endif >>>>>> - goto failed; >>>>>> + return -EBUSY; >>>>>> } >>>>>> >>>>>> pfn++; >>>>>> } >>>>>> return 0; >>>>>> -failed: >>>>>> - /* restore the original migratetype */ >>>>>> - if (!skip_isolation) >>>>>> - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt); >>>>>> - return -EBUSY; >>>>>> } >>>>>> >>>>>> /** >>>>>> @@ -534,21 +514,30 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, >>>>>> unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages); >>>>>> unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages); >>>>>> int ret; >>>>>> - bool skip_isolation = false; >>>>>> >>>>>> /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */ >>>>>> - ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation); >>>>>> + ret = set_migratetype_isolate(pfn_to_page(isolate_start), migratetype, >>>>>> + flags, isolate_start, isolate_start + pageblock_nr_pages); >>>>>> if (ret) >>>>>> return ret; >>>>>> - >>>>>> - if (isolate_start == isolate_end - pageblock_nr_pages) >>>>>> - skip_isolation = true; >>>>>> + ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false); >>>>>> + if (ret) >>>>>> + goto unset_start_block; >>>>>> >>>>>> /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */ >>>>>> - ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation); >>>>>> + pfn = isolate_end - pageblock_nr_pages; >>>>>> + if (isolate_start != pfn) { >>>>>> + ret = set_migratetype_isolate(pfn_to_page(pfn), migratetype, >>>>>> + flags, pfn, pfn + pageblock_nr_pages); >>>>>> + if (ret) >>>>>> + goto unset_start_block; >>>>>> + } >>>>>> + ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true); >>>>>> if (ret) { >>>>>> - unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); >>>>>> - return ret; >>>>>> + if (isolate_start != pfn) >>>>>> + goto unset_end_block; >>>>>> + else >>>>>> + goto unset_start_block; >>>>>> } >>>>>> >>>>>> /* skip isolated pageblocks at the beginning and end */ >>>>>> @@ -557,15 +546,21 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, >>>>>> pfn += pageblock_nr_pages) { >>>>>> page = __first_valid_page(pfn, pageblock_nr_pages); >>>>>> if (page && set_migratetype_isolate(page, migratetype, flags, >>>>>> - start_pfn, end_pfn)) { >>>>>> - undo_isolate_page_range(isolate_start, pfn, migratetype); >>>>>> - unset_migratetype_isolate( >>>>>> - pfn_to_page(isolate_end - pageblock_nr_pages), >>>>>> - migratetype); >>>>>> - return -EBUSY; >>>>>> - } >>>>>> + start_pfn, end_pfn)) >>>>>> + goto unset_isolated_blocks; >>>>>> } >>>>>> return 0; >>>>>> + >>>>>> +unset_isolated_blocks: >>>>>> + ret = -EBUSY; >>>>>> + undo_isolate_page_range(isolate_start + pageblock_nr_pages, pfn, >>>>>> + migratetype); >>>>>> +unset_end_block: >>>>>> + unset_migratetype_isolate(pfn_to_page(isolate_end - pageblock_nr_pages), >>>>>> + migratetype); >>>>>> +unset_start_block: >>>>>> + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); >>>>>> + return ret; >>>>>> } >>>>>> >>>>>> /* >>>>>> -- >>>>>> 2.25.1 >>>>> >>>>> >>>>> -- >>>>> Best Regards, >>>>> Yan, Zi >>> >>> >>> -- >>> Best Regards, >>> Yan, Zi >> Thanks for your efforts to get alloc_contig_range to work at pageblock granularity! >> -Doug > > > -- > Best Regards, > Yan, Zi Thanks, -Doug
Hi Doug,
I love your patch! Perhaps something to improve:
[auto build test WARNING on robh/for-next]
[also build test WARNING on linus/master v6.0-rc5]
[cannot apply to akpm-mm/mm-everything next-20220915]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Doug-Berger/mm-introduce-Designated-Movable-Blocks/20220914-040216
base: https://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git for-next
config: x86_64-randconfig-a012 (https://download.01.org/0day-ci/archive/20220916/202209161112.0TpDtDXi-lkp@intel.com/config)
compiler: clang version 14.0.6 (https://github.com/llvm/llvm-project f28c006a5895fc0e329fe15fead81e37457cb1d1)
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/intel-lab-lkp/linux/commit/10d000298e8a6b50a40ccc90d0d638105255f6e2
git remote add linux-review https://github.com/intel-lab-lkp/linux
git fetch --no-tags linux-review Doug-Berger/mm-introduce-Designated-Movable-Blocks/20220914-040216
git checkout 10d000298e8a6b50a40ccc90d0d638105255f6e2
# save the config file
mkdir build_dir && cp config build_dir/.config
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross W=1 O=build_dir ARCH=x86_64 SHELL=/bin/bash
If you fix the issue, kindly add following tag where applicable
Reported-by: kernel test robot <lkp@intel.com>
All warnings (new ones prefixed by >>):
>> mm/page_isolation.c:309:6: warning: unused variable 'ret' [-Wunused-variable]
int ret;
^
1 warning generated.
vim +/ret +309 mm/page_isolation.c
a5d76b54a3f3a4 KAMEZAWA Hiroyuki 2007-10-16 281
b2c9e2fbba3253 Zi Yan 2022-05-12 282 /**
b2c9e2fbba3253 Zi Yan 2022-05-12 283 * isolate_single_pageblock() -- tries to isolate a pageblock that might be
b2c9e2fbba3253 Zi Yan 2022-05-12 284 * within a free or in-use page.
b2c9e2fbba3253 Zi Yan 2022-05-12 285 * @boundary_pfn: pageblock-aligned pfn that a page might cross
88ee134320b831 Zi Yan 2022-05-24 286 * @flags: isolation flags
b2c9e2fbba3253 Zi Yan 2022-05-12 287 * @gfp_flags: GFP flags used for migrating pages
b2c9e2fbba3253 Zi Yan 2022-05-12 288 * @isolate_before: isolate the pageblock before the boundary_pfn
b2c9e2fbba3253 Zi Yan 2022-05-12 289 *
b2c9e2fbba3253 Zi Yan 2022-05-12 290 * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
b2c9e2fbba3253 Zi Yan 2022-05-12 291 * pageblock. When not all pageblocks within a page are isolated at the same
b2c9e2fbba3253 Zi Yan 2022-05-12 292 * time, free page accounting can go wrong. For example, in the case of
b2c9e2fbba3253 Zi Yan 2022-05-12 293 * MAX_ORDER-1 = pageblock_order + 1, a MAX_ORDER-1 page has two pagelbocks.
b2c9e2fbba3253 Zi Yan 2022-05-12 294 * [ MAX_ORDER-1 ]
b2c9e2fbba3253 Zi Yan 2022-05-12 295 * [ pageblock0 | pageblock1 ]
b2c9e2fbba3253 Zi Yan 2022-05-12 296 * When either pageblock is isolated, if it is a free page, the page is not
b2c9e2fbba3253 Zi Yan 2022-05-12 297 * split into separate migratetype lists, which is supposed to; if it is an
b2c9e2fbba3253 Zi Yan 2022-05-12 298 * in-use page and freed later, __free_one_page() does not split the free page
b2c9e2fbba3253 Zi Yan 2022-05-12 299 * either. The function handles this by splitting the free page or migrating
b2c9e2fbba3253 Zi Yan 2022-05-12 300 * the in-use page then splitting the free page.
b2c9e2fbba3253 Zi Yan 2022-05-12 301 */
88ee134320b831 Zi Yan 2022-05-24 302 static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
10d000298e8a6b Doug Berger 2022-09-13 303 gfp_t gfp_flags, bool isolate_before)
b2c9e2fbba3253 Zi Yan 2022-05-12 304 {
b2c9e2fbba3253 Zi Yan 2022-05-12 305 unsigned long start_pfn;
b2c9e2fbba3253 Zi Yan 2022-05-12 306 unsigned long isolate_pageblock;
b2c9e2fbba3253 Zi Yan 2022-05-12 307 unsigned long pfn;
b2c9e2fbba3253 Zi Yan 2022-05-12 308 struct zone *zone;
88ee134320b831 Zi Yan 2022-05-24 @309 int ret;
b2c9e2fbba3253 Zi Yan 2022-05-12 310
b2c9e2fbba3253 Zi Yan 2022-05-12 311 VM_BUG_ON(!IS_ALIGNED(boundary_pfn, pageblock_nr_pages));
b2c9e2fbba3253 Zi Yan 2022-05-12 312
b2c9e2fbba3253 Zi Yan 2022-05-12 313 if (isolate_before)
b2c9e2fbba3253 Zi Yan 2022-05-12 314 isolate_pageblock = boundary_pfn - pageblock_nr_pages;
b2c9e2fbba3253 Zi Yan 2022-05-12 315 else
b2c9e2fbba3253 Zi Yan 2022-05-12 316 isolate_pageblock = boundary_pfn;
b2c9e2fbba3253 Zi Yan 2022-05-12 317
b2c9e2fbba3253 Zi Yan 2022-05-12 318 /*
b2c9e2fbba3253 Zi Yan 2022-05-12 319 * scan at the beginning of MAX_ORDER_NR_PAGES aligned range to avoid
b2c9e2fbba3253 Zi Yan 2022-05-12 320 * only isolating a subset of pageblocks from a bigger than pageblock
b2c9e2fbba3253 Zi Yan 2022-05-12 321 * free or in-use page. Also make sure all to-be-isolated pageblocks
b2c9e2fbba3253 Zi Yan 2022-05-12 322 * are within the same zone.
b2c9e2fbba3253 Zi Yan 2022-05-12 323 */
b2c9e2fbba3253 Zi Yan 2022-05-12 324 zone = page_zone(pfn_to_page(isolate_pageblock));
b2c9e2fbba3253 Zi Yan 2022-05-12 325 start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
b2c9e2fbba3253 Zi Yan 2022-05-12 326 zone->zone_start_pfn);
b2c9e2fbba3253 Zi Yan 2022-05-12 327
b2c9e2fbba3253 Zi Yan 2022-05-12 328 /*
b2c9e2fbba3253 Zi Yan 2022-05-12 329 * Bail out early when the to-be-isolated pageblock does not form
b2c9e2fbba3253 Zi Yan 2022-05-12 330 * a free or in-use page across boundary_pfn:
b2c9e2fbba3253 Zi Yan 2022-05-12 331 *
b2c9e2fbba3253 Zi Yan 2022-05-12 332 * 1. isolate before boundary_pfn: the page after is not online
b2c9e2fbba3253 Zi Yan 2022-05-12 333 * 2. isolate after boundary_pfn: the page before is not online
b2c9e2fbba3253 Zi Yan 2022-05-12 334 *
b2c9e2fbba3253 Zi Yan 2022-05-12 335 * This also ensures correctness. Without it, when isolate after
b2c9e2fbba3253 Zi Yan 2022-05-12 336 * boundary_pfn and [start_pfn, boundary_pfn) are not online,
b2c9e2fbba3253 Zi Yan 2022-05-12 337 * __first_valid_page() will return unexpected NULL in the for loop
b2c9e2fbba3253 Zi Yan 2022-05-12 338 * below.
b2c9e2fbba3253 Zi Yan 2022-05-12 339 */
b2c9e2fbba3253 Zi Yan 2022-05-12 340 if (isolate_before) {
b2c9e2fbba3253 Zi Yan 2022-05-12 341 if (!pfn_to_online_page(boundary_pfn))
b2c9e2fbba3253 Zi Yan 2022-05-12 342 return 0;
b2c9e2fbba3253 Zi Yan 2022-05-12 343 } else {
b2c9e2fbba3253 Zi Yan 2022-05-12 344 if (!pfn_to_online_page(boundary_pfn - 1))
b2c9e2fbba3253 Zi Yan 2022-05-12 345 return 0;
b2c9e2fbba3253 Zi Yan 2022-05-12 346 }
b2c9e2fbba3253 Zi Yan 2022-05-12 347
b2c9e2fbba3253 Zi Yan 2022-05-12 348 for (pfn = start_pfn; pfn < boundary_pfn;) {
b2c9e2fbba3253 Zi Yan 2022-05-12 349 struct page *page = __first_valid_page(pfn, boundary_pfn - pfn);
b2c9e2fbba3253 Zi Yan 2022-05-12 350
b2c9e2fbba3253 Zi Yan 2022-05-12 351 VM_BUG_ON(!page);
b2c9e2fbba3253 Zi Yan 2022-05-12 352 pfn = page_to_pfn(page);
b2c9e2fbba3253 Zi Yan 2022-05-12 353 /*
b2c9e2fbba3253 Zi Yan 2022-05-12 354 * start_pfn is MAX_ORDER_NR_PAGES aligned, if there is any
b2c9e2fbba3253 Zi Yan 2022-05-12 355 * free pages in [start_pfn, boundary_pfn), its head page will
b2c9e2fbba3253 Zi Yan 2022-05-12 356 * always be in the range.
b2c9e2fbba3253 Zi Yan 2022-05-12 357 */
b2c9e2fbba3253 Zi Yan 2022-05-12 358 if (PageBuddy(page)) {
b2c9e2fbba3253 Zi Yan 2022-05-12 359 int order = buddy_order(page);
b2c9e2fbba3253 Zi Yan 2022-05-12 360
86d28b0709279c Zi Yan 2022-05-26 361 if (pfn + (1UL << order) > boundary_pfn) {
86d28b0709279c Zi Yan 2022-05-26 362 /* free page changed before split, check it again */
86d28b0709279c Zi Yan 2022-05-26 363 if (split_free_page(page, order, boundary_pfn - pfn))
86d28b0709279c Zi Yan 2022-05-26 364 continue;
86d28b0709279c Zi Yan 2022-05-26 365 }
86d28b0709279c Zi Yan 2022-05-26 366
86d28b0709279c Zi Yan 2022-05-26 367 pfn += 1UL << order;
b2c9e2fbba3253 Zi Yan 2022-05-12 368 continue;
b2c9e2fbba3253 Zi Yan 2022-05-12 369 }
b2c9e2fbba3253 Zi Yan 2022-05-12 370 /*
b2c9e2fbba3253 Zi Yan 2022-05-12 371 * migrate compound pages then let the free page handling code
b2c9e2fbba3253 Zi Yan 2022-05-12 372 * above do the rest. If migration is not possible, just fail.
b2c9e2fbba3253 Zi Yan 2022-05-12 373 */
b2c9e2fbba3253 Zi Yan 2022-05-12 374 if (PageCompound(page)) {
b2c9e2fbba3253 Zi Yan 2022-05-12 375 struct page *head = compound_head(page);
b2c9e2fbba3253 Zi Yan 2022-05-12 376 unsigned long head_pfn = page_to_pfn(head);
547be963c99f1e Zi Yan 2022-05-30 377 unsigned long nr_pages = compound_nr(head);
b2c9e2fbba3253 Zi Yan 2022-05-12 378
88ee134320b831 Zi Yan 2022-05-24 379 if (head_pfn + nr_pages <= boundary_pfn) {
b2c9e2fbba3253 Zi Yan 2022-05-12 380 pfn = head_pfn + nr_pages;
b2c9e2fbba3253 Zi Yan 2022-05-12 381 continue;
b2c9e2fbba3253 Zi Yan 2022-05-12 382 }
b2c9e2fbba3253 Zi Yan 2022-05-12 383 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
b2c9e2fbba3253 Zi Yan 2022-05-12 384 /*
b2c9e2fbba3253 Zi Yan 2022-05-12 385 * hugetlb, lru compound (THP), and movable compound pages
b2c9e2fbba3253 Zi Yan 2022-05-12 386 * can be migrated. Otherwise, fail the isolation.
b2c9e2fbba3253 Zi Yan 2022-05-12 387 */
b2c9e2fbba3253 Zi Yan 2022-05-12 388 if (PageHuge(page) || PageLRU(page) || __PageMovable(page)) {
b2c9e2fbba3253 Zi Yan 2022-05-12 389 int order;
b2c9e2fbba3253 Zi Yan 2022-05-12 390 unsigned long outer_pfn;
88ee134320b831 Zi Yan 2022-05-24 391 int page_mt = get_pageblock_migratetype(page);
88ee134320b831 Zi Yan 2022-05-24 392 bool isolate_page = !is_migrate_isolate_page(page);
b2c9e2fbba3253 Zi Yan 2022-05-12 393 struct compact_control cc = {
b2c9e2fbba3253 Zi Yan 2022-05-12 394 .nr_migratepages = 0,
b2c9e2fbba3253 Zi Yan 2022-05-12 395 .order = -1,
b2c9e2fbba3253 Zi Yan 2022-05-12 396 .zone = page_zone(pfn_to_page(head_pfn)),
b2c9e2fbba3253 Zi Yan 2022-05-12 397 .mode = MIGRATE_SYNC,
b2c9e2fbba3253 Zi Yan 2022-05-12 398 .ignore_skip_hint = true,
b2c9e2fbba3253 Zi Yan 2022-05-12 399 .no_set_skip_hint = true,
b2c9e2fbba3253 Zi Yan 2022-05-12 400 .gfp_mask = gfp_flags,
b2c9e2fbba3253 Zi Yan 2022-05-12 401 .alloc_contig = true,
b2c9e2fbba3253 Zi Yan 2022-05-12 402 };
b2c9e2fbba3253 Zi Yan 2022-05-12 403 INIT_LIST_HEAD(&cc.migratepages);
b2c9e2fbba3253 Zi Yan 2022-05-12 404
88ee134320b831 Zi Yan 2022-05-24 405 /*
88ee134320b831 Zi Yan 2022-05-24 406 * XXX: mark the page as MIGRATE_ISOLATE so that
88ee134320b831 Zi Yan 2022-05-24 407 * no one else can grab the freed page after migration.
88ee134320b831 Zi Yan 2022-05-24 408 * Ideally, the page should be freed as two separate
88ee134320b831 Zi Yan 2022-05-24 409 * pages to be added into separate migratetype free
88ee134320b831 Zi Yan 2022-05-24 410 * lists.
88ee134320b831 Zi Yan 2022-05-24 411 */
88ee134320b831 Zi Yan 2022-05-24 412 if (isolate_page) {
88ee134320b831 Zi Yan 2022-05-24 413 ret = set_migratetype_isolate(page, page_mt,
88ee134320b831 Zi Yan 2022-05-24 414 flags, head_pfn, head_pfn + nr_pages);
88ee134320b831 Zi Yan 2022-05-24 415 if (ret)
10d000298e8a6b Doug Berger 2022-09-13 416 return ret;
88ee134320b831 Zi Yan 2022-05-24 417 }
88ee134320b831 Zi Yan 2022-05-24 418
b2c9e2fbba3253 Zi Yan 2022-05-12 419 ret = __alloc_contig_migrate_range(&cc, head_pfn,
b2c9e2fbba3253 Zi Yan 2022-05-12 420 head_pfn + nr_pages);
b2c9e2fbba3253 Zi Yan 2022-05-12 421
88ee134320b831 Zi Yan 2022-05-24 422 /*
88ee134320b831 Zi Yan 2022-05-24 423 * restore the page's migratetype so that it can
88ee134320b831 Zi Yan 2022-05-24 424 * be split into separate migratetype free lists
88ee134320b831 Zi Yan 2022-05-24 425 * later.
88ee134320b831 Zi Yan 2022-05-24 426 */
88ee134320b831 Zi Yan 2022-05-24 427 if (isolate_page)
88ee134320b831 Zi Yan 2022-05-24 428 unset_migratetype_isolate(page, page_mt);
88ee134320b831 Zi Yan 2022-05-24 429
b2c9e2fbba3253 Zi Yan 2022-05-12 430 if (ret)
10d000298e8a6b Doug Berger 2022-09-13 431 return -EBUSY;
b2c9e2fbba3253 Zi Yan 2022-05-12 432 /*
b2c9e2fbba3253 Zi Yan 2022-05-12 433 * reset pfn to the head of the free page, so
b2c9e2fbba3253 Zi Yan 2022-05-12 434 * that the free page handling code above can split
b2c9e2fbba3253 Zi Yan 2022-05-12 435 * the free page to the right migratetype list.
b2c9e2fbba3253 Zi Yan 2022-05-12 436 *
b2c9e2fbba3253 Zi Yan 2022-05-12 437 * head_pfn is not used here as a hugetlb page order
b2c9e2fbba3253 Zi Yan 2022-05-12 438 * can be bigger than MAX_ORDER-1, but after it is
b2c9e2fbba3253 Zi Yan 2022-05-12 439 * freed, the free page order is not. Use pfn within
b2c9e2fbba3253 Zi Yan 2022-05-12 440 * the range to find the head of the free page.
b2c9e2fbba3253 Zi Yan 2022-05-12 441 */
b2c9e2fbba3253 Zi Yan 2022-05-12 442 order = 0;
b2c9e2fbba3253 Zi Yan 2022-05-12 443 outer_pfn = pfn;
b2c9e2fbba3253 Zi Yan 2022-05-12 444 while (!PageBuddy(pfn_to_page(outer_pfn))) {
88ee134320b831 Zi Yan 2022-05-24 445 /* stop if we cannot find the free page */
88ee134320b831 Zi Yan 2022-05-24 446 if (++order >= MAX_ORDER)
10d000298e8a6b Doug Berger 2022-09-13 447 return -EBUSY;
b2c9e2fbba3253 Zi Yan 2022-05-12 448 outer_pfn &= ~0UL << order;
b2c9e2fbba3253 Zi Yan 2022-05-12 449 }
b2c9e2fbba3253 Zi Yan 2022-05-12 450 pfn = outer_pfn;
b2c9e2fbba3253 Zi Yan 2022-05-12 451 continue;
b2c9e2fbba3253 Zi Yan 2022-05-12 452 } else
b2c9e2fbba3253 Zi Yan 2022-05-12 453 #endif
10d000298e8a6b Doug Berger 2022-09-13 454 return -EBUSY;
b2c9e2fbba3253 Zi Yan 2022-05-12 455 }
b2c9e2fbba3253 Zi Yan 2022-05-12 456
b2c9e2fbba3253 Zi Yan 2022-05-12 457 pfn++;
b2c9e2fbba3253 Zi Yan 2022-05-12 458 }
b2c9e2fbba3253 Zi Yan 2022-05-12 459 return 0;
b2c9e2fbba3253 Zi Yan 2022-05-12 460 }
b2c9e2fbba3253 Zi Yan 2022-05-12 461
diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 9d73dc38e3d7..8e16aa22cb61 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -286,8 +286,6 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * @flags: isolation flags * @gfp_flags: GFP flags used for migrating pages * @isolate_before: isolate the pageblock before the boundary_pfn - * @skip_isolation: the flag to skip the pageblock isolation in second - * isolate_single_pageblock() * * Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one * pageblock. When not all pageblocks within a page are isolated at the same @@ -302,9 +300,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * the in-use page then splitting the free page. */ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, - gfp_t gfp_flags, bool isolate_before, bool skip_isolation) + gfp_t gfp_flags, bool isolate_before) { - unsigned char saved_mt; unsigned long start_pfn; unsigned long isolate_pageblock; unsigned long pfn; @@ -328,18 +325,6 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES), zone->zone_start_pfn); - saved_mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock)); - - if (skip_isolation) - VM_BUG_ON(!is_migrate_isolate(saved_mt)); - else { - ret = set_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt, flags, - isolate_pageblock, isolate_pageblock + pageblock_nr_pages); - - if (ret) - return ret; - } - /* * Bail out early when the to-be-isolated pageblock does not form * a free or in-use page across boundary_pfn: @@ -428,7 +413,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, ret = set_migratetype_isolate(page, page_mt, flags, head_pfn, head_pfn + nr_pages); if (ret) - goto failed; + return ret; } ret = __alloc_contig_migrate_range(&cc, head_pfn, @@ -443,7 +428,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, unset_migratetype_isolate(page, page_mt); if (ret) - goto failed; + return -EBUSY; /* * reset pfn to the head of the free page, so * that the free page handling code above can split @@ -459,24 +444,19 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags, while (!PageBuddy(pfn_to_page(outer_pfn))) { /* stop if we cannot find the free page */ if (++order >= MAX_ORDER) - goto failed; + return -EBUSY; outer_pfn &= ~0UL << order; } pfn = outer_pfn; continue; } else #endif - goto failed; + return -EBUSY; } pfn++; } return 0; -failed: - /* restore the original migratetype */ - if (!skip_isolation) - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), saved_mt); - return -EBUSY; } /** @@ -534,21 +514,30 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned long isolate_start = ALIGN_DOWN(start_pfn, pageblock_nr_pages); unsigned long isolate_end = ALIGN(end_pfn, pageblock_nr_pages); int ret; - bool skip_isolation = false; /* isolate [isolate_start, isolate_start + pageblock_nr_pages) pageblock */ - ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false, skip_isolation); + ret = set_migratetype_isolate(pfn_to_page(isolate_start), migratetype, + flags, isolate_start, isolate_start + pageblock_nr_pages); if (ret) return ret; - - if (isolate_start == isolate_end - pageblock_nr_pages) - skip_isolation = true; + ret = isolate_single_pageblock(isolate_start, flags, gfp_flags, false); + if (ret) + goto unset_start_block; /* isolate [isolate_end - pageblock_nr_pages, isolate_end) pageblock */ - ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true, skip_isolation); + pfn = isolate_end - pageblock_nr_pages; + if (isolate_start != pfn) { + ret = set_migratetype_isolate(pfn_to_page(pfn), migratetype, + flags, pfn, pfn + pageblock_nr_pages); + if (ret) + goto unset_start_block; + } + ret = isolate_single_pageblock(isolate_end, flags, gfp_flags, true); if (ret) { - unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); - return ret; + if (isolate_start != pfn) + goto unset_end_block; + else + goto unset_start_block; } /* skip isolated pageblocks at the beginning and end */ @@ -557,15 +546,21 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); if (page && set_migratetype_isolate(page, migratetype, flags, - start_pfn, end_pfn)) { - undo_isolate_page_range(isolate_start, pfn, migratetype); - unset_migratetype_isolate( - pfn_to_page(isolate_end - pageblock_nr_pages), - migratetype); - return -EBUSY; - } + start_pfn, end_pfn)) + goto unset_isolated_blocks; } return 0; + +unset_isolated_blocks: + ret = -EBUSY; + undo_isolate_page_range(isolate_start + pageblock_nr_pages, pfn, + migratetype); +unset_end_block: + unset_migratetype_isolate(pfn_to_page(isolate_end - pageblock_nr_pages), + migratetype); +unset_start_block: + unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype); + return ret; } /*
The function set_migratetype_isolate() has special handling for pageblocks of MIGRATE_CMA type that protects them from being isolated for MIGRATE_MOVABLE requests. Since isolate_single_pageblock() doesn't receive the migratetype argument of start_isolate_page_range() it used the migratetype of the pageblock instead of the requested migratetype which defeats this MIGRATE_CMA check. This allows an attempt to create a gigantic page within a CMA region to change the migratetype of the first and last pageblocks from MIGRATE_CMA to MIGRATE_MOVABLE when they are restored after failure, which corrupts the CMA region. The calls to (un)set_migratetype_isolate() for the first and last pageblocks of the start_isolate_page_range() are moved back into that function to allow access to its migratetype argument and make it easier to see how all of the pageblocks in the range are isolated. Fixes: b2c9e2fbba32 ("mm: make alloc_contig_range work at pageblock granularity") Signed-off-by: Doug Berger <opendmb@gmail.com> --- mm/page_isolation.c | 75 +++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 40 deletions(-)