diff mbox series

[net-next,v7,1/5] mm: add a signature in struct page

Message ID 20210604183349.30040-2-mcroce@linux.microsoft.com
State New
Headers show
Series page_pool: recycle buffers | expand

Commit Message

Matteo Croce June 4, 2021, 6:33 p.m. UTC
From: Matteo Croce <mcroce@microsoft.com>

This is needed by the page_pool to avoid recycling a page not allocated
via page_pool.

The page->signature field is aliased to page->lru.next and
page->compound_head, but it can't be set by mistake because the
signature value is a bad pointer, and can't trigger a false positive
in PageTail() because the last bit is 0.

Co-developed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Matteo Croce <mcroce@microsoft.com>
---
 include/linux/mm.h       | 12 +++++++-----
 include/linux/mm_types.h | 12 +++++++++++-
 include/linux/poison.h   |  3 +++
 net/core/page_pool.c     |  6 ++++++
 4 files changed, 27 insertions(+), 6 deletions(-)

Comments

Matthew Wilcox June 4, 2021, 7:07 p.m. UTC | #1
On Fri, Jun 04, 2021 at 08:33:45PM +0200, Matteo Croce wrote:
> @@ -130,7 +137,10 @@ struct page {
>  			};
>  		};
>  		struct {	/* Tail pages of compound page */
> -			unsigned long compound_head;	/* Bit zero is set */
> +			/* Bit zero is set
> +			 * Bit one if pfmemalloc page
> +			 */
> +			unsigned long compound_head;

I would drop this hunk.  Bit 1 is not used for this purpose in tail
pages; it's used for that purpose in head and base pages.

I suppose we could do something like ...

 static inline void set_page_pfmemalloc(struct page *page)
 {
-	page->index = -1UL;
+	page->lru.next = (void *)2;
 }

if it's causing confusion.
Matthew Wilcox June 5, 2021, 2:32 p.m. UTC | #2
On Sat, Jun 05, 2021 at 12:59:50AM +0200, Matteo Croce wrote:
> On Fri, Jun 4, 2021 at 9:08 PM Matthew Wilcox <willy@infradead.org> wrote:

> >

> > On Fri, Jun 04, 2021 at 08:33:45PM +0200, Matteo Croce wrote:

> > > @@ -130,7 +137,10 @@ struct page {

> > >                       };

> > >               };

> > >               struct {        /* Tail pages of compound page */

> > > -                     unsigned long compound_head;    /* Bit zero is set */

> > > +                     /* Bit zero is set

> > > +                      * Bit one if pfmemalloc page

> > > +                      */

> > > +                     unsigned long compound_head;

> >

> > I would drop this hunk.  Bit 1 is not used for this purpose in tail

> > pages; it's used for that purpose in head and base pages.

> >

> > I suppose we could do something like ...

> >

> >  static inline void set_page_pfmemalloc(struct page *page)

> >  {

> > -       page->index = -1UL;

> > +       page->lru.next = (void *)2;

> >  }

> >

> > if it's causing confusion.

> >

> 

> If you prefer, ok for me.

> Why not "(void *)BIT(1)"? Just to remark that it's a single bit and

> not a magic like value?


I don't have a strong preference.  I'd use '2', but I wouldn't ask
BIT(1) to be changed.
Matteo Croce June 6, 2021, 1:50 a.m. UTC | #3
On Sat, Jun 5, 2021 at 4:32 PM Matthew Wilcox <willy@infradead.org> wrote:
>

> On Sat, Jun 05, 2021 at 12:59:50AM +0200, Matteo Croce wrote:

> > On Fri, Jun 4, 2021 at 9:08 PM Matthew Wilcox <willy@infradead.org> wrote:

> > >

> > > On Fri, Jun 04, 2021 at 08:33:45PM +0200, Matteo Croce wrote:

> > > > @@ -130,7 +137,10 @@ struct page {

> > > >                       };

> > > >               };

> > > >               struct {        /* Tail pages of compound page */

> > > > -                     unsigned long compound_head;    /* Bit zero is set */

> > > > +                     /* Bit zero is set

> > > > +                      * Bit one if pfmemalloc page

> > > > +                      */

> > > > +                     unsigned long compound_head;

> > >

> > > I would drop this hunk.  Bit 1 is not used for this purpose in tail

> > > pages; it's used for that purpose in head and base pages.

> > >

> > > I suppose we could do something like ...

> > >

> > >  static inline void set_page_pfmemalloc(struct page *page)

> > >  {

> > > -       page->index = -1UL;

> > > +       page->lru.next = (void *)2;

> > >  }

> > >

> > > if it's causing confusion.

> > >

> >


And change all the *_pfmemalloc functions to use page->lru.next like this?

@@ -1668,10 +1668,12 @@ struct address_space *page_mapping(struct page *page);
static inline bool page_is_pfmemalloc(const struct page *page)
{
       /*
-        * Page index cannot be this large so this must be
-        * a pfmemalloc page.
+        * This is not a tail page; compound_head of a head page is unused
+        * at return from the page allocator, and will be overwritten
+        * by callers who do not care whether the page came from the
+        * reserves.
        */
-       return page->index == -1UL;
+       return (uintptr_t)page->lru.next & BIT(1);
}

/*
@@ -1680,12 +1682,12 @@ static inline bool page_is_pfmemalloc(const
struct page *page)
 */
static inline void set_page_pfmemalloc(struct page *page)
{
-       page->index = -1UL;
+       page->lru.next = (void *)BIT(1);
}

static inline void clear_page_pfmemalloc(struct page *page)
{
-       page->index = 0;
+       page->lru.next = NULL;

}

-- 
per aspera ad upstream
Matthew Wilcox June 7, 2021, 1:52 p.m. UTC | #4
On Sun, Jun 06, 2021 at 03:50:54AM +0200, Matteo Croce wrote:
> And change all the *_pfmemalloc functions to use page->lru.next like this?

> 

> @@ -1668,10 +1668,12 @@ struct address_space *page_mapping(struct page *page);

> static inline bool page_is_pfmemalloc(const struct page *page)

> {

>        /*

> -        * Page index cannot be this large so this must be

> -        * a pfmemalloc page.

> +        * This is not a tail page; compound_head of a head page is unused

> +        * at return from the page allocator, and will be overwritten

> +        * by callers who do not care whether the page came from the

> +        * reserves.

>         */


The comment doesn't make a lot of sense if we're switching to use
lru.next.  How about:

	/*
	 * lru.next has bit 1 set if the page is allocated from the
	 * pfmemalloc reserves.  Callers may simply overwrite it if
	 * they do not need to preserve that information.
	 */
Matteo Croce June 7, 2021, 1:58 p.m. UTC | #5
On Mon, Jun 7, 2021 at 3:53 PM Matthew Wilcox <willy@infradead.org> wrote:
>

> On Sun, Jun 06, 2021 at 03:50:54AM +0200, Matteo Croce wrote:

> > And change all the *_pfmemalloc functions to use page->lru.next like this?

> >

> > @@ -1668,10 +1668,12 @@ struct address_space *page_mapping(struct page *page);

> > static inline bool page_is_pfmemalloc(const struct page *page)

> > {

> >        /*

> > -        * Page index cannot be this large so this must be

> > -        * a pfmemalloc page.

> > +        * This is not a tail page; compound_head of a head page is unused

> > +        * at return from the page allocator, and will be overwritten

> > +        * by callers who do not care whether the page came from the

> > +        * reserves.

> >         */

>

> The comment doesn't make a lot of sense if we're switching to use

> lru.next.  How about:

>

>         /*

>          * lru.next has bit 1 set if the page is allocated from the

>          * pfmemalloc reserves.  Callers may simply overwrite it if

>          * they do not need to preserve that information.

>          */


Sounds good!

-- 
per aspera ad upstream
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c274f75efcf9..b71074a5e82b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1668,10 +1668,12 @@  struct address_space *page_mapping(struct page *page);
 static inline bool page_is_pfmemalloc(const struct page *page)
 {
 	/*
-	 * Page index cannot be this large so this must be
-	 * a pfmemalloc page.
+	 * This is not a tail page; compound_head of a head page is unused
+	 * at return from the page allocator, and will be overwritten
+	 * by callers who do not care whether the page came from the
+	 * reserves.
 	 */
-	return page->index == -1UL;
+	return page->compound_head & BIT(1);
 }
 
 /*
@@ -1680,12 +1682,12 @@  static inline bool page_is_pfmemalloc(const struct page *page)
  */
 static inline void set_page_pfmemalloc(struct page *page)
 {
-	page->index = -1UL;
+	page->compound_head = BIT(1);
 }
 
 static inline void clear_page_pfmemalloc(struct page *page)
 {
-	page->index = 0;
+	page->compound_head = 0;
 }
 
 /*
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5aacc1c10a45..09f90598ff63 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -96,6 +96,13 @@  struct page {
 			unsigned long private;
 		};
 		struct {	/* page_pool used by netstack */
+			/**
+			 * @pp_magic: magic value to avoid recycling non
+			 * page_pool allocated pages.
+			 */
+			unsigned long pp_magic;
+			struct page_pool *pp;
+			unsigned long _pp_mapping_pad;
 			/**
 			 * @dma_addr: might require a 64-bit value on
 			 * 32-bit architectures.
@@ -130,7 +137,10 @@  struct page {
 			};
 		};
 		struct {	/* Tail pages of compound page */
-			unsigned long compound_head;	/* Bit zero is set */
+			/* Bit zero is set
+			 * Bit one if pfmemalloc page
+			 */
+			unsigned long compound_head;
 
 			/* First tail page only */
 			unsigned char compound_dtor;
diff --git a/include/linux/poison.h b/include/linux/poison.h
index aff1c9250c82..d62ef5a6b4e9 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -78,4 +78,7 @@ 
 /********** security/ **********/
 #define KEY_DESTROY		0xbd
 
+/********** net/core/page_pool.c **********/
+#define PP_SIGNATURE		(0x40 + POISON_POINTER_DELTA)
+
 #endif
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 3c4c4c7a0402..e1321bc9d316 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -17,6 +17,7 @@ 
 #include <linux/dma-mapping.h>
 #include <linux/page-flags.h>
 #include <linux/mm.h> /* for __put_page() */
+#include <linux/poison.h>
 
 #include <trace/events/page_pool.h>
 
@@ -221,6 +222,8 @@  static struct page *__page_pool_alloc_page_order(struct page_pool *pool,
 		return NULL;
 	}
 
+	page->pp_magic |= PP_SIGNATURE;
+
 	/* Track how many pages are held 'in-flight' */
 	pool->pages_state_hold_cnt++;
 	trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt);
@@ -263,6 +266,7 @@  static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
 			put_page(page);
 			continue;
 		}
+		page->pp_magic |= PP_SIGNATURE;
 		pool->alloc.cache[pool->alloc.count++] = page;
 		/* Track how many pages are held 'in-flight' */
 		pool->pages_state_hold_cnt++;
@@ -341,6 +345,8 @@  void page_pool_release_page(struct page_pool *pool, struct page *page)
 			     DMA_ATTR_SKIP_CPU_SYNC);
 	page_pool_set_dma_addr(page, 0);
 skip_dma_unmap:
+	page->pp_magic = 0;
+
 	/* This may be the last page returned, releasing the pool, so
 	 * it is not safe to reference pool afterwards.
 	 */