linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>,
	Vishal Moola <vishal.moola@gmail.com>,
	Johannes Weiner <hannes@cmpxchg.org>
Subject: [RFC PATCH 7/7] mm: Allocate ptdesc from slab
Date: Mon, 20 Oct 2025 01:16:42 +0100	[thread overview]
Message-ID: <20251020001652.2116669-8-willy@infradead.org> (raw)
In-Reply-To: <20251020001652.2116669-1-willy@infradead.org>

Create a slab cache for ptdescs and point to the struct page from the
ptdesc.  Remove all the padding from ptdesc that makes it line up with
struct page.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/mm.h       |  1 +
 include/linux/mm_types.h | 50 ++++------------------------------------
 mm/internal.h            |  1 +
 mm/memory.c              | 35 ++++++++++++++++++++++++----
 mm/mm_init.c             |  1 +
 5 files changed, 37 insertions(+), 51 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e60b181da3df..e8bb52061b0c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2970,6 +2970,7 @@ static inline struct ptdesc *page_ptdesc(const struct page *page)
  * The high bits are used for information like zone/node/section.
  */
 enum pt_flags {
+	/* Bits 0-3 used for pt_order */
 	PT_reserved = PG_reserved,
 };
 
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f5d9e0afe0fa..efdf29b8b478 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -548,38 +548,30 @@ FOLIO_MATCH(compound_head, _head_3);
 /**
  * struct ptdesc -    Memory descriptor for page tables.
  * @pt_flags: enum pt_flags plus zone/node/section.
+ * @pt_page:          Page allocated to store page table entries.
  * @pt_rcu_head:      For freeing page table pages.
  * @pt_list:          List of used page tables. Used for s390 gmap shadow pages
  *                    (which are not linked into the user page tables) and x86
  *                    pgds.
- * @_pt_pad_1:        Padding that aliases with page's compound head.
  * @pmd_huge_pte:     Protected by ptdesc->ptl, used for THPs.
- * @__page_mapping:   Aliases with page->mapping. Unused for page tables.
  * @pt_index:         Used for s390 gmap.
  * @pt_mm:            Used for x86 pgds.
  * @pt_frag_refcount: For fragmented page table tracking. Powerpc only.
  * @pt_share_count:   Used for HugeTLB PMD page table share count.
- * @_pt_pad_2:        Padding to ensure proper alignment.
  * @ptl:              Lock for the page table.
- * @__page_type:      Same as page->page_type. Unused for page tables.
- * @__page_refcount:  Same as page refcount.
- * @pt_memcg_data:    Memcg data. Tracked for page tables here.
  *
  * This struct overlays struct page for now. Do not modify without a good
  * understanding of the issues.
  */
 struct ptdesc {
 	memdesc_flags_t pt_flags;
+	struct page *pt_page;
 
 	union {
 		struct rcu_head pt_rcu_head;
 		struct list_head pt_list;
-		struct {
-			unsigned long _pt_pad_1;
-			pgtable_t pmd_huge_pte;
-		};
+		pgtable_t pmd_huge_pte;
 	};
-	unsigned long __page_mapping;
 
 	union {
 		pgoff_t pt_index;
@@ -591,47 +583,13 @@ struct ptdesc {
 	};
 
 	union {
-		unsigned long _pt_pad_2;
 #if ALLOC_SPLIT_PTLOCKS
 		spinlock_t *ptl;
 #else
 		spinlock_t ptl;
 #endif
 	};
-	unsigned int __page_type;
-	atomic_t __page_refcount;
-#ifdef CONFIG_MEMCG
-	unsigned long pt_memcg_data;
-#endif
-};
-
-#define TABLE_MATCH(pg, pt)						\
-	static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt))
-TABLE_MATCH(flags, pt_flags);
-TABLE_MATCH(compound_head, pt_list);
-TABLE_MATCH(compound_head, _pt_pad_1);
-TABLE_MATCH(mapping, __page_mapping);
-TABLE_MATCH(__folio_index, pt_index);
-TABLE_MATCH(rcu_head, pt_rcu_head);
-TABLE_MATCH(page_type, __page_type);
-TABLE_MATCH(_refcount, __page_refcount);
-#ifdef CONFIG_MEMCG
-TABLE_MATCH(memcg_data, pt_memcg_data);
-#endif
-#undef TABLE_MATCH
-static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
-
-#define ptdesc_page(pt)			(_Generic((pt),			\
-	const struct ptdesc *:		(const struct page *)(pt),	\
-	struct ptdesc *:		(struct page *)(pt)))
-
-#define ptdesc_folio(pt)		(_Generic((pt),			\
-	const struct ptdesc *:		(const struct folio *)(pt),	\
-	struct ptdesc *:		(struct folio *)(pt)))
-
-#define page_ptdesc(p)			(_Generic((p),			\
-	const struct page *:		(const struct ptdesc *)(p),	\
-	struct page *:			(struct ptdesc *)(p)))
+} __aligned(16);
 
 #ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
 static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
diff --git a/mm/internal.h b/mm/internal.h
index 15d64601289b..d57487ba443d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -100,6 +100,7 @@ struct pagetable_move_control {
 	unlikely(__ret_warn_once);					\
 })
 
+void __init ptcache_init(void);
 void page_writeback_init(void);
 
 /*
diff --git a/mm/memory.c b/mm/memory.c
index 47eb5834db23..331582bec495 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -7267,10 +7267,17 @@ long copy_folio_from_user(struct folio *dst_folio,
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
 
+static struct kmem_cache *ptcache;
+
+void __init ptcache_init(void)
+{
+	ptcache = KMEM_CACHE(ptdesc, 0);
+}
+
 /**
  * pagetable_alloc - Allocate pagetables
  * @gfp:    GFP flags
- * @order:  desired pagetable order
+ * @order:  pagetable order
  *
  * pagetable_alloc allocates memory for page tables as well as a page table
  * descriptor to describe that memory.
@@ -7279,16 +7286,34 @@ long copy_folio_from_user(struct folio *dst_folio,
  */
 struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
 {
-	struct page *page = alloc_frozen_pages_noprof(gfp | __GFP_COMP, order);
+	struct page *page;
 	pg_data_t *pgdat;
+	struct ptdesc *ptdesc;
+
+	BUG_ON(!ptcache);
 
-	if (!page)
+	ptdesc = kmem_cache_alloc(ptcache, gfp);
+	if (!ptdesc)
 		return NULL;
 
+	page = alloc_pages_memdesc(gfp, order,
+			memdesc_create(ptdesc, MEMDESC_TYPE_PAGE_TABLE));
+	if (!page) {
+		kmem_cache_free(ptcache, ptdesc);
+		return NULL;
+	}
+
+	VM_BUG_ON_PAGE(memdesc_type(page->memdesc) != MEMDESC_TYPE_PAGE_TABLE, page);
 	pgdat = NODE_DATA(page_to_nid(page));
 	mod_node_page_state(pgdat, NR_PAGETABLE, 1 << order);
 	__SetPageTable(page);
-	return page_ptdesc(page);
+	page->__folio_index = (unsigned long)ptdesc;
+
+	ptdesc->pt_flags = page->flags;
+	ptdesc->pt_flags.f |= order;
+	ptdesc->pt_page = page;
+
+	return ptdesc;
 }
 
 /**
@@ -7302,7 +7327,7 @@ void pagetable_free(struct ptdesc *pt)
 {
 	pg_data_t *pgdat = NODE_DATA(memdesc_nid(pt->pt_flags));
 	struct page *page = ptdesc_page(pt);
-	unsigned int order = compound_order(page);
+	unsigned int order = pt->pt_flags.f & 0xf;
 
 	mod_node_page_state(pgdat, NR_PAGETABLE, -(1L << order));
 	__ClearPageTable(page);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3db2dea7db4c..dc6d2f81b692 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2712,6 +2712,7 @@ void __init mm_core_init(void)
 	 */
 	page_ext_init_flatmem_late();
 	kmemleak_init();
+	ptcache_init();
 	ptlock_cache_init();
 	pgtable_cache_init();
 	debug_objects_mem_init();
-- 
2.47.2



  parent reply	other threads:[~2025-10-20  0:35 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-20  0:16 [RFC PATCH 0/7] Separate ptdesc from struct page Matthew Wilcox (Oracle)
2025-10-20  0:16 ` [RFC PATCH 1/7] mm: Use frozen pages for page tables Matthew Wilcox (Oracle)
2025-10-20  0:16 ` [RFC PATCH 2/7] mm: Account pagetable memory when allocated Matthew Wilcox (Oracle)
2025-11-11  7:09   ` Anshuman Khandual
2025-11-11 16:43     ` Matthew Wilcox
2025-10-20  0:16 ` [RFC PATCH 3/7] mm: Mark " Matthew Wilcox (Oracle)
2025-10-20  0:16 ` [RFC PATCH 4/7] pgtable: Remove uses of page->lru Matthew Wilcox (Oracle)
2025-11-11  7:53   ` Anshuman Khandual
2025-11-11 18:46     ` Matthew Wilcox
2025-10-20  0:16 ` [RFC PATCH 5/7] x86: Call preallocate_vmalloc_pages() later Matthew Wilcox (Oracle)
2025-11-11  8:59   ` Anshuman Khandual
2025-11-12 18:36     ` Vishal Moola (Oracle)
2025-11-12 19:31       ` Vishal Moola (Oracle)
2025-11-13 13:53     ` Matthew Wilcox
2025-10-20  0:16 ` [RFC PATCH 6/7] mm: Add alloc_pages_memdesc family of APIs Matthew Wilcox (Oracle)
2025-10-20  0:16 ` Matthew Wilcox (Oracle) [this message]
2025-10-20  6:43 ` [syzbot ci] Re: Separate ptdesc from struct page syzbot ci

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251020001652.2116669-8-willy@infradead.org \
    --to=willy@infradead.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-mm@kvack.org \
    --cc=vishal.moola@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.