From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
To: linux-mm@kvack.org
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>,
Vishal Moola <vishal.moola@gmail.com>,
Johannes Weiner <hannes@cmpxchg.org>
Subject: [RFC PATCH 7/7] mm: Allocate ptdesc from slab
Date: Mon, 20 Oct 2025 01:16:42 +0100 [thread overview]
Message-ID: <20251020001652.2116669-8-willy@infradead.org> (raw)
In-Reply-To: <20251020001652.2116669-1-willy@infradead.org>
Create a slab cache for ptdescs and point to the struct page from the
ptdesc. Remove all the padding from ptdesc that makes it line up with
struct page.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/mm.h | 1 +
include/linux/mm_types.h | 50 ++++------------------------------------
mm/internal.h | 1 +
mm/memory.c | 35 ++++++++++++++++++++++++----
mm/mm_init.c | 1 +
5 files changed, 37 insertions(+), 51 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e60b181da3df..e8bb52061b0c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2970,6 +2970,7 @@ static inline struct ptdesc *page_ptdesc(const struct page *page)
* The high bits are used for information like zone/node/section.
*/
enum pt_flags {
+ /* Bits 0-3 used for pt_order */
PT_reserved = PG_reserved,
};
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f5d9e0afe0fa..efdf29b8b478 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -548,38 +548,30 @@ FOLIO_MATCH(compound_head, _head_3);
/**
* struct ptdesc - Memory descriptor for page tables.
* @pt_flags: enum pt_flags plus zone/node/section.
+ * @pt_page: page allocated to store page table entries.
* @pt_rcu_head: For freeing page table pages.
* @pt_list: List of used page tables. Used for s390 gmap shadow pages
* (which are not linked into the user page tables) and x86
* pgds.
- * @_pt_pad_1: Padding that aliases with page's compound head.
* @pmd_huge_pte: Protected by ptdesc->ptl, used for THPs.
- * @__page_mapping: Aliases with page->mapping. Unused for page tables.
* @pt_index: Used for s390 gmap.
* @pt_mm: Used for x86 pgds.
* @pt_frag_refcount: For fragmented page table tracking. Powerpc only.
* @pt_share_count: Used for HugeTLB PMD page table share count.
- * @_pt_pad_2: Padding to ensure proper alignment.
* @ptl: Lock for the page table.
- * @__page_type: Same as page->page_type. Unused for page tables.
- * @__page_refcount: Same as page refcount.
- * @pt_memcg_data: Memcg data. Tracked for page tables here.
*
* This struct overlays struct page for now. Do not modify without a good
* understanding of the issues.
*/
struct ptdesc {
memdesc_flags_t pt_flags;
+ struct page *pt_page;
union {
struct rcu_head pt_rcu_head;
struct list_head pt_list;
- struct {
- unsigned long _pt_pad_1;
- pgtable_t pmd_huge_pte;
- };
+ pgtable_t pmd_huge_pte;
};
- unsigned long __page_mapping;
union {
pgoff_t pt_index;
@@ -591,47 +583,13 @@ struct ptdesc {
};
union {
- unsigned long _pt_pad_2;
#if ALLOC_SPLIT_PTLOCKS
spinlock_t *ptl;
#else
spinlock_t ptl;
#endif
};
- unsigned int __page_type;
- atomic_t __page_refcount;
-#ifdef CONFIG_MEMCG
- unsigned long pt_memcg_data;
-#endif
-};
-
-#define TABLE_MATCH(pg, pt) \
- static_assert(offsetof(struct page, pg) == offsetof(struct ptdesc, pt))
-TABLE_MATCH(flags, pt_flags);
-TABLE_MATCH(compound_head, pt_list);
-TABLE_MATCH(compound_head, _pt_pad_1);
-TABLE_MATCH(mapping, __page_mapping);
-TABLE_MATCH(__folio_index, pt_index);
-TABLE_MATCH(rcu_head, pt_rcu_head);
-TABLE_MATCH(page_type, __page_type);
-TABLE_MATCH(_refcount, __page_refcount);
-#ifdef CONFIG_MEMCG
-TABLE_MATCH(memcg_data, pt_memcg_data);
-#endif
-#undef TABLE_MATCH
-static_assert(sizeof(struct ptdesc) <= sizeof(struct page));
-
-#define ptdesc_page(pt) (_Generic((pt), \
- const struct ptdesc *: (const struct page *)(pt), \
- struct ptdesc *: (struct page *)(pt)))
-
-#define ptdesc_folio(pt) (_Generic((pt), \
- const struct ptdesc *: (const struct folio *)(pt), \
- struct ptdesc *: (struct folio *)(pt)))
-
-#define page_ptdesc(p) (_Generic((p), \
- const struct page *: (const struct ptdesc *)(p), \
- struct page *: (struct ptdesc *)(p)))
+} __aligned(16);
#ifdef CONFIG_HUGETLB_PMD_PAGE_TABLE_SHARING
static inline void ptdesc_pmd_pts_init(struct ptdesc *ptdesc)
diff --git a/mm/internal.h b/mm/internal.h
index 15d64601289b..d57487ba443d 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -100,6 +100,7 @@ struct pagetable_move_control {
unlikely(__ret_warn_once); \
})
+void __init ptcache_init(void);
void page_writeback_init(void);
/*
diff --git a/mm/memory.c b/mm/memory.c
index 47eb5834db23..331582bec495 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -7267,10 +7267,17 @@ long copy_folio_from_user(struct folio *dst_folio,
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */
+static struct kmem_cache *ptcache;
+
+void __init ptcache_init(void)
+{
+ ptcache = KMEM_CACHE(ptdesc, 0);
+}
+
/**
* pagetable_alloc - Allocate pagetables
* @gfp: GFP flags
- * @order: desired pagetable order
+ * @order: pagetable order
*
* pagetable_alloc allocates memory for page tables as well as a page table
* descriptor to describe that memory.
@@ -7279,16 +7286,34 @@ long copy_folio_from_user(struct folio *dst_folio,
*/
struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
{
- struct page *page = alloc_frozen_pages_noprof(gfp | __GFP_COMP, order);
+ struct page *page;
pg_data_t *pgdat;
+ struct ptdesc *ptdesc;
+
+ BUG_ON(!ptcache);
- if (!page)
+ ptdesc = kmem_cache_alloc(ptcache, gfp);
+ if (!ptdesc)
return NULL;
+ page = alloc_pages_memdesc(gfp, order,
+ memdesc_create(ptdesc, MEMDESC_TYPE_PAGE_TABLE));
+ if (!page) {
+ kmem_cache_free(ptcache, ptdesc);
+ return NULL;
+ }
+
+ VM_BUG_ON_PAGE(memdesc_type(page->memdesc) != MEMDESC_TYPE_PAGE_TABLE, page);
pgdat = NODE_DATA(page_to_nid(page));
mod_node_page_state(pgdat, NR_PAGETABLE, 1 << order);
__SetPageTable(page);
- return page_ptdesc(page);
+ page->__folio_index = (unsigned long)ptdesc;
+
+ ptdesc->pt_flags = page->flags;
+ ptdesc->pt_flags.f |= order;
+ ptdesc->pt_page = page;
+
+ return ptdesc;
}
/**
@@ -7302,7 +7327,7 @@ void pagetable_free(struct ptdesc *pt)
{
pg_data_t *pgdat = NODE_DATA(memdesc_nid(pt->pt_flags));
struct page *page = ptdesc_page(pt);
- unsigned int order = compound_order(page);
+ unsigned int order = pt->pt_flags.f & 0xf;
mod_node_page_state(pgdat, NR_PAGETABLE, -(1L << order));
__ClearPageTable(page);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 3db2dea7db4c..dc6d2f81b692 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2712,6 +2712,7 @@ void __init mm_core_init(void)
*/
page_ext_init_flatmem_late();
kmemleak_init();
+ ptcache_init();
ptlock_cache_init();
pgtable_cache_init();
debug_objects_mem_init();
--
2.47.2
next prev parent reply other threads:[~2025-10-20 0:35 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-20 0:16 [RFC PATCH 0/7] Separate ptdesc from struct page Matthew Wilcox (Oracle)
2025-10-20 0:16 ` [RFC PATCH 1/7] mm: Use frozen pages for page tables Matthew Wilcox (Oracle)
2025-10-20 0:16 ` [RFC PATCH 2/7] mm: Account pagetable memory when allocated Matthew Wilcox (Oracle)
2025-11-11 7:09 ` Anshuman Khandual
2025-11-11 16:43 ` Matthew Wilcox
2025-10-20 0:16 ` [RFC PATCH 3/7] mm: Mark " Matthew Wilcox (Oracle)
2025-10-20 0:16 ` [RFC PATCH 4/7] pgtable: Remove uses of page->lru Matthew Wilcox (Oracle)
2025-11-11 7:53 ` Anshuman Khandual
2025-11-11 18:46 ` Matthew Wilcox
2025-10-20 0:16 ` [RFC PATCH 5/7] x86: Call preallocate_vmalloc_pages() later Matthew Wilcox (Oracle)
2025-11-11 8:59 ` Anshuman Khandual
2025-11-12 18:36 ` Vishal Moola (Oracle)
2025-11-12 19:31 ` Vishal Moola (Oracle)
2025-11-13 13:53 ` Matthew Wilcox
2025-10-20 0:16 ` [RFC PATCH 6/7] mm: Add alloc_pages_memdesc family of APIs Matthew Wilcox (Oracle)
2025-10-20 0:16 ` Matthew Wilcox (Oracle) [this message]
2025-10-20 6:43 ` [syzbot ci] Re: Separate ptdesc from struct page syzbot ci
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251020001652.2116669-8-willy@infradead.org \
--to=willy@infradead.org \
--cc=hannes@cmpxchg.org \
--cc=linux-mm@kvack.org \
--cc=vishal.moola@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.