* [PATCH mm-unstable v2 1/5] mm: consolidate anonymous folio PTE mapping into helpers
From: Nico Pache @ 2026-02-26 1:29 UTC
To: linux-kernel, linux-mm
Cc: aarcange, akpm, anshuman.khandual, apopple, baohua, baolin.wang,
byungchul, catalin.marinas, cl, corbet, dave.hansen, david,
dev.jain, gourry, hannes, hughd, jackmanb, jack, jannh, jglisse,
joshua.hahnjy, kas, lance.yang, Liam.Howlett, lorenzo.stoakes,
mathieu.desnoyers, matthew.brost, mhiramat, mhocko, npache,
peterx, pfalcato, rakie.kim, raquini, rdunlap, richard.weiyang,
rientjes, rostedt, rppt, ryan.roberts, shivankg, sunnanyong,
surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang,
ziy, zokeefe
The anonymous page fault handler in do_anonymous_page() open-codes the
sequence to map a newly allocated anonymous folio at the PTE level:
- construct the PTE entry
- add rmap
- add to LRU
- set the PTEs
- update the MMU cache.
Introduce two helpers to consolidate this duplicated logic, mirroring the
existing map_anon_folio_pmd_nopf() pattern for PMD-level mappings:
map_anon_folio_pte_nopf(): constructs the PTE entry, takes the extra
folio references, adds the new anon rmap, puts the folio on the LRU,
sets the PTEs and updates the MMU cache. It also takes a uffd_wp flag
so the page fault path can propagate the uffd-wp marker.
map_anon_folio_pte_pf(): extends the nopf variant with the MM_ANONPAGES
counter update and the mTHP fault allocation statistics needed by the
page fault path.
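As an illustration only, here is a minimal sketch of how a hypothetical
non-fault caller (for example a future khugepaged mTHP path) might use
the nopf variant; the function name and pmd argument are assumptions,
and the locking mirrors do_anonymous_page():
	/*
	 * Hypothetical sketch, not part of this patch: map a freshly
	 * allocated, uptodate anonymous folio at a suitably aligned addr.
	 * A real caller must also verify the PTEs are still pte_none()
	 * under the PTL before mapping.
	 */
	static bool example_map_anon_folio(struct vm_area_struct *vma,
			pmd_t *pmd, struct folio *folio, unsigned long addr)
	{
		spinlock_t *ptl;
		pte_t *pte;
		pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
		if (!pte)
			return false;
		/* nopf: no MM_ANONPAGES or mTHP fault stat updates here */
		map_anon_folio_pte_nopf(folio, pte, vma, addr, /* uffd_wp */ false);
		pte_unmap_unlock(pte, ptl);
		return true;
	}
The caller stays responsible for the MM_ANONPAGES counter and any
statistics, which is exactly what the pf variant layers on top for the
fault path.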
The zero-page read path in do_anonymous_page() is also untangled from the
shared setpte label, since it does not allocate a folio and should not
share the mapping sequence of the write path. Pass nr_pages = 1 explicitly
there rather than relying on the variable, which makes it clear that only
the zero page is being mapped.
This refactoring will also help reduce code duplication between mm/memory.c
and mm/khugepaged.c, and provides a clean API for PTE-level anonymous folio
mapping that can be reused by future callers.
Signed-off-by: Nico Pache <npache@redhat.com>
---
include/linux/mm.h | 4 ++++
mm/memory.c | 59 +++++++++++++++++++++++++++++++---------------
2 files changed, 44 insertions(+), 19 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 13336340612e..3ebf143c7502 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4901,4 +4901,8 @@ static inline bool snapshot_page_is_faithful(const struct page_snapshot *ps)
void snapshot_page(struct page_snapshot *ps, const struct page *page);
+void map_anon_folio_pte_nopf(struct folio *folio, pte_t *pte,
+ struct vm_area_struct *vma, unsigned long addr,
+ bool uffd_wp);
+
#endif /* _LINUX_MM_H */
diff --git a/mm/memory.c b/mm/memory.c
index 9385842c3503..a1a364e1fdcd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5189,6 +5189,36 @@ static struct folio *alloc_anon_folio(struct vm_fault *vmf)
return folio_prealloc(vma->vm_mm, vma, vmf->address, true);
}
+void map_anon_folio_pte_nopf(struct folio *folio, pte_t *pte,
+ struct vm_area_struct *vma, unsigned long addr,
+ bool uffd_wp)
+{
+ unsigned int nr_pages = folio_nr_pages(folio);
+ pte_t entry = folio_mk_pte(folio, vma->vm_page_prot);
+
+ entry = pte_sw_mkyoung(entry);
+
+ if (vma->vm_flags & VM_WRITE)
+ entry = pte_mkwrite(pte_mkdirty(entry), vma);
+ if (uffd_wp)
+ entry = pte_mkuffd_wp(entry);
+
+ folio_ref_add(folio, nr_pages - 1);
+ folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
+ folio_add_lru_vma(folio, vma);
+ set_ptes(vma->vm_mm, addr, pte, entry, nr_pages);
+ update_mmu_cache_range(NULL, vma, addr, pte, nr_pages);
+}
+
+static void map_anon_folio_pte_pf(struct folio *folio, pte_t *pte,
+ struct vm_area_struct *vma, unsigned long addr,
+ unsigned int nr_pages, bool uffd_wp)
+{
+ map_anon_folio_pte_nopf(folio, pte, vma, addr, uffd_wp);
+ add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages);
+ count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC);
+}
+
/*
* We enter with non-exclusive mmap_lock (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
@@ -5235,7 +5265,14 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
pte_unmap_unlock(vmf->pte, vmf->ptl);
return handle_userfault(vmf, VM_UFFD_MISSING);
}
- goto setpte;
+ if (vmf_orig_pte_uffd_wp(vmf))
+ entry = pte_mkuffd_wp(entry);
+ set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
+
+ /* No need to invalidate - it was non-present before */
+ update_mmu_cache_range(vmf, vma, addr, vmf->pte,
+ /*nr_pages=*/ 1);
+ goto unlock;
}
/* Allocate our own private page. */
@@ -5259,11 +5296,6 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
*/
__folio_mark_uptodate(folio);
- entry = folio_mk_pte(folio, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
- if (vma->vm_flags & VM_WRITE)
- entry = pte_mkwrite(pte_mkdirty(entry), vma);
-
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl);
if (!vmf->pte)
goto release;
@@ -5285,19 +5317,8 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
folio_put(folio);
return handle_userfault(vmf, VM_UFFD_MISSING);
}
-
- folio_ref_add(folio, nr_pages - 1);
- add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages);
- count_mthp_stat(folio_order(folio), MTHP_STAT_ANON_FAULT_ALLOC);
- folio_add_new_anon_rmap(folio, vma, addr, RMAP_EXCLUSIVE);
- folio_add_lru_vma(folio, vma);
-setpte:
- if (vmf_orig_pte_uffd_wp(vmf))
- entry = pte_mkuffd_wp(entry);
- set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages);
-
- /* No need to invalidate - it was non-present before */
- update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr_pages);
+ map_anon_folio_pte_pf(folio, vmf->pte, vma, addr, nr_pages,
+ vmf_orig_pte_uffd_wp(vmf));
unlock:
if (vmf->pte)
pte_unmap_unlock(vmf->pte, vmf->ptl);
--
2.53.0
* [PATCH mm-unstable v2 2/5] mm: introduce is_pmd_order helper
From: Nico Pache @ 2026-02-26 1:29 UTC
To: linux-kernel, linux-mm
Cc: aarcange, akpm, anshuman.khandual, apopple, baohua, baolin.wang,
byungchul, catalin.marinas, cl, corbet, dave.hansen, david,
dev.jain, gourry, hannes, hughd, jackmanb, jack, jannh, jglisse,
joshua.hahnjy, kas, lance.yang, Liam.Howlett, lorenzo.stoakes,
mathieu.desnoyers, matthew.brost, mhiramat, mhocko, npache,
peterx, pfalcato, rakie.kim, raquini, rdunlap, richard.weiyang,
rientjes, rostedt, rppt, ryan.roberts, shivankg, sunnanyong,
surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang,
ziy, zokeefe
In order to add mTHP support to khugepaged, we will often be checking
whether a given order is (or is not) a PMD order. Some places in the
kernel already open-code this check, so let's create a simple helper
function to keep the code clean and readable.
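As a sketch of the intended future use (every name here other than
is_pmd_order() is an assumption about code that does not exist yet):
	/* Hypothetical dispatch once khugepaged collapses multiple orders. */
	static enum scan_result collapse_order(struct mm_struct *mm,
			unsigned long addr, unsigned int order,
			struct collapse_control *cc)
	{
		if (is_pmd_order(order))
			return do_pmd_collapse(mm, addr, cc);
		return do_mthp_collapse(mm, addr, order, cc);
	}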
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Suggested-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Nico Pache <npache@redhat.com>
---
include/linux/huge_mm.h | 5 +++++
mm/huge_memory.c | 2 +-
mm/khugepaged.c | 6 +++---
mm/memory.c | 2 +-
mm/mempolicy.c | 2 +-
mm/page_alloc.c | 4 ++--
mm/shmem.c | 3 +--
7 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a4d9f964dfde..bd7f0e1d8094 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -771,6 +771,11 @@ static inline bool pmd_is_huge(pmd_t pmd)
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static inline bool is_pmd_order(unsigned int order)
+{
+ return order == HPAGE_PMD_ORDER;
+}
+
static inline int split_folio_to_list_to_order(struct folio *folio,
struct list_head *list, int new_order)
{
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8003d3a49822..a688d5ff806e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4100,7 +4100,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
i_mmap_unlock_read(mapping);
out:
xas_destroy(&xas);
- if (old_order == HPAGE_PMD_ORDER)
+ if (is_pmd_order(old_order))
count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
count_mthp_stat(old_order, !ret ? MTHP_STAT_SPLIT : MTHP_STAT_SPLIT_FAILED);
return ret;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index c85d7381adb5..2ef4b972470b 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1533,7 +1533,7 @@ static enum scan_result try_collapse_pte_mapped_thp(struct mm_struct *mm, unsign
if (IS_ERR(folio))
return SCAN_PAGE_NULL;
- if (folio_order(folio) != HPAGE_PMD_ORDER) {
+ if (!is_pmd_order(folio_order(folio))) {
result = SCAN_PAGE_COMPOUND;
goto drop_folio;
}
@@ -2016,7 +2016,7 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
* we locked the first folio, then a THP might be there already.
* This will be discovered on the first iteration.
*/
- if (folio_order(folio) == HPAGE_PMD_ORDER &&
+ if (is_pmd_order(folio_order(folio)) &&
folio->index == start) {
/* Maybe PMD-mapped */
result = SCAN_PTE_MAPPED_HUGEPAGE;
@@ -2346,7 +2346,7 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm,
continue;
}
- if (folio_order(folio) == HPAGE_PMD_ORDER &&
+ if (is_pmd_order(folio_order(folio)) &&
folio->index == start) {
/* Maybe PMD-mapped */
result = SCAN_PTE_MAPPED_HUGEPAGE;
diff --git a/mm/memory.c b/mm/memory.c
index a1a364e1fdcd..cb76fa182eab 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5427,7 +5427,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *pa
if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
return ret;
- if (folio_order(folio) != HPAGE_PMD_ORDER)
+ if (!is_pmd_order(folio_order(folio)))
return ret;
page = &folio->page;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0e5175f1c767..e5528c35bbb8 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2449,7 +2449,7 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
/* filter "hugepage" allocation, unless from alloc_pages() */
- order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) {
+ is_pmd_order(order) && ilx != NO_INTERLEAVE_INDEX) {
/*
* For hugepage allocation and non-interleave policy which
* allows the current node (or other explicitly preferred
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d88c8c67ac0b..96ffb47bcfee 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -687,7 +687,7 @@ static inline unsigned int order_to_pindex(int migratetype, int order)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
bool movable;
if (order > PAGE_ALLOC_COSTLY_ORDER) {
- VM_BUG_ON(order != HPAGE_PMD_ORDER);
+ VM_BUG_ON(!is_pmd_order(order));
movable = migratetype == MIGRATE_MOVABLE;
@@ -719,7 +719,7 @@ static inline bool pcp_allowed_order(unsigned int order)
if (order <= PAGE_ALLOC_COSTLY_ORDER)
return true;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (order == HPAGE_PMD_ORDER)
+ if (is_pmd_order(order))
return true;
#endif
return false;
diff --git a/mm/shmem.c b/mm/shmem.c
index cfed6c3ff853..ba74803c7518 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -5558,8 +5558,7 @@ static ssize_t thpsize_shmem_enabled_store(struct kobject *kobj,
spin_unlock(&huge_shmem_orders_lock);
} else if (sysfs_streq(buf, "inherit")) {
/* Do not override huge allocation policy with non-PMD sized mTHP */
- if (shmem_huge == SHMEM_HUGE_FORCE &&
- order != HPAGE_PMD_ORDER)
+ if (shmem_huge == SHMEM_HUGE_FORCE && !is_pmd_order(order))
return -EINVAL;
spin_lock(&huge_shmem_orders_lock);
--
2.53.0
* [PATCH mm-unstable v2 3/5] mm/khugepaged: define COLLAPSE_MAX_PTES_LIMIT as HPAGE_PMD_NR - 1
From: Nico Pache @ 2026-02-26 1:29 UTC
To: linux-kernel, linux-mm
Cc: aarcange, akpm, anshuman.khandual, apopple, baohua, baolin.wang,
byungchul, catalin.marinas, cl, corbet, dave.hansen, david,
dev.jain, gourry, hannes, hughd, jackmanb, jack, jannh, jglisse,
joshua.hahnjy, kas, lance.yang, Liam.Howlett, lorenzo.stoakes,
mathieu.desnoyers, matthew.brost, mhiramat, mhocko, npache,
peterx, pfalcato, rakie.kim, raquini, rdunlap, richard.weiyang,
rientjes, rostedt, rppt, ryan.roberts, shivankg, sunnanyong,
surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang,
ziy, zokeefe
The value (HPAGE_PMD_NR - 1) is often used in the khugepaged code as the
upper bound of the max_ptes_* tunables: of the HPAGE_PMD_NR PTEs covering
a PMD range (512 on x86-64 with 4K pages), at most all but one may be
none, swapped out or shared. Add a define for this limit to improve code
readability and reuse.
Acked-by: Pedro Falcato <pfalcato@suse.de>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Nico Pache <npache@redhat.com>
---
mm/khugepaged.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 2ef4b972470b..4615f34911d1 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -89,6 +89,7 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
*
* Note that these are only respected if collapse was initiated by khugepaged.
*/
+#define COLLAPSE_MAX_PTES_LIMIT (HPAGE_PMD_NR - 1)
unsigned int khugepaged_max_ptes_none __read_mostly;
static unsigned int khugepaged_max_ptes_swap __read_mostly;
static unsigned int khugepaged_max_ptes_shared __read_mostly;
@@ -256,7 +257,7 @@ static ssize_t max_ptes_none_store(struct kobject *kobj,
unsigned long max_ptes_none;
err = kstrtoul(buf, 10, &max_ptes_none);
- if (err || max_ptes_none > HPAGE_PMD_NR - 1)
+ if (err || max_ptes_none > COLLAPSE_MAX_PTES_LIMIT)
return -EINVAL;
khugepaged_max_ptes_none = max_ptes_none;
@@ -281,7 +282,7 @@ static ssize_t max_ptes_swap_store(struct kobject *kobj,
unsigned long max_ptes_swap;
err = kstrtoul(buf, 10, &max_ptes_swap);
- if (err || max_ptes_swap > HPAGE_PMD_NR - 1)
+ if (err || max_ptes_swap > COLLAPSE_MAX_PTES_LIMIT)
return -EINVAL;
khugepaged_max_ptes_swap = max_ptes_swap;
@@ -307,7 +308,7 @@ static ssize_t max_ptes_shared_store(struct kobject *kobj,
unsigned long max_ptes_shared;
err = kstrtoul(buf, 10, &max_ptes_shared);
- if (err || max_ptes_shared > HPAGE_PMD_NR - 1)
+ if (err || max_ptes_shared > COLLAPSE_MAX_PTES_LIMIT)
return -EINVAL;
khugepaged_max_ptes_shared = max_ptes_shared;
@@ -379,7 +380,7 @@ int __init khugepaged_init(void)
return -ENOMEM;
khugepaged_pages_to_scan = HPAGE_PMD_NR * 8;
- khugepaged_max_ptes_none = HPAGE_PMD_NR - 1;
+ khugepaged_max_ptes_none = COLLAPSE_MAX_PTES_LIMIT;
khugepaged_max_ptes_swap = HPAGE_PMD_NR / 8;
khugepaged_max_ptes_shared = HPAGE_PMD_NR / 2;
--
2.53.0
* [PATCH mm-unstable v2 4/5] mm/khugepaged: rename hpage_collapse_* to collapse_*
From: Nico Pache @ 2026-02-26 1:29 UTC
To: linux-kernel, linux-mm
Cc: aarcange, akpm, anshuman.khandual, apopple, baohua, baolin.wang,
byungchul, catalin.marinas, cl, corbet, dave.hansen, david,
dev.jain, gourry, hannes, hughd, jackmanb, jack, jannh, jglisse,
joshua.hahnjy, kas, lance.yang, Liam.Howlett, lorenzo.stoakes,
mathieu.desnoyers, matthew.brost, mhiramat, mhocko, npache,
peterx, pfalcato, rakie.kim, raquini, rdunlap, richard.weiyang,
rientjes, rostedt, rppt, ryan.roberts, shivankg, sunnanyong,
surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang,
ziy, zokeefe
The hpage_collapse_* prefix describes functions used by both
madvise_collapse and khugepaged. Remove the unnecessary hpage prefix to
shorten the function names.
Reviewed-by: Wei Yang <richard.weiyang@gmail.com>
Reviewed-by: Lance Yang <lance.yang@linux.dev>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Signed-off-by: Nico Pache <npache@redhat.com>
---
mm/khugepaged.c | 71 +++++++++++++++++++++++--------------------------
mm/mremap.c | 2 +-
2 files changed, 35 insertions(+), 38 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4615f34911d1..64086488ca01 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -392,14 +392,14 @@ void __init khugepaged_destroy(void)
kmem_cache_destroy(mm_slot_cache);
}
-static inline int hpage_collapse_test_exit(struct mm_struct *mm)
+static inline int collapse_test_exit(struct mm_struct *mm)
{
return atomic_read(&mm->mm_users) == 0;
}
-static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm)
+static inline int collapse_test_exit_or_disable(struct mm_struct *mm)
{
- return hpage_collapse_test_exit(mm) ||
+ return collapse_test_exit(mm) ||
mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm);
}
@@ -433,7 +433,7 @@ void __khugepaged_enter(struct mm_struct *mm)
int wakeup;
/* __khugepaged_exit() must not run from under us */
- VM_BUG_ON_MM(hpage_collapse_test_exit(mm), mm);
+ VM_BUG_ON_MM(collapse_test_exit(mm), mm);
if (unlikely(mm_flags_test_and_set(MMF_VM_HUGEPAGE, mm)))
return;
@@ -487,7 +487,7 @@ void __khugepaged_exit(struct mm_struct *mm)
} else if (slot) {
/*
* This is required to serialize against
- * hpage_collapse_test_exit() (which is guaranteed to run
+ * collapse_test_exit() (which is guaranteed to run
* under mmap sem read mode). Stop here (after we return all
* pagetables will be destroyed) until khugepaged has finished
* working on the pagetables under the mmap_lock.
@@ -582,7 +582,7 @@ static enum scan_result __collapse_huge_page_isolate(struct vm_area_struct *vma,
goto out;
}
- /* See hpage_collapse_scan_pmd(). */
+ /* See collapse_scan_pmd(). */
if (folio_maybe_mapped_shared(folio)) {
++shared;
if (cc->is_khugepaged &&
@@ -833,7 +833,7 @@ static struct collapse_control khugepaged_collapse_control = {
.is_khugepaged = true,
};
-static bool hpage_collapse_scan_abort(int nid, struct collapse_control *cc)
+static bool collapse_scan_abort(int nid, struct collapse_control *cc)
{
int i;
@@ -868,7 +868,7 @@ static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
}
#ifdef CONFIG_NUMA
-static int hpage_collapse_find_target_node(struct collapse_control *cc)
+static int collapse_find_target_node(struct collapse_control *cc)
{
int nid, target_node = 0, max_value = 0;
@@ -887,7 +887,7 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc)
return target_node;
}
#else
-static int hpage_collapse_find_target_node(struct collapse_control *cc)
+static int collapse_find_target_node(struct collapse_control *cc)
{
return 0;
}
@@ -906,7 +906,7 @@ static enum scan_result hugepage_vma_revalidate(struct mm_struct *mm, unsigned l
enum tva_type type = cc->is_khugepaged ? TVA_KHUGEPAGED :
TVA_FORCED_COLLAPSE;
- if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
+ if (unlikely(collapse_test_exit_or_disable(mm)))
return SCAN_ANY_PROCESS;
*vmap = vma = find_vma(mm, address);
@@ -977,7 +977,7 @@ static enum scan_result check_pmd_still_valid(struct mm_struct *mm,
/*
* Bring missing pages in from swap, to complete THP collapse.
- * Only done if hpage_collapse_scan_pmd believes it is worthwhile.
+ * Only done if collapse_scan_pmd believes it is worthwhile.
*
* Called and returns without pte mapped or spinlocks held.
* Returns result: if not SCAN_SUCCEED, mmap_lock has been released.
@@ -1063,7 +1063,7 @@ static enum scan_result alloc_charge_folio(struct folio **foliop, struct mm_stru
{
gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() :
GFP_TRANSHUGE);
- int node = hpage_collapse_find_target_node(cc);
+ int node = collapse_find_target_node(cc);
struct folio *folio;
folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask);
@@ -1241,10 +1241,9 @@ static enum scan_result collapse_huge_page(struct mm_struct *mm, unsigned long a
return result;
}
-static enum scan_result hpage_collapse_scan_pmd(struct mm_struct *mm,
- struct vm_area_struct *vma, unsigned long start_addr,
- bool *mmap_locked, unsigned int *cur_progress,
- struct collapse_control *cc)
+static enum scan_result collapse_scan_pmd(struct mm_struct *mm,
+ struct vm_area_struct *vma, unsigned long start_addr, bool *mmap_locked,
+ unsigned int *cur_progress, struct collapse_control *cc)
{
pmd_t *pmd;
pte_t *pte, *_pte;
@@ -1366,7 +1365,7 @@ static enum scan_result hpage_collapse_scan_pmd(struct mm_struct *mm,
* hit record.
*/
node = folio_nid(folio);
- if (hpage_collapse_scan_abort(node, cc)) {
+ if (collapse_scan_abort(node, cc)) {
result = SCAN_SCAN_ABORT;
goto out_unmap;
}
@@ -1432,7 +1431,7 @@ static void collect_mm_slot(struct mm_slot *slot)
lockdep_assert_held(&khugepaged_mm_lock);
- if (hpage_collapse_test_exit(mm)) {
+ if (collapse_test_exit(mm)) {
/* free mm_slot */
hash_del(&slot->hash);
list_del(&slot->mm_node);
@@ -1787,7 +1786,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
if (find_pmd_or_thp_or_none(mm, addr, &pmd) != SCAN_SUCCEED)
continue;
- if (hpage_collapse_test_exit(mm))
+ if (collapse_test_exit(mm))
continue;
if (!file_backed_vma_is_retractable(vma))
@@ -2305,9 +2304,9 @@ static enum scan_result collapse_file(struct mm_struct *mm, unsigned long addr,
return result;
}
-static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm,
- unsigned long addr, struct file *file, pgoff_t start,
- unsigned int *cur_progress, struct collapse_control *cc)
+static enum scan_result collapse_scan_file(struct mm_struct *mm, unsigned long addr,
+ struct file *file, pgoff_t start, unsigned int *cur_progress,
+ struct collapse_control *cc)
{
struct folio *folio = NULL;
struct address_space *mapping = file->f_mapping;
@@ -2362,7 +2361,7 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm,
}
node = folio_nid(folio);
- if (hpage_collapse_scan_abort(node, cc)) {
+ if (collapse_scan_abort(node, cc)) {
result = SCAN_SCAN_ABORT;
folio_put(folio);
break;
@@ -2418,7 +2417,7 @@ static enum scan_result hpage_collapse_scan_file(struct mm_struct *mm,
return result;
}
-static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result *result,
+static unsigned int collapse_scan_mm_slot(unsigned int pages, enum scan_result *result,
struct collapse_control *cc)
__releases(&khugepaged_mm_lock)
__acquires(&khugepaged_mm_lock)
@@ -2453,7 +2452,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
goto breakouterloop_mmap_lock;
progress++;
- if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
+ if (unlikely(collapse_test_exit_or_disable(mm)))
goto breakouterloop;
vma_iter_init(&vmi, mm, khugepaged_scan.address);
@@ -2461,7 +2460,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
unsigned long hstart, hend;
cond_resched();
- if (unlikely(hpage_collapse_test_exit_or_disable(mm))) {
+ if (unlikely(collapse_test_exit_or_disable(mm))) {
progress++;
break;
}
@@ -2484,7 +2483,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
unsigned int cur_progress = 0;
cond_resched();
- if (unlikely(hpage_collapse_test_exit_or_disable(mm)))
+ if (unlikely(collapse_test_exit_or_disable(mm)))
goto breakouterloop;
VM_BUG_ON(khugepaged_scan.address < hstart ||
@@ -2497,13 +2496,13 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
mmap_read_unlock(mm);
mmap_locked = false;
- *result = hpage_collapse_scan_file(mm,
+ *result = collapse_scan_file(mm,
khugepaged_scan.address, file, pgoff,
&cur_progress, cc);
fput(file);
if (*result == SCAN_PTE_MAPPED_HUGEPAGE) {
mmap_read_lock(mm);
- if (hpage_collapse_test_exit_or_disable(mm))
+ if (collapse_test_exit_or_disable(mm))
goto breakouterloop;
*result = try_collapse_pte_mapped_thp(mm,
khugepaged_scan.address, false);
@@ -2512,7 +2511,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
mmap_read_unlock(mm);
}
} else {
- *result = hpage_collapse_scan_pmd(mm, vma,
+ *result = collapse_scan_pmd(mm, vma,
khugepaged_scan.address, &mmap_locked,
&cur_progress, cc);
}
@@ -2546,7 +2545,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, enum scan_result
* Release the current mm_slot if this mm is about to die, or
* if we scanned all vmas of this mm, or THP got disabled.
*/
- if (hpage_collapse_test_exit_or_disable(mm) || !vma) {
+ if (collapse_test_exit_or_disable(mm) || !vma) {
/*
* Make sure that if mm_users is reaching zero while
* khugepaged runs here, khugepaged_exit will find
@@ -2599,8 +2598,8 @@ static void khugepaged_do_scan(struct collapse_control *cc)
pass_through_head++;
if (khugepaged_has_work() &&
pass_through_head < 2)
- progress += khugepaged_scan_mm_slot(pages - progress,
- &result, cc);
+ progress += collapse_scan_mm_slot(pages - progress,
+ &result, cc);
else
progress = pages;
spin_unlock(&khugepaged_mm_lock);
@@ -2844,8 +2843,7 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
mmap_read_unlock(mm);
mmap_locked = false;
*lock_dropped = true;
- result = hpage_collapse_scan_file(mm, addr, file, pgoff,
- NULL, cc);
+ result = collapse_scan_file(mm, addr, file, pgoff, NULL, cc);
if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb &&
mapping_can_writeback(file->f_mapping)) {
@@ -2859,8 +2857,7 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
}
fput(file);
} else {
- result = hpage_collapse_scan_pmd(mm, vma, addr,
- &mmap_locked, NULL, cc);
+ result = collapse_scan_pmd(mm, vma, addr, &mmap_locked, NULL, cc);
}
if (!mmap_locked)
*lock_dropped = true;
diff --git a/mm/mremap.c b/mm/mremap.c
index 2be876a70cc0..eb222af91c2d 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -244,7 +244,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
goto out;
}
/*
- * Now new_pte is none, so hpage_collapse_scan_file() path can not find
+ * Now new_pte is none, so collapse_scan_file() path can not find
* this by traversing file->f_mapping, so there is no concurrency with
* retract_page_tables(). In addition, we already hold the exclusive
* mmap_lock, so this new_pte page is stable, so there is no need to get
--
2.53.0
* [PATCH mm-unstable v2 5/5] mm/khugepaged: unify khugepaged and madv_collapse with collapse_single_pmd()
From: Nico Pache @ 2026-02-26 1:29 UTC
To: linux-kernel, linux-mm
Cc: aarcange, akpm, anshuman.khandual, apopple, baohua, baolin.wang,
byungchul, catalin.marinas, cl, corbet, dave.hansen, david,
dev.jain, gourry, hannes, hughd, jackmanb, jack, jannh, jglisse,
joshua.hahnjy, kas, lance.yang, Liam.Howlett, lorenzo.stoakes,
mathieu.desnoyers, matthew.brost, mhiramat, mhocko, npache,
peterx, pfalcato, rakie.kim, raquini, rdunlap, richard.weiyang,
rientjes, rostedt, rppt, ryan.roberts, shivankg, sunnanyong,
surenb, thomas.hellstrom, tiwai, usamaarif642, vbabka,
vishal.moola, wangkefeng.wang, will, willy, yang, ying.huang,
ziy, zokeefe
The khugepaged daemon and madvise_collapse have two different
implementations that do almost the same thing.
Create collapse_single_pmd() to increase code reuse and provide a single
entry point for these two users.
Refactor madvise_collapse and collapse_scan_mm_slot to use the new
collapse_single_pmd() function. This introduces a minor behavioral change,
most likely fixing an undiscovered bug: the current khugepaged
implementation tests collapse_test_exit_or_disable() before calling
try_collapse_pte_mapped_thp(), but madvise_collapse did not. By unifying
the two callers, madvise_collapse now performs this check as well, and
its return value becomes SCAN_ANY_PROCESS, which properly indicates that
the process is no longer valid to operate on.
Also guard the khugepaged_pages_collapsed counter so that it is only
incremented for khugepaged.
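Schematically, both call sites now reduce to the same entry point, as the
diff below shows; only the progress accounting differs:
	/* khugepaged, in collapse_scan_mm_slot(): */
	*result = collapse_single_pmd(khugepaged_scan.address, vma,
				      &mmap_locked, &cur_progress, cc);
	/* madvise_collapse(), which has no scan progress to report: */
	result = collapse_single_pmd(addr, vma, &mmap_locked, NULL, cc);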
Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Signed-off-by: Nico Pache <npache@redhat.com>
---
mm/khugepaged.c | 128 ++++++++++++++++++++++++++----------------------
1 file changed, 69 insertions(+), 59 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 64086488ca01..0058970d4579 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2417,6 +2417,70 @@ static enum scan_result collapse_scan_file(struct mm_struct *mm, unsigned long a
return result;
}
+/*
+ * Try to collapse a single PMD starting at a PMD-aligned addr, and return
+ * the result.
+ */
+static enum scan_result collapse_single_pmd(unsigned long addr,
+ struct vm_area_struct *vma, bool *mmap_locked,
+ unsigned int *cur_progress, struct collapse_control *cc)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ bool triggered_wb = false;
+ enum scan_result result;
+ struct file *file;
+ pgoff_t pgoff;
+
+ if (vma_is_anonymous(vma)) {
+ result = collapse_scan_pmd(mm, vma, addr, mmap_locked, cur_progress, cc);
+ goto end;
+ }
+
+ file = get_file(vma->vm_file);
+ pgoff = linear_page_index(vma, addr);
+
+ mmap_read_unlock(mm);
+ *mmap_locked = false;
+retry:
+ result = collapse_scan_file(mm, addr, file, pgoff, cur_progress, cc);
+
+ /*
+ * For MADV_COLLAPSE, when encountering dirty pages, try to writeback,
+ * then retry the collapse one time.
+ */
+ if (!cc->is_khugepaged && result == SCAN_PAGE_DIRTY_OR_WRITEBACK &&
+	    !triggered_wb && mapping_can_writeback(file->f_mapping)) {
+ const loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
+ const loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
+
+ filemap_write_and_wait_range(file->f_mapping, lstart, lend);
+ triggered_wb = true;
+ goto retry;
+ }
+ fput(file);
+
+ if (result != SCAN_PTE_MAPPED_HUGEPAGE)
+ goto end;
+
+ mmap_read_lock(mm);
+ *mmap_locked = true;
+ if (collapse_test_exit_or_disable(mm)) {
+ mmap_read_unlock(mm);
+ *mmap_locked = false;
+ return SCAN_ANY_PROCESS;
+ }
+ result = try_collapse_pte_mapped_thp(mm, addr, !cc->is_khugepaged);
+ if (result == SCAN_PMD_MAPPED)
+ result = SCAN_SUCCEED;
+ mmap_read_unlock(mm);
+ *mmap_locked = false;
+
+end:
+ if (cc->is_khugepaged && result == SCAN_SUCCEED)
+ ++khugepaged_pages_collapsed;
+ return result;
+}
+
static unsigned int collapse_scan_mm_slot(unsigned int pages, enum scan_result *result,
struct collapse_control *cc)
__releases(&khugepaged_mm_lock)
@@ -2489,36 +2553,9 @@ static unsigned int collapse_scan_mm_slot(unsigned int pages, enum scan_result *
VM_BUG_ON(khugepaged_scan.address < hstart ||
khugepaged_scan.address + HPAGE_PMD_SIZE >
hend);
- if (!vma_is_anonymous(vma)) {
- struct file *file = get_file(vma->vm_file);
- pgoff_t pgoff = linear_page_index(vma,
- khugepaged_scan.address);
-
- mmap_read_unlock(mm);
- mmap_locked = false;
- *result = collapse_scan_file(mm,
- khugepaged_scan.address, file, pgoff,
- &cur_progress, cc);
- fput(file);
- if (*result == SCAN_PTE_MAPPED_HUGEPAGE) {
- mmap_read_lock(mm);
- if (collapse_test_exit_or_disable(mm))
- goto breakouterloop;
- *result = try_collapse_pte_mapped_thp(mm,
- khugepaged_scan.address, false);
- if (*result == SCAN_PMD_MAPPED)
- *result = SCAN_SUCCEED;
- mmap_read_unlock(mm);
- }
- } else {
- *result = collapse_scan_pmd(mm, vma,
- khugepaged_scan.address, &mmap_locked,
- &cur_progress, cc);
- }
-
- if (*result == SCAN_SUCCEED)
- ++khugepaged_pages_collapsed;
+ *result = collapse_single_pmd(khugepaged_scan.address,
+ vma, &mmap_locked, &cur_progress, cc);
/* move to next address */
khugepaged_scan.address += HPAGE_PMD_SIZE;
progress += cur_progress;
@@ -2819,13 +2856,12 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
for (addr = hstart; addr < hend; addr += HPAGE_PMD_SIZE) {
enum scan_result result = SCAN_FAIL;
- bool triggered_wb = false;
-retry:
if (!mmap_locked) {
cond_resched();
mmap_read_lock(mm);
mmap_locked = true;
+ *lock_dropped = true;
result = hugepage_vma_revalidate(mm, addr, false, &vma,
cc);
if (result != SCAN_SUCCEED) {
@@ -2836,46 +2872,20 @@ int madvise_collapse(struct vm_area_struct *vma, unsigned long start,
hend = min(hend, vma->vm_end & HPAGE_PMD_MASK);
}
mmap_assert_locked(mm);
- if (!vma_is_anonymous(vma)) {
- struct file *file = get_file(vma->vm_file);
- pgoff_t pgoff = linear_page_index(vma, addr);
-
- mmap_read_unlock(mm);
- mmap_locked = false;
- *lock_dropped = true;
- result = collapse_scan_file(mm, addr, file, pgoff, NULL, cc);
- if (result == SCAN_PAGE_DIRTY_OR_WRITEBACK && !triggered_wb &&
- mapping_can_writeback(file->f_mapping)) {
- loff_t lstart = (loff_t)pgoff << PAGE_SHIFT;
- loff_t lend = lstart + HPAGE_PMD_SIZE - 1;
+ result = collapse_single_pmd(addr, vma, &mmap_locked, NULL, cc);
- filemap_write_and_wait_range(file->f_mapping, lstart, lend);
- triggered_wb = true;
- fput(file);
- goto retry;
- }
- fput(file);
- } else {
- result = collapse_scan_pmd(mm, vma, addr, &mmap_locked, NULL, cc);
- }
if (!mmap_locked)
*lock_dropped = true;
-handle_result:
switch (result) {
case SCAN_SUCCEED:
case SCAN_PMD_MAPPED:
++thps;
break;
- case SCAN_PTE_MAPPED_HUGEPAGE:
- BUG_ON(mmap_locked);
- mmap_read_lock(mm);
- result = try_collapse_pte_mapped_thp(mm, addr, true);
- mmap_read_unlock(mm);
- goto handle_result;
/* Whitelisted set of results where continuing OK */
case SCAN_NO_PTE_TABLE:
+ case SCAN_PTE_MAPPED_HUGEPAGE:
case SCAN_PTE_NON_PRESENT:
case SCAN_PTE_UFFD_WP:
case SCAN_LACK_REFERENCED_PAGE:
--
2.53.0
^ permalink raw reply [flat|nested] 6+ messages in thread