linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Oscar Salvador <osalvador@suse.de>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@kernel.org>,
	Michal Hocko <mhocko@suse.com>,
	Vlastimil Babka <vbabka@kernel.org>,
	Muchun Song <muchun.song@linux.dev>,
	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	Oscar Salvador <osalvador@suse.de>
Subject: [RFC PATCH 7/7] mm: Make /proc/pid/pagemap use the new generic pagewalk API
Date: Sun, 12 Apr 2026 19:42:44 +0200	[thread overview]
Message-ID: <20260412174244.133715-8-osalvador@suse.de> (raw)
In-Reply-To: <20260412174244.133715-1-osalvador@suse.de>

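Convert /proc/pid/pagemap to the new generic page table walk API
(pt_range_walk): both the read() path and the pagemap scan ioctl
(do_pagemap_scan()) now use it, and the walk_page_range() callbacks they
relied on are removed.

Write-protecting PUD-level entries from the scan path needs a few new
helpers, so add pudp_invalidate_ad() (x86 and generic versions),
pud_is_migration_entry() and stubs for the PUD soft-dirty accessors.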

Signed-off-by: Oscar Salvador <osalvador@suse.de>
---
 arch/x86/include/asm/pgtable.h |   4 +
 arch/x86/mm/pgtable.c          |  18 +-
 fs/proc/task_mmu.c             | 906 +++++++++++++++------------------
 include/linux/leafops.h        |  13 +
 include/linux/pgtable.h        |  30 ++
 mm/pgtable-generic.c           |  10 +
 6 files changed, 481 insertions(+), 500 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index a68ff339cd56..1d18f6177784 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1400,6 +1400,10 @@ static inline pud_t pudp_establish(struct vm_area_struct *vma,
 }
 #endif
 
+#define __HAVE_ARCH_PUDP_INVALIDATE_AD
+extern pud_t pudp_invalidate_ad(struct vm_area_struct *vma,
+				unsigned long address, pud_t *pudp);
+
 #define __HAVE_ARCH_PMDP_INVALIDATE_AD
 extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
 				unsigned long address, pmd_t *pmdp);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 2e5ecfdce73c..828f5ca9195e 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -530,8 +530,22 @@ pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
 }
 #endif
 
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
-	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+#if (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) || \
+	defined(CONFIG_HUGETLB_PAGE)
+/* Swap in pud_mkinvalid(*pudp) without flushing; @pudp must be present. */
+pud_t pudp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
+			 pud_t *pudp)
+{
+	VM_WARN_ON_ONCE(!pud_present(*pudp));
+
+	/*
+	 * No flush is necessary. Once an invalid PUD is established, the PUD's
+	 * access and dirty bits cannot be updated.
+	 */
+	return pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp));
+}
+
 pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
 		     pud_t *pudp)
 {
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 776e7a6baf00..6b6d5a39cd5a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1856,192 +1856,6 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 	return make_pme(frame, flags);
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr,
-		unsigned long end, struct vm_area_struct *vma,
-		struct pagemapread *pm)
-{
-	unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT;
-	u64 flags = 0, frame = 0;
-	pmd_t pmd = *pmdp;
-	struct page *page = NULL;
-	struct folio *folio = NULL;
-	int err = 0;
-
-	if (vma->vm_flags & VM_SOFTDIRTY)
-		flags |= PM_SOFT_DIRTY;
-
-	if (pmd_none(pmd))
-		goto populate_pagemap;
-
-	if (pmd_present(pmd)) {
-		page = pmd_page(pmd);
-
-		flags |= PM_PRESENT;
-		if (pmd_soft_dirty(pmd))
-			flags |= PM_SOFT_DIRTY;
-		if (pmd_uffd_wp(pmd))
-			flags |= PM_UFFD_WP;
-		if (pm->show_pfn)
-			frame = pmd_pfn(pmd) + idx;
-	} else if (thp_migration_supported()) {
-		const softleaf_t entry = softleaf_from_pmd(pmd);
-		unsigned long offset;
-
-		if (pm->show_pfn) {
-			if (softleaf_has_pfn(entry))
-				offset = softleaf_to_pfn(entry) + idx;
-			else
-				offset = swp_offset(entry) + idx;
-			frame = swp_type(entry) |
-				(offset << MAX_SWAPFILES_SHIFT);
-		}
-		flags |= PM_SWAP;
-		if (pmd_swp_soft_dirty(pmd))
-			flags |= PM_SOFT_DIRTY;
-		if (pmd_swp_uffd_wp(pmd))
-			flags |= PM_UFFD_WP;
-		VM_WARN_ON_ONCE(!pmd_is_migration_entry(pmd));
-		page = softleaf_to_page(entry);
-	}
-
-	if (page) {
-		folio = page_folio(page);
-		if (!folio_test_anon(folio))
-			flags |= PM_FILE;
-	}
-
-populate_pagemap:
-	for (; addr != end; addr += PAGE_SIZE, idx++) {
-		u64 cur_flags = flags;
-		pagemap_entry_t pme;
-
-		if (folio && (flags & PM_PRESENT) &&
-		    __folio_page_mapped_exclusively(folio, page))
-			cur_flags |= PM_MMAP_EXCLUSIVE;
-
-		pme = make_pme(frame, cur_flags);
-		err = add_to_pagemap(&pme, pm);
-		if (err)
-			break;
-		if (pm->show_pfn) {
-			if (flags & PM_PRESENT)
-				frame++;
-			else if (flags & PM_SWAP)
-				frame += (1 << MAX_SWAPFILES_SHIFT);
-		}
-	}
-	return err;
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
-			     struct mm_walk *walk)
-{
-	struct vm_area_struct *vma = walk->vma;
-	struct pagemapread *pm = walk->private;
-	spinlock_t *ptl;
-	pte_t *pte, *orig_pte;
-	int err = 0;
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	ptl = pmd_trans_huge_lock(pmdp, vma);
-	if (ptl) {
-		err = pagemap_pmd_range_thp(pmdp, addr, end, vma, pm);
-		spin_unlock(ptl);
-		return err;
-	}
-#endif
-
-	/*
-	 * We can assume that @vma always points to a valid one and @end never
-	 * goes beyond vma->vm_end.
-	 */
-	orig_pte = pte = pte_offset_map_lock(walk->mm, pmdp, addr, &ptl);
-	if (!pte) {
-		walk->action = ACTION_AGAIN;
-		return err;
-	}
-	for (; addr < end; pte++, addr += PAGE_SIZE) {
-		pagemap_entry_t pme;
-
-		pme = pte_to_pagemap_entry(pm, vma, addr, ptep_get(pte));
-		err = add_to_pagemap(&pme, pm);
-		if (err)
-			break;
-	}
-	pte_unmap_unlock(orig_pte, ptl);
-
-	cond_resched();
-
-	return err;
-}
-
-#ifdef CONFIG_HUGETLB_PAGE
-/* This function walks within one hugetlb entry in the single call */
-static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
-				 unsigned long addr, unsigned long end,
-				 struct mm_walk *walk)
-{
-	struct pagemapread *pm = walk->private;
-	struct vm_area_struct *vma = walk->vma;
-	u64 flags = 0, frame = 0;
-	spinlock_t *ptl;
-	int err = 0;
-	pte_t pte;
-
-	if (vma->vm_flags & VM_SOFTDIRTY)
-		flags |= PM_SOFT_DIRTY;
-
-	ptl = huge_pte_lock(hstate_vma(vma), walk->mm, ptep);
-	pte = huge_ptep_get(walk->mm, addr, ptep);
-	if (pte_present(pte)) {
-		struct folio *folio = page_folio(pte_page(pte));
-
-		if (!folio_test_anon(folio))
-			flags |= PM_FILE;
-
-		if (!folio_maybe_mapped_shared(folio) &&
-		    !hugetlb_pmd_shared(ptep))
-			flags |= PM_MMAP_EXCLUSIVE;
-
-		if (huge_pte_uffd_wp(pte))
-			flags |= PM_UFFD_WP;
-
-		flags |= PM_PRESENT;
-		if (pm->show_pfn)
-			frame = pte_pfn(pte) +
-				((addr & ~hmask) >> PAGE_SHIFT);
-	} else if (pte_swp_uffd_wp_any(pte)) {
-		flags |= PM_UFFD_WP;
-	}
-
-	for (; addr != end; addr += PAGE_SIZE) {
-		pagemap_entry_t pme = make_pme(frame, flags);
-
-		err = add_to_pagemap(&pme, pm);
-		if (err)
-			break;
-		if (pm->show_pfn && (flags & PM_PRESENT))
-			frame++;
-	}
-
-	spin_unlock(ptl);
-	cond_resched();
-
-	return err;
-}
-#else
-#define pagemap_hugetlb_range	NULL
-#endif /* HUGETLB_PAGE */
-
-static const struct mm_walk_ops pagemap_ops = {
-	.pmd_entry	= pagemap_pmd_range,
-	.pte_hole	= pagemap_pte_hole,
-	.hugetlb_entry	= pagemap_hugetlb_range,
-	.walk_lock	= PGWALK_RDLOCK,
-};
-
 /*
  * /proc/pid/pagemap - an array mapping virtual pages to pfns
  *
@@ -2070,99 +1884,6 @@ static const struct mm_walk_ops pagemap_ops = {
  * determine which areas of memory are actually mapped and llseek to
  * skip over unmapped regions.
  */
-static ssize_t pagemap_read(struct file *file, char __user *buf,
-			    size_t count, loff_t *ppos)
-{
-	struct mm_struct *mm = file->private_data;
-	struct pagemapread pm;
-	unsigned long src;
-	unsigned long svpfn;
-	unsigned long start_vaddr;
-	unsigned long end_vaddr;
-	int ret = 0, copied = 0;
-
-	if (!mm || !mmget_not_zero(mm))
-		goto out;
-
-	ret = -EINVAL;
-	/* file position must be aligned */
-	if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
-		goto out_mm;
-
-	ret = 0;
-	if (!count)
-		goto out_mm;
-
-	/* do not disclose physical addresses: attack vector */
-	pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
-
-	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
-	pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
-	ret = -ENOMEM;
-	if (!pm.buffer)
-		goto out_mm;
-
-	src = *ppos;
-	svpfn = src / PM_ENTRY_BYTES;
-	end_vaddr = mm->task_size;
-
-	/* watch out for wraparound */
-	start_vaddr = end_vaddr;
-	if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) {
-		unsigned long end;
-
-		ret = mmap_read_lock_killable(mm);
-		if (ret)
-			goto out_free;
-		start_vaddr = untagged_addr_remote(mm, svpfn << PAGE_SHIFT);
-		mmap_read_unlock(mm);
-
-		end = start_vaddr + ((count / PM_ENTRY_BYTES) << PAGE_SHIFT);
-		if (end >= start_vaddr && end < mm->task_size)
-			end_vaddr = end;
-	}
-
-	/* Ensure the address is inside the task */
-	if (start_vaddr > mm->task_size)
-		start_vaddr = end_vaddr;
-
-	ret = 0;
-	while (count && (start_vaddr < end_vaddr)) {
-		int len;
-		unsigned long end;
-
-		pm.pos = 0;
-		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
-		/* overflow ? */
-		if (end < start_vaddr || end > end_vaddr)
-			end = end_vaddr;
-		ret = mmap_read_lock_killable(mm);
-		if (ret)
-			goto out_free;
-		ret = walk_page_range(mm, start_vaddr, end, &pagemap_ops, &pm);
-		mmap_read_unlock(mm);
-		start_vaddr = end;
-
-		len = min(count, PM_ENTRY_BYTES * pm.pos);
-		if (copy_to_user(buf, pm.buffer, len)) {
-			ret = -EFAULT;
-			goto out_free;
-		}
-		copied += len;
-		buf += len;
-		count -= len;
-	}
-	*ppos += copied;
-	if (!ret || ret == PM_END_OF_BUFFER)
-		ret = copied;
-
-out_free:
-	kfree(pm.buffer);
-out_mm:
-	mmput(mm);
-out:
-	return ret;
-}
 
 static int pagemap_open(struct inode *inode, struct file *file)
 {
@@ -2267,6 +1988,23 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma,
 	}
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static void make_uffd_wp_pud(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp)
+{
+	pud_t pud = *pudp;
+
+	if (pud_present(pud))
+		pud = pud_mkuffd_wp(pudp_invalidate_ad(vma, addr, pudp));
+	else if (pud_is_migration_entry(pud))
+		pud = pud_swp_mkuffd_wp(pud);
+	else
+		return;	/* neither present nor a migration entry: nothing to mark */
+	set_pud_at(vma->vm_mm, addr, pudp, pud);
+}
+#else /* !CONFIG_HUGETLB_PAGE: pagemap_scan_walk() still calls this */
+#define make_uffd_wp_pud(vma, addr, pudp)	do { } while (0)
+#endif
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static unsigned long pagemap_thp_category(struct pagemap_scan_private *p,
 					  struct vm_area_struct *vma,
@@ -2539,216 +2277,6 @@ static int pagemap_scan_output(unsigned long categories,
 	return ret;
 }
 
-static int pagemap_scan_thp_entry(pmd_t *pmd, unsigned long start,
-				  unsigned long end, struct mm_walk *walk)
-{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	struct pagemap_scan_private *p = walk->private;
-	struct vm_area_struct *vma = walk->vma;
-	unsigned long categories;
-	spinlock_t *ptl;
-	int ret = 0;
-
-	ptl = pmd_trans_huge_lock(pmd, vma);
-	if (!ptl)
-		return -ENOENT;
-
-	categories = p->cur_vma_category |
-		     pagemap_thp_category(p, vma, start, *pmd);
-
-	if (!pagemap_scan_is_interesting_page(categories, p))
-		goto out_unlock;
-
-	ret = pagemap_scan_output(categories, p, start, &end);
-	if (start == end)
-		goto out_unlock;
-
-	if (~p->arg.flags & PM_SCAN_WP_MATCHING)
-		goto out_unlock;
-	if (~categories & PAGE_IS_WRITTEN)
-		goto out_unlock;
-
-	/*
-	 * Break huge page into small pages if the WP operation
-	 * needs to be performed on a portion of the huge page.
-	 */
-	if (end != start + HPAGE_SIZE) {
-		spin_unlock(ptl);
-		split_huge_pmd(vma, pmd, start);
-		pagemap_scan_backout_range(p, start, end);
-		/* Report as if there was no THP */
-		return -ENOENT;
-	}
-
-	make_uffd_wp_pmd(vma, start, pmd);
-	flush_tlb_range(vma, start, end);
-out_unlock:
-	spin_unlock(ptl);
-	return ret;
-#else /* !CONFIG_TRANSPARENT_HUGEPAGE */
-	return -ENOENT;
-#endif
-}
-
-static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
-				  unsigned long end, struct mm_walk *walk)
-{
-	struct pagemap_scan_private *p = walk->private;
-	struct vm_area_struct *vma = walk->vma;
-	unsigned long addr, flush_end = 0;
-	pte_t *pte, *start_pte;
-	spinlock_t *ptl;
-	int ret;
-
-	ret = pagemap_scan_thp_entry(pmd, start, end, walk);
-	if (ret != -ENOENT)
-		return ret;
-
-	ret = 0;
-	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
-	if (!pte) {
-		walk->action = ACTION_AGAIN;
-		return 0;
-	}
-
-	lazy_mmu_mode_enable();
-
-	if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) {
-		/* Fast path for performing exclusive WP */
-		for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
-			pte_t ptent = ptep_get(pte);
-
-			if ((pte_present(ptent) && pte_uffd_wp(ptent)) ||
-			    pte_swp_uffd_wp_any(ptent))
-				continue;
-			make_uffd_wp_pte(vma, addr, pte, ptent);
-			if (!flush_end)
-				start = addr;
-			flush_end = addr + PAGE_SIZE;
-		}
-		goto flush_and_return;
-	}
-
-	if (!p->arg.category_anyof_mask && !p->arg.category_inverted &&
-	    p->arg.category_mask == PAGE_IS_WRITTEN &&
-	    p->arg.return_mask == PAGE_IS_WRITTEN) {
-		for (addr = start; addr < end; pte++, addr += PAGE_SIZE) {
-			unsigned long next = addr + PAGE_SIZE;
-			pte_t ptent = ptep_get(pte);
-
-			if ((pte_present(ptent) && pte_uffd_wp(ptent)) ||
-			    pte_swp_uffd_wp_any(ptent))
-				continue;
-			ret = pagemap_scan_output(p->cur_vma_category | PAGE_IS_WRITTEN,
-						  p, addr, &next);
-			if (next == addr)
-				break;
-			if (~p->arg.flags & PM_SCAN_WP_MATCHING)
-				continue;
-			make_uffd_wp_pte(vma, addr, pte, ptent);
-			if (!flush_end)
-				start = addr;
-			flush_end = next;
-		}
-		goto flush_and_return;
-	}
-
-	for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
-		pte_t ptent = ptep_get(pte);
-		unsigned long categories = p->cur_vma_category |
-					   pagemap_page_category(p, vma, addr, ptent);
-		unsigned long next = addr + PAGE_SIZE;
-
-		if (!pagemap_scan_is_interesting_page(categories, p))
-			continue;
-
-		ret = pagemap_scan_output(categories, p, addr, &next);
-		if (next == addr)
-			break;
-
-		if (~p->arg.flags & PM_SCAN_WP_MATCHING)
-			continue;
-		if (~categories & PAGE_IS_WRITTEN)
-			continue;
-
-		make_uffd_wp_pte(vma, addr, pte, ptent);
-		if (!flush_end)
-			start = addr;
-		flush_end = next;
-	}
-
-flush_and_return:
-	if (flush_end)
-		flush_tlb_range(vma, start, addr);
-
-	lazy_mmu_mode_disable();
-	pte_unmap_unlock(start_pte, ptl);
-
-	cond_resched();
-	return ret;
-}
-
-#ifdef CONFIG_HUGETLB_PAGE
-static int pagemap_scan_hugetlb_entry(pte_t *ptep, unsigned long hmask,
-				      unsigned long start, unsigned long end,
-				      struct mm_walk *walk)
-{
-	struct pagemap_scan_private *p = walk->private;
-	struct vm_area_struct *vma = walk->vma;
-	unsigned long categories;
-	spinlock_t *ptl;
-	int ret = 0;
-	pte_t pte;
-
-	if (~p->arg.flags & PM_SCAN_WP_MATCHING) {
-		/* Go the short route when not write-protecting pages. */
-
-		pte = huge_ptep_get(walk->mm, start, ptep);
-		categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
-
-		if (!pagemap_scan_is_interesting_page(categories, p))
-			return 0;
-
-		return pagemap_scan_output(categories, p, start, &end);
-	}
-
-	i_mmap_lock_write(vma->vm_file->f_mapping);
-	ptl = huge_pte_lock(hstate_vma(vma), vma->vm_mm, ptep);
-
-	pte = huge_ptep_get(walk->mm, start, ptep);
-	categories = p->cur_vma_category | pagemap_hugetlb_category(pte);
-
-	if (!pagemap_scan_is_interesting_page(categories, p))
-		goto out_unlock;
-
-	ret = pagemap_scan_output(categories, p, start, &end);
-	if (start == end)
-		goto out_unlock;
-
-	if (~categories & PAGE_IS_WRITTEN)
-		goto out_unlock;
-
-	if (end != start + HPAGE_SIZE) {
-		/* Partial HugeTLB page WP isn't possible. */
-		pagemap_scan_backout_range(p, start, end);
-		p->arg.walk_end = start;
-		ret = 0;
-		goto out_unlock;
-	}
-
-	make_uffd_wp_huge_pte(vma, start, ptep, pte);
-	flush_hugetlb_tlb_range(vma, start, end);
-
-out_unlock:
-	spin_unlock(ptl);
-	i_mmap_unlock_write(vma->vm_file->f_mapping);
-
-	return ret;
-}
-#else
-#define pagemap_scan_hugetlb_entry NULL
-#endif
-
 static int pagemap_scan_pte_hole(unsigned long addr, unsigned long end,
 				 int depth, struct mm_walk *walk)
 {
@@ -2773,13 +2301,6 @@ static int pagemap_scan_pte_hole(unsigned long addr, unsigned long end,
 	return ret;
 }
 
-static const struct mm_walk_ops pagemap_scan_ops = {
-	.test_walk = pagemap_scan_test_walk,
-	.pmd_entry = pagemap_scan_pmd_entry,
-	.pte_hole = pagemap_scan_pte_hole,
-	.hugetlb_entry = pagemap_scan_hugetlb_entry,
-};
-
 static int pagemap_scan_get_args(struct pm_scan_arg *arg,
 				 unsigned long uarg)
 {
@@ -2877,6 +2398,135 @@ static long pagemap_scan_flush_buffer(struct pagemap_scan_private *p)
 	return n;
 }
 
+/*
+ * Build the PAGE_IS_* category mask for the entry @ptw is positioned on.
+ * Presence decides PRESENT vs SWAPPED; the level-specific helpers supply
+ * the uffd-wp (inverted into WRITTEN) and soft-dirty bits.
+ */
+static unsigned long pagemap_set_category(struct pagemap_scan_private *p,
+				   struct pt_range_walk *ptw,
+				   enum pt_range_walk_type type)
+{
+	const bool mapped = ptw->present;
+	unsigned long cat = mapped ? PAGE_IS_PRESENT : PAGE_IS_SWAPPED;
+	bool wp, soft_dirty;
+
+	if (mapped && type == PTW_FOLIO && !PageAnon(ptw->page))
+		cat |= PAGE_IS_FILE;
+	if (mapped && type == PTW_PFN)
+		cat |= PAGE_IS_PFNZERO;
+
+	/* Each level has its own accessors for the uffd-wp and soft-dirty bits. */
+	switch (ptw->level) {
+	case PTW_PUD_LEVEL:
+		if (mapped) {
+			wp = pud_uffd_wp(ptw->pud);
+			soft_dirty = pud_soft_dirty(ptw->pud);
+		} else {
+			wp = pud_swp_uffd_wp(ptw->pud);
+			soft_dirty = pud_swp_soft_dirty(ptw->pud);
+		}
+		break;
+	case PTW_PMD_LEVEL:
+		if (mapped) {
+			wp = pmd_uffd_wp(ptw->pmd);
+			soft_dirty = pmd_soft_dirty(ptw->pmd);
+		} else {
+			const softleaf_t leaf = softleaf_from_pmd(ptw->pmd);
+
+			/* A non-present PMD may still name a file-backed folio. */
+			if (softleaf_has_pfn(leaf) &&
+			    !folio_test_anon(softleaf_to_folio(leaf)))
+				cat |= PAGE_IS_FILE;
+			wp = pmd_swp_uffd_wp(ptw->pmd);
+			soft_dirty = pmd_swp_soft_dirty(ptw->pmd);
+		}
+		break;
+	case PTW_PTE_LEVEL:
+		if (mapped) {
+			wp = pte_uffd_wp(ptw->pte);
+			soft_dirty = pte_soft_dirty(ptw->pte);
+		} else {
+			wp = pte_swp_uffd_wp_any(ptw->pte);
+			soft_dirty = pte_swp_soft_dirty(ptw->pte);
+		}
+		break;
+	default:
+		/* Any other level: only the presence-derived bits apply. */
+		return cat;
+	}
+
+	/* An entry without the uffd-wp marker is reported as written. */
+	if (!wp)
+		cat |= PAGE_IS_WRITTEN;
+	if (soft_dirty)
+		cat |= PAGE_IS_SOFT_DIRTY;
+
+	return cat;
+}
+
+/* Report interesting entries of @vma from @addr on; optionally uffd-wp them. */
+static int pagemap_scan_walk(struct vm_area_struct *vma, struct pagemap_scan_private *p,
+			      unsigned long addr)
+{
+	int ret = 0;
+	struct pt_range_walk ptw = {
+		.mm = vma->vm_mm
+	};
+	enum pt_range_walk_type type;
+	pt_type_flags_t flags = PT_TYPE_ALL;
+
+keep_walking:
+	type = pt_range_walk_start(&ptw, vma, addr, vma->vm_end, flags);
+	for (; type != PTW_DONE;
+	     type = pt_range_walk_next(&ptw, vma, vma->vm_start, vma->vm_end, flags)) {
+		unsigned long categories = p->cur_vma_category |
+					   pagemap_set_category(p, &ptw, type);
+		unsigned long curr_addr = ptw.curr_addr;
+		unsigned long end = ptw.next_addr;
+
+		if (!pagemap_scan_is_interesting_page(categories, p))
+			continue;
+
+		ret = pagemap_scan_output(categories, p, curr_addr, &end);
+		if (curr_addr == end)
+			goto out;
+
+		if (~p->arg.flags & PM_SCAN_WP_MATCHING)
+			continue;
+		if (~categories & PAGE_IS_WRITTEN)
+			continue;
+
+		if (end != curr_addr + HPAGE_SIZE) {
+			if (is_vm_hugetlb_page(ptw.vma)) {
+				/* Partial HugeTLB page WP isn't possible. */
+				pagemap_scan_backout_range(p, curr_addr, end);
+				p->arg.walk_end = curr_addr;
+				ret = 0;
+				continue;
+			}
+			if (ptw.level == PTW_PMD_LEVEL) {
+				pt_range_walk_done(&ptw);
+				split_huge_pmd(ptw.vma, ptw.pmdp, curr_addr);
+				pagemap_scan_backout_range(p, curr_addr, end);
+				/* Rescan just the range we split, not from @addr */
+				addr = curr_addr;
+				goto keep_walking;
+			}
+		}
+
+		if (ptw.level == PTW_PUD_LEVEL)
+			make_uffd_wp_pud(ptw.vma, curr_addr, ptw.pudp);
+		if (ptw.level == PTW_PMD_LEVEL)
+			make_uffd_wp_pmd(ptw.vma, curr_addr, ptw.pmdp);
+		if (ptw.level == PTW_PTE_LEVEL)
+			make_uffd_wp_pte(ptw.vma, curr_addr, ptw.ptep, ptw.pte);
+	}
+out:
+	pt_range_walk_done(&ptw);
+	return ret;
+}
+
 static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
 {
 	struct pagemap_scan_private p = {0};
@@ -2897,6 +2547,7 @@ static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
 	for (walk_start = p.arg.start; walk_start < p.arg.end;
 			walk_start = p.arg.walk_end) {
 		struct mmu_notifier_range range;
+		unsigned long next;
 		long n_out;
 
 		if (fatal_signal_pending(current)) {
@@ -2915,8 +2566,21 @@ static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
 			mmu_notifier_invalidate_range_start(&range);
 		}
 
-		ret = walk_page_range(mm, walk_start, p.arg.end,
-				      &pagemap_scan_ops, &p);
+		do {
+			struct vm_area_struct *vma = find_vma(mm, walk_start);
+
+			if (vma) {
+				ret = pagemap_scan_walk(vma, &p, walk_start);
+				if (ret)
+					break;
+				walk_start = min(p.arg.end, vma->vm_end);
+				next = walk_start;
+			} else {
+				walk_start = p.arg.end;
+				next = p.arg.end;
+			}
+
+		} while (next < p.arg.end);
 
 		if (p.arg.flags & PM_SCAN_WP_MATCHING)
 			mmu_notifier_invalidate_range_end(&range);
@@ -2950,6 +2614,251 @@ static long do_pagemap_scan(struct mm_struct *mm, unsigned long uarg)
 	return ret;
 }
 
+/* Emit pagemap entries for what the walker finds in [@start, vma->vm_end). */
+static int pagemap_read_walk_range(struct vm_area_struct *vma, unsigned long start,
+				    struct pagemapread *pm)
+{
+	int err = 0;
+	struct pt_range_walk ptw = {
+		.mm = vma->vm_mm
+	};
+	enum pt_range_walk_type type;
+	pt_type_flags_t wflags = PT_TYPE_ALL;
+	pte_t *ptep;
+
+	wflags &= ~(PT_TYPE_NONE|PT_TYPE_PFN);
+
+	type = pt_range_walk_start(&ptw, vma, start, vma->vm_end, wflags);
+	while (type != PTW_DONE) {
+		unsigned long end = 0;
+		u64 frame = 0, flags = 0;
+		struct page *page = NULL;
+		struct folio *folio = NULL;
+		unsigned int idx = (start & ~PMD_MASK) >> PAGE_SHIFT; /* PMD case */
+
+		switch (ptw.level) {
+		case PTW_PUD_LEVEL:
+			end = pud_addr_end(start, vma->vm_end);
+			if (vma->vm_flags & VM_SOFTDIRTY)
+				flags |= PM_SOFT_DIRTY;
+
+			if (pud_present(ptw.pud)) {
+				page = pud_page(ptw.pud);
+				folio = page_folio(page);
+				flags |= PM_PRESENT;
+
+				if (!folio_test_anon(folio))
+					flags |= PM_FILE;
+				if (pud_uffd_wp(ptw.pud))
+					flags |= PM_UFFD_WP;
+
+				if (pm->show_pfn) {
+					unsigned long hmask = huge_page_mask(hstate_vma(vma));
+
+					frame = pud_pfn(ptw.pud) +
+						((start & ~hmask) >> PAGE_SHIFT);
+				}
+			} else if (pud_swp_uffd_wp(ptw.pud)) {
+				flags |= PM_UFFD_WP;
+			}
+			break;
+		case PTW_PMD_LEVEL:
+			end = pmd_addr_end(start, vma->vm_end);
+			if (vma->vm_flags & VM_SOFTDIRTY)
+				flags |= PM_SOFT_DIRTY;
+
+			if (pmd_present(ptw.pmd)) {
+				page = pmd_page(ptw.pmd);
+				flags |= PM_PRESENT;
+
+				if (pmd_soft_dirty(ptw.pmd))
+					flags |= PM_SOFT_DIRTY;
+				if (pmd_uffd_wp(ptw.pmd))
+					flags |= PM_UFFD_WP;
+				if (pm->show_pfn)
+					frame = pmd_pfn(ptw.pmd) + idx;
+			} else if (thp_migration_supported() || IS_ENABLED(CONFIG_HUGETLB_PAGE)) {
+				const softleaf_t entry = softleaf_from_pmd(ptw.pmd);
+				unsigned long offset;
+
+				if (pm->show_pfn) {
+					if (softleaf_has_pfn(entry))
+						offset = softleaf_to_pfn(entry) + idx;
+					else
+						offset = swp_offset(entry) + idx;
+					frame = swp_type(entry) |
+						(offset << MAX_SWAPFILES_SHIFT);
+				}
+
+				if (!is_vm_hugetlb_page(vma))
+					flags |= PM_SWAP;
+				if (pmd_swp_soft_dirty(ptw.pmd))
+					flags |= PM_SOFT_DIRTY;
+				if (pmd_swp_uffd_wp(ptw.pmd))
+					flags |= PM_UFFD_WP;
+
+				VM_WARN_ON_ONCE(!pmd_is_migration_entry(ptw.pmd));
+				page = softleaf_to_page(entry);
+			}
+
+			if (page) {
+				folio = page_folio(page);
+				if (!folio_test_anon(folio))
+					flags |= PM_FILE;
+			}
+			break;
+		case PTW_PTE_LEVEL:
+			end = pmd_addr_end(start, vma->vm_end);
+			break;
+		}
+
+		if (ptw.level == PTW_PTE_LEVEL) {
+			ptep = ptw.ptep;
+			for (; start < end; ptep++, start += PAGE_SIZE) {
+				pagemap_entry_t pme;
+
+				pme = pte_to_pagemap_entry(pm, vma, start, ptep_get(ptep));
+				err = add_to_pagemap(&pme, pm);
+				ptw.next_addr = start + PAGE_SIZE;
+				if (err)
+					break;
+			}
+		} else {
+			for (; start != end; start += PAGE_SIZE) {
+				u64 cur_flags = flags;
+				pagemap_entry_t pme;
+
+				if (folio && (flags & PM_PRESENT) &&
+				    __folio_page_mapped_exclusively(folio, page))
+					cur_flags |= PM_MMAP_EXCLUSIVE;
+
+				pme = make_pme(frame, cur_flags);
+				err = add_to_pagemap(&pme, pm);
+				if (err)
+					break;
+				if (pm->show_pfn) {
+					if (flags & PM_PRESENT)
+						frame++;
+					else if (flags & PM_SWAP)
+						frame += (1 << MAX_SWAPFILES_SHIFT);
+				}
+			}
+		}
+		type = pt_range_walk_next(&ptw, vma, vma->vm_start, vma->vm_end, wflags);
+	}
+	pt_range_walk_done(&ptw);
+
+	return err;
+}
+
+/* read() on /proc/pid/pagemap: copies out one PM_ENTRY_BYTES entry per page. */
+static ssize_t pagemap_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *ppos)
+{
+	struct mm_struct *mm = file->private_data;
+	struct pagemapread pm;
+	unsigned long src;
+	unsigned long svpfn;
+	unsigned long start_vaddr;
+	unsigned long end_vaddr;
+	int ret = 0, copied = 0;
+
+	if (!mm || !mmget_not_zero(mm))
+		goto out;
+
+	ret = -EINVAL;
+	/* file position must be aligned */
+	if ((*ppos % PM_ENTRY_BYTES) || (count % PM_ENTRY_BYTES))
+		goto out_mm;
+
+	ret = 0;
+	if (!count)
+		goto out_mm;
+
+	/* do not disclose physical addresses: attack vector */
+	pm.show_pfn = file_ns_capable(file, &init_user_ns, CAP_SYS_ADMIN);
+
+	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
+	pm.buffer = kmalloc_array(pm.len, PM_ENTRY_BYTES, GFP_KERNEL);
+	ret = -ENOMEM;
+	if (!pm.buffer)
+		goto out_mm;
+
+	src = *ppos;
+	svpfn = src / PM_ENTRY_BYTES;
+	end_vaddr = mm->task_size;
+
+	/* watch out for wraparound */
+	start_vaddr = end_vaddr;
+	if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) {
+		unsigned long end;
+
+		ret = mmap_read_lock_killable(mm);
+		if (ret)
+			goto out_free;
+		start_vaddr = untagged_addr_remote(mm, svpfn << PAGE_SHIFT);
+		mmap_read_unlock(mm);
+
+		end = start_vaddr + ((count / PM_ENTRY_BYTES) << PAGE_SHIFT);
+		if (end >= start_vaddr && end < mm->task_size)
+			end_vaddr = end;
+	}
+
+	/* Ensure the address is inside the task */
+	if (start_vaddr > mm->task_size)
+		start_vaddr = end_vaddr;
+
+	ret = 0;
+	while (count && (start_vaddr < end_vaddr)) {
+		int len;
+		unsigned long end, next;
+
+		pm.pos = 0;
+		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
+		if (end < start_vaddr || end > end_vaddr)
+			end = end_vaddr;
+		ret = mmap_read_lock_killable(mm);
+		if (ret)
+			goto out_free;
+		do {
+			struct vm_area_struct *vma = find_vma(mm, start_vaddr);
+
+			if (vma) {
+				ret = pagemap_read_walk_range(vma, start_vaddr, &pm);
+				if (ret)
+					goto out_err;
+				start_vaddr = min(end, vma->vm_end);
+				next = start_vaddr;
+			} else {
+				/* No VMA left: skip the window, or the outer loop spins */
+				start_vaddr = end;
+				next = end;
+			}
+		} while (next < end);
+out_err:
+		mmap_read_unlock(mm);
+
+		len = min(count, PM_ENTRY_BYTES * pm.pos);
+		if (copy_to_user(buf, pm.buffer, len)) {
+			ret = -EFAULT;
+			goto out_free;
+		}
+		copied += len;
+		buf += len;
+		count -= len;
+	}
+	*ppos += copied;
+	if (!ret || ret == PM_END_OF_BUFFER)
+		ret = copied;
+
+out_free:
+	kfree(pm.buffer);
+out_mm:
+	mmput(mm);
+out:
+	return ret;
+}
+
 static long do_pagemap_cmd(struct file *file, unsigned int cmd,
 			   unsigned long arg)
 {
@@ -2972,6 +2881,7 @@ const struct file_operations proc_pagemap_operations = {
 	.unlocked_ioctl = do_pagemap_cmd,
 	.compat_ioctl	= do_pagemap_cmd,
 };
+
 #endif /* CONFIG_PROC_PAGE_MONITOR */
 
 #ifdef CONFIG_NUMA
diff --git a/include/linux/leafops.h b/include/linux/leafops.h
index 122ac50aeb09..6444625c6fbb 100644
--- a/include/linux/leafops.h
+++ b/include/linux/leafops.h
@@ -618,6 +618,19 @@ static inline bool pmd_is_device_private_entry(pmd_t pmd)
 
 #endif /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */
 
+#ifdef CONFIG_HUGETLB_PAGE
+/**
+ * pud_is_migration_entry() - Does this PUD entry encode a migration entry?
+ * @pud: PUD entry.
+ *
+ * Returns: true if the PUD encodes a migration entry, otherwise false.
+ */
+static inline bool pud_is_migration_entry(pud_t pud)
+{
+	return softleaf_is_migration(softleaf_from_pud(pud));
+}
+#endif
+
 /**
  * pmd_is_migration_entry() - Does this PMD entry encode a migration entry?
  * @pmd: PMD entry.
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 6f01d5ed73f6..6f8e83a5bb08 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1229,11 +1229,21 @@ static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
 }
 #endif
 
+#ifndef __HAVE_ARCH_PUDP_INVALIDATE
+extern pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+			    pud_t *pudp);
+#endif
+
 #ifndef __HAVE_ARCH_PMDP_INVALIDATE
 extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 			    pmd_t *pmdp);
 #endif
 
+#ifndef __HAVE_ARCH_PUDP_INVALIDATE_AD
+extern pud_t pudp_invalidate_ad(struct vm_area_struct *vma,
+				unsigned long address, pud_t *pudp);
+#endif
+
 #ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD
 
 /*
@@ -1776,6 +1786,21 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
+static inline pud_t pud_swp_mksoft_dirty(pud_t pud)
+{
+	return pud;
+}
+
+static inline int pud_swp_soft_dirty(pud_t pud)
+{
+	return 0;
+}
+
+static inline pud_t pud_swp_clear_soft_dirty(pud_t pud)
+{
+	return pud;
+}
+
 static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
 {
 	return pmd;
@@ -1818,6 +1843,11 @@ static inline int pmd_soft_dirty(pmd_t pmd)
 	return 0;
 }
 
+static inline int pud_soft_dirty(pud_t pud)
+{
+	return 0;
+}
+
 static inline pte_t pte_mksoft_dirty(pte_t pte)
 {
 	return pte;
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index af7966169d69..f390c93b98b2 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -206,6 +206,16 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
 }
 #endif
 
+#ifndef __HAVE_ARCH_PUDP_INVALIDATE_AD
+/* Generic fallback: warn if the PUD is not present, then pudp_invalidate() it. */
+pud_t pudp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
+			 pud_t *pudp)
+{
+	VM_WARN_ON_ONCE(!pud_present(*pudp));
+	return pudp_invalidate(vma, address, pudp);
+}
+#endif
+
 #ifndef __HAVE_ARCH_PMDP_INVALIDATE_AD
 pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address,
 			 pmd_t *pmdp)
-- 
2.35.3



      parent reply	other threads:[~2026-04-12 17:43 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-12 17:42 [RFC PATCH 0/7] Implement a " Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 1/7] mm: Add softleaf_from_pud Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 2/7] mm: Add {pmd,pud}_huge_lock helper Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 3/7] mm: Implement folio_pmd_batch Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 4/7] mm: Implement pt_range_walk Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 5/7] mm: Make /proc/pid/smaps use the new generic pagewalk API Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 6/7] mm: Make /proc/pid/numa_maps " Oscar Salvador
2026-04-12 17:42 ` Oscar Salvador [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260412174244.133715-8-osalvador@suse.de \
    --to=osalvador@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=david@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox