Re: [RFC PATCH 5/7] mm: Make /proc/pid/smaps use the new generic pagewalk API

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Usama Arif <usama.arif@linux.dev>
To: Oscar Salvador <osalvador@suse.de>
Cc: Usama Arif <usama.arif@linux.dev>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>,
	Michal Hocko <mhocko@suse.com>,
	Vlastimil Babka <vbabka@kernel.org>,
	Muchun Song <muchun.song@linux.dev>,
	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [RFC PATCH 5/7] mm: Make /proc/pid/smaps use the new generic pagewalk API
Date: Mon, 13 Apr 2026 07:18:00 -0700	[thread overview]
Message-ID: <20260413141801.1465873-1-usama.arif@linux.dev> (raw)
In-Reply-To: <20260412174244.133715-6-osalvador@suse.de>

On Sun, 12 Apr 2026 19:42:42 +0200 Oscar Salvador <osalvador@suse.de> wrote:

> Have /proc/pid/smaps make use of the new generic API, and remove
> the code which was using the old one.
> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> ---
>  fs/proc/task_mmu.c | 309 ++++++++++++---------------------------------
>  1 file changed, 84 insertions(+), 225 deletions(-)
> 
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index e091931d7ca1..afbcdb11ad80 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -915,7 +915,7 @@ static void smaps_page_accumulate(struct mem_size_stats *mss,
>  
>  static void smaps_account(struct mem_size_stats *mss, struct page *page,
>  		bool compound, bool young, bool dirty, bool locked,
> -		bool present)
> +		bool present, int ssize)
>  {
>  	struct folio *folio = page_folio(page);
>  	int i, nr = compound ? compound_nr(page) : 1;
> @@ -923,6 +923,11 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
>  	bool exclusive;
>  	int mapcount;
>  
> +	if (ssize) {
> +		nr = ssize / PAGE_SIZE;
> +		size = ssize;
> +	}
> +
>  	/*
>  	 * First accumulate quantities that depend only on |size| and the type
>  	 * of the compound page.
> @@ -988,150 +993,6 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
>  	}
>  }
>  
> -#ifdef CONFIG_SHMEM
> -static int smaps_pte_hole(unsigned long addr, unsigned long end,
> -			  __always_unused int depth, struct mm_walk *walk)
> -{
> -	struct mem_size_stats *mss = walk->private;
> -	struct vm_area_struct *vma = walk->vma;
> -
> -	mss->swap += shmem_partial_swap_usage(walk->vma->vm_file->f_mapping,
> -					      linear_page_index(vma, addr),
> -					      linear_page_index(vma, end));
> -
> -	return 0;
> -}
> -#else
> -#define smaps_pte_hole		NULL
> -#endif /* CONFIG_SHMEM */
> -
> -static void smaps_pte_hole_lookup(unsigned long addr, struct mm_walk *walk)
> -{
> -#ifdef CONFIG_SHMEM
> -	if (walk->ops->pte_hole) {
> -		/* depth is not used */
> -		smaps_pte_hole(addr, addr + PAGE_SIZE, 0, walk);
> -	}
> -#endif
> -}
> -
> -static void smaps_pte_entry(pte_t *pte, unsigned long addr,
> -		struct mm_walk *walk)
> -{
> -	struct mem_size_stats *mss = walk->private;
> -	struct vm_area_struct *vma = walk->vma;
> -	bool locked = !!(vma->vm_flags & VM_LOCKED);
> -	struct page *page = NULL;
> -	bool present = false, young = false, dirty = false;
> -	pte_t ptent = ptep_get(pte);
> -
> -	if (pte_present(ptent)) {
> -		page = vm_normal_page(vma, addr, ptent);
> -		young = pte_young(ptent);
> -		dirty = pte_dirty(ptent);
> -		present = true;
> -	} else if (pte_none(ptent)) {
> -		smaps_pte_hole_lookup(addr, walk);
> -	} else {
> -		const softleaf_t entry = softleaf_from_pte(ptent);
> -
> -		if (softleaf_is_swap(entry)) {
> -			int mapcount;
> -
> -			mss->swap += PAGE_SIZE;
> -			mapcount = swp_swapcount(entry);
> -			if (mapcount >= 2) {
> -				u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT;
> -
> -				do_div(pss_delta, mapcount);
> -				mss->swap_pss += pss_delta;
> -			} else {
> -				mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT;
> -			}
> -		} else if (softleaf_has_pfn(entry)) {
> -			if (softleaf_is_device_private(entry))
> -				present = true;
> -			page = softleaf_to_page(entry);
> -		}
> -	}
> -
> -	if (!page)
> -		return;
> -
> -	smaps_account(mss, page, false, young, dirty, locked, present);
> -}
> -
> -#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> -static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
> -		struct mm_walk *walk)
> -{
> -	struct mem_size_stats *mss = walk->private;
> -	struct vm_area_struct *vma = walk->vma;
> -	bool locked = !!(vma->vm_flags & VM_LOCKED);
> -	struct page *page = NULL;
> -	bool present = false;
> -	struct folio *folio;
> -
> -	if (pmd_none(*pmd))
> -		return;
> -	if (pmd_present(*pmd)) {
> -		page = vm_normal_page_pmd(vma, addr, *pmd);
> -		present = true;
> -	} else if (unlikely(thp_migration_supported())) {
> -		const softleaf_t entry = softleaf_from_pmd(*pmd);
> -
> -		if (softleaf_has_pfn(entry))
> -			page = softleaf_to_page(entry);
> -	}
> -	if (IS_ERR_OR_NULL(page))
> -		return;
> -	folio = page_folio(page);
> -	if (folio_test_anon(folio))
> -		mss->anonymous_thp += HPAGE_PMD_SIZE;
> -	else if (folio_test_swapbacked(folio))
> -		mss->shmem_thp += HPAGE_PMD_SIZE;
> -	else if (folio_is_zone_device(folio))
> -		/* pass */;
> -	else
> -		mss->file_thp += HPAGE_PMD_SIZE;
> -
> -	smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd),
> -		      locked, present);
> -}
> -#else
> -static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
> -		struct mm_walk *walk)
> -{
> -}
> -#endif
> -
> -static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
> -			   struct mm_walk *walk)
> -{
> -	struct vm_area_struct *vma = walk->vma;
> -	pte_t *pte;
> -	spinlock_t *ptl;
> -
> -	ptl = pmd_trans_huge_lock(pmd, vma);
> -	if (ptl) {
> -		smaps_pmd_entry(pmd, addr, walk);
> -		spin_unlock(ptl);
> -		goto out;
> -	}
> -
> -	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
> -	if (!pte) {
> -		walk->action = ACTION_AGAIN;
> -		return 0;
> -	}
> -	for (; addr != end; pte++, addr += PAGE_SIZE)
> -		smaps_pte_entry(pte, addr, walk);
> -	pte_unmap_unlock(pte - 1, ptl);
> -out:
> -	cond_resched();
> -	return 0;
> -}
> -
>  static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
>  {
>  	/*
> @@ -1228,58 +1089,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
>  	seq_putc(m, '\n');
>  }
>  
> -#ifdef CONFIG_HUGETLB_PAGE
> -static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
> -				 unsigned long addr, unsigned long end,
> -				 struct mm_walk *walk)
> -{
> -	struct mem_size_stats *mss = walk->private;
> -	struct vm_area_struct *vma = walk->vma;
> -	struct folio *folio = NULL;
> -	bool present = false;
> -	spinlock_t *ptl;
> -	pte_t ptent;
> -
> -	ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
> -	ptent = huge_ptep_get(walk->mm, addr, pte);
> -	if (pte_present(ptent)) {
> -		folio = page_folio(pte_page(ptent));
> -		present = true;
> -	} else {
> -		const softleaf_t entry = softleaf_from_pte(ptent);
> -
> -		if (softleaf_has_pfn(entry))
> -			folio = softleaf_to_folio(entry);
> -	}
> -
> -	if (folio) {
> -		/* We treat non-present entries as "maybe shared". */
> -		if (!present || folio_maybe_mapped_shared(folio) ||
> -		    hugetlb_pmd_shared(pte))
> -			mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
> -		else
> -			mss->private_hugetlb += huge_page_size(hstate_vma(vma));
> -	}
> -	spin_unlock(ptl);
> -	return 0;
> -}
> -#else
> -#define smaps_hugetlb_range	NULL
> -#endif /* HUGETLB_PAGE */
> -
> -static const struct mm_walk_ops smaps_walk_ops = {
> -	.pmd_entry		= smaps_pte_range,
> -	.hugetlb_entry		= smaps_hugetlb_range,
> -	.walk_lock		= PGWALK_RDLOCK,
> -};
> -
> -static const struct mm_walk_ops smaps_shmem_walk_ops = {
> -	.pmd_entry		= smaps_pte_range,
> -	.hugetlb_entry		= smaps_hugetlb_range,
> -	.pte_hole		= smaps_pte_hole,
> -	.walk_lock		= PGWALK_RDLOCK,
> -};
> -
>  /*
>   * Gather mem stats from @vma with the indicated beginning
>   * address @start, and keep them in @mss.
> @@ -1287,40 +1096,90 @@ static const struct mm_walk_ops smaps_shmem_walk_ops = {
>   * Use vm_start of @vma as the beginning address if @start is 0.
>   */
>  static void smap_gather_stats(struct vm_area_struct *vma,
> -		struct mem_size_stats *mss, unsigned long start)
> +				  struct mem_size_stats *mss,
> +				  unsigned long start)
>  {
> -	const struct mm_walk_ops *ops = &smaps_walk_ops;
> -
> -	/* Invalid start */
> -	if (start >= vma->vm_end)
> -		return;
> +	struct pt_range_walk ptw = {
> +		.mm = vma->vm_mm
> +	};
> +	enum pt_range_walk_type type;
> +	pt_type_flags_t flags = PT_TYPE_ALL;
>  
> -	if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
> -		/*
> -		 * For shared or readonly shmem mappings we know that all
> -		 * swapped out pages belong to the shmem object, and we can
> -		 * obtain the swap value much more efficiently. For private
> -		 * writable mappings, we might have COW pages that are
> -		 * not affected by the parent swapped out pages of the shmem
> -		 * object, so we have to distinguish them during the page walk.
> -		 * Unless we know that the shmem object (or the part mapped by
> -		 * our VMA) has no swapped out pages at all.
> -		 */
> -		unsigned long shmem_swapped = shmem_swap_usage(vma);
> +	if (!start)
> +		start = vma->vm_start;
> +
> +	flags &= ~(PT_TYPE_NONE|PT_TYPE_PFN);
> +
> +	type = pt_range_walk_start(&ptw, vma, start, vma->vm_end, flags);
> +	while (type != PTW_DONE) {
> +		bool locked = !!(vma->vm_flags & VM_LOCKED);
> +		bool compound = false, account = false;
> +		unsigned long swap_size;
> +		int mapcount;
> +
> +		switch (type) {
> +		case PTW_FOLIO:
> +		case PTW_MIGRATION:
> +		case PTW_HWPOISON:
> +		case PTW_DEVICE:
> +			/*
> +			 * We either have a folio because vm_normal_folio was
> +			 * successful, or because we had a special swap entry
> +			 * and could retrieve it with softleaf_to_page.
> +			 */
> +			if (is_vm_hugetlb_page(vma)) {
> +				/* HugeTLB */
> +				unsigned long size = huge_page_size(hstate_vma(ptw.vma));
> +
> +				if (!ptw.present || folio_maybe_mapped_shared(ptw.folio) ||
> +				    ptw.pmd_shared)
> +					mss->shared_hugetlb += size;
> +				else
> +					mss->private_hugetlb += size;
> +			} else {
> +				account = true;
> +				if (ptw.level == PTW_PMD_LEVEL) {
> +					/* THP */
> +					compound = true;
> +					if (folio_test_anon(ptw.folio))
> +						mss->anonymous_thp += ptw.size;
> +					else if (folio_test_swapbacked(ptw.folio))
> +						mss->shmem_thp += ptw.size;
> +					else if (folio_is_zone_device(ptw.folio))
> +						/* pass */;
> +					else
> +						mss->file_thp += ptw.size;
> +				} else if (ptw.level == PTW_PTE_LEVEL && ptw.nr_entries > 1) {
> +					compound = true;
> +				}
> +			}
> +			break;
> +		case PTW_SWAP:
> +			account = true;
> +			swap_size = PAGE_SIZE * ptw.nr_entries;
> +			mss->swap += swap_size;
> +			mapcount = swp_swapcount(ptw.softleaf_entry);
> +			if (mapcount >= 2) {
> +				u64 pss_delta = (u64)swap_size << PSS_SHIFT;
>  
> -		if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
> -					!(vma->vm_flags & VM_WRITE))) {
> -			mss->swap += shmem_swapped;
> -		} else {
> -			ops = &smaps_shmem_walk_ops;


The old smap_gather_stats() had special handling for shmem swap
accounting.  For shared or read-only shmem mappings walked from the
start of the VMA (start == 0), it used shmem_swap_usage() to
efficiently account swapped-out shmem pages.  Otherwise, e.g. for
private writable shmem mappings, it used smaps_pte_hole() via
smaps_shmem_walk_ops to call shmem_partial_swap_usage() for each
PTE hole.

The new code removes all of this.  The pt_range_walk API does not
have pte_hole callbacks, so it looks like shmem pages that are swapped
out (and thus have no PTE) would no longer be counted in the Swap
field of smaps.  Is that intended?


> +				do_div(pss_delta, mapcount);
> +				mss->swap_pss += pss_delta;
> +			} else {
> +				mss->swap_pss += (u64)swap_size << PSS_SHIFT;
> +			}
> +			break;
> +		default:
> +			/* Ooops */
> +			break;
>  		}
> +
> +		if (account && ptw.folio)
> +			smaps_account(mss, ptw.page, compound, ptw.young,
> +				      ptw.dirty, locked, ptw.present, ptw.size);
> +		type = pt_range_walk_next(&ptw, vma, start, vma->vm_end, flags);
>  	}
>  
> -	/* mmap_lock is held in m_start */
> -	if (!start)
> -		walk_page_vma(vma, ops, mss);
> -	else
> -		walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss);
> +	pt_range_walk_done(&ptw);
>  }
>  
>  #define SEQ_PUT_DEC(str, val) \
> -- 
> 2.35.3
> 
>

next prev parent reply	other threads:[~2026-04-13 14:18 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-12 17:42 [RFC PATCH 0/7] Implement a " Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 1/7] mm: Add softleaf_from_pud Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 2/7] mm: Add {pmd,pud}_huge_lock helper Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 3/7] mm: Implement folio_pmd_batch Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 4/7] mm: Implement pt_range_walk Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 5/7] mm: Make /proc/pid/smaps use the new generic pagewalk API Oscar Salvador
2026-04-13 14:18   ` Usama Arif [this message]
2026-04-13 14:31     ` Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 6/7] mm: Make /proc/pid/numa_maps " Oscar Salvador
2026-04-12 17:42 ` [RFC PATCH 7/7] mm: Make /proc/pid/pagemap " Oscar Salvador
2026-04-13  7:38 ` [syzbot ci] Re: Implement a " syzbot ci

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260413141801.1465873-1-usama.arif@linux.dev \
    --to=usama.arif@linux.dev \
    --cc=akpm@linux-foundation.org \
    --cc=david@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=osalvador@suse.de \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox