From: Vlastimil Babka <vbabka@suse.cz>
To: JP Kobryn <inwardvessel@gmail.com>, linux-mm@kvack.org
Cc: apopple@nvidia.com, akpm@linux-foundation.org,
	axelrasmussen@google.com, byungchul@sk.com,
	cgroups@vger.kernel.org, david@kernel.org, eperezma@redhat.com,
	gourry@gourry.net, jasowang@redhat.com, hannes@cmpxchg.org,
	joshua.hahnjy@gmail.com, Liam.Howlett@oracle.com,
	linux-kernel@vger.kernel.org, lorenzo.stoakes@oracle.com,
	matthew.brost@intel.com, mst@redhat.com, mhocko@suse.com,
	rppt@kernel.org, muchun.song@linux.dev,
	zhengqi.arch@bytedance.com, rakie.kim@sk.com,
	roman.gushchin@linux.dev, shakeel.butt@linux.dev,
	surenb@google.com, virtualization@lists.linux.dev,
	weixugc@google.com, xuanzhuo@linux.alibaba.com,
	ying.huang@linux.alibaba.com, yuanchu@google.com, ziy@nvidia.com,
	kernel-team@meta.com
Subject: Re: [PATCH 1/2] mm/mempolicy: track page allocations per mempolicy
Date: Thu, 12 Feb 2026 16:24:08 +0100
Message-ID: <96b63efb-551f-4dd5-b4a2-ac67da577431@suse.cz>
In-Reply-To: <20260212045109.255391-2-inwardvessel@gmail.com>

On 2/12/26 05:51, JP Kobryn wrote:
> It would be useful to see a breakdown of allocations to understand which
> NUMA policies are driving them. For example, when investigating memory
> pressure, having policy-specific counts could show that allocations were
> bound to the affected node (via MPOL_BIND).
> 
> Add per-policy page allocation counters as new node stat items. These
> counters can provide correlation between a mempolicy and pressure on a
> given node.
> 
> Signed-off-by: JP Kobryn <inwardvessel@gmail.com>
> Suggested-by: Johannes Weiner <hannes@cmpxchg.org>

Are the numa_{hit,miss,etc.} counters insufficient? Could they be extended
in a way that would capture any missing important details? A counter per
policy type seems exhaustive, but on one hand it might not be important to
distinguish between some of them, and on the other hand it doesn't track
the nodemask anyway.
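
For context, by numa_{hit,miss,etc.} I mean the per-node NUMA event items;
roughly, as a sketch from memory rather than the exact current definition in
include/linux/mmzone.h:

enum numa_stat_item {
	NUMA_HIT,		/* allocated on the intended node */
	NUMA_MISS,		/* allocated here, but another node was intended */
	NUMA_FOREIGN,		/* intended here, allocated elsewhere */
	NUMA_INTERLEAVE_HIT,	/* interleaving wanted this node and got it */
	NUMA_LOCAL,		/* allocation from the local node */
	NUMA_OTHER,		/* allocation from a remote node */
	NR_VM_NUMA_EVENT_ITEMS
};

Those already record where an allocation landed relative to where it was
intended, just not which mempolicy mode drove it, which I understand is the
gap this series is after.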

> ---
>  include/linux/mmzone.h |  9 +++++++++
>  mm/mempolicy.c         | 30 ++++++++++++++++++++++++++++--
>  mm/vmstat.c            |  9 +++++++++
>  3 files changed, 46 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index fc5d6c88d2f0..762609d5f0af 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -255,6 +255,15 @@ enum node_stat_item {
>  	PGDEMOTE_DIRECT,
>  	PGDEMOTE_KHUGEPAGED,
>  	PGDEMOTE_PROACTIVE,
> +#ifdef CONFIG_NUMA
> +	PGALLOC_MPOL_DEFAULT,
> +	PGALLOC_MPOL_PREFERRED,
> +	PGALLOC_MPOL_BIND,
> +	PGALLOC_MPOL_INTERLEAVE,
> +	PGALLOC_MPOL_LOCAL,
> +	PGALLOC_MPOL_PREFERRED_MANY,
> +	PGALLOC_MPOL_WEIGHTED_INTERLEAVE,
> +#endif
>  #ifdef CONFIG_HUGETLB_PAGE
>  	NR_HUGETLB,
>  #endif
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 68a98ba57882..3c64784af761 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -217,6 +217,21 @@ static void reduce_interleave_weights(unsigned int *bw, u8 *new_iw)
>  		new_iw[nid] /= iw_gcd;
>  }
>  
> +#define CHECK_MPOL_NODE_STAT_OFFSET(mpol) \
> +	BUILD_BUG_ON(PGALLOC_##mpol - mpol != PGALLOC_MPOL_DEFAULT)
> +
> +static enum node_stat_item mpol_node_stat(unsigned short mode)
> +{
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_PREFERRED);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_BIND);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_INTERLEAVE);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_LOCAL);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_PREFERRED_MANY);
> +	CHECK_MPOL_NODE_STAT_OFFSET(MPOL_WEIGHTED_INTERLEAVE);
> +
> +	return PGALLOC_MPOL_DEFAULT + mode;
> +}
> +
>  int mempolicy_set_node_perf(unsigned int node, struct access_coordinate *coords)
>  {
>  	struct weighted_interleave_state *new_wi_state, *old_wi_state = NULL;
> @@ -2446,8 +2461,14 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
>  
>  	nodemask = policy_nodemask(gfp, pol, ilx, &nid);
>  
> -	if (pol->mode == MPOL_PREFERRED_MANY)
> -		return alloc_pages_preferred_many(gfp, order, nid, nodemask);
> +	if (pol->mode == MPOL_PREFERRED_MANY) {
> +		page = alloc_pages_preferred_many(gfp, order, nid, nodemask);
> +		if (page)
> +			__mod_node_page_state(page_pgdat(page),
> +					mpol_node_stat(MPOL_PREFERRED_MANY), 1 << order);
> +
> +		return page;
> +	}
>  
>  	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
>  	    /* filter "hugepage" allocation, unless from alloc_pages() */
> @@ -2472,6 +2493,9 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
>  			page = __alloc_frozen_pages_noprof(
>  				gfp | __GFP_THISNODE | __GFP_NORETRY, order,
>  				nid, NULL);
> +			if (page)
> +				__mod_node_page_state(page_pgdat(page),
> +						mpol_node_stat(pol->mode), 1 << order);
>  			if (page || !(gfp & __GFP_DIRECT_RECLAIM))
>  				return page;
>  			/*
> @@ -2484,6 +2508,8 @@ static struct page *alloc_pages_mpol(gfp_t gfp, unsigned int order,
>  	}
>  
>  	page = __alloc_frozen_pages_noprof(gfp, order, nid, nodemask);
> +	if (page)
> +		__mod_node_page_state(page_pgdat(page), mpol_node_stat(pol->mode), 1 << order);
>  
>  	if (unlikely(pol->mode == MPOL_INTERLEAVE ||
>  		     pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
> diff --git a/mm/vmstat.c b/mm/vmstat.c
> index 65de88cdf40e..74e0ddde1e93 100644
> --- a/mm/vmstat.c
> +++ b/mm/vmstat.c
> @@ -1291,6 +1291,15 @@ const char * const vmstat_text[] = {
>  	[I(PGDEMOTE_DIRECT)]			= "pgdemote_direct",
>  	[I(PGDEMOTE_KHUGEPAGED)]		= "pgdemote_khugepaged",
>  	[I(PGDEMOTE_PROACTIVE)]			= "pgdemote_proactive",
> +#ifdef CONFIG_NUMA
> +	[I(PGALLOC_MPOL_DEFAULT)]		= "pgalloc_mpol_default",
> +	[I(PGALLOC_MPOL_PREFERRED)]		= "pgalloc_mpol_preferred",
> +	[I(PGALLOC_MPOL_BIND)]			= "pgalloc_mpol_bind",
> +	[I(PGALLOC_MPOL_INTERLEAVE)]		= "pgalloc_mpol_interleave",
> +	[I(PGALLOC_MPOL_LOCAL)]			= "pgalloc_mpol_local",
> +	[I(PGALLOC_MPOL_PREFERRED_MANY)]	= "pgalloc_mpol_preferred_many",
> +	[I(PGALLOC_MPOL_WEIGHTED_INTERLEAVE)]	= "pgalloc_mpol_weighted_interleave",
> +#endif
>  #ifdef CONFIG_HUGETLB_PAGE
>  	[I(NR_HUGETLB)]				= "nr_hugetlb",
>  #endif


