linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Joshua Hahn <joshua.hahnjy@gmail.com>
To: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>,
	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Vlastimil Babka <vbabka@kernel.org>,
	Mike Rapoport <rppt@kernel.org>,
	Suren Baghdasaryan <surenb@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@suse.com>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Shakeel Butt <shakeel.butt@linux.dev>,
	Muchun Song <muchun.song@linux.dev>,
	Waiman Long <longman@redhat.com>,
	Chen Ridong <chenridong@huaweicloud.com>,
	Tejun Heo <tj@kernel.org>, Michal Koutny <mkoutny@suse.com>,
	linux-mm@kvack.org, cgroups@vger.kernel.org,
	linux-kernel@vger.kernel.org, kernel-team@meta.com
Subject: [RFC PATCH 3/6] mm/memory-tiers, memcontrol: Introduce toptier capacity updates
Date: Mon, 23 Feb 2026 14:38:26 -0800	[thread overview]
Message-ID: <20260223223830.586018-4-joshua.hahnjy@gmail.com> (raw)
In-Reply-To: <20260223223830.586018-1-joshua.hahnjy@gmail.com>

A memcg considers a node to be a valid toptier node when it meets three
criteria: (1) the node has CPUs, (2) the node has online memory, and
(3) the node is within the cgroup's cpuset.mems.

Of the three, only the second and third criteria can change at runtime,
via memory hotplug events and cpuset.mems changes, respectively.

Introduce functions to calculate and update a memcg's toptier and total
capacity, and call them at memcg creation, on cpuset.mems changes, and
on memory hotplug events.

Signed-off-by: Joshua Hahn <joshua.hahnjy@gmail.com>
---
 include/linux/memcontrol.h   |  6 ++++++
 include/linux/memory-tiers.h | 29 +++++++++++++++++++++++++
 include/linux/page_counter.h |  2 ++
 kernel/cgroup/cpuset.c       |  2 +-
 mm/memcontrol.c              | 17 +++++++++++++++
 mm/memory-tiers.c            | 41 ++++++++++++++++++++++++++++++++++++
 mm/page_counter.c            |  8 +++++++
 7 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5173a9f16721..900a36112b62 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -608,6 +608,8 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root,
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 				     struct mem_cgroup *memcg);
 
+void update_memcg_toptier_capacity(void);
+
 static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
 					  struct mem_cgroup *memcg)
 {
@@ -1116,6 +1118,10 @@ static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 {
 }
 
+static inline void update_memcg_toptier_capacity(void)
+{
+}
+
 static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
 					  struct mem_cgroup *memcg)
 {
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 85440473effb..cf616885e0db 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -53,6 +53,9 @@ int mt_perf_to_adistance(struct access_coordinate *perf, int *adist);
 struct memory_dev_type *mt_find_alloc_memory_type(int adist,
 						  struct list_head *memory_types);
 void mt_put_memory_types(struct list_head *memory_types);
+void mt_get_toptier_nodemask(nodemask_t *mask, const nodemask_t *allowed);
+unsigned long mt_get_toptier_capacity(const nodemask_t *allowed);
+unsigned long mt_get_total_capacity(const nodemask_t *allowed);
 #ifdef CONFIG_MIGRATION
 int next_demotion_node(int node, const nodemask_t *allowed_mask);
 void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets);
@@ -152,5 +155,31 @@ static inline struct memory_dev_type *mt_find_alloc_memory_type(int adist,
 static inline void mt_put_memory_types(struct list_head *memory_types)
 {
 }
+
+static inline void mt_get_toptier_nodemask(nodemask_t *mask,
+					   const nodemask_t *allowed)
+{
+	*mask = node_states[N_MEMORY];
+	if (allowed)
+		nodes_and(*mask, *mask, *allowed);
+}
+
+static inline unsigned long mt_get_toptier_capacity(const nodemask_t *allowed)
+{
+	int nid;
+	unsigned long capacity = 0;
+
+	for_each_node_state(nid, N_MEMORY) {
+		if (allowed && !node_isset(nid, *allowed))
+			continue;
+		capacity += NODE_DATA(nid)->node_present_pages;
+	}
+	return capacity;
+}
+
+static inline unsigned long mt_get_total_capacity(const nodemask_t *allowed)
+{
+	return mt_get_toptier_capacity(allowed);
+}
 #endif	/* CONFIG_NUMA */
 #endif  /* _LINUX_MEMORY_TIERS_H */
diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h
index 128c1272c88c..ada5f1dd75d4 100644
--- a/include/linux/page_counter.h
+++ b/include/linux/page_counter.h
@@ -121,6 +121,8 @@ static inline void page_counter_reset_watermark(struct page_counter *counter)
 void page_counter_calculate_protection(struct page_counter *root,
 				       struct page_counter *counter,
 				       bool recursive_protection);
+void page_counter_update_toptier_capacity(struct page_counter *counter,
+					  const nodemask_t *allowed);
 unsigned long page_counter_toptier_high(struct page_counter *counter);
 unsigned long page_counter_toptier_low(struct page_counter *counter);
 #else
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 7607dfe516e6..e5641dc1af88 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2620,7 +2620,6 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
 	rcu_read_lock();
 	cpuset_for_each_descendant_pre(cp, pos_css, cs) {
 		struct cpuset *parent = parent_cs(cp);
-
 		bool has_mems = nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems);
 
 		/*
@@ -2701,6 +2700,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
 
 	/* use trialcs->mems_allowed as a temp variable */
 	update_nodemasks_hier(cs, &trialcs->mems_allowed);
+	update_memcg_toptier_capacity();
 	return 0;
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0be1e823d813..f3e4a6ce7181 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -54,6 +54,7 @@
 #include <linux/seq_file.h>
 #include <linux/vmpressure.h>
 #include <linux/memremap.h>
+#include <linux/memory-tiers.h>
 #include <linux/mm_inline.h>
 #include <linux/swap_cgroup.h>
 #include <linux/cpu.h>
@@ -3906,6 +3907,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 
 		page_counter_init(&memcg->memory, &parent->memory, memcg_on_dfl);
 		page_counter_init(&memcg->swap, &parent->swap, false);
+		page_counter_update_toptier_capacity(&memcg->memory, NULL);
 #ifdef CONFIG_MEMCG_V1
 		memcg->memory.track_failcnt = !memcg_on_dfl;
 		WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable));
@@ -3917,6 +3919,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		init_memcg_events();
 		page_counter_init(&memcg->memory, NULL, true);
 		page_counter_init(&memcg->swap, NULL, false);
+		page_counter_update_toptier_capacity(&memcg->memory, NULL);
 #ifdef CONFIG_MEMCG_V1
 		page_counter_init(&memcg->kmem, NULL, false);
 		page_counter_init(&memcg->tcpmem, NULL, false);
@@ -4804,6 +4807,20 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 	page_counter_calculate_protection(&root->memory, &memcg->memory, recursive_protection);
 }
 
+void update_memcg_toptier_capacity(void)
+{
+	struct mem_cgroup *memcg;
+	nodemask_t allowed;
+
+	for_each_mem_cgroup(memcg) {
+		if (memcg == root_mem_cgroup)
+			continue;
+
+		cpuset_nodes_allowed(memcg->css.cgroup, &allowed);
+		page_counter_update_toptier_capacity(&memcg->memory, &allowed);
+	}
+}
+
 static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
 			gfp_t gfp)
 {
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index a88256381519..259caaf4be8f 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -889,6 +889,7 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self,
 		mutex_lock(&memory_tier_lock);
 		if (clear_node_memory_tier(nn->nid))
 			establish_demotion_targets();
+		update_memcg_toptier_capacity();
 		mutex_unlock(&memory_tier_lock);
 		break;
 	case NODE_ADDED_FIRST_MEMORY:
@@ -896,6 +897,7 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self,
 		memtier = set_node_memory_tier(nn->nid);
 		if (!IS_ERR(memtier))
 			establish_demotion_targets();
+		update_memcg_toptier_capacity();
 		mutex_unlock(&memory_tier_lock);
 		break;
 	}
@@ -941,6 +943,45 @@ bool numa_demotion_enabled = false;
 
 bool tier_aware_memcg_limits;
 
+void mt_get_toptier_nodemask(nodemask_t *mask, const nodemask_t *allowed)
+{
+	int nid;
+
+	*mask = NODE_MASK_NONE;
+	for_each_node_state(nid, N_MEMORY) {
+		if (node_is_toptier(nid))
+			node_set(nid, *mask);
+	}
+	if (allowed)
+		nodes_and(*mask, *mask, *allowed);
+}
+
+unsigned long mt_get_toptier_capacity(const nodemask_t *allowed)
+{
+	int nid;
+	unsigned long capacity = 0;
+	nodemask_t mask;
+
+	mt_get_toptier_nodemask(&mask, allowed);
+	for_each_node_mask(nid, mask)
+		capacity += NODE_DATA(nid)->node_present_pages;
+
+	return capacity;
+}
+
+unsigned long mt_get_total_capacity(const nodemask_t *allowed)
+{
+	int nid;
+	unsigned long capacity = 0;
+
+	for_each_node_state(nid, N_MEMORY) {
+		if (allowed && !node_isset(nid, *allowed))
+			continue;
+		capacity += NODE_DATA(nid)->node_present_pages;
+	}
+	return capacity;
+}
+
 #ifdef CONFIG_MIGRATION
 #ifdef CONFIG_SYSFS
 static ssize_t demotion_enabled_show(struct kobject *kobj,
diff --git a/mm/page_counter.c b/mm/page_counter.c
index 5ec97811c418..cf21c72bfd4e 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -11,6 +11,7 @@
 #include <linux/string.h>
 #include <linux/sched.h>
 #include <linux/bug.h>
+#include <linux/memory-tiers.h>
 #include <asm/page.h>
 
 static bool track_protection(struct page_counter *c)
@@ -463,6 +464,13 @@ void page_counter_calculate_protection(struct page_counter *root,
 			recursive_protection));
 }
 
+void page_counter_update_toptier_capacity(struct page_counter *counter,
+					  const nodemask_t *allowed)
+{
+	counter->toptier_capacity = mt_get_toptier_capacity(allowed);
+	counter->total_capacity = mt_get_total_capacity(allowed);
+}
+
 unsigned long page_counter_toptier_high(struct page_counter *counter)
 {
 	unsigned long high = READ_ONCE(counter->high);
-- 
2.47.3



  parent reply	other threads:[~2026-02-23 22:38 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-23 22:38 [RFC PATCH 0/6] mm/memcontrol: Make memcg limits tier-aware Joshua Hahn
2026-02-23 22:38 ` [RFC PATCH 1/6] mm/memory-tiers: Introduce tier-aware memcg limit sysfs Joshua Hahn
2026-02-23 22:38 ` [RFC PATCH 2/6] mm/page_counter: Introduce tiered memory awareness to page_counter Joshua Hahn
2026-02-23 22:38 ` Joshua Hahn [this message]
2026-02-23 22:38 ` [RFC PATCH 4/6] mm/memcontrol: Charge and uncharge from toptier Joshua Hahn
2026-02-23 22:38 ` [RFC PATCH 5/6] mm/memcontrol, page_counter: Make memory.low tier-aware Joshua Hahn
2026-02-23 22:38 ` [RFC PATCH 6/6] mm/memcontrol: Make memory.high tier-aware Joshua Hahn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260223223830.586018-4-joshua.hahnjy@gmail.com \
    --to=joshua.hahnjy@gmail.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=chenridong@huaweicloud.com \
    --cc=david@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=mkoutny@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=roman.gushchin@linux.dev \
    --cc=rppt@kernel.org \
    --cc=shakeel.butt@linux.dev \
    --cc=surenb@google.com \
    --cc=tj@kernel.org \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox