linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Chen Ridong <chenridong@huaweicloud.com>
To: akpm@linux-foundation.org, david@kernel.org,
	lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com,
	vbabka@suse.cz, rppt@kernel.org, surenb@google.com,
	mhocko@suse.com, axelrasmussen@google.com, yuanchu@google.com,
	weixugc@google.com, hannes@cmpxchg.org,
	zhengqi.arch@bytedance.com, shakeel.butt@linux.dev
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	lujialin4@huawei.com, chenridong@huawei.com
Subject: [RFC -next] memcg: Optimize creation performance when LRU_GEN is enabled
Date: Wed, 19 Nov 2025 08:37:22 +0000	[thread overview]
Message-ID: <20251119083722.1365680-1-chenridong@huaweicloud.com> (raw)

From: Chen Ridong <chenridong@huawei.com>

With LRU_GEN=y and LRU_GEN_ENABLED=n, a performance regression occurs
when creating a large number of memory cgroups (memcgs):

	# time mkdir testcg_{1..10000}

	real	0m7.167s
	user	0m0.037s
	sys	0m6.773s

	# time mkdir testcg_{1..20000}

	real	0m27.158s
	user	0m0.079s
	sys	0m26.270s

In contrast, with LRU_GEN=n, creation of the same number of memcgs
performs better:

	# time mkdir testcg_{1..10000}

	real	0m3.386s
	user	0m0.044s
	sys	0m3.009s

	# time mkdir testcg_{1..20000}

	real	0m6.876s
	user	0m0.075s
	sys	0m6.121s

The root cause is that lru_gen node onlining uses hlist_nulls_add_tail_rcu,
which traverses the entire list to find the tail. This traversal scales
with the number of memcgs, even when LRU_GEN is runtime-disabled.

Fix this by adding a per-generation, per-bin tail pointer to struct
lru_gen_memcg to track each list's tail. Appending new nodes now uses the
cached tail pointer directly, eliminating the full list traversal.

After applying this patch, memcg creation performance with LRU_GEN=y
matches the fully disabled baseline:

	# time mkdir testcg_{1..10000}

	real	0m3.368s
	user	0m0.025s
	sys	0m3.012s

	# time mkdir testcg_{1..20000}

	real	0m6.742s
	user	0m0.085s
	sys	0m5.995s

Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
 include/linux/mmzone.h |  4 +++
 mm/vmscan.c            | 78 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4398e027f450..bdee57b35126 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -513,6 +513,8 @@ struct lru_gen_folio {
 	u8 gen;
 	/* the list segment this lru_gen_folio belongs to */
 	u8 seg;
+	/* the bin index this lru_gen_folio is queued on */
+	u8 bin;
 	/* per-node lru_gen_folio list for global reclaim */
 	struct hlist_nulls_node list;
 };
@@ -610,6 +612,8 @@ struct lru_gen_memcg {
 	unsigned long nr_memcgs[MEMCG_NR_GENS];
 	/* per-node lru_gen_folio list for global reclaim */
 	struct hlist_nulls_head	fifo[MEMCG_NR_GENS][MEMCG_NR_BINS];
+	/* cached tails to speed up enqueueing */
+	struct hlist_nulls_node *tails[MEMCG_NR_GENS][MEMCG_NR_BINS];
 	/* protects the above */
 	spinlock_t lock;
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8890f4b58673..6c2665e48f19 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4299,6 +4299,66 @@ enum {
 	MEMCG_LRU_YOUNG,
 };
 
+static void memcg_lru_add_head_locked(struct pglist_data *pgdat,
+				      struct lruvec *lruvec, int gen, int bin)
+{
+	struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+	struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+	struct hlist_nulls_node *node = &lruvec->lrugen.list;
+	bool empty = !memcg_lru->tails[gen][bin];
+
+	hlist_nulls_add_head_rcu(node, head);
+	lruvec->lrugen.bin = bin;
+
+	if (empty)
+		memcg_lru->tails[gen][bin] = node;
+}
+
+static void memcg_lru_add_tail_locked(struct pglist_data *pgdat,
+				      struct lruvec *lruvec, int gen, int bin)
+{
+	struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+	struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+	struct hlist_nulls_node *node = &lruvec->lrugen.list;
+	struct hlist_nulls_node *tail = memcg_lru->tails[gen][bin];
+
+	if (tail) {
+		WRITE_ONCE(node->next, tail->next);
+		WRITE_ONCE(node->pprev, &tail->next);
+		rcu_assign_pointer(hlist_nulls_next_rcu(tail), node);
+	} else {
+		hlist_nulls_add_head_rcu(node, head);
+	}
+
+	memcg_lru->tails[gen][bin] = node;
+	lruvec->lrugen.bin = bin;
+}
+
+static void memcg_lru_del_locked(struct pglist_data *pgdat, struct lruvec *lruvec,
+				 bool reinit)
+{
+	int gen = lruvec->lrugen.gen;
+	int bin = lruvec->lrugen.bin;
+	struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+	struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+	struct hlist_nulls_node *node = &lruvec->lrugen.list;
+	struct hlist_nulls_node *prev = NULL;
+
+	if (hlist_nulls_unhashed(node))
+		return;
+
+	if (memcg_lru->tails[gen][bin] == node) {
+		if (node->pprev != &head->first)
+			prev = container_of(node->pprev, struct hlist_nulls_node, next);
+		memcg_lru->tails[gen][bin] = prev;
+	}
+
+	if (reinit)
+		hlist_nulls_del_init_rcu(node);
+	else
+		hlist_nulls_del_rcu(node);
+}
+
 static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
 {
 	int seg;
@@ -4326,15 +4386,15 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
 	else
 		VM_WARN_ON_ONCE(true);
 
+	memcg_lru_del_locked(pgdat, lruvec, false);
+
 	WRITE_ONCE(lruvec->lrugen.seg, seg);
 	WRITE_ONCE(lruvec->lrugen.gen, new);
 
-	hlist_nulls_del_rcu(&lruvec->lrugen.list);
-
 	if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
-		hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+		memcg_lru_add_head_locked(pgdat, lruvec, new, bin);
 	else
-		hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+		memcg_lru_add_tail_locked(pgdat, lruvec, new, bin);
 
 	pgdat->memcg_lru.nr_memcgs[old]--;
 	pgdat->memcg_lru.nr_memcgs[new]++;
@@ -4365,7 +4425,7 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg)
 
 		lruvec->lrugen.gen = gen;
 
-		hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
+		memcg_lru_add_tail_locked(pgdat, lruvec, gen, bin);
 		pgdat->memcg_lru.nr_memcgs[gen]++;
 
 		spin_unlock_irq(&pgdat->memcg_lru.lock);
@@ -4399,7 +4459,7 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
 
 		gen = lruvec->lrugen.gen;
 
-		hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
+		memcg_lru_del_locked(pgdat, lruvec, true);
 		pgdat->memcg_lru.nr_memcgs[gen]--;
 
 		if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
@@ -5664,8 +5724,10 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat)
 	spin_lock_init(&pgdat->memcg_lru.lock);
 
 	for (i = 0; i < MEMCG_NR_GENS; i++) {
-		for (j = 0; j < MEMCG_NR_BINS; j++)
+		for (j = 0; j < MEMCG_NR_BINS; j++) {
 			INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
+			pgdat->memcg_lru.tails[i][j] = NULL;
+		}
 	}
 }
 
@@ -5687,6 +5749,8 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
 
 	if (mm_state)
 		mm_state->seq = MIN_NR_GENS;
+
+	lrugen->bin = 0;
 }
 
 #ifdef CONFIG_MEMCG
-- 
2.34.1



             reply	other threads:[~2025-11-19  8:52 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-19  8:37 Chen Ridong [this message]
2025-11-24  3:52 ` Chen Ridong
2025-11-26  2:29   ` Chen Ridong
2025-11-26 17:15 ` Johannes Weiner
2025-11-27  9:04   ` Chen Ridong
2025-12-04 12:59     ` Chen Ridong
2025-12-04 13:01       ` Chen Ridong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251119083722.1365680-1-chenridong@huaweicloud.com \
    --to=chenridong@huaweicloud.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=axelrasmussen@google.com \
    --cc=chenridong@huawei.com \
    --cc=david@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=lujialin4@huawei.com \
    --cc=mhocko@suse.com \
    --cc=rppt@kernel.org \
    --cc=shakeel.butt@linux.dev \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    --cc=weixugc@google.com \
    --cc=yuanchu@google.com \
    --cc=zhengqi.arch@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox