* [RFC -next] memcg: Optimize creation performance when LRU_GEN is enabled
@ 2025-11-19  8:37 Chen Ridong
From: Chen Ridong @ 2025-11-19  8:37 UTC (permalink / raw)
  To: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
	mhocko, axelrasmussen, yuanchu, weixugc, hannes, zhengqi.arch,
	shakeel.butt
  Cc: linux-mm, linux-kernel, lujialin4, chenridong

From: Chen Ridong <chenridong@huawei.com>

With LRU_GEN=y and LRU_GEN_ENABLED=n, a performance regression occurs
when creating a large number of memory cgroups (memcgs):

	# time mkdir testcg_{1..10000}

	real	0m7.167s
	user	0m0.037s
	sys	0m6.773s

	# time mkdir testcg_{1..20000}

	real	0m27.158s
	user	0m0.079s
	sys	0m26.270s

In contrast, with LRU_GEN=n, creation of the same number of memcgs
performs better:

	# time mkdir testcg_{1..10000}

	real	0m3.386s
	user	0m0.044s
	sys	0m3.009s

	# time mkdir testcg_{1..20000}

	real	0m6.876s
	user	0m0.075s
	sys	0m6.121s

The root cause is that lru_gen node onlining appends each new node with
hlist_nulls_add_tail_rcu(), which walks the entire per-node list to find
the tail. The walk grows with the number of existing memcgs, so creating
N memcgs costs O(N^2) overall, and the cost is paid even when LRU_GEN is
runtime-disabled.
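
For reference, the add-tail helper cannot reach the tail without walking
every node first; the implementation in include/linux/rculist_nulls.h is
roughly:

	static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
						    struct hlist_nulls_head *h)
	{
		struct hlist_nulls_node *i, *last = NULL;

		/* write side: walk the whole chain to find the tail, O(n) */
		for (i = h->first; !is_a_nulls(i); i = i->next)
			last = i;

		if (last) {
			n->next = last->next;
			n->pprev = &last->next;
			rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
		} else {
			hlist_nulls_add_head_rcu(n, h);
		}
	}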

Fix this by caching a tail pointer per generation and bin in struct
lru_gen_memcg. Appending a new node now uses the cached tail directly,
eliminating the full list traversal.
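
With a cached tail the append no longer touches any other node; a
minimal sketch of the idea (the actual helper added below is
memcg_lru_add_tail_locked()):

	tail = memcg_lru->tails[gen][bin];
	if (tail) {
		/* splice the new node in after the cached tail: O(1) */
		WRITE_ONCE(node->next, tail->next);
		WRITE_ONCE(node->pprev, &tail->next);
		rcu_assign_pointer(hlist_nulls_next_rcu(tail), node);
	} else {
		/* bin is empty: the new node becomes both head and tail */
		hlist_nulls_add_head_rcu(node, head);
	}
	memcg_lru->tails[gen][bin] = node;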

After applying this patch, memcg creation performance with LRU_GEN=y
matches the LRU_GEN=n baseline:

	# time mkdir testcg_{1..10000}

	real	0m3.368s
	user	0m0.025s
	sys	0m3.012s

	# time mkdir testcg_{1..20000}

	real	0m6.742s
	user	0m0.085s
	sys	0m5.995s

Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
 include/linux/mmzone.h |  4 +++
 mm/vmscan.c            | 78 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4398e027f450..bdee57b35126 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -513,6 +513,8 @@ struct lru_gen_folio {
 	u8 gen;
 	/* the list segment this lru_gen_folio belongs to */
 	u8 seg;
+	/* the bin index this lru_gen_folio is queued on */
+	u8 bin;
 	/* per-node lru_gen_folio list for global reclaim */
 	struct hlist_nulls_node list;
 };
@@ -610,6 +612,8 @@ struct lru_gen_memcg {
 	unsigned long nr_memcgs[MEMCG_NR_GENS];
 	/* per-node lru_gen_folio list for global reclaim */
 	struct hlist_nulls_head	fifo[MEMCG_NR_GENS][MEMCG_NR_BINS];
+	/* cached tails to speed up enqueueing */
+	struct hlist_nulls_node *tails[MEMCG_NR_GENS][MEMCG_NR_BINS];
 	/* protects the above */
 	spinlock_t lock;
 };
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8890f4b58673..6c2665e48f19 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4299,6 +4299,66 @@ enum {
 	MEMCG_LRU_YOUNG,
 };
 
+static void memcg_lru_add_head_locked(struct pglist_data *pgdat,
+				      struct lruvec *lruvec, int gen, int bin)
+{
+	struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+	struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+	struct hlist_nulls_node *node = &lruvec->lrugen.list;
+	bool empty = !memcg_lru->tails[gen][bin];
+
+	hlist_nulls_add_head_rcu(node, head);
+	lruvec->lrugen.bin = bin;
+
+	if (empty)
+		memcg_lru->tails[gen][bin] = node;
+}
+
+static void memcg_lru_add_tail_locked(struct pglist_data *pgdat,
+				      struct lruvec *lruvec, int gen, int bin)
+{
+	struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+	struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+	struct hlist_nulls_node *node = &lruvec->lrugen.list;
+	struct hlist_nulls_node *tail = memcg_lru->tails[gen][bin];
+
+	if (tail) {
+		WRITE_ONCE(node->next, tail->next);
+		WRITE_ONCE(node->pprev, &tail->next);
+		rcu_assign_pointer(hlist_nulls_next_rcu(tail), node);
+	} else {
+		hlist_nulls_add_head_rcu(node, head);
+	}
+
+	memcg_lru->tails[gen][bin] = node;
+	lruvec->lrugen.bin = bin;
+}
+
+static void memcg_lru_del_locked(struct pglist_data *pgdat, struct lruvec *lruvec,
+				 bool reinit)
+{
+	int gen = lruvec->lrugen.gen;
+	int bin = lruvec->lrugen.bin;
+	struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+	struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+	struct hlist_nulls_node *node = &lruvec->lrugen.list;
+	struct hlist_nulls_node *prev = NULL;
+
+	if (hlist_nulls_unhashed(node))
+		return;
+
+	if (memcg_lru->tails[gen][bin] == node) {
+		if (node->pprev != &head->first)
+			prev = container_of(node->pprev, struct hlist_nulls_node, next);
+		memcg_lru->tails[gen][bin] = prev;
+	}
+
+	if (reinit)
+		hlist_nulls_del_init_rcu(node);
+	else
+		hlist_nulls_del_rcu(node);
+}
+
 static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
 {
 	int seg;
@@ -4326,15 +4386,15 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
 	else
 		VM_WARN_ON_ONCE(true);
 
+	memcg_lru_del_locked(pgdat, lruvec, false);
+
 	WRITE_ONCE(lruvec->lrugen.seg, seg);
 	WRITE_ONCE(lruvec->lrugen.gen, new);
 
-	hlist_nulls_del_rcu(&lruvec->lrugen.list);
-
 	if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
-		hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+		memcg_lru_add_head_locked(pgdat, lruvec, new, bin);
 	else
-		hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+		memcg_lru_add_tail_locked(pgdat, lruvec, new, bin);
 
 	pgdat->memcg_lru.nr_memcgs[old]--;
 	pgdat->memcg_lru.nr_memcgs[new]++;
@@ -4365,7 +4425,7 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg)
 
 		lruvec->lrugen.gen = gen;
 
-		hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
+		memcg_lru_add_tail_locked(pgdat, lruvec, gen, bin);
 		pgdat->memcg_lru.nr_memcgs[gen]++;
 
 		spin_unlock_irq(&pgdat->memcg_lru.lock);
@@ -4399,7 +4459,7 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
 
 		gen = lruvec->lrugen.gen;
 
-		hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
+		memcg_lru_del_locked(pgdat, lruvec, true);
 		pgdat->memcg_lru.nr_memcgs[gen]--;
 
 		if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
@@ -5664,8 +5724,10 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat)
 	spin_lock_init(&pgdat->memcg_lru.lock);
 
 	for (i = 0; i < MEMCG_NR_GENS; i++) {
-		for (j = 0; j < MEMCG_NR_BINS; j++)
+		for (j = 0; j < MEMCG_NR_BINS; j++) {
 			INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
+			pgdat->memcg_lru.tails[i][j] = NULL;
+		}
 	}
 }
 
@@ -5687,6 +5749,8 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
 
 	if (mm_state)
 		mm_state->seq = MIN_NR_GENS;
+
+	lrugen->bin = 0;
 }
 
 #ifdef CONFIG_MEMCG
-- 
2.34.1


