From: Chen Ridong <chenridong@huaweicloud.com>
To: akpm@linux-foundation.org, david@kernel.org,
lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com,
vbabka@suse.cz, rppt@kernel.org, surenb@google.com,
mhocko@suse.com, axelrasmussen@google.com, yuanchu@google.com,
weixugc@google.com, hannes@cmpxchg.org,
zhengqi.arch@bytedance.com, shakeel.butt@linux.dev
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
lujialin4@huawei.com, chenridong@huawei.com
Subject: [RFC -next] memcg: Optimize creation performance when LRU_GEN is enabled
Date: Wed, 19 Nov 2025 08:37:22 +0000
Message-ID: <20251119083722.1365680-1-chenridong@huaweicloud.com>
From: Chen Ridong <chenridong@huawei.com>
With LRU_GEN=y and LRU_GEN_ENABLED=n (lru_gen compiled in but
runtime-disabled), creating a large number of memory cgroups (memcgs)
becomes disproportionately slow:
# time mkdir testcg_{1..10000}
real 0m7.167s
user 0m0.037s
sys 0m6.773s
# time mkdir testcg_{1..20000}
real 0m27.158s
user 0m0.079s
sys 0m26.270s
In contrast, with LRU_GEN=n, creating the same numbers of memcgs is much
faster and scales roughly linearly:
# time mkdir testcg_{1..10000}
real 0m3.386s
user 0m0.044s
sys 0m3.009s
# time mkdir testcg_{1..20000}
real 0m6.876s
user 0m0.075s
sys 0m6.121s
The root cause is that onlining a memcg's lruvec (lru_gen_online_memcg())
appends it to the per-node list with hlist_nulls_add_tail_rcu(), which
must walk the whole list to find the tail because hlist_nulls lists keep
no tail pointer. Each mkdir therefore costs O(n) in the number of
existing memcgs, so creating n memcgs is O(n^2) overall. This matches the
numbers above: doubling the memcg count roughly quadruples the time. The
cost is paid even when LRU_GEN is runtime-disabled.
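For reference, hlist_nulls_add_tail_rcu() in include/linux/rculist_nulls.h
looks roughly like this (simplified here; the list walk is the O(n) part):

	static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
						    struct hlist_nulls_head *h)
	{
		struct hlist_nulls_node *i, *last = NULL;

		/* write side code, so rcu accessors are not needed */
		for (i = h->first; !is_a_nulls(i); i = i->next)
			last = i;

		if (last) {
			n->next = last->next;
			n->pprev = &last->next;
			rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
		} else {
			hlist_nulls_add_head_rcu(n, h);
		}
	}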
Fix this by caching a tail pointer for each fifo[gen][bin] list in
struct lru_gen_memcg. Appends now link the new node directly after the
cached tail, eliminating the full list traversal. The cache is kept
consistent on every add and delete: tails[gen][bin] always points at the
last node of fifo[gen][bin], or is NULL when that list is empty. The one
non-obvious part of keeping it consistent is sketched below.
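Because hlist_nulls nodes carry no back pointer, removing the cached tail
itself requires recovering the new tail from ->pprev. A minimal sketch of
that recovery, mirroring what memcg_lru_del_locked() in the patch below
does (node is the node being removed, head the list head):

	struct hlist_nulls_node *prev = NULL;

	/*
	 * node->pprev points at the previous node's ->next field, or at
	 * head->first when node is the first entry in the list.
	 */
	if (node->pprev != &head->first)
		prev = container_of(node->pprev, struct hlist_nulls_node, next);

	/* prev is the new cached tail, or NULL if the list became empty */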
With this patch applied, memcg creation performance with LRU_GEN=y
matches the LRU_GEN=n baseline:
# time mkdir testcg_{1..10000}
real 0m3.368s
user 0m0.025s
sys 0m3.012s
# time mkdir testcg_{1..20000}
real 0m6.742s
user 0m0.085s
sys 0m5.995s
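(For reproduction: the runs above assume a cgroup v2 hierarchy; the mount
point below is an assumption on my side, adjust to your setup. A full
round including cleanup is:

	# cd /sys/fs/cgroup
	# time mkdir testcg_{1..10000}
	# time rmdir testcg_{1..10000}
)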
Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
include/linux/mmzone.h | 4 +++
mm/vmscan.c | 78 ++++++++++++++++++++++++++++++++++++++----
2 files changed, 75 insertions(+), 7 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4398e027f450..bdee57b35126 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -513,6 +513,8 @@ struct lru_gen_folio {
u8 gen;
/* the list segment this lru_gen_folio belongs to */
u8 seg;
+ /* the bin index this lru_gen_folio is queued on */
+ u8 bin;
/* per-node lru_gen_folio list for global reclaim */
struct hlist_nulls_node list;
};
@@ -610,6 +612,8 @@ struct lru_gen_memcg {
unsigned long nr_memcgs[MEMCG_NR_GENS];
/* per-node lru_gen_folio list for global reclaim */
struct hlist_nulls_head fifo[MEMCG_NR_GENS][MEMCG_NR_BINS];
+ /* cached tails to speed up enqueueing */
+ struct hlist_nulls_node *tails[MEMCG_NR_GENS][MEMCG_NR_BINS];
/* protects the above */
spinlock_t lock;
};
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8890f4b58673..6c2665e48f19 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4299,6 +4299,66 @@ enum {
MEMCG_LRU_YOUNG,
};
+static void memcg_lru_add_head_locked(struct pglist_data *pgdat,
+ struct lruvec *lruvec, int gen, int bin)
+{
+ struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+ struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+ struct hlist_nulls_node *node = &lruvec->lrugen.list;
+ bool empty = !memcg_lru->tails[gen][bin];
+
+ hlist_nulls_add_head_rcu(node, head);
+ lruvec->lrugen.bin = bin;
+
+ if (empty)
+ memcg_lru->tails[gen][bin] = node;
+}
+
+static void memcg_lru_add_tail_locked(struct pglist_data *pgdat,
+ struct lruvec *lruvec, int gen, int bin)
+{
+ struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+ struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+ struct hlist_nulls_node *node = &lruvec->lrugen.list;
+ struct hlist_nulls_node *tail = memcg_lru->tails[gen][bin];
+
+ if (tail) {
+ WRITE_ONCE(node->next, tail->next);
+ WRITE_ONCE(node->pprev, &tail->next);
+ rcu_assign_pointer(hlist_nulls_next_rcu(tail), node);
+ } else {
+ hlist_nulls_add_head_rcu(node, head);
+ }
+
+ memcg_lru->tails[gen][bin] = node;
+ lruvec->lrugen.bin = bin;
+}
+
+static void memcg_lru_del_locked(struct pglist_data *pgdat, struct lruvec *lruvec,
+ bool reinit)
+{
+ int gen = lruvec->lrugen.gen;
+ int bin = lruvec->lrugen.bin;
+ struct lru_gen_memcg *memcg_lru = &pgdat->memcg_lru;
+ struct hlist_nulls_head *head = &memcg_lru->fifo[gen][bin];
+ struct hlist_nulls_node *node = &lruvec->lrugen.list;
+ struct hlist_nulls_node *prev = NULL;
+
+ if (hlist_nulls_unhashed(node))
+ return;
+
+ if (memcg_lru->tails[gen][bin] == node) {
+ if (node->pprev != &head->first)
+ prev = container_of(node->pprev, struct hlist_nulls_node, next);
+ memcg_lru->tails[gen][bin] = prev;
+ }
+
+ if (reinit)
+ hlist_nulls_del_init_rcu(node);
+ else
+ hlist_nulls_del_rcu(node);
+}
+
static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
{
int seg;
@@ -4326,15 +4386,15 @@ static void lru_gen_rotate_memcg(struct lruvec *lruvec, int op)
else
VM_WARN_ON_ONCE(true);
+ memcg_lru_del_locked(pgdat, lruvec, false);
+
WRITE_ONCE(lruvec->lrugen.seg, seg);
WRITE_ONCE(lruvec->lrugen.gen, new);
- hlist_nulls_del_rcu(&lruvec->lrugen.list);
-
if (op == MEMCG_LRU_HEAD || op == MEMCG_LRU_OLD)
- hlist_nulls_add_head_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+ memcg_lru_add_head_locked(pgdat, lruvec, new, bin);
else
- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[new][bin]);
+ memcg_lru_add_tail_locked(pgdat, lruvec, new, bin);
pgdat->memcg_lru.nr_memcgs[old]--;
pgdat->memcg_lru.nr_memcgs[new]++;
@@ -4365,7 +4425,7 @@ void lru_gen_online_memcg(struct mem_cgroup *memcg)
lruvec->lrugen.gen = gen;
- hlist_nulls_add_tail_rcu(&lruvec->lrugen.list, &pgdat->memcg_lru.fifo[gen][bin]);
+ memcg_lru_add_tail_locked(pgdat, lruvec, gen, bin);
pgdat->memcg_lru.nr_memcgs[gen]++;
spin_unlock_irq(&pgdat->memcg_lru.lock);
@@ -4399,7 +4459,7 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
gen = lruvec->lrugen.gen;
- hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
+ memcg_lru_del_locked(pgdat, lruvec, true);
pgdat->memcg_lru.nr_memcgs[gen]--;
if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
@@ -5664,8 +5724,10 @@ void lru_gen_init_pgdat(struct pglist_data *pgdat)
spin_lock_init(&pgdat->memcg_lru.lock);
for (i = 0; i < MEMCG_NR_GENS; i++) {
- for (j = 0; j < MEMCG_NR_BINS; j++)
+ for (j = 0; j < MEMCG_NR_BINS; j++) {
INIT_HLIST_NULLS_HEAD(&pgdat->memcg_lru.fifo[i][j], i);
+ pgdat->memcg_lru.tails[i][j] = NULL;
+ }
}
}
@@ -5687,6 +5749,8 @@ void lru_gen_init_lruvec(struct lruvec *lruvec)
if (mm_state)
mm_state->seq = MIN_NR_GENS;
+
+ lrugen->bin = 0;
}
#ifdef CONFIG_MEMCG
--
2.34.1