From: Chen Ridong <chenridong@huaweicloud.com>
To: akpm@linux-foundation.org, axelrasmussen@google.com,
yuanchu@google.com, weixugc@google.com, david@kernel.org,
lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com,
vbabka@suse.cz, rppt@kernel.org, surenb@google.com,
mhocko@suse.com, corbet@lwn.net, skhan@linuxfoundation.org,
hannes@cmpxchg.org, roman.gushchin@linux.dev,
shakeel.butt@linux.dev, muchun.song@linux.dev,
zhengqi.arch@bytedance.com
Cc: linux-mm@kvack.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
lujialin4@huawei.com, chenridong@huaweicloud.com,
ryncsn@gmail.com
Subject: [RFC PATCH -next 5/7] mm/mglru: combine shrink_many into shrink_node_memcgs
Date: Tue, 20 Jan 2026 13:42:54 +0000 [thread overview]
Message-ID: <20260120134256.2271710-6-chenridong@huaweicloud.com> (raw)
In-Reply-To: <20260120134256.2271710-1-chenridong@huaweicloud.com>
From: Chen Ridong <chenridong@huawei.com>
The memcg LRU was originally introduced to improve scalability during
global reclaim. However, it only supports MGLRU (gen LRU) global reclaim,
and its implementation remains complex.
Previous patches have introduced heat-level-based memcg reclaim, which is
significantly simpler. This patch switches MGLRU global reclaim to the
heat-level-based reclaim mechanism: shrink_many() is removed and
lru_gen_shrink_node() now calls shrink_node_memcgs() instead.
The following results are from a 24-hour test provided by Yu Zhao [1]:
Throughput (number of requests)      before      after       Change
Total                                22879701    25331956    +10%
Tail latency (number of requests)    before      after       Change
[128s, inf)                          19197       15628       -19%
[64s, 128s)                          4500        3815        -29%
[32s, 64s)                           14971       13755       -36%
[16s, 32s)                           46117       42942       -7%
[1] https://lore.kernel.org/all/20221220214923.1229538-1-yuzhao@google.com/
Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
mm/vmscan.c | 101 ++++++++++++----------------------------------------
1 file changed, 22 insertions(+), 79 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 27c6fdbc9394..f806838c3cea 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4965,76 +4965,6 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
MEMCG_LRU_TAIL : MEMCG_LRU_YOUNG;
}
-static void shrink_many(struct pglist_data *pgdat, struct scan_control *sc)
-{
- int op;
- int gen;
- int bin;
- int first_bin;
- struct lruvec *lruvec;
- struct lru_gen_folio *lrugen;
- struct mem_cgroup *memcg;
- struct hlist_nulls_node *pos;
-
- gen = get_memcg_gen(READ_ONCE(pgdat->memcg_lru.seq));
- bin = first_bin = get_random_u32_below(MEMCG_NR_BINS);
-restart:
- op = 0;
- memcg = NULL;
-
- rcu_read_lock();
-
- hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
- if (op) {
- lru_gen_rotate_memcg(lruvec, op);
- op = 0;
- }
-
- mem_cgroup_put(memcg);
- memcg = NULL;
-
- if (gen != READ_ONCE(lrugen->gen))
- continue;
-
- lruvec = container_of(lrugen, struct lruvec, lrugen);
- memcg = lruvec_memcg(lruvec);
-
- if (!mem_cgroup_tryget(memcg)) {
- lru_gen_release_memcg(memcg);
- memcg = NULL;
- continue;
- }
-
- rcu_read_unlock();
-
- op = shrink_one(lruvec, sc);
-
- rcu_read_lock();
-
- if (lru_gen_should_abort_scan(lruvec, sc))
- break;
- }
-
- rcu_read_unlock();
-
- if (op)
- lru_gen_rotate_memcg(lruvec, op);
-
- mem_cgroup_put(memcg);
-
- if (!is_a_nulls(pos))
- return;
-
- /* restart if raced with lru_gen_rotate_memcg() */
- if (gen != get_nulls_value(pos))
- goto restart;
-
- /* try the rest of the bins of the current generation */
- bin = get_memcg_bin(bin + 1);
- if (bin != first_bin)
- goto restart;
-}
-
static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
{
struct blk_plug plug;
@@ -5064,6 +4994,7 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
blk_finish_plug(&plug);
}
+static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc);
static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
{
struct blk_plug plug;
@@ -5093,7 +5024,7 @@ static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *
if (mem_cgroup_disabled())
shrink_one(&pgdat->__lruvec, sc);
else
- shrink_many(pgdat, sc);
+ shrink_node_memcgs(pgdat, sc);
if (current_is_kswapd())
sc->nr_reclaimed += reclaimed;
@@ -5800,6 +5731,11 @@ static bool lru_gen_should_abort_scan(struct lruvec *lruvec, struct scan_control
{
return false;
}
+
+static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
+{
+ BUILD_BUG();
+}
#endif /* CONFIG_LRU_GEN */
static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
@@ -5813,11 +5749,6 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
bool proportional_reclaim;
struct blk_plug plug;
- if (lru_gen_enabled() && !root_reclaim(sc)) {
- lru_gen_shrink_lruvec(lruvec, sc);
- return;
- }
-
get_scan_count(lruvec, sc, nr);
/* Record the original scan target for proportional adjustments later */
@@ -6127,7 +6058,8 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
* For kswapd, reliable forward progress is more important
* than a quick return to idle. Always do full walks.
*/
- if (current_is_kswapd() || sc->memcg_full_walk)
+ if ((current_is_kswapd() && !lru_gen_enabled())
+ || sc->memcg_full_walk)
partial = NULL;
for (level = MEMCG_LEVEL_COLD; level < max_level; level++) {
@@ -6178,7 +6110,13 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
reclaimed = sc->nr_reclaimed;
scanned = sc->nr_scanned;
- shrink_lruvec(lruvec, sc);
+ if (lru_gen_enabled()) {
+ if (!lruvec_is_sizable(lruvec, sc))
+ continue;
+ lru_gen_shrink_lruvec(lruvec, sc);
+ } else
+ shrink_lruvec(lruvec, sc);
+
if (!memcg || memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B))
shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
sc->priority);
@@ -6196,7 +6134,12 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
flush_reclaim_state(sc);
/* If partial walks are allowed, bail once goal is reached */
- if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) {
+ if (lru_gen_enabled() && root_reclaim(sc)) {
+ if (lru_gen_should_abort_scan(lruvec, sc)) {
+ mem_cgroup_iter_break(target_memcg, memcg);
+ break;
+ }
+ } else if (partial && sc->nr_reclaimed >= sc->nr_to_reclaim) {
mem_cgroup_iter_break(target_memcg, memcg);
break;
}
--
2.34.1
next prev parent reply other threads:[~2026-01-20 13:58 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-20 13:42 [RFC PATCH -next 0/7] Introduce heat-level memcg reclaim Chen Ridong
2026-01-20 13:42 ` [RFC PATCH -next 1/7] vmscan: add memcg heat level for reclaim Chen Ridong
2026-01-21 7:53 ` Chen Ridong
2026-01-21 14:58 ` Kairui Song
2026-01-22 2:32 ` Chen Ridong
2026-02-06 22:47 ` Yuanchu Xie
2026-02-09 8:17 ` Chen Ridong
2026-01-20 13:42 ` [RFC PATCH -next 2/7] mm/mglru: make calls to flush_reclaim_state() similar for MGLRU and non-MGLRU Chen Ridong
2026-01-20 13:42 ` [RFC PATCH -next 3/7] mm/mglru: rename should_abort_scan to lru_gen_should_abort_scan Chen Ridong
2026-01-20 13:42 ` [RFC PATCH -next 4/7] mm/mglru: extend lru_gen_shrink_lruvec to support root reclaim Chen Ridong
2026-01-20 13:42 ` Chen Ridong [this message]
2026-01-21 8:13 ` [RFC PATCH -next 5/7] mm/mglru: combine shrink_many into shrink_node_memcgs Chen Ridong
2026-01-20 13:42 ` [RFC PATCH -next 6/7] mm/mglru: remove memcg disable handling from lru_gen_shrink_node Chen Ridong
2026-01-20 13:42 ` [RFC PATCH -next 7/7] mm/mglru: remove memcg lru Chen Ridong
2026-01-29 11:25 ` [RFC PATCH -next 0/7] Introduce heat-level memcg reclaim Chen Ridong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260120134256.2271710-6-chenridong@huaweicloud.com \
--to=chenridong@huaweicloud.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=axelrasmussen@google.com \
--cc=cgroups@vger.kernel.org \
--cc=corbet@lwn.net \
--cc=david@kernel.org \
--cc=hannes@cmpxchg.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=lujialin4@huawei.com \
--cc=mhocko@suse.com \
--cc=muchun.song@linux.dev \
--cc=roman.gushchin@linux.dev \
--cc=rppt@kernel.org \
--cc=ryncsn@gmail.com \
--cc=shakeel.butt@linux.dev \
--cc=skhan@linuxfoundation.org \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
--cc=weixugc@google.com \
--cc=yuanchu@google.com \
--cc=zhengqi.arch@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox