From: Qi Zheng <qi.zheng@linux.dev>
To: hannes@cmpxchg.org, hughd@google.com, mhocko@suse.com,
roman.gushchin@linux.dev, shakeel.butt@linux.dev,
muchun.song@linux.dev, david@kernel.org,
lorenzo.stoakes@oracle.com, ziy@nvidia.com, harry.yoo@oracle.com,
yosry.ahmed@linux.dev, imran.f.khan@oracle.com,
kamalesh.babulal@oracle.com, axelrasmussen@google.com,
yuanchu@google.com, weixugc@google.com,
chenridong@huaweicloud.com, mkoutny@suse.com,
akpm@linux-foundation.org, hamzamahfooz@linux.microsoft.com,
apais@linux.microsoft.com, lance.yang@linux.dev, bhe@redhat.com
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
cgroups@vger.kernel.org, Qi Zheng <zhengqi.arch@bytedance.com>
Subject: [PATCH v4 25/31] mm: vmscan: prepare for reparenting traditional LRU folios
Date: Thu, 5 Feb 2026 17:01:44 +0800
Message-ID: <26efe2997261631c951d326bef6a20fe0796c43c.1770279888.git.zhengqi.arch@bytedance.com>
In-Reply-To: <cover.1770279888.git.zhengqi.arch@bytedance.com>
From: Qi Zheng <zhengqi.arch@bytedance.com>
To resolve the dying memcg issue, we need to reparent the LRU folios of a
child memcg to its parent memcg. For the traditional LRU, each lruvec of
every memcg comprises four LRU lists. Because these lists are laid out
identically in the child and the parent, the child's lists can simply be
transferred onto the parent's during reparenting.

This commit implements lru_reparent_memcg(), which performs that transfer
and will be called from the reparenting path. To make it usable from
mm/swap.c, the for_each_managed_zone_pgdat() macro is moved from
mm/vmscan.c to include/linux/swap.h.
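In sketch form (with child_lruvec and parent_lruvec being the two lruvecs
of the same node), the core of the transfer is just a tail splice per
evictable list; the real lruvec_reparent_lru() below additionally moves
the per-zone LRU size counters:

	for_each_lru(lru)
		/* unevictable folios are counted but not kept on a list */
		if (lru != LRU_UNEVICTABLE)
			list_splice_tail_init(&child_lruvec->lists[lru],
					      &parent_lruvec->lists[lru]);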
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Muchun Song <muchun.song@linux.dev>
---
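Usage note: with for_each_managed_zone_pgdat() now defined in
include/linux/swap.h, any per-node walk that only cares about managed
zones can use it directly. A minimal, illustrative sketch
(count_node_managed_pages() is a made-up example, not part of this
series):

	static unsigned long count_node_managed_pages(int nid, int highidx)
	{
		struct zone *zone;
		unsigned long pages = 0;
		int zid;

		/* Skips unmanaged zones, up to and including highidx. */
		for_each_managed_zone_pgdat(zone, NODE_DATA(nid), zid, highidx)
			pages += zone_managed_pages(zone);

		return pages;
	}
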
include/linux/swap.h | 21 +++++++++++++++++++++
mm/swap.c            | 37 +++++++++++++++++++++++++++++++++++++
mm/vmscan.c          | 19 -------------------
3 files changed, 58 insertions(+), 19 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 39ecd25217178..62e124ec6b75a 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -570,6 +570,8 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg)
return READ_ONCE(memcg->swappiness);
}
+
+void lru_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent);
#else
static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
{
@@ -634,5 +636,24 @@ static inline bool mem_cgroup_swap_full(struct folio *folio)
}
#endif
+/* for_each_managed_zone_pgdat - helper macro to iterate over all managed zones in a pgdat up to
+ * and including the specified highidx
+ * @zone: The current zone in the iterator
+ * @pgdat: The pgdat which node_zones are being iterated
+ * @idx: The index variable
+ * @highidx: The index of the highest zone to return
+ *
+ * This macro iterates through all managed zones up to and including the specified highidx.
+ * The zone iterator enters an invalid state after macro call and must be reinitialized
+ * before it can be used again.
+ */
+#define for_each_managed_zone_pgdat(zone, pgdat, idx, highidx) \
+ for ((idx) = 0, (zone) = (pgdat)->node_zones; \
+ (idx) <= (highidx); \
+ (idx)++, (zone)++) \
+ if (!managed_zone(zone)) \
+ continue; \
+ else
+
#endif /* __KERNEL__*/
#endif /* _LINUX_SWAP_H */
diff --git a/mm/swap.c b/mm/swap.c
index d5bfe6a76ca45..25f39d4263fb5 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1090,6 +1090,43 @@ void folio_batch_remove_exceptionals(struct folio_batch *fbatch)
fbatch->nr = j;
}
+#ifdef CONFIG_MEMCG
+static void lruvec_reparent_lru(struct lruvec *child_lruvec,
+ struct lruvec *parent_lruvec,
+ enum lru_list lru, int nid)
+{
+ int zid;
+ struct zone *zone;
+
+ if (lru != LRU_UNEVICTABLE)
+ list_splice_tail_init(&child_lruvec->lists[lru], &parent_lruvec->lists[lru]);
+
+ for_each_managed_zone_pgdat(zone, NODE_DATA(nid), zid, MAX_NR_ZONES - 1) {
+ unsigned long size = mem_cgroup_get_zone_lru_size(child_lruvec, lru, zid);
+
+ mem_cgroup_update_lru_size(parent_lruvec, lru, zid, size);
+ }
+}
+
+void lru_reparent_memcg(struct mem_cgroup *memcg, struct mem_cgroup *parent)
+{
+ int nid;
+
+ for_each_node(nid) {
+ enum lru_list lru;
+ struct lruvec *child_lruvec, *parent_lruvec;
+
+ child_lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+ parent_lruvec = mem_cgroup_lruvec(parent, NODE_DATA(nid));
+ parent_lruvec->anon_cost += child_lruvec->anon_cost;
+ parent_lruvec->file_cost += child_lruvec->file_cost;
+
+ for_each_lru(lru)
+ lruvec_reparent_lru(child_lruvec, parent_lruvec, lru, nid);
+ }
+}
+#endif
+
static const struct ctl_table swap_sysctl_table[] = {
{
.procname = "page-cluster",
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f904231e33ec0..e2d9ef9a5dedc 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -269,25 +269,6 @@ static int sc_swappiness(struct scan_control *sc, struct mem_cgroup *memcg)
}
#endif
-/* for_each_managed_zone_pgdat - helper macro to iterate over all managed zones in a pgdat up to
- * and including the specified highidx
- * @zone: The current zone in the iterator
- * @pgdat: The pgdat which node_zones are being iterated
- * @idx: The index variable
- * @highidx: The index of the highest zone to return
- *
- * This macro iterates through all managed zones up to and including the specified highidx.
- * The zone iterator enters an invalid state after macro call and must be reinitialized
- * before it can be used again.
- */
-#define for_each_managed_zone_pgdat(zone, pgdat, idx, highidx) \
- for ((idx) = 0, (zone) = (pgdat)->node_zones; \
- (idx) <= (highidx); \
- (idx)++, (zone)++) \
- if (!managed_zone(zone)) \
- continue; \
- else
-
static void set_task_reclaim_state(struct task_struct *task,
struct reclaim_state *rs)
{
--
2.20.1