From: Muchun Song <songmuchun@bytedance.com>
To: hannes@cmpxchg.org, mhocko@kernel.org, roman.gushchin@linux.dev,
shakeel.butt@linux.dev, muchun.song@linux.dev,
akpm@linux-foundation.org, david@fromorbit.com,
zhengqi.arch@bytedance.com, yosry.ahmed@linux.dev,
nphamcs@gmail.com, chengming.zhou@linux.dev
Cc: linux-kernel@vger.kernel.org, cgroups@vger.kernel.org,
linux-mm@kvack.org, hamzamahfooz@linux.microsoft.com,
apais@linux.microsoft.com, Muchun Song <songmuchun@bytedance.com>
Subject: [PATCH RFC 09/28] mm: memcontrol: allocate object cgroup for non-kmem case
Date: Tue, 15 Apr 2025 10:45:13 +0800 [thread overview]
Message-ID: <20250415024532.26632-10-songmuchun@bytedance.com> (raw)
In-Reply-To: <20250415024532.26632-1-songmuchun@bytedance.com>
Pagecache pages are charged at allocation time and hold a reference
to the original memory cgroup until reclaimed. Depending on memory
pressure, page sharing patterns between different cgroups and cgroup
creation/destruction rates, many dying memory cgroups can be pinned
by pagecache pages, reducing page reclaim efficiency and wasting
memory. Converting LRU folios and most other raw memory cgroup pins
to the object cgroup direction can fix this long-living problem.
As a result, the objcg infrastructure is no longer solely applicable
to the kmem case. In this patch, we extend the scope of the objcg
infrastructure beyond the kmem case, enabling LRU folios to reuse
it for folio charging purposes.
It should be noted that LRU folios are not accounted for at the root
level, yet the folio->memcg_data points to the root_mem_cgroup. Hence,
the folio->memcg_data of LRU folios always points to a valid pointer.
However, the root_mem_cgroup does not possess an object cgroup.
Therefore, we also allocate an object cgroup for the root_mem_cgroup.
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
---
mm/memcontrol.c | 50 +++++++++++++++++++++++--------------------------
1 file changed, 23 insertions(+), 27 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0fc76d50bc23..a6362d11b46c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -193,10 +193,10 @@ static struct obj_cgroup *obj_cgroup_alloc(void)
return objcg;
}
-static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
- struct mem_cgroup *parent)
+static void memcg_reparent_objcgs(struct mem_cgroup *memcg)
{
struct obj_cgroup *objcg, *iter;
+ struct mem_cgroup *parent = parent_mem_cgroup(memcg);
objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
@@ -3156,30 +3156,17 @@ unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
return val;
}
-static int memcg_online_kmem(struct mem_cgroup *memcg)
+static void memcg_online_kmem(struct mem_cgroup *memcg)
{
- struct obj_cgroup *objcg;
-
if (mem_cgroup_kmem_disabled())
- return 0;
+ return;
if (unlikely(mem_cgroup_is_root(memcg)))
- return 0;
-
- objcg = obj_cgroup_alloc();
- if (!objcg)
- return -ENOMEM;
-
- objcg->memcg = memcg;
- rcu_assign_pointer(memcg->objcg, objcg);
- obj_cgroup_get(objcg);
- memcg->orig_objcg = objcg;
+ return;
static_branch_enable(&memcg_kmem_online_key);
memcg->kmemcg_id = memcg->id.id;
-
- return 0;
}
static void memcg_offline_kmem(struct mem_cgroup *memcg)
@@ -3194,12 +3181,6 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
parent = parent_mem_cgroup(memcg);
memcg_reparent_list_lrus(memcg, parent);
-
- /*
- * Objcg's reparenting must be after list_lru's, make sure list_lru
- * helpers won't use parent's list_lru until child is drained.
- */
- memcg_reparent_objcgs(memcg, parent);
}
#ifdef CONFIG_CGROUP_WRITEBACK
@@ -3711,9 +3692,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+ struct obj_cgroup *objcg;
- if (memcg_online_kmem(memcg))
- goto remove_id;
+ memcg_online_kmem(memcg);
/*
* A memcg must be visible for expand_shrinker_info()
@@ -3723,6 +3704,15 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
if (alloc_shrinker_info(memcg))
goto offline_kmem;
+ objcg = obj_cgroup_alloc();
+ if (!objcg)
+ goto free_shrinker;
+
+ objcg->memcg = memcg;
+ rcu_assign_pointer(memcg->objcg, objcg);
+ obj_cgroup_get(objcg);
+ memcg->orig_objcg = objcg;
+
if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
FLUSH_TIME);
@@ -3745,9 +3735,10 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
xa_store(&mem_cgroup_ids, memcg->id.id, memcg, GFP_KERNEL);
return 0;
+free_shrinker:
+ free_shrinker_info(memcg);
offline_kmem:
memcg_offline_kmem(memcg);
-remove_id:
mem_cgroup_id_remove(memcg);
return -ENOMEM;
}
@@ -3764,6 +3755,11 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
zswap_memcg_offline_cleanup(memcg);
memcg_offline_kmem(memcg);
+ /*
+ * Objcg's reparenting must be after list_lru's above, make sure list_lru
+ * helpers won't use parent's list_lru until child is drained.
+ */
+ memcg_reparent_objcgs(memcg);
reparent_shrinker_deferred(memcg);
wb_memcg_offline(memcg);
lru_gen_offline_memcg(memcg);
--
2.20.1
next prev parent reply other threads:[~2025-04-15 2:46 UTC|newest]
Thread overview: 69+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-15 2:45 [PATCH RFC 00/28] Eliminate Dying Memory Cgroup Muchun Song
2025-04-15 2:45 ` [PATCH RFC 01/28] mm: memcontrol: remove dead code of checking parent memory cgroup Muchun Song
2025-04-17 14:35 ` Johannes Weiner
2025-04-15 2:45 ` [PATCH RFC 02/28] mm: memcontrol: use folio_memcg_charged() to avoid potential rcu lock holding Muchun Song
2025-04-17 14:48 ` Johannes Weiner
2025-04-18 2:38 ` Muchun Song
2025-04-15 2:45 ` [PATCH RFC 03/28] mm: workingset: use folio_lruvec() in workingset_refault() Muchun Song
2025-04-17 14:52 ` Johannes Weiner
2025-04-15 2:45 ` [PATCH RFC 04/28] mm: rename unlock_page_lruvec_irq and its variants Muchun Song
2025-04-17 14:53 ` Johannes Weiner
2025-04-15 2:45 ` [PATCH RFC 05/28] mm: thp: replace folio_memcg() with folio_memcg_charged() Muchun Song
2025-04-17 14:54 ` Johannes Weiner
2025-04-15 2:45 ` [PATCH RFC 06/28] mm: thp: introduce folio_split_queue_lock and its variants Muchun Song
2025-04-17 14:58 ` Johannes Weiner
2025-04-18 19:50 ` Johannes Weiner
2025-04-19 14:20 ` Muchun Song
2025-04-15 2:45 ` [PATCH RFC 07/28] mm: thp: use folio_batch to handle THP splitting in deferred_split_scan() Muchun Song
2025-04-30 14:37 ` Johannes Weiner
2025-05-06 6:44 ` Hugh Dickins
2025-05-06 21:44 ` Hugh Dickins
2025-05-07 3:30 ` Muchun Song
2025-04-15 2:45 ` [PATCH RFC 08/28] mm: vmscan: refactor move_folios_to_lru() Muchun Song
2025-04-30 14:49 ` Johannes Weiner
2025-04-15 2:45 ` Muchun Song [this message]
2025-04-15 2:45 ` [PATCH RFC 10/28] mm: memcontrol: return root object cgroup for root memory cgroup Muchun Song
2025-06-28 3:09 ` Chen Ridong
2025-06-30 7:16 ` Muchun Song
2025-04-15 2:45 ` [PATCH RFC 11/28] mm: memcontrol: prevent memory cgroup release in get_mem_cgroup_from_folio() Muchun Song
2025-04-15 2:45 ` [PATCH RFC 12/28] buffer: prevent memory cgroup release in folio_alloc_buffers() Muchun Song
2025-04-15 2:45 ` [PATCH RFC 13/28] writeback: prevent memory cgroup release in writeback module Muchun Song
2025-04-15 2:45 ` [PATCH RFC 14/28] mm: memcontrol: prevent memory cgroup release in count_memcg_folio_events() Muchun Song
2025-04-15 2:45 ` [PATCH RFC 15/28] mm: page_io: prevent memory cgroup release in page_io module Muchun Song
2025-04-15 2:45 ` [PATCH RFC 16/28] mm: migrate: prevent memory cgroup release in folio_migrate_mapping() Muchun Song
2025-04-15 2:45 ` [PATCH RFC 17/28] mm: mglru: prevent memory cgroup release in mglru Muchun Song
2025-04-15 2:45 ` [PATCH RFC 18/28] mm: memcontrol: prevent memory cgroup release in mem_cgroup_swap_full() Muchun Song
2025-04-15 2:45 ` [PATCH RFC 19/28] mm: workingset: prevent memory cgroup release in lru_gen_eviction() Muchun Song
2025-04-15 2:45 ` [PATCH RFC 20/28] mm: workingset: prevent lruvec release in workingset_refault() Muchun Song
2025-04-15 2:45 ` [PATCH RFC 21/28] mm: zswap: prevent lruvec release in zswap_folio_swapin() Muchun Song
2025-04-17 17:39 ` Nhat Pham
2025-04-18 2:36 ` Chengming Zhou
2025-04-15 2:45 ` [PATCH RFC 22/28] mm: swap: prevent lruvec release in swap module Muchun Song
2025-04-15 2:45 ` [PATCH RFC 23/28] mm: workingset: prevent lruvec release in workingset_activation() Muchun Song
2025-04-15 2:45 ` [PATCH RFC 24/28] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock Muchun Song
2025-04-15 2:45 ` [PATCH RFC 25/28] mm: thp: prepare for reparenting LRU pages for split queue lock Muchun Song
2025-04-15 2:45 ` [PATCH RFC 26/28] mm: memcontrol: introduce memcg_reparent_ops Muchun Song
2025-06-30 12:47 ` Harry Yoo
2025-07-01 22:12 ` Harry Yoo
2025-07-07 9:29 ` [External] " Muchun Song
2025-07-09 0:14 ` Harry Yoo
2025-04-15 2:45 ` [PATCH RFC 27/28] mm: memcontrol: eliminate the problem of dying memory cgroup for LRU folios Muchun Song
2025-05-20 11:27 ` Harry Yoo
2025-05-22 2:31 ` Muchun Song
2025-05-23 1:24 ` Harry Yoo
2025-04-15 2:45 ` [PATCH RFC 28/28] mm: lru: add VM_WARN_ON_ONCE_FOLIO to lru maintenance helpers Muchun Song
2025-04-15 2:53 ` [PATCH RFC 00/28] Eliminate Dying Memory Cgroup Muchun Song
2025-04-15 6:19 ` Kairui Song
2025-04-15 8:01 ` Muchun Song
2025-04-17 18:22 ` Kairui Song
2025-04-17 19:04 ` Johannes Weiner
2025-06-27 8:50 ` Chen Ridong
2025-04-17 21:45 ` Roman Gushchin
2025-04-28 3:43 ` Kairui Song
2025-06-27 9:02 ` Chen Ridong
2025-06-27 18:54 ` Kairui Song
2025-06-27 19:14 ` Shakeel Butt
2025-06-28 9:21 ` Chen Ridong
2025-04-22 14:20 ` Yosry Ahmed
2025-05-23 1:23 ` Harry Yoo
2025-05-23 2:39 ` Muchun Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250415024532.26632-10-songmuchun@bytedance.com \
--to=songmuchun@bytedance.com \
--cc=akpm@linux-foundation.org \
--cc=apais@linux.microsoft.com \
--cc=cgroups@vger.kernel.org \
--cc=chengming.zhou@linux.dev \
--cc=david@fromorbit.com \
--cc=hamzamahfooz@linux.microsoft.com \
--cc=hannes@cmpxchg.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=nphamcs@gmail.com \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=yosry.ahmed@linux.dev \
--cc=zhengqi.arch@bytedance.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox