linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Qi Zheng <qi.zheng@linux.dev>
To: hannes@cmpxchg.org, hughd@google.com, mhocko@suse.com,
	roman.gushchin@linux.dev, shakeel.butt@linux.dev,
	muchun.song@linux.dev, david@kernel.org,
	lorenzo.stoakes@oracle.com, ziy@nvidia.com, harry.yoo@oracle.com,
	imran.f.khan@oracle.com, kamalesh.babulal@oracle.com,
	axelrasmussen@google.com, yuanchu@google.com, weixugc@google.com,
	chenridong@huaweicloud.com, mkoutny@suse.com,
	akpm@linux-foundation.org, hamzamahfooz@linux.microsoft.com,
	apais@linux.microsoft.com, lance.yang@linux.dev
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	cgroups@vger.kernel.org, Muchun Song <songmuchun@bytedance.com>,
	Qi Zheng <zhengqi.arch@bytedance.com>
Subject: [PATCH v2 10/28] writeback: prevent memory cgroup release in writeback module
Date: Wed, 17 Dec 2025 15:27:34 +0800	[thread overview]
Message-ID: <d2863fabba49a16572a84163e42cbce64aee27c9.1765956025.git.zhengqi.arch@bytedance.com> (raw)
In-Reply-To: <cover.1765956025.git.zhengqi.arch@bytedance.com>

From: Muchun Song <songmuchun@bytedance.com>

In the near future, a folio will no longer pin its corresponding
memory cgroup. Callers of folio_memcg() must then either hold the
RCU read lock or acquire a reference to the returned memory cgroup,
so that it cannot be released while it is still in use.

This patch converts the writeback code accordingly: it uses
get_mem_cgroup_css_from_folio(), which returns the css with a
reference held that the caller drops with css_put(), and the RCU
read lock to prevent the memory cgroup from being released.

This serves as a preparatory measure for the reparenting of the
LRU pages.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
---
 fs/fs-writeback.c                | 22 +++++++++++-----------
 include/linux/memcontrol.h       |  9 +++++++--
 include/trace/events/writeback.h |  3 +++
 mm/memcontrol.c                  | 14 ++++++++------
 4 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5dd6e89a6d29e..2e57b7e2b4453 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -279,15 +279,13 @@ void __inode_attach_wb(struct inode *inode, struct folio *folio)
 	if (inode_cgwb_enabled(inode)) {
 		struct cgroup_subsys_state *memcg_css;
 
-		if (folio) {
-			memcg_css = mem_cgroup_css_from_folio(folio);
-			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
-		} else {
-			/* must pin memcg_css, see wb_get_create() */
+		/* must pin memcg_css, see wb_get_create() */
+		if (folio)
+			memcg_css = get_mem_cgroup_css_from_folio(folio);
+		else
 			memcg_css = task_get_css(current, memory_cgrp_id);
-			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
-			css_put(memcg_css);
-		}
+		wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
+		css_put(memcg_css);
 	}
 
 	if (!wb)
@@ -979,16 +977,16 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio
 	if (!wbc->wb || wbc->no_cgroup_owner)
 		return;
 
-	css = mem_cgroup_css_from_folio(folio);
+	css = get_mem_cgroup_css_from_folio(folio);
 	/* dead cgroups shouldn't contribute to inode ownership arbitration */
 	if (!css_is_online(css))
-		return;
+		goto out;
 
 	id = css->id;
 
 	if (id == wbc->wb_id) {
 		wbc->wb_bytes += bytes;
-		return;
+		goto out;
 	}
 
 	if (id == wbc->wb_lcand_id)
@@ -1001,6 +999,8 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio
 		wbc->wb_tcand_bytes += bytes;
 	else
 		wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
+out:
+	css_put(css);
 }
 EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 776d9be1f446a..bc526e0d37e0b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -895,7 +895,7 @@ static inline bool mm_match_cgroup(struct mm_struct *mm,
 	return match;
 }
 
-struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio);
+struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio);
 ino_t page_cgroup_ino(struct page *page);
 
 static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
@@ -1549,9 +1549,14 @@ static inline void mem_cgroup_track_foreign_dirty(struct folio *folio,
 	if (mem_cgroup_disabled())
 		return;
 
+	if (!folio_memcg_charged(folio))
+		return;
+
+	rcu_read_lock();
 	memcg = folio_memcg(folio);
-	if (unlikely(memcg && &memcg->css != wb->memcg_css))
+	if (unlikely(&memcg->css != wb->memcg_css))
 		mem_cgroup_track_foreign_dirty_slowpath(folio, wb);
+	rcu_read_unlock();
 }
 
 void mem_cgroup_flush_foreign(struct bdi_writeback *wb);
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 311a341e6fe42..f5bfe8c1a160a 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -295,7 +295,10 @@ TRACE_EVENT(track_foreign_dirty,
 		__entry->ino		= inode ? inode->i_ino : 0;
 		__entry->memcg_id	= wb->memcg_css->id;
 		__entry->cgroup_ino	= __trace_wb_assign_cgroup(wb);
+
+		rcu_read_lock();
 		__entry->page_cgroup_ino = cgroup_ino(folio_memcg(folio)->css.cgroup);
+		rcu_read_unlock();
 	),
 
 	TP_printk("bdi %s[%llu]: ino=%lu memcg_id=%u cgroup_ino=%lu page_cgroup_ino=%lu",
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 431b3154c70c5..131f940c03fa0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -241,7 +241,7 @@ DEFINE_STATIC_KEY_FALSE(memcg_bpf_enabled_key);
 EXPORT_SYMBOL(memcg_bpf_enabled_key);
 
 /**
- * mem_cgroup_css_from_folio - css of the memcg associated with a folio
+ * get_mem_cgroup_css_from_folio - acquire a css of the memcg associated with a folio
  * @folio: folio of interest
  *
  * If memcg is bound to the default hierarchy, css of the memcg associated
@@ -251,14 +251,16 @@ EXPORT_SYMBOL(memcg_bpf_enabled_key);
  * If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup
  * is returned.
  */
-struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio)
+struct cgroup_subsys_state *get_mem_cgroup_css_from_folio(struct folio *folio)
 {
-	struct mem_cgroup *memcg = folio_memcg(folio);
+	struct mem_cgroup *memcg;
 
-	if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
-		memcg = root_mem_cgroup;
+	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return &root_mem_cgroup->css;
 
-	return &memcg->css;
+	memcg = get_mem_cgroup_from_folio(folio);
+
+	return memcg ? &memcg->css : &root_mem_cgroup->css;
 }
 
 /**
-- 
2.20.1



  parent reply	other threads:[~2025-12-17  7:30 UTC|newest]

Thread overview: 149+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-17  7:27 [PATCH v2 00/28] Eliminate Dying Memory Cgroup Qi Zheng
2025-12-17  7:27 ` [PATCH v2 01/28] mm: memcontrol: remove dead code of checking parent memory cgroup Qi Zheng
2025-12-18 23:31   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 02/28] mm: workingset: use folio_lruvec() in workingset_refault() Qi Zheng
2025-12-18 23:32   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 03/28] mm: rename unlock_page_lruvec_irq and its variants Qi Zheng
2025-12-18  9:00   ` David Hildenbrand (Red Hat)
2025-12-18 23:34   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 04/28] mm: vmscan: prepare for the refactoring the move_folios_to_lru() Qi Zheng
2025-12-17 21:13   ` Johannes Weiner
2025-12-18  9:04   ` David Hildenbrand (Red Hat)
2025-12-18  9:31     ` Qi Zheng
2025-12-18 23:39   ` Shakeel Butt
2025-12-25  3:45   ` Chen Ridong
2025-12-17  7:27 ` [PATCH v2 05/28] mm: vmscan: refactor move_folios_to_lru() Qi Zheng
2025-12-19  0:04   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 06/28] mm: memcontrol: allocate object cgroup for non-kmem case Qi Zheng
2025-12-17 21:22   ` Johannes Weiner
2025-12-18  6:25     ` Qi Zheng
2025-12-19  0:23   ` Shakeel Butt
2025-12-25  6:23   ` Chen Ridong
2025-12-17  7:27 ` [PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup Qi Zheng
2025-12-17 21:28   ` Johannes Weiner
2025-12-19  0:39   ` Shakeel Butt
2025-12-26  1:03   ` Chen Ridong
2025-12-26  3:10     ` Muchun Song
2025-12-26  3:50       ` Chen Ridong
2025-12-26  3:58         ` Chen Ridong
2025-12-17  7:27 ` [PATCH v2 08/28] mm: memcontrol: prevent memory cgroup release in get_mem_cgroup_from_folio() Qi Zheng
2025-12-17 21:45   ` Johannes Weiner
2025-12-18  6:31     ` Qi Zheng
2025-12-19  2:09     ` Shakeel Butt
2025-12-19  3:53       ` Johannes Weiner
2025-12-19  3:56         ` Johannes Weiner
2025-12-17  7:27 ` [PATCH v2 09/28] buffer: prevent memory cgroup release in folio_alloc_buffers() Qi Zheng
2025-12-17 21:45   ` Johannes Weiner
2025-12-19  2:14   ` Shakeel Butt
2025-12-26  2:01     ` Chen Ridong
2025-12-17  7:27 ` Qi Zheng [this message]
2025-12-17 22:08   ` [PATCH v2 10/28] writeback: prevent memory cgroup release in writeback module Johannes Weiner
2025-12-19  2:30   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 11/28] mm: memcontrol: prevent memory cgroup release in count_memcg_folio_events() Qi Zheng
2025-12-17 22:11   ` Johannes Weiner
2025-12-19 23:31   ` Shakeel Butt
2025-12-26  2:12   ` Chen Ridong
2025-12-17  7:27 ` [PATCH v2 12/28] mm: page_io: prevent memory cgroup release in page_io module Qi Zheng
2025-12-17 22:12   ` Johannes Weiner
2025-12-19 23:44   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 13/28] mm: migrate: prevent memory cgroup release in folio_migrate_mapping() Qi Zheng
2025-12-17 22:14   ` Johannes Weiner
2025-12-18  9:09   ` David Hildenbrand (Red Hat)
2025-12-18  9:36     ` Qi Zheng
2025-12-18  9:43       ` David Hildenbrand (Red Hat)
2025-12-18 11:40         ` Qi Zheng
2025-12-18 11:56           ` David Hildenbrand (Red Hat)
2025-12-18 13:00             ` Qi Zheng
2025-12-18 13:04               ` David Hildenbrand (Red Hat)
2025-12-18 13:16                 ` Qi Zheng
2025-12-19  4:12                   ` Harry Yoo
2025-12-19  6:18                     ` David Hildenbrand (Red Hat)
2025-12-18 14:26     ` Johannes Weiner
2025-12-22  3:42       ` Qi Zheng
2025-12-30 20:07       ` David Hildenbrand (Red Hat)
2025-12-19 23:51   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 14/28] mm: mglru: prevent memory cgroup release in mglru Qi Zheng
2025-12-17 22:18   ` Johannes Weiner
2025-12-18  6:50     ` Qi Zheng
2025-12-20  0:58     ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 15/28] mm: memcontrol: prevent memory cgroup release in mem_cgroup_swap_full() Qi Zheng
2025-12-17 22:21   ` Johannes Weiner
2025-12-20  1:05   ` Shakeel Butt
2025-12-22  4:02     ` Qi Zheng
2025-12-26  2:29     ` Chen Ridong
2025-12-17  7:27 ` [PATCH v2 16/28] mm: workingset: prevent memory cgroup release in lru_gen_eviction() Qi Zheng
2025-12-17 22:23   ` Johannes Weiner
2025-12-20  1:06   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 17/28] mm: thp: prevent memory cgroup release in folio_split_queue_lock{_irqsave}() Qi Zheng
2025-12-17 22:27   ` Johannes Weiner
2025-12-20  1:11     ` Shakeel Butt
2025-12-22  3:33       ` Qi Zheng
2025-12-18  9:10   ` David Hildenbrand (Red Hat)
2025-12-17  7:27 ` [PATCH v2 18/28] mm: zswap: prevent memory cgroup release in zswap_compress() Qi Zheng
2025-12-17 22:27   ` Johannes Weiner
2025-12-20  1:14   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 19/28] mm: workingset: prevent lruvec release in workingset_refault() Qi Zheng
2025-12-17 22:30   ` Johannes Weiner
2025-12-18  6:57     ` Qi Zheng
2025-12-17  7:27 ` [PATCH v2 20/28] mm: zswap: prevent lruvec release in zswap_folio_swapin() Qi Zheng
2025-12-17 22:33   ` Johannes Weiner
2025-12-18  7:09     ` Qi Zheng
2025-12-18 13:02       ` Johannes Weiner
2025-12-20  1:23   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 21/28] mm: swap: prevent lruvec release in lru_gen_clear_refs() Qi Zheng
2025-12-17 22:34   ` Johannes Weiner
2025-12-20  1:24   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 22/28] mm: workingset: prevent lruvec release in workingset_activation() Qi Zheng
2025-12-17 22:36   ` Johannes Weiner
2025-12-20  1:25   ` Shakeel Butt
2025-12-17  7:27 ` [PATCH v2 23/28] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock Qi Zheng
2025-12-18 13:00   ` Johannes Weiner
2025-12-18 13:17     ` Qi Zheng
2025-12-20  2:03   ` Shakeel Butt
2025-12-23  6:14     ` Qi Zheng
2025-12-17  7:27 ` [PATCH v2 24/28] mm: vmscan: prepare for reparenting traditional LRU folios Qi Zheng
2025-12-18 13:32   ` Johannes Weiner
2025-12-22  3:55     ` Qi Zheng
2025-12-17  7:27 ` [PATCH v2 25/28] mm: vmscan: prepare for reparenting MGLRU folios Qi Zheng
2025-12-17  7:27 ` [PATCH v2 26/28] mm: memcontrol: refactor memcg_reparent_objcgs() Qi Zheng
2025-12-18 13:45   ` Johannes Weiner
2025-12-22  3:56     ` Qi Zheng
2025-12-17  7:27 ` [PATCH v2 27/28] mm: memcontrol: eliminate the problem of dying memory cgroup for LRU folios Qi Zheng
2025-12-18 14:06   ` Johannes Weiner
2025-12-22  3:59     ` Qi Zheng
2025-12-17  7:27 ` [PATCH v2 28/28] mm: lru: add VM_WARN_ON_ONCE_FOLIO to lru maintenance helpers Qi Zheng
2025-12-18 14:07   ` Johannes Weiner
2025-12-23 20:04 ` [PATCH v2 00/28] Eliminate Dying Memory Cgroup Yosry Ahmed
2025-12-23 23:20   ` Shakeel Butt
2025-12-24  0:07     ` Yosry Ahmed
2025-12-24  0:36       ` Shakeel Butt
2025-12-24  0:43         ` Yosry Ahmed
2025-12-24  0:58           ` Shakeel Butt
2025-12-29  9:42             ` Qi Zheng
2025-12-29 10:52               ` Michal Koutný
2025-12-29  7:48     ` Qi Zheng
2025-12-29  9:35       ` Harry Yoo
2025-12-29  9:46         ` Qi Zheng
2025-12-29 10:53         ` Michal Koutný
2025-12-24  8:43   ` Harry Yoo
2025-12-24 14:51     ` Yosry Ahmed
2025-12-26 11:24       ` Harry Yoo
2025-12-30  1:36 ` Roman Gushchin
2025-12-30  2:44   ` Qi Zheng
2025-12-30  4:20     ` Roman Gushchin
2025-12-30  4:25       ` Qi Zheng
2025-12-30  4:48         ` Shakeel Butt
2025-12-30 16:46           ` Zi Yan
2025-12-30 18:13             ` Shakeel Butt
2025-12-30 19:18               ` Chris Mason
2025-12-30 20:51                 ` Matthew Wilcox
2025-12-30 21:10                   ` Chris Mason
2025-12-30 22:30                     ` Roman Gushchin
2025-12-30 22:03                   ` Roman Gushchin
2025-12-30 21:07                 ` Zi Yan
2025-12-30 19:34             ` Roman Gushchin
2025-12-30 21:13               ` Zi Yan
2025-12-30  4:01   ` Shakeel Butt
2025-12-30  4:11     ` Roman Gushchin
2025-12-30 18:36       ` Shakeel Butt
2025-12-30 20:47         ` Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d2863fabba49a16572a84163e42cbce64aee27c9.1765956025.git.zhengqi.arch@bytedance.com \
    --to=qi.zheng@linux.dev \
    --cc=akpm@linux-foundation.org \
    --cc=apais@linux.microsoft.com \
    --cc=axelrasmussen@google.com \
    --cc=cgroups@vger.kernel.org \
    --cc=chenridong@huaweicloud.com \
    --cc=david@kernel.org \
    --cc=hamzamahfooz@linux.microsoft.com \
    --cc=hannes@cmpxchg.org \
    --cc=harry.yoo@oracle.com \
    --cc=hughd@google.com \
    --cc=imran.f.khan@oracle.com \
    --cc=kamalesh.babulal@oracle.com \
    --cc=lance.yang@linux.dev \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=mkoutny@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeel.butt@linux.dev \
    --cc=songmuchun@bytedance.com \
    --cc=weixugc@google.com \
    --cc=yuanchu@google.com \
    --cc=zhengqi.arch@bytedance.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox