From: Chen Ridong <chenridong@huaweicloud.com>
To: Qi Zheng <qi.zheng@linux.dev>,
hannes@cmpxchg.org, hughd@google.com, mhocko@suse.com,
roman.gushchin@linux.dev, shakeel.butt@linux.dev,
muchun.song@linux.dev, david@kernel.org,
lorenzo.stoakes@oracle.com, ziy@nvidia.com, harry.yoo@oracle.com,
imran.f.khan@oracle.com, kamalesh.babulal@oracle.com,
axelrasmussen@google.com, yuanchu@google.com, weixugc@google.com,
mkoutny@suse.com, akpm@linux-foundation.org,
hamzamahfooz@linux.microsoft.com, apais@linux.microsoft.com,
lance.yang@linux.dev
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
cgroups@vger.kernel.org, Muchun Song <songmuchun@bytedance.com>,
Qi Zheng <zhengqi.arch@bytedance.com>
Subject: Re: [PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup
Date: Fri, 26 Dec 2025 09:03:44 +0800 [thread overview]
Message-ID: <05500a05-aa90-4b60-a324-2819dc2c5805@huaweicloud.com> (raw)
In-Reply-To: <3e454b151f3926dbd67d5df6dc2b129edd927101.1765956025.git.zhengqi.arch@bytedance.com>
On 2025/12/17 15:27, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
>
> Memory cgroup functions such as get_mem_cgroup_from_folio() and
> get_mem_cgroup_from_mm() return a valid memory cgroup pointer,
> even for the root memory cgroup. In contrast, the situation for
> object cgroups has been different.
>
> Previously, the root object cgroup couldn't be returned because
> it didn't exist. Now that a valid root object cgroup exists, for
> the sake of consistency, it's necessary to align the behavior of
> object-cgroup-related operations with that of memory cgroup APIs.
>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
> include/linux/memcontrol.h | 26 +++++++++++++++++-----
> mm/memcontrol.c | 45 ++++++++++++++++++++------------------
> mm/percpu.c | 2 +-
> 3 files changed, 45 insertions(+), 28 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 288dd6337f80f..776d9be1f446a 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -332,6 +332,7 @@ struct mem_cgroup {
> #define MEMCG_CHARGE_BATCH 64U
>
> extern struct mem_cgroup *root_mem_cgroup;
> +extern struct obj_cgroup *root_obj_cgroup;
>
> enum page_memcg_data_flags {
> /* page->memcg_data is a pointer to an slabobj_ext vector */
> @@ -549,6 +550,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
> return (memcg == root_mem_cgroup);
> }
>
> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> +{
> + return objcg == root_obj_cgroup;
> +}
> +
> static inline bool mem_cgroup_disabled(void)
> {
> return !cgroup_subsys_enabled(memory_cgrp_subsys);
> @@ -773,23 +779,26 @@ struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
>
> static inline bool obj_cgroup_tryget(struct obj_cgroup *objcg)
> {
> + if (obj_cgroup_is_root(objcg))
> + return true;
> return percpu_ref_tryget(&objcg->refcnt);
> }
>
> -static inline void obj_cgroup_get(struct obj_cgroup *objcg)
> +static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
> + unsigned long nr)
> {
> - percpu_ref_get(&objcg->refcnt);
> + if (!obj_cgroup_is_root(objcg))
> + percpu_ref_get_many(&objcg->refcnt, nr);
> }
>
> -static inline void obj_cgroup_get_many(struct obj_cgroup *objcg,
> - unsigned long nr)
> +static inline void obj_cgroup_get(struct obj_cgroup *objcg)
> {
> - percpu_ref_get_many(&objcg->refcnt, nr);
> + obj_cgroup_get_many(objcg, 1);
> }
>
> static inline void obj_cgroup_put(struct obj_cgroup *objcg)
> {
> - if (objcg)
> + if (objcg && !obj_cgroup_is_root(objcg))
> percpu_ref_put(&objcg->refcnt);
> }
>
> @@ -1084,6 +1093,11 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
> return true;
> }
>
> +static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
> +{
> + return true;
> +}
> +
> static inline bool mem_cgroup_disabled(void)
> {
> return true;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 544b3200db12d..21b5aad34cae7 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -83,6 +83,8 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
> struct mem_cgroup *root_mem_cgroup __read_mostly;
> EXPORT_SYMBOL(root_mem_cgroup);
>
> +struct obj_cgroup *root_obj_cgroup __read_mostly;
> +
> /* Active memory cgroup to use from an interrupt context */
> DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
> EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
> @@ -2634,15 +2636,14 @@ struct mem_cgroup *mem_cgroup_from_slab_obj(void *p)
>
> static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
> {
> - struct obj_cgroup *objcg = NULL;
> + for (; memcg; memcg = parent_mem_cgroup(memcg)) {
> + struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
>
> - for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
> - objcg = rcu_dereference(memcg->objcg);
> if (likely(objcg && obj_cgroup_tryget(objcg)))
> - break;
> - objcg = NULL;
> + return objcg;
> }
> - return objcg;
> +
> + return NULL;
> }
>
> static struct obj_cgroup *current_objcg_update(void)
> @@ -2716,18 +2717,17 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
> * Objcg reference is kept by the task, so it's safe
> * to use the objcg by the current task.
> */
> - return objcg;
> + return objcg ? : root_obj_cgroup;
> }
>
> memcg = this_cpu_read(int_active_memcg);
> if (unlikely(memcg))
> goto from_memcg;
>
> - return NULL;
> + return root_obj_cgroup;
>
> from_memcg:
> - objcg = NULL;
> - for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
> + for (; memcg; memcg = parent_mem_cgroup(memcg)) {
> /*
> * Memcg pointer is protected by scope (see set_active_memcg())
> * and is pinning the corresponding objcg, so objcg can't go
> @@ -2736,10 +2736,10 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
> */
> objcg = rcu_dereference_check(memcg->objcg, 1);
> if (likely(objcg))
> - break;
> + return objcg;
> }
>
> - return objcg;
> + return root_obj_cgroup;
> }
>
> struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
> @@ -2753,14 +2753,8 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
> objcg = __folio_objcg(folio);
> obj_cgroup_get(objcg);
> } else {
> - struct mem_cgroup *memcg;
> -
> rcu_read_lock();
> - memcg = __folio_memcg(folio);
> - if (memcg)
> - objcg = __get_obj_cgroup_from_memcg(memcg);
> - else
> - objcg = NULL;
> + objcg = __get_obj_cgroup_from_memcg(__folio_memcg(folio));
> rcu_read_unlock();
> }
> return objcg;
> @@ -2863,7 +2857,7 @@ int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order)
> int ret = 0;
>
> objcg = current_obj_cgroup();
> - if (objcg) {
> + if (objcg && !obj_cgroup_is_root(objcg)) {
> ret = obj_cgroup_charge_pages(objcg, gfp, 1 << order);
> if (!ret) {
> obj_cgroup_get(objcg);
> @@ -3164,7 +3158,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> * obj_cgroup_get() is used to get a permanent reference.
> */
> objcg = current_obj_cgroup();
> - if (!objcg)
> + if (!objcg || obj_cgroup_is_root(objcg))
> return true;
>
> /*
> @@ -3851,6 +3845,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
> if (!objcg)
> goto free_shrinker;
>
> + if (unlikely(mem_cgroup_is_root(memcg)))
> + root_obj_cgroup = objcg;
> +
> objcg->memcg = memcg;
> rcu_assign_pointer(memcg->objcg, objcg);
> obj_cgroup_get(objcg);
> @@ -5471,6 +5468,9 @@ void obj_cgroup_charge_zswap(struct obj_cgroup *objcg, size_t size)
> if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
> return;
>
> + if (obj_cgroup_is_root(objcg))
> + return;
> +
> VM_WARN_ON_ONCE(!(current->flags & PF_MEMALLOC));
>
> /* PF_MEMALLOC context, charging must succeed */
> @@ -5498,6 +5498,9 @@ void obj_cgroup_uncharge_zswap(struct obj_cgroup *objcg, size_t size)
> if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
> return;
>
> + if (obj_cgroup_is_root(objcg))
> + return;
> +
> obj_cgroup_uncharge(objcg, size);
>
If we modify zswap by adding MEMCG_ZSWAP_B and MEMCG_ZSWAPPED with obj_cgroup_charge_zswap , then
remove a control group (via rmdir) and reparent its objects to the root cgroup, then for the root
cgroup, obj_cgroup_uncharge_zswap will return directly due to the obj_cgroup_is_root check. Would
this cause us to miss decrementing MEMCG_ZSWAP_B and MEMCG_ZSWAPPED?
> rcu_read_lock();
> diff --git a/mm/percpu.c b/mm/percpu.c
> index 81462ce5866e1..5c1a9b77d6b93 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -1616,7 +1616,7 @@ static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
> return true;
>
> objcg = current_obj_cgroup();
> - if (!objcg)
> + if (!objcg || obj_cgroup_is_root(objcg))
> return true;
>
> if (obj_cgroup_charge(objcg, gfp, pcpu_obj_full_size(size)))
--
Best regards,
Ridong
next prev parent reply other threads:[~2025-12-26 1:03 UTC|newest]
Thread overview: 149+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-17 7:27 [PATCH v2 00/28] Eliminate Dying Memory Cgroup Qi Zheng
2025-12-17 7:27 ` [PATCH v2 01/28] mm: memcontrol: remove dead code of checking parent memory cgroup Qi Zheng
2025-12-18 23:31 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 02/28] mm: workingset: use folio_lruvec() in workingset_refault() Qi Zheng
2025-12-18 23:32 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 03/28] mm: rename unlock_page_lruvec_irq and its variants Qi Zheng
2025-12-18 9:00 ` David Hildenbrand (Red Hat)
2025-12-18 23:34 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 04/28] mm: vmscan: prepare for the refactoring the move_folios_to_lru() Qi Zheng
2025-12-17 21:13 ` Johannes Weiner
2025-12-18 9:04 ` David Hildenbrand (Red Hat)
2025-12-18 9:31 ` Qi Zheng
2025-12-18 23:39 ` Shakeel Butt
2025-12-25 3:45 ` Chen Ridong
2025-12-17 7:27 ` [PATCH v2 05/28] mm: vmscan: refactor move_folios_to_lru() Qi Zheng
2025-12-19 0:04 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 06/28] mm: memcontrol: allocate object cgroup for non-kmem case Qi Zheng
2025-12-17 21:22 ` Johannes Weiner
2025-12-18 6:25 ` Qi Zheng
2025-12-19 0:23 ` Shakeel Butt
2025-12-25 6:23 ` Chen Ridong
2025-12-17 7:27 ` [PATCH v2 07/28] mm: memcontrol: return root object cgroup for root memory cgroup Qi Zheng
2025-12-17 21:28 ` Johannes Weiner
2025-12-19 0:39 ` Shakeel Butt
2025-12-26 1:03 ` Chen Ridong [this message]
2025-12-26 3:10 ` Muchun Song
2025-12-26 3:50 ` Chen Ridong
2025-12-26 3:58 ` Chen Ridong
2025-12-17 7:27 ` [PATCH v2 08/28] mm: memcontrol: prevent memory cgroup release in get_mem_cgroup_from_folio() Qi Zheng
2025-12-17 21:45 ` Johannes Weiner
2025-12-18 6:31 ` Qi Zheng
2025-12-19 2:09 ` Shakeel Butt
2025-12-19 3:53 ` Johannes Weiner
2025-12-19 3:56 ` Johannes Weiner
2025-12-17 7:27 ` [PATCH v2 09/28] buffer: prevent memory cgroup release in folio_alloc_buffers() Qi Zheng
2025-12-17 21:45 ` Johannes Weiner
2025-12-19 2:14 ` Shakeel Butt
2025-12-26 2:01 ` Chen Ridong
2025-12-17 7:27 ` [PATCH v2 10/28] writeback: prevent memory cgroup release in writeback module Qi Zheng
2025-12-17 22:08 ` Johannes Weiner
2025-12-19 2:30 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 11/28] mm: memcontrol: prevent memory cgroup release in count_memcg_folio_events() Qi Zheng
2025-12-17 22:11 ` Johannes Weiner
2025-12-19 23:31 ` Shakeel Butt
2025-12-26 2:12 ` Chen Ridong
2025-12-17 7:27 ` [PATCH v2 12/28] mm: page_io: prevent memory cgroup release in page_io module Qi Zheng
2025-12-17 22:12 ` Johannes Weiner
2025-12-19 23:44 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 13/28] mm: migrate: prevent memory cgroup release in folio_migrate_mapping() Qi Zheng
2025-12-17 22:14 ` Johannes Weiner
2025-12-18 9:09 ` David Hildenbrand (Red Hat)
2025-12-18 9:36 ` Qi Zheng
2025-12-18 9:43 ` David Hildenbrand (Red Hat)
2025-12-18 11:40 ` Qi Zheng
2025-12-18 11:56 ` David Hildenbrand (Red Hat)
2025-12-18 13:00 ` Qi Zheng
2025-12-18 13:04 ` David Hildenbrand (Red Hat)
2025-12-18 13:16 ` Qi Zheng
2025-12-19 4:12 ` Harry Yoo
2025-12-19 6:18 ` David Hildenbrand (Red Hat)
2025-12-18 14:26 ` Johannes Weiner
2025-12-22 3:42 ` Qi Zheng
2025-12-30 20:07 ` David Hildenbrand (Red Hat)
2025-12-19 23:51 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 14/28] mm: mglru: prevent memory cgroup release in mglru Qi Zheng
2025-12-17 22:18 ` Johannes Weiner
2025-12-18 6:50 ` Qi Zheng
2025-12-20 0:58 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 15/28] mm: memcontrol: prevent memory cgroup release in mem_cgroup_swap_full() Qi Zheng
2025-12-17 22:21 ` Johannes Weiner
2025-12-20 1:05 ` Shakeel Butt
2025-12-22 4:02 ` Qi Zheng
2025-12-26 2:29 ` Chen Ridong
2025-12-17 7:27 ` [PATCH v2 16/28] mm: workingset: prevent memory cgroup release in lru_gen_eviction() Qi Zheng
2025-12-17 22:23 ` Johannes Weiner
2025-12-20 1:06 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 17/28] mm: thp: prevent memory cgroup release in folio_split_queue_lock{_irqsave}() Qi Zheng
2025-12-17 22:27 ` Johannes Weiner
2025-12-20 1:11 ` Shakeel Butt
2025-12-22 3:33 ` Qi Zheng
2025-12-18 9:10 ` David Hildenbrand (Red Hat)
2025-12-17 7:27 ` [PATCH v2 18/28] mm: zswap: prevent memory cgroup release in zswap_compress() Qi Zheng
2025-12-17 22:27 ` Johannes Weiner
2025-12-20 1:14 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 19/28] mm: workingset: prevent lruvec release in workingset_refault() Qi Zheng
2025-12-17 22:30 ` Johannes Weiner
2025-12-18 6:57 ` Qi Zheng
2025-12-17 7:27 ` [PATCH v2 20/28] mm: zswap: prevent lruvec release in zswap_folio_swapin() Qi Zheng
2025-12-17 22:33 ` Johannes Weiner
2025-12-18 7:09 ` Qi Zheng
2025-12-18 13:02 ` Johannes Weiner
2025-12-20 1:23 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 21/28] mm: swap: prevent lruvec release in lru_gen_clear_refs() Qi Zheng
2025-12-17 22:34 ` Johannes Weiner
2025-12-20 1:24 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 22/28] mm: workingset: prevent lruvec release in workingset_activation() Qi Zheng
2025-12-17 22:36 ` Johannes Weiner
2025-12-20 1:25 ` Shakeel Butt
2025-12-17 7:27 ` [PATCH v2 23/28] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock Qi Zheng
2025-12-18 13:00 ` Johannes Weiner
2025-12-18 13:17 ` Qi Zheng
2025-12-20 2:03 ` Shakeel Butt
2025-12-23 6:14 ` Qi Zheng
2025-12-17 7:27 ` [PATCH v2 24/28] mm: vmscan: prepare for reparenting traditional LRU folios Qi Zheng
2025-12-18 13:32 ` Johannes Weiner
2025-12-22 3:55 ` Qi Zheng
2025-12-17 7:27 ` [PATCH v2 25/28] mm: vmscan: prepare for reparenting MGLRU folios Qi Zheng
2025-12-17 7:27 ` [PATCH v2 26/28] mm: memcontrol: refactor memcg_reparent_objcgs() Qi Zheng
2025-12-18 13:45 ` Johannes Weiner
2025-12-22 3:56 ` Qi Zheng
2025-12-17 7:27 ` [PATCH v2 27/28] mm: memcontrol: eliminate the problem of dying memory cgroup for LRU folios Qi Zheng
2025-12-18 14:06 ` Johannes Weiner
2025-12-22 3:59 ` Qi Zheng
2025-12-17 7:27 ` [PATCH v2 28/28] mm: lru: add VM_WARN_ON_ONCE_FOLIO to lru maintenance helpers Qi Zheng
2025-12-18 14:07 ` Johannes Weiner
2025-12-23 20:04 ` [PATCH v2 00/28] Eliminate Dying Memory Cgroup Yosry Ahmed
2025-12-23 23:20 ` Shakeel Butt
2025-12-24 0:07 ` Yosry Ahmed
2025-12-24 0:36 ` Shakeel Butt
2025-12-24 0:43 ` Yosry Ahmed
2025-12-24 0:58 ` Shakeel Butt
2025-12-29 9:42 ` Qi Zheng
2025-12-29 10:52 ` Michal Koutný
2025-12-29 7:48 ` Qi Zheng
2025-12-29 9:35 ` Harry Yoo
2025-12-29 9:46 ` Qi Zheng
2025-12-29 10:53 ` Michal Koutný
2025-12-24 8:43 ` Harry Yoo
2025-12-24 14:51 ` Yosry Ahmed
2025-12-26 11:24 ` Harry Yoo
2025-12-30 1:36 ` Roman Gushchin
2025-12-30 2:44 ` Qi Zheng
2025-12-30 4:20 ` Roman Gushchin
2025-12-30 4:25 ` Qi Zheng
2025-12-30 4:48 ` Shakeel Butt
2025-12-30 16:46 ` Zi Yan
2025-12-30 18:13 ` Shakeel Butt
2025-12-30 19:18 ` Chris Mason
2025-12-30 20:51 ` Matthew Wilcox
2025-12-30 21:10 ` Chris Mason
2025-12-30 22:30 ` Roman Gushchin
2025-12-30 22:03 ` Roman Gushchin
2025-12-30 21:07 ` Zi Yan
2025-12-30 19:34 ` Roman Gushchin
2025-12-30 21:13 ` Zi Yan
2025-12-30 4:01 ` Shakeel Butt
2025-12-30 4:11 ` Roman Gushchin
2025-12-30 18:36 ` Shakeel Butt
2025-12-30 20:47 ` Roman Gushchin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=05500a05-aa90-4b60-a324-2819dc2c5805@huaweicloud.com \
--to=chenridong@huaweicloud.com \
--cc=akpm@linux-foundation.org \
--cc=apais@linux.microsoft.com \
--cc=axelrasmussen@google.com \
--cc=cgroups@vger.kernel.org \
--cc=david@kernel.org \
--cc=hamzamahfooz@linux.microsoft.com \
--cc=hannes@cmpxchg.org \
--cc=harry.yoo@oracle.com \
--cc=hughd@google.com \
--cc=imran.f.khan@oracle.com \
--cc=kamalesh.babulal@oracle.com \
--cc=lance.yang@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=mhocko@suse.com \
--cc=mkoutny@suse.com \
--cc=muchun.song@linux.dev \
--cc=qi.zheng@linux.dev \
--cc=roman.gushchin@linux.dev \
--cc=shakeel.butt@linux.dev \
--cc=songmuchun@bytedance.com \
--cc=weixugc@google.com \
--cc=yuanchu@google.com \
--cc=zhengqi.arch@bytedance.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox