linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Johannes Weiner <hannes@cmpxchg.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Hao Li <hao.li@linux.dev>, Michal Hocko <mhocko@kernel.org>,
	Roman Gushchin <roman.gushchin@linux.dev>,
	Shakeel Butt <shakeel.butt@linux.dev>,
	Vlastimil Babka <vbabka@suse.cz>,
	Harry Yoo <harry.yoo@oracle.com>,
	linux-mm@kvack.org, cgroups@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH 5/5] mm: memcg: separate slab stat accounting from objcg charge cache
Date: Mon,  2 Mar 2026 14:50:18 -0500	[thread overview]
Message-ID: <20260302195305.620713-6-hannes@cmpxchg.org> (raw)
In-Reply-To: <20260302195305.620713-1-hannes@cmpxchg.org>

Cgroup slab metrics are cached per-cpu the same way as the sub-page
charge cache. However, the intertwined code to manage those dependent
caches right now is quite difficult to follow.

Specifically, cached slab stat updates occur in consume() if there was
enough charge cache to satisfy the new object. If that fails, whole
pages are reserved, and slab stats are updated when the remainder of
those pages, after subtracting the size of the new slab object, is
put into the charge cache. This already juggles a delicate mix of the
object size, the page charge size, and the remainder to put into the
byte cache. Doing slab accounting in this path as well is fragile, and
has recently caused a bug where the input parameters between the two
caches were mixed up.

Refactor the consume() and refill() paths into unlocked and locked
variants that only do charge caching. Then let the slab path manage
its own lock section and open-code charging and accounting.

This makes the slab stat cache subordinate to the charge cache:
__refill_obj_stock() is called first to prepare it;
__account_obj_stock() follows to hitch a ride.

This results in a minor behavioral change: previously, a mismatching
percpu stock would always be drained for the purpose of setting up
slab account caching, even if there was no byte remainder to put into
the charge cache. Now, the stock is left alone, and slab accounting
takes the uncached path if there is a mismatch. This is exceedingly
rare, and it was probably never worth draining the whole stock just to
cache the slab stat update.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 mm/memcontrol.c | 100 +++++++++++++++++++++++++++++-------------------
 1 file changed, 61 insertions(+), 39 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4f12b75743d4..9c6f9849b717 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3218,16 +3218,18 @@ static struct obj_stock_pcp *trylock_stock(void)
 
 static void unlock_stock(struct obj_stock_pcp *stock)
 {
-	local_unlock(&obj_stock.lock);
+	if (stock)
+		local_unlock(&obj_stock.lock);
 }
 
+/* Call after __refill_obj_stock() to ensure stock->cached_objcg == objcg */
 static void __account_obj_stock(struct obj_cgroup *objcg,
 				struct obj_stock_pcp *stock, int nr,
 				struct pglist_data *pgdat, enum node_stat_item idx)
 {
 	int *bytes;
 
-	if (!stock)
+	if (!stock || READ_ONCE(stock->cached_objcg) != objcg)
 		goto direct;
 
 	/*
@@ -3274,8 +3276,20 @@ static void __account_obj_stock(struct obj_cgroup *objcg,
 		mod_objcg_mlstate(objcg, pgdat, idx, nr);
 }
 
-static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
-			      struct pglist_data *pgdat, enum node_stat_item idx)
+static bool __consume_obj_stock(struct obj_cgroup *objcg,
+				struct obj_stock_pcp *stock,
+				unsigned int nr_bytes)
+{
+	if (objcg == READ_ONCE(stock->cached_objcg) &&
+	    stock->nr_bytes >= nr_bytes) {
+		stock->nr_bytes -= nr_bytes;
+		return true;
+	}
+
+	return false;
+}
+
+static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes)
 {
 	struct obj_stock_pcp *stock;
 	bool ret = false;
@@ -3284,14 +3298,7 @@ static bool consume_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 	if (!stock)
 		return ret;
 
-	if (objcg == READ_ONCE(stock->cached_objcg) && stock->nr_bytes >= nr_bytes) {
-		stock->nr_bytes -= nr_bytes;
-		ret = true;
-
-		if (pgdat)
-			__account_obj_stock(objcg, stock, nr_bytes, pgdat, idx);
-	}
-
+	ret = __consume_obj_stock(objcg, stock, nr_bytes);
 	unlock_stock(stock);
 
 	return ret;
@@ -3376,17 +3383,14 @@ static bool obj_stock_flush_required(struct obj_stock_pcp *stock,
 	return flush;
 }
 
-static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
-		bool allow_uncharge, int nr_acct, struct pglist_data *pgdat,
-		enum node_stat_item idx)
+static void __refill_obj_stock(struct obj_cgroup *objcg,
+			       struct obj_stock_pcp *stock,
+			       unsigned int nr_bytes,
+			       bool allow_uncharge)
 {
-	struct obj_stock_pcp *stock;
 	unsigned int nr_pages = 0;
 
-	stock = trylock_stock();
 	if (!stock) {
-		if (pgdat)
-			__account_obj_stock(objcg, NULL, nr_acct, pgdat, idx);
 		nr_pages = nr_bytes >> PAGE_SHIFT;
 		nr_bytes = nr_bytes & (PAGE_SIZE - 1);
 		atomic_add(nr_bytes, &objcg->nr_charged_bytes);
@@ -3404,20 +3408,25 @@ static void refill_obj_stock(struct obj_cgroup *objcg, unsigned int nr_bytes,
 	}
 	stock->nr_bytes += nr_bytes;
 
-	if (pgdat)
-		__account_obj_stock(objcg, stock, nr_acct, pgdat, idx);
-
 	if (allow_uncharge && (stock->nr_bytes > PAGE_SIZE)) {
 		nr_pages = stock->nr_bytes >> PAGE_SHIFT;
 		stock->nr_bytes &= (PAGE_SIZE - 1);
 	}
 
-	unlock_stock(stock);
 out:
 	if (nr_pages)
 		obj_cgroup_uncharge_pages(objcg, nr_pages);
 }
 
+static void refill_obj_stock(struct obj_cgroup *objcg,
+			     unsigned int nr_bytes,
+			     bool allow_uncharge)
+{
+	struct obj_stock_pcp *stock = trylock_stock();
+	__refill_obj_stock(objcg, stock, nr_bytes, allow_uncharge);
+	unlock_stock(stock);
+}
+
 static int __obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp,
 			       size_t size, size_t *remainder)
 {
@@ -3432,13 +3441,12 @@ static int __obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp,
 	return ret;
 }
 
-static int obj_cgroup_charge_account(struct obj_cgroup *objcg, gfp_t gfp, size_t size,
-				     struct pglist_data *pgdat, enum node_stat_item idx)
+int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
 {
 	size_t remainder;
 	int ret;
 
-	if (likely(consume_obj_stock(objcg, size, pgdat, idx)))
+	if (likely(consume_obj_stock(objcg, size)))
 		return 0;
 
 	/*
@@ -3465,20 +3473,15 @@ static int obj_cgroup_charge_account(struct obj_cgroup *objcg, gfp_t gfp, size_t
 	 * race.
 	 */
 	ret = __obj_cgroup_charge(objcg, gfp, size, &remainder);
-	if (!ret && (remainder || pgdat))
-		refill_obj_stock(objcg, remainder, false, size, pgdat, idx);
+	if (!ret && remainder)
+		refill_obj_stock(objcg, remainder, false);
 
 	return ret;
 }
 
-int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size)
-{
-	return obj_cgroup_charge_account(objcg, gfp, size, NULL, 0);
-}
-
 void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 {
-	refill_obj_stock(objcg, size, true, 0, NULL, 0);
+	refill_obj_stock(objcg, size, true);
 }
 
 static inline size_t obj_full_size(struct kmem_cache *s)
@@ -3493,6 +3496,7 @@ static inline size_t obj_full_size(struct kmem_cache *s)
 bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 				  gfp_t flags, size_t size, void **p)
 {
+	size_t obj_size = obj_full_size(s);
 	struct obj_cgroup *objcg;
 	struct slab *slab;
 	unsigned long off;
@@ -3533,6 +3537,7 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 	for (i = 0; i < size; i++) {
 		unsigned long obj_exts;
 		struct slabobj_ext *obj_ext;
+		struct obj_stock_pcp *stock;
 
 		slab = virt_to_slab(p[i]);
 
@@ -3552,9 +3557,20 @@ bool __memcg_slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
 		 * TODO: we could batch this until slab_pgdat(slab) changes
 		 * between iterations, with a more complicated undo
 		 */
-		if (obj_cgroup_charge_account(objcg, flags, obj_full_size(s),
-					slab_pgdat(slab), cache_vmstat_idx(s)))
-			return false;
+		stock = trylock_stock();
+		if (!stock || !__consume_obj_stock(objcg, stock, obj_size)) {
+			size_t remainder;
+
+			unlock_stock(stock);
+			if (__obj_cgroup_charge(objcg, flags, obj_size, &remainder))
+				return false;
+			stock = trylock_stock();
+			if (remainder)
+				__refill_obj_stock(objcg, stock, remainder, false);
+		}
+		__account_obj_stock(objcg, stock, obj_size,
+				    slab_pgdat(slab), cache_vmstat_idx(s));
+		unlock_stock(stock);
 
 		obj_exts = slab_obj_exts(slab);
 		get_slab_obj_exts(obj_exts);
@@ -3576,6 +3592,7 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
 	for (int i = 0; i < objects; i++) {
 		struct obj_cgroup *objcg;
 		struct slabobj_ext *obj_ext;
+		struct obj_stock_pcp *stock;
 		unsigned int off;
 
 		off = obj_to_index(s, slab, p[i]);
@@ -3585,8 +3602,13 @@ void __memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
 			continue;
 
 		obj_ext->objcg = NULL;
-		refill_obj_stock(objcg, obj_size, true, -obj_size,
-				 slab_pgdat(slab), cache_vmstat_idx(s));
+
+		stock = trylock_stock();
+		__refill_obj_stock(objcg, stock, obj_size, true);
+		__account_obj_stock(objcg, stock, -obj_size,
+				    slab_pgdat(slab), cache_vmstat_idx(s));
+		unlock_stock(stock);
+
 		obj_cgroup_put(objcg);
 	}
 }
-- 
2.53.0



  parent reply	other threads:[~2026-03-02 19:53 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-02 19:50 [PATCH 0/5]: memcg: obj stock and slab stat caching cleanups Johannes Weiner
2026-03-02 19:50 ` [PATCH 1/5] mm: memcg: factor out trylock_stock() and unlock_stock() Johannes Weiner
2026-03-02 21:43   ` Shakeel Butt
2026-03-02 19:50 ` [PATCH 2/5] mm: memcg: simplify objcg charge size and stock remainder math Johannes Weiner
2026-03-02 21:44   ` Shakeel Butt
2026-03-02 19:50 ` [PATCH 3/5] mm: memcontrol: split out __obj_cgroup_charge() Johannes Weiner
2026-03-02 21:45   ` Shakeel Butt
2026-03-02 19:50 ` [PATCH 4/5] mm: memcontrol: use __account_obj_stock() in the !locked path Johannes Weiner
2026-03-02 21:50   ` Shakeel Butt
2026-03-02 19:50 ` Johannes Weiner [this message]
2026-03-02 22:20   ` [PATCH 5/5] mm: memcg: separate slab stat accounting from objcg charge cache Shakeel Butt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260302195305.620713-6-hannes@cmpxchg.org \
    --to=hannes@cmpxchg.org \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=hao.li@linux.dev \
    --cc=harry.yoo@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@kernel.org \
    --cc=roman.gushchin@linux.dev \
    --cc=shakeel.butt@linux.dev \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox