From: Harry Yoo <harry.yoo@oracle.com>
To: akpm@linux-foundation.org, vbabka@suse.cz
Cc: andreyknvl@gmail.com, cl@linux.com, dvyukov@google.com,
	glider@google.com, hannes@cmpxchg.org, linux-mm@kvack.org,
	mhocko@kernel.org, muchun.song@linux.dev, rientjes@google.com,
	roman.gushchin@linux.dev, ryabinin.a.a@gmail.com,
	shakeel.butt@linux.dev, surenb@google.com,
	vincenzo.frascino@arm.com, yeoreum.yun@arm.com,
	harry.yoo@oracle.com
Subject: [RFC V2 PATCH 4/5] mm/slab: save memory by allocating slabobj_ext array from leftover
Date: Wed, 27 Aug 2025 20:37:17 +0900
Message-ID: <20250827113726.707801-5-harry.yoo@oracle.com>
In-Reply-To: <20250827113726.707801-1-harry.yoo@oracle.com>

The leftover space in a slab is always smaller than s->size, and kmem
caches for large objects whose sizes are not powers of two tend to have
a greater amount of leftover space per slab. In some cases, the leftover
space is larger than the size of the slabobj_ext array for the slab.

An excellent example of such a cache is ext4_inode_cache. On my system,
the object size is 1144 bytes, with a preferred order of 3, 28 objects
per slab, and 736 bytes of leftover space per slab.

Since the size of the slabobj_ext array is only 224 bytes (w/o mem
profiling) or 448 bytes (w/ mem profiling) per slab, the entire array
fits within the leftover space.
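
As a back-of-the-envelope check (a sketch assuming struct slabobj_ext is
8 bytes without memory profiling and 16 bytes with it, on 64-bit):

  order-3 slab:   8 * 4096      = 32768 bytes
  objects:        32768 / 1144  = 28 (28 * 1144 = 32032 bytes used)
  leftover:       32768 - 32032 = 736 bytes
  obj_ext array:  28 * 8  = 224 bytes (w/o mem profiling)
                  28 * 16 = 448 bytes (w/ mem profiling)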

When the leftover space is large enough, allocate the slabobj_ext array
from it instead of using kcalloc(). The array is always allocated when a
new slab is created, because implementing lazy allocation correctly is
difficult without expensive synchronization.
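
In pseudo-C, the placement logic reduces to the following sketch
(condensed from the helpers added below; error paths and the
MEMCG_DATA_OBJEXTS tagging are elided):

	unsigned long offset, size;

	/* The array starts past the left red zone and all objects... */
	offset = s->red_left_pad + s->size * slab->objects;
	/* ...aligned suitably for struct slabobj_ext. */
	offset = ALIGN(offset, sizeof(struct slabobj_ext));
	size = sizeof(struct slabobj_ext) * slab->objects;

	if (offset + size <= slab_size(slab))
		/* Place the array in the leftover space. */
		slab->obj_exts = (unsigned long)slab_address(slab) + offset;
	/* otherwise fall back to the kcalloc()-based alloc_slab_obj_exts() */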

To avoid unnecessary overhead when neither MEMCG (with SLAB_ACCOUNT) nor
MEM_ALLOC_PROFILING is used for a cache, allocate the slabobj_ext array
only when at least one of them is enabled at the time the slab is
created.

[ MEMCG=y, MEM_ALLOC_PROFILING=y ]

Before patch (run updatedb):
  Slab:            5815196 kB
  SReclaimable:    5042824 kB
  SUnreclaim:       772372 kB

After patch (run updatedb):
  Slab:            5748664 kB
  SReclaimable:    5041608 kB
  SUnreclaim:       707084 kB (-63.75 MiB)

[ MEMCG=y, MEM_ALLOC_PROFILING=n ]

Before patch (run updatedb):
  Slab:            5637764 kB
  SReclaimable:    5042428 kB
  SUnreclaim:       595284 kB

After patch (run updatedb):
  Slab:            5598992 kB
  SReclaimable:    5042248 kB
  SUnreclaim:       560396 kB (-34.07 MiB)

Enjoy the memory savings!

Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
---
 mm/slub.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 140 insertions(+), 5 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index b80bf3a24ab9..ad9a1cae48b2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -878,6 +878,94 @@ static inline unsigned int get_orig_size(struct kmem_cache *s, void *object)
 	return *(unsigned int *)p;
 }
 
+#ifdef CONFIG_SLAB_OBJ_EXT
+
+/*
+ * Check whether memory cgroup or memory allocation profiling is enabled.
+ * If either is enabled, SLUB tries to reduce the memory overhead of
+ * accounting slab objects. If neither is enabled when this function is
+ * called, the optimization is simply skipped to avoid affecting caches
+ * that do not need slabobj_ext metadata.
+ *
+ * However, this may skip the optimization for slabs created early in
+ * boot, before memory cgroups or memory allocation profiling are
+ * initialized, even if they are enabled later.
+ */
+static inline bool need_slab_obj_exts(struct kmem_cache *s)
+{
+	if (!mem_cgroup_disabled() && (s->flags & SLAB_ACCOUNT))
+		return true;
+
+	if (mem_alloc_profiling_enabled())
+		return true;
+
+	return false;
+}
+
+static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
+{
+	return sizeof(struct slabobj_ext) * slab->objects;
+}
+
+static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
+						    struct slab *slab)
+{
+	unsigned long objext_offset;
+
+	objext_offset = s->red_left_pad + s->size * slab->objects;
+	objext_offset = ALIGN(objext_offset, sizeof(struct slabobj_ext));
+	return objext_offset;
+}
+
+static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
+						     struct slab *slab)
+{
+	unsigned long objext_offset = obj_exts_offset_in_slab(s, slab);
+	unsigned long objext_size = obj_exts_size_in_slab(slab);
+
+	return objext_offset + objext_size <= slab_size(slab);
+}
+
+static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
+{
+	unsigned long obj_exts;
+
+	if (!obj_exts_fit_within_slab_leftover(s, slab))
+		return false;
+
+	obj_exts = (unsigned long)slab_address(slab);
+	obj_exts += obj_exts_offset_in_slab(s, slab);
+	return obj_exts == slab_obj_exts(slab);
+}
+#else
+static inline bool need_slab_obj_exts(struct kmem_cache *s)
+{
+	return false;
+}
+
+static inline unsigned int obj_exts_size_in_slab(struct slab *slab)
+{
+	return 0;
+}
+
+static inline unsigned long obj_exts_offset_in_slab(struct kmem_cache *s,
+						    struct slab *slab)
+{
+	return 0;
+}
+
+static inline bool obj_exts_fit_within_slab_leftover(struct kmem_cache *s,
+						     struct slab *slab)
+{
+	return false;
+}
+
+static inline bool obj_exts_in_slab(struct kmem_cache *s, struct slab *slab)
+{
+	return false;
+}
+#endif
+
 #ifdef CONFIG_SLUB_DEBUG
 static unsigned long object_map[BITS_TO_LONGS(MAX_OBJS_PER_PAGE)];
 static DEFINE_SPINLOCK(object_map_lock);
@@ -1406,7 +1494,15 @@ slab_pad_check(struct kmem_cache *s, struct slab *slab)
 	start = slab_address(slab);
 	length = slab_size(slab);
 	end = start + length;
-	remainder = length % s->size;
+
+	if (obj_exts_in_slab(s, slab)) {
+		remainder = length;
+		remainder -= obj_exts_offset_in_slab(s, slab);
+		remainder -= obj_exts_size_in_slab(slab);
+	} else {
+		remainder = length % s->size;
+	}
+
 	if (!remainder)
 		return;
 
@@ -2140,6 +2236,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
 	if (!obj_exts)
 		return;
 
+	if (obj_exts_in_slab(slab->slab_cache, slab)) {
+		slab->obj_exts = 0;
+		return;
+	}
+
 	/*
 	 * obj_exts was created with __GFP_NO_OBJ_EXT flag, therefore its
 	 * corresponding extension will be NULL. alloc_tag_sub() will throw a
@@ -2152,6 +2253,29 @@ static inline void free_slab_obj_exts(struct slab *slab)
 	slab->obj_exts = 0;
 }
 
+/*
+ * Try to allocate the slabobj_ext array from the unused space at the end
+ * of the slab. This must be called on a freshly allocated slab, before it
+ * is visible to other CPUs, to prevent concurrency problems.
+ */
+static void alloc_slab_obj_exts_early(struct kmem_cache *s, struct slab *slab)
+{
+	void *addr;
+
+	if (!need_slab_obj_exts(s))
+		return;
+
+	if (obj_exts_fit_within_slab_leftover(s, slab)) {
+		addr = slab_address(slab) + obj_exts_offset_in_slab(s, slab);
+		kasan_unpoison_range(addr, obj_exts_size_in_slab(slab));
+		memset(addr, 0, obj_exts_size_in_slab(slab));
+		slab->obj_exts = (unsigned long)addr;
+		if (IS_ENABLED(CONFIG_MEMCG))
+			slab->obj_exts |= MEMCG_DATA_OBJEXTS;
+		slab_set_stride(slab, sizeof(struct slabobj_ext));
+	}
+}
+
 #else /* CONFIG_SLAB_OBJ_EXT */
 
 static inline void init_slab_obj_exts(struct slab *slab)
@@ -2168,6 +2292,11 @@ static inline void free_slab_obj_exts(struct slab *slab)
 {
 }
 
+static inline void alloc_slab_obj_exts_early(struct kmem_cache *s,
+						       struct slab *slab)
+{
+}
+
 #endif /* CONFIG_SLAB_OBJ_EXT */
 
 #ifdef CONFIG_MEM_ALLOC_PROFILING
@@ -3125,7 +3254,9 @@ static inline bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
 static __always_inline void account_slab(struct slab *slab, int order,
 					 struct kmem_cache *s, gfp_t gfp)
 {
-	if (memcg_kmem_online() && (s->flags & SLAB_ACCOUNT))
+	if (memcg_kmem_online() &&
+			(s->flags & SLAB_ACCOUNT) &&
+			!slab_obj_exts(slab))
 		alloc_slab_obj_exts(slab, s, gfp, true);
 
 	mod_node_page_state(slab_pgdat(slab), cache_vmstat_idx(s),
@@ -3184,9 +3315,6 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	slab->objects = oo_objects(oo);
 	slab->inuse = 0;
 	slab->frozen = 0;
-	init_slab_obj_exts(slab);
-
-	account_slab(slab, oo_order(oo), s, flags);
 
 	slab->slab_cache = s;
 
@@ -3195,6 +3323,13 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	start = slab_address(slab);
 
 	setup_slab_debug(s, slab, start);
+	init_slab_obj_exts(slab);
+	/*
+	 * Poisoning (done in setup_slab_debug() above) must happen before
+	 * the slabobj_ext array is initialized, or it would overwrite the array.
+	 */
+	alloc_slab_obj_exts_early(s, slab);
+	account_slab(slab, oo_order(oo), s, flags);
 
 	shuffle = shuffle_freelist(s, slab);
 
-- 
2.43.0


