linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: <hu.shengming@zte.com.cn>
To: <vbabka@kernel.org>, <harry@kernel.org>, <akpm@linux-foundation.org>
Cc: <hao.li@linux.dev>, <cl@gentwo.org>, <rientjes@google.com>,
	<roman.gushchin@linux.dev>, <linux-mm@kvack.org>,
	<linux-kernel@vger.kernel.org>, <zhang.run@zte.com.cn>,
	<xu.xin16@zte.com.cn>, <yang.tao172@zte.com.cn>,
	<yang.yang29@zte.com.cn>
Subject: [PATCH] mm/slub: skip freelist construction for whole-slab bulk refill
Date: Sat, 28 Mar 2026 12:55:38 +0800 (CST)	[thread overview]
Message-ID: <20260328125538341lvTGRpS62UNdRiAAz2gH3@zte.com.cn> (raw)

From: Shengming Hu <hu.shengming@zte.com.cn>

refill_objects() still carries a long-standing TODO noting that a
whole-slab bulk refill could skip building a freelist that is
immediately drained.

When the remaining bulk allocation is large enough to fully consume a
new slab, constructing the freelist is unnecessary overhead. Instead,
allocate the slab without building its freelist and hand out all objects
directly to the caller. The slab is then initialized as fully in-use.

Keep the existing behavior when CONFIG_SLAB_FREELIST_RANDOM is enabled,
as freelist construction is required to provide randomized object order.

Additionally, mark setup_object() as inline. After introducing this
optimization, the compiler no longer consistently inlines this helper,
which can regress performance in this hot path. Explicitly marking it
inline restores the expected code generation.

This reduces per-object overhead on the bulk allocation path and
significantly improves allocation throughput.

Benchmark results (slub_bulk_bench):

  Machine: qemu-system-x86_64 -m 1024M -smp 8
  Kernel:  Linux 7.0.0-rc5-next-20260326
  Config:  x86_64_defconfig
  Rounds:  20
  Total:   256MB

  obj_size=16, batch=256:
    before: 28.80 ± 1.20 ns/object
    after:  17.95 ± 0.94 ns/object
    delta:  -37.7%

  obj_size=32, batch=128:
    before: 33.00 ± 0.00 ns/object
    after:  21.75 ± 0.44 ns/object
    delta:  -34.1%

  obj_size=64, batch=64:
    before: 44.30 ± 0.73 ns/object
    after:  30.60 ± 0.50 ns/object
    delta:  -30.9%

  obj_size=128, batch=32:
    before: 81.40 ± 1.85 ns/object
    after:  47.00 ± 0.00 ns/object
    delta:  -42.3%

  obj_size=256, batch=32:
    before: 101.20 ± 1.28 ns/object
    after:  52.55 ± 0.60 ns/object
    delta:  -48.1%

  obj_size=512, batch=32:
    before: 109.40 ± 2.30 ns/object
    after:  53.80 ± 0.62 ns/object
    delta:  -50.8%

Link: https://github.com/HSM6236/slub_bulk_test.git
Signed-off-by: Shengming Hu <hu.shengming@zte.com.cn>
---
 mm/slub.c | 90 +++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 71 insertions(+), 19 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index fb2c5c57bc4e..c0ecfb42b035 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2733,7 +2733,7 @@ bool slab_free_freelist_hook(struct kmem_cache *s, void **head, void **tail,
 	return *head != NULL;
 }

-static void *setup_object(struct kmem_cache *s, void *object)
+static inline void *setup_object(struct kmem_cache *s, void *object)
 {
 	setup_object_debug(s, object);
 	object = kasan_init_slab_obj(s, object);
@@ -3438,7 +3438,8 @@ static __always_inline void unaccount_slab(struct slab *slab, int order,
 			    -(PAGE_SIZE << order));
 }

-static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node,
+				  bool build_freelist)
 {
 	bool allow_spin = gfpflags_allow_spinning(flags);
 	struct slab *slab;
@@ -3446,7 +3447,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	gfp_t alloc_gfp;
 	void *start, *p, *next;
 	int idx;
-	bool shuffle;
+	bool shuffle = false;

 	flags &= gfp_allowed_mask;

@@ -3483,6 +3484,7 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	slab->frozen = 0;

 	slab->slab_cache = s;
+	slab->freelist = NULL;

 	kasan_poison_slab(slab);

@@ -3497,9 +3499,10 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	alloc_slab_obj_exts_early(s, slab);
 	account_slab(slab, oo_order(oo), s, flags);

-	shuffle = shuffle_freelist(s, slab, allow_spin);
+	if (build_freelist)
+		shuffle = shuffle_freelist(s, slab, allow_spin);

-	if (!shuffle) {
+	if (build_freelist && !shuffle) {
 		start = fixup_red_left(s, start);
 		start = setup_object(s, start);
 		slab->freelist = start;
@@ -3515,7 +3518,8 @@ static struct slab *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	return slab;
 }

-static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
+static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node,
+			     bool build_freelist)
 {
 	if (unlikely(flags & GFP_SLAB_BUG_MASK))
 		flags = kmalloc_fix_flags(flags);
@@ -3523,7 +3527,7 @@ static struct slab *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	WARN_ON_ONCE(s->ctor && (flags & __GFP_ZERO));

 	return allocate_slab(s,
-		flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node);
+		flags & (GFP_RECLAIM_MASK | GFP_CONSTRAINT_MASK), node, build_freelist);
 }

 static void __free_slab(struct kmem_cache *s, struct slab *slab, bool allow_spin)
@@ -4395,6 +4399,45 @@ static unsigned int alloc_from_new_slab(struct kmem_cache *s, struct slab *slab,
 	return allocated;
 }

+static unsigned int alloc_whole_from_new_slab(struct kmem_cache *s,
+		struct slab *slab, void **p)
+{
+	unsigned int allocated = 0;
+	void *object;
+
+	object = fixup_red_left(s, slab_address(slab));
+	object = setup_object(s, object);
+
+	while (allocated < slab->objects - 1) {
+		p[allocated] = object;
+		maybe_wipe_obj_freeptr(s, object);
+
+		allocated++;
+		object += s->size;
+		object = setup_object(s, object);
+	}
+
+	p[allocated] = object;
+	maybe_wipe_obj_freeptr(s, object);
+	allocated++;
+
+	slab->freelist = NULL;
+	slab->inuse = slab->objects;
+	inc_slabs_node(s, slab_nid(slab), slab->objects);
+
+	return allocated;
+}
+
+static inline bool bulk_refill_consumes_whole_slab(struct kmem_cache *s,
+		unsigned int count)
+{
+#ifdef CONFIG_SLAB_FREELIST_RANDOM
+	return false;
+#else
+	return count >= oo_objects(s->oo);
+#endif
+}
+
 /*
  * Slow path. We failed to allocate via percpu sheaves or they are not available
  * due to bootstrap or debugging enabled or SLUB_TINY.
@@ -4441,7 +4484,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	if (object)
 		goto success;

-	slab = new_slab(s, pc.flags, node);
+	slab = new_slab(s, pc.flags, node, true);

 	if (unlikely(!slab)) {
 		if (node != NUMA_NO_NODE && !(gfpflags & __GFP_THISNODE)
@@ -7244,18 +7287,27 @@ refill_objects(struct kmem_cache *s, void **p, gfp_t gfp, unsigned int min,

 new_slab:

-	slab = new_slab(s, gfp, local_node);
-	if (!slab)
-		goto out;
-
-	stat(s, ALLOC_SLAB);
-
 	/*
-	 * TODO: possible optimization - if we know we will consume the whole
-	 * slab we might skip creating the freelist?
+	 * If the remaining bulk allocation is large enough to consume
+	 * an entire slab, avoid building the freelist only to drain it
+	 * immediately. Instead, allocate a slab without a freelist and
+	 * hand out all objects directly.
 	 */
-	refilled += alloc_from_new_slab(s, slab, p + refilled, max - refilled,
-					/* allow_spin = */ true);
+	if (bulk_refill_consumes_whole_slab(s, max - refilled)) {
+		slab = new_slab(s, gfp, local_node, false);
+		if (!slab)
+			goto out;
+		stat(s, ALLOC_SLAB);
+		refilled += alloc_whole_from_new_slab(s, slab, p + refilled);
+	} else {
+		slab = new_slab(s, gfp, local_node, true);
+		if (!slab)
+			goto out;
+		stat(s, ALLOC_SLAB);
+		refilled += alloc_from_new_slab(s, slab, p + refilled,
+						max - refilled,
+						/* allow_spin = */ true);
+	}

 	if (refilled < min)
 		goto new_slab;
@@ -7587,7 +7639,7 @@ static void early_kmem_cache_node_alloc(int node)

 	BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));

-	slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
+	slab = new_slab(kmem_cache_node, GFP_NOWAIT, node, true);

 	BUG_ON(!slab);
 	if (slab_nid(slab) != node) {
-- 
2.25.1


             reply	other threads:[~2026-03-28  4:55 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-28  4:55 hu.shengming [this message]
2026-03-30 12:21 ` Vlastimil Babka (SUSE)
2026-03-30 13:26   ` hu.shengming

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260328125538341lvTGRpS62UNdRiAAz2gH3@zte.com.cn \
    --to=hu.shengming@zte.com.cn \
    --cc=akpm@linux-foundation.org \
    --cc=cl@gentwo.org \
    --cc=hao.li@linux.dev \
    --cc=harry@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=rientjes@google.com \
    --cc=roman.gushchin@linux.dev \
    --cc=vbabka@kernel.org \
    --cc=xu.xin16@zte.com.cn \
    --cc=yang.tao172@zte.com.cn \
    --cc=yang.yang29@zte.com.cn \
    --cc=zhang.run@zte.com.cn \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox