[PATCH RFC v3 3/9] mm/slub: handle bulk and single object freeing separately

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Vlastimil Babka <vbabka@suse.cz>
To: Christoph Lameter <cl@linux.com>,
	Pekka Enberg <penberg@kernel.org>,
	 David Rientjes <rientjes@google.com>,
	Joonsoo Kim <iamjoonsoo.kim@lge.com>,
	 Matthew Wilcox <willy@infradead.org>,
	 "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	 Roman Gushchin <roman.gushchin@linux.dev>,
	 Hyeonggon Yoo <42.hyeyoo@gmail.com>,
	 Alexander Potapenko <glider@google.com>,
	Marco Elver <elver@google.com>,
	 Dmitry Vyukov <dvyukov@google.com>,
	linux-mm@kvack.org,  linux-kernel@vger.kernel.org,
	maple-tree@lists.infradead.org,  kasan-dev@googlegroups.com,
	Vlastimil Babka <vbabka@suse.cz>
Subject: [PATCH RFC v3 3/9] mm/slub: handle bulk and single object freeing separately
Date: Wed, 29 Nov 2023 10:53:28 +0100	[thread overview]
Message-ID: <20231129-slub-percpu-caches-v3-3-6bcf536772bc@suse.cz> (raw)
In-Reply-To: <20231129-slub-percpu-caches-v3-0-6bcf536772bc@suse.cz>

Until now we have a single function slab_free() handling both single
object freeing and bulk freeing with neccessary hooks, the latter case
requiring slab_free_freelist_hook(). It should be however better to
distinguish the two scenarios for the following reasons:

- code simpler to follow for the single object case

- better code generation - although inlining should eliminate the
  slab_free_freelist_hook() in case no debugging options are enabled, it
  seems it's not perfect. When e.g. KASAN is enabled, we're imposing
  additional unnecessary overhead for single object freeing.

- preparation to add percpu array caches in later patches

Therefore, simplify slab_free() for the single object case by dropping
unnecessary parameters and calling only slab_free_hook() instead of
slab_free_freelist_hook(). Rename the bulk variant to slab_free_bulk()
and adjust callers accordingly.

While at it, flip (and document) slab_free_hook() return value so that
it returns true when the freeing can proceed, which matches the logic of
slab_free_freelist_hook() and is not confusingly the opposite.

Additionally we can simplify a bit by changing the tail parameter of
do_slab_free() when freeing a single object - instead of NULL we can set
equal to head.

bloat-o-meter shows small code reduction with a .config that has KASAN
etc disabled:

add/remove: 0/0 grow/shrink: 0/4 up/down: 0/-118 (-118)
Function                                     old     new   delta
kmem_cache_alloc_bulk                       1203    1196      -7
kmem_cache_free                              861     835     -26
__kmem_cache_free                            741     704     -37
kmem_cache_free_bulk                         911     863     -48

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/slub.c | 57 ++++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 34 insertions(+), 23 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 16748aeada8f..7d23f10d42e6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1770,9 +1770,12 @@ static bool freelist_corrupted(struct kmem_cache *s, struct slab *slab,
 /*
  * Hooks for other subsystems that check memory allocations. In a typical
  * production configuration these hooks all should produce no code at all.
+ *
+ * Returns true if freeing of the object can proceed, false if its reuse
+ * was delayed by KASAN quarantine.
  */
-static __always_inline bool slab_free_hook(struct kmem_cache *s,
-						void *x, bool init)
+static __always_inline
+bool slab_free_hook(struct kmem_cache *s, void *x, bool init)
 {
 	kmemleak_free_recursive(x, s->flags);
 	kmsan_slab_free(s, x);
@@ -1805,7 +1808,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s,
 		       s->size - s->inuse - rsize);
 	}
 	/* KASAN might put x into memory quarantine, delaying its reuse. */
-	return kasan_slab_free(s, x, init);
+	return !kasan_slab_free(s, x, init);
 }
 
 static inline bool slab_free_freelist_hook(struct kmem_cache *s,
@@ -1815,7 +1818,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
 
 	void *object;
 	void *next = *head;
-	void *old_tail = *tail ? *tail : *head;
+	void *old_tail = *tail;
 
 	if (is_kfence_address(next)) {
 		slab_free_hook(s, next, false);
@@ -1831,7 +1834,7 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
 		next = get_freepointer(s, object);
 
 		/* If object's reuse doesn't have to be delayed */
-		if (!slab_free_hook(s, object, slab_want_init_on_free(s))) {
+		if (slab_free_hook(s, object, slab_want_init_on_free(s))) {
 			/* Move object to the new freelist */
 			set_freepointer(s, object, *head);
 			*head = object;
@@ -1846,9 +1849,6 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
 		}
 	} while (object != old_tail);
 
-	if (*head == *tail)
-		*tail = NULL;
-
 	return *head != NULL;
 }
 
@@ -3743,7 +3743,6 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
 				struct slab *slab, void *head, void *tail,
 				int cnt, unsigned long addr)
 {
-	void *tail_obj = tail ? : head;
 	struct kmem_cache_cpu *c;
 	unsigned long tid;
 	void **freelist;
@@ -3762,14 +3761,14 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
 	barrier();
 
 	if (unlikely(slab != c->slab)) {
-		__slab_free(s, slab, head, tail_obj, cnt, addr);
+		__slab_free(s, slab, head, tail, cnt, addr);
 		return;
 	}
 
 	if (USE_LOCKLESS_FAST_PATH()) {
 		freelist = READ_ONCE(c->freelist);
 
-		set_freepointer(s, tail_obj, freelist);
+		set_freepointer(s, tail, freelist);
 
 		if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
 			note_cmpxchg_failure("slab_free", s, tid);
@@ -3786,7 +3785,7 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
 		tid = c->tid;
 		freelist = c->freelist;
 
-		set_freepointer(s, tail_obj, freelist);
+		set_freepointer(s, tail, freelist);
 		c->freelist = head;
 		c->tid = next_tid(tid);
 
@@ -3799,15 +3798,27 @@ static void do_slab_free(struct kmem_cache *s,
 				struct slab *slab, void *head, void *tail,
 				int cnt, unsigned long addr)
 {
-	void *tail_obj = tail ? : head;
-
-	__slab_free(s, slab, head, tail_obj, cnt, addr);
+	__slab_free(s, slab, head, tail, cnt, addr);
 }
 #endif /* CONFIG_SLUB_TINY */
 
-static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
-				      void *head, void *tail, void **p, int cnt,
-				      unsigned long addr)
+static __fastpath_inline
+void slab_free(struct kmem_cache *s, struct slab *slab, void *object,
+	       unsigned long addr)
+{
+	bool init;
+
+	memcg_slab_free_hook(s, slab, &object, 1);
+
+	init = !is_kfence_address(object) && slab_want_init_on_free(s);
+
+	if (likely(slab_free_hook(s, object, init)))
+		do_slab_free(s, slab, object, object, 1, addr);
+}
+
+static __fastpath_inline
+void slab_free_bulk(struct kmem_cache *s, struct slab *slab, void *head,
+		    void *tail, void **p, int cnt, unsigned long addr)
 {
 	memcg_slab_free_hook(s, slab, p, cnt);
 	/*
@@ -3821,13 +3832,13 @@ static __fastpath_inline void slab_free(struct kmem_cache *s, struct slab *slab,
 #ifdef CONFIG_KASAN_GENERIC
 void ___cache_free(struct kmem_cache *cache, void *x, unsigned long addr)
 {
-	do_slab_free(cache, virt_to_slab(x), x, NULL, 1, addr);
+	do_slab_free(cache, virt_to_slab(x), x, x, 1, addr);
 }
 #endif
 
 void __kmem_cache_free(struct kmem_cache *s, void *x, unsigned long caller)
 {
-	slab_free(s, virt_to_slab(x), x, NULL, &x, 1, caller);
+	slab_free(s, virt_to_slab(x), x, caller);
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -3836,7 +3847,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
 	if (!s)
 		return;
 	trace_kmem_cache_free(_RET_IP_, x, s);
-	slab_free(s, virt_to_slab(x), x, NULL, &x, 1, _RET_IP_);
+	slab_free(s, virt_to_slab(x), x, _RET_IP_);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -3953,8 +3964,8 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
 		if (!df.slab)
 			continue;
 
-		slab_free(df.s, df.slab, df.freelist, df.tail, &p[size], df.cnt,
-			  _RET_IP_);
+		slab_free_bulk(df.s, df.slab, df.freelist, df.tail, &p[size],
+				df.cnt, _RET_IP_);
 	} while (likely(size));
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);

-- 
2.43.0

next prev parent reply	other threads:[~2023-11-29  9:53 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-11-29  9:53 [PATCH RFC v3 0/9] SLUB percpu array caches and maple tree nodes Vlastimil Babka
2023-11-29  9:53 ` [PATCH RFC v3 1/9] mm/slub: fix bulk alloc and free stats Vlastimil Babka
2023-11-29  9:53 ` [PATCH RFC v3 2/9] mm/slub: introduce __kmem_cache_free_bulk() without free hooks Vlastimil Babka
2023-11-29  9:53 ` Vlastimil Babka [this message]
2023-11-29  9:53 ` [PATCH RFC v3 4/9] mm/slub: free KFENCE objects in slab_free_hook() Vlastimil Babka
2023-11-29 12:00   ` Marco Elver
2023-11-29  9:53 ` [PATCH RFC v3 5/9] mm/slub: add opt-in percpu array cache of objects Vlastimil Babka
2023-11-29 10:35   ` Marco Elver
2023-12-15 18:28   ` Suren Baghdasaryan
2023-12-15 21:17     ` Suren Baghdasaryan
2023-11-29  9:53 ` [PATCH RFC v3 6/9] tools: Add SLUB percpu array functions for testing Vlastimil Babka
2023-11-29  9:53 ` [PATCH RFC v3 7/9] maple_tree: use slub percpu array Vlastimil Babka
2023-11-29  9:53 ` [PATCH RFC v3 8/9] maple_tree: Remove MA_STATE_PREALLOC Vlastimil Babka
2023-11-29  9:53 ` [PATCH RFC v3 9/9] maple_tree: replace preallocation with slub percpu array prefill Vlastimil Babka
2023-11-29 20:16 ` [PATCH RFC v3 0/9] SLUB percpu array caches and maple tree nodes Christoph Lameter (Ampere)
2023-11-29 21:20   ` Matthew Wilcox
2023-12-14 20:14     ` Christoph Lameter (Ampere)
2023-11-30  9:14   ` Vlastimil Babka

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20231129-slub-percpu-caches-v3-3-6bcf536772bc@suse.cz \
    --to=vbabka@suse.cz \
    --cc=42.hyeyoo@gmail.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux.com \
    --cc=dvyukov@google.com \
    --cc=elver@google.com \
    --cc=glider@google.com \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=maple-tree@lists.infradead.org \
    --cc=penberg@kernel.org \
    --cc=rientjes@google.com \
    --cc=roman.gushchin@linux.dev \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox