From: Hyeonggon Yoo <42.hyeyoo@gmail.com>
To: Vlastimil Babka <vbabka@suse.cz>
Cc: David Rientjes <rientjes@google.com>,
Christoph Lameter <cl@linux.com>,
Pekka Enberg <penberg@kernel.org>,
Joonsoo Kim <iamjoonsoo.kim@lge.com>,
Andrew Morton <akpm@linux-foundation.org>,
Roman Gushchin <roman.gushchin@linux.dev>,
Andrey Ryabinin <ryabinin.a.a@gmail.com>,
Alexander Potapenko <glider@google.com>,
Andrey Konovalov <andreyknvl@gmail.com>,
Dmitry Vyukov <dvyukov@google.com>,
Vincenzo Frascino <vincenzo.frascino@arm.com>,
Marco Elver <elver@google.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@kernel.org>,
Shakeel Butt <shakeelb@google.com>,
Muchun Song <muchun.song@linux.dev>,
Kees Cook <keescook@chromium.org>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
kasan-dev@googlegroups.com, cgroups@vger.kernel.org,
linux-hardening@vger.kernel.org
Subject: Re: [PATCH v2 20/21] mm/slub: optimize alloc fastpath code layout
Date: Thu, 7 Dec 2023 11:32:12 +0900 [thread overview]
Message-ID: <ZXEurG+jk62uNgRK@localhost.localdomain> (raw)
In-Reply-To: <20231120-slab-remove-slab-v2-20-9c9c70177183@suse.cz>
On Mon, Nov 20, 2023 at 07:34:31PM +0100, Vlastimil Babka wrote:
> With allocation fastpaths no longer divided between two .c files, we
> have better inlining, however checking the disassembly of
> kmem_cache_alloc() reveals we can do better to make the fastpaths
> smaller and move the less common situations out of line or to separate
> functions, to reduce instruction cache pressure.
>
> - split memcg pre/post alloc hooks to inlined checks that use likely()
> to assume there will be no objcg handling necessary, and non-inline
> functions doing the actual handling
>
> - add some more likely/unlikely() to pre/post alloc hooks to indicate
> which scenarios should be out of line
>
> - change gfp_allowed_mask handling in slab_post_alloc_hook() so the
> code can be optimized away when kasan/kmsan/kmemleak is configured out
>
> bloat-o-meter shows:
> add/remove: 4/2 grow/shrink: 1/8 up/down: 521/-2924 (-2403)
> Function old new delta
> __memcg_slab_post_alloc_hook - 461 +461
> kmem_cache_alloc_bulk 775 791 +16
> __pfx_should_failslab.constprop - 16 +16
> __pfx___memcg_slab_post_alloc_hook - 16 +16
> should_failslab.constprop - 12 +12
> __pfx_memcg_slab_post_alloc_hook 16 - -16
> kmem_cache_alloc_lru 1295 1023 -272
> kmem_cache_alloc_node 1118 817 -301
> kmem_cache_alloc 1076 772 -304
> kmalloc_node_trace 1149 838 -311
> kmalloc_trace 1102 789 -313
> __kmalloc_node_track_caller 1393 1080 -313
> __kmalloc_node 1397 1082 -315
> __kmalloc 1374 1059 -315
> memcg_slab_post_alloc_hook 464 - -464
>
> Note that gcc still decided to inline __memcg_slab_pre_alloc_hook(), but the
> code is out of line. Forcing noinline did not improve the results. As a
> result the fastpaths are shorter and overall code size is reduced.
>
> Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
> ---
> mm/slub.c | 89 ++++++++++++++++++++++++++++++++++++++-------------------------
> 1 file changed, 54 insertions(+), 35 deletions(-)
>
> diff --git a/mm/slub.c b/mm/slub.c
> index 5683f1d02e4f..77d259f3d592 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -1866,25 +1866,17 @@ static inline size_t obj_full_size(struct kmem_cache *s)
> /*
> * Returns false if the allocation should fail.
> */
> -static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
> - struct list_lru *lru,
> - struct obj_cgroup **objcgp,
> - size_t objects, gfp_t flags)
> +static bool __memcg_slab_pre_alloc_hook(struct kmem_cache *s,
> + struct list_lru *lru,
> + struct obj_cgroup **objcgp,
> + size_t objects, gfp_t flags)
> {
> - struct obj_cgroup *objcg;
> -
> - if (!memcg_kmem_online())
> - return true;
> -
> - if (!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT))
> - return true;
> -
> /*
> * The obtained objcg pointer is safe to use within the current scope,
> * defined by current task or set_active_memcg() pair.
> * obj_cgroup_get() is used to get a permanent reference.
> */
> - objcg = current_obj_cgroup();
> + struct obj_cgroup *objcg = current_obj_cgroup();
> if (!objcg)
> return true;
>
> @@ -1907,17 +1899,34 @@ static inline bool memcg_slab_pre_alloc_hook(struct kmem_cache *s,
> return true;
> }
>
> -static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
> - struct obj_cgroup *objcg,
> - gfp_t flags, size_t size,
> - void **p)
> +/*
> + * Returns false if the allocation should fail.
> + */
> +static __fastpath_inline
> +bool memcg_slab_pre_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
> + struct obj_cgroup **objcgp, size_t objects,
> + gfp_t flags)
> +{
> + if (!memcg_kmem_online())
> + return true;
> +
> + if (likely(!(flags & __GFP_ACCOUNT) && !(s->flags & SLAB_ACCOUNT)))
> + return true;
> +
> + return likely(__memcg_slab_pre_alloc_hook(s, lru, objcgp, objects,
> + flags));
> +}
> +
> +static void __memcg_slab_post_alloc_hook(struct kmem_cache *s,
> + struct obj_cgroup *objcg,
> + gfp_t flags, size_t size,
> + void **p)
> {
> struct slab *slab;
> unsigned long off;
> size_t i;
>
> - if (!memcg_kmem_online() || !objcg)
> - return;
> + flags &= gfp_allowed_mask;
>
> for (i = 0; i < size; i++) {
> if (likely(p[i])) {
> @@ -1940,6 +1949,16 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
> }
> }
>
> +static __fastpath_inline
> +void memcg_slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
> + gfp_t flags, size_t size, void **p)
> +{
> + if (likely(!memcg_kmem_online() || !objcg))
> + return;
> +
> + return __memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
> +}
> +
> static inline void memcg_slab_free_hook(struct kmem_cache *s, struct slab *slab,
> void **p, int objects)
> {
> @@ -3709,34 +3728,34 @@ noinline int should_failslab(struct kmem_cache *s, gfp_t gfpflags)
> }
> ALLOW_ERROR_INJECTION(should_failslab, ERRNO);
>
> -static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
> - struct list_lru *lru,
> - struct obj_cgroup **objcgp,
> - size_t size, gfp_t flags)
> +static __fastpath_inline
> +struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
> + struct list_lru *lru,
> + struct obj_cgroup **objcgp,
> + size_t size, gfp_t flags)
> {
> flags &= gfp_allowed_mask;
>
> might_alloc(flags);
>
> - if (should_failslab(s, flags))
> + if (unlikely(should_failslab(s, flags)))
> return NULL;
>
> - if (!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags))
> + if (unlikely(!memcg_slab_pre_alloc_hook(s, lru, objcgp, size, flags)))
> return NULL;
>
> return s;
> }
>
> -static inline void slab_post_alloc_hook(struct kmem_cache *s,
> - struct obj_cgroup *objcg, gfp_t flags,
> - size_t size, void **p, bool init,
> - unsigned int orig_size)
> +static __fastpath_inline
> +void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
> + gfp_t flags, size_t size, void **p, bool init,
> + unsigned int orig_size)
> {
> unsigned int zero_size = s->object_size;
> bool kasan_init = init;
> size_t i;
> -
> - flags &= gfp_allowed_mask;
> + gfp_t init_flags = flags & gfp_allowed_mask;
>
> /*
> * For kmalloc object, the allocated memory size(object_size) is likely
> @@ -3769,13 +3788,13 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
> * As p[i] might get tagged, memset and kmemleak hook come after KASAN.
> */
> for (i = 0; i < size; i++) {
> - p[i] = kasan_slab_alloc(s, p[i], flags, kasan_init);
> + p[i] = kasan_slab_alloc(s, p[i], init_flags, kasan_init);
> if (p[i] && init && (!kasan_init ||
> !kasan_has_integrated_init()))
> memset(p[i], 0, zero_size);
> kmemleak_alloc_recursive(p[i], s->object_size, 1,
> - s->flags, flags);
> - kmsan_slab_alloc(s, p[i], flags);
> + s->flags, init_flags);
> + kmsan_slab_alloc(s, p[i], init_flags);
> }
>
> memcg_slab_post_alloc_hook(s, objcg, flags, size, p);
> @@ -3799,7 +3818,7 @@ static __fastpath_inline void *slab_alloc_node(struct kmem_cache *s, struct list
> bool init = false;
>
> s = slab_pre_alloc_hook(s, lru, &objcg, 1, gfpflags);
> - if (!s)
> + if (unlikely(!s))
> return NULL;
>
> object = kfence_alloc(s, orig_size, gfpflags);
>
> --
Looks good to me,
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
> 2.42.1
>
>
next prev parent reply other threads:[~2023-12-07 2:32 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-20 18:34 [PATCH v2 00/21] remove the SLAB allocator Vlastimil Babka
2023-11-20 18:34 ` [PATCH v2 01/21] mm/slab, docs: switch mm-api docs generation from slab.c to slub.c Vlastimil Babka
2023-11-24 0:46 ` David Rientjes
2023-12-05 3:53 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 02/21] mm/slab: remove CONFIG_SLAB from all Kconfig and Makefile Vlastimil Babka
2023-12-05 4:15 ` Hyeonggon Yoo
2023-12-05 10:14 ` Vlastimil Babka
2023-12-06 0:08 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 03/21] KASAN: remove code paths guarded by CONFIG_SLAB Vlastimil Babka
2023-11-21 8:23 ` Hyeonggon Yoo
2023-11-21 16:47 ` Andrey Konovalov
2023-12-05 4:26 ` Hyeonggon Yoo
2023-12-05 4:48 ` Hyeonggon Yoo
2023-12-05 10:16 ` Vlastimil Babka
2023-11-20 18:34 ` [PATCH v2 04/21] KFENCE: cleanup kfence_guarded_alloc() after CONFIG_SLAB removal Vlastimil Babka
2023-12-06 8:01 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 05/21] mm/memcontrol: remove CONFIG_SLAB #ifdef guards Vlastimil Babka
2023-12-06 8:12 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 06/21] cpu/hotplug: remove CPUHP_SLAB_PREPARE hooks Vlastimil Babka
2023-12-01 11:28 ` Thomas Gleixner
2023-12-06 8:28 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 07/21] mm/slab: remove CONFIG_SLAB code from slab common code Vlastimil Babka
2023-12-06 9:05 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 08/21] mm/mempool/dmapool: remove CONFIG_DEBUG_SLAB ifdefs Vlastimil Babka
2023-12-06 9:10 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 09/21] mm/slab: remove mm/slab.c and slab_def.h Vlastimil Babka
2023-11-22 20:07 ` Christoph Lameter
2023-12-06 9:31 ` Hyeonggon Yoo
2023-12-06 9:37 ` Vlastimil Babka
2023-11-20 18:34 ` [PATCH v2 10/21] mm/slab: move struct kmem_cache_cpu declaration to slub.c Vlastimil Babka
2023-12-06 9:35 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 11/21] mm/slab: move the rest of slub_def.h to mm/slab.h Vlastimil Babka
2023-12-06 9:45 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 12/21] mm/slab: consolidate includes in the internal mm/slab.h Vlastimil Babka
2023-12-07 0:30 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 13/21] mm/slab: move pre/post-alloc hooks from slab.h to slub.c Vlastimil Babka
2023-12-07 0:43 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 14/21] mm/slab: move memcg related functions " Vlastimil Babka
2023-12-07 0:59 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 15/21] mm/slab: move struct kmem_cache_node " Vlastimil Babka
2023-12-07 1:11 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 16/21] mm/slab: move kfree() from slab_common.c " Vlastimil Babka
2023-12-05 4:38 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 17/21] mm/slab: move kmalloc_slab() to mm/slab.h Vlastimil Babka
2023-12-07 1:28 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 18/21] mm/slab: move kmalloc() functions from slab_common.c to slub.c Vlastimil Babka
2023-12-07 1:30 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 19/21] mm/slub: remove slab_alloc() and __kmem_cache_alloc_lru() wrappers Vlastimil Babka
2023-12-07 1:35 ` Hyeonggon Yoo
2023-11-20 18:34 ` [PATCH v2 20/21] mm/slub: optimize alloc fastpath code layout Vlastimil Babka
2023-12-07 2:32 ` Hyeonggon Yoo [this message]
2023-11-20 18:34 ` [PATCH v2 21/21] mm/slub: optimize free fast path " Vlastimil Babka
2023-12-07 2:40 ` Hyeonggon Yoo
2023-11-24 0:45 ` [PATCH v2 00/21] remove the SLAB allocator David Rientjes
2023-11-24 9:26 ` Vlastimil Babka
2023-12-07 2:45 ` Hyeonggon Yoo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZXEurG+jk62uNgRK@localhost.localdomain \
--to=42.hyeyoo@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=andreyknvl@gmail.com \
--cc=cgroups@vger.kernel.org \
--cc=cl@linux.com \
--cc=dvyukov@google.com \
--cc=elver@google.com \
--cc=glider@google.com \
--cc=hannes@cmpxchg.org \
--cc=iamjoonsoo.kim@lge.com \
--cc=kasan-dev@googlegroups.com \
--cc=keescook@chromium.org \
--cc=linux-hardening@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=muchun.song@linux.dev \
--cc=penberg@kernel.org \
--cc=rientjes@google.com \
--cc=roman.gushchin@linux.dev \
--cc=ryabinin.a.a@gmail.com \
--cc=shakeelb@google.com \
--cc=vbabka@suse.cz \
--cc=vincenzo.frascino@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.