From: Hyeonggon Yoo <42.hyeyoo@gmail.com>
To: Peter Zijlstra <peterz@infradead.org>
Cc: torvalds@linux-foundation.org, corbet@lwn.net, will@kernel.org,
boqun.feng@gmail.com, mark.rutland@arm.com,
catalin.marinas@arm.com, dennis@kernel.org, tj@kernel.org,
cl@linux.com, hca@linux.ibm.com, gor@linux.ibm.com,
agordeev@linux.ibm.com, borntraeger@linux.ibm.com,
svens@linux.ibm.com, tglx@linutronix.de, mingo@redhat.com,
bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org,
hpa@zytor.com, joro@8bytes.org, suravee.suthikulpanit@amd.com,
robin.murphy@arm.com, dwmw2@infradead.org,
baolu.lu@linux.intel.com, Arnd Bergmann <arnd@arndb.de>,
Herbert Xu <herbert@gondor.apana.org.au>,
davem@davemloft.net, penberg@kernel.org, rientjes@google.com,
iamjoonsoo.kim@lge.com, Andrew Morton <akpm@linux-foundation.org>,
vbabka@suse.cz, roman.gushchin@linux.dev,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-mm@kvack.org, linux-s390@vger.kernel.org,
iommu@lists.linux.dev, linux-arch@vger.kernel.org,
linux-crypto@vger.kernel.org
Subject: Re: [PATCH v2 08/10] slub: Replace cmpxchg_double()
Date: Wed, 8 Feb 2023 13:31:39 +0000 [thread overview]
Message-ID: <Y+OkOxpOnRYcI3DS@localhost> (raw)
In-Reply-To: <20230202152655.684926740@infradead.org>
On Thu, Feb 02, 2023 at 03:50:38PM +0100, Peter Zijlstra wrote:
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> ---
> include/linux/slub_def.h | 12 ++-
> mm/slab.h | 45 +++++++++++++-
> mm/slub.c | 142 ++++++++++++++++++++++++++++-------------------
> 3 files changed, 135 insertions(+), 64 deletions(-)
>
> --- a/include/linux/slub_def.h
> +++ b/include/linux/slub_def.h
> @@ -39,7 +39,8 @@ enum stat_item {
> CPU_PARTIAL_FREE, /* Refill cpu partial on free */
> CPU_PARTIAL_NODE, /* Refill cpu partial from node partial */
> CPU_PARTIAL_DRAIN, /* Drain cpu partial to node partial */
> - NR_SLUB_STAT_ITEMS };
> + NR_SLUB_STAT_ITEMS
> +};
>
> #ifndef CONFIG_SLUB_TINY
> /*
> @@ -47,8 +48,13 @@ enum stat_item {
> * with this_cpu_cmpxchg_double() alignment requirements.
> */
> struct kmem_cache_cpu {
> - void **freelist; /* Pointer to next available object */
> - unsigned long tid; /* Globally unique transaction id */
> + union {
> + struct {
> + void **freelist; /* Pointer to next available object */
> + unsigned long tid; /* Globally unique transaction id */
> + };
> + freelist_aba_t freelist_tid;
> + };
> struct slab *slab; /* The slab from which we are allocating */
> #ifdef CONFIG_SLUB_CPU_PARTIAL
> struct slab *partial; /* Partially allocated frozen slabs */
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -5,6 +5,34 @@
> * Internal slab definitions
> */
>
> +/*
> + * Freelist pointer and counter to cmpxchg together, avoids the typical ABA
> + * problems with cmpxchg of just a pointer.
> + */
> +typedef union {
> + struct {
> + void *freelist;
> + unsigned long counter;
> + };
> +#ifdef CONFIG_64BIT
> + u128 full;
> +#else
> + u64 full;
> +#endif
> +} freelist_aba_t;
> +
> +#ifdef CONFIG_64BIT
> +# ifdef system_has_cmpxchg128
> +# define system_has_freelist_aba() system_has_cmpxchg128()
> +# define try_cmpxchg_freelist try_cmpxchg128
> +# endif
> +#else /* CONFIG_64BIT */
> +# ifdef system_has_cmpxchg64
> +# define system_has_freelist_aba() system_has_cmpxchg64()
> +# define try_cmpxchg_freelist try_cmpxchg64
> +# endif
> +#endif /* CONFIG_64BIT */
> +
> /* Reuses the bits in struct page */
> struct slab {
> unsigned long __page_flags;
> @@ -37,14 +65,21 @@ struct slab {
> #endif
> };
> /* Double-word boundary */
> - void *freelist; /* first free object */
> union {
> - unsigned long counters;
> struct {
> - unsigned inuse:16;
> - unsigned objects:15;
> - unsigned frozen:1;
> + void *freelist; /* first free object */
> + union {
> + unsigned long counters;
> + struct {
> + unsigned inuse:16;
> + unsigned objects:15;
> + unsigned frozen:1;
> + };
> + };
> };
> +#ifdef system_has_freelist_aba
> + freelist_aba_t freelist_counter;
> +#endif
> };
> };
> struct rcu_head rcu_head;
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -292,7 +292,13 @@ static inline bool kmem_cache_has_cpu_pa
> /* Poison object */
> #define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
> /* Use cmpxchg_double */
> +
> +#if defined(system_has_freelist_aba) && \
> + defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
> #define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
> +#else
> +#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0U)
> +#endif
>
> /*
> * Tracking user of a slab.
> @@ -512,6 +518,43 @@ static __always_inline void slab_unlock(
> __bit_spin_unlock(PG_locked, &page->flags);
> }
>
> +static inline bool
> +__update_freelist_fast(struct slab *slab,
> + void *freelist_old, unsigned long counters_old,
> + void *freelist_new, unsigned long counters_new)
> +{
> +
> + bool ret = false;
> +
> +#ifdef system_has_freelist_aba
> + freelist_aba_t old = { .freelist = freelist_old, .counter = counters_old };
> + freelist_aba_t new = { .freelist = freelist_new, .counter = counters_new };
> +
> + ret = try_cmpxchg_freelist(&slab->freelist_counter.full, &old.full, new.full);
> +#endif /* system_has_freelist_aba */
> +
> + return ret;
> +}
> +
> +static inline bool
> +__update_freelist_slow(struct slab *slab,
> + void *freelist_old, unsigned long counters_old,
> + void *freelist_new, unsigned long counters_new)
> +{
> + bool ret = false;
> +
> + slab_lock(slab);
> + if (slab->freelist == freelist_old &&
> + slab->counters == counters_old) {
> + slab->freelist = freelist_new;
> + slab->counters = counters_new;
> + ret = true;
> + }
> + slab_unlock(slab);
> +
> + return ret;
> +}
> +
> /*
> * Interrupts must be disabled (for the fallback code to work right), typically
> * by an _irqsave() lock variant. On PREEMPT_RT the preempt_disable(), which is
> @@ -519,33 +562,25 @@ static __always_inline void slab_unlock(
> * allocation/ free operation in hardirq context. Therefore nothing can
> * interrupt the operation.
> */
> -static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
> +static inline bool __slab_update_freelist(struct kmem_cache *s, struct slab *slab,
> void *freelist_old, unsigned long counters_old,
> void *freelist_new, unsigned long counters_new,
> const char *n)
> {
> + bool ret;
> +
> if (USE_LOCKLESS_FAST_PATH())
> lockdep_assert_irqs_disabled();
> -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
> - defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
> +
> if (s->flags & __CMPXCHG_DOUBLE) {
> - if (cmpxchg_double(&slab->freelist, &slab->counters,
> - freelist_old, counters_old,
> - freelist_new, counters_new))
> - return true;
> - } else
> -#endif
> - {
> - slab_lock(slab);
> - if (slab->freelist == freelist_old &&
> - slab->counters == counters_old) {
> - slab->freelist = freelist_new;
> - slab->counters = counters_new;
> - slab_unlock(slab);
> - return true;
> - }
> - slab_unlock(slab);
> + ret = __update_freelist_fast(slab, freelist_old, counters_old,
> + freelist_new, counters_new);
> + } else {
> + ret = __update_freelist_slow(slab, freelist_old, counters_old,
> + freelist_new, counters_new);
> }
> + if (likely(ret))
> + return true;
>
> cpu_relax();
> stat(s, CMPXCHG_DOUBLE_FAIL);
> @@ -557,36 +592,26 @@ static inline bool __cmpxchg_double_slab
> return false;
> }
>
> -static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct slab *slab,
> +static inline bool slab_update_freelist(struct kmem_cache *s, struct slab *slab,
> void *freelist_old, unsigned long counters_old,
> void *freelist_new, unsigned long counters_new,
> const char *n)
> {
> -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
> - defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
> + bool ret;
> +
> if (s->flags & __CMPXCHG_DOUBLE) {
> - if (cmpxchg_double(&slab->freelist, &slab->counters,
> - freelist_old, counters_old,
> - freelist_new, counters_new))
> - return true;
> - } else
> -#endif
> - {
> + ret = __update_freelist_fast(slab, freelist_old, counters_old,
> + freelist_new, counters_new);
> + } else {
> unsigned long flags;
>
> local_irq_save(flags);
> - slab_lock(slab);
> - if (slab->freelist == freelist_old &&
> - slab->counters == counters_old) {
> - slab->freelist = freelist_new;
> - slab->counters = counters_new;
> - slab_unlock(slab);
> - local_irq_restore(flags);
> - return true;
> - }
> - slab_unlock(slab);
> + ret = __update_freelist_slow(slab, freelist_old, counters_old,
> + freelist_new, counters_new);
> local_irq_restore(flags);
> }
> + if (likely(ret))
> + return true;
>
> cpu_relax();
> stat(s, CMPXCHG_DOUBLE_FAIL);
> @@ -2229,7 +2254,7 @@ static inline void *acquire_slab(struct
> VM_BUG_ON(new.frozen);
> new.frozen = 1;
>
> - if (!__cmpxchg_double_slab(s, slab,
> + if (!__slab_update_freelist(s, slab,
> freelist, counters,
> new.freelist, new.counters,
> "acquire_slab"))
> @@ -2555,7 +2580,7 @@ static void deactivate_slab(struct kmem_
> }
>
>
> - if (!cmpxchg_double_slab(s, slab,
> + if (!slab_update_freelist(s, slab,
> old.freelist, old.counters,
> new.freelist, new.counters,
> "unfreezing slab")) {
> @@ -2612,7 +2637,7 @@ static void __unfreeze_partials(struct k
>
> new.frozen = 0;
>
> - } while (!__cmpxchg_double_slab(s, slab,
> + } while (!__slab_update_freelist(s, slab,
> old.freelist, old.counters,
> new.freelist, new.counters,
> "unfreezing slab"));
> @@ -3009,6 +3034,18 @@ static inline bool pfmemalloc_match(stru
> }
>
> #ifndef CONFIG_SLUB_TINY
> +static inline bool
> +__update_cpu_freelist_fast(struct kmem_cache *s,
> + void *freelist_old, void *freelist_new,
> + unsigned long tid)
> +{
> + freelist_aba_t old = { .freelist = freelist_old, .counter = tid };
> + freelist_aba_t new = { .freelist = freelist_new, .counter = next_tid(tid) };
> +
> + return this_cpu_cmpxchg(s->cpu_slab->freelist_tid.full,
> + old.full, new.full) == old.full;
> +}
> +
> /*
> * Check the slab->freelist and either transfer the freelist to the
> * per cpu freelist or deactivate the slab.
> @@ -3035,7 +3072,7 @@ static inline void *get_freelist(struct
> new.inuse = slab->objects;
> new.frozen = freelist != NULL;
>
> - } while (!__cmpxchg_double_slab(s, slab,
> + } while (!__slab_update_freelist(s, slab,
> freelist, counters,
> NULL, new.counters,
> "get_freelist"));
> @@ -3360,11 +3397,7 @@ static __always_inline void *__slab_allo
> * against code executing on this cpu *not* from access by
> * other cpus.
> */
> - if (unlikely(!this_cpu_cmpxchg_double(
> - s->cpu_slab->freelist, s->cpu_slab->tid,
> - object, tid,
> - next_object, next_tid(tid)))) {
> -
> + if (unlikely(!__update_cpu_freelist_fast(s, object, next_object, tid))) {
> note_cmpxchg_failure("slab_alloc", s, tid);
> goto redo;
> }
> @@ -3632,7 +3665,7 @@ static void __slab_free(struct kmem_cach
> }
> }
>
> - } while (!cmpxchg_double_slab(s, slab,
> + } while (!slab_update_freelist(s, slab,
> prior, counters,
> head, new.counters,
> "__slab_free"));
> @@ -3737,11 +3770,7 @@ static __always_inline void do_slab_free
>
> set_freepointer(s, tail_obj, freelist);
>
> - if (unlikely(!this_cpu_cmpxchg_double(
> - s->cpu_slab->freelist, s->cpu_slab->tid,
> - freelist, tid,
> - head, next_tid(tid)))) {
> -
> + if (unlikely(!__update_cpu_freelist_fast(s, freelist, head, tid))) {
> note_cmpxchg_failure("slab_free", s, tid);
> goto redo;
> }
> @@ -4505,11 +4534,12 @@ static int kmem_cache_open(struct kmem_c
> }
> }
>
> -#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
> +#if defined(system_has_freelist_aba) && \
> defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
> - if (system_has_cmpxchg_double() && (s->flags & SLAB_NO_CMPXCHG) == 0)
> + if (system_has_freelist_aba() && !(s->flags & SLAB_NO_CMPXCHG)) {
> /* Enable fast mode */
> s->flags |= __CMPXCHG_DOUBLE;
> + }
> #endif
>
> /*
Acked-by: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Thanks!
next prev parent reply other threads:[~2023-02-08 13:32 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-02 14:50 [PATCH v2 00/10] Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() Peter Zijlstra
2023-02-02 14:50 ` [PATCH v2 01/10] crypto/b128ops: Remove struct u128 Peter Zijlstra
2023-02-02 20:21 ` H. Peter Anvin
2023-02-02 14:50 ` [PATCH v2 02/10] types: Introduce [us]128 Peter Zijlstra
2023-02-02 14:50 ` [PATCH v2 03/10] arch: Introduce arch_{,try_}_cmpxchg128{,_local}() Peter Zijlstra
2023-02-02 17:04 ` Heiko Carstens
2023-02-03 16:52 ` Mark Rutland
2023-02-02 14:50 ` [PATCH v2 04/10] instrumentation: Wire up cmpxchg128() Peter Zijlstra
2023-02-03 16:55 ` Mark Rutland
2023-02-02 14:50 ` [PATCH v2 05/10] percpu: Wire up cmpxchg128 Peter Zijlstra
2023-02-02 17:05 ` Heiko Carstens
2023-02-03 17:02 ` Mark Rutland
2023-02-03 17:25 ` Arnd Bergmann
2023-02-06 11:24 ` Peter Zijlstra
2023-02-06 12:14 ` Peter Zijlstra
2023-02-06 12:48 ` Peter Zijlstra
2023-02-06 13:20 ` Arnd Bergmann
2023-02-06 12:19 ` Arnd Bergmann
2023-02-02 14:50 ` [PATCH v2 06/10] x86,amd_iommu: Replace cmpxchg_double() Peter Zijlstra
2023-02-02 14:50 ` [PATCH v2 07/10] x86,intel_iommu: " Peter Zijlstra
2023-02-03 2:51 ` Baolu Lu
2023-02-02 14:50 ` [PATCH v2 08/10] slub: " Peter Zijlstra
2023-02-08 13:31 ` Hyeonggon Yoo [this message]
2023-02-02 14:50 ` [PATCH v2 09/10] arch: Remove cmpxchg_double Peter Zijlstra
2023-02-08 13:44 ` Hyeonggon Yoo
2023-02-02 14:50 ` [PATCH v2 10/10] s390/cpum_sf: Convert to cmpxchg128() Peter Zijlstra
2023-02-02 17:05 ` Heiko Carstens
2023-02-02 19:39 ` [PATCH v2 00/10] Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() Linus Torvalds
2023-02-02 22:45 ` David Laight
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Y+OkOxpOnRYcI3DS@localhost \
--to=42.hyeyoo@gmail.com \
--cc=agordeev@linux.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=arnd@arndb.de \
--cc=baolu.lu@linux.intel.com \
--cc=boqun.feng@gmail.com \
--cc=borntraeger@linux.ibm.com \
--cc=bp@alien8.de \
--cc=catalin.marinas@arm.com \
--cc=cl@linux.com \
--cc=corbet@lwn.net \
--cc=dave.hansen@linux.intel.com \
--cc=davem@davemloft.net \
--cc=dennis@kernel.org \
--cc=dwmw2@infradead.org \
--cc=gor@linux.ibm.com \
--cc=hca@linux.ibm.com \
--cc=herbert@gondor.apana.org.au \
--cc=hpa@zytor.com \
--cc=iamjoonsoo.kim@lge.com \
--cc=iommu@lists.linux.dev \
--cc=joro@8bytes.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-crypto@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux-s390@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=penberg@kernel.org \
--cc=peterz@infradead.org \
--cc=rientjes@google.com \
--cc=robin.murphy@arm.com \
--cc=roman.gushchin@linux.dev \
--cc=suravee.suthikulpanit@amd.com \
--cc=svens@linux.ibm.com \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=vbabka@suse.cz \
--cc=will@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox