Re: [PATCH RFC] Avoid memory barrier in read_seqcount() through load acquire

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Mark Rutland <mark.rutland@arm.com>
To: cl@gentwo.org
Cc: Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>, Waiman Long <longman@redhat.com>,
	Boqun Feng <boqun.feng@gmail.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH RFC] Avoid memory barrier in read_seqcount() through load acquire
Date: Mon, 19 Aug 2024 09:45:56 +0100	[thread overview]
Message-ID: <ZsMGRIec1y8hdKRG@J2N7QTR9R3.cambridge.arm.com> (raw)
In-Reply-To: <20240813-seq_optimize-v1-1-84d57182e6a7@gentwo.org>

On Tue, Aug 13, 2024 at 11:26:17AM -0700, Christoph Lameter via B4 Relay wrote:
> From: "Christoph Lameter (Ampere)" <cl@gentwo.org>
> 
> Some architectures support load acquire which can save us a memory
> barrier and save some cycles.
> 
> A typical sequence
> 
> 	do {
> 		seq = read_seqcount_begin(&s);
> 		<something>
> 	} while (read_seqcount_retry(&s, seq);
> 
> requires 13 cycles on ARM64 for an empty loop. Two read memory barriers are
> needed. One for each of the seqcount_* functions.
> 
> We can replace the first read barrier with a load acquire of
> the seqcount which saves us one barrier.
> 
> On ARM64 doing so reduces the cycle count from 13 to 8.

The patch itself looks reasonable to me, but in the commit message could
you please replace "ARM64" with the specific micro-architecture you're
testing on? There are tens of contemporary micro-architectures, and for
future reference it'd be good to know what specifically you've tested
on.

If you cannot disclose that for some reason, just say "on my ARM64 test
machine" or something like that, so that we're not implying that this is
true for all ARM64 implementations.

Mark.

> 
> Signed-off-by: Christoph Lameter (Ampere) <cl@gentwo.org>
> ---
>  arch/Kconfig            |  5 +++++
>  arch/arm64/Kconfig      |  1 +
>  include/linux/seqlock.h | 41 +++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 47 insertions(+)
> 
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 975dd22a2dbd..3f8867110a57 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -1600,6 +1600,11 @@ config ARCH_HAS_KERNEL_FPU_SUPPORT
>  	  Architectures that select this option can run floating-point code in
>  	  the kernel, as described in Documentation/core-api/floating-point.rst.
>  
> +config ARCH_HAS_ACQUIRE_RELEASE
> +	bool
> +	help
> +	  Architectures that support acquire / release can avoid memory fences
> +
>  source "kernel/gcov/Kconfig"
>  
>  source "scripts/gcc-plugins/Kconfig"
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index a2f8ff354ca6..19e34fff145f 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -39,6 +39,7 @@ config ARM64
>  	select ARCH_HAS_PTE_DEVMAP
>  	select ARCH_HAS_PTE_SPECIAL
>  	select ARCH_HAS_HW_PTE_YOUNG
> +	select ARCH_HAS_ACQUIRE_RELEASE
>  	select ARCH_HAS_SETUP_DMA_OPS
>  	select ARCH_HAS_SET_DIRECT_MAP
>  	select ARCH_HAS_SET_MEMORY
> diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
> index d90d8ee29d81..353fcf32b800 100644
> --- a/include/linux/seqlock.h
> +++ b/include/linux/seqlock.h
> @@ -176,6 +176,28 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s)	\
>  	return seq;							\
>  }									\
>  									\
> +static __always_inline unsigned						\
> +__seqprop_##lockname##_sequence_acquire(const seqcount_##lockname##_t *s) \
> +{									\
> +	unsigned seq = smp_load_acquire(&s->seqcount.sequence);		\
> +									\
> +	if (!IS_ENABLED(CONFIG_PREEMPT_RT))				\
> +		return seq;						\
> +									\
> +	if (preemptible && unlikely(seq & 1)) {				\
> +		__SEQ_LOCK(lockbase##_lock(s->lock));			\
> +		__SEQ_LOCK(lockbase##_unlock(s->lock));			\
> +									\
> +		/*							\
> +		 * Re-read the sequence counter since the (possibly	\
> +		 * preempted) writer made progress.			\
> +		 */							\
> +		seq = smp_load_acquire(&s->seqcount.sequence);		\
> +	}								\
> +									\
> +	return seq;							\
> +}									\
> +									\
>  static __always_inline bool						\
>  __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s)	\
>  {									\
> @@ -211,6 +233,11 @@ static inline unsigned __seqprop_sequence(const seqcount_t *s)
>  	return READ_ONCE(s->sequence);
>  }
>  
> +static inline unsigned __seqprop_sequence_acquire(const seqcount_t *s)
> +{
> +	return smp_load_acquire(&s->sequence);
> +}
> +
>  static inline bool __seqprop_preemptible(const seqcount_t *s)
>  {
>  	return false;
> @@ -259,6 +286,7 @@ SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
>  #define seqprop_ptr(s)			__seqprop(s, ptr)(s)
>  #define seqprop_const_ptr(s)		__seqprop(s, const_ptr)(s)
>  #define seqprop_sequence(s)		__seqprop(s, sequence)(s)
> +#define seqprop_sequence_acquire(s)	__seqprop(s, sequence_acquire)(s)
>  #define seqprop_preemptible(s)		__seqprop(s, preemptible)(s)
>  #define seqprop_assert(s)		__seqprop(s, assert)(s)
>  
> @@ -293,6 +321,18 @@ SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
>   *
>   * Return: count to be passed to read_seqcount_retry()
>   */
> +#ifdef CONFIG_ARCH_HAS_ACQUIRE_RELEASE
> +#define raw_read_seqcount_begin(s)					\
> +({									\
> +	unsigned _seq;							\
> +									\
> +	while ((_seq = seqprop_sequence_acquire(s)) & 1)		\
> +		cpu_relax();						\
> +									\
> +	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
> +	_seq;								\
> +})
> +#else
>  #define raw_read_seqcount_begin(s)					\
>  ({									\
>  	unsigned _seq = __read_seqcount_begin(s);			\
> @@ -300,6 +340,7 @@ SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
>  	smp_rmb();							\
>  	_seq;								\
>  })
> +#endif
>  
>  /**
>   * read_seqcount_begin() - begin a seqcount_t read critical section
> 
> ---
> base-commit: 6b4aa469f04999c3f244515fa7491b4d093c5167
> change-id: 20240813-seq_optimize-68c48696c798
> 
> Best regards,
> -- 
> Christoph Lameter <cl@gentwo.org>
> 
> 
>

next prev parent reply	other threads:[~2024-08-19  8:46 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-08-13 18:26 Christoph Lameter via B4 Relay
2024-08-13 19:01 ` Waiman Long
2024-08-13 19:41   ` Christoph Lameter (Ampere)
2024-08-13 19:48   ` Linus Torvalds
2024-08-13 19:58     ` Waiman Long
2024-08-13 20:01       ` Linus Torvalds
2024-08-13 20:23         ` Waiman Long
2024-08-19  8:45 ` Mark Rutland [this message]
2024-08-19 16:25   ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZsMGRIec1y8hdKRG@J2N7QTR9R3.cambridge.arm.com \
    --to=mark.rutland@arm.com \
    --cc=boqun.feng@gmail.com \
    --cc=catalin.marinas@arm.com \
    --cc=cl@gentwo.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=torvalds@linux-foundation.org \
    --cc=will@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox