linux-mm.kvack.org archive mirror
* [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin
@ 2024-11-21 16:28 Suren Baghdasaryan
  2024-11-21 16:28 ` [PATCH v2 2/3] mm: convert mm_lock_seq to a proper seqcount Suren Baghdasaryan
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2024-11-21 16:28 UTC (permalink / raw)
  To: akpm
  Cc: peterz, andrii, jannh, Liam.Howlett, lorenzo.stoakes, vbabka,
	mhocko, shakeel.butt, hannes, david, willy, brauner, oleg, arnd,
	richard.weiyang, zhangpeng.00, linmiaohe, viro, hca, linux-mm,
	linux-kernel, surenb

Add raw_seqcount_try_begin() to open a read critical section of the given
seqcount_t if the counter is even. This enables eliding the critical
section entirely if the counter is odd, instead of doing the speculation
knowing it will fail.
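
As an illustration only (none of this is from the patch; the structure,
field name and slowpath label are made up), the intended usage pairs the
try-begin with a later retry check:

	unsigned int seq;

	if (!raw_seqcount_try_begin(&obj->seq, seq))
		goto slowpath;	/* counter is odd, a writer is active */

	/* ... speculative, lock-free reads of the protected data ... */

	if (read_seqcount_retry(&obj->seq, seq))
		goto slowpath;	/* a writer raced with us, take the lock */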

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
Applies over Linus' ToT

 include/linux/seqlock.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index 5298765d6ca4..22c2c48b4265 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -318,6 +318,28 @@ SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
 	__seq;								\
 })
 
+/**
+ * raw_seqcount_try_begin() - begin a seqcount_t read critical section
+ *                            w/o lockdep and w/o counter stabilization
+ * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Similar to raw_seqcount_begin(), except it enables eliding the critical
+ * section entirely if odd, instead of doing the speculation knowing it will
+ * fail.
+ *
+ * Useful when counter stabilization is more or less equivalent to taking
+ * the lock and there is a slowpath that does that.
+ *
+ * If true, start will be set to the (even) sequence count read.
+ *
+ * Return: true when a read critical section is started.
+ */
+#define raw_seqcount_try_begin(s, start)				\
+({									\
+	start = raw_read_seqcount(s);					\
+	!(start & 1);							\
+})
+
 /**
  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
  *                        lockdep and w/o counter stabilization

base-commit: 43fb83c17ba2d63dfb798f0be7453ed55ca3f9c2
-- 
2.47.0.338.g60cca15819-goog




* [PATCH v2 2/3] mm: convert mm_lock_seq to a proper seqcount
  2024-11-21 16:28 [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin Suren Baghdasaryan
@ 2024-11-21 16:28 ` Suren Baghdasaryan
  2024-11-22 15:14   ` Liam R. Howlett
  2024-11-21 16:28 ` [PATCH v2 3/3] mm: introduce mmap_lock_speculate_{try_begin|retry} Suren Baghdasaryan
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 11+ messages in thread
From: Suren Baghdasaryan @ 2024-11-21 16:28 UTC (permalink / raw)
  To: akpm
  Cc: peterz, andrii, jannh, Liam.Howlett, lorenzo.stoakes, vbabka,
	mhocko, shakeel.butt, hannes, david, willy, brauner, oleg, arnd,
	richard.weiyang, zhangpeng.00, linmiaohe, viro, hca, linux-mm,
	linux-kernel, surenb

Convert mm_lock_seq to be a seqcount_t and change all mmap_write_lock
variants to increment it, in line with the usual seqcount usage pattern.
This lets us check whether the mmap_lock is write-locked by checking the
mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be
used when implementing mmap_lock speculation functions.
As a result, vm_lock_seq is also changed to be unsigned to match the type
of mm_lock_seq.sequence.
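
As a sketch (illustration only, not part of the diff below; the helper name
is made up), checking whether mmap_lock is write-locked then reduces to a
parity test on the sequence counter:

	/* Odd sequence count means a writer currently holds mmap_lock. */
	static inline bool mm_is_write_locked_sketch(struct mm_struct *mm)
	{
		return raw_read_seqcount(&mm->mm_lock_seq) & 1;
	}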

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
Changes since v1 [1]
- Added ASSERT_EXCLUSIVE_WRITER() instead of a comment in
vma_end_write_all, per Peter Zijlstra

[1] https://lore.kernel.org/all/20241024205231.1944747-1-surenb@google.com/

 include/linux/mm.h               | 12 +++----
 include/linux/mm_types.h         |  7 ++--
 include/linux/mmap_lock.h        | 55 +++++++++++++++++++++-----------
 kernel/fork.c                    |  5 +--
 mm/init-mm.c                     |  2 +-
 tools/testing/vma/vma.c          |  4 +--
 tools/testing/vma/vma_internal.h |  4 +--
 7 files changed, 53 insertions(+), 36 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index feb5c8021bef..e6de22738ee1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -710,7 +710,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
 	 * we don't rely on for anything - the mm_lock_seq read against which we
 	 * need ordering is below.
 	 */
-	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
+	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
 		return false;
 
 	if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
@@ -727,7 +727,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
 	 * after it has been unlocked.
 	 * This pairs with RELEASE semantics in vma_end_write_all().
 	 */
-	if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
+	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
 		up_read(&vma->vm_lock->lock);
 		return false;
 	}
@@ -742,7 +742,7 @@ static inline void vma_end_read(struct vm_area_struct *vma)
 }
 
 /* WARNING! Can only be used if mmap_lock is expected to be write-locked */
-static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
+static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
 {
 	mmap_assert_write_locked(vma->vm_mm);
 
@@ -750,7 +750,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
 	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
 	 * mm->mm_lock_seq can't be concurrently modified.
 	 */
-	*mm_lock_seq = vma->vm_mm->mm_lock_seq;
+	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
 	return (vma->vm_lock_seq == *mm_lock_seq);
 }
 
@@ -761,7 +761,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
  */
 static inline void vma_start_write(struct vm_area_struct *vma)
 {
-	int mm_lock_seq;
+	unsigned int mm_lock_seq;
 
 	if (__is_vma_write_locked(vma, &mm_lock_seq))
 		return;
@@ -779,7 +779,7 @@ static inline void vma_start_write(struct vm_area_struct *vma)
 
 static inline void vma_assert_write_locked(struct vm_area_struct *vma)
 {
-	int mm_lock_seq;
+	unsigned int mm_lock_seq;
 
 	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
 }
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 381d22eba088..ac72888a54b8 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -715,7 +715,7 @@ struct vm_area_struct {
 	 * counter reuse can only lead to occasional unnecessary use of the
 	 * slowpath.
 	 */
-	int vm_lock_seq;
+	unsigned int vm_lock_seq;
 	/* Unstable RCU readers are allowed to read this. */
 	struct vma_lock *vm_lock;
 #endif
@@ -909,6 +909,9 @@ struct mm_struct {
 		 * Roughly speaking, incrementing the sequence number is
 		 * equivalent to releasing locks on VMAs; reading the sequence
 		 * number can be part of taking a read lock on a VMA.
+		 * Incremented every time mmap_lock is write-locked/unlocked.
+		 * Initialized to 0, therefore odd values indicate mmap_lock
+		 * is write-locked and even values that it's released.
 		 *
 		 * Can be modified under write mmap_lock using RELEASE
 		 * semantics.
@@ -917,7 +920,7 @@ struct mm_struct {
 		 * Can be read with ACQUIRE semantics if not holding write
 		 * mmap_lock.
 		 */
-		int mm_lock_seq;
+		seqcount_t mm_lock_seq;
 #endif
 
 
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index de9dc20b01ba..083b7fa2588e 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -71,39 +71,38 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
 }
 
 #ifdef CONFIG_PER_VMA_LOCK
-/*
- * Drop all currently-held per-VMA locks.
- * This is called from the mmap_lock implementation directly before releasing
- * a write-locked mmap_lock (or downgrading it to read-locked).
- * This should normally NOT be called manually from other places.
- * If you want to call this manually anyway, keep in mind that this will release
- * *all* VMA write locks, including ones from further up the stack.
- */
-static inline void vma_end_write_all(struct mm_struct *mm)
+static inline void mm_lock_seqcount_init(struct mm_struct *mm)
 {
-	mmap_assert_write_locked(mm);
-	/*
-	 * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
-	 * mmap_lock being held.
-	 * We need RELEASE semantics here to ensure that preceding stores into
-	 * the VMA take effect before we unlock it with this store.
-	 * Pairs with ACQUIRE semantics in vma_start_read().
-	 */
-	smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
+	seqcount_init(&mm->mm_lock_seq);
+}
+
+static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
+{
+	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
+}
+
+static inline void mm_lock_seqcount_end(struct mm_struct *mm)
+{
+	do_raw_write_seqcount_end(&mm->mm_lock_seq);
 }
+
 #else
-static inline void vma_end_write_all(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
+static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
 #endif
 
 static inline void mmap_init_lock(struct mm_struct *mm)
 {
 	init_rwsem(&mm->mmap_lock);
+	mm_lock_seqcount_init(mm);
 }
 
 static inline void mmap_write_lock(struct mm_struct *mm)
 {
 	__mmap_lock_trace_start_locking(mm, true);
 	down_write(&mm->mmap_lock);
+	mm_lock_seqcount_begin(mm);
 	__mmap_lock_trace_acquire_returned(mm, true, true);
 }
 
@@ -111,6 +110,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
 {
 	__mmap_lock_trace_start_locking(mm, true);
 	down_write_nested(&mm->mmap_lock, subclass);
+	mm_lock_seqcount_begin(mm);
 	__mmap_lock_trace_acquire_returned(mm, true, true);
 }
 
@@ -120,10 +120,27 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm)
 
 	__mmap_lock_trace_start_locking(mm, true);
 	ret = down_write_killable(&mm->mmap_lock);
+	if (!ret)
+		mm_lock_seqcount_begin(mm);
 	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
 	return ret;
 }
 
+/*
+ * Drop all currently-held per-VMA locks.
+ * This is called from the mmap_lock implementation directly before releasing
+ * a write-locked mmap_lock (or downgrading it to read-locked).
+ * This should normally NOT be called manually from other places.
+ * If you want to call this manually anyway, keep in mind that this will release
+ * *all* VMA write locks, including ones from further up the stack.
+ */
+static inline void vma_end_write_all(struct mm_struct *mm)
+{
+	mmap_assert_write_locked(mm);
+	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
+	mm_lock_seqcount_end(mm);
+}
+
 static inline void mmap_write_unlock(struct mm_struct *mm)
 {
 	__mmap_lock_trace_released(mm, true);
diff --git a/kernel/fork.c b/kernel/fork.c
index e58d27c05788..8cd36645b9fc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -449,7 +449,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma)
 		return false;
 
 	init_rwsem(&vma->vm_lock->lock);
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	return true;
 }
@@ -1262,9 +1262,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	seqcount_init(&mm->write_protect_seq);
 	mmap_init_lock(mm);
 	INIT_LIST_HEAD(&mm->mmlist);
-#ifdef CONFIG_PER_VMA_LOCK
-	mm->mm_lock_seq = 0;
-#endif
 	mm_pgtables_bytes_init(mm);
 	mm->map_count = 0;
 	mm->locked_vm = 0;
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 24c809379274..6af3ad675930 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -40,7 +40,7 @@ struct mm_struct init_mm = {
 	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
 	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
 #ifdef CONFIG_PER_VMA_LOCK
-	.mm_lock_seq	= 0,
+	.mm_lock_seq	= SEQCNT_ZERO(init_mm.mm_lock_seq),
 #endif
 	.user_ns	= &init_user_ns,
 	.cpu_bitmap	= CPU_BITS_NONE,
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index b33b47342d41..9074aaced9c5 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -87,7 +87,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
 	 * begun. Linking to the tree will have caused this to be incremented,
 	 * which means we will get a false positive otherwise.
 	 */
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	return vma;
 }
@@ -212,7 +212,7 @@ static bool vma_write_started(struct vm_area_struct *vma)
 	int seq = vma->vm_lock_seq;
 
 	/* We reset after each check. */
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	/* The vma_start_write() stub simply increments this value. */
 	return seq > -1;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index c5b9da034511..4007ec580f85 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -231,7 +231,7 @@ struct vm_area_struct {
 	 * counter reuse can only lead to occasional unnecessary use of the
 	 * slowpath.
 	 */
-	int vm_lock_seq;
+	unsigned int vm_lock_seq;
 	struct vma_lock *vm_lock;
 #endif
 
@@ -406,7 +406,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma)
 		return false;
 
 	init_rwsem(&vma->vm_lock->lock);
-	vma->vm_lock_seq = -1;
+	vma->vm_lock_seq = UINT_MAX;
 
 	return true;
 }
-- 
2.47.0.338.g60cca15819-goog




* [PATCH v2 3/3] mm: introduce mmap_lock_speculate_{try_begin|retry}
  2024-11-21 16:28 [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin Suren Baghdasaryan
  2024-11-21 16:28 ` [PATCH v2 2/3] mm: convert mm_lock_seq to a proper seqcount Suren Baghdasaryan
@ 2024-11-21 16:28 ` Suren Baghdasaryan
  2024-11-22 11:05   ` Peter Zijlstra
  2024-11-22 15:15   ` Liam R. Howlett
  2024-11-22 11:10 ` [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin David Hildenbrand
  2024-11-22 15:13 ` Liam R. Howlett
  3 siblings, 2 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2024-11-21 16:28 UTC (permalink / raw)
  To: akpm
  Cc: peterz, andrii, jannh, Liam.Howlett, lorenzo.stoakes, vbabka,
	mhocko, shakeel.butt, hannes, david, willy, brauner, oleg, arnd,
	richard.weiyang, zhangpeng.00, linmiaohe, viro, hca, linux-mm,
	linux-kernel, surenb

Add helper functions to speculatively perform operations without
read-locking mmap_lock, expecting that mmap_lock will not be
write-locked and mm is not modified from under us.
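
A rough usage sketch (illustration only; the surrounding code is made up)
of the intended speculation pattern:

	unsigned int seq;
	bool success = false;

	if (mmap_lock_speculate_try_begin(mm, &seq)) {
		/* ... speculative, lock-free access to mm state ... */
		/* retry returns true when a writer raced with us */
		success = !mmap_lock_speculate_retry(mm, seq);
	}
	if (!success) {
		/* Speculation not possible or failed, take the lock for real. */
		mmap_read_lock(mm);
		/* ... redo the work under mmap_lock ... */
		mmap_read_unlock(mm);
	}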

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
Changes since v1 [1]
- Changed to use new raw_seqcount_try_begin() API, per Peter Zijlstra
- Renamed mmap_lock_speculation_{begin|end} into
mmap_lock_speculate_{try_begin|retry}, per Peter Zijlstra

Note: the return value of mmap_lock_speculate_retry() is the opposite of
what it was in mmap_lock_speculation_end(): true now means the speculation failed.

[1] https://lore.kernel.org/all/20241024205231.1944747-2-surenb@google.com/

 include/linux/mmap_lock.h | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 083b7fa2588e..0b39a0f99a3b 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -71,6 +71,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
 }
 
 #ifdef CONFIG_PER_VMA_LOCK
+
 static inline void mm_lock_seqcount_init(struct mm_struct *mm)
 {
 	seqcount_init(&mm->mm_lock_seq);
@@ -86,11 +87,39 @@ static inline void mm_lock_seqcount_end(struct mm_struct *mm)
 	do_raw_write_seqcount_end(&mm->mm_lock_seq);
 }
 
-#else
+static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
+{
+	/*
+	 * Since mmap_lock is a sleeping lock, and waiting for it to become
+	 * unlocked is more or less equivalent with taking it ourselves, don't
+	 * bother with the speculative path if mmap_lock is already write-locked
+	 * and take the slow path, which takes the lock.
+	 */
+	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
+}
+
+static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
+{
+	return do_read_seqcount_retry(&mm->mm_lock_seq, seq);
+}
+
+#else /* CONFIG_PER_VMA_LOCK */
+
 static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
 static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
 static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
-#endif
+
+static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
+{
+	return false;
+}
+
+static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
+{
+	return true;
+}
+
+#endif /* CONFIG_PER_VMA_LOCK */
 
 static inline void mmap_init_lock(struct mm_struct *mm)
 {
-- 
2.47.0.338.g60cca15819-goog




* Re: [PATCH v2 3/3] mm: introduce mmap_lock_speculate_{try_begin|retry}
  2024-11-21 16:28 ` [PATCH v2 3/3] mm: introduce mmap_lock_speculate_{try_begin|retry} Suren Baghdasaryan
@ 2024-11-22 11:05   ` Peter Zijlstra
  2024-11-22 15:03     ` Suren Baghdasaryan
  2024-11-22 15:15   ` Liam R. Howlett
  1 sibling, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2024-11-22 11:05 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: akpm, andrii, jannh, Liam.Howlett, lorenzo.stoakes, vbabka,
	mhocko, shakeel.butt, hannes, david, willy, brauner, oleg, arnd,
	richard.weiyang, zhangpeng.00, linmiaohe, viro, hca, linux-mm,
	linux-kernel

On Thu, Nov 21, 2024 at 08:28:26AM -0800, Suren Baghdasaryan wrote:
> Add helper functions to speculatively perform operations without
> read-locking mmap_lock, expecting that mmap_lock will not be
> write-locked and mm is not modified from under us.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

Thanks for these, you're okay with me taking these through tip/perf/core
for the next cycle along with Andrii's uprobe patch?



* Re: [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin
  2024-11-21 16:28 [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin Suren Baghdasaryan
  2024-11-21 16:28 ` [PATCH v2 2/3] mm: convert mm_lock_seq to a proper seqcount Suren Baghdasaryan
  2024-11-21 16:28 ` [PATCH v2 3/3] mm: introduce mmap_lock_speculate_{try_begin|retry} Suren Baghdasaryan
@ 2024-11-22 11:10 ` David Hildenbrand
  2024-11-22 11:19   ` Peter Zijlstra
  2024-11-22 15:13 ` Liam R. Howlett
  3 siblings, 1 reply; 11+ messages in thread
From: David Hildenbrand @ 2024-11-22 11:10 UTC (permalink / raw)
  To: Suren Baghdasaryan, akpm
  Cc: peterz, andrii, jannh, Liam.Howlett, lorenzo.stoakes, vbabka,
	mhocko, shakeel.butt, hannes, willy, brauner, oleg, arnd,
	richard.weiyang, zhangpeng.00, linmiaohe, viro, hca, linux-mm,
	linux-kernel

On 21.11.24 17:28, Suren Baghdasaryan wrote:
> Add raw_seqcount_try_begin() to open a read critical section of the given
> seqcount_t if the counter is even. This enables eliding the critical
> section entirely if the counter is odd, instead of doing the speculation
> knowing it will fail.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>
> ---
> Applies over Linus' ToT
> 
>   include/linux/seqlock.h | 22 ++++++++++++++++++++++
>   1 file changed, 22 insertions(+)
> 
> diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
> index 5298765d6ca4..22c2c48b4265 100644
> --- a/include/linux/seqlock.h
> +++ b/include/linux/seqlock.h
> @@ -318,6 +318,28 @@ SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
>   	__seq;								\
>   })
>   
> +/**
> + * raw_seqcount_try_begin() - begin a seqcount_t read critical section
> + *                            w/o lockdep and w/o counter stabilization
> + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
> + *
> + * Similar to raw_seqcount_begin(), except it enables eliding the critical
> + * section entirely if odd, instead of doing the speculation knowing it will
> + * fail.
> + *
> + * Useful when counter stabilization is more or less equivalent to taking
> + * the lock and there is a slowpath that does that.
> + *
> + * If true, start will be set to the (even) sequence count read.
> + *
> + * Return: true when a read critical section is started.
> + */
> +#define raw_seqcount_try_begin(s, start)				\
> +({									\
> +	start = raw_read_seqcount(s);					\
> +	!(start & 1);							\
> +})


In gup_fast(), we simply do

seq = raw_read_seqcount(&current->mm->write_protect_seq);
if (seq & 1)
	return 0;

Should we be using that there as well?

if (!raw_seqcount_try_begin(&current->mm->write_protect_seq, seq))
	return 0;

-- 
Cheers,

David / dhildenb




* Re: [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin
  2024-11-22 11:10 ` [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin David Hildenbrand
@ 2024-11-22 11:19   ` Peter Zijlstra
  2024-11-22 11:21     ` David Hildenbrand
  0 siblings, 1 reply; 11+ messages in thread
From: Peter Zijlstra @ 2024-11-22 11:19 UTC (permalink / raw)
  To: David Hildenbrand
  Cc: Suren Baghdasaryan, akpm, andrii, jannh, Liam.Howlett,
	lorenzo.stoakes, vbabka, mhocko, shakeel.butt, hannes, willy,
	brauner, oleg, arnd, richard.weiyang, zhangpeng.00, linmiaohe,
	viro, hca, linux-mm, linux-kernel

On Fri, Nov 22, 2024 at 12:10:29PM +0100, David Hildenbrand wrote:

> In gup_fast(), we simply do
> 
> seq = raw_read_seqcount(&current->mm->write_protect_seq);
> if (seq & 1)
> 	return 0;
> 
> Should we be using that there as well?
> 
> if (!raw_seqcount_try_begin(&current->mm->write_protect_seq, seq))
> 	return 0;

Might as well. A quick grep doesn't find me another instance of this
pattern, but does find me something 'funny' in net/netfilter/x_tables.c.
Let's pretend I didn't see that for now ... *sigh*

Want me to stick a patch like this on, or do you want to do that later,
when the dust has settled?



* Re: [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin
  2024-11-22 11:19   ` Peter Zijlstra
@ 2024-11-22 11:21     ` David Hildenbrand
  0 siblings, 0 replies; 11+ messages in thread
From: David Hildenbrand @ 2024-11-22 11:21 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Suren Baghdasaryan, akpm, andrii, jannh, Liam.Howlett,
	lorenzo.stoakes, vbabka, mhocko, shakeel.butt, hannes, willy,
	brauner, oleg, arnd, richard.weiyang, zhangpeng.00, linmiaohe,
	viro, hca, linux-mm, linux-kernel

On 22.11.24 12:19, Peter Zijlstra wrote:
> On Fri, Nov 22, 2024 at 12:10:29PM +0100, David Hildenbrand wrote:
> 
>> In gup_fast(), we simply do
>>
>> seq = raw_read_seqcount(&current->mm->write_protect_seq);
>> if (seq & 1)
>> 	return 0;
>>
>> Should we be using that there as well?
>>
>> if (!raw_seqcount_try_begin(&current->mm->write_protect_seq, seq))
>> 	return 0;
> 
> Might as well. A quick grep doesn't find me another instance of this
> pattern, but does find me something 'funny' in net/netfilter/x_tables.c.
> Let's pretend I didn't see that for now ... *sigh*

:)

I'm also not 100% sure about barriers in gup_fast() around the 
raw_seqcount .... and I'm pretending I didn't see any of that as well ...

> 
> Want me to stick a patch like this on, or do you want to do that later,
> when the dust has settled?

Feel free to add a patch to just change that as well, it's an easy change.

For this patch

Reviewed-by: David Hildenbrand <david@redhat.com>

-- 
Cheers,

David / dhildenb




* Re: [PATCH v2 3/3] mm: introduce mmap_lock_speculate_{try_begin|retry}
  2024-11-22 11:05   ` Peter Zijlstra
@ 2024-11-22 15:03     ` Suren Baghdasaryan
  0 siblings, 0 replies; 11+ messages in thread
From: Suren Baghdasaryan @ 2024-11-22 15:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: akpm, andrii, jannh, Liam.Howlett, lorenzo.stoakes, vbabka,
	mhocko, shakeel.butt, hannes, david, willy, brauner, oleg, arnd,
	richard.weiyang, zhangpeng.00, linmiaohe, viro, hca, linux-mm,
	linux-kernel

On Fri, Nov 22, 2024 at 3:06 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Nov 21, 2024 at 08:28:26AM -0800, Suren Baghdasaryan wrote:
> > Add helper functions to speculatively perform operations without
> > read-locking mmap_lock, expecting that mmap_lock will not be
> > write-locked and mm is not modified from under us.
> >
> > Suggested-by: Peter Zijlstra <peterz@infradead.org>
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
>
> Thanks for these, you're okay with me taking these through tip/perf/core
> for the next cycle along with Andrii's uprobe patch?

Yes, I'm fine with that plan. Thank you!



* Re: [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin
  2024-11-21 16:28 [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin Suren Baghdasaryan
                   ` (2 preceding siblings ...)
  2024-11-22 11:10 ` [PATCH v2 1/3] seqlock: add raw_seqcount_try_begin David Hildenbrand
@ 2024-11-22 15:13 ` Liam R. Howlett
  3 siblings, 0 replies; 11+ messages in thread
From: Liam R. Howlett @ 2024-11-22 15:13 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: akpm, peterz, andrii, jannh, lorenzo.stoakes, vbabka, mhocko,
	shakeel.butt, hannes, david, willy, brauner, oleg, arnd,
	richard.weiyang, zhangpeng.00, linmiaohe, viro, hca, linux-mm,
	linux-kernel

* Suren Baghdasaryan <surenb@google.com> [241121 11:28]:
> Add raw_seqcount_try_begin() to open a read critical section of the given
> seqcount_t if the counter is even. This enables eliding the critical
> section entirely if the counter is odd, instead of doing the speculation
> knowing it will fail.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>

> ---
> Applies over Linus' ToT
> 
>  include/linux/seqlock.h | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
> index 5298765d6ca4..22c2c48b4265 100644
> --- a/include/linux/seqlock.h
> +++ b/include/linux/seqlock.h
> @@ -318,6 +318,28 @@ SEQCOUNT_LOCKNAME(mutex,        struct mutex,    true,     mutex)
>  	__seq;								\
>  })
>  
> +/**
> + * raw_seqcount_try_begin() - begin a seqcount_t read critical section
> + *                            w/o lockdep and w/o counter stabilization
> + * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
> + *
> + * Similar to raw_seqcount_begin(), except it enables eliding the critical
> + * section entirely if odd, instead of doing the speculation knowing it will
> + * fail.
> + *
> + * Useful when counter stabilization is more or less equivalent to taking
> + * the lock and there is a slowpath that does that.
> + *
> + * If true, start will be set to the (even) sequence count read.
> + *
> + * Return: true when a read critical section is started.
> + */
> +#define raw_seqcount_try_begin(s, start)				\
> +({									\
> +	start = raw_read_seqcount(s);					\
> +	!(start & 1);							\
> +})
> +
>  /**
>   * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
>   *                        lockdep and w/o counter stabilization
> 
> base-commit: 43fb83c17ba2d63dfb798f0be7453ed55ca3f9c2
> -- 
> 2.47.0.338.g60cca15819-goog
> 



* Re: [PATCH v2 2/3] mm: convert mm_lock_seq to a proper seqcount
  2024-11-21 16:28 ` [PATCH v2 2/3] mm: convert mm_lock_seq to a proper seqcount Suren Baghdasaryan
@ 2024-11-22 15:14   ` Liam R. Howlett
  0 siblings, 0 replies; 11+ messages in thread
From: Liam R. Howlett @ 2024-11-22 15:14 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: akpm, peterz, andrii, jannh, lorenzo.stoakes, vbabka, mhocko,
	shakeel.butt, hannes, david, willy, brauner, oleg, arnd,
	richard.weiyang, zhangpeng.00, linmiaohe, viro, hca, linux-mm,
	linux-kernel

* Suren Baghdasaryan <surenb@google.com> [241121 11:28]:
> Convert mm_lock_seq to be a seqcount_t and change all mmap_write_lock
> variants to increment it, in line with the usual seqcount usage pattern.
> This lets us check whether the mmap_lock is write-locked by checking the
> mm_lock_seq.sequence counter (odd=locked, even=unlocked). This will be
> used when implementing mmap_lock speculation functions.
> As a result, vm_lock_seq is also changed to be unsigned to match the type
> of mm_lock_seq.sequence.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>

> ---
> Changes since v1 [1]
> - Added ASSERT_EXCLUSIVE_WRITER() instead of a comment in
> vma_end_write_all, per Peter Zijlstra
> 
> [1] https://lore.kernel.org/all/20241024205231.1944747-1-surenb@google.com/
> 
>  include/linux/mm.h               | 12 +++----
>  include/linux/mm_types.h         |  7 ++--
>  include/linux/mmap_lock.h        | 55 +++++++++++++++++++++-----------
>  kernel/fork.c                    |  5 +--
>  mm/init-mm.c                     |  2 +-
>  tools/testing/vma/vma.c          |  4 +--
>  tools/testing/vma/vma_internal.h |  4 +--
>  7 files changed, 53 insertions(+), 36 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index feb5c8021bef..e6de22738ee1 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -710,7 +710,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
>  	 * we don't rely on for anything - the mm_lock_seq read against which we
>  	 * need ordering is below.
>  	 */
> -	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
> +	if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq.sequence))
>  		return false;
>  
>  	if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
> @@ -727,7 +727,7 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
>  	 * after it has been unlocked.
>  	 * This pairs with RELEASE semantics in vma_end_write_all().
>  	 */
> -	if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
> +	if (unlikely(vma->vm_lock_seq == raw_read_seqcount(&vma->vm_mm->mm_lock_seq))) {
>  		up_read(&vma->vm_lock->lock);
>  		return false;
>  	}
> @@ -742,7 +742,7 @@ static inline void vma_end_read(struct vm_area_struct *vma)
>  }
>  
>  /* WARNING! Can only be used if mmap_lock is expected to be write-locked */
> -static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
> +static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_lock_seq)
>  {
>  	mmap_assert_write_locked(vma->vm_mm);
>  
> @@ -750,7 +750,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
>  	 * current task is holding mmap_write_lock, both vma->vm_lock_seq and
>  	 * mm->mm_lock_seq can't be concurrently modified.
>  	 */
> -	*mm_lock_seq = vma->vm_mm->mm_lock_seq;
> +	*mm_lock_seq = vma->vm_mm->mm_lock_seq.sequence;
>  	return (vma->vm_lock_seq == *mm_lock_seq);
>  }
>  
> @@ -761,7 +761,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
>   */
>  static inline void vma_start_write(struct vm_area_struct *vma)
>  {
> -	int mm_lock_seq;
> +	unsigned int mm_lock_seq;
>  
>  	if (__is_vma_write_locked(vma, &mm_lock_seq))
>  		return;
> @@ -779,7 +779,7 @@ static inline void vma_start_write(struct vm_area_struct *vma)
>  
>  static inline void vma_assert_write_locked(struct vm_area_struct *vma)
>  {
> -	int mm_lock_seq;
> +	unsigned int mm_lock_seq;
>  
>  	VM_BUG_ON_VMA(!__is_vma_write_locked(vma, &mm_lock_seq), vma);
>  }
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 381d22eba088..ac72888a54b8 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -715,7 +715,7 @@ struct vm_area_struct {
>  	 * counter reuse can only lead to occasional unnecessary use of the
>  	 * slowpath.
>  	 */
> -	int vm_lock_seq;
> +	unsigned int vm_lock_seq;
>  	/* Unstable RCU readers are allowed to read this. */
>  	struct vma_lock *vm_lock;
>  #endif
> @@ -909,6 +909,9 @@ struct mm_struct {
>  		 * Roughly speaking, incrementing the sequence number is
>  		 * equivalent to releasing locks on VMAs; reading the sequence
>  		 * number can be part of taking a read lock on a VMA.
> +		 * Incremented every time mmap_lock is write-locked/unlocked.
> +		 * Initialized to 0, therefore odd values indicate mmap_lock
> +		 * is write-locked and even values that it's released.
>  		 *
>  		 * Can be modified under write mmap_lock using RELEASE
>  		 * semantics.
> @@ -917,7 +920,7 @@ struct mm_struct {
>  		 * Can be read with ACQUIRE semantics if not holding write
>  		 * mmap_lock.
>  		 */
> -		int mm_lock_seq;
> +		seqcount_t mm_lock_seq;
>  #endif
>  
>  
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index de9dc20b01ba..083b7fa2588e 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -71,39 +71,38 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
>  }
>  
>  #ifdef CONFIG_PER_VMA_LOCK
> -/*
> - * Drop all currently-held per-VMA locks.
> - * This is called from the mmap_lock implementation directly before releasing
> - * a write-locked mmap_lock (or downgrading it to read-locked).
> - * This should normally NOT be called manually from other places.
> - * If you want to call this manually anyway, keep in mind that this will release
> - * *all* VMA write locks, including ones from further up the stack.
> - */
> -static inline void vma_end_write_all(struct mm_struct *mm)
> +static inline void mm_lock_seqcount_init(struct mm_struct *mm)
>  {
> -	mmap_assert_write_locked(mm);
> -	/*
> -	 * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
> -	 * mmap_lock being held.
> -	 * We need RELEASE semantics here to ensure that preceding stores into
> -	 * the VMA take effect before we unlock it with this store.
> -	 * Pairs with ACQUIRE semantics in vma_start_read().
> -	 */
> -	smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
> +	seqcount_init(&mm->mm_lock_seq);
> +}
> +
> +static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
> +{
> +	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
> +}
> +
> +static inline void mm_lock_seqcount_end(struct mm_struct *mm)
> +{
> +	do_raw_write_seqcount_end(&mm->mm_lock_seq);
>  }
> +
>  #else
> -static inline void vma_end_write_all(struct mm_struct *mm) {}
> +static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
> +static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
> +static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
>  #endif
>  
>  static inline void mmap_init_lock(struct mm_struct *mm)
>  {
>  	init_rwsem(&mm->mmap_lock);
> +	mm_lock_seqcount_init(mm);
>  }
>  
>  static inline void mmap_write_lock(struct mm_struct *mm)
>  {
>  	__mmap_lock_trace_start_locking(mm, true);
>  	down_write(&mm->mmap_lock);
> +	mm_lock_seqcount_begin(mm);
>  	__mmap_lock_trace_acquire_returned(mm, true, true);
>  }
>  
> @@ -111,6 +110,7 @@ static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
>  {
>  	__mmap_lock_trace_start_locking(mm, true);
>  	down_write_nested(&mm->mmap_lock, subclass);
> +	mm_lock_seqcount_begin(mm);
>  	__mmap_lock_trace_acquire_returned(mm, true, true);
>  }
>  
> @@ -120,10 +120,27 @@ static inline int mmap_write_lock_killable(struct mm_struct *mm)
>  
>  	__mmap_lock_trace_start_locking(mm, true);
>  	ret = down_write_killable(&mm->mmap_lock);
> +	if (!ret)
> +		mm_lock_seqcount_begin(mm);
>  	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
>  	return ret;
>  }
>  
> +/*
> + * Drop all currently-held per-VMA locks.
> + * This is called from the mmap_lock implementation directly before releasing
> + * a write-locked mmap_lock (or downgrading it to read-locked).
> + * This should normally NOT be called manually from other places.
> + * If you want to call this manually anyway, keep in mind that this will release
> + * *all* VMA write locks, including ones from further up the stack.
> + */
> +static inline void vma_end_write_all(struct mm_struct *mm)
> +{
> +	mmap_assert_write_locked(mm);
> +	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
> +	mm_lock_seqcount_end(mm);
> +}
> +
>  static inline void mmap_write_unlock(struct mm_struct *mm)
>  {
>  	__mmap_lock_trace_released(mm, true);
> diff --git a/kernel/fork.c b/kernel/fork.c
> index e58d27c05788..8cd36645b9fc 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -449,7 +449,7 @@ static bool vma_lock_alloc(struct vm_area_struct *vma)
>  		return false;
>  
>  	init_rwsem(&vma->vm_lock->lock);
> -	vma->vm_lock_seq = -1;
> +	vma->vm_lock_seq = UINT_MAX;
>  
>  	return true;
>  }
> @@ -1262,9 +1262,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
>  	seqcount_init(&mm->write_protect_seq);
>  	mmap_init_lock(mm);
>  	INIT_LIST_HEAD(&mm->mmlist);
> -#ifdef CONFIG_PER_VMA_LOCK
> -	mm->mm_lock_seq = 0;
> -#endif
>  	mm_pgtables_bytes_init(mm);
>  	mm->map_count = 0;
>  	mm->locked_vm = 0;
> diff --git a/mm/init-mm.c b/mm/init-mm.c
> index 24c809379274..6af3ad675930 100644
> --- a/mm/init-mm.c
> +++ b/mm/init-mm.c
> @@ -40,7 +40,7 @@ struct mm_struct init_mm = {
>  	.arg_lock	=  __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
>  	.mmlist		= LIST_HEAD_INIT(init_mm.mmlist),
>  #ifdef CONFIG_PER_VMA_LOCK
> -	.mm_lock_seq	= 0,
> +	.mm_lock_seq	= SEQCNT_ZERO(init_mm.mm_lock_seq),
>  #endif
>  	.user_ns	= &init_user_ns,
>  	.cpu_bitmap	= CPU_BITS_NONE,
> diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
> index b33b47342d41..9074aaced9c5 100644
> --- a/tools/testing/vma/vma.c
> +++ b/tools/testing/vma/vma.c
> @@ -87,7 +87,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
>  	 * begun. Linking to the tree will have caused this to be incremented,
>  	 * which means we will get a false positive otherwise.
>  	 */
> -	vma->vm_lock_seq = -1;
> +	vma->vm_lock_seq = UINT_MAX;
>  
>  	return vma;
>  }
> @@ -212,7 +212,7 @@ static bool vma_write_started(struct vm_area_struct *vma)
>  	int seq = vma->vm_lock_seq;
>  
>  	/* We reset after each check. */
> -	vma->vm_lock_seq = -1;
> +	vma->vm_lock_seq = UINT_MAX;
>  
>  	/* The vma_start_write() stub simply increments this value. */
>  	return seq > -1;
> diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
> index c5b9da034511..4007ec580f85 100644
> --- a/tools/testing/vma/vma_internal.h
> +++ b/tools/testing/vma/vma_internal.h
> @@ -231,7 +231,7 @@ struct vm_area_struct {
>  	 * counter reuse can only lead to occasional unnecessary use of the
>  	 * slowpath.
>  	 */
> -	int vm_lock_seq;
> +	unsigned int vm_lock_seq;
>  	struct vma_lock *vm_lock;
>  #endif
>  
> @@ -406,7 +406,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma)
>  		return false;
>  
>  	init_rwsem(&vma->vm_lock->lock);
> -	vma->vm_lock_seq = -1;
> +	vma->vm_lock_seq = UINT_MAX;
>  
>  	return true;
>  }
> -- 
> 2.47.0.338.g60cca15819-goog
> 



* Re: [PATCH v2 3/3] mm: introduce mmap_lock_speculate_{try_begin|retry}
  2024-11-21 16:28 ` [PATCH v2 3/3] mm: introduce mmap_lock_speculate_{try_begin|retry} Suren Baghdasaryan
  2024-11-22 11:05   ` Peter Zijlstra
@ 2024-11-22 15:15   ` Liam R. Howlett
  1 sibling, 0 replies; 11+ messages in thread
From: Liam R. Howlett @ 2024-11-22 15:15 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: akpm, peterz, andrii, jannh, lorenzo.stoakes, vbabka, mhocko,
	shakeel.butt, hannes, david, willy, brauner, oleg, arnd,
	richard.weiyang, zhangpeng.00, linmiaohe, viro, hca, linux-mm,
	linux-kernel

* Suren Baghdasaryan <surenb@google.com> [241121 11:28]:
> Add helper functions to speculatively perform operations without
> read-locking mmap_lock, expecting that mmap_lock will not be
> write-locked and mm is not modified from under us.
> 
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>

> ---
> Changes since v1 [1]
> - Changed to use new raw_seqcount_try_begin() API, per Peter Zijlstra
> - Renamed mmap_lock_speculation_{begin|end} into
> mmap_lock_speculate_{try_begin|retry}, per Peter Zijlstra
> 
> Note: the return value of mmap_lock_speculate_retry() is the opposite of
> what it was in mmap_lock_speculation_end(): true now means the speculation failed.
> 
> [1] https://lore.kernel.org/all/20241024205231.1944747-2-surenb@google.com/
> 
>  include/linux/mmap_lock.h | 33 +++++++++++++++++++++++++++++++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
> index 083b7fa2588e..0b39a0f99a3b 100644
> --- a/include/linux/mmap_lock.h
> +++ b/include/linux/mmap_lock.h
> @@ -71,6 +71,7 @@ static inline void mmap_assert_write_locked(const struct mm_struct *mm)
>  }
>  
>  #ifdef CONFIG_PER_VMA_LOCK
> +
>  static inline void mm_lock_seqcount_init(struct mm_struct *mm)
>  {
>  	seqcount_init(&mm->mm_lock_seq);
> @@ -86,11 +87,39 @@ static inline void mm_lock_seqcount_end(struct mm_struct *mm)
>  	do_raw_write_seqcount_end(&mm->mm_lock_seq);
>  }
>  
> -#else
> +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
> +{
> +	/*
> +	 * Since mmap_lock is a sleeping lock, and waiting for it to become
> +	 * unlocked is more or less equivalent with taking it ourselves, don't
> +	 * bother with the speculative path if mmap_lock is already write-locked
> +	 * and take the slow path, which takes the lock.
> +	 */
> +	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
> +}
> +
> +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
> +{
> +	return do_read_seqcount_retry(&mm->mm_lock_seq, seq);
> +}
> +
> +#else /* CONFIG_PER_VMA_LOCK */
> +
>  static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
>  static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
>  static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}
> -#endif
> +
> +static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
> +{
> +	return false;
> +}
> +
> +static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
> +{
> +	return true;
> +}
> +
> +#endif /* CONFIG_PER_VMA_LOCK */
>  
>  static inline void mmap_init_lock(struct mm_struct *mm)
>  {
> -- 
> 2.47.0.338.g60cca15819-goog
> 
> 


