From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>,
linux-mm@kvack.org, Suren Baghdasaryan <surenb@google.com>,
Chris Li <chriscli@google.com>,
"Liam R. Howlett" <Liam.Howlett@oracle.com>,
Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
Vlastimil Babka <vbabka@suse.cz>,
Shakeel Butt <shakeel.butt@linux.dev>,
Jann Horn <jannh@google.com>, Pedro Falcato <pfalcato@suse.de>
Subject: [PATCH v2 1/2] mm: Add vma_start_write_killable()
Date: Mon, 10 Nov 2025 20:32:01 +0000 [thread overview]
Message-ID: <20251110203204.1454057-2-willy@infradead.org> (raw)
In-Reply-To: <20251110203204.1454057-1-willy@infradead.org>
The vma can be held read-locked for a substantial period of time, e.g. if
memory allocation needs to go into reclaim. It's useful to be able to
send fatal signals to threads which are waiting for the write lock.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Suren Baghdasaryan <surenb@google.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
Documentation/mm/process_addrs.rst | 9 +++++++-
include/linux/mmap_lock.h | 30 ++++++++++++++++++++++++--
mm/mmap_lock.c | 34 ++++++++++++++++++++++--------
tools/testing/vma/vma_internal.h | 8 +++++++
4 files changed, 69 insertions(+), 12 deletions(-)
diff --git a/Documentation/mm/process_addrs.rst b/Documentation/mm/process_addrs.rst
index be49e2a269e4..7f2f3e87071d 100644
--- a/Documentation/mm/process_addrs.rst
+++ b/Documentation/mm/process_addrs.rst
@@ -48,7 +48,8 @@ Terminology
* **VMA locks** - The VMA lock is at VMA granularity (of course) which behaves
as a read/write semaphore in practice. A VMA read lock is obtained via
:c:func:`!lock_vma_under_rcu` (and unlocked via :c:func:`!vma_end_read`) and a
- write lock via :c:func:`!vma_start_write` (all VMA write locks are unlocked
+ write lock via vma_start_write() or vma_start_write_killable()
+ (all VMA write locks are unlocked
automatically when the mmap write lock is released). To take a VMA write lock
you **must** have already acquired an :c:func:`!mmap_write_lock`.
* **rmap locks** - When trying to access VMAs through the reverse mapping via a
@@ -907,3 +908,9 @@ Stack expansion
Stack expansion throws up additional complexities in that we cannot permit there
to be racing page faults, as a result we invoke :c:func:`!vma_start_write` to
prevent this in :c:func:`!expand_downwards` or :c:func:`!expand_upwards`.
+
+------------------------
+Functions and structures
+------------------------
+
+.. kernel-doc:: include/linux/mmap_lock.h
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 2c9fffa58714..378dfb9f1335 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -195,7 +195,8 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, unsigned int *mm_l
return (vma->vm_lock_seq == *mm_lock_seq);
}
-void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq);
+int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq,
+ int state);
/*
* Begin writing to a VMA.
@@ -209,7 +210,30 @@ static inline void vma_start_write(struct vm_area_struct *vma)
if (__is_vma_write_locked(vma, &mm_lock_seq))
return;
- __vma_start_write(vma, mm_lock_seq);
+ __vma_start_write(vma, mm_lock_seq, TASK_UNINTERRUPTIBLE);
+}
+
+/**
+ * vma_start_write_killable - Begin writing to a VMA.
+ * @vma: The VMA we are going to modify.
+ *
+ * Exclude concurrent readers under the per-VMA lock until the currently
+ * write-locked mmap_lock is dropped or downgraded.
+ *
+ * Context: May sleep while waiting for readers to drop the vma read lock.
+ * Caller must already hold the mmap_lock for write.
+ *
+ * Return: 0 for a successful acquisition. -EINTR if a fatal signal was
+ * received.
+ */
+static inline __must_check
+int vma_start_write_killable(struct vm_area_struct *vma)
+{
+ unsigned int mm_lock_seq;
+
+ if (__is_vma_write_locked(vma, &mm_lock_seq))
+ return 0;
+ return __vma_start_write(vma, mm_lock_seq, TASK_KILLABLE);
}
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
@@ -286,6 +310,8 @@ static inline struct vm_area_struct *vma_start_read(struct mm_struct *mm,
{ return NULL; }
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
+static inline __must_check
+int vma_start_write_killable(struct vm_area_struct *vma) { return 0; }
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
diff --git a/mm/mmap_lock.c b/mm/mmap_lock.c
index 0a0db5849b8e..39f341caf32c 100644
--- a/mm/mmap_lock.c
+++ b/mm/mmap_lock.c
@@ -45,8 +45,15 @@ EXPORT_SYMBOL(__mmap_lock_do_trace_released);
#ifdef CONFIG_MMU
#ifdef CONFIG_PER_VMA_LOCK
-static inline bool __vma_enter_locked(struct vm_area_struct *vma, bool detaching)
+/*
+ * Return value: 0 if vma detached,
+ * 1 if vma attached with no readers,
+ * -EINTR if signal received,
+ */
+static inline int __vma_enter_locked(struct vm_area_struct *vma,
+ bool detaching, int state)
{
+ int err;
unsigned int tgt_refcnt = VMA_LOCK_OFFSET;
/* Additional refcnt if the vma is attached. */
@@ -58,15 +65,19 @@ static inline bool __vma_enter_locked(struct vm_area_struct *vma, bool detaching
* vm_refcnt. mmap_write_lock prevents racing with vma_mark_attached().
*/
if (!refcount_add_not_zero(VMA_LOCK_OFFSET, &vma->vm_refcnt))
- return false;
+ return 0;
rwsem_acquire(&vma->vmlock_dep_map, 0, 0, _RET_IP_);
- rcuwait_wait_event(&vma->vm_mm->vma_writer_wait,
+ err = rcuwait_wait_event(&vma->vm_mm->vma_writer_wait,
refcount_read(&vma->vm_refcnt) == tgt_refcnt,
- TASK_UNINTERRUPTIBLE);
+ state);
+ if (err) {
+ rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
+ return err;
+ }
lock_acquired(&vma->vmlock_dep_map, _RET_IP_);
- return true;
+ return 1;
}
static inline void __vma_exit_locked(struct vm_area_struct *vma, bool *detached)
@@ -75,16 +86,19 @@ static inline void __vma_exit_locked(struct vm_area_struct *vma, bool *detached)
rwsem_release(&vma->vmlock_dep_map, _RET_IP_);
}
-void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq)
+int __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq,
+ int state)
{
- bool locked;
+ int locked;
/*
- * __vma_enter_locked() returns false immediately if the vma is not
+ * __vma_enter_locked() returns 0 immediately if the vma is not
* attached, otherwise it waits until refcnt is indicating that vma
* is attached with no readers.
*/
- locked = __vma_enter_locked(vma, false);
+ locked = __vma_enter_locked(vma, false, state);
+ if (locked < 0)
+ return locked;
/*
* We should use WRITE_ONCE() here because we can have concurrent reads
@@ -100,6 +114,8 @@ void __vma_start_write(struct vm_area_struct *vma, unsigned int mm_lock_seq)
__vma_exit_locked(vma, &detached);
WARN_ON_ONCE(detached); /* vma should remain attached */
}
+
+ return 0;
}
EXPORT_SYMBOL_GPL(__vma_start_write);
@@ -118,7 +134,7 @@ void vma_mark_detached(struct vm_area_struct *vma)
*/
if (unlikely(!refcount_dec_and_test(&vma->vm_refcnt))) {
/* Wait until vma is detached with no readers. */
- if (__vma_enter_locked(vma, true)) {
+ if (__vma_enter_locked(vma, true, TASK_UNINTERRUPTIBLE)) {
bool detached;
__vma_exit_locked(vma, &detached);
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index dc976a285ad2..917062cfbc69 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -844,6 +844,14 @@ static inline void vma_start_write(struct vm_area_struct *vma)
vma->vm_lock_seq++;
}
+static inline __must_check
+int vma_start_write_killable(struct vm_area_struct *vma)
+{
+ /* Used to indicate to tests that a write operation has begun. */
+ vma->vm_lock_seq++;
+ return 0;
+}
+
static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
--
2.47.2
next prev parent reply other threads:[~2025-11-10 20:32 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-10 20:32 [PATCH v2 0/2] vma_start_write_killable Matthew Wilcox (Oracle)
2025-11-10 20:32 ` Matthew Wilcox (Oracle) [this message]
2025-11-11 8:58 ` [PATCH v2 1/2] mm: Add vma_start_write_killable() Vlastimil Babka
2025-11-13 13:20 ` Lorenzo Stoakes
2025-11-13 14:40 ` Matthew Wilcox
2025-11-17 19:50 ` Suren Baghdasaryan
2025-11-10 20:32 ` [PATCH v2 2/2] mm: Use vma_start_write_killable() in dup_mmap() Matthew Wilcox (Oracle)
2025-11-11 9:16 ` Vlastimil Babka
2025-11-13 13:25 ` Lorenzo Stoakes
2025-11-13 14:30 ` Matthew Wilcox
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20251110203204.1454057-2-willy@infradead.org \
--to=willy@infradead.org \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=chriscli@google.com \
--cc=jannh@google.com \
--cc=linux-mm@kvack.org \
--cc=lorenzo.stoakes@oracle.com \
--cc=pfalcato@suse.de \
--cc=shakeel.butt@linux.dev \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox