linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Lorenzo Stoakes (Oracle)" <ljs@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: "Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Vlastimil Babka <vbabka@kernel.org>,
	Mike Rapoport <rppt@kernel.org>,
	Suren Baghdasaryan <surenb@google.com>,
	Michal Hocko <mhocko@suse.com>, Jann Horn <jannh@google.com>,
	Pedro Falcato <pfalcato@suse.de>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Jianzhou Zhao <luckd0g@163.com>,
	Oscar Salvador <osalvador@suse.de>
Subject: [PATCH 3/3] mm/mremap: check map count under mmap write lock and abstract
Date: Wed, 11 Mar 2026 17:24:38 +0000	[thread overview]
Message-ID: <18be0b48eaa8e8804eb745974ee729c3ade0c687.1773249037.git.ljs@kernel.org> (raw)
In-Reply-To: <cover.1773249037.git.ljs@kernel.org>

We are checking the map count in check_mremap_params(), prior to obtaining
the mmap write lock, which means that accesses to current->mm->map_count
might race with this field being updated.

Resolve this by only checking this field after the mmap write lock is held.

Additionally, abstract this check into a helper function with extensive
ASCII documentation of what's going on.

Reported-by: Jianzhou Zhao <luckd0g@163.com>
Closes: https://lore.kernel.org/all/1a7d4c26.6b46.19cdbe7eaf0.Coremail.luckd0g@163.com/
Signed-off-by: Lorenzo Stoakes (Oracle) <ljs@kernel.org>
---
 mm/mremap.c | 88 +++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 75 insertions(+), 13 deletions(-)

diff --git a/mm/mremap.c b/mm/mremap.c
index ba6c690f6c1b..ee46bbb031e6 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1028,6 +1028,75 @@ static void vrm_stat_account(struct vma_remap_struct *vrm,
 		mm->locked_vm += pages;
 }
 
+static bool __check_map_count_against_split(struct mm_struct *mm,
+					    bool before_unmaps)
+{
+	const int sys_map_count = get_sysctl_max_map_count();
+	int map_count = mm->map_count;
+
+	mmap_assert_write_locked(mm);
+
+	/*
+	 * At the point of shrinking the VMA, if new_len < old_len, we unmap
+	 * thusly in the worst case:
+	 *
+	 *              old_addr+old_len                    old_addr+old_len
+	 * |---------------.----.---------|    |---------------|    |---------|
+	 * |               .    .         | -> |      +1       | -1 |   +1    |
+	 * |---------------.----.---------|    |---------------|    |---------|
+	 *        old_addr+new_len                     old_addr+new_len
+	 *
+	 * At the point of removing the portion of an existing VMA to make space
+	 * for the moved VMA if MREMAP_FIXED, we unmap thusly in the worst case:
+	 *
+	 *   new_addr   new_addr+new_len         new_addr   new_addr+new_len
+	 * |----.---------------.---------|    |----|               |---------|
+	 * |    .               .         | -> | +1 |      -1       |   +1    |
+	 * |----.---------------.---------|    |----|               |---------|
+	 *
+	 * Therefore, before we consider the move at all, we have to account
+	 * for 2 additional VMAs possibly being created upon these unmappings.
+	 */
+	if (before_unmaps)
+		map_count += 2;
+
+	/*
+	 * At the point of MOVING the VMA:
+	 *
+	 * We start by copying a VMA, which creates an additional VMA if no
+	 * merge occurs, then if not MREMAP_DONTUNMAP, we unmap the source VMA.
+	 * In the worst case we might then observe:
+	 *
+	 *   new_addr   new_addr+new_len         new_addr   new_addr+new_len
+	 * |----|               |---------|    |----|---------------|---------|
+	 * |    |               |         | -> |    |      +1       |         |
+	 * |----|               |---------|    |----|---------------|---------|
+	 *
+	 *   old_addr   old_addr+old_len         old_addr   old_addr+old_len
+	 * |----.---------------.---------|    |----|               |---------|
+	 * |    .               .         | -> | +1 |      -1       |   +1    |
+	 * |----.---------------.---------|    |----|               |---------|
+	 *
+	 * Therefore we must check to ensure we have headroom of 2 additional
+	 * VMAs.
+	 */
+	return map_count + 2 <= sys_map_count;
+}
+
+/* Do we violate the map count limit if we split VMAs when moving the VMA? */
+static bool check_map_count_against_split(void)
+{
+	return __check_map_count_against_split(current->mm,
+					       /*before_unmaps=*/false);
+}
+
+/* Do we violate the map count limit if we split VMAs prior to early unmaps? */
+static bool check_map_count_against_split_early(void)
+{
+	return __check_map_count_against_split(current->mm,
+					       /*before_unmaps=*/true);
+}
+
 /*
  * Perform checks before attempting to write a VMA prior to it being
  * moved.
@@ -1045,7 +1114,7 @@ static unsigned long prep_move_vma(struct vma_remap_struct *vrm)
 	 * which may not merge, then (if MREMAP_DONTUNMAP is not set) unmap the
 	 * source, which may split, causing a net increase of 2 mappings.
 	 */
-	if (current->mm->map_count + 2 > get_sysctl_max_map_count())
+	if (!check_map_count_against_split())
 		return -ENOMEM;
 
 	if (vma->vm_ops && vma->vm_ops->may_split) {
@@ -1804,18 +1873,6 @@ static unsigned long check_mremap_params(struct vma_remap_struct *vrm)
 	if (vrm_overlaps(vrm))
 		return -EINVAL;
 
-	/*
-	 * We may unmap twice before invoking move_vma(), that is if new_len <
-	 * old_len (shrinking), and in the MREMAP_FIXED case, unmapping part of
-	 * a VMA located at the destination.
-	 *
-	 * In the worst case, both unmappings will cause splits, resulting in a
-	 * net increased map count of 2. In move_vma() we check for headroom of
-	 * 2 additional mappings, so check early to avoid bailing out then.
-	 */
-	if (current->mm->map_count + 4 > get_sysctl_max_map_count())
-		return -ENOMEM;
-
 	return 0;
 }
 
@@ -1925,6 +1982,11 @@ static unsigned long do_mremap(struct vma_remap_struct *vrm)
 		return -EINTR;
 	vrm->mmap_locked = true;
 
+	if (!check_map_count_against_split_early()) {
+		mmap_write_unlock(mm);
+		return -ENOMEM;
+	}
+
 	if (vrm_move_only(vrm)) {
 		res = remap_move(vrm);
 	} else {
-- 
2.53.0



  parent reply	other threads:[~2026-03-11 17:26 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-11 17:24 [PATCH 0/3] mm: improve map count checks Lorenzo Stoakes (Oracle)
2026-03-11 17:24 ` [PATCH 1/3] mm/mremap: correct invalid map count check Lorenzo Stoakes (Oracle)
2026-03-27  9:19   ` Pedro Falcato
2026-03-11 17:24 ` [PATCH 2/3] mm: abstract reading sysctl_max_map_count, and READ_ONCE() Lorenzo Stoakes (Oracle)
2026-03-27  9:20   ` Pedro Falcato
2026-03-11 17:24 ` Lorenzo Stoakes (Oracle) [this message]
2026-03-27  9:22   ` [PATCH 3/3] mm/mremap: check map count under mmap write lock and abstract Pedro Falcato
2026-03-27  9:58     ` Lorenzo Stoakes (Oracle)
2026-03-27  5:42 ` [PATCH 0/3] mm: improve map count checks Andrew Morton
2026-03-27  9:23   ` Pedro Falcato

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=18be0b48eaa8e8804eb745974ee729c3ade0c687.1773249037.git.ljs@kernel.org \
    --to=ljs@kernel.org \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=jannh@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luckd0g@163.com \
    --cc=mhocko@suse.com \
    --cc=osalvador@suse.de \
    --cc=pfalcato@suse.de \
    --cc=rppt@kernel.org \
    --cc=surenb@google.com \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox