From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Suren Baghdasaryan <surenb@google.com>,
"Liam R . Howlett" <Liam.Howlett@oracle.com>,
Vlastimil Babka <vbabka@suse.cz>,
Shakeel Butt <shakeel.butt@linux.dev>,
David Hildenbrand <david@kernel.org>,
Rik van Riel <riel@surriel.com>, Harry Yoo <harry.yoo@oracle.com>,
Jann Horn <jannh@google.com>, Mike Rapoport <rppt@kernel.org>,
Michal Hocko <mhocko@suse.com>, Pedro Falcato <pfalcato@suse.de>,
Chris Li <chriscli@google.com>,
Barry Song <v-songbaohua@oppo.com>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: [PATCH 8/8] mm/rmap: separate out fork-only logic on anon_vma_clone()
Date: Wed, 17 Dec 2025 12:27:10 +0000 [thread overview]
Message-ID: <175d29b48f7e49ce9a1b19e3a6c72cc80c0c5bb4.1765970117.git.lorenzo.stoakes@oracle.com> (raw)
In-Reply-To: <cover.1765970117.git.lorenzo.stoakes@oracle.com>
Pass the type of operation being performed to anon_vma_clone(). This
allows us to perform checks specific to each operation type, and to
separate out and make clear that the anon_vma reuse logic applies to
fork only.
This opens the door to further refactorings and refinements later as we
have more information to work with.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
mm/internal.h | 11 +++++-
mm/rmap.c | 67 ++++++++++++++++++++++----------
mm/vma.c | 6 +--
tools/testing/vma/vma_internal.h | 11 +++++-
4 files changed, 69 insertions(+), 26 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 469d4ef1ccc5..b4d4bca0f9a7 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -244,7 +244,16 @@ static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
struct anon_vma *folio_get_anon_vma(const struct folio *folio);
-int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src);
+/* Operations which modify VMAs. */
+enum vma_operation {
+ VMA_OP_SPLIT,
+ VMA_OP_MERGE_UNFAULTED,
+ VMA_OP_REMAP,
+ VMA_OP_FORK,
+};
+
+int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src,
+ enum vma_operation operation);
int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma);
int __anon_vma_prepare(struct vm_area_struct *vma);
void unlink_anon_vmas(struct vm_area_struct *vma);
diff --git a/mm/rmap.c b/mm/rmap.c
index de9de6d71c23..f08e6bc57379 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -232,12 +232,13 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
}
static void check_anon_vma_clone(struct vm_area_struct *dst,
- struct vm_area_struct *src)
+ struct vm_area_struct *src,
+ enum vma_operation operation)
{
/* The write lock must be held. */
mmap_assert_write_locked(src->vm_mm);
- /* If not a fork (implied by dst->anon_vma) then must be on same mm. */
- VM_WARN_ON_ONCE(dst->anon_vma && dst->vm_mm != src->vm_mm);
+ /* If not a fork then must be on same mm. */
+ VM_WARN_ON_ONCE(operation != VMA_OP_FORK && dst->vm_mm != src->vm_mm);
/* No source anon_vma is a no-op. */
VM_WARN_ON_ONCE(!src->anon_vma && !list_empty(&src->anon_vma_chain));
@@ -249,6 +250,35 @@ static void check_anon_vma_clone(struct vm_area_struct *dst,
* must be the same across dst and src.
*/
VM_WARN_ON_ONCE(dst->anon_vma && dst->anon_vma != src->anon_vma);
+
+ /* For the anon_vma to be compatible, it can only be singular. */
+ VM_WARN_ON_ONCE(operation == VMA_OP_MERGE_UNFAULTED &&
+ !list_is_singular(&src->anon_vma_chain));
+#ifdef CONFIG_PER_VMA_LOCK
+ /* Only merging an unfaulted VMA leaves the destination attached. */
+ VM_WARN_ON_ONCE(operation != VMA_OP_MERGE_UNFAULTED &&
+ vma_is_attached(dst));
+#endif
+}
+
+static void find_reusable_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma *anon_vma)
+{
+ /* If already populated, nothing to do. */
+ if (vma->anon_vma)
+ return;
+
+ /*
+ * Reuse an anon_vma only if it has no active VMAs left and at most a
+ * single child.
+ */
+ if (anon_vma->num_active_vmas > 0)
+ return;
+ if (anon_vma->num_children > 1)
+ return;
+
+ vma->anon_vma = anon_vma;
+ anon_vma->num_active_vmas++;
}
/**
@@ -256,6 +286,7 @@ static void check_anon_vma_clone(struct vm_area_struct *dst,
* all of the anon_vma objects contained within @src anon_vma_chain's.
* @dst: The destination VMA with an empty anon_vma_chain.
* @src: The source VMA we wish to duplicate.
+ * @operation: The type of operation which resulted in the clone.
*
* This is the heart of the VMA side of the anon_vma implementation - we invoke
* this function whenever we need to set up a new VMA's anon_vma state.
@@ -278,14 +309,16 @@ static void check_anon_vma_clone(struct vm_area_struct *dst,
*
* Returns: 0 on success, -ENOMEM on failure.
*/
-int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
+int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src,
+ enum vma_operation operation)
{
struct anon_vma_chain *avc, *pavc;
+ struct anon_vma *active_anon_vma = src->anon_vma;
- if (!src->anon_vma)
+ if (!active_anon_vma)
return 0;
- check_anon_vma_clone(dst, src);
+ check_anon_vma_clone(dst, src, operation);
/*
* Allocate AVCs. We don't need an anon_vma lock for this as we
@@ -309,22 +342,14 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
struct anon_vma *anon_vma = avc->anon_vma;
anon_vma_interval_tree_insert(avc, &anon_vma->rb_root);
-
- /*
- * Reuse existing anon_vma if it has no vma and only one
- * anon_vma child.
- *
- * Root anon_vma is never reused:
- * it has self-parent reference and at least one child.
- */
- if (!dst->anon_vma && src->anon_vma &&
- anon_vma->num_children < 2 &&
- anon_vma->num_active_vmas == 0)
- dst->anon_vma = anon_vma;
+ if (operation == VMA_OP_FORK)
+ find_reusable_anon_vma(dst, anon_vma);
}
- if (dst->anon_vma)
+
+ if (operation != VMA_OP_FORK)
dst->anon_vma->num_active_vmas++;
- anon_vma_unlock_write(src->anon_vma);
+
+ anon_vma_unlock_write(active_anon_vma);
return 0;
enomem_failure:
@@ -361,7 +386,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
* First, attach the new VMA to the parent VMA's anon_vmas,
* so rmap can find non-COWed pages in child processes.
*/
- error = anon_vma_clone(vma, pvma);
+ error = anon_vma_clone(vma, pvma, VMA_OP_FORK);
if (error)
return error;
diff --git a/mm/vma.c b/mm/vma.c
index feb4bbd3b259..e297c3a94133 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -525,7 +525,7 @@ __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (err)
goto out_free_vmi;
- err = anon_vma_clone(new, vma);
+ err = anon_vma_clone(new, vma, VMA_OP_SPLIT);
if (err)
goto out_free_mpol;
@@ -623,7 +623,7 @@ static int dup_anon_vma(struct vm_area_struct *dst,
vma_assert_write_locked(dst);
dst->anon_vma = src->anon_vma;
- ret = anon_vma_clone(dst, src);
+ ret = anon_vma_clone(dst, src, VMA_OP_MERGE_UNFAULTED);
if (ret)
return ret;
@@ -1862,7 +1862,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
vma_set_range(new_vma, addr, addr + len, pgoff);
if (vma_dup_policy(vma, new_vma))
goto out_free_vma;
- if (anon_vma_clone(new_vma, vma))
+ if (anon_vma_clone(new_vma, vma, VMA_OP_REMAP))
goto out_free_mempol;
if (new_vma->vm_file)
get_file(new_vma->vm_file);
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 93e5792306d9..7fa56dcc53a6 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -600,6 +600,14 @@ struct mmap_action {
bool hide_from_rmap_until_complete :1;
};
+/* Operations which modify VMAs. */
+enum vma_operation {
+ VMA_OP_SPLIT,
+ VMA_OP_MERGE_UNFAULTED,
+ VMA_OP_REMAP,
+ VMA_OP_FORK,
+};
+
/*
* Describes a VMA that is about to be mmap()'ed. Drivers may choose to
* manipulate mutable fields which will cause those fields to be updated in the
@@ -1157,7 +1165,8 @@ static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_stru
return 0;
}
-static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
+static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src,
+ enum vma_operation operation)
{
/* For testing purposes. We indicate that an anon_vma has been cloned. */
if (src->anon_vma != NULL) {
--
2.52.0
next prev parent reply other threads:[~2025-12-17 12:27 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-12-17 12:27 [PATCH 0/8] mm: clean up anon_vma implementation Lorenzo Stoakes
2025-12-17 12:27 ` [PATCH 1/8] mm/rmap: improve anon_vma_clone(), unlink_anon_vmas() comments, add asserts Lorenzo Stoakes
2025-12-19 18:22 ` Liam R. Howlett
2025-12-29 21:18 ` Suren Baghdasaryan
2025-12-30 21:21 ` Suren Baghdasaryan
2025-12-17 12:27 ` [PATCH 2/8] mm/rmap: skip unfaulted VMAs on anon_vma clone, unlink Lorenzo Stoakes
2025-12-19 18:28 ` Liam R. Howlett
2025-12-29 21:41 ` Suren Baghdasaryan
2025-12-17 12:27 ` [PATCH 3/8] mm/rmap: remove unnecessary root lock dance in anon_vma clone, unmap Lorenzo Stoakes
2025-12-29 22:17 ` Suren Baghdasaryan
2025-12-17 12:27 ` [PATCH 4/8] mm/rmap: remove anon_vma_merge() function Lorenzo Stoakes
2025-12-30 19:35 ` Suren Baghdasaryan
2025-12-17 12:27 ` [PATCH 5/8] mm/rmap: make anon_vma functions internal Lorenzo Stoakes
2025-12-30 19:38 ` Suren Baghdasaryan
2025-12-17 12:27 ` [PATCH 6/8] mm/mmap_lock: add vma_is_attached() helper Lorenzo Stoakes
2025-12-30 19:50 ` Suren Baghdasaryan
2025-12-17 12:27 ` [PATCH 7/8] mm/rmap: allocate anon_vma_chain objects unlocked when possible Lorenzo Stoakes
2025-12-30 21:35 ` Suren Baghdasaryan
2025-12-17 12:27 ` Lorenzo Stoakes [this message]
2025-12-30 22:02 ` [PATCH 8/8] mm/rmap: separate out fork-only logic on anon_vma_clone() Suren Baghdasaryan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=175d29b48f7e49ce9a1b19e3a6c72cc80c0c5bb4.1765970117.git.lorenzo.stoakes@oracle.com \
--to=lorenzo.stoakes@oracle.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=chriscli@google.com \
--cc=david@kernel.org \
--cc=harry.yoo@oracle.com \
--cc=jannh@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.com \
--cc=pfalcato@suse.de \
--cc=riel@surriel.com \
--cc=rppt@kernel.org \
--cc=shakeel.butt@linux.dev \
--cc=surenb@google.com \
--cc=v-songbaohua@oppo.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox