* [RFC PATCH v2] mm/rmap: make num_children and num_active_vmas update in internally
@ 2025-09-08 14:05 Yajun Deng
2025-09-08 14:10 ` Matthew Wilcox
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Yajun Deng @ 2025-09-08 14:05 UTC (permalink / raw)
To: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, riel, harry.yoo
Cc: linux-mm, linux-kernel, Yajun Deng
If the anon_vma_alloc() is called, the num_children of the parent of
the anon_vma will be updated. But this operation occurs outside of
anon_vma_alloc(). There are two callers, one has itself as its parent,
while another has a real parent. That means they have the same logic.
The update of num_active_vmas and vma->anon_vma are not performed
together. These operations should be performed under a function.
Add an __anon_vma_alloc() function that implements anon_vma_alloc().
If the caller has a real parent, called __anon_vma_alloc() and pass
the parent to it. If it not, called anon_vma_alloc() directly. It will
set the parent and root of the anon_vma and also updates the num_children
of its parent anon_vma.
Introduce vma_attach_anon() and vma_detach_anon() to update
num_active_vmas with vma->anon_vma together.
Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
---
v2: fix a WARNING in unlink_anon_vmas and optimize the code
v1: https://lore.kernel.org/all/20250905132019.18915-1-yajun.deng@linux.dev/
---
mm/internal.h | 17 ++++++++++++++
mm/rmap.c | 64 +++++++++++++++++++++++++++++----------------------
2 files changed, 53 insertions(+), 28 deletions(-)
diff --git a/mm/internal.h b/mm/internal.h
index 9b0129531d00..12bc71bb2304 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -953,6 +953,23 @@ static inline bool free_area_empty(struct free_area *area, int migratetype)
return list_empty(&area->free_list[migratetype]);
}
+static inline void vma_attach_anon(struct vm_area_struct *vma,
+ struct anon_vma *anon_vma)
+{
+ mmap_assert_locked(vma->vm_mm);
+ lockdep_assert_held_write(&anon_vma->root->rwsem);
+ vma->anon_vma = anon_vma;
+ vma->anon_vma->num_active_vmas++;
+}
+
+static inline void vma_detach_anon(struct vm_area_struct *vma)
+{
+ mmap_assert_locked(vma->vm_mm);
+ lockdep_assert_held_write(&vma->anon_vma->root->rwsem);
+ vma->anon_vma->num_active_vmas--;
+ vma->anon_vma = NULL;
+}
+
/* mm/util.c */
struct anon_vma *folio_anon_vma(const struct folio *folio);
diff --git a/mm/rmap.c b/mm/rmap.c
index 34333ae3bd80..de557707c34a 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -86,15 +86,25 @@
static struct kmem_cache *anon_vma_cachep;
static struct kmem_cache *anon_vma_chain_cachep;
-static inline struct anon_vma *anon_vma_alloc(void)
+static inline struct anon_vma *__anon_vma_alloc(struct anon_vma *parent)
{
struct anon_vma *anon_vma;
anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
- if (anon_vma) {
- atomic_set(&anon_vma->refcount, 1);
- anon_vma->num_children = 0;
- anon_vma->num_active_vmas = 0;
+ if (!anon_vma)
+ return NULL;
+
+ atomic_set(&anon_vma->refcount, 1);
+ anon_vma->num_children = 0;
+ anon_vma->num_active_vmas = 0;
+ if (parent) {
+ /*
+ * The root anon_vma's rwsem is the lock actually used when we
+ * lock any of the anon_vmas in this anon_vma tree.
+ */
+ anon_vma->parent = parent;
+ anon_vma->root = parent->root;
+ } else {
anon_vma->parent = anon_vma;
/*
* Initialise the anon_vma root to point to itself. If called
@@ -102,10 +112,18 @@ static inline struct anon_vma *anon_vma_alloc(void)
*/
anon_vma->root = anon_vma;
}
+ anon_vma_lock_write(anon_vma);
+ anon_vma->parent->num_children++;
+ anon_vma_unlock_write(anon_vma);
return anon_vma;
}
+static inline struct anon_vma *anon_vma_alloc(void)
+{
+ return __anon_vma_alloc(NULL);
+}
+
static inline void anon_vma_free(struct anon_vma *anon_vma)
{
VM_BUG_ON(atomic_read(&anon_vma->refcount));
@@ -201,7 +219,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
anon_vma = anon_vma_alloc();
if (unlikely(!anon_vma))
goto out_enomem_free_avc;
- anon_vma->num_children++; /* self-parent link for new root */
allocated = anon_vma;
}
@@ -209,9 +226,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
/* page_table_lock to protect against threads */
spin_lock(&mm->page_table_lock);
if (likely(!vma->anon_vma)) {
- vma->anon_vma = anon_vma;
+ vma_attach_anon(vma, anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
- anon_vma->num_active_vmas++;
allocated = NULL;
avc = NULL;
}
@@ -355,38 +371,31 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
if (vma->anon_vma)
return 0;
- /* Then add our own anon_vma. */
- anon_vma = anon_vma_alloc();
- if (!anon_vma)
- goto out_error;
- anon_vma->num_active_vmas++;
avc = anon_vma_chain_alloc(GFP_KERNEL);
if (!avc)
- goto out_error_free_anon_vma;
+ goto out_error;
+
+ /* Then add our own anon_vma. */
+ anon_vma = __anon_vma_alloc(pvma->anon_vma);
+ if (!anon_vma)
+ goto out_error_free_avc;
- /*
- * The root anon_vma's rwsem is the lock actually used when we
- * lock any of the anon_vmas in this anon_vma tree.
- */
- anon_vma->root = pvma->anon_vma->root;
- anon_vma->parent = pvma->anon_vma;
/*
* With refcounts, an anon_vma can stay around longer than the
* process it belongs to. The root anon_vma needs to be pinned until
* this anon_vma is freed, because the lock lives in the root.
*/
get_anon_vma(anon_vma->root);
- /* Mark this anon_vma as the one where our new (COWed) pages go. */
- vma->anon_vma = anon_vma;
anon_vma_lock_write(anon_vma);
+ /* Mark this anon_vma as the one where our new (COWed) pages go. */
+ vma_attach_anon(vma, anon_vma);
anon_vma_chain_link(vma, avc, anon_vma);
- anon_vma->parent->num_children++;
anon_vma_unlock_write(anon_vma);
return 0;
- out_error_free_anon_vma:
- put_anon_vma(anon_vma);
+ out_error_free_avc:
+ anon_vma_chain_free(avc);
out_error:
unlink_anon_vmas(vma);
return -ENOMEM;
@@ -420,14 +429,13 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
anon_vma_chain_free(avc);
}
if (vma->anon_vma) {
- vma->anon_vma->num_active_vmas--;
-
/*
* vma would still be needed after unlink, and anon_vma will be prepared
* when handle fault.
*/
- vma->anon_vma = NULL;
+ vma_detach_anon(vma);
}
+
unlock_anon_vma_root(root);
/*
--
2.25.1
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] mm/rmap: make num_children and num_active_vmas update in internally
2025-09-08 14:05 [RFC PATCH v2] mm/rmap: make num_children and num_active_vmas update in internally Yajun Deng
@ 2025-09-08 14:10 ` Matthew Wilcox
2025-09-08 14:38 ` Lorenzo Stoakes
2025-09-08 14:47 ` Liam R. Howlett
2 siblings, 0 replies; 4+ messages in thread
From: Matthew Wilcox @ 2025-09-08 14:10 UTC (permalink / raw)
To: Yajun Deng
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, riel, harry.yoo, linux-mm, linux-kernel
On Mon, Sep 08, 2025 at 02:05:04PM +0000, Yajun Deng wrote:
> If the anon_vma_alloc() is called, the num_children of the parent of
> the anon_vma will be updated. But this operation occurs outside of
> anon_vma_alloc(). There are two callers, one has itself as its parent,
> while another has a real parent. That means they have the same logic.
No, they don't. This is terrible. Please stop.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] mm/rmap: make num_children and num_active_vmas update in internally
2025-09-08 14:05 [RFC PATCH v2] mm/rmap: make num_children and num_active_vmas update in internally Yajun Deng
2025-09-08 14:10 ` Matthew Wilcox
@ 2025-09-08 14:38 ` Lorenzo Stoakes
2025-09-08 14:47 ` Liam R. Howlett
2 siblings, 0 replies; 4+ messages in thread
From: Lorenzo Stoakes @ 2025-09-08 14:38 UTC (permalink / raw)
To: Yajun Deng
Cc: akpm, david, Liam.Howlett, vbabka, rppt, surenb, mhocko, riel,
harry.yoo, linux-mm, linux-kernel
On Mon, Sep 08, 2025 at 02:05:04PM +0000, Yajun Deng wrote:
> If the anon_vma_alloc() is called, the num_children of the parent of
> the anon_vma will be updated. But this operation occurs outside of
> anon_vma_alloc(). There are two callers, one has itself as its parent,
> while another has a real parent. That means they have the same logic.
No they do not. As I explained to you at length.
>
> The update of num_active_vmas and vma->anon_vma are not performed
> together. These operations should be performed under a function.
>
> Add an __anon_vma_alloc() function that implements anon_vma_alloc().
> If the caller has a real parent, called __anon_vma_alloc() and pass
> the parent to it. If it not, called anon_vma_alloc() directly. It will
> set the parent and root of the anon_vma and also updates the num_children
> of its parent anon_vma.
Doing exactly what I told you not to do...
>
> Introduce vma_attach_anon() and vma_detach_anon() to update
> num_active_vmas with vma->anon_vma together.
>
> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
NAK.
There's so much wrong with this and you've ignored review Liam and I have
spent time giving you (a resource which I have very little of), it's simply
not a good use of my time to look at this further.
Please abandon this idea, it's not good, and you're not implementing it
well.
Thanks, Lorenzo
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC PATCH v2] mm/rmap: make num_children and num_active_vmas update in internally
2025-09-08 14:05 [RFC PATCH v2] mm/rmap: make num_children and num_active_vmas update in internally Yajun Deng
2025-09-08 14:10 ` Matthew Wilcox
2025-09-08 14:38 ` Lorenzo Stoakes
@ 2025-09-08 14:47 ` Liam R. Howlett
2 siblings, 0 replies; 4+ messages in thread
From: Liam R. Howlett @ 2025-09-08 14:47 UTC (permalink / raw)
To: Yajun Deng
Cc: akpm, david, lorenzo.stoakes, vbabka, rppt, surenb, mhocko, riel,
harry.yoo, linux-mm, linux-kernel
* Yajun Deng <yajun.deng@linux.dev> [250908 10:06]:
> If the anon_vma_alloc() is called, the num_children of the parent of
> the anon_vma will be updated. But this operation occurs outside of
> anon_vma_alloc(). There are two callers, one has itself as its parent,
> while another has a real parent. That means they have the same logic.
>
> The update of num_active_vmas and vma->anon_vma are not performed
> together. These operations should be performed under a function.
>
> Add an __anon_vma_alloc() function that implements anon_vma_alloc().
> If the caller has a real parent, called __anon_vma_alloc() and pass
> the parent to it. If it not, called anon_vma_alloc() directly. It will
> set the parent and root of the anon_vma and also updates the num_children
> of its parent anon_vma.
>
> Introduce vma_attach_anon() and vma_detach_anon() to update
> num_active_vmas with vma->anon_vma together.
>
> Signed-off-by: Yajun Deng <yajun.deng@linux.dev>
> ---
> v2: fix a WARNING in unlink_anon_vmas and optimize the code
Fixed all the bugs I found in v1?
Tested this patch this time?
What about the parent fields without the lock?
> v1: https://lore.kernel.org/all/20250905132019.18915-1-yajun.deng@linux.dev/
> ---
> mm/internal.h | 17 ++++++++++++++
> mm/rmap.c | 64 +++++++++++++++++++++++++++++----------------------
> 2 files changed, 53 insertions(+), 28 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 9b0129531d00..12bc71bb2304 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -953,6 +953,23 @@ static inline bool free_area_empty(struct free_area *area, int migratetype)
> return list_empty(&area->free_list[migratetype]);
> }
>
> +static inline void vma_attach_anon(struct vm_area_struct *vma,
> + struct anon_vma *anon_vma)
Function names are still confusing.
> +{
> + mmap_assert_locked(vma->vm_mm);
> + lockdep_assert_held_write(&anon_vma->root->rwsem);
> + vma->anon_vma = anon_vma;
> + vma->anon_vma->num_active_vmas++;
> +}
> +
> +static inline void vma_detach_anon(struct vm_area_struct *vma)
> +{
> + mmap_assert_locked(vma->vm_mm);
> + lockdep_assert_held_write(&vma->anon_vma->root->rwsem);
> + vma->anon_vma->num_active_vmas--;
> + vma->anon_vma = NULL;
> +}
> +
> /* mm/util.c */
> struct anon_vma *folio_anon_vma(const struct folio *folio);
>
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 34333ae3bd80..de557707c34a 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -86,15 +86,25 @@
> static struct kmem_cache *anon_vma_cachep;
> static struct kmem_cache *anon_vma_chain_cachep;
>
> -static inline struct anon_vma *anon_vma_alloc(void)
> +static inline struct anon_vma *__anon_vma_alloc(struct anon_vma *parent)
> {
> struct anon_vma *anon_vma;
>
> anon_vma = kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
> - if (anon_vma) {
> - atomic_set(&anon_vma->refcount, 1);
> - anon_vma->num_children = 0;
> - anon_vma->num_active_vmas = 0;
> + if (!anon_vma)
> + return NULL;
> +
> + atomic_set(&anon_vma->refcount, 1);
> + anon_vma->num_children = 0;
> + anon_vma->num_active_vmas = 0;
> + if (parent) {
> + /*
> + * The root anon_vma's rwsem is the lock actually used when we
> + * lock any of the anon_vmas in this anon_vma tree.
> + */
> + anon_vma->parent = parent;
> + anon_vma->root = parent->root;
> + } else {
None of this is worth doing in this function. A wrapper only makes the
code harder to follow.
> anon_vma->parent = anon_vma;
> /*
> * Initialise the anon_vma root to point to itself. If called
> @@ -102,10 +112,18 @@ static inline struct anon_vma *anon_vma_alloc(void)
> */
> anon_vma->root = anon_vma;
> }
> + anon_vma_lock_write(anon_vma);
> + anon_vma->parent->num_children++;
> + anon_vma_unlock_write(anon_vma);
>
> return anon_vma;
> }
>
> +static inline struct anon_vma *anon_vma_alloc(void)
> +{
> + return __anon_vma_alloc(NULL);
> +}
> +
> static inline void anon_vma_free(struct anon_vma *anon_vma)
> {
> VM_BUG_ON(atomic_read(&anon_vma->refcount));
> @@ -201,7 +219,6 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> anon_vma = anon_vma_alloc();
> if (unlikely(!anon_vma))
> goto out_enomem_free_avc;
> - anon_vma->num_children++; /* self-parent link for new root */
> allocated = anon_vma;
> }
>
> @@ -209,9 +226,8 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
> /* page_table_lock to protect against threads */
> spin_lock(&mm->page_table_lock);
> if (likely(!vma->anon_vma)) {
> - vma->anon_vma = anon_vma;
> + vma_attach_anon(vma, anon_vma);
> anon_vma_chain_link(vma, avc, anon_vma);
> - anon_vma->num_active_vmas++;
> allocated = NULL;
> avc = NULL;
> }
> @@ -355,38 +371,31 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
> if (vma->anon_vma)
> return 0;
>
> - /* Then add our own anon_vma. */
> - anon_vma = anon_vma_alloc();
> - if (!anon_vma)
> - goto out_error;
> - anon_vma->num_active_vmas++;
> avc = anon_vma_chain_alloc(GFP_KERNEL);
> if (!avc)
> - goto out_error_free_anon_vma;
> + goto out_error;
> +
> + /* Then add our own anon_vma. */
> + anon_vma = __anon_vma_alloc(pvma->anon_vma);
> + if (!anon_vma)
> + goto out_error_free_avc;
>
> - /*
> - * The root anon_vma's rwsem is the lock actually used when we
> - * lock any of the anon_vmas in this anon_vma tree.
> - */
> - anon_vma->root = pvma->anon_vma->root;
> - anon_vma->parent = pvma->anon_vma;
> /*
> * With refcounts, an anon_vma can stay around longer than the
> * process it belongs to. The root anon_vma needs to be pinned until
> * this anon_vma is freed, because the lock lives in the root.
> */
> get_anon_vma(anon_vma->root);
> - /* Mark this anon_vma as the one where our new (COWed) pages go. */
> - vma->anon_vma = anon_vma;
> anon_vma_lock_write(anon_vma);
> + /* Mark this anon_vma as the one where our new (COWed) pages go. */
> + vma_attach_anon(vma, anon_vma);
> anon_vma_chain_link(vma, avc, anon_vma);
> - anon_vma->parent->num_children++;
> anon_vma_unlock_write(anon_vma);
>
> return 0;
>
> - out_error_free_anon_vma:
> - put_anon_vma(anon_vma);
> + out_error_free_avc:
> + anon_vma_chain_free(avc);
> out_error:
> unlink_anon_vmas(vma);
> return -ENOMEM;
Your plan was to reduce the complexity of the code by isolating
duplicate code in a function. This hasn't panned out in this case as
the code is much harder to follow. For instance, the parent logic was
spelled out before while now it's in __anon_vma_alloc(), which seems
unexpected. It is being initialized in the alloc function..
You had to add two functions for the two call sites, which is a hint
things are not going to work out here.
Reversing the logic above resulted in a different but similar undo steps
while hiding what has happened with the anon_vma->parent.
This isn't simplifying anything so I think you should move on.
> @@ -420,14 +429,13 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
> anon_vma_chain_free(avc);
> }
> if (vma->anon_vma) {
> - vma->anon_vma->num_active_vmas--;
> -
> /*
> * vma would still be needed after unlink, and anon_vma will be prepared
> * when handle fault.
> */
> - vma->anon_vma = NULL;
> + vma_detach_anon(vma);
> }
> +
> unlock_anon_vma_root(root);
>
> /*
> --
> 2.25.1
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2025-09-08 14:47 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-08 14:05 [RFC PATCH v2] mm/rmap: make num_children and num_active_vmas update in internally Yajun Deng
2025-09-08 14:10 ` Matthew Wilcox
2025-09-08 14:38 ` Lorenzo Stoakes
2025-09-08 14:47 ` Liam R. Howlett
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox