* [PATCH] mm/mmap: Clean up validate_mm() calls
@ 2023-07-04 18:24 Liam R. Howlett
2023-07-04 18:36 ` Linus Torvalds
From: Liam R. Howlett @ 2023-07-04 18:24 UTC (permalink / raw)
To: linux-mm, linux-kernel, Andrew Morton
Cc: Liam R. Howlett, Linus Torvalds, Oliver Sang
validate_mm() calls are too spread out and duplicated in numerous
locations. Also, now that the stack write is done under the write lock,
it is not necessary to validate the mm prior to write operations.
Add a validate_mm() to the stack expansions, and to vma_complete() so
that numerous others may be dropped.
Note that vma_link() (and, via its call path, insert_vm_struct()) already
calls validate_mm().
vma_merge() also had an unnecessary call to vma_iter_free(), left over from
the earlier logic change that aborts sooner when no merging is necessary.
Drop the extra validate_mm() calls at the start of functions and in error
paths that won't write to the tree.
Relocate the validate_mm() call in do_brk_flags() to avoid re-running the
same check when vma_complete() is used.
The call in the error path of mmap_region() is left intentionally
because of the complexity of the function and the potential for drivers to
modify the tree.
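For context, a rough sketch of what validate_mm() amounts to (simplified,
not the exact upstream code): it is purely a debug check that compiles away
when CONFIG_DEBUG_VM_MAPLE_TREE is not set, so duplicated calls never change
behaviour and only cost time on debug builds:

#if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
static void validate_mm(struct mm_struct *mm)
{
	struct vm_area_struct *vma;
	VMA_ITERATOR(vmi, mm, 0);

	mt_validate(&mm->mm_mt);	/* internal maple tree consistency */
	for_each_vma(vmi, vma) {
		/* cross-check each VMA against the tree, anon_vma chains, etc. */
	}
}
#else
#define validate_mm(mm)		do { } while (0)
#endif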
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oliver Sang <oliver.sang@intel.com>
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
mm/mmap.c | 24 ++++--------------------
1 file changed, 4 insertions(+), 20 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 204ddcd52625..964a8aa59297 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -597,6 +597,7 @@ static inline void vma_complete(struct vma_prepare *vp,
}
if (vp->insert && vp->file)
uprobe_mmap(vp->insert);
+ validate_mm(mm);
}
/*
@@ -678,7 +679,6 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
vma_iter_store(vmi, vma);
vma_complete(&vp, vmi, vma->vm_mm);
- validate_mm(vma->vm_mm);
return 0;
nomem:
@@ -718,7 +718,6 @@ int vma_shrink(struct vma_iterator *vmi, struct vm_area_struct *vma,
vma->vm_end = end;
vma->vm_pgoff = pgoff;
vma_complete(&vp, vmi, vma->vm_mm);
- validate_mm(vma->vm_mm);
return 0;
}
@@ -891,7 +890,6 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
long adj_start = 0;
- validate_mm(mm);
/*
* We later require that vma->vm_flags == vm_flags,
* so this tests vma->vm_flags & VM_SPECIAL, too.
@@ -1018,10 +1016,7 @@ struct vm_area_struct *vma_merge(struct vma_iterator *vmi, struct mm_struct *mm,
}
vma_complete(&vp, vmi, mm);
- vma_iter_free(vmi);
- validate_mm(mm);
khugepaged_enter_vma(res, vm_flags);
-
return res;
}
@@ -1196,7 +1191,6 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags_t vm_flags;
int pkey = 0;
- validate_mm(mm);
*populate = 0;
if (!len)
@@ -2023,6 +2017,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address)
anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma(vma, vma->vm_flags);
mas_destroy(&mas);
+ validate_mm(mm);
return error;
}
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2111,6 +2106,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma(vma, vma->vm_flags);
mas_destroy(&mas);
+ validate_mm(mm);
return error;
}
@@ -2288,7 +2284,6 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
remove_vma(vma, false);
}
vm_unacct_memory(nr_accounted);
- validate_mm(mm);
}
/*
@@ -2325,8 +2320,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct vm_area_struct *new;
int err;
- validate_mm(vma->vm_mm);
-
WARN_ON(vma->vm_start >= addr);
WARN_ON(vma->vm_end <= addr);
@@ -2383,7 +2376,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
/* Success. */
if (new_below)
vma_next(vmi);
- validate_mm(vma->vm_mm);
return 0;
out_free_mpol:
@@ -2392,7 +2384,6 @@ int __split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
vma_iter_free(vmi);
out_free_vma:
vm_area_free(new);
- validate_mm(vma->vm_mm);
return err;
}
@@ -3043,7 +3034,6 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct mm_struct *mm = current->mm;
struct vma_prepare vp;
- validate_mm(mm);
/*
* Check against address space limits by the changed size
* Note: This happens *after* clearing old mappings in some code paths.
@@ -3095,6 +3085,7 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
goto mas_store_fail;
mm->map_count++;
+ validate_mm(mm);
ksm_add_vma(vma);
out:
perf_event_mmap(vma);
@@ -3103,7 +3094,6 @@ static int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
if (flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
vm_flags_set(vma, VM_SOFTDIRTY);
- validate_mm(mm);
return 0;
mas_store_fail:
@@ -3284,7 +3274,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
bool faulted_in_anon_vma = true;
VMA_ITERATOR(vmi, mm, addr);
- validate_mm(mm);
/*
* If anonymous vma has not yet been faulted, update new pgoff
* to match new location, to increase its chance of merging.
@@ -3343,7 +3332,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
goto out_vma_link;
*need_rmap_locks = false;
}
- validate_mm(mm);
return new_vma;
out_vma_link:
@@ -3359,7 +3347,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
out_free_vma:
vm_area_free(new_vma);
out:
- validate_mm(mm);
return NULL;
}
@@ -3496,7 +3483,6 @@ static struct vm_area_struct *__install_special_mapping(
int ret;
struct vm_area_struct *vma;
- validate_mm(mm);
vma = vm_area_alloc(mm);
if (unlikely(vma == NULL))
return ERR_PTR(-ENOMEM);
@@ -3519,12 +3505,10 @@ static struct vm_area_struct *__install_special_mapping(
perf_event_mmap(vma);
- validate_mm(mm);
return vma;
out:
vm_area_free(vma);
- validate_mm(mm);
return ERR_PTR(ret);
}
--
2.39.2
* Re: [PATCH] mm/mmap: Clean up validate_mm() calls
2023-07-04 18:24 [PATCH] mm/mmap: Clean up validate_mm() calls Liam R. Howlett
@ 2023-07-04 18:36 ` Linus Torvalds
2023-07-04 18:47 ` Liam R. Howlett
From: Linus Torvalds @ 2023-07-04 18:36 UTC (permalink / raw)
To: Liam R. Howlett; +Cc: linux-mm, linux-kernel, Andrew Morton, Oliver Sang
On Tue, 4 Jul 2023 at 11:25, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
>
> validate_mm() calls are too spread out and duplicated in numerous
> locations. Also, now that the stack write is done under the write lock,
> it is not necessary to validate the mm prior to write operations.
So while I applied the fixes directly since I was doing all the
write-locking stuff (and asked for the locking cleanup), I'm hoping
these kinds of cleanups will now go back to normal and go through
Andrew.
I do have a question related to the write locking: now that we should
always hold the mmap lock for writing when doing any modifications,
can the "lock_is_held()" assertions be tightened?
Right now it's "any locking", but for actual modification it should
probably be using
lockdep_is_held_type(mt->ma_external_lock, 1)
but there's just one 'mt_lock_is_held()' function (presumably because
the internal lock is always just a spinlock that doesn't have the
reader/writer distinction).
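As a rough illustration (not the exact upstream macros; the follow-ups below
work out that the last argument for an exclusive/write check should be 0,
and the actual patch later in this thread introduces mt_write_lock_is_held()):

#ifdef CONFIG_LOCKDEP
/* current check: the external lock is held in *any* mode */
#define mt_lock_is_held(mt)	lock_is_held((mt)->ma_external_lock)

/* tightened check: held for writing (read argument: 0 = write, 1 = read, -1 = any) */
#define mt_write_lock_is_held(mt) \
	lock_is_held_type((mt)->ma_external_lock, 0)
#endif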
Linus
* Re: [PATCH] mm/mmap: Clean up validate_mm() calls
2023-07-04 18:36 ` Linus Torvalds
@ 2023-07-04 18:47 ` Liam R. Howlett
2023-07-05 20:46 ` Liam R. Howlett
From: Liam R. Howlett @ 2023-07-04 18:47 UTC (permalink / raw)
To: Linus Torvalds
Cc: linux-mm, linux-kernel, Andrew Morton, Oliver Sang,
Matthew Wilcox (Oracle)
* Linus Torvalds <torvalds@linux-foundation.org> [230704 14:36]:
> On Tue, 4 Jul 2023 at 11:25, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> >
> > validate_mm() calls are too spread out and duplicated in numerous
> > locations. Also, now that the stack write is done under the write lock,
> > it is not necessary to validate the mm prior to write operations.
>
> So while I applied the fixes directly since I was doing all the
> write-locking stuff (and asked for the locking cleanup), I'm hoping
> these kinds of cleanups will now go back to normal and go through
> Andrew.
>
> I do have a question related to the write locking: now that we should
> always hold the mmap lock for writing when doing any modifications,
> can the "lock_is_held()" assertions be tightened?
>
> Right now it's "any locking", but for actual modification it should
> probably be using
>
> lockdep_is_held_type(mt->ma_external_lock, 1)
>
> but there's just one 'mt_lock_is_held()' function (presumably because
> the internal lock is always just a spinlock that doesn't have the
> reader/writer distinction).
Ah, yes. I was trying to do just that, but ran into an issue and backed
off from fully fixing this part until later.
The issue arises from using the same external lock in the munmap() case,
where I use a second tree to track the detached VMAs. Using the spinlock
instead causes issues, because allocations can potentially sleep while it
is held.
So, I'm still working out a way to do what you (and willy) asked here.
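For reference, a rough sketch of the conflict in the munmap path (simplified
from do_vmi_align_munmap(); not the exact upstream code):

struct maple_tree mt_detach;
MA_STATE(mas_detach, &mt_detach, 0, 0);

mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
mt_set_external_lock(&mt_detach, &mm->mmap_lock);  /* external lock = mmap_lock */

/* VMAs being unmapped are stored in mt_detach while the write lock is held */

mmap_write_downgrade(mm);	/* or the write lock is dropped entirely */

/*
 * The detached VMAs and mt_detach itself are only torn down after this
 * point, so a strict "external lock held for write" assertion on
 * mt_detach would trip here.  Using mt_detach's internal spinlock
 * instead is also a problem, because storing into the tree can allocate
 * nodes and those allocations may sleep.
 */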
Thanks,
Liam
* Re: [PATCH] mm/mmap: Clean up validate_mm() calls
2023-07-04 18:47 ` Liam R. Howlett
@ 2023-07-05 20:46 ` Liam R. Howlett
From: Liam R. Howlett @ 2023-07-05 20:46 UTC (permalink / raw)
To: Linus Torvalds, linux-mm, linux-kernel, Andrew Morton,
Oliver Sang, Matthew Wilcox (Oracle)
[-- Attachment #1: Type: text/plain, Size: 2324 bytes --]
* Liam R. Howlett <Liam.Howlett@Oracle.com> [230704 14:47]:
> * Linus Torvalds <torvalds@linux-foundation.org> [230704 14:36]:
> > On Tue, 4 Jul 2023 at 11:25, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> > >
> > > validate_mm() calls are too spread out and duplicated in numerous
> > > locations. Also, now that the stack write is done under the write lock,
> > > it is not necessary to validate the mm prior to write operations.
> >
> > So while I applied the fixes directly since I was doing all the
> > write-locking stuff (and asked for the locking cleanup), I'm hoping
> > these kinds of cleanups will now go back to normal and go through
> > Andrew.
> >
> > I do have a question related to the write locking: now that we should
> > always hold the mmap lock for writing when doing any modifications,
> > can the "lock_is_held()" assertions be tightened?
> >
> > Right now it's "any locking", but for actual modification it should
> > probably be using
> >
> > lockdep_is_held_type(mt->ma_external_lock, 1)
For completeness of the email thread: it turns out we want 0 as the last
parameter.
(include/linux/lockdep.h)
/*
 * Acquire a lock.
 *
 * Values for "read":
 *
 * 0: exclusive (write) acquire
 * 1: read-acquire (no recursion allowed)
 * 2: read-acquire with same-instance recursion allowed
 *
 * Values for check:
 *
 * 0: simple checks (freeing, held-at-exit-time, etc.)
 * 1: full validation
 */
...
/*
 * Same "read" as for lock_acquire(), except -1 means any.
 */
extern int lock_is_held_type(const struct lockdep_map *lock, int read);
> >
> > but there's just one 'mt_lock_is_held()' function (presumably because
> > the internal lock is always just a spinlock that doesn't have the
> > reader/writer distinction).
>
> Ah, yes. I was trying to do just that, but ran into an issue and backed
> out of fully fixing this portion up until later.
>
Here are two patches to increase the strictness of the maple tree
locking. I've boot tested them on x86_64 with the bot's config and
confirmed that the lockdep problem is resolved.
The first introduces the new mt_write_locked() function, which ensures
the lock type is for writing.
The second updates the munmap path to avoid triggering the warnings
associated with dropping the mmap_lock prior to freeing the VMAs.
Thanks,
Liam
[-- Attachment #2: 0001-maple_tree-Be-more-strict-about-locking.patch --]
[-- Type: text/x-diff, Size: 2895 bytes --]
From c214e54a20258ca9c3ff787b435b04a1d900ad21 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Wed, 5 Jul 2023 12:37:47 -0400
Subject: [PATCH 1/2] maple_tree: Be more strict about locking
Use lockdep to check that the maple tree write path holds the lock in
write mode.
Introduce mt_write_lock_is_held() to check whether the lock is held for
writing. Update the necessary rcu_dereference_protected() checks to use
the new write-lock check.
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
include/linux/maple_tree.h | 12 ++++++++++--
lib/maple_tree.c | 10 ++++++++--
2 files changed, 18 insertions(+), 4 deletions(-)
diff --git a/include/linux/maple_tree.h b/include/linux/maple_tree.h
index 295548cca8b3..f856d67a5d7c 100644
--- a/include/linux/maple_tree.h
+++ b/include/linux/maple_tree.h
@@ -184,12 +184,20 @@ enum maple_type {
#ifdef CONFIG_LOCKDEP
typedef struct lockdep_map *lockdep_map_p;
-#define mt_lock_is_held(mt) lock_is_held(mt->ma_external_lock)
+#define mt_write_lock_is_held(mt) \
+ (!(mt)->ma_external_lock || \
+ lock_is_held_type((mt)->ma_external_lock, 0))
+
+#define mt_lock_is_held(mt) \
+ (!(mt)->ma_external_lock || \
+ lock_is_held((mt)->ma_external_lock))
+
#define mt_set_external_lock(mt, lock) \
(mt)->ma_external_lock = &(lock)->dep_map
#else
typedef struct { /* nothing */ } lockdep_map_p;
-#define mt_lock_is_held(mt) 1
+#define mt_lock_is_held(mt) 1
+#define mt_write_lock_is_held(mt) 1
#define mt_set_external_lock(mt, lock) do { } while (0)
#endif
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index bfffbb7cab26..1c9eab89e34b 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -804,6 +804,12 @@ static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
}
}
+static inline bool mt_write_locked(const struct maple_tree *mt)
+{
+ return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
+ lockdep_is_held(&mt->ma_lock);
+}
+
static inline bool mt_locked(const struct maple_tree *mt)
{
return mt_external_lock(mt) ? mt_lock_is_held(mt) :
@@ -819,7 +825,7 @@ static inline void *mt_slot(const struct maple_tree *mt,
static inline void *mt_slot_locked(struct maple_tree *mt, void __rcu **slots,
unsigned char offset)
{
- return rcu_dereference_protected(slots[offset], mt_locked(mt));
+ return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
}
/*
* mas_slot_locked() - Get the slot value when holding the maple tree lock.
@@ -862,7 +868,7 @@ static inline void *mas_root(struct ma_state *mas)
static inline void *mt_root_locked(struct maple_tree *mt)
{
- return rcu_dereference_protected(mt->ma_root, mt_locked(mt));
+ return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
}
/*
--
2.39.2
[-- Attachment #3: 0002-mm-mmap-Change-detached-vma-locking-scheme.patch --]
[-- Type: text/x-diff, Size: 1065 bytes --]
From 58fd73f90e7331678a728ada9bf92013105afbc1 Mon Sep 17 00:00:00 2001
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Date: Wed, 5 Jul 2023 14:47:49 -0400
Subject: [PATCH 2/2] mm/mmap: Change detached vma locking scheme
Don't set the detached VMA tree's external lock to the mmap_lock, so that
the tree does not complain about being unlocked when the mmap_lock is
dropped prior to freeing the tree.
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
mm/mmap.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 964a8aa59297..3bb5a4e1f4f1 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2426,7 +2426,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
unsigned long locked_vm = 0;
MA_STATE(mas_detach, &mt_detach, 0, 0);
mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
- mt_set_external_lock(&mt_detach, &mm->mmap_lock);
+ mt_detach.ma_external_lock = NULL;
/*
* If we need to split any vma, do it now to save pain later.
--
2.39.2