linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Liam Howlett <liam.howlett@oracle.com>
To: "maple-tree@lists.infradead.org" <maple-tree@lists.infradead.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@redhat.com>,
	Douglas Gilbert <dgilbert@interlog.com>
Cc: Song Liu <songliubraving@fb.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	Matthew Wilcox <willy@infradead.org>,
	David Rientjes <rientjes@google.com>,
	Axel Rasmussen <axelrasmussen@google.com>,
	Suren Baghdasaryan <surenb@google.com>,
	Vlastimil Babka <vbabka@suse.cz>, Rik van Riel <riel@surriel.com>,
	Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH v3 10/66] kernel/fork: Use maple tree for dup_mmap() during forking
Date: Tue, 5 Oct 2021 01:30:35 +0000	[thread overview]
Message-ID: <20211005012959.1110504-11-Liam.Howlett@oracle.com> (raw)
In-Reply-To: <20211005012959.1110504-1-Liam.Howlett@oracle.com>

From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>

The maple tree was already tracking VMAs in this function as of an
earlier commit, but the VMAs were still being iterated by walking the
vm_next linked list.  Change the iteration to use a maple tree native
iterator with RCU locking, and switch to the maple tree advanced API to
avoid multiple walks of the tree during insert operations.

anon_vma_fork() may enter the slow path and call schedule(), which would
cause RCU issues if the RCU read lock were held.  Drop the RCU read lock
for the body of each loop iteration and reacquire it before advancing
the iterator.  There is no harm in this approach as the mmap_sem is
taken for write/read and held across the schedule() call so the VMAs
will not change.

Note that the bulk allocation of nodes is also happening here for
performance reasons.  The node calculations are done internally to the
tree and use the VMA count and assume the worst-case node requirements.
The VM_DONTCOPY flag does not allow for the most efficient copy method
of the tree and so a bulk loading algorithm is used.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
 include/linux/mm.h       |  2 --
 include/linux/sched/mm.h |  9 +++++++++
 kernel/fork.c            | 35 +++++++++++++++++++++++++++++------
 mm/mmap.c                |  4 ----
 4 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index df592bf937f9..73a52aba448f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2500,8 +2500,6 @@ extern bool arch_has_descending_max_zone_pfns(void);
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
-/* maple_tree */
-void vma_store(struct mm_struct *mm, struct vm_area_struct *vma);
 
 /* interval_tree.c */
 void vma_interval_tree_insert(struct vm_area_struct *node,
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 5561486fddef..87f4f53a3d7d 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -8,6 +8,7 @@
 #include <linux/mm_types.h>
 #include <linux/gfp.h>
 #include <linux/sync_core.h>
+#include <linux/maple_tree.h>
 
 /*
  * Routines for handling mm_structs
@@ -67,11 +68,19 @@ static inline void mmdrop(struct mm_struct *mm)
  */
 static inline void mmget(struct mm_struct *mm)
 {
+	mt_set_in_rcu(&mm->mm_mt);
 	atomic_inc(&mm->mm_users);
 }
 
 static inline bool mmget_not_zero(struct mm_struct *mm)
 {
+	/*
+	 * There is a race below during task tear down that can cause the maple
+	 * tree to enter rcu mode with only a single user.  If this race
+	 * happens, the result would be that the maple tree nodes would remain
+	 * active for an extra RCU read cycle.
+	 */
+	mt_set_in_rcu(&mm->mm_mt);
 	return atomic_inc_not_zero(&mm->mm_users);
 }
 
diff --git a/kernel/fork.c b/kernel/fork.c
index e8d602de40e8..b8b7063d94e8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -492,7 +492,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 	struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
 	struct rb_node **rb_link, *rb_parent;
 	int retval;
-	unsigned long charge;
+	unsigned long charge = 0;
+	MA_STATE(old_mas, &oldmm->mm_mt, 0, 0);
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 	LIST_HEAD(uf);
 
 	uprobe_start_dup_mmap();
@@ -526,11 +528,19 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		goto out;
 
 	prev = NULL;
-	for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
+
+	retval = mas_entry_count(&mas, oldmm->map_count);
+	if (retval)
+		goto out;
+
+	rcu_read_lock();
+	mas_for_each(&old_mas, mpnt, ULONG_MAX) {
 		struct file *file;
 
+		rcu_read_unlock();
 		if (mpnt->vm_flags & VM_DONTCOPY) {
 			vm_stat_account(mm, mpnt->vm_flags, -vma_pages(mpnt));
+			rcu_read_lock();
 			continue;
 		}
 		charge = 0;
@@ -540,7 +550,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		 */
 		if (fatal_signal_pending(current)) {
 			retval = -EINTR;
-			goto out;
+			goto loop_out;
 		}
 		if (mpnt->vm_flags & VM_ACCOUNT) {
 			unsigned long len = vma_pages(mpnt);
@@ -568,6 +578,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			tmp->anon_vma = NULL;
 		} else if (anon_vma_fork(tmp, mpnt))
 			goto fail_nomem_anon_vma_fork;
+
 		tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
 		file = tmp->vm_file;
 		if (file) {
@@ -606,7 +617,11 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		rb_parent = &tmp->vm_rb;
 
 		/* Link the vma into the MT */
-		vma_store(mm, tmp);
+		mas_lock(&mas);
+		mas.index = tmp->vm_start;
+		mas.last = tmp->vm_end - 1;
+		mas_store(&mas, tmp);
+		mas_unlock(&mas);
 
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
@@ -616,10 +631,17 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			tmp->vm_ops->open(tmp);
 
 		if (retval)
-			goto out;
+			goto loop_out;
+
+		rcu_read_lock();
 	}
+	rcu_read_unlock();
 	/* a new mm has just been created */
 	retval = arch_dup_mmap(oldmm, mm);
+loop_out:
+	rcu_read_lock();
+	mas_destroy(&mas);
+	rcu_read_unlock();
 out:
 	mmap_write_unlock(mm);
 	flush_tlb_mm(oldmm);
@@ -635,7 +657,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 fail_nomem:
 	retval = -ENOMEM;
 	vm_unacct_memory(charge);
-	goto out;
+	goto loop_out;
 }
 
 static inline int mm_alloc_pgd(struct mm_struct *mm)
@@ -1112,6 +1134,7 @@ static inline void __mmput(struct mm_struct *mm)
 {
 	VM_BUG_ON(atomic_read(&mm->mm_users));
 
+	mt_clear_in_rcu(&mm->mm_mt);
 	uprobe_clear_state(mm);
 	exit_aio(mm);
 	ksm_exit(mm);
diff --git a/mm/mmap.c b/mm/mmap.c
index 08f7338ca4f0..3da639586715 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -782,10 +782,6 @@ void vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma)
 		GFP_KERNEL);
 }
 
-void vma_store(struct mm_struct *mm, struct vm_area_struct *vma) {
-	vma_mt_store(mm, vma);
-}
-
 static void
 __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct vm_area_struct *prev, struct rb_node **rb_link,
-- 
2.30.2


  parent reply	other threads:[~2021-10-05  1:35 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-05  1:30 [PATCH v3 00/66] Introducing the Maple Tree Liam Howlett
2021-10-05  1:30 ` [PATCH v3 01/66] radix tree test suite: Add pr_err define Liam Howlett
2021-10-05  1:30 ` [PATCH v3 03/66] radix tree test suite: Add allocation counts and size to kmem_cache Liam Howlett
2021-10-05  1:30 ` [PATCH v3 02/66] radix tree test suite: Add kmem_cache_set_non_kernel() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 04/66] radix tree test suite: Add support for slab bulk APIs Liam Howlett
2021-10-05  1:30 ` [PATCH v3 05/66] Maple Tree: Add new data structure Liam Howlett
2021-10-05  1:30 ` [PATCH v3 06/66] mm: Start tracking VMAs with maple tree Liam Howlett
2021-10-05  1:30 ` [PATCH v3 07/66] mm/mmap: Use the maple tree in find_vma() instead of the rbtree Liam Howlett
2021-10-05  1:30 ` [PATCH v3 09/66] mm/mmap: Use maple tree for unmapped_area{_topdown} Liam Howlett
2021-10-05  1:30 ` [PATCH v3 08/66] mm/mmap: Use the maple tree for find_vma_prev() instead of the rbtree Liam Howlett
2021-10-05  1:30 ` [PATCH v3 11/66] mm: Remove rb tree Liam Howlett
2021-10-05  1:30 ` Liam Howlett [this message]
2021-10-05  1:30 ` [PATCH v3 14/66] mm: Optimize find_exact_vma() to use vma_lookup() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 13/66] xen/privcmd: Optimized privcmd_ioctl_mmap() by using vma_lookup() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 12/66] mmap: Change zeroing of maple tree in __vma_adjust Liam Howlett
2021-10-05  1:30 ` [PATCH v3 16/66] mm/mmap: Change do_brk_flags() to expand existing VMA and add do_brk_munmap() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 15/66] mm/khugepaged: Optimize collapse_pte_mapped_thp() by using vma_lookup() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 18/66] mm/mmap: Use advanced maple tree API for mmap_region() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 17/66] mm: Use maple tree operations for find_vma_intersection() and find_vma() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 19/66] mm: Remove vmacache Liam Howlett
2021-10-05  1:30 ` [PATCH v3 20/66] mm/mmap: Move mmap_region() below do_munmap() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 21/66] mm/mmap: Convert count_vma_pages_range() to use ma_state Liam Howlett
2021-10-05  1:30 ` [PATCH v3 22/66] mm/mmap: Reorganize munmap to use maple states Liam Howlett
2021-10-05  1:30 ` [PATCH v3 23/66] mm/mmap: Change do_brk_munmap() to use do_mas_align_munmap() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 24/66] mm: Introduce vma_next() and vma_prev() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 26/66] arch/parisc: Remove mmap linked list from kernel/cache Liam Howlett
2021-10-05  1:30 ` [PATCH v3 25/66] arch/arm64: Remove mmap linked list from vdso Liam Howlett
2021-10-05  1:30 ` [PATCH v3 27/66] arch/powerpc: Remove mmap linked list from mm/book3s32/tlb Liam Howlett
2021-10-05  1:30 ` [PATCH v3 29/66] arch/s390: Use maple tree iterators instead of linked list Liam Howlett
2021-10-05  1:30 ` [PATCH v3 28/66] arch/powerpc: Remove mmap linked list from mm/book3s64/subpage_prot Liam Howlett
2021-10-05  1:30 ` [PATCH v3 30/66] arch/x86: Use maple tree iterators for vdso/vma Liam Howlett
2021-10-05  1:30 ` [PATCH v3 31/66] arch/xtensa: Use maple tree iterators for unmapped area Liam Howlett
2021-10-05  1:30 ` [PATCH v3 33/66] drivers/tee/optee: Use maple tree iterators for __check_mem_type() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 32/66] drivers/misc/cxl: Use maple tree iterators for cxl_prefault_vma() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 34/66] fs/binfmt_elf: Use maple tree iterators for fill_files_note() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 35/66] fs/coredump: Use maple tree iterators in place of linked list Liam Howlett
2021-10-05  1:30 ` [PATCH v3 37/66] fs/proc/base: " Liam Howlett
2021-10-05  1:30 ` [PATCH v3 36/66] fs/exec: Use vma_next() instead " Liam Howlett
2021-10-05  1:30 ` [PATCH v3 39/66] fs/userfaultfd: Stop using vma " Liam Howlett
2021-10-05  1:30 ` [PATCH v3 38/66] fs/proc/task_mmu: Stop using linked list and highest_vm_end Liam Howlett
2021-10-05  1:30 ` [PATCH v3 40/66] ipc/shm: Stop using the vma linked list Liam Howlett
2021-10-05  1:31 ` [PATCH v3 41/66] kernel/acct: Use maple tree iterators instead of " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 42/66] kernel/events/core: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 43/66] kernel/events/uprobes: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 45/66] kernel/fork: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 46/66] arch/um/kernel/tlb: Stop using " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 44/66] kernel/sched/fair: Use maple tree iterators instead of " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 47/66] bpf: Remove VMA " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 48/66] mm/gup: Use maple tree navigation instead of " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 49/66] mm/khugepaged: Use maple tree iterators instead of vma " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 50/66] mm/ksm: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 51/66] mm/madvise: Use vma_next " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 54/66] mm/mlock: Use maple tree iterators " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 52/66] mm/memcontrol: Stop using mm->highest_vm_end Liam Howlett
2021-10-05  1:31 ` [PATCH v3 53/66] mm/mempolicy: Use maple tree iterators instead of vma linked list Liam Howlett
2021-10-05  1:31 ` [PATCH v3 55/66] mm/mprotect: Use maple tree navigation " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 56/66] mm/mremap: Use vma_next() " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 57/66] mm/msync: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 58/66] mm/oom_kill: Use maple tree iterators " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 59/66] mm/pagewalk: Use vma_next() " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 60/66] mm/swapfile: Use maple tree iterator " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 61/66] damon: Change vma iterator to mas_for_each Liam Howlett
2021-10-05  1:31 ` [PATCH v3 63/66] s390: Use the maple tree iterator for vdso Liam Howlett
2021-10-05  1:31 ` [PATCH v3 62/66] powerpc: Use " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 64/66] i915: Use the " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 65/66] mm: Remove the vma linked list Liam Howlett
2021-10-05  1:31 ` [PATCH v3 66/66] mm/mmap: Drop range_has_overlap() function Liam Howlett

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211005012959.1110504-11-Liam.Howlett@oracle.com \
    --to=liam.howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=axelrasmussen@google.com \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=dgilbert@interlog.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=maple-tree@lists.infradead.org \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=riel@surriel.com \
    --cc=rientjes@google.com \
    --cc=songliubraving@fb.com \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox