linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
To: linux-mm@kvack.org, Andrew Morton <akpm@linux-foundation.org>
Cc: Suren Baghdasaryan <surenb@google.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Lorenzo Stoakes <lstoakes@gmail.com>,
	Matthew Wilcox <willy@infradead.org>,
	sidhartha.kumar@oracle.com,
	"Paul E . McKenney" <paulmck@kernel.org>,
	Bert Karwatzki <spasswolf@web.de>, Jiri Olsa <olsajiri@gmail.com>,
	linux-kernel@vger.kernel.org, Kees Cook <kees@kernel.org>,
	"Liam R. Howlett" <Liam.Howlett@Oracle.com>
Subject: [PATCH v2 12/15] mm/mmap: Avoid zeroing vma tree in mmap_region()
Date: Tue, 25 Jun 2024 15:11:42 -0400	[thread overview]
Message-ID: <20240625191145.3382793-13-Liam.Howlett@oracle.com> (raw)
In-Reply-To: <20240625191145.3382793-1-Liam.Howlett@oracle.com>

From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>

Instead of zeroing the vma tree and then overwriting the area, let the
area be overwritten and then clean up the gathered vmas using
vms_complete_munmap_vmas().

Temporarily keep track of the number of accounted pages that will be
removed and reduce the charged amount by that number.

This also drops the validate_mm() call in the vma_expand() function.
This is necessary because the check would fail: the mm map_count is
incorrect during a vma expansion until vms_complete_munmap_vmas() has
cleaned up the gathered vmas.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
 mm/internal.h |  1 +
 mm/mmap.c     | 65 +++++++++++++++++++++++++++++++--------------------
 2 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index b0300cb22353..2ad6310059eb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1502,6 +1502,7 @@ struct vma_munmap_struct {
 	unsigned long stack_vm;
 	unsigned long data_vm;
 	bool unlock;			/* Unlock after the munmap */
+	bool cleared_ptes;		/* If the PTEs are already cleared */
 };
 
 void __meminit __init_single_page(struct page *page, unsigned long pfn,
diff --git a/mm/mmap.c b/mm/mmap.c
index 5efcba084e12..b7f47964aaf0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -406,17 +406,21 @@ anon_vma_interval_tree_post_update_vma(struct vm_area_struct *vma)
 }
 
 static unsigned long count_vma_pages_range(struct mm_struct *mm,
-		unsigned long addr, unsigned long end)
+		unsigned long addr, unsigned long end,
+		unsigned long *nr_accounted)
 {
 	VMA_ITERATOR(vmi, mm, addr);
 	struct vm_area_struct *vma;
 	unsigned long nr_pages = 0;
 
+	*nr_accounted = 0;
 	for_each_vma_range(vmi, vma, end) {
 		unsigned long vm_start = max(addr, vma->vm_start);
 		unsigned long vm_end = min(end, vma->vm_end);
 
 		nr_pages += PHYS_PFN(vm_end - vm_start);
+		if (vma->vm_flags & VM_ACCOUNT)
+			*nr_accounted += PHYS_PFN(vm_end - vm_start);
 	}
 
 	return nr_pages;
@@ -527,6 +531,7 @@ static inline void init_vma_munmap(struct vma_munmap_struct *vms,
 	vms->exec_vm = vms->stack_vm = vms->data_vm = 0;
 	vms->unmap_start = FIRST_USER_ADDRESS;
 	vms->unmap_end = USER_PGTABLES_CEILING;
+	vms->cleared_ptes = false;
 }
 
 /*
@@ -735,7 +740,6 @@ int vma_expand(struct vma_iterator *vmi, struct vm_area_struct *vma,
 	vma_iter_store(vmi, vma);
 
 	vma_complete(&vp, vmi, vma->vm_mm);
-	validate_mm(vma->vm_mm);
 	return 0;
 
 nomem:
@@ -2631,6 +2635,8 @@ static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
 	if (vms->unlock)
 		mmap_write_downgrade(mm);
 
+	if (vms->cleared_ptes)
+		goto cleared_ptes;
 	/*
 	 * We can free page tables without write-locking mmap_lock because VMAs
 	 * were isolated before we downgraded mmap_lock.
@@ -2639,6 +2645,7 @@ static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
 	unmap_region(mm, mas_detach, vms->vma, vms->prev, vms->next,
 		     vms->unmap_start, vms->unmap_end, vms->vma_count,
 		     !vms->unlock);
+cleared_ptes:
 	/* Update high watermark before we lower total_vm */
 	update_hiwater_vm(mm);
 	/* Stat accounting */
@@ -2927,24 +2934,19 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long merge_start = addr, merge_end = end;
 	bool writable_file_mapping = false;
 	pgoff_t vm_pgoff;
-	int error;
+	int error = -ENOMEM;
 	VMA_ITERATOR(vmi, mm, addr);
+	unsigned long nr_pages, nr_accounted;
 
-	/* Check against address space limit. */
-	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
-		unsigned long nr_pages;
-
-		/*
-		 * MAP_FIXED may remove pages of mappings that intersects with
-		 * requested mapping. Account for the pages it would unmap.
-		 */
-		nr_pages = count_vma_pages_range(mm, addr, end);
-
-		if (!may_expand_vm(mm, vm_flags,
-					(len >> PAGE_SHIFT) - nr_pages))
-			return -ENOMEM;
-	}
+	nr_pages = count_vma_pages_range(mm, addr, end, &nr_accounted);
 
+	/* Check against address space limit. */
+	/*
+	 * MAP_FIXED may remove pages of mappings that intersect with the
+	 * requested mapping. Account for the pages it would unmap.
+	 */
+	if (!may_expand_vm(mm, vm_flags, (len >> PAGE_SHIFT) - nr_pages))
+		return -ENOMEM;
 
 	if (unlikely(!can_modify_mm(mm, addr, end)))
 		return -EPERM;
@@ -2962,14 +2964,12 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		if (vms_gather_munmap_vmas(&vms, &mas_detach))
 			return -ENOMEM;
 
-		if (vma_iter_clear_gfp(&vmi, addr, end, GFP_KERNEL))
-			return -ENOMEM;
-
-		vms_complete_munmap_vmas(&vms, &mas_detach);
 		next = vms.next;
 		prev = vms.prev;
 		vma = NULL;
 	} else {
+		/* Minimal setup of vms */
+		vms.nr_pages = 0;
 		next = vma_next(&vmi);
 		prev = vma_prev(&vmi);
 		if (prev)
@@ -2981,8 +2981,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 */
 	if (accountable_mapping(file, vm_flags)) {
 		charged = len >> PAGE_SHIFT;
+		charged -= nr_accounted;
 		if (security_vm_enough_memory_mm(mm, charged))
-			return -ENOMEM;
+			goto abort_munmap;
+		vms.nr_accounted = 0;
 		vm_flags |= VM_ACCOUNT;
 	}
 
@@ -3031,10 +3033,8 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 * not unmapped, but the maps are removed from the list.
 	 */
 	vma = vm_area_alloc(mm);
-	if (!vma) {
-		error = -ENOMEM;
+	if (!vma)
 		goto unacct_error;
-	}
 
 	vma_iter_config(&vmi, addr, end);
 	vma_set_range(vma, addr, end, pgoff);
@@ -3043,6 +3043,14 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	if (file) {
 		vma->vm_file = get_file(file);
+		/* This may map PTEs, so ensure there are no existing PTEs */
+		if (vms.nr_pages) {
+			mas_set(&mas_detach, 1);
+			unmap_region(mm, &mas_detach, vms.vma, prev, next,
+				     vms.unmap_start, vms.unmap_end,
+				     vms.vma_count, /*mm_wr_locked = */ true);
+			vms.cleared_ptes = true;
+		}
 		error = call_mmap(file, vma);
 		if (error)
 			goto unmap_and_free_vma;
@@ -3133,6 +3141,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 expanded:
 	perf_event_mmap(vma);
 
+	if (vms.nr_pages)
+		vms_complete_munmap_vmas(&vms, &mas_detach);
+
 	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
 		if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) ||
@@ -3181,6 +3192,10 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
+
+abort_munmap:
+	if (vms.nr_pages)
+		abort_munmap_vmas(&mas_detach);
 	validate_mm(mm);
 	return error;
 }
-- 
2.43.0



  parent reply	other threads:[~2024-06-25 19:12 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-25 19:11 [PATCH v2 00/15] Avoid MAP_FIXED gap exposure Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 01/15] mm/mmap: Correctly position vma_iterator in __split_vma() Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 02/15] mm/mmap: Introduce abort_munmap_vmas() Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 03/15] mm/mmap: Introduce vmi_complete_munmap_vmas() Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 04/15] mm/mmap: Extract the gathering of vmas from do_vmi_align_munmap() Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 05/15] mm/mmap: Introduce vma_munmap_struct for use in munmap operations Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 06/15] mm/mmap: Change munmap to use vma_munmap_struct() for accounting and surrounding vmas Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 07/15] mm/mmap: Extract validate_mm() from vma_complete() Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 08/15] mm/mmap: Inline munmap operation in mmap_region() Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 09/15] mm/mmap: Expand mmap_region() munmap call Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 10/15] mm/mmap: Reposition vma iterator in mmap_region() Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 11/15] mm/mmap: Track start and end of munmap in vma_munmap_struct Liam R. Howlett
2024-06-25 19:11 ` Liam R. Howlett [this message]
2024-06-25 19:11 ` [PATCH v2 13/15] mm/mmap: Use PHYS_PFN in mmap_region() Liam R. Howlett
2024-06-25 19:11 ` [PATCH v2 14/15] mm/mmap: Use vms accounted pages " Liam R. Howlett
2024-06-26 16:32   ` Kees Cook
2024-06-26 18:04     ` Liam R. Howlett
2024-06-26 18:45       ` Kees Cook
2024-06-25 19:11 ` [PATCH v2 15/15] mm/mmap: Move may_expand_vm() check " Liam R. Howlett
2024-06-26 20:58 ` [PATCH v2 00/15] Avoid MAP_FIXED gap exposure Andrew Morton
2024-06-27  1:15   ` Liam R. Howlett
2024-06-27  1:28     ` Andrew Morton
2024-06-27 13:31       ` Liam R. Howlett

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240625191145.3382793-13-Liam.Howlett@oracle.com \
    --to=liam.howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=kees@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lstoakes@gmail.com \
    --cc=olsajiri@gmail.com \
    --cc=paulmck@kernel.org \
    --cc=sidhartha.kumar@oracle.com \
    --cc=spasswolf@web.de \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox