linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Liam R. Howlett" <Liam.Howlett@oracle.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Suren Baghdasaryan <surenb@google.com>,
	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	Pedro Falcato <pfalcato@suse.de>,
	David Hildenbrand <david@redhat.com>,
	Vlastimil Babka <vbabka@suse.cz>, Michal Hocko <mhocko@suse.com>,
	Jann Horn <jannh@google.com>,
	shikemeng@huaweicloud.com, kasong@tencent.com, nphamcs@gmail.com,
	bhe@redhat.com, baohua@kernel.org, chrisl@kernel.org,
	Matthew Wilcox <willy@infradead.org>,
	"Liam R. Howlett" <Liam.Howlett@oracle.com>
Subject: [PATCH v3 05/11] mm/memory: Add tree limit to free_pgtables()
Date: Wed, 21 Jan 2026 11:49:40 -0500	[thread overview]
Message-ID: <20260121164946.2093480-6-Liam.Howlett@oracle.com> (raw)
In-Reply-To: <20260121164946.2093480-1-Liam.Howlett@oracle.com>

The ceiling and the tree search limit need to be separate arguments to
support a future change in handling a failed fork attempt.  The floor and
ceiling variable names are not very descriptive, so change them to
pg_start/pg_end respectively.

Add a new vma_end argument to the function, as it will differ from
pg_end in later patches in this series.

Add a kernel doc about the free_pgtables() function.

The test code is also updated.

No functional changes intended.

Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Reviewed-by: Pedro Falcato <pfalcato@suse.de>
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
---
 mm/internal.h                    |  6 +++--
 mm/memory.c                      | 42 +++++++++++++++++++++++++-------
 mm/mmap.c                        |  2 +-
 mm/vma.c                         |  3 ++-
 tools/testing/vma/vma_internal.h |  3 ++-
 5 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 5afe55751fe08..2cdc5c9396f10 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -510,8 +510,10 @@ void deactivate_file_folio(struct folio *folio);
 void folio_activate(struct folio *folio);
 
 void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
-		   struct vm_area_struct *start_vma, unsigned long floor,
-		   unsigned long ceiling, bool mm_wr_locked);
+		   struct vm_area_struct *vma, unsigned long pg_start,
+		   unsigned long pg_end, unsigned long vma_end,
+		   bool mm_wr_locked);
+
 void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte);
 
 struct zap_details;
diff --git a/mm/memory.c b/mm/memory.c
index 4b0790c8fa48e..9043cfda65b94 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -370,23 +370,47 @@ void free_pgd_range(struct mmu_gather *tlb,
 	} while (pgd++, addr = next, addr != end);
 }
 
+/**
+ * free_pgtables() - Free a range of page tables
+ * @tlb: The mmu gather
+ * @mas: The maple state
+ * @vma: The first vma
+ * @pg_start: The lowest page table address (floor)
+ * @pg_end: The highest page table address (ceiling)
+ * @vma_end: The highest vma tree search address
+ * @mm_wr_locked: boolean indicating if the mm is write locked
+ *
+ * Note: pg_start and pg_end are provided to indicate the absolute range of the
+ * page tables that should be removed.  This can differ from the vma mappings on
+ * some archs that may have mappings that need to be removed outside the vmas.
+ * Note that the prev->vm_end and next->vm_start are often used.
+ *
+ * The vma_end differs from the pg_end when a dup_mmap() failed and the tree has
+ * unrelated data to the mm_struct being torn down.
+ */
 void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
-		   struct vm_area_struct *vma, unsigned long floor,
-		   unsigned long ceiling, bool mm_wr_locked)
+		   struct vm_area_struct *vma, unsigned long pg_start,
+		   unsigned long pg_end, unsigned long vma_end,
+		   bool mm_wr_locked)
 {
 	struct unlink_vma_file_batch vb;
 
+	/*
+	 * Note: USER_PGTABLES_CEILING may be passed as the value of pg_end and
+	 * may be 0.  Underflow is expected in this case.  Otherwise the
+	 * pagetable end is exclusive.
+	 * vma_end is exclusive.
+	 * The last vma address should never be larger than the pagetable end.
+	 */
+	WARN_ON_ONCE(vma_end - 1 > pg_end - 1);
+
 	tlb_free_vmas(tlb);
 
 	do {
 		unsigned long addr = vma->vm_start;
 		struct vm_area_struct *next;
 
-		/*
-		 * Note: USER_PGTABLES_CEILING may be passed as ceiling and may
-		 * be 0.  This will underflow and is okay.
-		 */
-		next = mas_find(mas, ceiling - 1);
+		next = mas_find(mas, vma_end - 1);
 		if (unlikely(xa_is_zero(next)))
 			next = NULL;
 
@@ -406,7 +430,7 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
 		 */
 		while (next && next->vm_start <= vma->vm_end + PMD_SIZE) {
 			vma = next;
-			next = mas_find(mas, ceiling - 1);
+			next = mas_find(mas, vma_end - 1);
 			if (unlikely(xa_is_zero(next)))
 				next = NULL;
 			if (mm_wr_locked)
@@ -417,7 +441,7 @@ void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
 		unlink_file_vma_batch_final(&vb);
 
 		free_pgd_range(tlb, addr, vma->vm_end,
-			floor, next ? next->vm_start : ceiling);
+			pg_start, next ? next->vm_start : pg_end);
 		vma = next;
 	} while (vma);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 9c8adc505d3de..827a64cdcc681 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1308,7 +1308,7 @@ void exit_mmap(struct mm_struct *mm)
 	mt_clear_in_rcu(&mm->mm_mt);
 	vma_iter_set(&vmi, vma->vm_end);
 	free_pgtables(&tlb, &vmi.mas, vma, FIRST_USER_ADDRESS,
-		      USER_PGTABLES_CEILING, true);
+		      USER_PGTABLES_CEILING, USER_PGTABLES_CEILING, true);
 	tlb_finish_mmu(&tlb);
 
 	/*
diff --git a/mm/vma.c b/mm/vma.c
index 0c35cdc0d3b7b..b2b9e7b3284f3 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -484,6 +484,7 @@ void unmap_region(struct ma_state *mas, struct vm_area_struct *vma,
 	unmap_vmas(&tlb, mas, vma, vma_start, vma_end, vma_end);
 	mas_set(mas, vma->vm_end);
 	free_pgtables(&tlb, mas, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+		      next ? next->vm_start : USER_PGTABLES_CEILING,
 		      next ? next->vm_start : USER_PGTABLES_CEILING,
 		      /* mm_wr_locked = */ true);
 	tlb_finish_mmu(&tlb);
@@ -1275,7 +1276,7 @@ static inline void vms_clear_ptes(struct vma_munmap_struct *vms,
 	mas_set(mas_detach, 1);
 	/* start and end may be different if there is no prev or next vma. */
 	free_pgtables(&tlb, mas_detach, vms->vma, vms->unmap_start,
-		      vms->unmap_end, mm_wr_locked);
+		      vms->unmap_end, vms->unmap_end, mm_wr_locked);
 	tlb_finish_mmu(&tlb);
 	vms->clear_ptes = false;
 }
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 7fa56dcc53a6b..f50b8ddee6120 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -1139,7 +1139,8 @@ static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas,
 
 static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas,
 		   struct vm_area_struct *vma, unsigned long floor,
-		   unsigned long ceiling, bool mm_wr_locked)
+		   unsigned long ceiling, unsigned long tree_max,
+		   bool mm_wr_locked)
 {
 }
 
-- 
2.47.3



  parent reply	other threads:[~2026-01-21 16:51 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-21 16:49 [PATCH v3 00/11] Remove XA_ZERO from error recovery of dup_mmap() Liam R. Howlett
2026-01-21 16:49 ` [PATCH v3 01/11] mm: Relocate the page table ceiling and floor definitions Liam R. Howlett
2026-01-21 17:26   ` SeongJae Park
2026-01-21 18:52     ` Liam R. Howlett
2026-01-21 16:49 ` [PATCH v3 02/11] mm/mmap: Move exit_mmap() trace point Liam R. Howlett
2026-01-21 16:49 ` [PATCH v3 03/11] mm/mmap: Abstract vma clean up from exit_mmap() Liam R. Howlett
2026-01-21 16:49 ` [PATCH v3 04/11] mm/vma: Add limits to unmap_region() for vmas Liam R. Howlett
2026-01-21 16:49 ` Liam R. Howlett [this message]
2026-01-21 16:49 ` [PATCH v3 06/11] mm/vma: Add page table limit to unmap_region() Liam R. Howlett
2026-01-21 16:49 ` [PATCH v3 07/11] mm: Change dup_mmap() recovery Liam R. Howlett
2026-01-21 16:49 ` [PATCH v3 08/11] mm: Introduce unmap_desc struct to reduce function arguments Liam R. Howlett
2026-01-21 16:49 ` [PATCH v3 09/11] mm/vma: Use unmap_desc in exit_mmap() and vms_clear_ptes() Liam R. Howlett
2026-01-21 16:49 ` [PATCH v3 10/11] mm/vma: Use unmap_region() in vms_clear_ptes() Liam R. Howlett
2026-01-21 16:49 ` [PATCH v3 11/11] mm: Use unmap_desc struct for freeing page tables Liam R. Howlett
2026-01-24 18:45   ` Chris Mason
2026-02-07 12:37     ` Liam R. Howlett
2026-02-10 20:41       ` Liam R. Howlett
2026-02-10 21:42   ` [PATCH] mm: Fix up unmap desc use on exit_mmap() Liam R. Howlett
2026-01-21 18:59 ` [PATCH v3 00/11] Remove XA_ZERO from error recovery of dup_mmap() Andrew Morton
2026-01-21 19:34   ` Liam R. Howlett

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260121164946.2093480-6-Liam.Howlett@oracle.com \
    --to=liam.howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=baohua@kernel.org \
    --cc=bhe@redhat.com \
    --cc=chrisl@kernel.org \
    --cc=david@redhat.com \
    --cc=jannh@google.com \
    --cc=kasong@tencent.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=nphamcs@gmail.com \
    --cc=pfalcato@suse.de \
    --cc=shikemeng@huaweicloud.com \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox