* [PATCH V1 6.1.y] Revert "mm/mprotect: delete pmd_none_or_clear_bad_unless_trans_huge()"
From: Harry Yoo @ 2026-01-16  3:38 UTC
  To: Greg Kroah-Hartman, stable
  Cc: Liam.Howlett, akpm, david, hughd, jannh, linux-mm,
	lorenzo.stoakes, pfalcato, vbabka, Harry Yoo

This reverts commit 91750c8a4be42d73b6810a1c35d73c8a3cd0b481 which is
commit 670ddd8cdcbd1d07a4571266ae3517f821728c3a upstream.

While the commit fixes a race condition between NUMA balancing and THP
migration, it causes a NULL pointer dereference when the pmd temporarily
transitions from pmd_trans_huge() to pmd_none(). Verifying that the pmd
value has not changed under the page table lock does not prevent the
crash, because the crash happens while the lock itself is being
acquired, before the recheck can run.
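
For illustration, a rough timing sketch of how the crash occurs
(function names are from 6.1's mm/mprotect.c as touched by this patch;
the remote actor clearing the pmd is deliberately left unnamed):

	/*
	 * Illustrative timing sketch only, not a reproducer.
	 *
	 * CPU A: change_pte_range()         CPU B
	 * -------------------------         -----
	 * pmd checks in change_pmd_range()
	 * see a pmd worth walking
	 *                                   pmd goes from
	 *                                   pmd_trans_huge() to pmd_none()
	 * pte_offset_map_lock()
	 *   -> computes the page-table
	 *      address from a now-empty
	 *      pmd and faults on it
	 * (the pmd_same() recheck under
	 *  ptl never runs; the crash is
	 *  in taking the lock itself)
	 */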

Since the original issue addressed by the commit is rare and non-fatal,
revert the commit for now. A better backport that more closely matches
the upstream semantics will be provided as a follow-up.
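
Concretely, the revert restores the pre-commit ordering in
change_pte_range(), where the unstable-pmd check runs before the page
table is mapped, so a pmd that has become pmd_none() is skipped rather
than dereferenced (excerpted from the diff below):

	/*
	 * Restored ordering: bail out while the pmd may still be
	 * none/huge/unstable, before mapping the page table.
	 */
	if (pmd_trans_unstable(pmd))
		return 0;
	/* The pmd points to a regular page table; safe to map it now. */
	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);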

Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
---
 mm/mprotect.c | 101 +++++++++++++++++++++++++++++---------------------
 1 file changed, 58 insertions(+), 43 deletions(-)
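
Note for reviewers: below is a minimal userspace sketch (stand-in types
only, not kernel code) of the decision table that the restored
pmd_none_or_clear_bad_unless_trans_huge() helper implements, where
"skip" means the caller moves to the next pmd and "proceed" means the
pmd is either walked as a page table or handled as a huge pmd:

	#include <stdio.h>

	/* Stand-in for pmd_t and its predicates; purely illustrative. */
	enum pmd_state { PMD_NONE, PMD_TRANS_HUGE, PMD_BAD, PMD_PTE_TABLE };

	/* Returns 1 when the caller should skip the pmd, 0 otherwise. */
	static int skip_unless_trans_huge(enum pmd_state s)
	{
		if (s == PMD_NONE)
			return 1;	/* nothing mapped: skip */
		if (s == PMD_TRANS_HUGE)
			return 0;	/* huge pmd: handled, not "bad" */
		if (s == PMD_BAD)
			return 1;	/* corrupt entry: cleared and skipped */
		return 0;		/* regular page table: walk the ptes */
	}

	int main(void)
	{
		const char *names[] = { "none", "trans_huge", "bad", "pte_table" };

		for (int s = PMD_NONE; s <= PMD_PTE_TABLE; s++)
			printf("%-10s -> %s\n", names[s],
			       skip_unless_trans_huge(s) ? "skip" : "proceed");
		return 0;
	}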

diff --git a/mm/mprotect.c b/mm/mprotect.c
index f09229fbcf6c9..8216f4018ee75 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -73,12 +73,10 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma,
 }
 
 static long change_pte_range(struct mmu_gather *tlb,
-		struct vm_area_struct *vma, pmd_t *pmd, pmd_t pmd_old,
-		unsigned long addr, unsigned long end, pgprot_t newprot,
-		unsigned long cp_flags)
+		struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
+		unsigned long end, pgprot_t newprot, unsigned long cp_flags)
 {
 	pte_t *pte, oldpte;
-	pmd_t _pmd;
 	spinlock_t *ptl;
 	long pages = 0;
 	int target_node = NUMA_NO_NODE;
@@ -88,15 +86,21 @@ static long change_pte_range(struct mmu_gather *tlb,
 
 	tlb_change_page_size(tlb, PAGE_SIZE);
 
+	/*
+	 * Can be called with only the mmap_lock for reading by
+	 * prot_numa so we must check the pmd isn't constantly
+	 * changing from under us from pmd_none to pmd_trans_huge
+	 * and/or the other way around.
+	 */
+	if (pmd_trans_unstable(pmd))
+		return 0;
+
+	/*
+	 * The pmd points to a regular pte so the pmd can't change
+	 * from under us even if the mmap_lock is only hold for
+	 * reading.
+	 */
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-	/* Make sure pmd didn't change after acquiring ptl */
-	_pmd = pmd_read_atomic(pmd);
-	/* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
-	barrier();
-	if (!pmd_same(pmd_old, _pmd)) {
-		pte_unmap_unlock(pte, ptl);
-		return -EAGAIN;
-	}
 
 	/* Get target node for single threaded private VMAs */
 	if (prot_numa && !(vma->vm_flags & VM_SHARED) &&
@@ -284,6 +288,31 @@ static long change_pte_range(struct mmu_gather *tlb,
 	return pages;
 }
 
+/*
+ * Used when setting automatic NUMA hinting protection where it is
+ * critical that a numa hinting PMD is not confused with a bad PMD.
+ */
+static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
+{
+	pmd_t pmdval = pmd_read_atomic(pmd);
+
+	/* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	barrier();
+#endif
+
+	if (pmd_none(pmdval))
+		return 1;
+	if (pmd_trans_huge(pmdval))
+		return 0;
+	if (unlikely(pmd_bad(pmdval))) {
+		pmd_clear_bad(pmd);
+		return 1;
+	}
+
+	return 0;
+}
+
 /* Return true if we're uffd wr-protecting file-backed memory, or false */
 static inline bool
 uffd_wp_protect_file(struct vm_area_struct *vma, unsigned long cp_flags)
@@ -331,34 +360,22 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 
 	pmd = pmd_offset(pud, addr);
 	do {
-		long ret;
-		pmd_t _pmd;
-again:
+		long this_pages;
+
 		next = pmd_addr_end(addr, end);
-		_pmd = pmd_read_atomic(pmd);
-		/* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		barrier();
-#endif
 
 		change_pmd_prepare(vma, pmd, cp_flags);
 		/*
 		 * Automatic NUMA balancing walks the tables with mmap_lock
 		 * held for read. It's possible a parallel update to occur
-		 * between pmd_trans_huge(), is_swap_pmd(), and
-		 * a pmd_none_or_clear_bad() check leading to a false positive
-		 * and clearing. Hence, it's necessary to atomically read
-		 * the PMD value for all the checks.
+		 * between pmd_trans_huge() and a pmd_none_or_clear_bad()
+		 * check leading to a false positive and clearing.
+		 * Hence, it's necessary to atomically read the PMD value
+		 * for all the checks.
 		 */
-		if (!is_swap_pmd(_pmd) && !pmd_devmap(_pmd) && !pmd_trans_huge(_pmd)) {
-			if (pmd_none(_pmd))
-				goto next;
-
-			if (pmd_bad(_pmd)) {
-				pmd_clear_bad(pmd);
-				goto next;
-			}
-		}
+		if (!is_swap_pmd(*pmd) && !pmd_devmap(*pmd) &&
+		     pmd_none_or_clear_bad_unless_trans_huge(pmd))
+			goto next;
 
 		/* invoke the mmu notifier if the pmd is populated */
 		if (!range.start) {
@@ -368,7 +385,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 			mmu_notifier_invalidate_range_start(&range);
 		}
 
-		if (is_swap_pmd(_pmd) || pmd_trans_huge(_pmd) || pmd_devmap(_pmd)) {
+		if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) {
 			if ((next - addr != HPAGE_PMD_SIZE) ||
 			    uffd_wp_protect_file(vma, cp_flags)) {
 				__split_huge_pmd(vma, pmd, addr, false, NULL);
@@ -383,11 +400,11 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 				 * change_huge_pmd() does not defer TLB flushes,
 				 * so no need to propagate the tlb argument.
 				 */
-				ret = change_huge_pmd(tlb, vma, pmd,
-						      addr, newprot, cp_flags);
+				int nr_ptes = change_huge_pmd(tlb, vma, pmd,
+						addr, newprot, cp_flags);
 
-				if (ret) {
-					if (ret == HPAGE_PMD_NR) {
+				if (nr_ptes) {
+					if (nr_ptes == HPAGE_PMD_NR) {
 						pages += HPAGE_PMD_NR;
 						nr_huge_updates++;
 					}
@@ -398,11 +415,9 @@ static inline long change_pmd_range(struct mmu_gather *tlb,
 			}
 			/* fall through, the trans huge pmd just split */
 		}
-		ret = change_pte_range(tlb, vma, pmd, _pmd, addr, next,
-				       newprot, cp_flags);
-		if (ret < 0)
-			goto again;
-		pages += ret;
+		this_pages = change_pte_range(tlb, vma, pmd, addr, next,
+					      newprot, cp_flags);
+		pages += this_pages;
 next:
 		cond_resched();
 	} while (pmd++, addr = next, addr != end);
-- 
2.43.0


