From: Ryan Roberts <ryan.roberts@arm.com>
To: David Hildenbrand <david@redhat.com>,
	Mark Rutland <mark.rutland@arm.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@kernel.org>, Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Muchun Song <muchun.song@linux.dev>
Cc: Ryan Roberts <ryan.roberts@arm.com>,
	linux-arm-kernel@lists.infradead.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [RFC PATCH v1 3/4] mm/memory: Use ptep_get_lockless_norecency() for orig_pte
Date: Thu, 15 Feb 2024 12:17:55 +0000
Message-ID: <20240215121756.2734131-4-ryan.roberts@arm.com>
In-Reply-To: <20240215121756.2734131-1-ryan.roberts@arm.com>

Let's convert handle_pte_fault() to use ptep_get_lockless_norecency()
instead of ptep_get_lockless() when saving orig_pte.
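
For context, the generic fallback for the new getter, introduced in
patch 1/4, is expected to be a trivial wrapper (a sketch, not the
authoritative definition; only arm64 overrides it, in patch 4/4, to
avoid gathering the contpte access and dirty bits):

    /* Sketch of the patch-1/4 generic fallback. */
    static inline pte_t ptep_get_lockless_norecency(pte_t *ptep)
    {
            return ptep_get_lockless(ptep);
    }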

There are a number of places that follow this model:

    orig_pte = ptep_get_lockless(ptep)
    ...
    <lock>
    if (!pte_same(orig_pte, ptep_get(ptep)))
            // RACE!
    ...
    <unlock>

So we need to be careful to convert all of those to use
pte_same_norecency() so that the access and dirty bits are excluded
from the comparison. Since ptep_get_lockless_norecency() may return a
pte with those bits cleared, a plain pte_same() against a later
ptep_get() could spuriously report a race and trigger an unnecessary
fault retry.
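
For reference, a minimal sketch of such a helper, built on the
existing pte_mkold()/pte_mkclean() accessors (illustrative only; the
authoritative definition is introduced in patch 1/4):

    /* Compare two ptes, ignoring the access and dirty bits. */
    static inline int pte_same_norecency(pte_t pte_a, pte_t pte_b)
    {
            return pte_same(pte_mkold(pte_mkclean(pte_a)),
                            pte_mkold(pte_mkclean(pte_b)));
    }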

Additionally there are a couple of places (__wp_page_copy_user() and
wp_page_copy()) that genuinely rely on the access and dirty bits of
orig_pte. With some careful refactoring, these can instead use
ptep_get() once the lock is held, achieving equivalent logic; the
refactored pattern is sketched below.
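
In the same pseudocode style as above, the refactored pattern is:

    orig_pte = ptep_get_lockless_norecency(ptep)
    ...
    <lock>
    entry = ptep_get(ptep)
    if (!pte_same_norecency(entry, orig_pte))
            // RACE!
    // use entry, not orig_pte, wherever access/dirty bits matter
    ...
    <unlock>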

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
 mm/memory.c | 55 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 8e65fa1884f1..075245314ec3 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3014,7 +3014,7 @@ static inline int pte_unmap_same(struct vm_fault *vmf)
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPTION)
 	if (sizeof(pte_t) > sizeof(unsigned long)) {
 		spin_lock(vmf->ptl);
-		same = pte_same(ptep_get(vmf->pte), vmf->orig_pte);
+		same = pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte);
 		spin_unlock(vmf->ptl);
 	}
 #endif
@@ -3062,11 +3062,14 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src,
 	 * take a double page fault, so mark it accessed here.
 	 */
 	vmf->pte = NULL;
-	if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) {
+	if (!arch_has_hw_pte_young()) {
 		pte_t entry;

 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
-		if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+		if (likely(vmf->pte))
+			entry = ptep_get(vmf->pte);
+		if (unlikely(!vmf->pte ||
+			     !pte_same_norecency(entry, vmf->orig_pte))) {
 			/*
 			 * Other thread has already handled the fault
 			 * and update local tlb only
@@ -3077,9 +3080,11 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src,
 			goto pte_unlock;
 		}

-		entry = pte_mkyoung(vmf->orig_pte);
-		if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0))
-			update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
+		if (!pte_young(entry)) {
+			entry = pte_mkyoung(entry);
+			if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0))
+				update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1);
+		}
 	}

 	/*
@@ -3094,7 +3099,8 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src,

 		/* Re-validate under PTL if the page is still mapped */
 		vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl);
-		if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+		if (unlikely(!vmf->pte ||
+			     !pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte))) {
 			/* The PTE changed under us, update local tlb */
 			if (vmf->pte)
 				update_mmu_tlb(vma, addr, vmf->pte);
@@ -3369,14 +3375,17 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	 * Re-check the pte - we dropped the lock
 	 */
 	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
-	if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+	if (likely(vmf->pte))
+		entry = ptep_get(vmf->pte);
+	if (likely(vmf->pte && pte_same_norecency(entry, vmf->orig_pte))) {
 		if (old_folio) {
 			if (!folio_test_anon(old_folio)) {
 				dec_mm_counter(mm, mm_counter_file(old_folio));
 				inc_mm_counter(mm, MM_ANONPAGES);
 			}
 		} else {
-			ksm_might_unmap_zero_page(mm, vmf->orig_pte);
+			/* Needs dirty bit so can't use vmf->orig_pte. */
+			ksm_might_unmap_zero_page(mm, entry);
 			inc_mm_counter(mm, MM_ANONPAGES);
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
@@ -3494,7 +3503,7 @@ static vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf, struct folio *folio
 	 * We might have raced with another page fault while we released the
 	 * pte_offset_map_lock.
 	 */
-	if (!pte_same(ptep_get(vmf->pte), vmf->orig_pte)) {
+	if (!pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte)) {
 		update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		return VM_FAULT_NOPAGE;
@@ -3883,7 +3892,8 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf)

 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 				&vmf->ptl);
-	if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
+	if (likely(vmf->pte &&
+		   pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte)))
 		restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte);

 	if (vmf->pte)
@@ -3928,7 +3938,7 @@ static vm_fault_t pte_marker_clear(struct vm_fault *vmf)
 	 * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_POISONED.
 	 * So is_pte_marker() check is not enough to safely drop the pte.
 	 */
-	if (pte_same(vmf->orig_pte, ptep_get(vmf->pte)))
+	if (pte_same_norecency(vmf->orig_pte, ptep_get(vmf->pte)))
 		pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	return 0;
@@ -4028,8 +4038,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 					vmf->address, &vmf->ptl);
 			if (unlikely(!vmf->pte ||
-				     !pte_same(ptep_get(vmf->pte),
-							vmf->orig_pte)))
+				     !pte_same_norecency(ptep_get(vmf->pte),
+							 vmf->orig_pte)))
 				goto unlock;

 			/*
@@ -4117,7 +4127,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
 					vmf->address, &vmf->ptl);
 			if (likely(vmf->pte &&
-				   pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
+				   pte_same_norecency(ptep_get(vmf->pte),
+						      vmf->orig_pte)))
 				ret = VM_FAULT_OOM;
 			goto unlock;
 		}
@@ -4187,7 +4198,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	 */
 	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
 			&vmf->ptl);
-	if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
+	if (unlikely(!vmf->pte ||
+		     !pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte)))
 		goto out_nomap;

 	if (unlikely(!folio_test_uptodate(folio))) {
@@ -4747,7 +4759,7 @@ void set_pte_range(struct vm_fault *vmf, struct folio *folio,
 static bool vmf_pte_changed(struct vm_fault *vmf)
 {
 	if (vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)
-		return !pte_same(ptep_get(vmf->pte), vmf->orig_pte);
+		return !pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte);

 	return !pte_none(ptep_get(vmf->pte));
 }
@@ -5125,7 +5137,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * the pfn may be screwed if the read is non atomic.
 	 */
 	spin_lock(vmf->ptl);
-	if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+	if (unlikely(!pte_same_norecency(ptep_get(vmf->pte), vmf->orig_pte))) {
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		goto out;
 	}
@@ -5197,7 +5209,8 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 					       vmf->address, &vmf->ptl);
 		if (unlikely(!vmf->pte))
 			goto out;
-		if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+		if (unlikely(!pte_same_norecency(ptep_get(vmf->pte),
+							  vmf->orig_pte))) {
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
 			goto out;
 		}
@@ -5343,7 +5356,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 						 vmf->address, &vmf->ptl);
 		if (unlikely(!vmf->pte))
 			return 0;
-		vmf->orig_pte = ptep_get_lockless(vmf->pte);
+		vmf->orig_pte = ptep_get_lockless_norecency(vmf->pte);
 		vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID;

 		if (pte_none(vmf->orig_pte)) {
@@ -5363,7 +5376,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)

 	spin_lock(vmf->ptl);
 	entry = vmf->orig_pte;
-	if (unlikely(!pte_same(ptep_get(vmf->pte), entry))) {
+	if (unlikely(!pte_same_norecency(ptep_get(vmf->pte), entry))) {
 		update_mmu_tlb(vmf->vma, vmf->address, vmf->pte);
 		goto unlock;
 	}
--
2.25.1




Thread overview: 42+ messages
2024-02-15 12:17 [RFC PATCH v1 0/4] Reduce cost of ptep_get_lockless on arm64 Ryan Roberts
2024-02-15 12:17 ` [RFC PATCH v1 1/4] mm: Introduce ptep_get_lockless_norecency() Ryan Roberts
     [not found]   ` <7aefa967-43aa-490b-ae0d-7d1455402e89@redhat.com>
2024-03-26 16:39     ` Ryan Roberts
2024-03-27  9:28       ` David Hildenbrand
2024-03-27  9:57         ` Ryan Roberts
2024-03-27 17:02           ` David Hildenbrand
2024-02-15 12:17 ` [RFC PATCH v1 2/4] mm/gup: Use ptep_get_lockless_norecency() Ryan Roberts
2024-03-26 16:30   ` David Hildenbrand
2024-03-26 16:48     ` Ryan Roberts
2024-02-15 12:17 ` Ryan Roberts [this message]
2024-03-26 17:02   ` [RFC PATCH v1 3/4] mm/memory: Use ptep_get_lockless_norecency() for orig_pte David Hildenbrand
2024-03-26 17:27     ` Ryan Roberts
2024-03-26 17:38       ` David Hildenbrand
2024-03-26 17:48         ` Ryan Roberts
2024-03-26 17:58           ` David Hildenbrand
2024-03-27  9:51             ` Ryan Roberts
2024-03-27 17:05               ` David Hildenbrand
2024-02-15 12:17 ` [RFC PATCH v1 4/4] arm64/mm: Override ptep_get_lockless_norecency() Ryan Roberts
2024-03-26 16:35   ` David Hildenbrand
2024-03-26 16:17 ` [RFC PATCH v1 0/4] Reduce cost of ptep_get_lockless on arm64 David Hildenbrand
2024-03-26 16:31   ` Ryan Roberts
     [not found]     ` <de143212-49ce-4c30-8bfa-4c0ff613f107@redhat.com>
2024-03-26 16:53       ` Ryan Roberts
2024-03-26 17:04         ` David Hildenbrand
2024-03-26 17:32           ` Ryan Roberts
2024-03-26 17:39             ` David Hildenbrand
2024-03-26 17:51               ` Ryan Roberts
2024-03-27  9:34                 ` David Hildenbrand
2024-03-27 10:01                   ` Ryan Roberts
2024-04-03 12:59                   ` Ryan Roberts
2024-04-08  8:36                     ` David Hildenbrand
2024-04-09 16:35                       ` Ryan Roberts
2024-04-10 20:09                         ` David Hildenbrand
2024-04-11  9:45                           ` Ryan Roberts
     [not found]                             ` <70a36403-aefd-4311-b612-84e602465689@redhat.com>
2024-04-15  9:28                               ` Ryan Roberts
     [not found]                                 ` <3e50030d-2289-4470-a727-a293baa21618@redhat.com>
2024-04-15 13:30                                   ` Ryan Roberts
     [not found]                                     ` <969dc6c3-2764-4a35-9fa6-7596832fb2a3@redhat.com>
2024-04-15 14:34                                       ` Ryan Roberts
     [not found]                                         ` <11b1c25b-3e20-4acf-9be5-57b508266c5b@redhat.com>
2024-04-15 15:17                                           ` Ryan Roberts
2024-04-15 15:22                                             ` David Hildenbrand
2024-04-15 15:53                                               ` Ryan Roberts
2024-04-15 16:02                                                 ` David Hildenbrand
2024-04-23 10:15                                                   ` Ryan Roberts
2024-04-23 10:18                                                     ` David Hildenbrand
