linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Kiryl Shutsemau (Meta)" <kas@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Xu <peterx@redhat.com>,
	David Hildenbrand <david@kernel.org>,
	Lorenzo Stoakes <ljs@kernel.org>, Mike Rapoport <rppt@kernel.org>,
	Suren Baghdasaryan <surenb@google.com>,
	Vlastimil Babka <vbabka@kernel.org>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Zi Yan <ziy@nvidia.com>, Jonathan Corbet <corbet@lwn.net>,
	Shuah Khan <skhan@linuxfoundation.org>,
	Sean Christopherson <seanjc@google.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org,
	kvm@vger.kernel.org, "Kiryl Shutsemau (Meta)" <kas@kernel.org>
Subject: [RFC, PATCH 05/12] mm: intercept protnone faults on VM_UFFD_MINOR anonymous VMAs
Date: Tue, 14 Apr 2026 15:23:39 +0100	[thread overview]
Message-ID: <20260414142354.1465950-6-kas@kernel.org> (raw)
In-Reply-To: <20260414142354.1465950-1-kas@kernel.org>

When a protnone PTE/PMD fault occurs on a VMA with VM_UFFD_MINOR,
dispatch to the userfaultfd minor fault path instead of NUMA balancing.
Async: restore permissions inline. Sync: deliver via handle_userfault().

Feed NUMA locality stats from the fault path via task_numa_fault()
so the scheduler retains placement data even though NUMA scanning
is skipped on these VMAs.

Signed-off-by: Kiryl Shutsemau (Meta) <kas@kernel.org>
Assisted-by: Claude:claude-opus-4-6
---
 include/linux/huge_mm.h |  6 +++++
 mm/huge_memory.c        | 24 +++++++++++++++++++
 mm/memory.c             | 51 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 79 insertions(+), 2 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index a4d9f964dfde..a900bb530998 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -519,6 +519,7 @@ static inline bool folio_test_pmd_mappable(struct folio *folio)
 }
 
 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf);
+vm_fault_t do_huge_pmd_uffd_minor(struct vm_fault *vmf);
 
 vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf);
 
@@ -707,6 +708,11 @@ static inline vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	return 0;
 }
 
+static inline vm_fault_t do_huge_pmd_uffd_minor(struct vm_fault *vmf)
+{
+	return 0;
+}
+
 static inline vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf)
 {
 	return 0;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2ad736ff007c..264c646a8573 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2181,6 +2181,30 @@ static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
 	return pmd_dirty(pmd);
 }
 
+vm_fault_t do_huge_pmd_uffd_minor(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+
+	if (userfaultfd_minor_async(vma)) {
+		pmd_t pmd;
+
+		vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
+		if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd))) {
+			spin_unlock(vmf->ptl);
+			return 0;
+		}
+		pmd = pmd_modify(vmf->orig_pmd, vma->vm_page_prot);
+		pmd = pmd_mkyoung(pmd);
+		set_pmd_at(vma->vm_mm, vmf->address & HPAGE_PMD_MASK,
+			   vmf->pmd, pmd);
+		update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
+		spin_unlock(vmf->ptl);
+		return 0;
+	}
+
+	return handle_userfault(vmf, VM_UFFD_MINOR);
+}
+
 /* NUMA hinting page fault entry point for trans huge pmds */
 vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 {
diff --git a/mm/memory.c b/mm/memory.c
index c65e82c86fed..f068ff4027e8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -6045,6 +6045,47 @@ static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_stru
 	}
 }
 
+static void uffd_minor_feed_numa_fault(struct vm_fault *vmf)
+{
+	struct folio *folio;
+
+	folio = vm_normal_folio(vmf->vma, vmf->address, vmf->orig_pte);
+	if (folio) {
+		int nid = folio_nid(folio);
+		int flags = 0;
+
+		if (nid == numa_node_id())
+			flags |= TNF_FAULT_LOCAL;
+		task_numa_fault(folio_last_cpupid(folio), nid, 1, flags);
+	}
+}
+
+static vm_fault_t do_uffd_minor_anon(struct vm_fault *vmf)
+{
+	/* Feed NUMA stats even though we skip NUMA scanning on this VMA */
+	uffd_minor_feed_numa_fault(vmf);
+
+	if (userfaultfd_minor_async(vmf->vma)) {
+		pte_t pte;
+
+		spin_lock(vmf->ptl);
+		if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) {
+			pte_unmap_unlock(vmf->pte, vmf->ptl);
+			return 0;
+		}
+		pte = pte_modify(vmf->orig_pte, vmf->vma->vm_page_prot);
+		pte = pte_mkyoung(pte);
+		set_pte_at(vmf->vma->vm_mm, vmf->address, vmf->pte, pte);
+		update_mmu_cache(vmf->vma, vmf->address, vmf->pte);
+		pte_unmap_unlock(vmf->pte, vmf->ptl);
+		return 0;
+	}
+
+	/* Sync mode: unmap PTE and deliver to userfaultfd handler */
+	pte_unmap(vmf->pte);
+	return handle_userfault(vmf, VM_UFFD_MINOR);
+}
+
 static vm_fault_t do_numa_page(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -6319,8 +6360,11 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
 	if (!pte_present(vmf->orig_pte))
 		return do_swap_page(vmf);
 
-	if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma))
+	if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) {
+		if (userfaultfd_minor(vmf->vma))
+			return do_uffd_minor_anon(vmf);
 		return do_numa_page(vmf);
+	}
 
 	spin_lock(vmf->ptl);
 	entry = vmf->orig_pte;
@@ -6434,8 +6478,11 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 		return 0;
 	}
 	if (pmd_trans_huge(vmf.orig_pmd)) {
-		if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma))
+		if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) {
+			if (userfaultfd_minor(vma))
+				return do_huge_pmd_uffd_minor(&vmf);
 			return do_huge_pmd_numa_page(&vmf);
+		}
 
 		if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) &&
 		    !pmd_write(vmf.orig_pmd)) {
-- 
2.51.2



  parent reply	other threads:[~2026-04-14 14:24 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-14 14:23 [RFC, PATCH 00/12] userfaultfd: working set tracking for VM guest memory Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 01/12] userfaultfd: define UAPI constants for anonymous minor faults Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 02/12] userfaultfd: add UFFD_FEATURE_MINOR_ANON registration support Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 03/12] userfaultfd: implement UFFDIO_DEACTIVATE ioctl Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 04/12] userfaultfd: UFFDIO_CONTINUE for anonymous memory Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` Kiryl Shutsemau (Meta) [this message]
2026-04-14 14:23 ` [RFC, PATCH 06/12] userfaultfd: auto-resolve shmem and hugetlbfs minor faults in async mode Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 07/12] sched/numa: skip scanning anonymous VM_UFFD_MINOR VMAs Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 08/12] userfaultfd: enable UFFD_FEATURE_MINOR_ANON Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 09/12] mm/pagemap: add PAGE_IS_UFFD_DEACTIVATED to PAGEMAP_SCAN Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 10/12] userfaultfd: add UFFDIO_SET_MODE for runtime sync/async toggle Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 11/12] selftests/mm: add userfaultfd anonymous minor fault tests Kiryl Shutsemau (Meta)
2026-04-14 14:23 ` [RFC, PATCH 12/12] Documentation/userfaultfd: document working set tracking Kiryl Shutsemau (Meta)
2026-04-14 15:28 ` [RFC, PATCH 00/12] userfaultfd: working set tracking for VM guest memory Peter Xu
2026-04-14 17:08   ` Kiryl Shutsemau
2026-04-14 17:45     ` Peter Xu
2026-04-14 15:37 ` David Hildenbrand (Arm)
2026-04-14 17:10   ` Kiryl Shutsemau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260414142354.1465950-6-kas@kernel.org \
    --to=kas@kernel.org \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=corbet@lwn.net \
    --cc=david@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=rppt@kernel.org \
    --cc=seanjc@google.com \
    --cc=skhan@linuxfoundation.org \
    --cc=surenb@google.com \
    --cc=vbabka@kernel.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox