linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mike Rapoport <rppt@kernel.org>
To: linux-mm@kvack.org
Cc: Andrea Arcangeli <aarcange@redhat.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Axel Rasmussen <axelrasmussen@google.com>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	David Hildenbrand <david@redhat.com>,
	Hugh Dickins <hughd@google.com>,
	James Houghton <jthoughton@google.com>,
	"Liam R. Howlett" <Liam.Howlett@oracle.com>,
	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	Michal Hocko <mhocko@suse.com>, Mike Rapoport <rppt@kernel.org>,
	Muchun Song <muchun.song@linux.dev>,
	Nikita Kalyazin <kalyazin@amazon.com>,
	Oscar Salvador <osalvador@suse.de>,
	Paolo Bonzini <pbonzini@redhat.com>, Peter Xu <peterx@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Shuah Khan <shuah@kernel.org>,
	Suren Baghdasaryan <surenb@google.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	linux-kselftest@vger.kernel.org
Subject: [PATCH RFC 14/17] KVM: guest_memfd: implement userfaultfd minor mode
Date: Tue, 27 Jan 2026 21:29:33 +0200	[thread overview]
Message-ID: <20260127192936.1250096-15-rppt@kernel.org> (raw)
In-Reply-To: <20260127192936.1250096-1-rppt@kernel.org>

From: Nikita Kalyazin <kalyazin@amazon.com>

userfaultfd notifications about minor page faults are used for live
migration and snapshotting of VMs with memory backed by shared hugetlbfs
or tmpfs mappings, as described in detail in commit 7677f7fd8be7
("userfaultfd: add minor fault registration mode").

To use the same mechanism for VMs that use guest_memfd to map their memory,
guest_memfd should support userfaultfd minor mode.

Extend the ->fault() method of guest_memfd with the ability to notify the
core page fault handler that a page fault requires
handle_userfault(VM_UFFD_MINOR) to complete, and add vm_uffd_ops to the
guest_memfd vm_ops with implementations of the ->can_userfault() and
->get_folio_noalloc() methods.

Signed-off-by: Nikita Kalyazin <kalyazin@amazon.com>
Co-developed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 virt/kvm/guest_memfd.c | 76 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 65 insertions(+), 11 deletions(-)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index fdaea3422c30..087e7632bf70 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -7,6 +7,7 @@
 #include <linux/mempolicy.h>
 #include <linux/pseudo_fs.h>
 #include <linux/pagemap.h>
+#include <linux/userfaultfd_k.h>
 
 #include "kvm_mm.h"
 
@@ -121,6 +122,26 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
 	return r;
 }
 
+static struct folio *kvm_gmem_get_folio_noalloc(struct inode *inode, pgoff_t pgoff)
+{
+	return __filemap_get_folio(inode->i_mapping, pgoff,
+				   FGP_LOCK | FGP_ACCESSED, 0);
+}
+
+static struct folio *__kvm_gmem_folio_alloc(struct inode *inode, pgoff_t index)
+{
+	struct mempolicy *policy;
+	struct folio *folio;
+
+	policy = mpol_shared_policy_lookup(&GMEM_I(inode)->policy, index);
+	folio = __filemap_get_folio_mpol(inode->i_mapping, index,
+					 FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+					 mapping_gfp_mask(inode->i_mapping), policy);
+	mpol_cond_put(policy);
+
+	return folio;
+}
+
 /*
  * Returns a locked folio on success.  The caller is responsible for
  * setting the up-to-date flag before the memory is mapped into the guest.
@@ -133,25 +154,17 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
 static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
 {
 	/* TODO: Support huge pages. */
-	struct mempolicy *policy;
 	struct folio *folio;
 
 	/*
 	 * Fast-path: See if folio is already present in mapping to avoid
 	 * policy_lookup.
 	 */
-	folio = __filemap_get_folio(inode->i_mapping, index,
-				    FGP_LOCK | FGP_ACCESSED, 0);
+	folio = kvm_gmem_get_folio_noalloc(inode, index);
 	if (!IS_ERR(folio))
 		return folio;
 
-	policy = mpol_shared_policy_lookup(&GMEM_I(inode)->policy, index);
-	folio = __filemap_get_folio_mpol(inode->i_mapping, index,
-					 FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
-					 mapping_gfp_mask(inode->i_mapping), policy);
-	mpol_cond_put(policy);
-
-	return folio;
+	return __kvm_gmem_folio_alloc(inode, index);
 }
 
 static enum kvm_gfn_range_filter kvm_gmem_get_invalidate_filter(struct inode *inode)
@@ -405,7 +418,24 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
 	if (!(GMEM_I(inode)->flags & GUEST_MEMFD_FLAG_INIT_SHARED))
 		return VM_FAULT_SIGBUS;
 
-	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
+	folio = __filemap_get_folio(inode->i_mapping, vmf->pgoff,
+				    FGP_LOCK | FGP_ACCESSED, 0);
+
+	if (userfaultfd_armed(vmf->vma)) {
+		/*
+		 * If userfaultfd is registered in minor mode and a folio
+		 * exists, return VM_FAULT_UFFD_MINOR to trigger the
+		 * userfaultfd handler.
+		 */
+		if (userfaultfd_minor(vmf->vma) && !IS_ERR_OR_NULL(folio)) {
+			ret = VM_FAULT_UFFD_MINOR;
+			goto out_folio;
+		}
+	}
+
+	/* folio not in the pagecache, try to allocate */
+	if (IS_ERR(folio))
+		folio = __kvm_gmem_folio_alloc(inode, vmf->pgoff);
 	if (IS_ERR(folio)) {
 		if (PTR_ERR(folio) == -EAGAIN)
 			return VM_FAULT_RETRY;
@@ -462,12 +492,36 @@ static struct mempolicy *kvm_gmem_get_policy(struct vm_area_struct *vma,
 }
 #endif /* CONFIG_NUMA */
 
+#ifdef CONFIG_USERFAULTFD
+static bool kvm_gmem_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags)
+{
+	struct inode *inode = file_inode(vma->vm_file);
+
+	/*
+	 * Only support userfaultfd for guest_memfd with INIT_SHARED flag.
+	 * This ensures the memory can be mapped to userspace.
+	 */
+	if (!(GMEM_I(inode)->flags & GUEST_MEMFD_FLAG_INIT_SHARED))
+		return false;
+
+	return true;
+}
+
+static const struct vm_uffd_ops kvm_gmem_uffd_ops = {
+	.can_userfault = kvm_gmem_can_userfault,
+	.get_folio_noalloc = kvm_gmem_get_folio_noalloc,
+};
+#endif /* CONFIG_USERFAULTFD */
+
 static const struct vm_operations_struct kvm_gmem_vm_ops = {
 	.fault		= kvm_gmem_fault_user_mapping,
 #ifdef CONFIG_NUMA
 	.get_policy	= kvm_gmem_get_policy,
 	.set_policy	= kvm_gmem_set_policy,
 #endif
+#ifdef CONFIG_USERFAULTFD
+	.uffd_ops	= &kvm_gmem_uffd_ops,
+#endif
 };
 
 static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
-- 
2.51.0



  parent reply	other threads:[~2026-01-27 19:31 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-01-27 19:29 [PATCH RFC 00/17] mm, kvm: allow uffd suppot in guest_memfd Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 01/17] userfaultfd: introduce mfill_copy_folio_locked() helper Mike Rapoport
2026-02-03 17:45   ` Peter Xu
2026-02-08  9:49     ` Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 02/17] userfaultfd: introduce struct mfill_state Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 03/17] userfaultfd: introduce mfill_get_pmd() helper Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 04/17] userfaultfd: introduce mfill_get_vma() and mfill_put_vma() Mike Rapoport
2026-02-02 21:49   ` Peter Xu
2026-02-08  9:54     ` Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 05/17] userfaultfd: retry copying with locks dropped in mfill_atomic_pte_copy() Mike Rapoport
2026-02-02 21:23   ` Peter Xu
2026-02-08 10:01     ` Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 06/17] userfaultfd: move vma_can_userfault out of line Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 07/17] userfaultfd: introduce vm_uffd_ops Mike Rapoport
2026-02-02 21:36   ` Peter Xu
2026-02-08 10:13     ` Mike Rapoport
2026-02-11 19:35       ` Peter Xu
2026-02-15 17:47         ` Mike Rapoport
2026-02-18 21:34           ` Peter Xu
2026-01-27 19:29 ` [PATCH RFC 08/17] userfaultfd, shmem: use a VMA callback to handle UFFDIO_CONTINUE Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 09/17] userfaultfd: introduce vm_uffd_ops->alloc_folio() Mike Rapoport
2026-02-02 22:13   ` Peter Xu
2026-02-08 10:22     ` Mike Rapoport
2026-02-11 19:37       ` Peter Xu
2026-01-27 19:29 ` [PATCH RFC 10/17] shmem, userfaultfd: implement shmem uffd operations using vm_uffd_ops Mike Rapoport
2026-02-03 17:40   ` Peter Xu
2026-02-08 10:35     ` Mike Rapoport
2026-02-11 20:00       ` Peter Xu
2026-02-15 17:45         ` Mike Rapoport
2026-02-18 21:45           ` Peter Xu
2026-01-27 19:29 ` [PATCH RFC 11/17] userfaultfd: mfill_atomic() remove retry logic Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 12/17] mm: introduce VM_FAULT_UFFD_MINOR fault reason Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 13/17] mm: introduce VM_FAULT_UFFD_MISSING " Mike Rapoport
2026-01-27 19:29 ` Mike Rapoport [this message]
2026-01-27 19:29 ` [PATCH RFC 15/17] KVM: guest_memfd: implement userfaultfd missing mode Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 16/17] KVM: selftests: test userfaultfd minor for guest_memfd Mike Rapoport
2026-01-27 19:29 ` [PATCH RFC 17/17] KVM: selftests: test userfaultfd missing " Mike Rapoport
2026-02-03 20:56 ` [PATCH RFC 00/17] mm, kvm: allow uffd suppot in guest_memfd Peter Xu
2026-02-09 15:35   ` David Hildenbrand (Arm)
2026-02-11  6:04     ` Mike Rapoport
2026-02-11  9:52       ` David Hildenbrand (Arm)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260127192936.1250096-15-rppt@kernel.org \
    --to=rppt@kernel.org \
    --cc=Liam.Howlett@oracle.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=axelrasmussen@google.com \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=david@redhat.com \
    --cc=hughd@google.com \
    --cc=jthoughton@google.com \
    --cc=kalyazin@amazon.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=osalvador@suse.de \
    --cc=pbonzini@redhat.com \
    --cc=peterx@redhat.com \
    --cc=seanjc@google.com \
    --cc=shuah@kernel.org \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox