[RFC PATCH v4 13/14] KVM: arm64: Handle guest_memfd()-backed guest page faults

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Fuad Tabba <tabba@google.com>
To: kvm@vger.kernel.org, linux-arm-msm@vger.kernel.org, linux-mm@kvack.org
Cc: pbonzini@redhat.com, chenhuacai@kernel.org, mpe@ellerman.id.au,
	 anup@brainfault.org, paul.walmsley@sifive.com,
	palmer@dabbelt.com,  aou@eecs.berkeley.edu, seanjc@google.com,
	viro@zeniv.linux.org.uk,  brauner@kernel.org,
	willy@infradead.org, akpm@linux-foundation.org,
	 xiaoyao.li@intel.com, yilun.xu@intel.com,
	chao.p.peng@linux.intel.com,  jarkko@kernel.org,
	amoorthy@google.com, dmatlack@google.com,
	 yu.c.zhang@linux.intel.com, isaku.yamahata@intel.com,
	mic@digikod.net,  vbabka@suse.cz, vannapurve@google.com,
	ackerleytng@google.com,  mail@maciej.szmigiero.name,
	david@redhat.com, michael.roth@amd.com,  wei.w.wang@intel.com,
	liam.merwick@oracle.com, isaku.yamahata@gmail.com,
	 kirill.shutemov@linux.intel.com, suzuki.poulose@arm.com,
	steven.price@arm.com,  quic_eberman@quicinc.com,
	quic_mnalajal@quicinc.com, quic_tsoni@quicinc.com,
	 quic_svaddagi@quicinc.com, quic_cvanscha@quicinc.com,
	 quic_pderrin@quicinc.com, quic_pheragu@quicinc.com,
	catalin.marinas@arm.com,  james.morse@arm.com,
	yuzenghui@huawei.com, oliver.upton@linux.dev,  maz@kernel.org,
	will@kernel.org, qperret@google.com, keirf@google.com,
	 roypat@amazon.co.uk, shuah@kernel.org, hch@infradead.org,
	jgg@nvidia.com,  rientjes@google.com, jhubbard@nvidia.com,
	fvdl@google.com, hughd@google.com,  jthoughton@google.com,
	tabba@google.com
Subject: [RFC PATCH v4 13/14] KVM: arm64: Handle guest_memfd()-backed guest page faults
Date: Fri, 13 Dec 2024 16:48:09 +0000	[thread overview]
Message-ID: <20241213164811.2006197-14-tabba@google.com> (raw)
In-Reply-To: <20241213164811.2006197-1-tabba@google.com>

Add arm64 support for resolving guest page faults on
guest_memfd() backed memslots. This support is not contingent on
pKVM, or other confidential computing support, and works in both
VHE and nVHE modes.

Without confidential computing, this support is useful for
testing and debugging. In the future, it might also be useful
should a user want to use guest_memfd() for all code, whether
it's for a protected guest or not.

For now, the fault granule is restricted to PAGE_SIZE.

Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/mmu.c | 111 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 109 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 342a9bd3848f..1c4b3871967c 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1434,6 +1434,107 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
 	return vma->vm_flags & VM_MTE_ALLOWED;
 }
 
+static int guest_memfd_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
+			     struct kvm_memory_slot *memslot, bool fault_is_perm)
+{
+	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
+	bool exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
+	bool logging_active = memslot_is_logging(memslot);
+	struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
+	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
+	bool write_fault = kvm_is_write_fault(vcpu);
+	struct mm_struct *mm = current->mm;
+	gfn_t gfn = gpa_to_gfn(fault_ipa);
+	struct kvm *kvm = vcpu->kvm;
+	struct page *page;
+	kvm_pfn_t pfn;
+	int ret;
+
+	/* For now, guest_memfd() only supports PAGE_SIZE granules. */
+	if (WARN_ON_ONCE(fault_is_perm &&
+			 kvm_vcpu_trap_get_perm_fault_granule(vcpu) != PAGE_SIZE)) {
+		return -EFAULT;
+	}
+
+	VM_BUG_ON(write_fault && exec_fault);
+
+	if (fault_is_perm && !write_fault && !exec_fault) {
+		kvm_err("Unexpected L2 read permission error\n");
+		return -EFAULT;
+	}
+
+	/*
+	 * Permission faults just need to update the existing leaf entry,
+	 * and so normally don't require allocations from the memcache. The
+	 * only exception to this is when dirty logging is enabled at runtime
+	 * and a write fault needs to collapse a block entry into a table.
+	 */
+	if (!fault_is_perm || (logging_active && write_fault)) {
+		ret = kvm_mmu_topup_memory_cache(memcache,
+						 kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Holds the folio lock until mapped in the guest and its refcount is
+	 * stable, to avoid races with paths that check if the folio is mapped
+	 * by the host.
+	 */
+	ret = kvm_gmem_get_pfn_locked(kvm, memslot, gfn, &pfn, &page, NULL);
+	if (ret)
+		return ret;
+
+	if (!kvm_slot_gmem_is_guest_mappable(memslot, gfn)) {
+		ret = -EAGAIN;
+		goto unlock_page;
+	}
+
+	/*
+	 * Once it's faulted in, a guest_memfd() page will stay in memory.
+	 * Therefore, count it as locked.
+	 */
+	if (!fault_is_perm) {
+		ret = account_locked_vm(mm, 1, true);
+		if (ret)
+			goto unlock_page;
+	}
+
+	read_lock(&kvm->mmu_lock);
+	if (write_fault)
+		prot |= KVM_PGTABLE_PROT_W;
+
+	if (exec_fault)
+		prot |= KVM_PGTABLE_PROT_X;
+
+	if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
+		prot |= KVM_PGTABLE_PROT_X;
+
+	/*
+	 * Under the premise of getting a FSC_PERM fault, we just need to relax
+	 * permissions.
+	 */
+	if (fault_is_perm)
+		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
+	else
+		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, PAGE_SIZE,
+					__pfn_to_phys(pfn), prot,
+					memcache,
+					KVM_PGTABLE_WALK_HANDLE_FAULT |
+					KVM_PGTABLE_WALK_SHARED);
+
+	kvm_release_faultin_page(kvm, page, !!ret, write_fault);
+	read_unlock(&kvm->mmu_lock);
+
+	if (ret && !fault_is_perm)
+		account_locked_vm(mm, 1, false);
+unlock_page:
+	unlock_page(page);
+	put_page(page);
+
+	return ret != -EAGAIN ? ret : 0;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_s2_trans *nested,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1900,8 +2001,14 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 		goto out_unlock;
 	}
 
-	ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
-			     esr_fsc_is_permission_fault(esr));
+	if (kvm_slot_can_be_private(memslot)) {
+		ret = guest_memfd_abort(vcpu, fault_ipa, memslot,
+					esr_fsc_is_permission_fault(esr));
+	} else {
+		ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
+				     esr_fsc_is_permission_fault(esr));
+	}
+
 	if (ret == 0)
 		ret = 1;
 out:
-- 
2.47.1.613.gc27f4b7a9f-goog

next prev parent reply	other threads:[~2024-12-13 16:48 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-13 16:47 [RFC PATCH v4 00/14] KVM: Restricted mapping of guest_memfd at the host and arm64 support Fuad Tabba
2024-12-13 16:47 ` [RFC PATCH v4 01/14] mm: Consolidate freeing of typed folios on final folio_put() Fuad Tabba
2024-12-13 16:47 ` [RFC PATCH v4 02/14] KVM: guest_memfd: Make guest mem use guest mem inodes instead of anonymous inodes Fuad Tabba
2024-12-13 16:47 ` [RFC PATCH v4 03/14] KVM: guest_memfd: Introduce kvm_gmem_get_pfn_locked(), which retains the folio lock Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 04/14] KVM: guest_memfd: Track mappability within a struct kvm_gmem_private Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 05/14] KVM: guest_memfd: Folio mappability states and functions that manage their transition Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 06/14] KVM: guest_memfd: Handle final folio_put() of guestmem pages Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 07/14] KVM: guest_memfd: Allow host to mmap guest_memfd() pages when shared Fuad Tabba
2024-12-27  4:21   ` Alexey Kardashevskiy
2025-01-09 10:17     ` Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 08/14] KVM: guest_memfd: Add guest_memfd support to kvm_(read|/write)_guest_page() Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 09/14] KVM: guest_memfd: Add KVM capability to check if guest_memfd is host mappable Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 10/14] KVM: guest_memfd: Add a guest_memfd() flag to initialize it as mappable Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 11/14] KVM: guest_memfd: selftests: guest_memfd mmap() test when mapping is allowed Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 12/14] KVM: arm64: Skip VMA checks for slots without userspace address Fuad Tabba
2024-12-13 16:48 ` Fuad Tabba [this message]
2025-01-16 14:48   ` [RFC PATCH v4 13/14] KVM: arm64: Handle guest_memfd()-backed guest page faults Patrick Roy
2025-01-16 15:16     ` Fuad Tabba
2024-12-13 16:48 ` [RFC PATCH v4 14/14] KVM: arm64: Enable guest_memfd private memory when pKVM is enabled Fuad Tabba
2025-01-09 16:34 ` [RFC PATCH v4 00/14] KVM: Restricted mapping of guest_memfd at the host and arm64 support Fuad Tabba
2025-01-16  0:35   ` Ackerley Tng
2025-01-16  9:19     ` Fuad Tabba
2025-01-20  9:26       ` Vlastimil Babka
2025-01-20  9:36         ` David Hildenbrand
2025-01-16 14:48 ` Patrick Roy
2025-01-16 15:02   ` Fuad Tabba

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241213164811.2006197-14-tabba@google.com \
    --to=tabba@google.com \
    --cc=ackerleytng@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=amoorthy@google.com \
    --cc=anup@brainfault.org \
    --cc=aou@eecs.berkeley.edu \
    --cc=brauner@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=chao.p.peng@linux.intel.com \
    --cc=chenhuacai@kernel.org \
    --cc=david@redhat.com \
    --cc=dmatlack@google.com \
    --cc=fvdl@google.com \
    --cc=hch@infradead.org \
    --cc=hughd@google.com \
    --cc=isaku.yamahata@gmail.com \
    --cc=isaku.yamahata@intel.com \
    --cc=james.morse@arm.com \
    --cc=jarkko@kernel.org \
    --cc=jgg@nvidia.com \
    --cc=jhubbard@nvidia.com \
    --cc=jthoughton@google.com \
    --cc=keirf@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=kvm@vger.kernel.org \
    --cc=liam.merwick@oracle.com \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mail@maciej.szmigiero.name \
    --cc=maz@kernel.org \
    --cc=mic@digikod.net \
    --cc=michael.roth@amd.com \
    --cc=mpe@ellerman.id.au \
    --cc=oliver.upton@linux.dev \
    --cc=palmer@dabbelt.com \
    --cc=paul.walmsley@sifive.com \
    --cc=pbonzini@redhat.com \
    --cc=qperret@google.com \
    --cc=quic_cvanscha@quicinc.com \
    --cc=quic_eberman@quicinc.com \
    --cc=quic_mnalajal@quicinc.com \
    --cc=quic_pderrin@quicinc.com \
    --cc=quic_pheragu@quicinc.com \
    --cc=quic_svaddagi@quicinc.com \
    --cc=quic_tsoni@quicinc.com \
    --cc=rientjes@google.com \
    --cc=roypat@amazon.co.uk \
    --cc=seanjc@google.com \
    --cc=shuah@kernel.org \
    --cc=steven.price@arm.com \
    --cc=suzuki.poulose@arm.com \
    --cc=vannapurve@google.com \
    --cc=vbabka@suse.cz \
    --cc=viro@zeniv.linux.org.uk \
    --cc=wei.w.wang@intel.com \
    --cc=will@kernel.org \
    --cc=willy@infradead.org \
    --cc=xiaoyao.li@intel.com \
    --cc=yilun.xu@intel.com \
    --cc=yu.c.zhang@linux.intel.com \
    --cc=yuzenghui@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox