From: Fuad Tabba <tabba@google.com>
To: kvm@vger.kernel.org, linux-arm-msm@vger.kernel.org,
linux-mm@kvack.org, kvmarm@lists.linux.dev
Cc: pbonzini@redhat.com, chenhuacai@kernel.org, mpe@ellerman.id.au,
anup@brainfault.org, paul.walmsley@sifive.com,
palmer@dabbelt.com, aou@eecs.berkeley.edu, seanjc@google.com,
viro@zeniv.linux.org.uk, brauner@kernel.org,
willy@infradead.org, akpm@linux-foundation.org,
xiaoyao.li@intel.com, yilun.xu@intel.com,
chao.p.peng@linux.intel.com, jarkko@kernel.org,
amoorthy@google.com, dmatlack@google.com,
isaku.yamahata@intel.com, mic@digikod.net, vbabka@suse.cz,
vannapurve@google.com, ackerleytng@google.com,
mail@maciej.szmigiero.name, david@redhat.com,
michael.roth@amd.com, wei.w.wang@intel.com,
liam.merwick@oracle.com, isaku.yamahata@gmail.com,
kirill.shutemov@linux.intel.com, suzuki.poulose@arm.com,
steven.price@arm.com, quic_eberman@quicinc.com,
quic_mnalajal@quicinc.com, quic_tsoni@quicinc.com,
quic_svaddagi@quicinc.com, quic_cvanscha@quicinc.com,
quic_pderrin@quicinc.com, quic_pheragu@quicinc.com,
catalin.marinas@arm.com, james.morse@arm.com,
yuzenghui@huawei.com, oliver.upton@linux.dev, maz@kernel.org,
will@kernel.org, qperret@google.com, keirf@google.com,
roypat@amazon.co.uk, shuah@kernel.org, hch@infradead.org,
jgg@nvidia.com, rientjes@google.com, jhubbard@nvidia.com,
fvdl@google.com, hughd@google.com, jthoughton@google.com,
peterx@redhat.com, pankaj.gupta@amd.com, ira.weiny@intel.com,
tabba@google.com
Subject: [PATCH v15 12/21] KVM: x86/mmu: Consult guest_memfd when computing max_mapping_level
Date: Thu, 17 Jul 2025 17:27:22 +0100 [thread overview]
Message-ID: <20250717162731.446579-13-tabba@google.com> (raw)
In-Reply-To: <20250717162731.446579-1-tabba@google.com>
From: Ackerley Tng <ackerleytng@google.com>
Modify kvm_mmu_max_mapping_level() to consult guest_memfd for memory
regions backed by it when computing the maximum mapping level,
especially during huge page recovery.
Previously, kvm_mmu_max_mapping_level() was designed primarily for
host-backed memory and private pages. With guest_memfd now supporting
non-private memory, it's necessary to factor in guest_memfd's influence
on mapping levels for such memory.
Since guest_memfd can now be used for non-private memory, make
kvm_max_max_mapping_level, when recovering huge pages, take input from
guest_memfd.
Input is taken from guest_memfd as long as a fault to that slot and gfn
would have been served from guest_memfd. For now, take a shortcut if the
slot and gfn points to memory that is private, since recovering huge
pages aren't supported for private memory yet.
Since guest_memfd memory can also be faulted into host page tables,
__kvm_mmu_max_mapping_level() still applies since consulting lpage_info
and host page tables are required.
Move functions kvm_max_level_for_order() and
kvm_gmem_max_mapping_level() so kvm_mmu_max_mapping_level() can use
those functions.
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Co-developed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/x86/kvm/mmu/mmu.c | 90 ++++++++++++++++++++++++----------------
include/linux/kvm_host.h | 7 ++++
virt/kvm/guest_memfd.c | 17 ++++++++
3 files changed, 79 insertions(+), 35 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6bd28fda0fd3..94be15cde6da 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3282,13 +3282,67 @@ static int __kvm_mmu_max_mapping_level(struct kvm *kvm,
return min(host_level, max_level);
}
+static u8 kvm_max_level_for_order(int order)
+{
+ BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
+
+ KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
+ order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
+ order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
+
+ if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
+ return PG_LEVEL_1G;
+
+ if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
+ return PG_LEVEL_2M;
+
+ return PG_LEVEL_4K;
+}
+
+static u8 kvm_gmem_max_mapping_level(struct kvm *kvm, int order,
+ struct kvm_page_fault *fault)
+{
+ u8 req_max_level;
+ u8 max_level;
+
+ max_level = kvm_max_level_for_order(order);
+ if (max_level == PG_LEVEL_4K)
+ return PG_LEVEL_4K;
+
+ req_max_level = kvm_x86_call(max_mapping_level)(kvm, fault);
+ if (req_max_level)
+ max_level = min(max_level, req_max_level);
+
+ return max_level;
+}
+
int kvm_mmu_max_mapping_level(struct kvm *kvm,
const struct kvm_memory_slot *slot, gfn_t gfn)
{
bool is_private = kvm_slot_has_gmem(slot) &&
kvm_mem_is_private(kvm, gfn);
+ int max_level = PG_LEVEL_NUM;
+
+ /*
+ * For now, kvm_mmu_max_mapping_level() is only called from
+ * kvm_mmu_recover_huge_pages(), and that's not yet supported for
+ * private memory, hence we can take a shortcut and return early.
+ */
+ if (is_private)
+ return PG_LEVEL_4K;
- return __kvm_mmu_max_mapping_level(kvm, slot, gfn, PG_LEVEL_NUM, is_private);
+ /*
+ * For non-private pages that would have been faulted from guest_memfd,
+ * let guest_memfd influence max_mapping_level.
+ */
+ if (kvm_memslot_is_gmem_only(slot)) {
+ int order = kvm_gmem_mapping_order(slot, gfn);
+
+ max_level = min(max_level,
+ kvm_gmem_max_mapping_level(kvm, order, NULL));
+ }
+
+ return __kvm_mmu_max_mapping_level(kvm, slot, gfn, max_level, is_private);
}
void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
@@ -4450,40 +4504,6 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work)
vcpu->stat.pf_fixed++;
}
-static inline u8 kvm_max_level_for_order(int order)
-{
- BUILD_BUG_ON(KVM_MAX_HUGEPAGE_LEVEL > PG_LEVEL_1G);
-
- KVM_MMU_WARN_ON(order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G) &&
- order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M) &&
- order != KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K));
-
- if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_1G))
- return PG_LEVEL_1G;
-
- if (order >= KVM_HPAGE_GFN_SHIFT(PG_LEVEL_2M))
- return PG_LEVEL_2M;
-
- return PG_LEVEL_4K;
-}
-
-static u8 kvm_gmem_max_mapping_level(struct kvm *kvm, int order,
- struct kvm_page_fault *fault)
-{
- u8 req_max_level;
- u8 max_level;
-
- max_level = kvm_max_level_for_order(order);
- if (max_level == PG_LEVEL_4K)
- return PG_LEVEL_4K;
-
- req_max_level = kvm_x86_call(max_mapping_level)(kvm, fault);
- if (req_max_level)
- max_level = min(max_level, req_max_level);
-
- return max_level;
-}
-
static void kvm_mmu_finish_page_fault(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault, int r)
{
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d2218ec57ceb..662271314778 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2574,6 +2574,7 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
gfn_t gfn, kvm_pfn_t *pfn, struct page **page,
int *max_order);
+int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn);
#else
static inline int kvm_gmem_get_pfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn,
@@ -2583,6 +2584,12 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm,
KVM_BUG_ON(1, kvm);
return -EIO;
}
+static inline int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot,
+ gfn_t gfn)
+{
+ WARN_ONCE(1, "Unexpected call since gmem is disabled.");
+ return 0;
+}
#endif /* CONFIG_KVM_GMEM */
#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 2b00f8796a15..d01bd7a2c2bd 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -713,6 +713,23 @@ int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
}
EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn);
+/**
+ * kvm_gmem_mapping_order() - Get the mapping order for this @gfn in @slot.
+ *
+ * @slot: the memslot that gfn belongs to.
+ * @gfn: the gfn to look up mapping order for.
+ *
+ * This is equal to max_order that would be returned if kvm_gmem_get_pfn() were
+ * called now.
+ *
+ * Return: the mapping order for this @gfn in @slot.
+ */
+int kvm_gmem_mapping_order(const struct kvm_memory_slot *slot, gfn_t gfn)
+{
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_gmem_mapping_order);
+
#ifdef CONFIG_KVM_GENERIC_GMEM_POPULATE
long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages,
kvm_gmem_populate_cb post_populate, void *opaque)
--
2.50.0.727.gbf7dc18ff4-goog
next prev parent reply other threads:[~2025-07-17 16:28 UTC|newest]
Thread overview: 86+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-17 16:27 [PATCH v15 00/21] KVM: Enable host userspace mapping for guest_memfd-backed memory for non-CoCo VMs Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 01/21] KVM: Rename CONFIG_KVM_PRIVATE_MEM to CONFIG_KVM_GMEM Fuad Tabba
2025-07-21 15:17 ` Sean Christopherson
2025-07-21 15:26 ` Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 02/21] KVM: Rename CONFIG_KVM_GENERIC_PRIVATE_MEM to CONFIG_KVM_GENERIC_GMEM_POPULATE Fuad Tabba
2025-07-21 16:44 ` Sean Christopherson
2025-07-21 16:51 ` Fuad Tabba
2025-07-21 17:33 ` Sean Christopherson
2025-07-22 9:29 ` Fuad Tabba
2025-07-22 15:58 ` Sean Christopherson
2025-07-22 16:01 ` Fuad Tabba
2025-07-22 23:42 ` Sean Christopherson
2025-07-23 9:22 ` Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 03/21] KVM: Introduce kvm_arch_supports_gmem() Fuad Tabba
2025-07-18 1:42 ` Xiaoyao Li
2025-07-21 14:47 ` Sean Christopherson
2025-07-21 14:55 ` Fuad Tabba
2025-07-21 16:44 ` Sean Christopherson
2025-07-17 16:27 ` [PATCH v15 04/21] KVM: x86: Introduce kvm->arch.supports_gmem Fuad Tabba
2025-07-21 16:45 ` Sean Christopherson
2025-07-21 17:00 ` Fuad Tabba
2025-07-21 19:09 ` Sean Christopherson
2025-07-17 16:27 ` [PATCH v15 05/21] KVM: Rename kvm_slot_can_be_private() to kvm_slot_has_gmem() Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 06/21] KVM: Fix comments that refer to slots_lock Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 07/21] KVM: Fix comment that refers to kvm uapi header path Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 08/21] KVM: guest_memfd: Allow host to map guest_memfd pages Fuad Tabba
2025-07-18 2:56 ` Xiaoyao Li
2025-07-17 16:27 ` [PATCH v15 09/21] KVM: guest_memfd: Track guest_memfd mmap support in memslot Fuad Tabba
2025-07-18 3:33 ` Xiaoyao Li
2025-07-17 16:27 ` [PATCH v15 10/21] KVM: x86/mmu: Generalize private_max_mapping_level x86 op to max_mapping_level Fuad Tabba
2025-07-18 6:19 ` Xiaoyao Li
2025-07-21 19:46 ` Sean Christopherson
2025-07-17 16:27 ` [PATCH v15 11/21] KVM: x86/mmu: Allow NULL-able fault in kvm_max_private_mapping_level Fuad Tabba
2025-07-18 5:10 ` Xiaoyao Li
2025-07-21 23:17 ` Sean Christopherson
2025-07-22 5:35 ` Xiaoyao Li
2025-07-22 11:08 ` Fuad Tabba
2025-07-22 14:32 ` Sean Christopherson
2025-07-22 15:30 ` Fuad Tabba
2025-07-22 10:35 ` Fuad Tabba
2025-07-17 16:27 ` Fuad Tabba [this message]
2025-07-18 5:32 ` [PATCH v15 12/21] KVM: x86/mmu: Consult guest_memfd when computing max_mapping_level Xiaoyao Li
2025-07-18 5:57 ` Xiaoyao Li
2025-07-17 16:27 ` [PATCH v15 13/21] KVM: x86/mmu: Handle guest page faults for guest_memfd with shared memory Fuad Tabba
2025-07-18 6:09 ` Xiaoyao Li
2025-07-21 16:47 ` Sean Christopherson
2025-07-21 16:56 ` Fuad Tabba
2025-07-22 5:41 ` Xiaoyao Li
2025-07-22 8:43 ` Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 14/21] KVM: x86: Enable guest_memfd mmap for default VM type Fuad Tabba
2025-07-18 6:10 ` Xiaoyao Li
2025-07-21 12:22 ` Xiaoyao Li
2025-07-21 12:41 ` Fuad Tabba
2025-07-21 13:45 ` Vishal Annapurve
2025-07-21 14:42 ` Xiaoyao Li
2025-07-21 14:42 ` Sean Christopherson
2025-07-21 15:07 ` Xiaoyao Li
2025-07-21 17:29 ` Sean Christopherson
2025-07-21 20:33 ` Vishal Annapurve
2025-07-21 22:21 ` Sean Christopherson
2025-07-21 23:50 ` Vishal Annapurve
2025-07-22 14:35 ` Sean Christopherson
2025-07-23 14:08 ` Vishal Annapurve
2025-07-23 14:43 ` Sean Christopherson
2025-07-23 14:46 ` David Hildenbrand
2025-07-22 14:28 ` Xiaoyao Li
2025-07-22 14:37 ` Sean Christopherson
2025-07-22 15:31 ` Xiaoyao Li
2025-07-22 15:50 ` David Hildenbrand
2025-07-22 15:54 ` Sean Christopherson
2025-07-17 16:27 ` [PATCH v15 15/21] KVM: arm64: Refactor user_mem_abort() Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 16/21] KVM: arm64: Handle guest_memfd-backed guest page faults Fuad Tabba
2025-07-22 12:31 ` Kunwu Chan
2025-07-23 8:20 ` Marc Zyngier
2025-07-23 11:44 ` Kunwu Chan
2025-07-23 8:26 ` Marc Zyngier
2025-07-17 16:27 ` [PATCH v15 17/21] KVM: arm64: nv: Handle VNCR_EL2-triggered faults backed by guest_memfd Fuad Tabba
2025-07-23 8:29 ` Marc Zyngier
2025-07-17 16:27 ` [PATCH v15 18/21] KVM: arm64: Enable host mapping of shared guest_memfd memory Fuad Tabba
2025-07-23 8:33 ` Marc Zyngier
2025-07-23 9:18 ` Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 19/21] KVM: Introduce the KVM capability KVM_CAP_GMEM_MMAP Fuad Tabba
2025-07-18 6:14 ` Xiaoyao Li
2025-07-21 17:31 ` Sean Christopherson
2025-07-17 16:27 ` [PATCH v15 20/21] KVM: selftests: Do not use hardcoded page sizes in guest_memfd test Fuad Tabba
2025-07-17 16:27 ` [PATCH v15 21/21] KVM: selftests: guest_memfd mmap() test when mmap is supported Fuad Tabba
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250717162731.446579-13-tabba@google.com \
--to=tabba@google.com \
--cc=ackerleytng@google.com \
--cc=akpm@linux-foundation.org \
--cc=amoorthy@google.com \
--cc=anup@brainfault.org \
--cc=aou@eecs.berkeley.edu \
--cc=brauner@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=chao.p.peng@linux.intel.com \
--cc=chenhuacai@kernel.org \
--cc=david@redhat.com \
--cc=dmatlack@google.com \
--cc=fvdl@google.com \
--cc=hch@infradead.org \
--cc=hughd@google.com \
--cc=ira.weiny@intel.com \
--cc=isaku.yamahata@gmail.com \
--cc=isaku.yamahata@intel.com \
--cc=james.morse@arm.com \
--cc=jarkko@kernel.org \
--cc=jgg@nvidia.com \
--cc=jhubbard@nvidia.com \
--cc=jthoughton@google.com \
--cc=keirf@google.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=kvm@vger.kernel.org \
--cc=kvmarm@lists.linux.dev \
--cc=liam.merwick@oracle.com \
--cc=linux-arm-msm@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mail@maciej.szmigiero.name \
--cc=maz@kernel.org \
--cc=mic@digikod.net \
--cc=michael.roth@amd.com \
--cc=mpe@ellerman.id.au \
--cc=oliver.upton@linux.dev \
--cc=palmer@dabbelt.com \
--cc=pankaj.gupta@amd.com \
--cc=paul.walmsley@sifive.com \
--cc=pbonzini@redhat.com \
--cc=peterx@redhat.com \
--cc=qperret@google.com \
--cc=quic_cvanscha@quicinc.com \
--cc=quic_eberman@quicinc.com \
--cc=quic_mnalajal@quicinc.com \
--cc=quic_pderrin@quicinc.com \
--cc=quic_pheragu@quicinc.com \
--cc=quic_svaddagi@quicinc.com \
--cc=quic_tsoni@quicinc.com \
--cc=rientjes@google.com \
--cc=roypat@amazon.co.uk \
--cc=seanjc@google.com \
--cc=shuah@kernel.org \
--cc=steven.price@arm.com \
--cc=suzuki.poulose@arm.com \
--cc=vannapurve@google.com \
--cc=vbabka@suse.cz \
--cc=viro@zeniv.linux.org.uk \
--cc=wei.w.wang@intel.com \
--cc=will@kernel.org \
--cc=willy@infradead.org \
--cc=xiaoyao.li@intel.com \
--cc=yilun.xu@intel.com \
--cc=yuzenghui@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox