* [PATCH v5 1/9] mm: Consolidate freeing of typed folios on final folio_put()
2025-03-03 17:10 [PATCH v5 0/9] KVM: Mapping guest_memfd backed memory at the host for software protected VMs Fuad Tabba
@ 2025-03-03 17:10 ` Fuad Tabba
2025-03-04 8:45 ` Kirill A. Shutemov
2025-03-03 17:10 ` [PATCH v5 2/9] KVM: guest_memfd: Handle final folio_put() of guest_memfd pages Fuad Tabba
` (7 subsequent siblings)
8 siblings, 1 reply; 31+ messages in thread
From: Fuad Tabba @ 2025-03-03 17:10 UTC (permalink / raw)
To: kvm, linux-arm-msm, linux-mm
Cc: pbonzini, chenhuacai, mpe, anup, paul.walmsley, palmer, aou,
seanjc, viro, brauner, willy, akpm, xiaoyao.li, yilun.xu,
chao.p.peng, jarkko, amoorthy, dmatlack, isaku.yamahata, mic,
vbabka, vannapurve, ackerleytng, mail, david, michael.roth,
wei.w.wang, liam.merwick, isaku.yamahata, kirill.shutemov,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx, tabba
Some folio types, such as hugetlb, handle freeing their own
folios. Moreover, guest_memfd will require being notified once a
folio's reference count reaches 0 to facilitate shared to private
folio conversion, without the folio actually being freed at that
point.
As a first step towards that, this patch consolidates freeing
folios that have a type. The first user is hugetlb folios. Later
in this patch series, guest_memfd will become the second user of
this.
Suggested-by: David Hildenbrand <david@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
include/linux/page-flags.h | 15 +++++++++++++++
mm/swap.c | 23 ++++++++++++++++++-----
2 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 36d283552f80..6dc2494bd002 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -953,6 +953,21 @@ static inline bool page_has_type(const struct page *page)
return page_mapcount_is_type(data_race(page->page_type));
}
+static inline int page_get_type(const struct page *page)
+{
+ return page->page_type >> 24;
+}
+
+static inline bool folio_has_type(const struct folio *folio)
+{
+ return page_has_type(&folio->page);
+}
+
+static inline int folio_get_type(const struct folio *folio)
+{
+ return page_get_type(&folio->page);
+}
+
#define FOLIO_TYPE_OPS(lname, fname) \
static __always_inline bool folio_test_##fname(const struct folio *folio) \
{ \
diff --git a/mm/swap.c b/mm/swap.c
index fc8281ef4241..47bc1bb919cc 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -94,6 +94,19 @@ static void page_cache_release(struct folio *folio)
unlock_page_lruvec_irqrestore(lruvec, flags);
}
+static void free_typed_folio(struct folio *folio)
+{
+ switch (folio_get_type(folio)) {
+#ifdef CONFIG_HUGETLBFS
+ case PGTY_hugetlb:
+ free_huge_folio(folio);
+ return;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+
void __folio_put(struct folio *folio)
{
if (unlikely(folio_is_zone_device(folio))) {
@@ -101,8 +114,8 @@ void __folio_put(struct folio *folio)
return;
}
- if (folio_test_hugetlb(folio)) {
- free_huge_folio(folio);
+ if (unlikely(folio_has_type(folio))) {
+ free_typed_folio(folio);
return;
}
@@ -966,13 +979,13 @@ void folios_put_refs(struct folio_batch *folios, unsigned int *refs)
if (!folio_ref_sub_and_test(folio, nr_refs))
continue;
- /* hugetlb has its own memcg */
- if (folio_test_hugetlb(folio)) {
+ if (unlikely(folio_has_type(folio))) {
+ /* typed folios have their own memcg, if any */
if (lruvec) {
unlock_page_lruvec_irqrestore(lruvec, flags);
lruvec = NULL;
}
- free_huge_folio(folio);
+ free_typed_folio(folio);
continue;
}
folio_unqueue_deferred_split(folio);
--
2.48.1.711.g2feabab25a-goog
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 1/9] mm: Consolidate freeing of typed folios on final folio_put()
2025-03-03 17:10 ` [PATCH v5 1/9] mm: Consolidate freeing of typed folios on final folio_put() Fuad Tabba
@ 2025-03-04 8:45 ` Kirill A. Shutemov
2025-03-04 9:43 ` David Hildenbrand
0 siblings, 1 reply; 31+ messages in thread
From: Kirill A. Shutemov @ 2025-03-04 8:45 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
david, michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx
On Mon, Mar 03, 2025 at 05:10:05PM +0000, Fuad Tabba wrote:
> +static inline int page_get_type(const struct page *page)
> +{
> + return page->page_type >> 24;
This magic number in page_type code asks for a #define.
--
Kiryl Shutsemau / Kirill A. Shutemov
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 1/9] mm: Consolidate freeing of typed folios on final folio_put()
2025-03-04 8:45 ` Kirill A. Shutemov
@ 2025-03-04 9:43 ` David Hildenbrand
0 siblings, 0 replies; 31+ messages in thread
From: David Hildenbrand @ 2025-03-04 9:43 UTC (permalink / raw)
To: Kirill A. Shutemov, Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx
On 04.03.25 09:45, Kirill A. Shutemov wrote:
> On Mon, Mar 03, 2025 at 05:10:05PM +0000, Fuad Tabba wrote:
>> +static inline int page_get_type(const struct page *page)
>> +{
>> + return page->page_type >> 24;
>
> This magic number in page_type code asks for a #define.
As Willy is about to rewrite most of that either way (once the page type
is stored elsewhere), I'm not sure if that define is really valuable at
this point.
--
Cheers,
David / dhildenb
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v5 2/9] KVM: guest_memfd: Handle final folio_put() of guest_memfd pages
2025-03-03 17:10 [PATCH v5 0/9] KVM: Mapping guest_memfd backed memory at the host for software protected VMs Fuad Tabba
2025-03-03 17:10 ` [PATCH v5 1/9] mm: Consolidate freeing of typed folios on final folio_put() Fuad Tabba
@ 2025-03-03 17:10 ` Fuad Tabba
2025-03-07 17:04 ` Ackerley Tng
2025-03-03 17:10 ` [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages Fuad Tabba
` (6 subsequent siblings)
8 siblings, 1 reply; 31+ messages in thread
From: Fuad Tabba @ 2025-03-03 17:10 UTC (permalink / raw)
To: kvm, linux-arm-msm, linux-mm
Cc: pbonzini, chenhuacai, mpe, anup, paul.walmsley, palmer, aou,
seanjc, viro, brauner, willy, akpm, xiaoyao.li, yilun.xu,
chao.p.peng, jarkko, amoorthy, dmatlack, isaku.yamahata, mic,
vbabka, vannapurve, ackerleytng, mail, david, michael.roth,
wei.w.wang, liam.merwick, isaku.yamahata, kirill.shutemov,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx, tabba
Before transitioning a guest_memfd folio to unshared, thereby
disallowing access by the host and allowing the hypervisor to
transition its view of the guest page as private, we need to be
sure that the host doesn't have any references to the folio.
This patch introduces a new type for guest_memfd folios, which
isn't activated in this series but is here as a placeholder and
to facilitate the code in the subsequent patch series. This will
be used in the future to register a callback that informs the
guest_memfd subsystem when the last reference is dropped,
therefore knowing that the host doesn't have any remaining
references.
This patch also introduces the configuration option,
KVM_GMEM_SHARED_MEM, which toggles support for mapping
guest_memfd shared memory at the host.
Signed-off-by: Fuad Tabba <tabba@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand <david@redhat.com>
---
include/linux/kvm_host.h | 7 +++++++
include/linux/page-flags.h | 16 ++++++++++++++++
mm/debug.c | 1 +
mm/swap.c | 9 +++++++++
virt/kvm/Kconfig | 5 +++++
5 files changed, 38 insertions(+)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f34f4cfaa513..7788e3625f6d 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2571,4 +2571,11 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
struct kvm_pre_fault_memory *range);
#endif
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+static inline void kvm_gmem_handle_folio_put(struct folio *folio)
+{
+ WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
+}
+#endif
+
#endif
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6dc2494bd002..daeee9a38e4c 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -933,6 +933,7 @@ enum pagetype {
PGTY_slab = 0xf5,
PGTY_zsmalloc = 0xf6,
PGTY_unaccepted = 0xf7,
+ PGTY_guestmem = 0xf8,
PGTY_mapcount_underflow = 0xff
};
@@ -1082,6 +1083,21 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
FOLIO_TEST_FLAG_FALSE(hugetlb)
#endif
+/*
+ * guestmem folios are used to back VM memory as managed by guest_memfd. Once
+ * the last reference is put, instead of freeing these folios back to the page
+ * allocator, they are returned to guest_memfd.
+ *
+ * For now, guestmem will only be set on these folios as long as they cannot be
+ * mapped to user space ("private state"), with the plan of always setting that
+ * type once typed folios can be mapped to user space cleanly.
+ */
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+FOLIO_TYPE_OPS(guestmem, guestmem)
+#else
+FOLIO_TEST_FLAG_FALSE(guestmem)
+#endif
+
PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
/*
diff --git a/mm/debug.c b/mm/debug.c
index 8d2acf432385..08bc42c6cba8 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -56,6 +56,7 @@ static const char *page_type_names[] = {
DEF_PAGETYPE_NAME(table),
DEF_PAGETYPE_NAME(buddy),
DEF_PAGETYPE_NAME(unaccepted),
+ DEF_PAGETYPE_NAME(guestmem),
};
static const char *page_type_name(unsigned int page_type)
diff --git a/mm/swap.c b/mm/swap.c
index 47bc1bb919cc..241880a46358 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -38,6 +38,10 @@
#include <linux/local_lock.h>
#include <linux/buffer_head.h>
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+#include <linux/kvm_host.h>
+#endif
+
#include "internal.h"
#define CREATE_TRACE_POINTS
@@ -101,6 +105,11 @@ static void free_typed_folio(struct folio *folio)
case PGTY_hugetlb:
free_huge_folio(folio);
return;
+#endif
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+ case PGTY_guestmem:
+ kvm_gmem_handle_folio_put(folio);
+ return;
#endif
default:
WARN_ON_ONCE(1);
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 54e959e7d68f..37f7734cb10f 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -124,3 +124,8 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
config HAVE_KVM_ARCH_GMEM_INVALIDATE
bool
depends on KVM_PRIVATE_MEM
+
+config KVM_GMEM_SHARED_MEM
+ select KVM_PRIVATE_MEM
+ depends on !KVM_GENERIC_MEMORY_ATTRIBUTES
+ bool
--
2.48.1.711.g2feabab25a-goog
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 2/9] KVM: guest_memfd: Handle final folio_put() of guest_memfd pages
2025-03-03 17:10 ` [PATCH v5 2/9] KVM: guest_memfd: Handle final folio_put() of guest_memfd pages Fuad Tabba
@ 2025-03-07 17:04 ` Ackerley Tng
2025-03-10 10:50 ` Fuad Tabba
0 siblings, 1 reply; 31+ messages in thread
From: Ackerley Tng @ 2025-03-07 17:04 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx, tabba
Fuad Tabba <tabba@google.com> writes:
> Before transitioning a guest_memfd folio to unshared, thereby
> disallowing access by the host and allowing the hypervisor to
> transition its view of the guest page as private, we need to be
> sure that the host doesn't have any references to the folio.
>
> This patch introduces a new type for guest_memfd folios, which
> isn't activated in this series but is here as a placeholder and
> to facilitate the code in the subsequent patch series. This will
> be used in the future to register a callback that informs the
> guest_memfd subsystem when the last reference is dropped,
> therefore knowing that the host doesn't have any remaining
> references.
>
> This patch also introduces the configuration option,
> KVM_GMEM_SHARED_MEM, which toggles support for mapping
> guest_memfd shared memory at the host.
>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> Acked-by: Vlastimil Babka <vbabka@suse.cz>
> Acked-by: David Hildenbrand <david@redhat.com>
> ---
> include/linux/kvm_host.h | 7 +++++++
> include/linux/page-flags.h | 16 ++++++++++++++++
> mm/debug.c | 1 +
> mm/swap.c | 9 +++++++++
> virt/kvm/Kconfig | 5 +++++
> 5 files changed, 38 insertions(+)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index f34f4cfaa513..7788e3625f6d 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2571,4 +2571,11 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
> struct kvm_pre_fault_memory *range);
> #endif
>
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
> +{
> + WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
> +}
> +#endif
> +
> #endif
Following up with the discussion at the guest_memfd biweekly call on the
guestmem library, I think this folio_put() handler for guest_memfd could
be the first function that's refactored out into (placeholder name)
mm/guestmem.c.
This folio_put() handler has to stay in memory even after KVM (as a
module) is unloaded from memory, and so it is a good candidate for the
first function in the guestmem library.
Along those lines, CONFIG_KVM_GMEM_SHARED_MEM in this patch can be
renamed CONFIG_GUESTMEM, and CONFIG_GUESTMEM will guard the existence of
PGTY_guestmem.
CONFIG_KVM_GMEM_SHARED_MEM can be introduced in the next patch of this
series, which could, in Kconfig, select CONFIG_GUESTMEM.
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 6dc2494bd002..daeee9a38e4c 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -933,6 +933,7 @@ enum pagetype {
> PGTY_slab = 0xf5,
> PGTY_zsmalloc = 0xf6,
> PGTY_unaccepted = 0xf7,
> + PGTY_guestmem = 0xf8,
>
> PGTY_mapcount_underflow = 0xff
> };
> @@ -1082,6 +1083,21 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
> FOLIO_TEST_FLAG_FALSE(hugetlb)
> #endif
>
> +/*
> + * guestmem folios are used to back VM memory as managed by guest_memfd. Once
> + * the last reference is put, instead of freeing these folios back to the page
> + * allocator, they are returned to guest_memfd.
> + *
> + * For now, guestmem will only be set on these folios as long as they cannot be
> + * mapped to user space ("private state"), with the plan of always setting that
> + * type once typed folios can be mapped to user space cleanly.
> + */
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +FOLIO_TYPE_OPS(guestmem, guestmem)
> +#else
> +FOLIO_TEST_FLAG_FALSE(guestmem)
> +#endif
> +
> PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
>
> /*
> diff --git a/mm/debug.c b/mm/debug.c
> index 8d2acf432385..08bc42c6cba8 100644
> --- a/mm/debug.c
> +++ b/mm/debug.c
> @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
> DEF_PAGETYPE_NAME(table),
> DEF_PAGETYPE_NAME(buddy),
> DEF_PAGETYPE_NAME(unaccepted),
> + DEF_PAGETYPE_NAME(guestmem),
> };
>
> static const char *page_type_name(unsigned int page_type)
> diff --git a/mm/swap.c b/mm/swap.c
> index 47bc1bb919cc..241880a46358 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -38,6 +38,10 @@
> #include <linux/local_lock.h>
> #include <linux/buffer_head.h>
>
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +#include <linux/kvm_host.h>
> +#endif
> +
> #include "internal.h"
>
> #define CREATE_TRACE_POINTS
> @@ -101,6 +105,11 @@ static void free_typed_folio(struct folio *folio)
> case PGTY_hugetlb:
> free_huge_folio(folio);
> return;
> +#endif
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> + case PGTY_guestmem:
> + kvm_gmem_handle_folio_put(folio);
> + return;
> #endif
> default:
> WARN_ON_ONCE(1);
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 54e959e7d68f..37f7734cb10f 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -124,3 +124,8 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
> config HAVE_KVM_ARCH_GMEM_INVALIDATE
> bool
> depends on KVM_PRIVATE_MEM
> +
> +config KVM_GMEM_SHARED_MEM
> + select KVM_PRIVATE_MEM
> + depends on !KVM_GENERIC_MEMORY_ATTRIBUTES
Enforcing that KVM_GENERIC_MEMORY_ATTRIBUTES is not selected should not
be a strict requirement. Fuad explained in an offline chat that this is
just temporary.
If we have CONFIG_GUESTMEM, then this question is moot, I think
CONFIG_GUESTMEM would just be independent of everything else; other
configs would depend on CONFIG_GUESTMEM.
> + bool
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 2/9] KVM: guest_memfd: Handle final folio_put() of guest_memfd pages
2025-03-07 17:04 ` Ackerley Tng
@ 2025-03-10 10:50 ` Fuad Tabba
2025-03-10 14:23 ` Ackerley Tng
0 siblings, 1 reply; 31+ messages in thread
From: Fuad Tabba @ 2025-03-10 10:50 UTC (permalink / raw)
To: Ackerley Tng
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx
Hi Ackerley,
On Fri, 7 Mar 2025 at 17:04, Ackerley Tng <ackerleytng@google.com> wrote:
>
> Fuad Tabba <tabba@google.com> writes:
>
> > Before transitioning a guest_memfd folio to unshared, thereby
> > disallowing access by the host and allowing the hypervisor to
> > transition its view of the guest page as private, we need to be
> > sure that the host doesn't have any references to the folio.
> >
> > This patch introduces a new type for guest_memfd folios, which
> > isn't activated in this series but is here as a placeholder and
> > to facilitate the code in the subsequent patch series. This will
> > be used in the future to register a callback that informs the
> > guest_memfd subsystem when the last reference is dropped,
> > therefore knowing that the host doesn't have any remaining
> > references.
> >
> > This patch also introduces the configuration option,
> > KVM_GMEM_SHARED_MEM, which toggles support for mapping
> > guest_memfd shared memory at the host.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > Acked-by: Vlastimil Babka <vbabka@suse.cz>
> > Acked-by: David Hildenbrand <david@redhat.com>
> > ---
> > include/linux/kvm_host.h | 7 +++++++
> > include/linux/page-flags.h | 16 ++++++++++++++++
> > mm/debug.c | 1 +
> > mm/swap.c | 9 +++++++++
> > virt/kvm/Kconfig | 5 +++++
> > 5 files changed, 38 insertions(+)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index f34f4cfaa513..7788e3625f6d 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -2571,4 +2571,11 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
> > struct kvm_pre_fault_memory *range);
> > #endif
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
> > +{
> > + WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
> > +}
> > +#endif
> > +
> > #endif
>
> Following up with the discussion at the guest_memfd biweekly call on the
> guestmem library, I think this folio_put() handler for guest_memfd could
> be the first function that's refactored out into (placeholder name)
> mm/guestmem.c.
>
> This folio_put() handler has to stay in memory even after KVM (as a
> module) is unloaded from memory, and so it is a good candidate for the
> first function in the guestmem library.
>
> Along those lines, CONFIG_KVM_GMEM_SHARED_MEM in this patch can be
> renamed CONFIG_GUESTMEM, and CONFIG_GUESTMEM will guard the existence of
> PGTY_guestmem.
>
> CONFIG_KVM_GMEM_SHARED_MEM can be introduced in the next patch of this
> series, which could, in Kconfig, select CONFIG_GUESTMEM.
>
> > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> > index 6dc2494bd002..daeee9a38e4c 100644
> > --- a/include/linux/page-flags.h
> > +++ b/include/linux/page-flags.h
> > @@ -933,6 +933,7 @@ enum pagetype {
> > PGTY_slab = 0xf5,
> > PGTY_zsmalloc = 0xf6,
> > PGTY_unaccepted = 0xf7,
> > + PGTY_guestmem = 0xf8,
> >
> > PGTY_mapcount_underflow = 0xff
> > };
> > @@ -1082,6 +1083,21 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
> > FOLIO_TEST_FLAG_FALSE(hugetlb)
> > #endif
> >
> > +/*
> > + * guestmem folios are used to back VM memory as managed by guest_memfd. Once
> > + * the last reference is put, instead of freeing these folios back to the page
> > + * allocator, they are returned to guest_memfd.
> > + *
> > + * For now, guestmem will only be set on these folios as long as they cannot be
> > + * mapped to user space ("private state"), with the plan of always setting that
> > + * type once typed folios can be mapped to user space cleanly.
> > + */
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +FOLIO_TYPE_OPS(guestmem, guestmem)
> > +#else
> > +FOLIO_TEST_FLAG_FALSE(guestmem)
> > +#endif
> > +
> > PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
> >
> > /*
> > diff --git a/mm/debug.c b/mm/debug.c
> > index 8d2acf432385..08bc42c6cba8 100644
> > --- a/mm/debug.c
> > +++ b/mm/debug.c
> > @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
> > DEF_PAGETYPE_NAME(table),
> > DEF_PAGETYPE_NAME(buddy),
> > DEF_PAGETYPE_NAME(unaccepted),
> > + DEF_PAGETYPE_NAME(guestmem),
> > };
> >
> > static const char *page_type_name(unsigned int page_type)
> > diff --git a/mm/swap.c b/mm/swap.c
> > index 47bc1bb919cc..241880a46358 100644
> > --- a/mm/swap.c
> > +++ b/mm/swap.c
> > @@ -38,6 +38,10 @@
> > #include <linux/local_lock.h>
> > #include <linux/buffer_head.h>
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +#include <linux/kvm_host.h>
> > +#endif
> > +
> > #include "internal.h"
> >
> > #define CREATE_TRACE_POINTS
> > @@ -101,6 +105,11 @@ static void free_typed_folio(struct folio *folio)
> > case PGTY_hugetlb:
> > free_huge_folio(folio);
> > return;
> > +#endif
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > + case PGTY_guestmem:
> > + kvm_gmem_handle_folio_put(folio);
> > + return;
> > #endif
> > default:
> > WARN_ON_ONCE(1);
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 54e959e7d68f..37f7734cb10f 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -124,3 +124,8 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
> > config HAVE_KVM_ARCH_GMEM_INVALIDATE
> > bool
> > depends on KVM_PRIVATE_MEM
> > +
> > +config KVM_GMEM_SHARED_MEM
> > + select KVM_PRIVATE_MEM
> > + depends on !KVM_GENERIC_MEMORY_ATTRIBUTES
>
> Enforcing that KVM_GENERIC_MEMORY_ATTRIBUTES is not selected should not
> be a strict requirement. Fuad explained in an offline chat that this is
> just temporary.
>
> If we have CONFIG_GUESTMEM, then this question is moot, I think
> CONFIG_GUESTMEM would just be independent of everything else; other
> configs would depend on CONFIG_GUESTMEM.
There are two things here. First of all, the unfortunate naming
situation where PRIVATE could mean GUESTMEM, or private could mean not
shared. I plan to tackle this aspect (i.e., the naming) in a separate
patch series, since that will surely generate a lot of debate :)
The other part is that, with shared memory in-place, the memory
attributes are an orthogonal matter. The attributes are the userpace's
view of what it expects the state of the memory to be, and are used to
multiplex whether the memory being accessed is guest_memfd or the
regular (i.e., most likely anonymous) memory used normally by KVM.
This behavior however would be architecture, or even vm-type specific.
Cheers,
/fuad
> > + bool
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 2/9] KVM: guest_memfd: Handle final folio_put() of guest_memfd pages
2025-03-10 10:50 ` Fuad Tabba
@ 2025-03-10 14:23 ` Ackerley Tng
2025-03-10 14:35 ` Fuad Tabba
0 siblings, 1 reply; 31+ messages in thread
From: Ackerley Tng @ 2025-03-10 14:23 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx
Fuad Tabba <tabba@google.com> writes:
> Hi Ackerley,
>
> On Fri, 7 Mar 2025 at 17:04, Ackerley Tng <ackerleytng@google.com> wrote:
>>
>> Fuad Tabba <tabba@google.com> writes:
>>
>> > Before transitioning a guest_memfd folio to unshared, thereby
>> > disallowing access by the host and allowing the hypervisor to
>> > transition its view of the guest page as private, we need to be
>> > sure that the host doesn't have any references to the folio.
>> >
>> > This patch introduces a new type for guest_memfd folios, which
>> > isn't activated in this series but is here as a placeholder and
>> > to facilitate the code in the subsequent patch series. This will
>> > be used in the future to register a callback that informs the
>> > guest_memfd subsystem when the last reference is dropped,
>> > therefore knowing that the host doesn't have any remaining
>> > references.
>> >
>> > This patch also introduces the configuration option,
>> > KVM_GMEM_SHARED_MEM, which toggles support for mapping
>> > guest_memfd shared memory at the host.
>> >
>> > Signed-off-by: Fuad Tabba <tabba@google.com>
>> > Acked-by: Vlastimil Babka <vbabka@suse.cz>
>> > Acked-by: David Hildenbrand <david@redhat.com>
>> > ---
>> > include/linux/kvm_host.h | 7 +++++++
>> > include/linux/page-flags.h | 16 ++++++++++++++++
>> > mm/debug.c | 1 +
>> > mm/swap.c | 9 +++++++++
>> > virt/kvm/Kconfig | 5 +++++
>> > 5 files changed, 38 insertions(+)
>> >
>> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> > index f34f4cfaa513..7788e3625f6d 100644
>> > --- a/include/linux/kvm_host.h
>> > +++ b/include/linux/kvm_host.h
>> > @@ -2571,4 +2571,11 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
>> > struct kvm_pre_fault_memory *range);
>> > #endif
>> >
>> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
>> > +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
>> > +{
>> > + WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
>> > +}
>> > +#endif
>> > +
>> > #endif
>>
>> Following up with the discussion at the guest_memfd biweekly call on the
>> guestmem library, I think this folio_put() handler for guest_memfd could
>> be the first function that's refactored out into (placeholder name)
>> mm/guestmem.c.
>>
>> This folio_put() handler has to stay in memory even after KVM (as a
>> module) is unloaded from memory, and so it is a good candidate for the
>> first function in the guestmem library.
>>
>> Along those lines, CONFIG_KVM_GMEM_SHARED_MEM in this patch can be
>> renamed CONFIG_GUESTMEM, and CONFIG_GUESTMEM will guard the existence of
>> PGTY_guestmem.
>>
>> CONFIG_KVM_GMEM_SHARED_MEM can be introduced in the next patch of this
>> series, which could, in Kconfig, select CONFIG_GUESTMEM.
>>
>> > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
>> > index 6dc2494bd002..daeee9a38e4c 100644
>> > --- a/include/linux/page-flags.h
>> > +++ b/include/linux/page-flags.h
>> > @@ -933,6 +933,7 @@ enum pagetype {
>> > PGTY_slab = 0xf5,
>> > PGTY_zsmalloc = 0xf6,
>> > PGTY_unaccepted = 0xf7,
>> > + PGTY_guestmem = 0xf8,
>> >
>> > PGTY_mapcount_underflow = 0xff
>> > };
>> > @@ -1082,6 +1083,21 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
>> > FOLIO_TEST_FLAG_FALSE(hugetlb)
>> > #endif
>> >
>> > +/*
>> > + * guestmem folios are used to back VM memory as managed by guest_memfd. Once
>> > + * the last reference is put, instead of freeing these folios back to the page
>> > + * allocator, they are returned to guest_memfd.
>> > + *
>> > + * For now, guestmem will only be set on these folios as long as they cannot be
>> > + * mapped to user space ("private state"), with the plan of always setting that
>> > + * type once typed folios can be mapped to user space cleanly.
>> > + */
>> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
>> > +FOLIO_TYPE_OPS(guestmem, guestmem)
>> > +#else
>> > +FOLIO_TEST_FLAG_FALSE(guestmem)
>> > +#endif
>> > +
>> > PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
>> >
>> > /*
>> > diff --git a/mm/debug.c b/mm/debug.c
>> > index 8d2acf432385..08bc42c6cba8 100644
>> > --- a/mm/debug.c
>> > +++ b/mm/debug.c
>> > @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
>> > DEF_PAGETYPE_NAME(table),
>> > DEF_PAGETYPE_NAME(buddy),
>> > DEF_PAGETYPE_NAME(unaccepted),
>> > + DEF_PAGETYPE_NAME(guestmem),
>> > };
>> >
>> > static const char *page_type_name(unsigned int page_type)
>> > diff --git a/mm/swap.c b/mm/swap.c
>> > index 47bc1bb919cc..241880a46358 100644
>> > --- a/mm/swap.c
>> > +++ b/mm/swap.c
>> > @@ -38,6 +38,10 @@
>> > #include <linux/local_lock.h>
>> > #include <linux/buffer_head.h>
>> >
>> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
>> > +#include <linux/kvm_host.h>
>> > +#endif
>> > +
>> > #include "internal.h"
>> >
>> > #define CREATE_TRACE_POINTS
>> > @@ -101,6 +105,11 @@ static void free_typed_folio(struct folio *folio)
>> > case PGTY_hugetlb:
>> > free_huge_folio(folio);
>> > return;
>> > +#endif
>> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
>> > + case PGTY_guestmem:
>> > + kvm_gmem_handle_folio_put(folio);
>> > + return;
>> > #endif
>> > default:
>> > WARN_ON_ONCE(1);
>> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
>> > index 54e959e7d68f..37f7734cb10f 100644
>> > --- a/virt/kvm/Kconfig
>> > +++ b/virt/kvm/Kconfig
>> > @@ -124,3 +124,8 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
>> > config HAVE_KVM_ARCH_GMEM_INVALIDATE
>> > bool
>> > depends on KVM_PRIVATE_MEM
>> > +
>> > +config KVM_GMEM_SHARED_MEM
>> > + select KVM_PRIVATE_MEM
>> > + depends on !KVM_GENERIC_MEMORY_ATTRIBUTES
>>
>> Enforcing that KVM_GENERIC_MEMORY_ATTRIBUTES is not selected should not
>> be a strict requirement. Fuad explained in an offline chat that this is
>> just temporary.
>>
>> If we have CONFIG_GUESTMEM, then this question is moot, I think
>> CONFIG_GUESTMEM would just be independent of everything else; other
>> configs would depend on CONFIG_GUESTMEM.
>
> There are two things here. First of all, the unfortunate naming
> situation where PRIVATE could mean GUESTMEM, or private could mean not
> shared. I plan to tackle this aspect (i.e., the naming) in a separate
> patch series, since that will surely generate a lot of debate :)
>
Oops. By "depend on CONFIG_GUESTMEM" I meant "depend on the introduction
of the guestmem shim". I think this is a good time to introduce the shim
because the folio_put() callback needs to be in mm and not just in KVM,
which is a loadable module and hence can be removed from memory.
If we do introduce the shim, the config flag CONFIG_KVM_GMEM_SHARED_MEM
will be replaced by CONFIG_GUESTMEM (or other name), and then the
question on depending on !KVM_GENERIC_MEMORY_ATTRIBUTES will be moot
since I think an mm config flag wouldn't place a constraint on a module
config flag?
When I wrote this, I thought that config flags are not easily renamed
since they're an interface and are user-facing, but I realized that
config flag renaming seems to be fairly common, based on this search [1].
If we're going with renaming in a separate patch series, some mechanism
should be introduced here to handle the case where
1. Kernel (and KVM module) is compiled with KVM_GMEM_SHARED_MEM set
2. KVM is unloaded
3. folio_put() tries to call kvm_gmem_handle_folio_put()
> The other part is that, with shared memory in-place, the memory
> attributes are an orthogonal matter. The attributes are the userpace's
> view of what it expects the state of the memory to be, and are used to
> multiplex whether the memory being accessed is guest_memfd or the
> regular (i.e., most likely anonymous) memory used normally by KVM.
>
> This behavior however would be architecture, or even vm-type specific.
>
I agree it is orthogonal but I'm calling this out because "depends on
!KVM_GENERIC_MEMORY_ATTRIBUTES" means if I set
CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES, I can't use PGTY_guestmem since
CONFIG_KVM_GMEM_SHARED_MEM would get unset.
I was trying to test this with a KVM_X86_SW_PROTECTED_VM, setting up for
using the ioctl to convert memory and hit this issue.
> Cheers,
> /fuad
>
>> > + bool
[1] https://lore.kernel.org/all/?q=s%3Arename+dfn%3AKconfig+merged
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 2/9] KVM: guest_memfd: Handle final folio_put() of guest_memfd pages
2025-03-10 14:23 ` Ackerley Tng
@ 2025-03-10 14:35 ` Fuad Tabba
0 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-10 14:35 UTC (permalink / raw)
To: Ackerley Tng
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx
Hi Ackerley,
On Mon, 10 Mar 2025 at 14:23, Ackerley Tng <ackerleytng@google.com> wrote:
>
> Fuad Tabba <tabba@google.com> writes:
>
> > Hi Ackerley,
> >
> > On Fri, 7 Mar 2025 at 17:04, Ackerley Tng <ackerleytng@google.com> wrote:
> >>
> >> Fuad Tabba <tabba@google.com> writes:
> >>
> >> > Before transitioning a guest_memfd folio to unshared, thereby
> >> > disallowing access by the host and allowing the hypervisor to
> >> > transition its view of the guest page as private, we need to be
> >> > sure that the host doesn't have any references to the folio.
> >> >
> >> > This patch introduces a new type for guest_memfd folios, which
> >> > isn't activated in this series but is here as a placeholder and
> >> > to facilitate the code in the subsequent patch series. This will
> >> > be used in the future to register a callback that informs the
> >> > guest_memfd subsystem when the last reference is dropped,
> >> > therefore knowing that the host doesn't have any remaining
> >> > references.
> >> >
> >> > This patch also introduces the configuration option,
> >> > KVM_GMEM_SHARED_MEM, which toggles support for mapping
> >> > guest_memfd shared memory at the host.
> >> >
> >> > Signed-off-by: Fuad Tabba <tabba@google.com>
> >> > Acked-by: Vlastimil Babka <vbabka@suse.cz>
> >> > Acked-by: David Hildenbrand <david@redhat.com>
> >> > ---
> >> > include/linux/kvm_host.h | 7 +++++++
> >> > include/linux/page-flags.h | 16 ++++++++++++++++
> >> > mm/debug.c | 1 +
> >> > mm/swap.c | 9 +++++++++
> >> > virt/kvm/Kconfig | 5 +++++
> >> > 5 files changed, 38 insertions(+)
> >> >
> >> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> >> > index f34f4cfaa513..7788e3625f6d 100644
> >> > --- a/include/linux/kvm_host.h
> >> > +++ b/include/linux/kvm_host.h
> >> > @@ -2571,4 +2571,11 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu,
> >> > struct kvm_pre_fault_memory *range);
> >> > #endif
> >> >
> >> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> >> > +static inline void kvm_gmem_handle_folio_put(struct folio *folio)
> >> > +{
> >> > + WARN_ONCE(1, "A placeholder that shouldn't trigger. Work in progress.");
> >> > +}
> >> > +#endif
> >> > +
> >> > #endif
> >>
> >> Following up with the discussion at the guest_memfd biweekly call on the
> >> guestmem library, I think this folio_put() handler for guest_memfd could
> >> be the first function that's refactored out into (placeholder name)
> >> mm/guestmem.c.
> >>
> >> This folio_put() handler has to stay in memory even after KVM (as a
> >> module) is unloaded from memory, and so it is a good candidate for the
> >> first function in the guestmem library.
> >>
> >> Along those lines, CONFIG_KVM_GMEM_SHARED_MEM in this patch can be
> >> renamed CONFIG_GUESTMEM, and CONFIG_GUESTMEM will guard the existence of
> >> PGTY_guestmem.
> >>
> >> CONFIG_KVM_GMEM_SHARED_MEM can be introduced in the next patch of this
> >> series, which could, in Kconfig, select CONFIG_GUESTMEM.
> >>
> >> > diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> >> > index 6dc2494bd002..daeee9a38e4c 100644
> >> > --- a/include/linux/page-flags.h
> >> > +++ b/include/linux/page-flags.h
> >> > @@ -933,6 +933,7 @@ enum pagetype {
> >> > PGTY_slab = 0xf5,
> >> > PGTY_zsmalloc = 0xf6,
> >> > PGTY_unaccepted = 0xf7,
> >> > + PGTY_guestmem = 0xf8,
> >> >
> >> > PGTY_mapcount_underflow = 0xff
> >> > };
> >> > @@ -1082,6 +1083,21 @@ FOLIO_TYPE_OPS(hugetlb, hugetlb)
> >> > FOLIO_TEST_FLAG_FALSE(hugetlb)
> >> > #endif
> >> >
> >> > +/*
> >> > + * guestmem folios are used to back VM memory as managed by guest_memfd. Once
> >> > + * the last reference is put, instead of freeing these folios back to the page
> >> > + * allocator, they are returned to guest_memfd.
> >> > + *
> >> > + * For now, guestmem will only be set on these folios as long as they cannot be
> >> > + * mapped to user space ("private state"), with the plan of always setting that
> >> > + * type once typed folios can be mapped to user space cleanly.
> >> > + */
> >> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> >> > +FOLIO_TYPE_OPS(guestmem, guestmem)
> >> > +#else
> >> > +FOLIO_TEST_FLAG_FALSE(guestmem)
> >> > +#endif
> >> > +
> >> > PAGE_TYPE_OPS(Zsmalloc, zsmalloc, zsmalloc)
> >> >
> >> > /*
> >> > diff --git a/mm/debug.c b/mm/debug.c
> >> > index 8d2acf432385..08bc42c6cba8 100644
> >> > --- a/mm/debug.c
> >> > +++ b/mm/debug.c
> >> > @@ -56,6 +56,7 @@ static const char *page_type_names[] = {
> >> > DEF_PAGETYPE_NAME(table),
> >> > DEF_PAGETYPE_NAME(buddy),
> >> > DEF_PAGETYPE_NAME(unaccepted),
> >> > + DEF_PAGETYPE_NAME(guestmem),
> >> > };
> >> >
> >> > static const char *page_type_name(unsigned int page_type)
> >> > diff --git a/mm/swap.c b/mm/swap.c
> >> > index 47bc1bb919cc..241880a46358 100644
> >> > --- a/mm/swap.c
> >> > +++ b/mm/swap.c
> >> > @@ -38,6 +38,10 @@
> >> > #include <linux/local_lock.h>
> >> > #include <linux/buffer_head.h>
> >> >
> >> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> >> > +#include <linux/kvm_host.h>
> >> > +#endif
> >> > +
> >> > #include "internal.h"
> >> >
> >> > #define CREATE_TRACE_POINTS
> >> > @@ -101,6 +105,11 @@ static void free_typed_folio(struct folio *folio)
> >> > case PGTY_hugetlb:
> >> > free_huge_folio(folio);
> >> > return;
> >> > +#endif
> >> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> >> > + case PGTY_guestmem:
> >> > + kvm_gmem_handle_folio_put(folio);
> >> > + return;
> >> > #endif
> >> > default:
> >> > WARN_ON_ONCE(1);
> >> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> >> > index 54e959e7d68f..37f7734cb10f 100644
> >> > --- a/virt/kvm/Kconfig
> >> > +++ b/virt/kvm/Kconfig
> >> > @@ -124,3 +124,8 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
> >> > config HAVE_KVM_ARCH_GMEM_INVALIDATE
> >> > bool
> >> > depends on KVM_PRIVATE_MEM
> >> > +
> >> > +config KVM_GMEM_SHARED_MEM
> >> > + select KVM_PRIVATE_MEM
> >> > + depends on !KVM_GENERIC_MEMORY_ATTRIBUTES
> >>
> >> Enforcing that KVM_GENERIC_MEMORY_ATTRIBUTES is not selected should not
> >> be a strict requirement. Fuad explained in an offline chat that this is
> >> just temporary.
> >>
> >> If we have CONFIG_GUESTMEM, then this question is moot, I think
> >> CONFIG_GUESTMEM would just be independent of everything else; other
> >> configs would depend on CONFIG_GUESTMEM.
> >
> > There are two things here. First of all, the unfortunate naming
> > situation where PRIVATE could mean GUESTMEM, or private could mean not
> > shared. I plan to tackle this aspect (i.e., the naming) in a separate
> > patch series, since that will surely generate a lot of debate :)
> >
>
> Oops. By "depend on CONFIG_GUESTMEM" I meant "depend on the introduction
> of the guestmem shim". I think this is a good time to introduce the shim
> because the folio_put() callback needs to be in mm and not just in KVM,
> which is a loadable module and hence can be removed from memory.
>
> If we do introduce the shim, the config flag CONFIG_KVM_GMEM_SHARED_MEM
> will be replaced by CONFIG_GUESTMEM (or other name), and then the
> question on depending on !KVM_GENERIC_MEMORY_ATTRIBUTES will be moot
> since I think an mm config flag wouldn't place a constraint on a module
> config flag?
I see.
> When I wrote this, I thought that config flags are easily renamed since
> they're an interface and are user-facing, but I realized config flag
> renaming seems to be fairly common based on this search [1].
>
> If we're going with renaming in a separate patch series, some mechanism
> should be introduced here to handle the case where
I'm not talking about renaming the contents of _this_ patch series. I
was referring to existing terms such as CONFIG_KVM_PRIVATE_MEM, which
really enables guest_memfd.
> 1. Kernel (and KVM module) is compiled with KVM_GMEM_SHARED_MEM set
> 2. KVM is unloaded
> 3. folio_put() tries to call kvm_gmem_handle_folio_put()
>
> > The other part is that, with shared memory in-place, the memory
> > attributes are an orthogonal matter. The attributes are the userspace's
> > view of what it expects the state of the memory to be, and are used to
> > multiplex whether the memory being accessed is guest_memfd or the
> > regular (i.e., most likely anonymous) memory used normally by KVM.
> >
> > This behavior however would be architecture, or even vm-type specific.
> >
>
> I agree it is orthogonal but I'm calling this out because "depends on
> !KVM_GENERIC_MEMORY_ATTRIBUTES" means if I set
> CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES, I can't use PGTY_guestmem since
> CONFIG_KVM_GMEM_SHARED_MEM would get unset.
I'll remove this dependence in the respin.
Thanks,
/fuad
> I was trying to test this with a KVM_X86_SW_PROTECTED_VM, setting up for
> using the ioctl to convert memory and hit this issue.
>
> > Cheers,
> > /fuad
> >
> >> > + bool
>
> [1] https://lore.kernel.org/all/?q=s%3Arename+dfn%3AKconfig+merged
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-03 17:10 [PATCH v5 0/9] KVM: Mapping guest_memfd backed memory at the host for software protected VMs Fuad Tabba
2025-03-03 17:10 ` [PATCH v5 1/9] mm: Consolidate freeing of typed folios on final folio_put() Fuad Tabba
2025-03-03 17:10 ` [PATCH v5 2/9] KVM: guest_memfd: Handle final folio_put() of guest_memfd pages Fuad Tabba
@ 2025-03-03 17:10 ` Fuad Tabba
2025-03-04 8:58 ` Kirill A. Shutemov
` (3 more replies)
2025-03-03 17:10 ` [PATCH v5 4/9] KVM: guest_memfd: Handle in-place shared memory as guest_memfd backed memory Fuad Tabba
` (5 subsequent siblings)
8 siblings, 4 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-03 17:10 UTC (permalink / raw)
To: kvm, linux-arm-msm, linux-mm
Cc: pbonzini, chenhuacai, mpe, anup, paul.walmsley, palmer, aou,
seanjc, viro, brauner, willy, akpm, xiaoyao.li, yilun.xu,
chao.p.peng, jarkko, amoorthy, dmatlack, isaku.yamahata, mic,
vbabka, vannapurve, ackerleytng, mail, david, michael.roth,
wei.w.wang, liam.merwick, isaku.yamahata, kirill.shutemov,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx, tabba
Add support for mmap() and fault() for guest_memfd backed memory
in the host for VMs that support in-place conversion between
shared and private. To that end, this patch adds the ability to
check whether the VM type supports in-place conversion, and only
allows mapping its memory if that's the case.
Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
indicates that the VM supports shared memory in guest_memfd, or
that the host can create VMs that support shared memory.
Supporting shared memory implies that memory can be mapped when
shared with the host.
This is controlled by the KVM_GMEM_SHARED_MEM configuration
option.
Signed-off-by: Fuad Tabba <tabba@google.com>
---
include/linux/kvm_host.h | 11 ++++
include/uapi/linux/kvm.h | 1 +
virt/kvm/guest_memfd.c | 105 +++++++++++++++++++++++++++++++++++++++
virt/kvm/kvm_main.c | 4 ++
4 files changed, 121 insertions(+)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7788e3625f6d..2d025b8ee20e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
}
#endif
+/*
+ * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
+ * private memory is enabled and it supports in-place shared/private conversion.
+ */
+#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
+static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
+{
+ return false;
+}
+#endif
+
#ifndef kvm_arch_has_readonly_mem
static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
{
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 45e6d8fca9b9..117937a895da 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -929,6 +929,7 @@ struct kvm_enable_cap {
#define KVM_CAP_PRE_FAULT_MEMORY 236
#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
#define KVM_CAP_X86_GUEST_MODE 238
+#define KVM_CAP_GMEM_SHARED_MEM 239
struct kvm_irq_routing_irqchip {
__u32 irqchip;
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index b2aa6bf24d3a..4291956b51ae 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -312,7 +312,112 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
return gfn - slot->base_gfn + slot->gmem.pgoff;
}
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+static bool kvm_gmem_offset_is_shared(struct file *file, pgoff_t index)
+{
+ struct kvm_gmem *gmem = file->private_data;
+
+ /* For now, VMs that support shared memory share all their memory. */
+ return kvm_arch_gmem_supports_shared_mem(gmem->kvm);
+}
+
+static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vmf->vma->vm_file);
+ struct folio *folio;
+ vm_fault_t ret = VM_FAULT_LOCKED;
+
+ filemap_invalidate_lock_shared(inode->i_mapping);
+
+ folio = kvm_gmem_get_folio(inode, vmf->pgoff);
+ if (IS_ERR(folio)) {
+ switch (PTR_ERR(folio)) {
+ case -EAGAIN:
+ ret = VM_FAULT_RETRY;
+ break;
+ case -ENOMEM:
+ ret = VM_FAULT_OOM;
+ break;
+ default:
+ ret = VM_FAULT_SIGBUS;
+ break;
+ }
+ goto out_filemap;
+ }
+
+ if (folio_test_hwpoison(folio)) {
+ ret = VM_FAULT_HWPOISON;
+ goto out_folio;
+ }
+
+ /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
+ if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_folio;
+ }
+
+ /*
+ * Only private folios are marked as "guestmem" so far, and we never
+ * expect private folios at this point.
+ */
+ if (WARN_ON_ONCE(folio_test_guestmem(folio))) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_folio;
+ }
+
+ /* No support for huge pages. */
+ if (WARN_ON_ONCE(folio_test_large(folio))) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_folio;
+ }
+
+ if (!folio_test_uptodate(folio)) {
+ clear_highpage(folio_page(folio, 0));
+ kvm_gmem_mark_prepared(folio);
+ }
+
+ vmf->page = folio_file_page(folio, vmf->pgoff);
+
+out_folio:
+ if (ret != VM_FAULT_LOCKED) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
+
+out_filemap:
+ filemap_invalidate_unlock_shared(inode->i_mapping);
+
+ return ret;
+}
+
+static const struct vm_operations_struct kvm_gmem_vm_ops = {
+ .fault = kvm_gmem_fault,
+};
+
+static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct kvm_gmem *gmem = file->private_data;
+
+ if (!kvm_arch_gmem_supports_shared_mem(gmem->kvm))
+ return -ENODEV;
+
+ if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
+ (VM_SHARED | VM_MAYSHARE)) {
+ return -EINVAL;
+ }
+
+ file_accessed(file);
+ vm_flags_set(vma, VM_DONTDUMP);
+ vma->vm_ops = &kvm_gmem_vm_ops;
+
+ return 0;
+}
+#else
+#define kvm_gmem_mmap NULL
+#endif /* CONFIG_KVM_GMEM_SHARED_MEM */
+
static struct file_operations kvm_gmem_fops = {
+ .mmap = kvm_gmem_mmap,
.open = generic_file_open,
.release = kvm_gmem_release,
.fallocate = kvm_gmem_fallocate,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ba0327e2d0d3..38f0f402ea46 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4830,6 +4830,10 @@ static int kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
#ifdef CONFIG_KVM_PRIVATE_MEM
case KVM_CAP_GUEST_MEMFD:
return !kvm || kvm_arch_has_private_mem(kvm);
+#endif
+#ifdef CONFIG_KVM_GMEM_SHARED_MEM
+ case KVM_CAP_GMEM_SHARED_MEM:
+ return !kvm || kvm_arch_gmem_supports_shared_mem(kvm);
#endif
default:
break;
--
2.48.1.711.g2feabab25a-goog
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-03 17:10 ` [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages Fuad Tabba
@ 2025-03-04 8:58 ` Kirill A. Shutemov
2025-03-04 9:27 ` Fuad Tabba
2025-03-06 0:01 ` Ackerley Tng
` (2 subsequent siblings)
3 siblings, 1 reply; 31+ messages in thread
From: Kirill A. Shutemov @ 2025-03-04 8:58 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
david, michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx
On Mon, Mar 03, 2025 at 05:10:07PM +0000, Fuad Tabba wrote:
> Add support for mmap() and fault() for guest_memfd backed memory
> in the host for VMs that support in-place conversion between
> shared and private. To that end, this patch adds the ability to
> check whether the VM type supports in-place conversion, and only
> allows mapping its memory if that's the case.
>
> Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> indicates that the VM supports shared memory in guest_memfd, or
> that the host can create VMs that support shared memory.
> Supporting shared memory implies that memory can be mapped when
> shared with the host.
>
> This is controlled by the KVM_GMEM_SHARED_MEM configuration
> option.
>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> include/linux/kvm_host.h | 11 ++++
> include/uapi/linux/kvm.h | 1 +
> virt/kvm/guest_memfd.c | 105 +++++++++++++++++++++++++++++++++++++++
> virt/kvm/kvm_main.c | 4 ++
> 4 files changed, 121 insertions(+)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 7788e3625f6d..2d025b8ee20e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> }
> #endif
>
> +/*
> + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> + * private memory is enabled and it supports in-place shared/private conversion.
> + */
> +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
Hm. Do we expect any caller for !CONFIG_KVM_PRIVATE_MEM?
> +static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
> +{
> + return false;
> +}
> +#endif
> +
> #ifndef kvm_arch_has_readonly_mem
> static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
> {
...
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index b2aa6bf24d3a..4291956b51ae 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -312,7 +312,112 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> return gfn - slot->base_gfn + slot->gmem.pgoff;
> }
>
> +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> +static bool kvm_gmem_offset_is_shared(struct file *file, pgoff_t index)
> +{
> + struct kvm_gmem *gmem = file->private_data;
> +
> + /* For now, VMs that support shared memory share all their memory. */
> + return kvm_arch_gmem_supports_shared_mem(gmem->kvm);
> +}
> +
> +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> +{
> + struct inode *inode = file_inode(vmf->vma->vm_file);
> + struct folio *folio;
> + vm_fault_t ret = VM_FAULT_LOCKED;
> +
> + filemap_invalidate_lock_shared(inode->i_mapping);
> +
> + folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> + if (IS_ERR(folio)) {
> + switch (PTR_ERR(folio)) {
> + case -EAGAIN:
> + ret = VM_FAULT_RETRY;
> + break;
> + case -ENOMEM:
> + ret = VM_FAULT_OOM;
> + break;
> + default:
> + ret = VM_FAULT_SIGBUS;
> + break;
> + }
> + goto out_filemap;
> + }
> +
> + if (folio_test_hwpoison(folio)) {
> + ret = VM_FAULT_HWPOISON;
> + goto out_folio;
> + }
> +
> + /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
If this is a requirement, it would be cleaner to rename the function and
pass down the folio and check the lock state inside.
--
Kiryl Shutsemau / Kirill A. Shutemov
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-04 8:58 ` Kirill A. Shutemov
@ 2025-03-04 9:27 ` Fuad Tabba
2025-03-04 9:45 ` Kirill A. Shutemov
0 siblings, 1 reply; 31+ messages in thread
From: Fuad Tabba @ 2025-03-04 9:27 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
david, michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx
Hi Kirill,
On Tue, 4 Mar 2025 at 08:58, Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> On Mon, Mar 03, 2025 at 05:10:07PM +0000, Fuad Tabba wrote:
> > Add support for mmap() and fault() for guest_memfd backed memory
> > in the host for VMs that support in-place conversion between
> > shared and private. To that end, this patch adds the ability to
> > check whether the VM type supports in-place conversion, and only
> > allows mapping its memory if that's the case.
> >
> > Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> > indicates that the VM supports shared memory in guest_memfd, or
> > that the host can create VMs that support shared memory.
> > Supporting shared memory implies that memory can be mapped when
> > shared with the host.
> >
> > This is controlled by the KVM_GMEM_SHARED_MEM configuration
> > option.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > include/linux/kvm_host.h | 11 ++++
> > include/uapi/linux/kvm.h | 1 +
> > virt/kvm/guest_memfd.c | 105 +++++++++++++++++++++++++++++++++++++++
> > virt/kvm/kvm_main.c | 4 ++
> > 4 files changed, 121 insertions(+)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 7788e3625f6d..2d025b8ee20e 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> > }
> > #endif
> >
> > +/*
> > + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> > + * private memory is enabled and it supports in-place shared/private conversion.
> > + */
> > +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
>
> Hm. Do we expect any caller for !CONFIG_KVM_PRIVATE_MEM?
>
> > +static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
> > +{
> > + return false;
> > +}
> > +#endif
> > +
> > #ifndef kvm_arch_has_readonly_mem
> > static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
> > {
>
> ...
>
> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> > index b2aa6bf24d3a..4291956b51ae 100644
> > --- a/virt/kvm/guest_memfd.c
> > +++ b/virt/kvm/guest_memfd.c
> > @@ -312,7 +312,112 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> > return gfn - slot->base_gfn + slot->gmem.pgoff;
> > }
> >
> > +#ifdef CONFIG_KVM_GMEM_SHARED_MEM
> > +static bool kvm_gmem_offset_is_shared(struct file *file, pgoff_t index)
> > +{
> > + struct kvm_gmem *gmem = file->private_data;
> > +
> > + /* For now, VMs that support shared memory share all their memory. */
> > + return kvm_arch_gmem_supports_shared_mem(gmem->kvm);
> > +}
> > +
> > +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> > +{
> > + struct inode *inode = file_inode(vmf->vma->vm_file);
> > + struct folio *folio;
> > + vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > + filemap_invalidate_lock_shared(inode->i_mapping);
> > +
> > + folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > + if (IS_ERR(folio)) {
> > + switch (PTR_ERR(folio)) {
> > + case -EAGAIN:
> > + ret = VM_FAULT_RETRY;
> > + break;
> > + case -ENOMEM:
> > + ret = VM_FAULT_OOM;
> > + break;
> > + default:
> > + ret = VM_FAULT_SIGBUS;
> > + break;
> > + }
> > + goto out_filemap;
> > + }
> > +
> > + if (folio_test_hwpoison(folio)) {
> > + ret = VM_FAULT_HWPOISON;
> > + goto out_folio;
> > + }
> > +
> > + /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
>
> If this is a requirement, it would be cleaner to rename the function and
> pass down the folio and check the lock state inside.
Will do.
Thanks,
/fuad
> --
> Kiryl Shutsemau / Kirill A. Shutemov
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-04 9:27 ` Fuad Tabba
@ 2025-03-04 9:45 ` Kirill A. Shutemov
2025-03-04 9:52 ` Fuad Tabba
0 siblings, 1 reply; 31+ messages in thread
From: Kirill A. Shutemov @ 2025-03-04 9:45 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
david, michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx
On Tue, Mar 04, 2025 at 09:27:06AM +0000, Fuad Tabba wrote:
> Hi Kirill,
>
> On Tue, 4 Mar 2025 at 08:58, Kirill A. Shutemov
> <kirill.shutemov@linux.intel.com> wrote:
> >
> > On Mon, Mar 03, 2025 at 05:10:07PM +0000, Fuad Tabba wrote:
> > > Add support for mmap() and fault() for guest_memfd backed memory
> > > in the host for VMs that support in-place conversion between
> > > shared and private. To that end, this patch adds the ability to
> > > check whether the VM type supports in-place conversion, and only
> > > allows mapping its memory if that's the case.
> > >
> > > Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> > > indicates that the VM supports shared memory in guest_memfd, or
> > > that the host can create VMs that support shared memory.
> > > Supporting shared memory implies that memory can be mapped when
> > > shared with the host.
> > >
> > > This is controlled by the KVM_GMEM_SHARED_MEM configuration
> > > option.
> > >
> > > Signed-off-by: Fuad Tabba <tabba@google.com>
> > > ---
> > > include/linux/kvm_host.h | 11 ++++
> > > include/uapi/linux/kvm.h | 1 +
> > > virt/kvm/guest_memfd.c | 105 +++++++++++++++++++++++++++++++++++++++
> > > virt/kvm/kvm_main.c | 4 ++
> > > 4 files changed, 121 insertions(+)
> > >
> > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > index 7788e3625f6d..2d025b8ee20e 100644
> > > --- a/include/linux/kvm_host.h
> > > +++ b/include/linux/kvm_host.h
> > > @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> > > }
> > > #endif
> > >
> > > +/*
> > > + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> > > + * private memory is enabled and it supports in-place shared/private conversion.
> > > + */
> > > +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
> >
> > Hm. Do we expect any caller for !CONFIG_KVM_PRIVATE_MEM?
I think you missed this.
--
Kiryl Shutsemau / Kirill A. Shutemov
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-04 9:45 ` Kirill A. Shutemov
@ 2025-03-04 9:52 ` Fuad Tabba
0 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-04 9:52 UTC (permalink / raw)
To: Kirill A. Shutemov
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
david, michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx
On Tue, 4 Mar 2025 at 09:46, Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> On Tue, Mar 04, 2025 at 09:27:06AM +0000, Fuad Tabba wrote:
> > Hi Kirill,
> >
> > On Tue, 4 Mar 2025 at 08:58, Kirill A. Shutemov
> > <kirill.shutemov@linux.intel.com> wrote:
> > >
> > > On Mon, Mar 03, 2025 at 05:10:07PM +0000, Fuad Tabba wrote:
> > > > Add support for mmap() and fault() for guest_memfd backed memory
> > > > in the host for VMs that support in-place conversion between
> > > > shared and private. To that end, this patch adds the ability to
> > > > check whether the VM type supports in-place conversion, and only
> > > > allows mapping its memory if that's the case.
> > > >
> > > > Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> > > > indicates that the VM supports shared memory in guest_memfd, or
> > > > that the host can create VMs that support shared memory.
> > > > Supporting shared memory implies that memory can be mapped when
> > > > shared with the host.
> > > >
> > > > This is controlled by the KVM_GMEM_SHARED_MEM configuration
> > > > option.
> > > >
> > > > Signed-off-by: Fuad Tabba <tabba@google.com>
> > > > ---
> > > > include/linux/kvm_host.h | 11 ++++
> > > > include/uapi/linux/kvm.h | 1 +
> > > > virt/kvm/guest_memfd.c | 105 +++++++++++++++++++++++++++++++++++++++
> > > > virt/kvm/kvm_main.c | 4 ++
> > > > 4 files changed, 121 insertions(+)
> > > >
> > > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > > > index 7788e3625f6d..2d025b8ee20e 100644
> > > > --- a/include/linux/kvm_host.h
> > > > +++ b/include/linux/kvm_host.h
> > > > @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> > > > }
> > > > #endif
> > > >
> > > > +/*
> > > > + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> > > > + * private memory is enabled and it supports in-place shared/private conversion.
> > > > + */
> > > > +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
> > >
> > > Hm. Do we expect any caller for !CONFIG_KVM_PRIVATE_MEM?
>
> I think you missed this.
Yes I did :)
You're right (reading between the lines of where you're getting at
that is). This should be
+#if !defined(kvm_arch_gmem_supports_shared_mem) &&
!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)
I'll fix (if you agree with my understanding of your comment).
Cheers,
/fuad
> --
> Kiryl Shutsemau / Kirill A. Shutemov
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-03 17:10 ` [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages Fuad Tabba
2025-03-04 8:58 ` Kirill A. Shutemov
@ 2025-03-06 0:01 ` Ackerley Tng
2025-03-06 8:48 ` Fuad Tabba
2025-03-06 22:46 ` Ackerley Tng
2025-03-07 1:53 ` James Houghton
3 siblings, 1 reply; 31+ messages in thread
From: Ackerley Tng @ 2025-03-06 0:01 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx, tabba
Fuad Tabba <tabba@google.com> writes:
> <snip>
>
> +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> +{
> + struct inode *inode = file_inode(vmf->vma->vm_file);
> + struct folio *folio;
> + vm_fault_t ret = VM_FAULT_LOCKED;
> +
> + filemap_invalidate_lock_shared(inode->i_mapping);
> +
> + folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> + if (IS_ERR(folio)) {
> + switch (PTR_ERR(folio)) {
> + case -EAGAIN:
> + ret = VM_FAULT_RETRY;
> + break;
> + case -ENOMEM:
> + ret = VM_FAULT_OOM;
> + break;
> + default:
> + ret = VM_FAULT_SIGBUS;
> + break;
> + }
> + goto out_filemap;
> + }
> +
> + if (folio_test_hwpoison(folio)) {
> + ret = VM_FAULT_HWPOISON;
> + goto out_folio;
> + }
> +
> + /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
> + if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
> + ret = VM_FAULT_SIGBUS;
> + goto out_folio;
> + }
> +
> + /*
> + * Only private folios are marked as "guestmem" so far, and we never
> + * expect private folios at this point.
> + */
I think this is not quite accurate.
Based on my understanding and kvm_gmem_handle_folio_put() in this other
patch [1], only pages *in transition* from shared to private state are
marked "guestmem", although it is true that no private folios or folios
marked guestmem are expected here.
> + if (WARN_ON_ONCE(folio_test_guestmem(folio))) {
> + ret = VM_FAULT_SIGBUS;
> + goto out_folio;
> + }
> +
> + /* No support for huge pages. */
> + if (WARN_ON_ONCE(folio_test_large(folio))) {
> + ret = VM_FAULT_SIGBUS;
> + goto out_folio;
> + }
> +
> + if (!folio_test_uptodate(folio)) {
> + clear_highpage(folio_page(folio, 0));
> + kvm_gmem_mark_prepared(folio);
> + }
> +
> + vmf->page = folio_file_page(folio, vmf->pgoff);
> +
> +out_folio:
> + if (ret != VM_FAULT_LOCKED) {
> + folio_unlock(folio);
> + folio_put(folio);
> + }
> +
> +out_filemap:
> + filemap_invalidate_unlock_shared(inode->i_mapping);
> +
> + return ret;
> +}
>
> <snip>
[1] https://lore.kernel.org/all/20250117163001.2326672-7-tabba@google.com/
^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-06 0:01 ` Ackerley Tng
@ 2025-03-06 8:48 ` Fuad Tabba
2025-03-06 17:05 ` Ackerley Tng
0 siblings, 1 reply; 31+ messages in thread
From: Fuad Tabba @ 2025-03-06 8:48 UTC (permalink / raw)
To: Ackerley Tng
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx
Hi Ackerley,
On Thu, 6 Mar 2025 at 00:02, Ackerley Tng <ackerleytng@google.com> wrote:
>
> Fuad Tabba <tabba@google.com> writes:
>
> > <snip>
> >
> > +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> > +{
> > + struct inode *inode = file_inode(vmf->vma->vm_file);
> > + struct folio *folio;
> > + vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > + filemap_invalidate_lock_shared(inode->i_mapping);
> > +
> > + folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > + if (IS_ERR(folio)) {
> > + switch (PTR_ERR(folio)) {
> > + case -EAGAIN:
> > + ret = VM_FAULT_RETRY;
> > + break;
> > + case -ENOMEM:
> > + ret = VM_FAULT_OOM;
> > + break;
> > + default:
> > + ret = VM_FAULT_SIGBUS;
> > + break;
> > + }
> > + goto out_filemap;
> > + }
> > +
> > + if (folio_test_hwpoison(folio)) {
> > + ret = VM_FAULT_HWPOISON;
> > + goto out_folio;
> > + }
> > +
> > + /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
> > + if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
> > + ret = VM_FAULT_SIGBUS;
> > + goto out_folio;
> > + }
> > +
> > + /*
> > + * Only private folios are marked as "guestmem" so far, and we never
> > + * expect private folios at this point.
> > + */
>
> I think this is not quite accurate.
>
> Based on my understanding and kvm_gmem_handle_folio_put() in this other
> patch [1], only pages *in transition* from shared to private state are
> marked "guestmem", although it is true that no private folios or folios
> marked guestmem are expected here.
Technically, pages in transition are private as far as the host is
concerned. This doesn't say that _all_ private pages are marked as
guestmem. It says that only private pages are marked as guestmem. It
could be private and _not_ be marked as guestmem :)
I probably should rephrase something along the lines of, "no shared
folios would be marked as guestmem". How does that sound?
Thanks,
/fuad
> > + if (WARN_ON_ONCE(folio_test_guestmem(folio))) {
> > + ret = VM_FAULT_SIGBUS;
> > + goto out_folio;
> > + }
> > +
> > + /* No support for huge pages. */
> > + if (WARN_ON_ONCE(folio_test_large(folio))) {
> > + ret = VM_FAULT_SIGBUS;
> > + goto out_folio;
> > + }
> > +
> > + if (!folio_test_uptodate(folio)) {
> > + clear_highpage(folio_page(folio, 0));
> > + kvm_gmem_mark_prepared(folio);
> > + }
> > +
> > + vmf->page = folio_file_page(folio, vmf->pgoff);
> > +
> > +out_folio:
> > + if (ret != VM_FAULT_LOCKED) {
> > + folio_unlock(folio);
> > + folio_put(folio);
> > + }
> > +
> > +out_filemap:
> > + filemap_invalidate_unlock_shared(inode->i_mapping);
> > +
> > + return ret;
> > +}
> >
> > <snip>
>
> [1] https://lore.kernel.org/all/20250117163001.2326672-7-tabba@google.com/
^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-06 8:48 ` Fuad Tabba
@ 2025-03-06 17:05 ` Ackerley Tng
0 siblings, 0 replies; 31+ messages in thread
From: Ackerley Tng @ 2025-03-06 17:05 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx
Fuad Tabba <tabba@google.com> writes:
> Hi Ackerley,
>
> On Thu, 6 Mar 2025 at 00:02, Ackerley Tng <ackerleytng@google.com> wrote:
>>
>> Fuad Tabba <tabba@google.com> writes:
>>
>> > <snip>
>> >
>> > +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
>> > +{
>> > + struct inode *inode = file_inode(vmf->vma->vm_file);
>> > + struct folio *folio;
>> > + vm_fault_t ret = VM_FAULT_LOCKED;
>> > +
>> > + filemap_invalidate_lock_shared(inode->i_mapping);
>> > +
>> > + folio = kvm_gmem_get_folio(inode, vmf->pgoff);
>> > + if (IS_ERR(folio)) {
>> > + switch (PTR_ERR(folio)) {
>> > + case -EAGAIN:
>> > + ret = VM_FAULT_RETRY;
>> > + break;
>> > + case -ENOMEM:
>> > + ret = VM_FAULT_OOM;
>> > + break;
>> > + default:
>> > + ret = VM_FAULT_SIGBUS;
>> > + break;
>> > + }
>> > + goto out_filemap;
>> > + }
>> > +
>> > + if (folio_test_hwpoison(folio)) {
>> > + ret = VM_FAULT_HWPOISON;
>> > + goto out_folio;
>> > + }
>> > +
>> > + /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
>> > + if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
>> > + ret = VM_FAULT_SIGBUS;
>> > + goto out_folio;
>> > + }
>> > +
>> > + /*
>> > + * Only private folios are marked as "guestmem" so far, and we never
>> > + * expect private folios at this point.
>> > + */
>>
>> I think this is not quite accurate.
>>
>> Based on my understanding and kvm_gmem_handle_folio_put() in this other
>> patch [1], only pages *in transition* from shared to private state are
>> marked "guestmem", although it is true that no private folios or folios
>> marked guestmem are expected here.
>
> Technically, pages in transition are private as far as the host is
> concerned. This doesn't say that _all_ private pages are marked as
> guestmem. It says that only private pages are marked as guestmem. It
> could be private and _not_ be marked as guestmem :)
True, didn't think of it this way!
>
> I probably should rephrase something along the lines of, "no shared
> folios would be marked as guestmem". How does that sound?
>
> Thanks,
> /fuad
>
Works for me, thank you!
>> > + if (WARN_ON_ONCE(folio_test_guestmem(folio))) {
>> > + ret = VM_FAULT_SIGBUS;
>> > + goto out_folio;
>> > + }
>> > +
>> > + /* No support for huge pages. */
>> > + if (WARN_ON_ONCE(folio_test_large(folio))) {
>> > + ret = VM_FAULT_SIGBUS;
>> > + goto out_folio;
>> > + }
>> > +
>> > + if (!folio_test_uptodate(folio)) {
>> > + clear_highpage(folio_page(folio, 0));
>> > + kvm_gmem_mark_prepared(folio);
>> > + }
>> > +
>> > + vmf->page = folio_file_page(folio, vmf->pgoff);
>> > +
>> > +out_folio:
>> > + if (ret != VM_FAULT_LOCKED) {
>> > + folio_unlock(folio);
>> > + folio_put(folio);
>> > + }
>> > +
>> > +out_filemap:
>> > + filemap_invalidate_unlock_shared(inode->i_mapping);
>> > +
>> > + return ret;
>> > +}
>> >
>> > <snip>
>>
>> [1] https://lore.kernel.org/all/20250117163001.2326672-7-tabba@google.com/
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-03 17:10 ` [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages Fuad Tabba
2025-03-04 8:58 ` Kirill A. Shutemov
2025-03-06 0:01 ` Ackerley Tng
@ 2025-03-06 22:46 ` Ackerley Tng
2025-03-07 8:57 ` Fuad Tabba
2025-03-07 1:53 ` James Houghton
3 siblings, 1 reply; 31+ messages in thread
From: Ackerley Tng @ 2025-03-06 22:46 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx, tabba
Fuad Tabba <tabba@google.com> writes:
> Add support for mmap() and fault() for guest_memfd backed memory
> in the host for VMs that support in-place conversion between
> shared and private. To that end, this patch adds the ability to
> check whether the VM type supports in-place conversion, and only
> allows mapping its memory if that's the case.
>
> Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> indicates that the VM supports shared memory in guest_memfd, or
> that the host can create VMs that support shared memory.
> Supporting shared memory implies that memory can be mapped when
> shared with the host.
>
> This is controlled by the KVM_GMEM_SHARED_MEM configuration
> option.
>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> include/linux/kvm_host.h | 11 ++++
> include/uapi/linux/kvm.h | 1 +
> virt/kvm/guest_memfd.c | 105 +++++++++++++++++++++++++++++++++++++++
> virt/kvm/kvm_main.c | 4 ++
> 4 files changed, 121 insertions(+)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 7788e3625f6d..2d025b8ee20e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> }
> #endif
>
> +/*
> + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> + * private memory is enabled and it supports in-place shared/private conversion.
> + */
> +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
Is this a copypasta error? I'm wondering if this should be
!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM).
Also, would you consider defining a __weak function to be overridden by
different architectures, or would weak symbols not be inline-able?
> +static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
> +{
> + return false;
> +}
> +#endif
> +
>
> <snip>
^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-06 22:46 ` Ackerley Tng
@ 2025-03-07 8:57 ` Fuad Tabba
0 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-07 8:57 UTC (permalink / raw)
To: Ackerley Tng
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx
Hi Ackerley,
On Thu, 6 Mar 2025 at 22:46, Ackerley Tng <ackerleytng@google.com> wrote:
>
> Fuad Tabba <tabba@google.com> writes:
>
> > Add support for mmap() and fault() for guest_memfd backed memory
> > in the host for VMs that support in-place conversion between
> > shared and private. To that end, this patch adds the ability to
> > check whether the VM type supports in-place conversion, and only
> > allows mapping its memory if that's the case.
> >
> > Also add the KVM capability KVM_CAP_GMEM_SHARED_MEM, which
> > indicates that the VM supports shared memory in guest_memfd, or
> > that the host can create VMs that support shared memory.
> > Supporting shared memory implies that memory can be mapped when
> > shared with the host.
> >
> > This is controlled by the KVM_GMEM_SHARED_MEM configuration
> > option.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > include/linux/kvm_host.h | 11 ++++
> > include/uapi/linux/kvm.h | 1 +
> > virt/kvm/guest_memfd.c | 105 +++++++++++++++++++++++++++++++++++++++
> > virt/kvm/kvm_main.c | 4 ++
> > 4 files changed, 121 insertions(+)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 7788e3625f6d..2d025b8ee20e 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -728,6 +728,17 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
> > }
> > #endif
> >
> > +/*
> > + * Arch code must define kvm_arch_gmem_supports_shared_mem if support for
> > + * private memory is enabled and it supports in-place shared/private conversion.
> > + */
> > +#if !defined(kvm_arch_gmem_supports_shared_mem) && !IS_ENABLED(CONFIG_KVM_PRIVATE_MEM)
>
> Is this a copypasta error? I'm wondering if this should be
> !IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM).
Yes. Kirill had pointed that out as well. I will fix it.
> Also, would you consider defining a __weak function to be overridden by
> different architectures, or would weak symbols not be inline-able?
I have no strong opinion, but I think that it should follow the same
pattern as kvm_arch_has_private_mem().
Cheers,
/fuad
> > +static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
> > +{
> > + return false;
> > +}
> > +#endif
> > +
> >
> > <snip>
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-03 17:10 ` [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages Fuad Tabba
` (2 preceding siblings ...)
2025-03-06 22:46 ` Ackerley Tng
@ 2025-03-07 1:53 ` James Houghton
2025-03-07 8:55 ` Fuad Tabba
3 siblings, 1 reply; 31+ messages in thread
From: James Houghton @ 2025-03-07 1:53 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
david, michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, peterx
On Mon, Mar 3, 2025 at 9:10 AM Fuad Tabba <tabba@google.com> wrote:
> +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> +{
> + struct inode *inode = file_inode(vmf->vma->vm_file);
> + struct folio *folio;
> + vm_fault_t ret = VM_FAULT_LOCKED;
> +
> + filemap_invalidate_lock_shared(inode->i_mapping);
> +
> + folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> + if (IS_ERR(folio)) {
> + switch (PTR_ERR(folio)) {
> + case -EAGAIN:
> + ret = VM_FAULT_RETRY;
> + break;
> + case -ENOMEM:
> + ret = VM_FAULT_OOM;
> + break;
> + default:
> + ret = VM_FAULT_SIGBUS;
> + break;
Tiny nit-pick: This smells almost like an open-coded vmf_error(). For
the non-EAGAIN case, can we just call vmf_error()?
> + }
> + goto out_filemap;
> + }
> +
> + if (folio_test_hwpoison(folio)) {
> + ret = VM_FAULT_HWPOISON;
> + goto out_folio;
> + }
> +
> + /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
> + if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
> + ret = VM_FAULT_SIGBUS;
> + goto out_folio;
> + }
^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages
2025-03-07 1:53 ` James Houghton
@ 2025-03-07 8:55 ` Fuad Tabba
0 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-07 8:55 UTC (permalink / raw)
To: James Houghton
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
david, michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, peterx
Hi James,
On Fri, 7 Mar 2025 at 01:53, James Houghton <jthoughton@google.com> wrote:
>
> On Mon, Mar 3, 2025 at 9:10 AM Fuad Tabba <tabba@google.com> wrote:
> > +static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
> > +{
> > + struct inode *inode = file_inode(vmf->vma->vm_file);
> > + struct folio *folio;
> > + vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > + filemap_invalidate_lock_shared(inode->i_mapping);
> > +
> > + folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > + if (IS_ERR(folio)) {
> > + switch (PTR_ERR(folio)) {
> > + case -EAGAIN:
> > + ret = VM_FAULT_RETRY;
> > + break;
> > + case -ENOMEM:
> > + ret = VM_FAULT_OOM;
> > + break;
> > + default:
> > + ret = VM_FAULT_SIGBUS;
> > + break;
>
> Tiny nit-pick: This smells almost like an open-coded vmf_error(). For
> the non-EAGAIN case, can we just call vmf_error()?
I wasn't aware of that. I will fix this on the respin, thanks for
pointing it out.
That said, any idea why vmf_error() doesn't convert -EAGAIN to VM_FAULT_RETRY?
Cheers,
/fuad
> > + }
> > + goto out_filemap;
> > + }
> > +
> > + if (folio_test_hwpoison(folio)) {
> > + ret = VM_FAULT_HWPOISON;
> > + goto out_folio;
> > + }
> > +
> > + /* Must be called with folio lock held, i.e., after kvm_gmem_get_folio() */
> > + if (!kvm_gmem_offset_is_shared(vmf->vma->vm_file, vmf->pgoff)) {
> > + ret = VM_FAULT_SIGBUS;
> > + goto out_folio;
> > + }
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v5 4/9] KVM: guest_memfd: Handle in-place shared memory as guest_memfd backed memory
2025-03-03 17:10 [PATCH v5 0/9] KVM: Mapping guest_memfd backed memory at the host for software protected VMs Fuad Tabba
` (2 preceding siblings ...)
2025-03-03 17:10 ` [PATCH v5 3/9] KVM: guest_memfd: Allow host to map guest_memfd() pages Fuad Tabba
@ 2025-03-03 17:10 ` Fuad Tabba
2025-03-06 16:09 ` Ackerley Tng
2025-03-03 17:10 ` [PATCH v5 5/9] KVM: x86: Mark KVM_X86_SW_PROTECTED_VM as supporting guest_memfd shared memory Fuad Tabba
` (4 subsequent siblings)
8 siblings, 1 reply; 31+ messages in thread
From: Fuad Tabba @ 2025-03-03 17:10 UTC (permalink / raw)
To: kvm, linux-arm-msm, linux-mm
Cc: pbonzini, chenhuacai, mpe, anup, paul.walmsley, palmer, aou,
seanjc, viro, brauner, willy, akpm, xiaoyao.li, yilun.xu,
chao.p.peng, jarkko, amoorthy, dmatlack, isaku.yamahata, mic,
vbabka, vannapurve, ackerleytng, mail, david, michael.roth,
wei.w.wang, liam.merwick, isaku.yamahata, kirill.shutemov,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx, tabba
For VMs that allow sharing guest_memfd backed memory in-place,
handle that memory the same as "private" guest_memfd memory. This
means that faulting that memory in the host or in the guest will
go through the guest_memfd subsystem.
Note that the word "private" in the name of the function
kvm_mem_is_private() doesn't necessarily indicate that the memory
isn't shared, but is due to the history and evolution of
guest_memfd and the various names it has received. In effect,
this function is used to multiplex between the path of a normal
page fault and the path of a guest_memfd backed page fault.
Signed-off-by: Fuad Tabba <tabba@google.com>
---
include/linux/kvm_host.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 2d025b8ee20e..296f1d284d55 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -2521,7 +2521,8 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
#else
static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
{
- return false;
+ return kvm_arch_gmem_supports_shared_mem(kvm) &&
+ kvm_slot_can_be_private(gfn_to_memslot(kvm, gfn));
}
#endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
--
2.48.1.711.g2feabab25a-goog
^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v5 4/9] KVM: guest_memfd: Handle in-place shared memory as guest_memfd backed memory
2025-03-03 17:10 ` [PATCH v5 4/9] KVM: guest_memfd: Handle in-place shared memory as guest_memfd backed memory Fuad Tabba
@ 2025-03-06 16:09 ` Ackerley Tng
2025-03-06 17:02 ` Fuad Tabba
0 siblings, 1 reply; 31+ messages in thread
From: Ackerley Tng @ 2025-03-06 16:09 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx, tabba
Fuad Tabba <tabba@google.com> writes:
> For VMs that allow sharing guest_memfd backed memory in-place,
> handle that memory the same as "private" guest_memfd memory. This
> means that faulting that memory in the host or in the guest will
> go through the guest_memfd subsystem.
>
> Note that the word "private" in the name of the function
> kvm_mem_is_private() doesn't necessarily indicate that the memory
> isn't shared, but is due to the history and evolution of
> guest_memfd and the various names it has received. In effect,
> this function is used to multiplex between the path of a normal
> page fault and the path of a guest_memfd backed page fault.
>
I think this explanation is a good summary, but this change seems to
make KVM take pages via guest_memfd functions for more than just
guest-private pages.
This change picks the guest_memfd fault path as long as the memslot has
an associated guest_memfd (kvm_slot_can_be_private()) and gmem was
configured to kvm_arch_gmem_supports_shared_mem().
For shared accesses, shouldn't KVM use the memslot's userspace_addr?
It's still possibly the same mmap-ed guest_memfd inode, but via
userspace_addr. And for special accesses from within KVM (e.g. clock),
maybe some other changes are required inside KVM, or those could also
use userspace_addr.
You mentioned that pKVM doesn't use
KVM_GENERIC_MEMORY_ATTRIBUTES/mem_attr_array, so perhaps the change
required here is that kvm_mem_is_private() should be updated to
kvm_slot_can_be_private() && whatever pKVM uses to determine if a gfn is
private?
So perhaps kvm_arch_gmem_supports_shared_mem() should be something like
kvm_arch_gmem_use_guest_memfd(), where x86 would override that to use
mem_attr_array if CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES is selected?
>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> include/linux/kvm_host.h | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 2d025b8ee20e..296f1d284d55 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2521,7 +2521,8 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> #else
> static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> {
> - return false;
> + return kvm_arch_gmem_supports_shared_mem(kvm) &&
> + kvm_slot_can_be_private(gfn_to_memslot(kvm, gfn));
> }
> #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
^ permalink raw reply	[flat|nested] 31+ messages in thread

* Re: [PATCH v5 4/9] KVM: guest_memfd: Handle in-place shared memory as guest_memfd backed memory
2025-03-06 16:09 ` Ackerley Tng
@ 2025-03-06 17:02 ` Fuad Tabba
0 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-06 17:02 UTC (permalink / raw)
To: Ackerley Tng
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, mail, david,
michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, qperret, keirf, roypat,
shuah, hch, jgg, rientjes, jhubbard, fvdl, hughd, jthoughton,
peterx
Hi Ackerley,
On Thu, 6 Mar 2025 at 16:09, Ackerley Tng <ackerleytng@google.com> wrote:
>
> Fuad Tabba <tabba@google.com> writes:
>
> > For VMs that allow sharing guest_memfd backed memory in-place,
> > handle that memory the same as "private" guest_memfd memory. This
> > means that faulting that memory in the host or in the guest will
> > go through the guest_memfd subsystem.
> >
> > Note that the word "private" in the name of the function
> > kvm_mem_is_private() doesn't necessarily indicate that the memory
> > isn't shared, but is due to the history and evolution of
> > guest_memfd and the various names it has received. In effect,
> > this function is used to multiplex between the path of a normal
> > page fault and the path of a guest_memfd backed page fault.
> >
>
> I think this explanation is a good summary, but this change seems to
> make KVM take pages via guest_memfd functions for more than just
> guest-private pages.
>
> This change picks the guest_memfd fault path as long as the memslot has
> an associated guest_memfd (kvm_slot_can_be_private()) and gmem was
> configured to kvm_arch_gmem_supports_shared_mem().
>
> For shared accesses, shouldn't KVM use the memslot's userspace_addr?
> It's still possibly the same mmap-ed guest_memfd inode, but via
> userspace_addr. And for special accesses from within KVM (e.g. clock),
> maybe some other changes are required inside KVM, or those could also
> use userspace_addr.
The reason is that, although it does have a userspace_addr, it might
not actually be mapped. We don't require that shared memory be mapped.
Cheers,
/fuad
> You mentioned that pKVM doesn't use
> KVM_GENERIC_MEMORY_ATTRIBUTES/mem_attr_array, so perhaps the change
> required here is that kvm_mem_is_private() should be updated to
> kvm_slot_can_be_private() && whatever pKVM uses to determine if a gfn is
> private?
>
> So perhaps kvm_arch_gmem_supports_shared_mem() should be something like
> kvm_arch_gmem_use_guest_memfd(), where x86 would override that to use
> mem_attr_array if CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES is selected?
>
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > include/linux/kvm_host.h | 3 ++-
> > 1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> > index 2d025b8ee20e..296f1d284d55 100644
> > --- a/include/linux/kvm_host.h
> > +++ b/include/linux/kvm_host.h
> > @@ -2521,7 +2521,8 @@ static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> > #else
> > static inline bool kvm_mem_is_private(struct kvm *kvm, gfn_t gfn)
> > {
> > - return false;
> > + return kvm_arch_gmem_supports_shared_mem(kvm) &&
> > + kvm_slot_can_be_private(gfn_to_memslot(kvm, gfn));
> > }
> > #endif /* CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES */
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v5 5/9] KVM: x86: Mark KVM_X86_SW_PROTECTED_VM as supporting guest_memfd shared memory
2025-03-03 17:10 [PATCH v5 0/9] KVM: Mapping guest_memfd backed memory at the host for software protected VMs Fuad Tabba
` (3 preceding siblings ...)
2025-03-03 17:10 ` [PATCH v5 4/9] KVM: guest_memfd: Handle in-place shared memory as guest_memfd backed memory Fuad Tabba
@ 2025-03-03 17:10 ` Fuad Tabba
2025-03-03 17:10 ` [PATCH v5 6/9] KVM: arm64: Refactor user_mem_abort() calculation of force_pte Fuad Tabba
` (3 subsequent siblings)
8 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-03 17:10 UTC (permalink / raw)
To: kvm, linux-arm-msm, linux-mm
Cc: pbonzini, chenhuacai, mpe, anup, paul.walmsley, palmer, aou,
seanjc, viro, brauner, willy, akpm, xiaoyao.li, yilun.xu,
chao.p.peng, jarkko, amoorthy, dmatlack, isaku.yamahata, mic,
vbabka, vannapurve, ackerleytng, mail, david, michael.roth,
wei.w.wang, liam.merwick, isaku.yamahata, kirill.shutemov,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx, tabba
The KVM_X86_SW_PROTECTED_VM type is meant for experimentation and
does not have any underlying support for protected guests. This
makes it a good candidate for testing mapping shared memory.
Therefore, when the kconfig option is enabled, mark
KVM_X86_SW_PROTECTED_VM as supporting shared memory.
This means that this memory is considered by guest_memfd to be
shared with the host, with the possibility of in-place conversion
between shared and private. This allows the host to map and fault
in guest_memfd memory belonging to this VM type.
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/x86/include/asm/kvm_host.h | 5 +++++
arch/x86/kvm/Kconfig | 3 ++-
2 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b7af5902ff7..c6e4925bdc8a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2245,8 +2245,13 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
#ifdef CONFIG_KVM_PRIVATE_MEM
#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem)
+
+#define kvm_arch_gmem_supports_shared_mem(kvm) \
+ (IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM) && \
+ ((kvm)->arch.vm_type == KVM_X86_SW_PROTECTED_VM))
#else
#define kvm_arch_has_private_mem(kvm) false
+#define kvm_arch_gmem_supports_shared_mem(kvm) false
#endif
#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ea2c4f21c1ca..22d1bcdaad58 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -45,7 +45,8 @@ config KVM_X86
select HAVE_KVM_PM_NOTIFIER if PM
select KVM_GENERIC_HARDWARE_ENABLING
select KVM_GENERIC_PRE_FAULT_MEMORY
- select KVM_GENERIC_PRIVATE_MEM if KVM_SW_PROTECTED_VM
+ select KVM_PRIVATE_MEM if KVM_SW_PROTECTED_VM
+ select KVM_GMEM_SHARED_MEM if KVM_SW_PROTECTED_VM
select KVM_WERROR if WERROR
config KVM
--
2.48.1.711.g2feabab25a-goog
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v5 6/9] KVM: arm64: Refactor user_mem_abort() calculation of force_pte
2025-03-03 17:10 [PATCH v5 0/9] KVM: Mapping guest_memfd backed memory at the host for software protected VMs Fuad Tabba
` (4 preceding siblings ...)
2025-03-03 17:10 ` [PATCH v5 5/9] KVM: x86: Mark KVM_X86_SW_PROTECTED_VM as supporting guest_memfd shared memory Fuad Tabba
@ 2025-03-03 17:10 ` Fuad Tabba
2025-03-06 10:46 ` Quentin Perret
2025-03-03 17:10 ` [PATCH v5 7/9] KVM: arm64: Handle guest_memfd()-backed guest page faults Fuad Tabba
` (2 subsequent siblings)
8 siblings, 1 reply; 31+ messages in thread
From: Fuad Tabba @ 2025-03-03 17:10 UTC (permalink / raw)
To: kvm, linux-arm-msm, linux-mm
Cc: pbonzini, chenhuacai, mpe, anup, paul.walmsley, palmer, aou,
seanjc, viro, brauner, willy, akpm, xiaoyao.li, yilun.xu,
chao.p.peng, jarkko, amoorthy, dmatlack, isaku.yamahata, mic,
vbabka, vannapurve, ackerleytng, mail, david, michael.roth,
wei.w.wang, liam.merwick, isaku.yamahata, kirill.shutemov,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx, tabba
To simplify the code and to make the assumptions clearer,
refactor user_mem_abort() by immediately setting force_pte to
true if the conditions are met. Also, remove the comment about
logging_active being guaranteed to never be true for VM_PFNMAP
memslots, since it's not technically correct right now.
No functional change intended.
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/arm64/kvm/mmu.c | 13 ++++---------
1 file changed, 4 insertions(+), 9 deletions(-)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1f55b0c7b11d..887ffa1f5b14 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1460,7 +1460,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
bool fault_is_perm)
{
int ret = 0;
- bool write_fault, writable, force_pte = false;
+ bool write_fault, writable;
bool exec_fault, mte_allowed;
bool device = false, vfio_allow_any_uc = false;
unsigned long mmu_seq;
@@ -1472,6 +1472,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
+ bool force_pte = logging_active || is_protected_kvm_enabled();
long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
@@ -1521,16 +1522,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return -EFAULT;
}
- /*
- * logging_active is guaranteed to never be true for VM_PFNMAP
- * memslots.
- */
- if (logging_active || is_protected_kvm_enabled()) {
- force_pte = true;
+ if (force_pte)
vma_shift = PAGE_SHIFT;
- } else {
+ else
vma_shift = get_vma_page_shift(vma, hva);
- }
switch (vma_shift) {
#ifndef __PAGETABLE_PMD_FOLDED
--
2.48.1.711.g2feabab25a-goog
^ permalink raw reply [flat|nested] 31+ messages in thread
* Re: [PATCH v5 6/9] KVM: arm64: Refactor user_mem_abort() calculation of force_pte
2025-03-03 17:10 ` [PATCH v5 6/9] KVM: arm64: Refactor user_mem_abort() calculation of force_pte Fuad Tabba
@ 2025-03-06 10:46 ` Quentin Perret
2025-03-06 10:54 ` Fuad Tabba
0 siblings, 1 reply; 31+ messages in thread
From: Quentin Perret @ 2025-03-06 10:46 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
david, michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, keirf, roypat, shuah, hch,
jgg, rientjes, jhubbard, fvdl, hughd, jthoughton, peterx
On Monday 03 Mar 2025 at 17:10:10 (+0000), Fuad Tabba wrote:
> To simplify the code and to make the assumptions clearer,
> refactor user_mem_abort() by immediately setting force_pte to
> true if the conditions are met. Also, remove the comment about
> logging_active being guaranteed to never be true for VM_PFNMAP
> memslots, since it's not technically correct right now.
>
> No functional change intended.
>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> arch/arm64/kvm/mmu.c | 13 ++++---------
> 1 file changed, 4 insertions(+), 9 deletions(-)
>
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 1f55b0c7b11d..887ffa1f5b14 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1460,7 +1460,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> bool fault_is_perm)
> {
> int ret = 0;
> - bool write_fault, writable, force_pte = false;
> + bool write_fault, writable;
> bool exec_fault, mte_allowed;
> bool device = false, vfio_allow_any_uc = false;
> unsigned long mmu_seq;
> @@ -1472,6 +1472,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> gfn_t gfn;
> kvm_pfn_t pfn;
> bool logging_active = memslot_is_logging(memslot);
> + bool force_pte = logging_active || is_protected_kvm_enabled();
> long vma_pagesize, fault_granule;
> enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
> struct kvm_pgtable *pgt;
> @@ -1521,16 +1522,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> return -EFAULT;
> }
>
> - /*
> - * logging_active is guaranteed to never be true for VM_PFNMAP
> - * memslots.
> - */
Indeed, I tried to add the following snippet on top of upstream:
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 1f55b0c7b11d..b5c3a6b9957f 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1525,6 +1525,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
* logging_active is guaranteed to never be true for VM_PFNMAP
* memslots.
*/
+ WARN_ON_ONCE(logging_active && (vma->vm_flags & VM_PFNMAP));
if (logging_active || is_protected_kvm_enabled()) {
force_pte = true;
vma_shift = PAGE_SHIFT;
And I could easily get that thing to trigger -- the trick is to back a
memslot with standard anon memory, enable dirty logging, and then mmap()
with MAP_FIXED on top of that a VM_PFNMAP region, and KVM will happily
proceed. Note that this has nothing to do with your series, it's just an
existing upstream bug.
Sadly that means the vma checks we do in kvm_arch_prepare_memory_region()
are bogus. Memslots are associated with an HVA range, not the underlying
VMAs which are not guaranteed stable. This bug applies to both the
VM_PFNMAP checks and the MTE checks, I think.
I can't immediately think of a good way to make the checks more robust,
but I'll have a think. If anybody has an idea ... :-)
Thanks,
Quentin
> - if (logging_active || is_protected_kvm_enabled()) {
> - force_pte = true;
> + if (force_pte)
> vma_shift = PAGE_SHIFT;
> - } else {
> + else
> vma_shift = get_vma_page_shift(vma, hva);
> - }
>
> switch (vma_shift) {
> #ifndef __PAGETABLE_PMD_FOLDED
> --
> 2.48.1.711.g2feabab25a-goog
>
^ permalink raw reply [flat|nested] 31+ messages in thread* Re: [PATCH v5 6/9] KVM: arm64: Refactor user_mem_abort() calculation of force_pte
2025-03-06 10:46 ` Quentin Perret
@ 2025-03-06 10:54 ` Fuad Tabba
0 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-06 10:54 UTC (permalink / raw)
To: Quentin Perret
Cc: kvm, linux-arm-msm, linux-mm, pbonzini, chenhuacai, mpe, anup,
paul.walmsley, palmer, aou, seanjc, viro, brauner, willy, akpm,
xiaoyao.li, yilun.xu, chao.p.peng, jarkko, amoorthy, dmatlack,
isaku.yamahata, mic, vbabka, vannapurve, ackerleytng, mail,
david, michael.roth, wei.w.wang, liam.merwick, isaku.yamahata,
kirill.shutemov, suzuki.poulose, steven.price, quic_eberman,
quic_mnalajal, quic_tsoni, quic_svaddagi, quic_cvanscha,
quic_pderrin, quic_pheragu, catalin.marinas, james.morse,
yuzenghui, oliver.upton, maz, will, keirf, roypat, shuah, hch,
jgg, rientjes, jhubbard, fvdl, hughd, jthoughton, peterx
On Thu, 6 Mar 2025 at 10:46, Quentin Perret <qperret@google.com> wrote:
>
> On Monday 03 Mar 2025 at 17:10:10 (+0000), Fuad Tabba wrote:
> > To simplify the code and to make the assumptions clearer,
> > refactor user_mem_abort() by immediately setting force_pte to
> > true if the conditions are met. Also, remove the comment about
> > logging_active being guaranteed to never be true for VM_PFNMAP
> > memslots, since it's not technically correct right now.
> >
> > No functional change intended.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > arch/arm64/kvm/mmu.c | 13 ++++---------
> > 1 file changed, 4 insertions(+), 9 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> > index 1f55b0c7b11d..887ffa1f5b14 100644
> > --- a/arch/arm64/kvm/mmu.c
> > +++ b/arch/arm64/kvm/mmu.c
> > @@ -1460,7 +1460,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> > bool fault_is_perm)
> > {
> > int ret = 0;
> > - bool write_fault, writable, force_pte = false;
> > + bool write_fault, writable;
> > bool exec_fault, mte_allowed;
> > bool device = false, vfio_allow_any_uc = false;
> > unsigned long mmu_seq;
> > @@ -1472,6 +1472,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> > gfn_t gfn;
> > kvm_pfn_t pfn;
> > bool logging_active = memslot_is_logging(memslot);
> > + bool force_pte = logging_active || is_protected_kvm_enabled();
> > long vma_pagesize, fault_granule;
> > enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
> > struct kvm_pgtable *pgt;
> > @@ -1521,16 +1522,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> > return -EFAULT;
> > }
> >
> > - /*
> > - * logging_active is guaranteed to never be true for VM_PFNMAP
> > - * memslots.
> > - */
>
> Indeed, I tried to add the following snippet on top of upstream:
>
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 1f55b0c7b11d..b5c3a6b9957f 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1525,6 +1525,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> * logging_active is guaranteed to never be true for VM_PFNMAP
> * memslots.
> */
> + WARN_ON_ONCE(logging_active && (vma->vm_flags & VM_PFNMAP));
> if (logging_active || is_protected_kvm_enabled()) {
> force_pte = true;
> vma_shift = PAGE_SHIFT;
>
> And I could easily get that thing to trigger -- the trick is to back a
> memslot with standard anon memory, enable dirty logging, and then mmap()
> with MAP_FIXED on top of that a VM_PFNMAP region, and KVM will happily
> proceed. Note that this has nothing to do with your series, it's just an
> existing upstream bug.
>
Thanks Quentin. Since you had told me about this offline before I
respun this series, I removed the warning I had in previous
iterations, the existing comment about logging_active, and made this
patch a "no functional change intended" one.
> Sadly that means the vma checks we do in kvm_arch_prepare_memory_region()
> are bogus. Memslots are associated with an HVA range, not the underlying
> VMAs which are not guaranteed stable. This bug applies to both the
> VM_PFNMAP checks and the MTE checks, I think.
>
> I can't immediately think of a good way to make the checks more robust,
> but I'll have a think. If anybody has an idea ... :-)
>
Cheers,
/fuad
> Thanks,
> Quentin
>
> > - if (logging_active || is_protected_kvm_enabled()) {
> > - force_pte = true;
> > + if (force_pte)
> > vma_shift = PAGE_SHIFT;
> > - } else {
> > + else
> > vma_shift = get_vma_page_shift(vma, hva);
> > - }
> >
> > switch (vma_shift) {
> > #ifndef __PAGETABLE_PMD_FOLDED
> > --
> > 2.48.1.711.g2feabab25a-goog
> >
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v5 7/9] KVM: arm64: Handle guest_memfd()-backed guest page faults
2025-03-03 17:10 [PATCH v5 0/9] KVM: Mapping guest_memfd backed memory at the host for software protected VMs Fuad Tabba
` (5 preceding siblings ...)
2025-03-03 17:10 ` [PATCH v5 6/9] KVM: arm64: Refactor user_mem_abort() calculation of force_pte Fuad Tabba
@ 2025-03-03 17:10 ` Fuad Tabba
2025-03-03 17:10 ` [PATCH v5 8/9] KVM: arm64: Enable mapping guest_memfd in arm64 Fuad Tabba
2025-03-03 17:10 ` [PATCH v5 9/9] KVM: guest_memfd: selftests: guest_memfd mmap() test when mapping is allowed Fuad Tabba
8 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-03 17:10 UTC (permalink / raw)
To: kvm, linux-arm-msm, linux-mm
Cc: pbonzini, chenhuacai, mpe, anup, paul.walmsley, palmer, aou,
seanjc, viro, brauner, willy, akpm, xiaoyao.li, yilun.xu,
chao.p.peng, jarkko, amoorthy, dmatlack, isaku.yamahata, mic,
vbabka, vannapurve, ackerleytng, mail, david, michael.roth,
wei.w.wang, liam.merwick, isaku.yamahata, kirill.shutemov,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx, tabba
Add arm64 support for handling guest page faults on guest_memfd
backed memslots.
For now, the fault granule is restricted to PAGE_SIZE.
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/arm64/kvm/mmu.c | 65 +++++++++++++++++++++++++++-------------
include/linux/kvm_host.h | 5 ++++
virt/kvm/kvm_main.c | 5 ----
3 files changed, 50 insertions(+), 25 deletions(-)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 887ffa1f5b14..adb0681fc1c6 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1454,6 +1454,30 @@ static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
return vma->vm_flags & VM_MTE_ALLOWED;
}
+static kvm_pfn_t faultin_pfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, bool write_fault, bool *writable,
+ struct page **page, bool is_private)
+{
+ kvm_pfn_t pfn;
+ int ret;
+
+ if (!is_private)
+ return __kvm_faultin_pfn(slot, gfn, write_fault ? FOLL_WRITE : 0, writable, page);
+
+ *writable = false;
+
+ ret = kvm_gmem_get_pfn(kvm, slot, gfn, &pfn, page, NULL);
+ if (!ret) {
+ *writable = !memslot_is_readonly(slot);
+ return pfn;
+ }
+
+ if (ret == -EHWPOISON)
+ return KVM_PFN_ERR_HWPOISON;
+
+ return KVM_PFN_ERR_NOSLOT_MASK;
+}
+
static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
struct kvm_s2_trans *nested,
struct kvm_memory_slot *memslot, unsigned long hva,
@@ -1461,19 +1485,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
{
int ret = 0;
bool write_fault, writable;
- bool exec_fault, mte_allowed;
+ bool exec_fault, mte_allowed = false;
bool device = false, vfio_allow_any_uc = false;
unsigned long mmu_seq;
phys_addr_t ipa = fault_ipa;
struct kvm *kvm = vcpu->kvm;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma = NULL;
short vma_shift;
void *memcache;
- gfn_t gfn;
+ gfn_t gfn = ipa >> PAGE_SHIFT;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
- bool force_pte = logging_active || is_protected_kvm_enabled();
- long vma_pagesize, fault_granule;
+ bool is_gmem = kvm_mem_is_private(kvm, gfn);
+ bool force_pte = logging_active || is_gmem || is_protected_kvm_enabled();
+ long vma_pagesize, fault_granule = PAGE_SIZE;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
struct page *page;
@@ -1510,16 +1535,22 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return ret;
}
+ mmap_read_lock(current->mm);
+
/*
* Let's check if we will get back a huge page backed by hugetlbfs, or
* get block mapping for device MMIO region.
*/
- mmap_read_lock(current->mm);
- vma = vma_lookup(current->mm, hva);
- if (unlikely(!vma)) {
- kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
- mmap_read_unlock(current->mm);
- return -EFAULT;
+ if (!is_gmem) {
+ vma = vma_lookup(current->mm, hva);
+ if (unlikely(!vma)) {
+ kvm_err("Failed to find VMA for hva 0x%lx\n", hva);
+ mmap_read_unlock(current->mm);
+ return -EFAULT;
+ }
+
+ vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
+ mte_allowed = kvm_vma_mte_allowed(vma);
}
if (force_pte)
@@ -1590,18 +1621,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
ipa &= ~(vma_pagesize - 1);
}
- gfn = ipa >> PAGE_SHIFT;
- mte_allowed = kvm_vma_mte_allowed(vma);
-
- vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
-
/* Don't use the VMA after the unlock -- it may have vanished */
vma = NULL;
/*
* Read mmu_invalidate_seq so that KVM can detect if the results of
- * vma_lookup() or __kvm_faultin_pfn() become stale prior to
- * acquiring kvm->mmu_lock.
+ * vma_lookup() or faultin_pfn() become stale prior to acquiring
+ * kvm->mmu_lock.
*
* Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
* with the smp_wmb() in kvm_mmu_invalidate_end().
@@ -1609,8 +1635,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
mmap_read_unlock(current->mm);
- pfn = __kvm_faultin_pfn(memslot, gfn, write_fault ? FOLL_WRITE : 0,
- &writable, &page);
+ pfn = faultin_pfn(kvm, memslot, gfn, write_fault, &writable, &page, is_gmem);
if (pfn == KVM_PFN_ERR_HWPOISON) {
kvm_send_hwpoison_signal(hva, vma_shift);
return 0;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 296f1d284d55..88efbbf04db1 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1882,6 +1882,11 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
return gfn_to_memslot(kvm, gfn)->id;
}
+static inline bool memslot_is_readonly(const struct kvm_memory_slot *slot)
+{
+ return slot->flags & KVM_MEM_READONLY;
+}
+
static inline gfn_t
hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
{
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 38f0f402ea46..3e40acb9f5c0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2624,11 +2624,6 @@ unsigned long kvm_host_page_size(struct kvm_vcpu *vcpu, gfn_t gfn)
return size;
}
-static bool memslot_is_readonly(const struct kvm_memory_slot *slot)
-{
- return slot->flags & KVM_MEM_READONLY;
-}
-
static unsigned long __gfn_to_hva_many(const struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages, bool write)
{
--
2.48.1.711.g2feabab25a-goog
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v5 8/9] KVM: arm64: Enable mapping guest_memfd in arm64
2025-03-03 17:10 [PATCH v5 0/9] KVM: Mapping guest_memfd backed memory at the host for software protected VMs Fuad Tabba
` (6 preceding siblings ...)
2025-03-03 17:10 ` [PATCH v5 7/9] KVM: arm64: Handle guest_memfd()-backed guest page faults Fuad Tabba
@ 2025-03-03 17:10 ` Fuad Tabba
2025-03-03 17:10 ` [PATCH v5 9/9] KVM: guest_memfd: selftests: guest_memfd mmap() test when mapping is allowed Fuad Tabba
8 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-03 17:10 UTC (permalink / raw)
To: kvm, linux-arm-msm, linux-mm
Cc: pbonzini, chenhuacai, mpe, anup, paul.walmsley, palmer, aou,
seanjc, viro, brauner, willy, akpm, xiaoyao.li, yilun.xu,
chao.p.peng, jarkko, amoorthy, dmatlack, isaku.yamahata, mic,
vbabka, vannapurve, ackerleytng, mail, david, michael.roth,
wei.w.wang, liam.merwick, isaku.yamahata, kirill.shutemov,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx, tabba
Enable mapping guest_memfd in arm64. For now, it applies to all
VMs in arm64 that use guest_memfd. In the future, new VM types
can restrict this via kvm_arch_gmem_supports_shared_mem().
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/arm64/include/asm/kvm_host.h | 10 ++++++++++
arch/arm64/kvm/Kconfig | 1 +
2 files changed, 11 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d919557af5e5..b3b154b81d97 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1543,4 +1543,14 @@ void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
#define kvm_has_s1poe(k) \
(kvm_has_feat((k), ID_AA64MMFR3_EL1, S1POE, IMP))
+static inline bool kvm_arch_has_private_mem(struct kvm *kvm)
+{
+ return IS_ENABLED(CONFIG_KVM_PRIVATE_MEM);
+}
+
+static inline bool kvm_arch_gmem_supports_shared_mem(struct kvm *kvm)
+{
+ return IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM);
+}
+
#endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index ead632ad01b4..4830d8805bed 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -38,6 +38,7 @@ menuconfig KVM
select HAVE_KVM_VCPU_RUN_PID_CHANGE
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
+ select KVM_GMEM_SHARED_MEM
help
Support hosting virtualized guest machines.
--
2.48.1.711.g2feabab25a-goog
^ permalink raw reply [flat|nested] 31+ messages in thread
* [PATCH v5 9/9] KVM: guest_memfd: selftests: guest_memfd mmap() test when mapping is allowed
2025-03-03 17:10 [PATCH v5 0/9] KVM: Mapping guest_memfd backed memory at the host for software protected VMs Fuad Tabba
` (7 preceding siblings ...)
2025-03-03 17:10 ` [PATCH v5 8/9] KVM: arm64: Enable mapping guest_memfd in arm64 Fuad Tabba
@ 2025-03-03 17:10 ` Fuad Tabba
8 siblings, 0 replies; 31+ messages in thread
From: Fuad Tabba @ 2025-03-03 17:10 UTC (permalink / raw)
To: kvm, linux-arm-msm, linux-mm
Cc: pbonzini, chenhuacai, mpe, anup, paul.walmsley, palmer, aou,
seanjc, viro, brauner, willy, akpm, xiaoyao.li, yilun.xu,
chao.p.peng, jarkko, amoorthy, dmatlack, isaku.yamahata, mic,
vbabka, vannapurve, ackerleytng, mail, david, michael.roth,
wei.w.wang, liam.merwick, isaku.yamahata, kirill.shutemov,
suzuki.poulose, steven.price, quic_eberman, quic_mnalajal,
quic_tsoni, quic_svaddagi, quic_cvanscha, quic_pderrin,
quic_pheragu, catalin.marinas, james.morse, yuzenghui,
oliver.upton, maz, will, qperret, keirf, roypat, shuah, hch, jgg,
rientjes, jhubbard, fvdl, hughd, jthoughton, peterx, tabba
Expand the guest_memfd selftests to include testing mapping guest
memory for VM types that support it.
Also, build the guest_memfd selftest for aarch64.
Signed-off-by: Fuad Tabba <tabba@google.com>
---
tools/testing/selftests/kvm/Makefile.kvm | 1 +
.../testing/selftests/kvm/guest_memfd_test.c | 75 +++++++++++++++++--
2 files changed, 70 insertions(+), 6 deletions(-)
diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm
index 4277b983cace..c9a3f30e28dd 100644
--- a/tools/testing/selftests/kvm/Makefile.kvm
+++ b/tools/testing/selftests/kvm/Makefile.kvm
@@ -160,6 +160,7 @@ TEST_GEN_PROGS_arm64 += coalesced_io_test
TEST_GEN_PROGS_arm64 += demand_paging_test
TEST_GEN_PROGS_arm64 += dirty_log_test
TEST_GEN_PROGS_arm64 += dirty_log_perf_test
+TEST_GEN_PROGS_arm64 += guest_memfd_test
TEST_GEN_PROGS_arm64 += guest_print_test
TEST_GEN_PROGS_arm64 += get-reg-list
TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index ce687f8d248f..38c501e49e0e 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -34,12 +34,48 @@ static void test_file_read_write(int fd)
"pwrite on a guest_mem fd should fail");
}
-static void test_mmap(int fd, size_t page_size)
+static void test_mmap_allowed(int fd, size_t total_size)
{
+ size_t page_size = getpagesize();
+ const char val = 0xaa;
+ char *mem;
+ int ret;
+ int i;
+
+ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "mmaping() guest memory should pass.");
+
+ memset(mem, val, total_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(mem[i], val);
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,
+ page_size);
+ TEST_ASSERT(!ret, "fallocate the first page should succeed");
+
+ for (i = 0; i < page_size; i++)
+ TEST_ASSERT_EQ(mem[i], 0x00);
+ for (; i < total_size; i++)
+ TEST_ASSERT_EQ(mem[i], val);
+
+ memset(mem, val, total_size);
+ for (i = 0; i < total_size; i++)
+ TEST_ASSERT_EQ(mem[i], val);
+
+ ret = munmap(mem, total_size);
+ TEST_ASSERT(!ret, "munmap should succeed");
+}
+
+static void test_mmap_denied(int fd, size_t total_size)
+{
+ size_t page_size = getpagesize();
char *mem;
mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
TEST_ASSERT_EQ(mem, MAP_FAILED);
+
+ mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ TEST_ASSERT_EQ(mem, MAP_FAILED);
}
static void test_file_size(int fd, size_t page_size, size_t total_size)
@@ -170,19 +206,27 @@ static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
close(fd1);
}
-int main(int argc, char *argv[])
+unsigned long get_shared_type(void)
{
- size_t page_size;
+#ifdef __x86_64__
+ return KVM_X86_SW_PROTECTED_VM;
+#endif
+ return 0;
+}
+
+void test_vm_type(unsigned long type, bool is_shared)
+{
+ struct kvm_vm *vm;
size_t total_size;
+ size_t page_size;
int fd;
- struct kvm_vm *vm;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
page_size = getpagesize();
total_size = page_size * 4;
- vm = vm_create_barebones();
+ vm = vm_create_barebones_type(type);
test_create_guest_memfd_invalid(vm);
test_create_guest_memfd_multiple(vm);
@@ -190,10 +234,29 @@ int main(int argc, char *argv[])
fd = vm_create_guest_memfd(vm, total_size, 0);
test_file_read_write(fd);
- test_mmap(fd, page_size);
+
+ if (is_shared)
+ test_mmap_allowed(fd, total_size);
+ else
+ test_mmap_denied(fd, total_size);
+
test_file_size(fd, page_size, total_size);
test_fallocate(fd, page_size, total_size);
test_invalid_punch_hole(fd, page_size, total_size);
close(fd);
+ kvm_vm_release(vm);
+}
+
+int main(int argc, char *argv[])
+{
+#ifndef __aarch64__
+ /* For now, arm64 only supports shared guest memory. */
+ test_vm_type(VM_TYPE_DEFAULT, false);
+#endif
+
+ if (kvm_has_cap(KVM_CAP_GMEM_SHARED_MEM))
+ test_vm_type(get_shared_type(), true);
+
+ return 0;
}
--
2.48.1.711.g2feabab25a-goog
^ permalink raw reply [flat|nested] 31+ messages in thread