* [RFC PATCH v1 01/10] KVM: guest_memfd: Don't set FGP_ACCESSED when getting folios
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 02/10] KVM: guest_memfd: Directly allocate folios with filemap_alloc_folio() Ackerley Tng
` (8 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
guest_memfd folios don't care about accessed flags since the memory is
unevictable and there is no storage to write back to, hence, cleanup the
allocation path by not setting FGP_ACCESSED.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Hildenbrand (arm) <david@kernel.org>
---
virt/kvm/guest_memfd.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 923c51a3a5256..2df27b6443115 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -126,14 +126,13 @@ static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
* Fast-path: See if folio is already present in mapping to avoid
* policy_lookup.
*/
- folio = __filemap_get_folio(inode->i_mapping, index,
- FGP_LOCK | FGP_ACCESSED, 0);
+ folio = filemap_lock_folio(inode->i_mapping, index);
if (!IS_ERR(folio))
return folio;
policy = mpol_shared_policy_lookup(&GMEM_I(inode)->policy, index);
folio = __filemap_get_folio_mpol(inode->i_mapping, index,
- FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
+ FGP_LOCK | FGP_CREAT,
mapping_gfp_mask(inode->i_mapping), policy);
mpol_cond_put(policy);
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread* [RFC PATCH v1 02/10] KVM: guest_memfd: Directly allocate folios with filemap_alloc_folio()
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 01/10] KVM: guest_memfd: Don't set FGP_ACCESSED when getting folios Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 03/10] mm: truncate: Expose preparation steps for truncate_inode_pages_final() Ackerley Tng
` (7 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
__filemap_get_folio_mpol() is parametrized by a bunch of GFP flags, which
adds complexity for the reader. Since guest_memfd doesn't meaningfully use
any of the other FGP flags, undo that complexity by directly calling
filemap_alloc_folio().
Directly calling filemap_alloc_folio() also allows the order of 0 to be
explicitly specified, which is the only order guest_memfd supports. This is
easier to understand, and removes the chance of anything else being able to
unintentionally influence allocated folio size.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 51 +++++++++++++++++++++++++++++-------------
1 file changed, 36 insertions(+), 15 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 2df27b6443115..2488d7b8f2b0d 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -107,6 +107,39 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
return __kvm_gmem_prepare_folio(kvm, slot, index, folio);
}
+static struct folio *__kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
+{
+ /* TODO: Support huge pages. */
+ struct mempolicy *policy;
+ struct folio *folio;
+ gfp_t gfp;
+ int ret;
+
+ /*
+ * Fast-path: See if folio is already present in mapping to avoid
+ * policy_lookup.
+ */
+ folio = filemap_lock_folio(inode->i_mapping, index);
+ if (!IS_ERR(folio))
+ return folio;
+
+ gfp = mapping_gfp_mask(inode->i_mapping);
+
+ policy = mpol_shared_policy_lookup(&GMEM_I(inode)->policy, index);
+ folio = filemap_alloc_folio(gfp, 0, policy);
+ mpol_cond_put(policy);
+ if (!folio)
+ return ERR_PTR(-ENOMEM);
+
+ ret = filemap_add_folio(inode->i_mapping, folio, index, gfp);
+ if (ret) {
+ folio_put(folio);
+ return ERR_PTR(ret);
+ }
+
+ return folio;
+}
+
/*
* Returns a locked folio on success. The caller is responsible for
* setting the up-to-date flag before the memory is mapped into the guest.
@@ -118,23 +151,11 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
*/
static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
{
- /* TODO: Support huge pages. */
- struct mempolicy *policy;
struct folio *folio;
- /*
- * Fast-path: See if folio is already present in mapping to avoid
- * policy_lookup.
- */
- folio = filemap_lock_folio(inode->i_mapping, index);
- if (!IS_ERR(folio))
- return folio;
-
- policy = mpol_shared_policy_lookup(&GMEM_I(inode)->policy, index);
- folio = __filemap_get_folio_mpol(inode->i_mapping, index,
- FGP_LOCK | FGP_CREAT,
- mapping_gfp_mask(inode->i_mapping), policy);
- mpol_cond_put(policy);
+ do {
+ folio = __kvm_gmem_get_folio(inode, index);
+ } while (PTR_ERR(folio) == -EEXIST);
/*
* External interfaces like kvm_gmem_get_pfn() support dealing
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread* [RFC PATCH v1 03/10] mm: truncate: Expose preparation steps for truncate_inode_pages_final()
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 01/10] KVM: guest_memfd: Don't set FGP_ACCESSED when getting folios Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 02/10] KVM: guest_memfd: Directly allocate folios with filemap_alloc_folio() Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 04/10] KVM: guest_memfd: Implement evict_inode for guest_memfd Ackerley Tng
` (6 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
Expose preparation steps for truncate_inode_pages_final() to allow
preparation steps to be shared by filesystems that want to implement
truncation differently.
This preparation function will be used by guest_memfd in a later patch.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
include/linux/mm.h | 1 +
mm/truncate.c | 21 +++++++++++++++++++--
2 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f0d5be9dc7368..7f04f1eaab15a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3732,6 +3732,7 @@ extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
void truncate_inode_pages(struct address_space *mapping, loff_t lstart);
void truncate_inode_pages_range(struct address_space *mapping, loff_t lstart,
loff_t lend);
+void truncate_inode_pages_final_prepare(struct address_space *mapping);
void truncate_inode_pages_final(struct address_space *mapping);
/* generic vm_area_ops exported for stackable file systems */
diff --git a/mm/truncate.c b/mm/truncate.c
index 12467c1bd711e..0e85d5451adbe 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -487,7 +487,9 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
EXPORT_SYMBOL(truncate_inode_pages);
/**
- * truncate_inode_pages_final - truncate *all* pages before inode dies
+ * truncate_inode_pages_final_prepare - Prepare the mapping for final
+ * truncation without actually truncating the inode pages. This can be
+ * used by filesystems which want to add custom truncation of folios.
* @mapping: mapping to truncate
*
* Called under (and serialized by) inode->i_rwsem.
@@ -495,7 +497,7 @@ EXPORT_SYMBOL(truncate_inode_pages);
* Filesystems have to use this in the .evict_inode path to inform the
* VM that this is the final truncate and the inode is going away.
*/
-void truncate_inode_pages_final(struct address_space *mapping)
+void truncate_inode_pages_final_prepare(struct address_space *mapping)
{
/*
* Page reclaim can not participate in regular inode lifetime
@@ -516,6 +518,21 @@ void truncate_inode_pages_final(struct address_space *mapping)
xa_lock_irq(&mapping->i_pages);
xa_unlock_irq(&mapping->i_pages);
}
+}
+EXPORT_SYMBOL(truncate_inode_pages_final_prepare);
+
+/**
+ * truncate_inode_pages_final - truncate *all* pages before inode dies
+ * @mapping: mapping to truncate
+ *
+ * Called under (and serialized by) inode->i_rwsem.
+ *
+ * Filesystems have to use this in the .evict_inode path to inform the
+ * VM that this is the final truncate and the inode is going away.
+ */
+void truncate_inode_pages_final(struct address_space *mapping)
+{
+ truncate_inode_pages_final_prepare(mapping);
truncate_inode_pages(mapping, 0);
}
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread* [RFC PATCH v1 04/10] KVM: guest_memfd: Implement evict_inode for guest_memfd
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
` (2 preceding siblings ...)
2026-02-23 7:04 ` [RFC PATCH v1 03/10] mm: truncate: Expose preparation steps for truncate_inode_pages_final() Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 05/10] mm: Export unmap_mapping_folio() for KVM Ackerley Tng
` (5 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
In a later patch, guest_memfd will be using a custom truncation
routine. This is a preparatory patch, which implements .evict_inode for
guest_memfd, but just performs exactly what would have been done if
.evict_inode were not implemented.
No functional change intended.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 2488d7b8f2b0d..57dec458bfa77 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -978,11 +978,23 @@ static void kvm_gmem_free_inode(struct inode *inode)
kmem_cache_free(kvm_gmem_inode_cachep, GMEM_I(inode));
}
+static void kvm_gmem_evict_inode(struct inode *inode)
+{
+ struct address_space *mapping = inode->i_mapping;
+
+ truncate_inode_pages_final_prepare(mapping);
+
+ truncate_inode_pages_range(mapping, 0, inode->i_size);
+
+ clear_inode(inode);
+}
+
static const struct super_operations kvm_gmem_super_operations = {
.statfs = simple_statfs,
.alloc_inode = kvm_gmem_alloc_inode,
.destroy_inode = kvm_gmem_destroy_inode,
.free_inode = kvm_gmem_free_inode,
+ .evict_inode = kvm_gmem_evict_inode,
};
static int kvm_gmem_init_fs_context(struct fs_context *fc)
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread* [RFC PATCH v1 05/10] mm: Export unmap_mapping_folio() for KVM
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
` (3 preceding siblings ...)
2026-02-23 7:04 ` [RFC PATCH v1 04/10] KVM: guest_memfd: Implement evict_inode for guest_memfd Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 06/10] mm: filemap: Export filemap_remove_folio() Ackerley Tng
` (4 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
guest_memfd needs a way to unmap a folio from all userspace processes. This
is required as part of a folio's truncation process. The function
unmap_mapping_folio() provides exactly this functionality.
Move its declaration from the internal mm/internal.h to the public
include/linux/mm.h and export the symbol.
unmap_mapping_folio() will be used by guest_memfd in a later patch to
implement a custom truncation function.
No functional change intended.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
include/linux/mm.h | 2 ++
mm/internal.h | 2 --
mm/memory.c | 2 ++
3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7f04f1eaab15a..97fa861364590 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2690,6 +2690,7 @@ extern vm_fault_t handle_mm_fault(struct vm_area_struct *vma,
extern int fixup_user_fault(struct mm_struct *mm,
unsigned long address, unsigned int fault_flags,
bool *unlocked);
+void unmap_mapping_folio(struct folio *folio);
void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows);
void unmap_mapping_range(struct address_space *mapping,
@@ -2710,6 +2711,7 @@ static inline int fixup_user_fault(struct mm_struct *mm, unsigned long address,
BUG();
return -EFAULT;
}
+static inline void unmap_mapping_folio(struct folio *folio) { }
static inline void unmap_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t nr, bool even_cows) { }
static inline void unmap_mapping_range(struct address_space *mapping,
diff --git a/mm/internal.h b/mm/internal.h
index f35dbcf99a86b..98351be76238b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -953,7 +953,6 @@ static inline bool free_area_empty(struct free_area *area, int migratetype)
struct anon_vma *folio_anon_vma(const struct folio *folio);
#ifdef CONFIG_MMU
-void unmap_mapping_folio(struct folio *folio);
extern long populate_vma_page_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end, int *locked);
extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
@@ -1131,7 +1130,6 @@ static inline struct file *maybe_unlock_mmap_for_io(struct vm_fault *vmf,
return fpin;
}
#else /* !CONFIG_MMU */
-static inline void unmap_mapping_folio(struct folio *folio) { }
static inline void mlock_new_folio(struct folio *folio) { }
static inline bool need_mlock_drain(int cpu) { return false; }
static inline void mlock_drain_local(void) { }
diff --git a/mm/memory.c b/mm/memory.c
index da360a6eb8a48..983bb25517cb7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -78,6 +78,7 @@
#include <linux/sched/sysctl.h>
#include <linux/pgalloc.h>
#include <linux/uaccess.h>
+#include <linux/kvm_types.h>
#include <trace/events/kmem.h>
@@ -4244,6 +4245,7 @@ void unmap_mapping_folio(struct folio *folio)
last_index, &details);
i_mmap_unlock_read(mapping);
}
+EXPORT_SYMBOL_FOR_KVM(unmap_mapping_folio);
/**
* unmap_mapping_pages() - Unmap pages from processes.
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread* [RFC PATCH v1 06/10] mm: filemap: Export filemap_remove_folio()
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
` (4 preceding siblings ...)
2026-02-23 7:04 ` [RFC PATCH v1 05/10] mm: Export unmap_mapping_folio() for KVM Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 07/10] KVM: guest_memfd: Implement custom truncation function Ackerley Tng
` (3 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
Export filemap_remove_folio() for use by KVM.
KVM requires this function to remove folios from guest_memfd's filemap.
guest_memfd used to rely on higher-level, exported truncation functions. To
track memory that is actually allocated (i.e. i_blocks, st_blocks),
guest_memfd will need a custom truncation function to do accounting cleanup
and will be using filemap_remove_folio() for truncation.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
mm/filemap.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/mm/filemap.c b/mm/filemap.c
index ebd75684cb0a7..379d62239fc5f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -49,6 +49,7 @@
#include <linux/sched/mm.h>
#include <linux/sysctl.h>
#include <linux/pgalloc.h>
+#include <linux/kvm_types.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -262,6 +263,7 @@ void filemap_remove_folio(struct folio *folio)
filemap_free_folio(mapping, folio);
}
+EXPORT_SYMBOL_FOR_KVM(filemap_remove_folio);
/*
* page_cache_delete_batch - delete several folios from page cache
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread* [RFC PATCH v1 07/10] KVM: guest_memfd: Implement custom truncation function
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
` (5 preceding siblings ...)
2026-02-23 7:04 ` [RFC PATCH v1 06/10] mm: filemap: Export filemap_remove_folio() Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 08/10] KVM: guest_memfd: Track amount of memory allocated on inode Ackerley Tng
` (2 subsequent siblings)
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
Implement custom truncation function for guest_memfd, and replace calls to
truncate_inode_pages_range() with calls to this custom truncation function.
The custom truncation function removes a lot of the generality supported by
truncate_inode_pages_range() not required by guest_memfd, such as
+ sub-PAGE_SIZE truncations
+ Support for writeback
In a later patch, guest_memfd will use this custom truncation function to
handle updating of i_blocks and i_bytes in the inode during truncation.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 43 ++++++++++++++++++++++++++++++++++++++++--
1 file changed, 41 insertions(+), 2 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 57dec458bfa77..e6c66ab7062b3 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -247,6 +247,45 @@ static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start,
__kvm_gmem_invalidate_end(f, start, end);
}
+static void kvm_gmem_truncate_folio(struct folio *folio)
+{
+ folio_lock(folio);
+
+ if (folio_mapped(folio))
+ unmap_mapping_folio(folio);
+
+ /*
+ * guest_memfd doesn't need writeback, skip anything to do with
+ * writeback and just clear the dirty flag.
+ */
+ folio_clear_dirty(folio);
+ filemap_remove_folio(folio);
+
+ folio_unlock(folio);
+}
+
+static void kvm_gmem_truncate_range(struct inode *inode, pgoff_t start,
+ size_t nr_pages)
+
+{
+ struct folio_batch fbatch;
+ pgoff_t next;
+ pgoff_t last;
+ int i;
+
+ last = start + nr_pages - 1;
+
+ folio_batch_init(&fbatch);
+ next = start;
+ while (filemap_get_folios(inode->i_mapping, &next, last, &fbatch)) {
+ for (i = 0; i < folio_batch_count(&fbatch); ++i)
+ kvm_gmem_truncate_folio(fbatch.folios[i]);
+
+ folio_batch_release(&fbatch);
+ cond_resched();
+ }
+}
+
static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
pgoff_t start = offset >> PAGE_SHIFT;
@@ -260,7 +299,7 @@ static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
kvm_gmem_invalidate_begin(inode, start, end);
- truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
+ kvm_gmem_truncate_range(inode, offset, len >> PAGE_SHIFT);
kvm_gmem_invalidate_end(inode, start, end);
@@ -984,7 +1023,7 @@ static void kvm_gmem_evict_inode(struct inode *inode)
truncate_inode_pages_final_prepare(mapping);
- truncate_inode_pages_range(mapping, 0, inode->i_size);
+ kvm_gmem_truncate_range(inode, 0, inode->i_size >> PAGE_SHIFT);
clear_inode(inode);
}
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread* [RFC PATCH v1 08/10] KVM: guest_memfd: Track amount of memory allocated on inode
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
` (6 preceding siblings ...)
2026-02-23 7:04 ` [RFC PATCH v1 07/10] KVM: guest_memfd: Implement custom truncation function Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 09/10] KVM: selftests: Wrap fstat() to assert success Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 10/10] KVM: selftests: Test that st_blocks is updated on allocation Ackerley Tng
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
The guest memfd currently does not update the inode's i_blocks and i_bytes
count when memory is allocated or freed. Hence, st_blocks returned from
fstat() is always 0.
Introduce byte accounting for guest memfd inodes. When a new folio is
added to the filemap, add the folio's size using inode_add_bytes().
Conversely, when folios are truncated and removed from the mapping, sum
their sizes and subtract the total from the inode's byte count via
inode_sub_bytes().
With this change, stat.st_blocks for a guest_memfd will correctly report
the number of 512-byte blocks allocated to the file, consistent with other
memory-based filesystems like tmpfs.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
virt/kvm/guest_memfd.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index e6c66ab7062b3..ef7f049dadace 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -137,6 +137,8 @@ static struct folio *__kvm_gmem_get_folio(struct inode *inode, pgoff_t index)
return ERR_PTR(ret);
}
+ inode_add_bytes(inode, folio_size(folio));
+
return folio;
}
@@ -247,10 +249,14 @@ static void kvm_gmem_invalidate_end(struct inode *inode, pgoff_t start,
__kvm_gmem_invalidate_end(f, start, end);
}
-static void kvm_gmem_truncate_folio(struct folio *folio)
+static size_t kvm_gmem_truncate_folio(struct folio *folio)
{
+ size_t nr_bytes;
+
folio_lock(folio);
+ nr_bytes = folio_size(folio);
+
if (folio_mapped(folio))
unmap_mapping_folio(folio);
@@ -262,6 +268,8 @@ static void kvm_gmem_truncate_folio(struct folio *folio)
filemap_remove_folio(folio);
folio_unlock(folio);
+
+ return nr_bytes;
}
static void kvm_gmem_truncate_range(struct inode *inode, pgoff_t start,
@@ -269,6 +277,7 @@ static void kvm_gmem_truncate_range(struct inode *inode, pgoff_t start,
{
struct folio_batch fbatch;
+ size_t nr_bytes = 0;
pgoff_t next;
pgoff_t last;
int i;
@@ -279,11 +288,13 @@ static void kvm_gmem_truncate_range(struct inode *inode, pgoff_t start,
next = start;
while (filemap_get_folios(inode->i_mapping, &next, last, &fbatch)) {
for (i = 0; i < folio_batch_count(&fbatch); ++i)
- kvm_gmem_truncate_folio(fbatch.folios[i]);
+ nr_bytes += kvm_gmem_truncate_folio(fbatch.folios[i]);
folio_batch_release(&fbatch);
cond_resched();
}
+
+ inode_sub_bytes(inode, nr_bytes);
}
static long kvm_gmem_punch_hole(struct inode *inode, loff_t offset, loff_t len)
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread* [RFC PATCH v1 09/10] KVM: selftests: Wrap fstat() to assert success
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
` (7 preceding siblings ...)
2026-02-23 7:04 ` [RFC PATCH v1 08/10] KVM: guest_memfd: Track amount of memory allocated on inode Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
2026-02-23 7:04 ` [RFC PATCH v1 10/10] KVM: selftests: Test that st_blocks is updated on allocation Ackerley Tng
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
Extend kvm_syscalls.h to wrap fstat() to assert success. This will be used
in the next patch.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
tools/testing/selftests/kvm/guest_memfd_test.c | 15 +++++----------
.../testing/selftests/kvm/include/kvm_syscalls.h | 2 ++
2 files changed, 7 insertions(+), 10 deletions(-)
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index 618c937f3c90f..81387f06e770a 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -212,10 +212,8 @@ static void test_mmap_not_supported(int fd, size_t total_size)
static void test_file_size(int fd, size_t total_size)
{
struct stat sb;
- int ret;
- ret = fstat(fd, &sb);
- TEST_ASSERT(!ret, "fstat should succeed");
+ kvm_fstat(fd, &sb);
TEST_ASSERT_EQ(sb.st_size, total_size);
TEST_ASSERT_EQ(sb.st_blksize, page_size);
}
@@ -303,25 +301,22 @@ static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
{
- int fd1, fd2, ret;
+ int fd1, fd2;
struct stat st1, st2;
fd1 = __vm_create_guest_memfd(vm, page_size, 0);
TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
- ret = fstat(fd1, &st1);
- TEST_ASSERT(ret != -1, "memfd fstat should succeed");
+ kvm_fstat(fd1, &st1);
TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");
fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);
TEST_ASSERT(fd2 != -1, "memfd creation should succeed");
- ret = fstat(fd2, &st2);
- TEST_ASSERT(ret != -1, "memfd fstat should succeed");
+ kvm_fstat(fd2, &st2);
TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");
- ret = fstat(fd1, &st1);
- TEST_ASSERT(ret != -1, "memfd fstat should succeed");
+ kvm_fstat(fd1, &st1);
TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");
TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
diff --git a/tools/testing/selftests/kvm/include/kvm_syscalls.h b/tools/testing/selftests/kvm/include/kvm_syscalls.h
index d4e613162bba9..3f039c34e12e0 100644
--- a/tools/testing/selftests/kvm/include/kvm_syscalls.h
+++ b/tools/testing/selftests/kvm/include/kvm_syscalls.h
@@ -2,6 +2,7 @@
#ifndef SELFTEST_KVM_SYSCALLS_H
#define SELFTEST_KVM_SYSCALLS_H
+#include <sys/stat.h>
#include <sys/syscall.h>
#define MAP_ARGS0(m,...)
@@ -77,5 +78,6 @@ __KVM_SYSCALL_DEFINE(munmap, 2, void *, mem, size_t, size);
__KVM_SYSCALL_DEFINE(close, 1, int, fd);
__KVM_SYSCALL_DEFINE(fallocate, 4, int, fd, int, mode, loff_t, offset, loff_t, len);
__KVM_SYSCALL_DEFINE(ftruncate, 2, unsigned int, fd, off_t, length);
+__KVM_SYSCALL_DEFINE(fstat, 2, int, fd, struct stat *, buf);
#endif /* SELFTEST_KVM_SYSCALLS_H */
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread* [RFC PATCH v1 10/10] KVM: selftests: Test that st_blocks is updated on allocation
2026-02-23 7:04 [RFC PATCH v1 00/10] guest_memfd: Track amount of memory allocated on inode Ackerley Tng
` (8 preceding siblings ...)
2026-02-23 7:04 ` [RFC PATCH v1 09/10] KVM: selftests: Wrap fstat() to assert success Ackerley Tng
@ 2026-02-23 7:04 ` Ackerley Tng
9 siblings, 0 replies; 11+ messages in thread
From: Ackerley Tng @ 2026-02-23 7:04 UTC (permalink / raw)
To: linux-mm, linux-kernel, linux-fsdevel, kvm, linux-kselftest
Cc: akpm, david, lorenzo.stoakes, Liam.Howlett, vbabka, rppt, surenb,
mhocko, willy, pbonzini, shuah, ackerleytng, seanjc, shivankg,
rick.p.edgecombe, yan.y.zhao, rientjes, fvdl, jthoughton,
vannapurve, pratyush, pasha.tatashin, kalyazin, tabba,
michael.roth
The st_blocks field reported by fstat should reflect the number of
allocated 512-byte blocks for the guest memfd file.
Extend the fallocate test to verify that st_blocks is correctly updated
when memory is allocated or deallocated via
fallocate(FALLOC_FL_PUNCH_HOLE).
Add checks after each fallocate call to ensure that st_blocks increases on
allocation, decreases when a hole is punched, and is restored when the hole
is re-allocated. Also verify that st_blocks remains unchanged for failing
fallocate calls.
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
---
tools/testing/selftests/kvm/guest_memfd_test.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index 81387f06e770a..89228d73fa736 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -218,41 +218,58 @@ static void test_file_size(int fd, size_t total_size)
TEST_ASSERT_EQ(sb.st_blksize, page_size);
}
+static void assert_st_blocks_matches_size(int fd, size_t expected_size)
+{
+ struct stat sb;
+
+ kvm_fstat(fd, &sb);
+ TEST_ASSERT_EQ(sb.st_blocks, expected_size / 512);
+}
+
static void test_fallocate(int fd, size_t total_size)
{
int ret;
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size);
TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");
+ assert_st_blocks_matches_size(fd, total_size);
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
page_size - 1, page_size);
TEST_ASSERT(ret, "fallocate with unaligned offset should fail");
+ assert_st_blocks_matches_size(fd, total_size);
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size);
TEST_ASSERT(ret, "fallocate beginning at total_size should fail");
+ assert_st_blocks_matches_size(fd, total_size);
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size);
TEST_ASSERT(ret, "fallocate beginning after total_size should fail");
+ assert_st_blocks_matches_size(fd, total_size);
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
total_size, page_size);
TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed");
+ assert_st_blocks_matches_size(fd, total_size);
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
total_size + page_size, page_size);
TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed");
+ assert_st_blocks_matches_size(fd, total_size);
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
page_size, page_size - 1);
TEST_ASSERT(ret, "fallocate with unaligned size should fail");
+ assert_st_blocks_matches_size(fd, total_size);
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
page_size, page_size);
TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed");
+ assert_st_blocks_matches_size(fd, total_size - page_size);
ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size);
TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
+ assert_st_blocks_matches_size(fd, total_size);
}
static void test_invalid_punch_hole(int fd, size_t total_size)
--
2.53.0.345.g96ddfc5eaa-goog
^ permalink raw reply [flat|nested] 11+ messages in thread