linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Gregory Price <gourry@gourry.net>
To: linux-mm@kvack.org, akpm@linux-foundation.org, hughd@google.com
Cc: david@kernel.org, ljs@kernel.org, Liam.Howlett@oracle.com,
	vbabka@kernel.org, rppt@kernel.org, surenb@google.com,
	mhocko@suse.com, baolin.wang@linux.alibaba.com,
	linux-kernel@vger.kernel.org, kernel-team@meta.com,
	stable@vger.kernel.org
Subject: [PATCH] mm/shmem: use invalidate_lock to fix hole-punch race
Date: Thu, 26 Mar 2026 11:26:11 -0500	[thread overview]
Message-ID: <20260326162611.693539-1-gourry@gourry.net> (raw)

Inflating a VM's balloon while vhost-user-net fork+exec's a helper
triggers "still mapped when deleted" on the memfd backing guest RAM:

  BUG: Bad page cache in process __balloon  pfn:6520704
  page dumped because: still mapped when deleted
  ...
  shmem_undo_range+0x3fa/0x570
  shmem_fallocate+0x366/0x4d0
  vfs_fallocate+0x13c/0x310

This BUG also resulted in guests seeing stale mappings backed by a
zeroed page, causing guest kernel panics.  I was unable to trace that
specific interaction, but it appears to be related to THP splitting.

Two races allow PTEs to be re-installed for a folio that fallocate
is about to remove from page cache:

Race 1 — fault-around (filemap_map_pages):

  fallocate              fault-around           fork
  --------               ------------           ----
  set i_private
  unmap_mapping_range()
  # zaps PTEs
                       filemap_map_pages()
                        # re-maps folio!
                                              dup_mmap()
                                              # child VMA
                                              # in tree
  shmem_undo_range()
    lock folio
    unmap_mapping_folio()
    # child VMA:
    #   no PTE, skip
                                            copy_page_range()
                                              # copies PTE
    # parent VMA:
    #   zaps PTE
  filemap_remove_folio()
    # mapcount=1, BUG!

filemap_map_pages() is installed directly as the .map_pages handler, so
fault-around bypasses shmem_fault()'s i_private synchronization entirely.

Race 2 — shmem_fault TOCTOU:

  fallocate                   shmem_fault
  --------                    -----------
                            check i_private → NULL
  set i_private
  unmap_mapping_range()
  # zaps PTEs
                            shmem_get_folio_gfp()
                              # finds folio in cache
                            finish_fault()
                              # installs PTE
  shmem_undo_range()
    truncate_inode_folio()
      # mapcount=1, BUG!

Fix both races with invalidate_lock: shmem_fallocate() holds it exclusive
across unmap+truncate, while shmem_fault() and a new shmem_map_pages()
wrapper take it shared, so neither path can (re)install a PTE for a folio
that is in the process of being punched.

This matches the existing pattern used by secretmem_fault(),
udf_page_mkwrite(), and zonefs_filemap_page_mkwrite(), all of
which take invalidate_lock shared under mmap_lock in their
fault handlers.

This also requires removing the rcu_read_lock() from do_fault_around()
so that .map_pages implementations may take sleeping locks such as
invalidate_lock.

The outer rcu_read_lock is redundant for all in-tree .map_pages
implementations: every one either IS filemap_map_pages (which
takes rcu_read_lock) or is a thin wrapper around it.

Fixes: d7c1755179b8 ("mm: implement ->map_pages for shmem/tmpfs")
Cc: stable@vger.kernel.org
Signed-off-by: Gregory Price <gourry@gourry.net>
---
 mm/memory.c |  2 --
 mm/shmem.c  | 33 ++++++++++++++++++++++++++++++---
 2 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index e44469f9cf65..838583591fdf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5900,11 +5900,9 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf)
 			return VM_FAULT_OOM;
 	}
 
-	rcu_read_lock();
 	ret = vmf->vma->vm_ops->map_pages(vmf,
 			vmf->pgoff + from_pte - pte_off,
 			vmf->pgoff + to_pte - pte_off);
-	rcu_read_unlock();
 
 	return ret;
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index 4ecefe02881d..5c654b86f3cf 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2731,7 +2731,8 @@ static vm_fault_t shmem_falloc_wait(struct vm_fault *vmf, struct inode *inode)
 static vm_fault_t shmem_fault(struct vm_fault *vmf)
 {
 	struct inode *inode = file_inode(vmf->vma->vm_file);
-	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
+	struct address_space *mapping = inode->i_mapping;
+	gfp_t gfp = mapping_gfp_mask(mapping);
 	struct folio *folio = NULL;
 	vm_fault_t ret = 0;
 	int err;
@@ -2747,8 +2748,15 @@ static vm_fault_t shmem_fault(struct vm_fault *vmf)
 	}
 
 	WARN_ON_ONCE(vmf->page != NULL);
+	/*
+	 * shmem_fallocate(PUNCH_HOLE) holds invalidate_lock exclusive across
+	 * unmap+truncate.  Take it shared here so shmem_fault cannot obtain
+	 * a folio in the process of being punched.
+	 */
+	filemap_invalidate_lock_shared(mapping);
 	err = shmem_get_folio_gfp(inode, vmf->pgoff, 0, &folio, SGP_CACHE,
 				  gfp, vmf, &ret);
+	filemap_invalidate_unlock_shared(mapping);
 	if (err)
 		return vmf_error(err);
 	if (folio) {
@@ -3683,11 +3691,13 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 		inode->i_private = &shmem_falloc;
 		spin_unlock(&inode->i_lock);
 
+		filemap_invalidate_lock(mapping);
 		if ((u64)unmap_end > (u64)unmap_start)
 			unmap_mapping_range(mapping, unmap_start,
 					    1 + unmap_end - unmap_start, 0);
 		shmem_truncate_range(inode, offset, offset + len - 1);
 		/* No need to unmap again: hole-punching leaves COWed pages */
+		filemap_invalidate_unlock(mapping);
 
 		spin_lock(&inode->i_lock);
 		inode->i_private = NULL;
@@ -5268,9 +5278,26 @@ static const struct super_operations shmem_ops = {
 #endif
 };
 
+/*
+ * shmem_fallocate(PUNCH_HOLE) holds invalidate_lock for write across
+ * unmap+truncate.  Take it for read here so fault-around cannot re-map
+ * pages being punched.
+ */
+static vm_fault_t shmem_map_pages(struct vm_fault *vmf,
+				  pgoff_t start_pgoff, pgoff_t end_pgoff)
+{
+	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+	vm_fault_t ret;
+
+	filemap_invalidate_lock_shared(mapping);
+	ret = filemap_map_pages(vmf, start_pgoff, end_pgoff);
+	filemap_invalidate_unlock_shared(mapping);
+	return ret;
+}
+
 static const struct vm_operations_struct shmem_vm_ops = {
 	.fault		= shmem_fault,
-	.map_pages	= filemap_map_pages,
+	.map_pages	= shmem_map_pages,
 #ifdef CONFIG_NUMA
 	.set_policy     = shmem_set_policy,
 	.get_policy     = shmem_get_policy,
@@ -5282,7 +5309,7 @@ static const struct vm_operations_struct shmem_vm_ops = {
 
 static const struct vm_operations_struct shmem_anon_vm_ops = {
 	.fault		= shmem_fault,
-	.map_pages	= filemap_map_pages,
+	.map_pages	= shmem_map_pages,
 #ifdef CONFIG_NUMA
 	.set_policy     = shmem_set_policy,
 	.get_policy     = shmem_get_policy,
-- 
2.53.0



             reply	other threads:[~2026-03-26 16:26 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-26 16:26 Gregory Price [this message]
2026-03-26 17:07 ` Pedro Falcato
2026-03-26 18:37   ` Gregory Price
2026-03-26 19:16     ` Pedro Falcato
2026-03-26 19:48       ` Gregory Price
2026-03-27  4:35       ` Gregory Price
2026-03-26 19:21 ` Matthew Wilcox
2026-03-26 20:09   ` Gregory Price

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260326162611.693539-1-gourry@gourry.net \
    --to=gourry@gourry.net \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=david@kernel.org \
    --cc=hughd@google.com \
    --cc=kernel-team@meta.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=mhocko@suse.com \
    --cc=rppt@kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=surenb@google.com \
    --cc=vbabka@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox