linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>,
	Matthew Wilcox <willy@infradead.org>, Guo Ren <guoren@kernel.org>,
	Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
	Heiko Carstens <hca@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Alexander Gordeev <agordeev@linux.ibm.com>,
	Christian Borntraeger <borntraeger@linux.ibm.com>,
	Sven Schnelle <svens@linux.ibm.com>,
	"David S . Miller" <davem@davemloft.net>,
	Andreas Larsson <andreas@gaisler.com>,
	Arnd Bergmann <arnd@arndb.de>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Dan Williams <dan.j.williams@intel.com>,
	Vishal Verma <vishal.l.verma@intel.com>,
	Dave Jiang <dave.jiang@intel.com>,
	Nicolas Pitre <nico@fluxnic.net>,
	Muchun Song <muchun.song@linux.dev>,
	Oscar Salvador <osalvador@suse.de>,
	David Hildenbrand <david@redhat.com>,
	Konstantin Komarov <almaz.alexandrovich@paragon-software.com>,
	Baoquan He <bhe@redhat.com>, Vivek Goyal <vgoyal@redhat.com>,
	Dave Young <dyoung@redhat.com>, Tony Luck <tony.luck@intel.com>,
	Reinette Chatre <reinette.chatre@intel.com>,
	Dave Martin <Dave.Martin@arm.com>,
	James Morse <james.morse@arm.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Vlastimil Babka <vbabka@suse.cz>, Mike Rapoport <rppt@kernel.org>,
	Suren Baghdasaryan <surenb@google.com>,
	Michal Hocko <mhocko@suse.com>, Hugh Dickins <hughd@google.com>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	Uladzislau Rezki <urezki@gmail.com>,
	Dmitry Vyukov <dvyukov@google.com>,
	Andrey Konovalov <andreyknvl@gmail.com>,
	Jann Horn <jannh@google.com>, Pedro Falcato <pfalcato@suse.de>,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-csky@vger.kernel.org,
	linux-mips@vger.kernel.org, linux-s390@vger.kernel.org,
	sparclinux@vger.kernel.org, nvdimm@lists.linux.dev,
	linux-cxl@vger.kernel.org, linux-mm@kvack.org,
	ntfs3@lists.linux.dev, kexec@lists.infradead.org,
	kasan-dev@googlegroups.com, Jason Gunthorpe <jgg@nvidia.com>
Subject: [PATCH v2 11/16] mm: update mem char driver to use mmap_prepare
Date: Wed, 10 Sep 2025 21:22:06 +0100	[thread overview]
Message-ID: <aeee6a4896304d6dc7515e79d74f8bc5ec424415.1757534913.git.lorenzo.stoakes@oracle.com> (raw)
In-Reply-To: <cover.1757534913.git.lorenzo.stoakes@oracle.com>

Update the mem char driver (backing /dev/mem and /dev/zero) to use
f_op->mmap_prepare hook rather than the deprecated f_op->mmap.

The /dev/zero implementation has a very unique and rather concerning
characteristic in that it converts MAP_PRIVATE mmap() mappings anonymous
when they are, in fact, not.

The new f_op->mmap_prepare() can support this, but rather than introducing
a helper function to perform this hack (and risk introducing other users),
simply set desc->vm_op to NULL here and add a comment describing what's
going on.

We also introduce shmem_zero_setup_desc() to allow for the shared mapping
case via an f_op->mmap_prepare() hook, and generalise the code between this
and shmem_zero_setup().

We also use the desc->action_error_hook to filter the remap error to
-EAGAIN to keep behaviour consistent.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 drivers/char/mem.c       | 75 ++++++++++++++++++++++------------------
 include/linux/shmem_fs.h |  3 +-
 mm/shmem.c               | 40 ++++++++++++++++-----
 3 files changed, 76 insertions(+), 42 deletions(-)

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 34b815901b20..23194788ee41 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -304,13 +304,13 @@ static unsigned zero_mmap_capabilities(struct file *file)
 }
 
 /* can't do an in-place private mapping if there's no MMU */
-static inline int private_mapping_ok(struct vm_area_struct *vma)
+static inline int private_mapping_ok(struct vm_area_desc *desc)
 {
-	return is_nommu_shared_mapping(vma->vm_flags);
+	return is_nommu_shared_mapping(desc->vm_flags);
 }
 #else
 
-static inline int private_mapping_ok(struct vm_area_struct *vma)
+static inline int private_mapping_ok(struct vm_area_desc *desc)
 {
 	return 1;
 }
@@ -322,46 +322,50 @@ static const struct vm_operations_struct mmap_mem_ops = {
 #endif
 };
 
-static int mmap_mem(struct file *file, struct vm_area_struct *vma)
+static int mmap_filter_error(int err)
 {
-	size_t size = vma->vm_end - vma->vm_start;
-	phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+	return -EAGAIN;
+}
+
+static int mmap_mem_prepare(struct vm_area_desc *desc)
+{
+	struct file *file = desc->file;
+	const size_t size = vma_desc_size(desc);
+	const phys_addr_t offset = (phys_addr_t)desc->pgoff << PAGE_SHIFT;
 
 	/* Does it even fit in phys_addr_t? */
-	if (offset >> PAGE_SHIFT != vma->vm_pgoff)
+	if (offset >> PAGE_SHIFT != desc->pgoff)
 		return -EINVAL;
 
 	/* It's illegal to wrap around the end of the physical address space. */
 	if (offset + (phys_addr_t)size - 1 < offset)
 		return -EINVAL;
 
-	if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size))
+	if (!valid_mmap_phys_addr_range(desc->pgoff, size))
 		return -EINVAL;
 
-	if (!private_mapping_ok(vma))
+	if (!private_mapping_ok(desc))
 		return -ENOSYS;
 
-	if (!range_is_allowed(vma->vm_pgoff, size))
+	if (!range_is_allowed(desc->pgoff, size))
 		return -EPERM;
 
-	if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size,
-						&vma->vm_page_prot))
+	if (!phys_mem_access_prot_allowed(file, desc->pgoff, size,
+					  &desc->page_prot))
 		return -EINVAL;
 
-	vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
-						 size,
-						 vma->vm_page_prot);
+	desc->page_prot = phys_mem_access_prot(file, desc->pgoff,
+					       size,
+					       desc->page_prot);
 
-	vma->vm_ops = &mmap_mem_ops;
+	desc->vm_ops = &mmap_mem_ops;
 
 	/* Remap-pfn-range will mark the range VM_IO */
-	if (remap_pfn_range(vma,
-			    vma->vm_start,
-			    vma->vm_pgoff,
-			    size,
-			    vma->vm_page_prot)) {
-		return -EAGAIN;
-	}
+	mmap_action_remap(&desc->action, desc->start, desc->pgoff, size,
+			desc->page_prot);
+	/* We filter remap errors to -EAGAIN. */
+	desc->action.error_hook = mmap_filter_error;
+
 	return 0;
 }
 
@@ -501,14 +505,18 @@ static ssize_t read_zero(struct file *file, char __user *buf,
 	return cleared;
 }
 
-static int mmap_zero(struct file *file, struct vm_area_struct *vma)
+static int mmap_prepare_zero(struct vm_area_desc *desc)
 {
 #ifndef CONFIG_MMU
 	return -ENOSYS;
 #endif
-	if (vma->vm_flags & VM_SHARED)
-		return shmem_zero_setup(vma);
-	vma_set_anonymous(vma);
+	if (desc->vm_flags & VM_SHARED)
+		return shmem_zero_setup_desc(desc);
+	/*
+	 * This is a highly unique situation where we mark a MAP_PRIVATE mapping
+	 *of /dev/zero anonymous, despite it not being.
+	 */
+	desc->vm_ops = NULL;
 	return 0;
 }
 
@@ -526,10 +534,11 @@ static unsigned long get_unmapped_area_zero(struct file *file,
 {
 	if (flags & MAP_SHARED) {
 		/*
-		 * mmap_zero() will call shmem_zero_setup() to create a file,
-		 * so use shmem's get_unmapped_area in case it can be huge;
-		 * and pass NULL for file as in mmap.c's get_unmapped_area(),
-		 * so as not to confuse shmem with our handle on "/dev/zero".
+		 * mmap_prepare_zero() will call shmem_zero_setup() to create a
+		 * file, so use shmem's get_unmapped_area in case it can be
+		 * huge; and pass NULL for file as in mmap.c's
+		 * get_unmapped_area(), so as not to confuse shmem with our
+		 * handle on "/dev/zero".
 		 */
 		return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags);
 	}
@@ -632,7 +641,7 @@ static const struct file_operations __maybe_unused mem_fops = {
 	.llseek		= memory_lseek,
 	.read		= read_mem,
 	.write		= write_mem,
-	.mmap		= mmap_mem,
+	.mmap_prepare	= mmap_mem_prepare,
 	.open		= open_mem,
 #ifndef CONFIG_MMU
 	.get_unmapped_area = get_unmapped_area_mem,
@@ -668,7 +677,7 @@ static const struct file_operations zero_fops = {
 	.write_iter	= write_iter_zero,
 	.splice_read	= copy_splice_read,
 	.splice_write	= splice_write_zero,
-	.mmap		= mmap_zero,
+	.mmap_prepare	= mmap_prepare_zero,
 	.get_unmapped_area = get_unmapped_area_zero,
 #ifndef CONFIG_MMU
 	.mmap_capabilities = zero_mmap_capabilities,
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 0e47465ef0fd..5b368f9549d6 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -94,7 +94,8 @@ extern struct file *shmem_kernel_file_setup(const char *name, loff_t size,
 					    unsigned long flags);
 extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt,
 		const char *name, loff_t size, unsigned long flags);
-extern int shmem_zero_setup(struct vm_area_struct *);
+int shmem_zero_setup(struct vm_area_struct *vma);
+int shmem_zero_setup_desc(struct vm_area_desc *desc);
 extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags);
 extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts);
diff --git a/mm/shmem.c b/mm/shmem.c
index 990e33c6a776..cb6ff00eb4cb 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -5893,14 +5893,9 @@ struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name,
 }
 EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt);
 
-/**
- * shmem_zero_setup - setup a shared anonymous mapping
- * @vma: the vma to be mmapped is prepared by do_mmap
- */
-int shmem_zero_setup(struct vm_area_struct *vma)
+static struct file *__shmem_zero_setup(unsigned long start, unsigned long end, vm_flags_t vm_flags)
 {
-	struct file *file;
-	loff_t size = vma->vm_end - vma->vm_start;
+	loff_t size = end - start;
 
 	/*
 	 * Cloning a new file under mmap_lock leads to a lock ordering conflict
@@ -5908,7 +5903,17 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 	 * accessible to the user through its mapping, use S_PRIVATE flag to
 	 * bypass file security, in the same way as shmem_kernel_file_setup().
 	 */
-	file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags);
+	return shmem_kernel_file_setup("dev/zero", size, vm_flags);
+}
+
+/**
+ * shmem_zero_setup - setup a shared anonymous mapping
+ * @vma: the vma to be mmapped is prepared by do_mmap
+ */
+int shmem_zero_setup(struct vm_area_struct *vma)
+{
+	struct file *file = __shmem_zero_setup(vma->vm_start, vma->vm_end, vma->vm_flags);
+
 	if (IS_ERR(file))
 		return PTR_ERR(file);
 
@@ -5920,6 +5925,25 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 	return 0;
 }
 
+/**
+ * shmem_zero_setup_desc - same as shmem_zero_setup, but determined by VMA
+ * descriptor for convenience.
+ * @desc: Describes VMA
+ * Returns: 0 on success, or error
+ */
+int shmem_zero_setup_desc(struct vm_area_desc *desc)
+{
+	struct file *file = __shmem_zero_setup(desc->start, desc->end, desc->vm_flags);
+
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	desc->vm_file = file;
+	desc->vm_ops = &shmem_anon_vm_ops;
+
+	return 0;
+}
+
 /**
  * shmem_read_folio_gfp - read into page cache, using specified page allocation flags.
  * @mapping:	the folio's address_space
-- 
2.51.0



  parent reply	other threads:[~2025-09-10 20:23 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-10 20:21 [PATCH v2 00/16] expand mmap_prepare functionality, port more users Lorenzo Stoakes
2025-09-10 20:21 ` [PATCH v2 01/16] mm/shmem: update shmem to use mmap_prepare Lorenzo Stoakes
2025-09-11  8:32   ` Jan Kara
2025-09-10 20:21 ` [PATCH v2 02/16] device/dax: update devdax " Lorenzo Stoakes
2025-09-11  8:35   ` Jan Kara
2025-09-10 20:21 ` [PATCH v2 03/16] mm: add vma_desc_size(), vma_desc_pages() helpers Lorenzo Stoakes
2025-09-11  8:36   ` Jan Kara
2025-09-12 17:56   ` David Hildenbrand
2025-09-15 10:12     ` Lorenzo Stoakes
2025-09-10 20:21 ` [PATCH v2 04/16] relay: update relay to use mmap_prepare Lorenzo Stoakes
2025-09-11  8:38   ` Jan Kara
2025-09-10 20:22 ` [PATCH v2 05/16] mm/vma: rename __mmap_prepare() function to avoid confusion Lorenzo Stoakes
2025-09-12 17:57   ` David Hildenbrand
2025-09-15 10:12     ` Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 06/16] mm: add remap_pfn_range_prepare(), remap_pfn_range_complete() Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 07/16] mm: introduce io_remap_pfn_range_[prepare, complete]() Lorenzo Stoakes
2025-09-12 10:23   ` Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 08/16] mm: add ability to take further action in vm_area_desc Lorenzo Stoakes
2025-09-11 22:07   ` Reinette Chatre
2025-09-12 10:18     ` Lorenzo Stoakes
2025-09-12 10:25   ` Lorenzo Stoakes
2025-09-13 22:54   ` Chris Mason
2025-09-15  9:56     ` Lorenzo Stoakes
2025-09-15 10:09   ` Lorenzo Stoakes
2025-09-15 12:11   ` Jason Gunthorpe
2025-09-15 12:23     ` Lorenzo Stoakes
2025-09-15 12:42       ` Jason Gunthorpe
2025-09-15 12:54         ` Lorenzo Stoakes
2025-09-15 13:11           ` Jason Gunthorpe
2025-09-15 13:51             ` Lorenzo Stoakes
2025-09-15 14:34               ` Jason Gunthorpe
2025-09-15 15:04                 ` Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 09/16] doc: update porting, vfs documentation for mmap_prepare actions Lorenzo Stoakes
2025-09-11  8:55   ` Jan Kara
2025-09-12 10:19     ` Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 10/16] mm/hugetlbfs: update hugetlbfs to use mmap_prepare Lorenzo Stoakes
2025-09-10 20:22 ` Lorenzo Stoakes [this message]
2025-09-18 19:11   ` [PATCH v2 11/16] mm: update mem char driver " Chris Mason
2025-09-19  5:13     ` Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 12/16] mm: update resctl " Lorenzo Stoakes
2025-09-11 22:07   ` Reinette Chatre
2025-09-12 10:14     ` Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 13/16] mm: update cramfs " Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 14/16] fs/proc: add the proc_mmap_prepare hook for procfs Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 15/16] fs/proc: update vmcore to use .proc_mmap_prepare Lorenzo Stoakes
2025-09-12 10:14   ` Lorenzo Stoakes
2025-09-10 20:22 ` [PATCH v2 16/16] kcov: update kcov to use mmap_prepare Lorenzo Stoakes
2025-09-15 12:16   ` Jason Gunthorpe
2025-09-15 12:43     ` Lorenzo Stoakes
2025-09-15 12:48       ` Jason Gunthorpe
2025-09-15 13:01         ` Lorenzo Stoakes
2025-09-18 19:45   ` Chris Mason
2025-09-19  5:10     ` Lorenzo Stoakes
2025-09-10 21:38 ` [PATCH v2 00/16] expand mmap_prepare functionality, port more users Andrew Morton
2025-09-11  5:19   ` Lorenzo Stoakes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aeee6a4896304d6dc7515e79d74f8bc5ec424415.1757534913.git.lorenzo.stoakes@oracle.com \
    --to=lorenzo.stoakes@oracle.com \
    --cc=Dave.Martin@arm.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=agordeev@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=almaz.alexandrovich@paragon-software.com \
    --cc=andreas@gaisler.com \
    --cc=andreyknvl@gmail.com \
    --cc=arnd@arndb.de \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=bhe@redhat.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=brauner@kernel.org \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=davem@davemloft.net \
    --cc=david@redhat.com \
    --cc=dvyukov@google.com \
    --cc=dyoung@redhat.com \
    --cc=gor@linux.ibm.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=guoren@kernel.org \
    --cc=hca@linux.ibm.com \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=james.morse@arm.com \
    --cc=jannh@google.com \
    --cc=jgg@nvidia.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=kexec@lists.infradead.org \
    --cc=linux-csky@vger.kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mips@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mhocko@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=nico@fluxnic.net \
    --cc=ntfs3@lists.linux.dev \
    --cc=nvdimm@lists.linux.dev \
    --cc=osalvador@suse.de \
    --cc=pfalcato@suse.de \
    --cc=reinette.chatre@intel.com \
    --cc=rppt@kernel.org \
    --cc=sparclinux@vger.kernel.org \
    --cc=surenb@google.com \
    --cc=svens@linux.ibm.com \
    --cc=tony.luck@intel.com \
    --cc=tsbogend@alpha.franken.de \
    --cc=urezki@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=vgoyal@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vishal.l.verma@intel.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox