linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>,
	Matthew Wilcox <willy@infradead.org>, Guo Ren <guoren@kernel.org>,
	Thomas Bogendoerfer <tsbogend@alpha.franken.de>,
	Heiko Carstens <hca@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Alexander Gordeev <agordeev@linux.ibm.com>,
	Christian Borntraeger <borntraeger@linux.ibm.com>,
	Sven Schnelle <svens@linux.ibm.com>,
	"David S . Miller" <davem@davemloft.net>,
	Andreas Larsson <andreas@gaisler.com>,
	Arnd Bergmann <arnd@arndb.de>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	Dan Williams <dan.j.williams@intel.com>,
	Vishal Verma <vishal.l.verma@intel.com>,
	Dave Jiang <dave.jiang@intel.com>,
	Nicolas Pitre <nico@fluxnic.net>,
	Muchun Song <muchun.song@linux.dev>,
	Oscar Salvador <osalvador@suse.de>,
	David Hildenbrand <david@redhat.com>,
	Konstantin Komarov <almaz.alexandrovich@paragon-software.com>,
	Baoquan He <bhe@redhat.com>, Vivek Goyal <vgoyal@redhat.com>,
	Dave Young <dyoung@redhat.com>, Tony Luck <tony.luck@intel.com>,
	Reinette Chatre <reinette.chatre@intel.com>,
	Dave Martin <Dave.Martin@arm.com>,
	James Morse <james.morse@arm.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christian Brauner <brauner@kernel.org>, Jan Kara <jack@suse.cz>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Vlastimil Babka <vbabka@suse.cz>, Mike Rapoport <rppt@kernel.org>,
	Suren Baghdasaryan <surenb@google.com>,
	Michal Hocko <mhocko@suse.com>, Hugh Dickins <hughd@google.com>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	Uladzislau Rezki <urezki@gmail.com>,
	Dmitry Vyukov <dvyukov@google.com>,
	Andrey Konovalov <andreyknvl@gmail.com>,
	Jann Horn <jannh@google.com>, Pedro Falcato <pfalcato@suse.de>,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-fsdevel@vger.kernel.org, linux-csky@vger.kernel.org,
	linux-mips@vger.kernel.org, linux-s390@vger.kernel.org,
	sparclinux@vger.kernel.org, nvdimm@lists.linux.dev,
	linux-cxl@vger.kernel.org, linux-mm@kvack.org,
	ntfs3@lists.linux.dev, kexec@lists.infradead.org,
	kasan-dev@googlegroups.com, Jason Gunthorpe <jgg@nvidia.com>
Subject: [PATCH 10/16] mm/hugetlb: update hugetlbfs to use mmap_prepare, mmap_complete
Date: Mon,  8 Sep 2025 12:10:41 +0100	[thread overview]
Message-ID: <346e2d1e768a2e5bf344c772cfbb0cd1d6f2fd15.1757329751.git.lorenzo.stoakes@oracle.com> (raw)
In-Reply-To: <cover.1757329751.git.lorenzo.stoakes@oracle.com>

We can now update hugetlb to make sure of the new .mmap_prepare() hook, by
deferring the reservation of pages until the VMA is fully established and
handle this in the f_op->mmap_complete() hook.

We hold the VMA write lock throughout so we can't race with faults. rmap
can discover the VMA, but this should not cause a problem.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 fs/hugetlbfs/inode.c | 86 ++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 39 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3cfdf4091001..46d1ddc654c2 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -96,39 +96,14 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
 #define PGOFF_LOFFT_MAX \
 	(((1UL << (PAGE_SHIFT + 1)) - 1) <<  (BITS_PER_LONG - (PAGE_SHIFT + 1)))
 
-static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int hugetlb_file_mmap_complete(struct file *file, struct vm_area_struct *vma,
+				      const void *context)
 {
 	struct inode *inode = file_inode(file);
-	loff_t len, vma_len;
-	int ret;
 	struct hstate *h = hstate_file(file);
-	vm_flags_t vm_flags;
-
-	/*
-	 * vma address alignment (but not the pgoff alignment) has
-	 * already been checked by prepare_hugepage_range.  If you add
-	 * any error returns here, do so after setting VM_HUGETLB, so
-	 * is_vm_hugetlb_page tests below unmap_region go the right
-	 * way when do_mmap unwinds (may be important on powerpc
-	 * and ia64).
-	 */
-	vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
-	vma->vm_ops = &hugetlb_vm_ops;
-
-	/*
-	 * page based offset in vm_pgoff could be sufficiently large to
-	 * overflow a loff_t when converted to byte offset.  This can
-	 * only happen on architectures where sizeof(loff_t) ==
-	 * sizeof(unsigned long).  So, only check in those instances.
-	 */
-	if (sizeof(unsigned long) == sizeof(loff_t)) {
-		if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
-			return -EINVAL;
-	}
-
-	/* must be huge page aligned */
-	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
-		return -EINVAL;
+	vm_flags_t vm_flags = vma->vm_flags;
+	loff_t len, vma_len;
+	int ret = 0;
 
 	vma_len = (loff_t)(vma->vm_end - vma->vm_start);
 	len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
@@ -139,9 +114,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	inode_lock(inode);
 	file_accessed(file);
 
-	ret = -ENOMEM;
-
-	vm_flags = vma->vm_flags;
 	/*
 	 * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
 	 * reserving here. Note: only for SHM hugetlbfs file, the inode
@@ -151,20 +123,55 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 		vm_flags |= VM_NORESERVE;
 
 	if (hugetlb_reserve_pages(inode,
-				vma->vm_pgoff >> huge_page_order(h),
-				len >> huge_page_shift(h), vma,
-				vm_flags) < 0)
+			vma->vm_pgoff >> huge_page_order(h),
+			len >> huge_page_shift(h), vma,
+			vm_flags) < 0) {
+		ret = -ENOMEM;
 		goto out;
+	}
 
-	ret = 0;
 	if (vma->vm_flags & VM_WRITE && inode->i_size < len)
 		i_size_write(inode, len);
+
 out:
 	inode_unlock(inode);
-
 	return ret;
 }
 
+static int hugetlbfs_file_mmap_prepare(struct vm_area_desc *desc)
+{
+	struct file *file = desc->file;
+	struct hstate *h = hstate_file(file);
+
+	/*
+	 * vma address alignment (but not the pgoff alignment) has
+	 * already been checked by prepare_hugepage_range.  If you add
+	 * any error returns here, do so after setting VM_HUGETLB, so
+	 * is_vm_hugetlb_page tests below unmap_region go the right
+	 * way when do_mmap unwinds (may be important on powerpc
+	 * and ia64).
+	 */
+	desc->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+	desc->vm_ops = &hugetlb_vm_ops;
+
+	/*
+	 * page based offset in vm_pgoff could be sufficiently large to
+	 * overflow a loff_t when converted to byte offset.  This can
+	 * only happen on architectures where sizeof(loff_t) ==
+	 * sizeof(unsigned long).  So, only check in those instances.
+	 */
+	if (sizeof(unsigned long) == sizeof(loff_t)) {
+		if (desc->pgoff & PGOFF_LOFFT_MAX)
+			return -EINVAL;
+	}
+
+	/* must be huge page aligned */
+	if (desc->pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
+		return -EINVAL;
+
+	return 0;
+}
+
 /*
  * Called under mmap_write_lock(mm).
  */
@@ -1219,7 +1226,8 @@ static void init_once(void *foo)
 
 static const struct file_operations hugetlbfs_file_operations = {
 	.read_iter		= hugetlbfs_read_iter,
-	.mmap			= hugetlbfs_file_mmap,
+	.mmap_prepare		= hugetlbfs_file_mmap_prepare,
+	.mmap_complete		= hugetlb_file_mmap_complete,
 	.fsync			= noop_fsync,
 	.get_unmapped_area	= hugetlb_get_unmapped_area,
 	.llseek			= default_llseek,
-- 
2.51.0



  parent reply	other threads:[~2025-09-08 11:12 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-08 11:10 [PATCH 00/16] expand mmap_prepare functionality, port more users Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 01/16] mm/shmem: update shmem to use mmap_prepare Lorenzo Stoakes
2025-09-08 14:59   ` David Hildenbrand
2025-09-08 15:28     ` Lorenzo Stoakes
2025-09-09  3:19   ` Baolin Wang
2025-09-09  9:08     ` Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 02/16] device/dax: update devdax " Lorenzo Stoakes
2025-09-08 15:03   ` David Hildenbrand
2025-09-08 15:28     ` Lorenzo Stoakes
2025-09-08 15:31       ` David Hildenbrand
2025-09-08 11:10 ` [PATCH 03/16] mm: add vma_desc_size(), vma_desc_pages() helpers Lorenzo Stoakes
2025-09-08 12:51   ` Jason Gunthorpe
2025-09-08 13:12     ` Lorenzo Stoakes
2025-09-08 13:32       ` Jason Gunthorpe
2025-09-08 14:09         ` Lorenzo Stoakes
2025-09-08 14:20           ` Jason Gunthorpe
2025-09-08 14:47             ` Lorenzo Stoakes
2025-09-08 15:07               ` David Hildenbrand
2025-09-08 15:35                 ` Lorenzo Stoakes
2025-09-08 17:30                   ` David Hildenbrand
2025-09-09  9:21                     ` Lorenzo Stoakes
2025-09-08 15:16               ` Jason Gunthorpe
2025-09-08 15:24                 ` David Hildenbrand
2025-09-08 15:33                   ` Jason Gunthorpe
2025-09-08 15:46                     ` David Hildenbrand
2025-09-08 15:50                       ` David Hildenbrand
2025-09-08 15:56                         ` Jason Gunthorpe
2025-09-08 17:36                           ` David Hildenbrand
2025-09-08 20:24                             ` Lorenzo Stoakes
2025-09-08 15:33                   ` Lorenzo Stoakes
2025-09-08 15:10   ` David Hildenbrand
2025-09-08 11:10 ` [PATCH 04/16] relay: update relay to use mmap_prepare Lorenzo Stoakes
2025-09-08 15:15   ` David Hildenbrand
2025-09-08 15:29     ` Lorenzo Stoakes
2025-09-09  4:09   ` kernel test robot
2025-09-09  9:00     ` Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 05/16] mm/vma: rename mmap internal functions to avoid confusion Lorenzo Stoakes
2025-09-08 15:19   ` David Hildenbrand
2025-09-08 15:31     ` Lorenzo Stoakes
2025-09-08 17:38       ` David Hildenbrand
2025-09-09  9:04         ` Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 06/16] mm: introduce the f_op->mmap_complete, mmap_abort hooks Lorenzo Stoakes
2025-09-08 12:55   ` Jason Gunthorpe
2025-09-08 13:19     ` Lorenzo Stoakes
2025-09-08 15:27   ` David Hildenbrand
2025-09-09  9:13     ` Lorenzo Stoakes
2025-09-09  9:26       ` David Hildenbrand
2025-09-09  9:37         ` Lorenzo Stoakes
2025-09-09 16:43           ` Suren Baghdasaryan
2025-09-09 17:36             ` Lorenzo Stoakes
2025-09-09 16:44   ` Suren Baghdasaryan
2025-09-08 11:10 ` [PATCH 07/16] doc: update porting, vfs documentation for mmap_[complete, abort] Lorenzo Stoakes
2025-09-08 23:17   ` Randy Dunlap
2025-09-09  9:02     ` Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 08/16] mm: add remap_pfn_range_prepare(), remap_pfn_range_complete() Lorenzo Stoakes
2025-09-08 13:00   ` Jason Gunthorpe
2025-09-08 13:27     ` Lorenzo Stoakes
2025-09-08 13:35       ` Jason Gunthorpe
2025-09-08 14:18         ` Lorenzo Stoakes
2025-09-08 16:03           ` Jason Gunthorpe
2025-09-08 16:07             ` Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 09/16] mm: introduce io_remap_pfn_range_prepare, complete Lorenzo Stoakes
2025-09-08 11:10 ` Lorenzo Stoakes [this message]
2025-09-08 13:11   ` [PATCH 10/16] mm/hugetlb: update hugetlbfs to use mmap_prepare, mmap_complete Jason Gunthorpe
2025-09-08 13:37     ` Lorenzo Stoakes
2025-09-08 13:52       ` Jason Gunthorpe
2025-09-08 14:19         ` Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 11/16] mm: update mem char driver " Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 12/16] mm: update resctl to use mmap_prepare, mmap_complete, mmap_abort Lorenzo Stoakes
2025-09-08 13:24   ` Jason Gunthorpe
2025-09-08 13:40     ` Lorenzo Stoakes
2025-09-08 14:27     ` Lorenzo Stoakes
2025-09-09  3:26   ` kernel test robot
2025-09-09  9:27     ` Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 13/16] mm: update cramfs to use mmap_prepare, mmap_complete Lorenzo Stoakes
2025-09-08 13:27   ` Jason Gunthorpe
2025-09-08 13:44     ` Lorenzo Stoakes
2025-09-09  9:41   ` kernel test robot
2025-09-09  9:51     ` Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 14/16] fs/proc: add proc_mmap_[prepare, complete] hooks for procfs Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 15/16] fs/proc: update vmcore to use .proc_mmap_[prepare, complete] Lorenzo Stoakes
2025-09-08 11:10 ` [PATCH 16/16] kcov: update kcov to use mmap_prepare, mmap_complete Lorenzo Stoakes
2025-09-08 13:30   ` Jason Gunthorpe
2025-09-08 13:47     ` Lorenzo Stoakes
2025-09-08 13:27 ` [PATCH 00/16] expand mmap_prepare functionality, port more users Jan Kara
2025-09-08 14:48   ` Lorenzo Stoakes
2025-09-08 15:04     ` Jason Gunthorpe
2025-09-08 15:15       ` Lorenzo Stoakes
2025-09-09  8:31 ` Alexander Gordeev
2025-09-09  8:59   ` Lorenzo Stoakes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=346e2d1e768a2e5bf344c772cfbb0cd1d6f2fd15.1757329751.git.lorenzo.stoakes@oracle.com \
    --to=lorenzo.stoakes@oracle.com \
    --cc=Dave.Martin@arm.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=agordeev@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=almaz.alexandrovich@paragon-software.com \
    --cc=andreas@gaisler.com \
    --cc=andreyknvl@gmail.com \
    --cc=arnd@arndb.de \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=bhe@redhat.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=brauner@kernel.org \
    --cc=corbet@lwn.net \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=davem@davemloft.net \
    --cc=david@redhat.com \
    --cc=dvyukov@google.com \
    --cc=dyoung@redhat.com \
    --cc=gor@linux.ibm.com \
    --cc=gregkh@linuxfoundation.org \
    --cc=guoren@kernel.org \
    --cc=hca@linux.ibm.com \
    --cc=hughd@google.com \
    --cc=jack@suse.cz \
    --cc=james.morse@arm.com \
    --cc=jannh@google.com \
    --cc=jgg@nvidia.com \
    --cc=kasan-dev@googlegroups.com \
    --cc=kexec@lists.infradead.org \
    --cc=linux-csky@vger.kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mips@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mhocko@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=nico@fluxnic.net \
    --cc=ntfs3@lists.linux.dev \
    --cc=nvdimm@lists.linux.dev \
    --cc=osalvador@suse.de \
    --cc=pfalcato@suse.de \
    --cc=reinette.chatre@intel.com \
    --cc=rppt@kernel.org \
    --cc=sparclinux@vger.kernel.org \
    --cc=surenb@google.com \
    --cc=svens@linux.ibm.com \
    --cc=tony.luck@intel.com \
    --cc=tsbogend@alpha.franken.de \
    --cc=urezki@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=vgoyal@redhat.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vishal.l.verma@intel.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox