linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Anthony Yznaga <anthony.yznaga@oracle.com>
To: linux-mm@kvack.org
Cc: akpm@linux-foundation.org, andreyknvl@gmail.com, arnd@arndb.de,
	bp@alien8.de, brauner@kernel.org, bsegall@google.com,
	corbet@lwn.net, dave.hansen@linux.intel.com, david@redhat.com,
	dietmar.eggemann@arm.com, ebiederm@xmission.com, hpa@zytor.com,
	jakub.wartak@mailbox.org, jannh@google.com,
	juri.lelli@redhat.com, khalid@kernel.org,
	liam.howlett@oracle.com, linyongting@bytedance.com,
	lorenzo.stoakes@oracle.com, luto@kernel.org,
	markhemm@googlemail.com, maz@kernel.org, mhiramat@kernel.org,
	mgorman@suse.de, mhocko@suse.com, mingo@redhat.com,
	muchun.song@linux.dev, neilb@suse.de, osalvador@suse.de,
	pcc@google.com, peterz@infradead.org, pfalcato@suse.de,
	rostedt@goodmis.org, rppt@kernel.org, shakeel.butt@linux.dev,
	surenb@google.com, tglx@linutronix.de, vasily.averin@linux.dev,
	vbabka@suse.cz, vincent.guittot@linaro.org,
	viro@zeniv.linux.org.uk, vschneid@redhat.com,
	willy@infradead.org, x86@kernel.org, xhao@linux.alibaba.com,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-arch@vger.kernel.org
Subject: [PATCH v3 07/22] mm/mshare: Add mmap support
Date: Tue, 19 Aug 2025 18:04:00 -0700	[thread overview]
Message-ID: <20250820010415.699353-8-anthony.yznaga@oracle.com> (raw)
In-Reply-To: <20250820010415.699353-1-anthony.yznaga@oracle.com>

From: Khalid Aziz <khalid@kernel.org>

Add support for mapping an mshare region into a process after the
region has been established in msharefs. Disallow operations that
could split the resulting msharefs vma such as partial unmaps and
protection changes. Fault handling, mapping, unmapping, and
protection changes for objects mapped into an mshare region will
be done using the shared vmas created for them in the host mm. This
functionality will be added in later patches.

Signed-off-by: Khalid Aziz <khalid@kernel.org>
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
 mm/mshare.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 132 insertions(+), 1 deletion(-)

diff --git a/mm/mshare.c b/mm/mshare.c
index bf859b176e09..e0dc42602f7f 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -15,16 +15,19 @@
 
 #include <linux/fs.h>
 #include <linux/fs_context.h>
+#include <linux/mman.h>
 #include <uapi/linux/magic.h>
 #include <linux/falloc.h>
 
 const unsigned long mshare_align = P4D_SIZE;
+const unsigned long mshare_base = mshare_align;
 
 #define MSHARE_INITIALIZED	0x1
 
 struct mshare_data {
 	struct mm_struct *mm;
 	refcount_t ref;
+	unsigned long start;
 	unsigned long size;
 	unsigned long flags;
 };
@@ -34,6 +37,130 @@ static inline bool mshare_is_initialized(struct mshare_data *m_data)
 	return test_bit(MSHARE_INITIALIZED, &m_data->flags);
 }
 
+static int mshare_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
+{
+	return -EINVAL;
+}
+
+static int mshare_vm_op_mprotect(struct vm_area_struct *vma, unsigned long start,
+				 unsigned long end, unsigned long newflags)
+{
+	return -EINVAL;
+}
+
+static const struct vm_operations_struct msharefs_vm_ops = {
+	.may_split = mshare_vm_op_split,
+	.mprotect = mshare_vm_op_mprotect,
+};
+
+/*
+ * msharefs_mmap() - mmap an mshare region
+ */
+static int
+msharefs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct mshare_data *m_data = file->private_data;
+
+	vma->vm_private_data = m_data;
+	vm_flags_set(vma, VM_MSHARE | VM_DONTEXPAND);
+	vma->vm_ops = &msharefs_vm_ops;
+
+	return 0;
+}
+
+static unsigned long
+msharefs_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct vm_unmapped_area_info info = {};
+
+	info.length = len;
+	info.low_limit = current->mm->mmap_base;
+	info.high_limit = arch_get_mmap_end(addr, len, flags);
+	info.align_mask = PAGE_MASK & (mshare_align - 1);
+	return vm_unmapped_area(&info);
+}
+
+static unsigned long
+msharefs_get_unmapped_area_topdown(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct vm_unmapped_area_info info = {};
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.length = len;
+	info.low_limit = PAGE_SIZE;
+	info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+	info.align_mask = PAGE_MASK & (mshare_align - 1);
+	addr = vm_unmapped_area(&info);
+
+	/*
+	 * A failed mmap() very likely causes application failure,
+	 * so fall back to the bottom-up function here. This scenario
+	 * can happen with large stack limits and large mmap()
+	 * allocations.
+	 */
+	if (unlikely(offset_in_page(addr))) {
+		VM_BUG_ON(addr != -ENOMEM);
+		info.flags = 0;
+		info.low_limit = current->mm->mmap_base;
+		info.high_limit = arch_get_mmap_end(addr, len, flags);
+		addr = vm_unmapped_area(&info);
+	}
+
+	return addr;
+}
+
+static unsigned long
+msharefs_get_unmapped_area(struct file *file, unsigned long addr,
+		unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+	struct mshare_data *m_data = file->private_data;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma, *prev;
+	unsigned long mshare_start, mshare_size;
+	const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
+
+	mmap_assert_write_locked(mm);
+
+	if ((flags & MAP_TYPE) == MAP_PRIVATE)
+		return -EINVAL;
+
+	if (!mshare_is_initialized(m_data))
+		return -EINVAL;
+
+	mshare_start = m_data->start;
+	mshare_size = m_data->size;
+
+	if (len != mshare_size)
+		return -EINVAL;
+
+	if (len > mmap_end - mmap_min_addr)
+		return -ENOMEM;
+
+	if (flags & MAP_FIXED) {
+		if (!IS_ALIGNED(addr, mshare_align))
+			return -EINVAL;
+		return addr;
+	}
+
+	if (addr) {
+		addr = ALIGN(addr, mshare_align);
+		vma = find_vma_prev(mm, addr, &prev);
+		if (mmap_end - len >= addr && addr >= mmap_min_addr &&
+		    (!vma || addr + len <= vm_start_gap(vma)) &&
+		    (!prev || addr >= vm_end_gap(prev)))
+			return addr;
+	}
+
+	if (!mm_flags_test(MMF_TOPDOWN, mm))
+		return msharefs_get_unmapped_area_bottomup(file, addr, len,
+				pgoff, flags);
+	else
+		return msharefs_get_unmapped_area_topdown(file, addr, len,
+				pgoff, flags);
+}
+
 static int msharefs_set_size(struct mshare_data *m_data, unsigned long size)
 {
 	int error = -EINVAL;
@@ -87,6 +214,8 @@ static const struct inode_operations msharefs_file_inode_ops;
 
 static const struct file_operations msharefs_file_operations = {
 	.open			= simple_open,
+	.mmap			= msharefs_mmap,
+	.get_unmapped_area	= msharefs_get_unmapped_area,
 	.fallocate		= msharefs_fallocate,
 };
 
@@ -101,12 +230,14 @@ msharefs_fill_mm(struct inode *inode)
 	if (!mm)
 		return -ENOMEM;
 
-	mm->mmap_base = mm->task_size = 0;
+	mm->mmap_base = mshare_base;
+	mm->task_size = 0;
 
 	m_data = kzalloc(sizeof(*m_data), GFP_KERNEL);
 	if (!m_data)
 		goto err_free;
 	m_data->mm = mm;
+	m_data->start = mshare_base;
 
 	refcount_set(&m_data->ref, 1);
 	inode->i_private = m_data;
-- 
2.47.1



  parent reply	other threads:[~2025-08-20  1:05 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-20  1:03 [PATCH v3 00/22] Add support for shared PTEs across processes Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 01/22] mm: Add msharefs filesystem Anthony Yznaga
2025-09-08 18:29   ` Liam R. Howlett
2025-09-08 19:09     ` Anthony Yznaga
2025-09-10 12:14   ` Pedro Falcato
2025-09-10 12:46     ` David Hildenbrand
2025-08-20  1:03 ` [PATCH v3 02/22] mm/mshare: pre-populate msharefs with information file Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 03/22] mm/mshare: make msharefs writable and support directories Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 04/22] mm/mshare: allocate an mm_struct for msharefs files Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 05/22] mm/mshare: add ways to set the size of an mshare region Anthony Yznaga
2025-08-20  1:03 ` [PATCH v3 06/22] mm/mshare: Add a vma flag to indicate " Anthony Yznaga
2025-09-08 18:45   ` David Hildenbrand
2025-09-08 18:56     ` Anthony Yznaga
2025-09-08 19:02       ` David Hildenbrand
2025-09-08 19:03         ` Anthony Yznaga
2025-08-20  1:04 ` Anthony Yznaga [this message]
2025-08-20 19:02   ` [PATCH v3 07/22] mm/mshare: Add mmap support kernel test robot
2025-08-20  1:04 ` [PATCH v3 08/22] mm/mshare: flush all TLBs when updating PTEs in an mshare range Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 09/22] sched/numa: do not scan msharefs vmas Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 10/22] mm: add mmap_read_lock_killable_nested() Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 11/22] mm: add and use unmap_page_range vm_ops hook Anthony Yznaga
2025-08-21 15:40   ` kernel test robot
2025-08-20  1:04 ` [PATCH v3 12/22] mm: introduce PUD page table shared count Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 13/22] mm/mshare: prepare for page table sharing support Anthony Yznaga
2025-09-15 15:27   ` Lorenzo Stoakes
2025-08-20  1:04 ` [PATCH v3 14/22] x86/mm: enable page table sharing Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 15/22] mm: create __do_mmap() to take an mm_struct * arg Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 16/22] mm: pass the mm in vma_munmap_struct Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 17/22] sched/mshare: mshare ownership Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 18/22] mm/mshare: Add an ioctl for mapping objects in an mshare region Anthony Yznaga
2025-08-20 20:36   ` kernel test robot
2025-08-20  1:04 ` [PATCH v3 19/22] mm/mshare: Add an ioctl for unmapping " Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 20/22] mm/mshare: support mapping files and anon hugetlb " Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 21/22] mm/mshare: provide a way to identify an mm as an mshare host mm Anthony Yznaga
2025-08-20  1:04 ` [PATCH v3 22/22] mm/mshare: charge fault handling allocations to the mshare owner Anthony Yznaga
2025-09-08 18:50   ` David Hildenbrand
2025-09-08 19:21     ` Anthony Yznaga
2025-09-08 20:28       ` David Hildenbrand
2025-09-08 20:55         ` Anthony Yznaga
2025-09-08 20:32 ` [PATCH v3 00/22] Add support for shared PTEs across processes David Hildenbrand
2025-09-08 20:59   ` Matthew Wilcox
2025-09-08 21:14     ` Anthony Yznaga
2025-09-09  7:53       ` David Hildenbrand
2025-09-09 18:29         ` Anthony Yznaga
2025-09-09 19:06         ` Lorenzo Stoakes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250820010415.699353-8-anthony.yznaga@oracle.com \
    --to=anthony.yznaga@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=andreyknvl@gmail.com \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=brauner@kernel.org \
    --cc=bsegall@google.com \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=ebiederm@xmission.com \
    --cc=hpa@zytor.com \
    --cc=jakub.wartak@mailbox.org \
    --cc=jannh@google.com \
    --cc=juri.lelli@redhat.com \
    --cc=khalid@kernel.org \
    --cc=liam.howlett@oracle.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linyongting@bytedance.com \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=luto@kernel.org \
    --cc=markhemm@googlemail.com \
    --cc=maz@kernel.org \
    --cc=mgorman@suse.de \
    --cc=mhiramat@kernel.org \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=muchun.song@linux.dev \
    --cc=neilb@suse.de \
    --cc=osalvador@suse.de \
    --cc=pcc@google.com \
    --cc=peterz@infradead.org \
    --cc=pfalcato@suse.de \
    --cc=rostedt@goodmis.org \
    --cc=rppt@kernel.org \
    --cc=shakeel.butt@linux.dev \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=vasily.averin@linux.dev \
    --cc=vbabka@suse.cz \
    --cc=vincent.guittot@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vschneid@redhat.com \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    --cc=xhao@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox