From: Anthony Yznaga <anthony.yznaga@oracle.com>
To: linux-mm@kvack.org
Cc: akpm@linux-foundation.org, andreyknvl@gmail.com, arnd@arndb.de,
bp@alien8.de, brauner@kernel.org, bsegall@google.com,
corbet@lwn.net, dave.hansen@linux.intel.com, david@redhat.com,
dietmar.eggemann@arm.com, ebiederm@xmission.com, hpa@zytor.com,
jakub.wartak@mailbox.org, jannh@google.com,
juri.lelli@redhat.com, khalid@kernel.org,
liam.howlett@oracle.com, linyongting@bytedance.com,
lorenzo.stoakes@oracle.com, luto@kernel.org,
markhemm@googlemail.com, maz@kernel.org, mhiramat@kernel.org,
mgorman@suse.de, mhocko@suse.com, mingo@redhat.com,
muchun.song@linux.dev, neilb@suse.de, osalvador@suse.de,
pcc@google.com, peterz@infradead.org, pfalcato@suse.de,
rostedt@goodmis.org, rppt@kernel.org, shakeel.butt@linux.dev,
surenb@google.com, tglx@linutronix.de, vasily.averin@linux.dev,
vbabka@suse.cz, vincent.guittot@linaro.org,
viro@zeniv.linux.org.uk, vschneid@redhat.com,
willy@infradead.org, x86@kernel.org, xhao@linux.alibaba.com,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-arch@vger.kernel.org
Subject: [PATCH v3 17/22] sched/mshare: mshare ownership
Date: Tue, 19 Aug 2025 18:04:10 -0700 [thread overview]
Message-ID: <20250820010415.699353-18-anthony.yznaga@oracle.com> (raw)
In-Reply-To: <20250820010415.699353-1-anthony.yznaga@oracle.com>
Ownership of an mshare region is assigned to the process that creates
it. Establishing ownership ensures that the memory in an mshare region
is accounted to the owner and not spread among the processes
sharing the memory. It also provides a means for freeing mshare memory
in an OOM situation. Once an mshare owner exits, access to the memory by
a non-owner process results in a SIGSEGV. For this initial implementation
ownership is not shared or transferred through forking or other means.
Signed-off-by: Anthony Yznaga <anthony.yznaga@oracle.com>
---
include/linux/mshare.h | 25 +++++++++++++
include/linux/sched.h | 5 +++
kernel/exit.c | 1 +
kernel/fork.c | 1 +
mm/mshare.c | 83 ++++++++++++++++++++++++++++++++++++++++++
5 files changed, 115 insertions(+)
create mode 100644 include/linux/mshare.h
diff --git a/include/linux/mshare.h b/include/linux/mshare.h
new file mode 100644
index 000000000000..b62f0e54cf84
--- /dev/null
+++ b/include/linux/mshare.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MSHARE_H_
+#define _LINUX_MSHARE_H_
+
+#include <linux/types.h>
+
+struct task_struct;
+
+#ifdef CONFIG_MSHARE
+
+void exit_mshare(struct task_struct *task);
+#define mshare_init_task(task) INIT_LIST_HEAD(&(task)->mshare_mem)
+
+#else
+
+static inline void exit_mshare(struct task_struct *task)
+{
+}
+static inline void mshare_init_task(struct task_struct *task)
+{
+}
+
+#endif
+
+#endif /* _LINUX_MSHARE_H_ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2b272382673d..17f2f3c0b465 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -48,6 +48,7 @@
#include <linux/uidgid_types.h>
#include <linux/tracepoint-defs.h>
#include <linux/unwind_deferred_types.h>
+#include <linux/mshare.h>
#include <asm/kmap_size.h>
/* task_struct member predeclarations (sorted alphabetically): */
@@ -1654,6 +1655,10 @@ struct task_struct {
/* CPU-specific state of this task: */
struct thread_struct thread;
+#ifdef CONFIG_MSHARE
+ struct list_head mshare_mem;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
diff --git a/kernel/exit.c b/kernel/exit.c
index 343eb97543d5..24445109865d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -951,6 +951,7 @@ void __noreturn do_exit(long code)
if (group_dead)
acct_process();
+ exit_mshare(tsk);
exit_sem(tsk);
exit_shm(tsk);
exit_files(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index 5115be549234..eba6bd709c6e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2143,6 +2143,7 @@ __latent_entropy struct task_struct *copy_process(
#endif
unwind_task_init(p);
+ mshare_init_task(p);
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);
diff --git a/mm/mshare.c b/mm/mshare.c
index f7b7904f0405..8a23b391fa11 100644
--- a/mm/mshare.c
+++ b/mm/mshare.c
@@ -17,6 +17,7 @@
#include <linux/fs_context.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
+#include <linux/mshare.h>
#include <uapi/linux/magic.h>
#include <linux/falloc.h>
#include <asm/tlbflush.h>
@@ -27,6 +28,7 @@ const unsigned long mshare_align = P4D_SIZE;
const unsigned long mshare_base = mshare_align;
#define MSHARE_INITIALIZED 0x1
+#define MSHARE_HAS_OWNER 0x2
struct mshare_data {
struct mm_struct *mm;
@@ -35,6 +37,7 @@ struct mshare_data {
unsigned long size;
unsigned long flags;
struct mmu_notifier mn;
+ struct list_head list;
};
static inline bool mshare_is_initialized(struct mshare_data *m_data)
@@ -42,6 +45,65 @@ static inline bool mshare_is_initialized(struct mshare_data *m_data)
return test_bit(MSHARE_INITIALIZED, &m_data->flags);
}
+static inline bool mshare_has_owner(struct mshare_data *m_data)
+{
+ return test_bit(MSHARE_HAS_OWNER, &m_data->flags);
+}
+
+static bool mshare_data_getref(struct mshare_data *m_data);
+static void mshare_data_putref(struct mshare_data *m_data);
+
+void exit_mshare(struct task_struct *task)
+{
+ for (;;) {
+ struct mshare_data *m_data;
+ int error;
+
+ task_lock(task);
+
+ if (list_empty(&task->mshare_mem)) {
+ task_unlock(task);
+ break;
+ }
+
+ m_data = list_first_entry(&task->mshare_mem, struct mshare_data,
+ list);
+
+ WARN_ON_ONCE(!mshare_data_getref(m_data));
+
+ list_del_init(&m_data->list);
+ task_unlock(task);
+
+ /*
+ * The owner of an mshare region is going away. Unmap
+ * everything in the region and prevent more mappings from
+ * being created.
+ *
+ * XXX
+ * The fact that the unmap can possibly fail is problematic.
+ * One alternative is doing a subset of what exit_mmap() does.
+ * If it's preferable to preserve the mappings then another
+ * approach is to fail any further faults on the mshare region
+ * and unlink the shared page tables from the page tables of
+ * each sharing process by walking the rmap via the msharefs
+ * inode.
+ * Unmapping everything means mshare memory is freed up when
+ * the owner exits which may be preferable for OOM situations.
+ */
+
+ clear_bit(MSHARE_HAS_OWNER, &m_data->flags);
+
+ mmap_write_lock(m_data->mm);
+ error = do_munmap(m_data->mm, m_data->start, m_data->size, NULL);
+ mmap_write_unlock(m_data->mm);
+
+ if (error)
+ pr_warn("%s: do_munmap returned %d\n", __func__, error);
+
+ mshare_data_putref(m_data);
+ }
+}
+
static void mshare_invalidate_tlbs(struct mmu_notifier *mn, struct mm_struct *mm,
unsigned long start, unsigned long end)
{
@@ -362,6 +424,11 @@ msharefs_fill_mm(struct inode *inode)
ret = mmu_notifier_register(&m_data->mn, mm);
if (ret)
goto err_free;
+ INIT_LIST_HEAD(&m_data->list);
+ task_lock(current);
+ list_add(&m_data->list, ¤t->mshare_mem);
+ task_unlock(current);
+ set_bit(MSHARE_HAS_OWNER, &m_data->flags);
refcount_set(&m_data->ref, 1);
inode->i_private = m_data;
@@ -380,6 +447,11 @@ msharefs_delmm(struct mshare_data *m_data)
kfree(m_data);
}
+static bool mshare_data_getref(struct mshare_data *m_data)
+{
+ return refcount_inc_not_zero(&m_data->ref);
+}
+
static void mshare_data_putref(struct mshare_data *m_data)
{
if (!refcount_dec_and_test(&m_data->ref))
@@ -543,6 +615,17 @@ msharefs_evict_inode(struct inode *inode)
if (!m_data)
goto out;
+ rcu_read_lock();
+
+ if (!list_empty(&m_data->list)) {
+ struct task_struct *owner = m_data->mm->owner;
+
+ task_lock(owner);
+ list_del_init(&m_data->list);
+ task_unlock(owner);
+ }
+ rcu_read_unlock();
+
mshare_data_putref(m_data);
out:
clear_inode(inode);
--
2.47.1
next prev parent reply other threads:[~2025-08-20 1:05 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-20 1:03 [PATCH v3 00/22] Add support for shared PTEs across processes Anthony Yznaga
2025-08-20 1:03 ` [PATCH v3 01/22] mm: Add msharefs filesystem Anthony Yznaga
2025-09-08 18:29 ` Liam R. Howlett
2025-09-08 19:09 ` Anthony Yznaga
2025-09-10 12:14 ` Pedro Falcato
2025-09-10 12:46 ` David Hildenbrand
2025-08-20 1:03 ` [PATCH v3 02/22] mm/mshare: pre-populate msharefs with information file Anthony Yznaga
2025-08-20 1:03 ` [PATCH v3 03/22] mm/mshare: make msharefs writable and support directories Anthony Yznaga
2025-08-20 1:03 ` [PATCH v3 04/22] mm/mshare: allocate an mm_struct for msharefs files Anthony Yznaga
2025-08-20 1:03 ` [PATCH v3 05/22] mm/mshare: add ways to set the size of an mshare region Anthony Yznaga
2025-08-20 1:03 ` [PATCH v3 06/22] mm/mshare: Add a vma flag to indicate " Anthony Yznaga
2025-09-08 18:45 ` David Hildenbrand
2025-09-08 18:56 ` Anthony Yznaga
2025-09-08 19:02 ` David Hildenbrand
2025-09-08 19:03 ` Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 07/22] mm/mshare: Add mmap support Anthony Yznaga
2025-08-20 19:02 ` kernel test robot
2025-08-20 1:04 ` [PATCH v3 08/22] mm/mshare: flush all TLBs when updating PTEs in an mshare range Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 09/22] sched/numa: do not scan msharefs vmas Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 10/22] mm: add mmap_read_lock_killable_nested() Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 11/22] mm: add and use unmap_page_range vm_ops hook Anthony Yznaga
2025-08-21 15:40 ` kernel test robot
2025-08-20 1:04 ` [PATCH v3 12/22] mm: introduce PUD page table shared count Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 13/22] mm/mshare: prepare for page table sharing support Anthony Yznaga
2025-09-15 15:27 ` Lorenzo Stoakes
2025-08-20 1:04 ` [PATCH v3 14/22] x86/mm: enable page table sharing Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 15/22] mm: create __do_mmap() to take an mm_struct * arg Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 16/22] mm: pass the mm in vma_munmap_struct Anthony Yznaga
2025-08-20 1:04 ` Anthony Yznaga [this message]
2025-08-20 1:04 ` [PATCH v3 18/22] mm/mshare: Add an ioctl for mapping objects in an mshare region Anthony Yznaga
2025-08-20 20:36 ` kernel test robot
2025-08-20 1:04 ` [PATCH v3 19/22] mm/mshare: Add an ioctl for unmapping " Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 20/22] mm/mshare: support mapping files and anon hugetlb " Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 21/22] mm/mshare: provide a way to identify an mm as an mshare host mm Anthony Yznaga
2025-08-20 1:04 ` [PATCH v3 22/22] mm/mshare: charge fault handling allocations to the mshare owner Anthony Yznaga
2025-09-08 18:50 ` David Hildenbrand
2025-09-08 19:21 ` Anthony Yznaga
2025-09-08 20:28 ` David Hildenbrand
2025-09-08 20:55 ` Anthony Yznaga
2025-09-08 20:32 ` [PATCH v3 00/22] Add support for shared PTEs across processes David Hildenbrand
2025-09-08 20:59 ` Matthew Wilcox
2025-09-08 21:14 ` Anthony Yznaga
2025-09-09 7:53 ` David Hildenbrand
2025-09-09 18:29 ` Anthony Yznaga
2025-09-09 19:06 ` Lorenzo Stoakes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250820010415.699353-18-anthony.yznaga@oracle.com \
--to=anthony.yznaga@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=andreyknvl@gmail.com \
--cc=arnd@arndb.de \
--cc=bp@alien8.de \
--cc=brauner@kernel.org \
--cc=bsegall@google.com \
--cc=corbet@lwn.net \
--cc=dave.hansen@linux.intel.com \
--cc=david@redhat.com \
--cc=dietmar.eggemann@arm.com \
--cc=ebiederm@xmission.com \
--cc=hpa@zytor.com \
--cc=jakub.wartak@mailbox.org \
--cc=jannh@google.com \
--cc=juri.lelli@redhat.com \
--cc=khalid@kernel.org \
--cc=liam.howlett@oracle.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linyongting@bytedance.com \
--cc=lorenzo.stoakes@oracle.com \
--cc=luto@kernel.org \
--cc=markhemm@googlemail.com \
--cc=maz@kernel.org \
--cc=mgorman@suse.de \
--cc=mhiramat@kernel.org \
--cc=mhocko@suse.com \
--cc=mingo@redhat.com \
--cc=muchun.song@linux.dev \
--cc=neilb@suse.de \
--cc=osalvador@suse.de \
--cc=pcc@google.com \
--cc=peterz@infradead.org \
--cc=pfalcato@suse.de \
--cc=rostedt@goodmis.org \
--cc=rppt@kernel.org \
--cc=shakeel.butt@linux.dev \
--cc=surenb@google.com \
--cc=tglx@linutronix.de \
--cc=vasily.averin@linux.dev \
--cc=vbabka@suse.cz \
--cc=vincent.guittot@linaro.org \
--cc=viro@zeniv.linux.org.uk \
--cc=vschneid@redhat.com \
--cc=willy@infradead.org \
--cc=x86@kernel.org \
--cc=xhao@linux.alibaba.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox