linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Bo Li <libo.gcs85@bytedance.com>
To: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, x86@kernel.org, luto@kernel.org,
	kees@kernel.org, akpm@linux-foundation.org, david@redhat.com,
	juri.lelli@redhat.com, vincent.guittot@linaro.org,
	peterz@infradead.org
Cc: dietmar.eggemann@arm.com, hpa@zytor.com, acme@kernel.org,
	namhyung@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	irogers@google.com, adrian.hunter@intel.com,
	kan.liang@linux.intel.com, viro@zeniv.linux.org.uk,
	brauner@kernel.org, jack@suse.cz, lorenzo.stoakes@oracle.com,
	Liam.Howlett@oracle.com, vbabka@suse.cz, rppt@kernel.org,
	surenb@google.com, mhocko@suse.com, rostedt@goodmis.org,
	bsegall@google.com, mgorman@suse.de, vschneid@redhat.com,
	jannh@google.com, pfalcato@suse.de, riel@surriel.com,
	harry.yoo@oracle.com, linux-kernel@vger.kernel.org,
	linux-perf-users@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, duanxiongchun@bytedance.com,
	yinhongbo@bytedance.com, dengliang.1214@bytedance.com,
	xieyongji@bytedance.com, chaiwen.cc@bytedance.com,
	songmuchun@bytedance.com, yuanzhu@bytedance.com,
	chengguozhu@bytedance.com, sunjiadong.lff@bytedance.com,
	Bo Li <libo.gcs85@bytedance.com>
Subject: [RFC v2 07/35] RPAL: enable shared page mmap
Date: Fri, 30 May 2025 17:27:35 +0800	[thread overview]
Message-ID: <11d4a94318efc8af41f77235f5117aabb8795afe.1748594840.git.libo.gcs85@bytedance.com> (raw)
In-Reply-To: <cover.1748594840.git.libo.gcs85@bytedance.com>

RPAL needs to create shared memory between the kernel and user space for
the transfer of states and data.

This patch implements the rpal_mmap() interface. User processes can create
shared memory by calling mmap() on /proc/rpal. To prevent a process from
allocating an excessive amount of memory, rpal_mmap() limits the total size
of the shared memory that it can create. The shared memory is maintained
through reference counting, and rpal_munmap() is implemented to release
the shared memory.

Signed-off-by: Bo Li <libo.gcs85@bytedance.com>
---
 arch/x86/rpal/internal.h |  20 ++++++
 arch/x86/rpal/mm.c       | 147 +++++++++++++++++++++++++++++++++++++++
 arch/x86/rpal/proc.c     |   1 +
 arch/x86/rpal/service.c  |   4 ++
 include/linux/rpal.h     |  15 ++++
 mm/mmap.c                |   4 ++
 6 files changed, 191 insertions(+)

diff --git a/arch/x86/rpal/internal.h b/arch/x86/rpal/internal.h
index c102a4c50515..65fd14a26f0e 100644
--- a/arch/x86/rpal/internal.h
+++ b/arch/x86/rpal/internal.h
@@ -9,8 +9,28 @@
 #define RPAL_COMPAT_VERSION 1
 #define RPAL_API_VERSION 1
 
+#include <linux/mm.h>
+#include <linux/file.h>
+
 extern bool rpal_inited;
 
 /* service.c */
 int __init rpal_service_init(void);
 void __init rpal_service_exit(void);
+
+/* mm.c */
+/* Take one more reference on @rsp and return the same pointer for chaining. */
+static inline struct rpal_shared_page *rpal_get_shared_page(struct rpal_shared_page *rsp)
+{
+	atomic_inc(&rsp->refcnt);
+	return rsp;
+}
+
+static inline void rpal_put_shared_page(struct rpal_shared_page *rsp)
+{
+	atomic_dec(&rsp->refcnt);	/* only drops the count; nothing is freed here */
+}
+
+int rpal_mmap(struct file *filp, struct vm_area_struct *vma);
+struct rpal_shared_page *rpal_find_shared_page(struct rpal_service *rs,
+					       unsigned long addr);
diff --git a/arch/x86/rpal/mm.c b/arch/x86/rpal/mm.c
index f469bcf57b66..8a738c502d1d 100644
--- a/arch/x86/rpal/mm.c
+++ b/arch/x86/rpal/mm.c
@@ -11,6 +11,8 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 
+#include "internal.h"
+
 static inline int rpal_balloon_mapping(unsigned long base, unsigned long size)
 {
 	struct vm_area_struct *vma;
@@ -68,3 +70,148 @@ int rpal_balloon_init(unsigned long base)
 
 	return ret;
 }
+
+/* vm_ops->close hook: unlink the VMA's shared page and free its backing pages. */
+static void rpal_munmap(struct vm_area_struct *area)
+{
+	struct mm_struct *mm = area->vm_mm;
+	struct rpal_service *rs = mm->rpal_rs;
+	struct rpal_shared_page *rsp = area->vm_private_data;
+
+	if (!rs) {
+		rpal_err("free shared page after exit_mmap or fork a child process\n");
+		return;
+	}
+
+	mutex_lock(&rs->mutex);
+	if (unlikely(!atomic_dec_and_test(&rsp->refcnt))) {
+		rpal_err("refcnt(%d) of shared page is not 0\n", atomic_read(&rsp->refcnt));
+		send_sig_info(SIGKILL, SEND_SIG_PRIV, rs->group_leader);
+	}
+	list_del(&rsp->list);
+	rs->nr_shared_pages -= rsp->npage;
+	/* get_order() takes a size in bytes, so convert the page count first */
+	__free_pages(virt_to_page(rsp->kernel_start), get_order(rsp->npage * PAGE_SIZE));
+	kfree(rsp);
+	mutex_unlock(&rs->mutex);
+}
+
+const struct vm_operations_struct rpal_vm_ops = { .close = rpal_munmap };
+
+#define RPAL_MAX_SHARED_PAGES 8192	/* per-service page cap, checked in rpal_mmap() */
+
+/*
+ * mmap handler for /proc/rpal: back @vma with zeroed, physically contiguous
+ * pages and track them on the current service. Returns 0 or a negative errno.
+ */
+int rpal_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct rpal_service *cur = rpal_current_service();
+	struct rpal_shared_page *rsp;
+	struct page *page = NULL;
+	unsigned long size = (unsigned long)(vma->vm_end - vma->vm_start);
+	unsigned long npage = size >> PAGE_SHIFT;	/* full width: must not truncate */
+	int order = get_order(size);
+	int ret = 0;
+
+	if (!cur) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Require a page-aligned VMA whose page count is a power of two, which
+	 * keeps shared pages easy to manage and matches the order allocated.
+	 */
+	if (!IS_ALIGNED(size, PAGE_SIZE) ||
+	    !IS_ALIGNED(vma->vm_start, PAGE_SIZE)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!is_power_of_2(npage)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	mutex_lock(&cur->mutex);
+
+	/* make sure the user does not allocate too many pages */
+	if (cur->nr_shared_pages + npage > RPAL_MAX_SHARED_PAGES) {
+		ret = -ENOMEM;
+		goto unlock;
+	}
+
+	rsp = kmalloc(sizeof(*rsp), GFP_KERNEL);
+	if (!rsp) {
+		ret = -EAGAIN;
+		goto unlock;
+	}
+
+	page = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+	if (!page) {
+		ret = -ENOMEM;
+		goto free_rsp;
+	}
+
+	rsp->user_start = vma->vm_start;
+	rsp->kernel_start = (unsigned long)page_address(page);
+	rsp->npage = npage;
+	atomic_set(&rsp->refcnt, 1);
+	list_add(&rsp->list, &cur->shared_pages);
+
+	vma->vm_ops = &rpal_vm_ops;
+	vma->vm_private_data = rsp;
+
+	/* map the shared pages into userspace */
+	ret = remap_pfn_range(vma, vma->vm_start, page_to_pfn(page), size,
+			      vma->vm_page_prot);
+	if (ret)
+		goto free_page;
+
+	cur->nr_shared_pages += npage;
+	mutex_unlock(&cur->mutex);
+
+	return 0;
+
+free_page:
+	__free_pages(page, order);
+	list_del(&rsp->list);
+free_rsp:
+	kfree(rsp);
+unlock:
+	mutex_unlock(&cur->mutex);
+out:
+	return ret;
+}
+
+/* Find the shared page of @rs covering @addr and take a reference; NULL if none. */
+struct rpal_shared_page *rpal_find_shared_page(struct rpal_service *rs,
+					       unsigned long addr)
+{
+	struct rpal_shared_page *rsp, *ret = NULL;
+
+	mutex_lock(&rs->mutex);	/* lock the service whose list is walked */
+	list_for_each_entry(rsp, &rs->shared_pages, list) {
+		if (rsp->user_start <= addr &&
+		    addr < rsp->user_start + rsp->npage * PAGE_SIZE) {
+			ret = rpal_get_shared_page(rsp);
+			break;
+		}
+	}
+	mutex_unlock(&rs->mutex);
+
+	return ret;
+}
+
+/* exit_mmap() hook: detach the RPAL service from @mm and put it. */
+void rpal_exit_mmap(struct mm_struct *mm)
+{
+	struct rpal_service *service = mm->rpal_rs;
+
+	if (!service)
+		return;
+	mm->rpal_rs = NULL;
+	WARN_ON_ONCE(service->nr_shared_pages != 0);	/* all shared pages freed by now */
+	rpal_put_service(service);
+}
diff --git a/arch/x86/rpal/proc.c b/arch/x86/rpal/proc.c
index 1ced30e25c15..86947dc233d0 100644
--- a/arch/x86/rpal/proc.c
+++ b/arch/x86/rpal/proc.c
@@ -61,6 +61,7 @@ static long rpal_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 const struct proc_ops proc_rpal_operations = {
 	.proc_open = rpal_open,
 	.proc_ioctl = rpal_ioctl,
+	.proc_mmap = rpal_mmap,
 };
 
 static int __init proc_rpal_init(void)
diff --git a/arch/x86/rpal/service.c b/arch/x86/rpal/service.c
index caa4afa5a2c6..f29a046fc22f 100644
--- a/arch/x86/rpal/service.c
+++ b/arch/x86/rpal/service.c
@@ -173,6 +173,10 @@ struct rpal_service *rpal_register_service(void)
 	if (unlikely(rs->key == RPAL_INVALID_KEY))
 		goto key_fail;
 
+	mutex_init(&rs->mutex);
+	rs->nr_shared_pages = 0;
+	INIT_LIST_HEAD(&rs->shared_pages);
+
 	rs->bad_service = false;
 	rs->base = calculate_base_address(rs->id);
 
diff --git a/include/linux/rpal.h b/include/linux/rpal.h
index 3bc2a2a44265..986dfbd16fc9 100644
--- a/include/linux/rpal.h
+++ b/include/linux/rpal.h
@@ -110,6 +110,12 @@ struct rpal_service {
      * Fields above should never change after initialization.
      * Fields below may change after initialization.
      */
+	/* Mutex for time consuming operations */
+	struct mutex mutex;
+
+	/* pinned pages */
+	int nr_shared_pages;
+	struct list_head shared_pages;
 
 	/* delayed service put work */
 	struct delayed_work delayed_put_work;
@@ -135,6 +141,14 @@ struct rpal_version_info {
 
 /* End */
 
+struct rpal_shared_page {
+	unsigned long user_start;	/* user address of the mapping (vma->vm_start) */
+	unsigned long kernel_start;	/* page_address() of the first backing page */
+	int npage;			/* number of pages in the mapping */
+	atomic_t refcnt;		/* set to 1 by rpal_mmap() */
+	struct list_head list;		/* entry in rpal_service.shared_pages */
+};
+
 enum rpal_command_type {
 	RPAL_CMD_GET_API_VERSION_AND_CAP,
 	RPAL_CMD_GET_SERVICE_KEY,
@@ -196,6 +210,7 @@ struct rpal_service *rpal_get_service_by_key(u64 key);
 void copy_rpal(struct task_struct *p);
 void exit_rpal(bool group_dead);
 int rpal_balloon_init(unsigned long base);
+void rpal_exit_mmap(struct mm_struct *mm);
 
 extern void rpal_pick_mmap_base(struct mm_struct *mm,
 	struct rlimit *rlim_stack);
diff --git a/mm/mmap.c b/mm/mmap.c
index bd210aaf7ebd..98bb33d2091e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -48,6 +48,7 @@
 #include <linux/sched/mm.h>
 #include <linux/ksm.h>
 #include <linux/memfd.h>
+#include <linux/rpal.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -1319,6 +1320,9 @@ void exit_mmap(struct mm_struct *mm)
 	__mt_destroy(&mm->mm_mt);
 	mmap_write_unlock(mm);
 	vm_unacct_memory(nr_accounted);
+#if IS_ENABLED(CONFIG_RPAL)
+	rpal_exit_mmap(mm);
+#endif
 }
 
 /* Insert vm structure into process list sorted by address
-- 
2.20.1



  parent reply	other threads:[~2025-05-30  9:30 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-30  9:27 [RFC v2 00/35] optimize cost of inter-process communication Bo Li
2025-05-30  9:27 ` [RFC v2 01/35] Kbuild: rpal support Bo Li
2025-05-30  9:27 ` [RFC v2 02/35] RPAL: add struct rpal_service Bo Li
2025-05-30  9:27 ` [RFC v2 03/35] RPAL: add service registration interface Bo Li
2025-05-30  9:27 ` [RFC v2 04/35] RPAL: add member to task_struct and mm_struct Bo Li
2025-05-30  9:27 ` [RFC v2 05/35] RPAL: enable virtual address space partitions Bo Li
2025-05-30  9:27 ` [RFC v2 06/35] RPAL: add user interface Bo Li
2025-05-30  9:27 ` Bo Li [this message]
2025-05-30  9:27 ` [RFC v2 08/35] RPAL: enable sender/receiver registration Bo Li
2025-05-30  9:27 ` [RFC v2 09/35] RPAL: enable address space sharing Bo Li
2025-05-30  9:27 ` [RFC v2 10/35] RPAL: allow service enable/disable Bo Li
2025-05-30  9:27 ` [RFC v2 11/35] RPAL: add service request/release Bo Li
2025-05-30  9:27 ` [RFC v2 12/35] RPAL: enable service disable notification Bo Li
2025-05-30  9:27 ` [RFC v2 13/35] RPAL: add tlb flushing support Bo Li
2025-05-30  9:27 ` [RFC v2 14/35] RPAL: enable page fault handling Bo Li
2025-05-30 13:59   ` Dave Hansen
2025-05-30  9:27 ` [RFC v2 15/35] RPAL: add sender/receiver state Bo Li
2025-05-30  9:27 ` [RFC v2 16/35] RPAL: add cpu lock interface Bo Li
2025-05-30  9:27 ` [RFC v2 17/35] RPAL: add a mapping between fsbase and tasks Bo Li
2025-05-30  9:27 ` [RFC v2 18/35] sched: pick a specified task Bo Li
2025-05-30  9:27 ` [RFC v2 19/35] RPAL: add lazy switch main logic Bo Li
2025-05-30  9:27 ` [RFC v2 20/35] RPAL: add rpal_ret_from_lazy_switch Bo Li
2025-05-30  9:27 ` [RFC v2 21/35] RPAL: add kernel entry handling for lazy switch Bo Li
2025-05-30  9:27 ` [RFC v2 22/35] RPAL: rebuild receiver state Bo Li
2025-05-30  9:27 ` [RFC v2 23/35] RPAL: resume cpumask when fork Bo Li
2025-05-30  9:27 ` [RFC v2 24/35] RPAL: critical section optimization Bo Li
2025-05-30  9:27 ` [RFC v2 25/35] RPAL: add MPK initialization and interface Bo Li
2025-05-30  9:27 ` [RFC v2 26/35] RPAL: enable MPK support Bo Li
2025-05-30 17:03   ` Dave Hansen
2025-05-30  9:27 ` [RFC v2 27/35] RPAL: add epoll support Bo Li
2025-05-30  9:27 ` [RFC v2 28/35] RPAL: add rpal_uds_fdmap() support Bo Li
2025-05-30  9:27 ` [RFC v2 29/35] RPAL: fix race condition in pkru update Bo Li
2025-05-30  9:27 ` [RFC v2 30/35] RPAL: fix pkru setup when fork Bo Li
2025-05-30  9:27 ` [RFC v2 31/35] RPAL: add receiver waker Bo Li
2025-05-30  9:28 ` [RFC v2 32/35] RPAL: fix unknown nmi on AMD CPU Bo Li
2025-05-30  9:28 ` [RFC v2 33/35] RPAL: enable time slice correction Bo Li
2025-05-30  9:28 ` [RFC v2 34/35] RPAL: enable fast epoll wait Bo Li
2025-05-30  9:28 ` [RFC v2 35/35] samples/rpal: add RPAL samples Bo Li
2025-05-30  9:33 ` [RFC v2 00/35] optimize cost of inter-process communication Lorenzo Stoakes
2025-06-03  8:22   ` Bo Li
2025-06-03  9:22     ` Lorenzo Stoakes
2025-05-30  9:41 ` Pedro Falcato
2025-05-30  9:56 ` David Hildenbrand
2025-05-30 22:42 ` Andrew Morton
2025-05-31  7:16 ` Ingo Molnar
2025-06-03 17:49 ` H. Peter Anvin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=11d4a94318efc8af41f77235f5117aabb8795afe.1748594840.git.libo.gcs85@bytedance.com \
    --to=libo.gcs85@bytedance.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=brauner@kernel.org \
    --cc=bsegall@google.com \
    --cc=chaiwen.cc@bytedance.com \
    --cc=chengguozhu@bytedance.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=dengliang.1214@bytedance.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=duanxiongchun@bytedance.com \
    --cc=harry.yoo@oracle.com \
    --cc=hpa@zytor.com \
    --cc=irogers@google.com \
    --cc=jack@suse.cz \
    --cc=jannh@google.com \
    --cc=jolsa@kernel.org \
    --cc=juri.lelli@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=kees@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=luto@kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=pfalcato@suse.de \
    --cc=riel@surriel.com \
    --cc=rostedt@goodmis.org \
    --cc=rppt@kernel.org \
    --cc=songmuchun@bytedance.com \
    --cc=sunjiadong.lff@bytedance.com \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=vbabka@suse.cz \
    --cc=vincent.guittot@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vschneid@redhat.com \
    --cc=x86@kernel.org \
    --cc=xieyongji@bytedance.com \
    --cc=yinhongbo@bytedance.com \
    --cc=yuanzhu@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox