linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Bo Li <libo.gcs85@bytedance.com>
To: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, x86@kernel.org, luto@kernel.org,
	kees@kernel.org, akpm@linux-foundation.org, david@redhat.com,
	juri.lelli@redhat.com, vincent.guittot@linaro.org,
	peterz@infradead.org
Cc: dietmar.eggemann@arm.com, hpa@zytor.com, acme@kernel.org,
	namhyung@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	irogers@google.com, adrian.hunter@intel.com,
	kan.liang@linux.intel.com, viro@zeniv.linux.org.uk,
	brauner@kernel.org, jack@suse.cz, lorenzo.stoakes@oracle.com,
	Liam.Howlett@oracle.com, vbabka@suse.cz, rppt@kernel.org,
	surenb@google.com, mhocko@suse.com, rostedt@goodmis.org,
	bsegall@google.com, mgorman@suse.de, vschneid@redhat.com,
	jannh@google.com, pfalcato@suse.de, riel@surriel.com,
	harry.yoo@oracle.com, linux-kernel@vger.kernel.org,
	linux-perf-users@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, duanxiongchun@bytedance.com,
	yinhongbo@bytedance.com, dengliang.1214@bytedance.com,
	xieyongji@bytedance.com, chaiwen.cc@bytedance.com,
	songmuchun@bytedance.com, yuanzhu@bytedance.com,
	chengguozhu@bytedance.com, sunjiadong.lff@bytedance.com,
	Bo Li <libo.gcs85@bytedance.com>
Subject: [RFC v2 28/35] RPAL: add rpal_uds_fdmap() support
Date: Fri, 30 May 2025 17:27:56 +0800	[thread overview]
Message-ID: <7d9d805dcfe80358c06f0a02fadd31a7288500b4.1748594841.git.libo.gcs85@bytedance.com> (raw)
In-Reply-To: <cover.1748594840.git.libo.gcs85@bytedance.com>

For a UDS connection between a sender and a receiver, neither side knows
which file descriptor (fd) the other uses to manage the connection. The
sender cannot determine which user space fd's buffer in the receiver to
write data to, necessitating a complex process for both sides to inform
each other of fd mappings. This process incurs significant overhead when
managing a large number of connections, which requires optimization.

This patch introduces the RPAL_IOCTL_UDS_FDMAP interface, which simplifies
the establishment of fd mappings between sender and receiver processes for
files monitored by epoll. This avoids the need for a complex setup process
each time a new connection is created.

Signed-off-by: Bo Li <libo.gcs85@bytedance.com>
---
 arch/x86/rpal/internal.h |   3 +
 arch/x86/rpal/proc.c     | 117 +++++++++++++++++++++++++++++++++++++++
 fs/eventpoll.c           |  19 +++++++
 include/linux/rpal.h     |  11 ++++
 4 files changed, 150 insertions(+)

diff --git a/arch/x86/rpal/internal.h b/arch/x86/rpal/internal.h
index e49febce8645..e03f8a90619d 100644
--- a/arch/x86/rpal/internal.h
+++ b/arch/x86/rpal/internal.h
@@ -11,6 +11,7 @@
 
 #include <linux/mm.h>
 #include <linux/file.h>
+#include <net/af_unix.h>
 
 extern bool rpal_inited;
 
@@ -60,3 +61,5 @@ int rpal_alloc_pkey(struct rpal_service *rs, int pkey);
 int rpal_pkey_setup(struct rpal_service *rs, int pkey);
 void rpal_set_current_pkru(u32 val, int mode);
 void rpal_service_pku_init(void);
+
+extern struct sock *unix_peer_get(struct sock *sk);
diff --git a/arch/x86/rpal/proc.c b/arch/x86/rpal/proc.c
index 2f9cceec4992..b60c099c4a92 100644
--- a/arch/x86/rpal/proc.c
+++ b/arch/x86/rpal/proc.c
@@ -9,6 +9,8 @@
 #include <linux/rpal.h>
 #include <linux/proc_fs.h>
 #include <linux/poll.h>
+#include <net/sock.h>
+#include <net/af_unix.h>
 
 #include "internal.h"
 
@@ -34,6 +36,118 @@ static int rpal_get_api_version_and_cap(void __user *p)
 	return 0;
 }
 
+static void *rpal_uds_peer_data(struct sock *psk, int *pfd)
+{	/* Inspect the first waiter queued on @psk: return its ep, store its fd in *pfd. */
+	void *ep = NULL;	/* stays NULL on every failure path */
+	unsigned long flags;
+	struct socket_wq *wq;
+	wait_queue_entry_t *entry;
+	wait_queue_head_t *whead;
+
+	rcu_read_lock();	/* psk->sk_wq is fetched with rcu_dereference() below */
+	wq = rcu_dereference(psk->sk_wq);
+	if (!skwq_has_sleeper(wq))	/* bail out without touching the wait queue */
+		goto unlock_rcu;
+
+	whead = &wq->wait;
+
+	spin_lock_irqsave(&whead->lock, flags);	/* queue lock held while the entry is examined */
+	if (list_empty(&whead->head)) {
+		pr_debug("rpal debug: [%d] cannot find epitem entry\n",
+			 current->pid);
+		goto unlock_spin;
+	}
+	entry = list_first_entry(&whead->head, wait_queue_entry_t, entry);	/* NOTE(review): only the first waiter is looked at and it is assumed to be an epoll entry - confirm */
+	*pfd = rpal_get_epitemfd(entry);	/* *pfd is written even when the value is negative */
+	if (*pfd < 0) {
+		pr_debug("rpal debug: [%d] cannot find epitem fd\n",
+			 current->pid);
+		goto unlock_spin;
+	}
+	ep = rpal_get_epitemep(entry);	/* may still be NULL; the caller checks for that */
+
+unlock_spin:
+	spin_unlock_irqrestore(&whead->lock, flags);
+unlock_rcu:
+	rcu_read_unlock();
+	return ep;	/* returned after both locks are dropped; no reference is taken here */
+}
+
+/* Return the receiver id of the thread in service @id whose rpal_rd->ep is @ep, or -1. */
+static int rpal_find_receiver_rid(int id, void *ep)
+{
+	struct task_struct *tsk;
+	struct rpal_service *tgt;
+	int rid = -1;
+
+	tgt = rpal_get_mapped_service_by_id(rpal_current_service(), id);
+	if (tgt == NULL)
+		return rid;
+
+	rcu_read_lock();	/* for_each_thread() walks an RCU-protected thread list */
+	for_each_thread(tgt->group_leader, tsk) {
+		if (!rpal_test_task_thread_flag(tsk, RPAL_RECEIVER_BIT))
+			continue;
+		if (tsk->rpal_rd->ep == ep) {
+			rid = tsk->rpal_rd->rcc->receiver_id;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	rpal_put_service(tgt);	/* drop the reference taken by the lookup above */
+	return rid;
+}
+
+/* Map UDS arg.cfd to (receiver id << 32 | peer's epoll-registered fd). */
+static long rpal_uds_fdmap(unsigned long uarg)
+{
+	struct rpal_uds_fdmap_arg arg;
+	struct socket *sock;
+	struct sock *peer_sk;
+	unsigned long res;
+	int sfd, rid;
+	struct fd f;
+	void *ep;
+	long ret;
+
+	if (copy_from_user(&arg, (void __user *)uarg, sizeof(arg)))
+		return -EFAULT;	/* copy_from_user() yields a byte count, not an errno */
+
+	f = fdget(arg.cfd);
+	if (!fd_file(f))
+		return -EBADF;
+
+	ret = -ENOTSOCK;
+	sock = sock_from_file(fd_file(f));
+	if (!sock || !sock->sk || sock->sk->sk_family != AF_UNIX)
+		goto fd_put;	/* unix_peer_get() must only see AF_UNIX socks */
+	ret = -ENOTCONN;
+	peer_sk = unix_peer_get(sock->sk);
+	if (peer_sk == NULL)
+		goto fd_put;
+	ret = -ENOENT;	/* no epoll waiter on the peer, or no matching receiver */
+	ep = rpal_uds_peer_data(peer_sk, &sfd);
+	if (ep == NULL) {
+		pr_debug("rpal debug: [%d] cannot find epitem ep\n",
+			 current->pid);
+		goto peer_sock_put;
+	}
+	rid = rpal_find_receiver_rid(arg.service_id, ep);
+	if (rid < 0) {
+		pr_debug("rpal debug: [%d] rpal: cannot find epitem rid\n",
+			 current->pid);
+		goto peer_sock_put;
+	}
+	res = ((unsigned long)rid << 32) | (unsigned int)sfd;
+	ret = put_user(res, (unsigned long __user *)arg.res);
+peer_sock_put:
+	sock_put(peer_sk);
+fd_put:
+	fdput(f);
+	return ret;
+}
+
 static long rpal_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct rpal_service *cur = rpal_current_service();
@@ -81,6 +195,9 @@ static long rpal_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		ret = put_user(cur->pkey, (int __user *)arg);
 		break;
 #endif
+	case RPAL_IOCTL_UDS_FDMAP:
+		ret = rpal_uds_fdmap(arg);
+		break;
 	default:
 		return -EINVAL;
 	}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 437cd5764c03..791321639561 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -2143,6 +2143,25 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 }
 
 #ifdef CONFIG_RPAL
+/* Return the ep recorded in the epitem behind @wait, or NULL when there is none. */
+void *rpal_get_epitemep(wait_queue_entry_t *wait)
+{
+	struct epitem *item;
+
+	item = ep_item_from_wait(wait);
+
+	return item ? item->ep : NULL;
+}
+
+/* Return the fd recorded in the epitem behind @wait, or -1 when there is none. */
+int rpal_get_epitemfd(wait_queue_entry_t *wait)
+{
+	struct epitem *item;
+
+	item = ep_item_from_wait(wait);
+
+	return item ? item->ffd.fd : -1;
+}
 
 void rpal_resume_ep(struct task_struct *tsk)
 {
diff --git a/include/linux/rpal.h b/include/linux/rpal.h
index 5912ffec6e28..7657e6c6393b 100644
--- a/include/linux/rpal.h
+++ b/include/linux/rpal.h
@@ -350,6 +350,12 @@ struct rpal_sender_data {
 	struct task_struct *receiver;
 };
 
+struct rpal_uds_fdmap_arg {	/* argument block copied from user space by rpal_uds_fdmap() */
+	int service_id;	/* handed to rpal_find_receiver_rid() to select the target service */
+	int cfd;	/* caller's fd; must resolve to a socket */
+	unsigned long *res;	/* written via put_user() with (rid << 32 | fd); NOTE(review): user pointer lacks __user */
+};
+
 enum rpal_command_type {
 	RPAL_CMD_GET_API_VERSION_AND_CAP,
 	RPAL_CMD_GET_SERVICE_KEY,
@@ -363,6 +369,7 @@ enum rpal_command_type {
 	RPAL_CMD_REQUEST_SERVICE,
 	RPAL_CMD_RELEASE_SERVICE,
 	RPAL_CMD_GET_SERVICE_PKEY,
+	RPAL_CMD_UDS_FDMAP,
 	RPAL_NR_CMD,
 };
 
@@ -393,6 +400,8 @@ enum rpal_command_type {
 	_IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_RELEASE_SERVICE, unsigned long)
 #define RPAL_IOCTL_GET_SERVICE_PKEY \
 	_IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_GET_SERVICE_PKEY, int *)
+#define RPAL_IOCTL_UDS_FDMAP \
+	_IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_UDS_FDMAP, unsigned long)
 
 #define rpal_for_each_requested_service(rs, idx)                             \
 	for (idx = find_first_bit(rs->requested_service_bitmap, RPAL_NR_ID); \
@@ -594,5 +603,7 @@ int rpal_ep_autoremove_wake_function(wait_queue_entry_t *curr,
 	unsigned int mode, int wake_flags,
 	void *key);
 void rpal_resume_ep(struct task_struct *tsk);
+void *rpal_get_epitemep(wait_queue_entry_t *wait);
+int rpal_get_epitemfd(wait_queue_entry_t *wait);
 int rpal_try_send_events(void *ep, struct rpal_receiver_call_context *rcc);
 #endif
-- 
2.20.1



  parent reply	other threads:[~2025-05-30  9:35 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-30  9:27 [RFC v2 00/35] optimize cost of inter-process communication Bo Li
2025-05-30  9:27 ` [RFC v2 01/35] Kbuild: rpal support Bo Li
2025-05-30  9:27 ` [RFC v2 02/35] RPAL: add struct rpal_service Bo Li
2025-05-30  9:27 ` [RFC v2 03/35] RPAL: add service registration interface Bo Li
2025-05-30  9:27 ` [RFC v2 04/35] RPAL: add member to task_struct and mm_struct Bo Li
2025-05-30  9:27 ` [RFC v2 05/35] RPAL: enable virtual address space partitions Bo Li
2025-05-30  9:27 ` [RFC v2 06/35] RPAL: add user interface Bo Li
2025-05-30  9:27 ` [RFC v2 07/35] RPAL: enable shared page mmap Bo Li
2025-05-30  9:27 ` [RFC v2 08/35] RPAL: enable sender/receiver registration Bo Li
2025-05-30  9:27 ` [RFC v2 09/35] RPAL: enable address space sharing Bo Li
2025-05-30  9:27 ` [RFC v2 10/35] RPAL: allow service enable/disable Bo Li
2025-05-30  9:27 ` [RFC v2 11/35] RPAL: add service request/release Bo Li
2025-05-30  9:27 ` [RFC v2 12/35] RPAL: enable service disable notification Bo Li
2025-05-30  9:27 ` [RFC v2 13/35] RPAL: add tlb flushing support Bo Li
2025-05-30  9:27 ` [RFC v2 14/35] RPAL: enable page fault handling Bo Li
2025-05-30 13:59   ` Dave Hansen
2025-05-30  9:27 ` [RFC v2 15/35] RPAL: add sender/receiver state Bo Li
2025-05-30  9:27 ` [RFC v2 16/35] RPAL: add cpu lock interface Bo Li
2025-05-30  9:27 ` [RFC v2 17/35] RPAL: add a mapping between fsbase and tasks Bo Li
2025-05-30  9:27 ` [RFC v2 18/35] sched: pick a specified task Bo Li
2025-05-30  9:27 ` [RFC v2 19/35] RPAL: add lazy switch main logic Bo Li
2025-05-30  9:27 ` [RFC v2 20/35] RPAL: add rpal_ret_from_lazy_switch Bo Li
2025-05-30  9:27 ` [RFC v2 21/35] RPAL: add kernel entry handling for lazy switch Bo Li
2025-05-30  9:27 ` [RFC v2 22/35] RPAL: rebuild receiver state Bo Li
2025-05-30  9:27 ` [RFC v2 23/35] RPAL: resume cpumask when fork Bo Li
2025-05-30  9:27 ` [RFC v2 24/35] RPAL: critical section optimization Bo Li
2025-05-30  9:27 ` [RFC v2 25/35] RPAL: add MPK initialization and interface Bo Li
2025-05-30  9:27 ` [RFC v2 26/35] RPAL: enable MPK support Bo Li
2025-05-30 17:03   ` Dave Hansen
2025-05-30  9:27 ` [RFC v2 27/35] RPAL: add epoll support Bo Li
2025-05-30  9:27 ` Bo Li [this message]
2025-05-30  9:27 ` [RFC v2 29/35] RPAL: fix race condition in pkru update Bo Li
2025-05-30  9:27 ` [RFC v2 30/35] RPAL: fix pkru setup when fork Bo Li
2025-05-30  9:27 ` [RFC v2 31/35] RPAL: add receiver waker Bo Li
2025-05-30  9:28 ` [RFC v2 32/35] RPAL: fix unknown nmi on AMD CPU Bo Li
2025-05-30  9:28 ` [RFC v2 33/35] RPAL: enable time slice correction Bo Li
2025-05-30  9:28 ` [RFC v2 34/35] RPAL: enable fast epoll wait Bo Li
2025-05-30  9:28 ` [RFC v2 35/35] samples/rpal: add RPAL samples Bo Li
2025-05-30  9:33 ` [RFC v2 00/35] optimize cost of inter-process communication Lorenzo Stoakes
2025-06-03  8:22   ` Bo Li
2025-06-03  9:22     ` Lorenzo Stoakes
2025-05-30  9:41 ` Pedro Falcato
2025-05-30  9:56 ` David Hildenbrand
2025-05-30 22:42 ` Andrew Morton
2025-05-31  7:16 ` Ingo Molnar
2025-06-03 17:49 ` H. Peter Anvin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7d9d805dcfe80358c06f0a02fadd31a7288500b4.1748594841.git.libo.gcs85@bytedance.com \
    --to=libo.gcs85@bytedance.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=brauner@kernel.org \
    --cc=bsegall@google.com \
    --cc=chaiwen.cc@bytedance.com \
    --cc=chengguozhu@bytedance.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=dengliang.1214@bytedance.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=duanxiongchun@bytedance.com \
    --cc=harry.yoo@oracle.com \
    --cc=hpa@zytor.com \
    --cc=irogers@google.com \
    --cc=jack@suse.cz \
    --cc=jannh@google.com \
    --cc=jolsa@kernel.org \
    --cc=juri.lelli@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=kees@kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=luto@kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=pfalcato@suse.de \
    --cc=riel@surriel.com \
    --cc=rostedt@goodmis.org \
    --cc=rppt@kernel.org \
    --cc=songmuchun@bytedance.com \
    --cc=sunjiadong.lff@bytedance.com \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=vbabka@suse.cz \
    --cc=vincent.guittot@linaro.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vschneid@redhat.com \
    --cc=x86@kernel.org \
    --cc=xieyongji@bytedance.com \
    --cc=yinhongbo@bytedance.com \
    --cc=yuanzhu@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox