linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Nadav Amit <nadav.amit@gmail.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	Nadav Amit <namit@vmware.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Mike Rapoport <rppt@linux.vnet.ibm.com>,
	Peter Xu <peterx@redhat.com>
Subject: [RFC PATCH] userfaultfd: support control over mm of remote PIDs
Date: Sun, 26 Sep 2021 10:06:37 -0700	[thread overview]
Message-ID: <20210926170637.245699-1-namit@vmware.com> (raw)

From: Nadav Amit <namit@vmware.com>

Non-cooperative mode is useful, but it currently works only for forked
processes. Userfaultfd could also be useful to monitor, debug and manage
the memory of remote processes.

To support this mode, add a new flag, UFFD_REMOTE_PID, and an optional
second argument to the userfaultfd syscall. When the flag is set, the
second argument is taken to be a pidfd referring to the process that is
to be monitored. Otherwise the second argument is ignored.

The syscall enforces that the caller has CAP_SYS_PTRACE to prevent
misuse of this feature.

Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Nadav Amit <namit@vmware.com>

---

I know that I have an outstanding RFC regarding the use of io_uring with
userfaultfd. I do intend to follow up on that RFC as well, but it requires
some more work.
---
 fs/userfaultfd.c | 71 ++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 59 insertions(+), 12 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 003f0d31743e..cf44e1e13a03 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -2053,10 +2053,39 @@ static void init_once_userfaultfd_ctx(void *mem)
 	seqcount_spinlock_init(&ctx->refile_seq, &ctx->fault_pending_wqh.lock);
 }
 
-SYSCALL_DEFINE1(userfaultfd, int, flags)
+static int userfaultfd_get_remote_mm(struct userfaultfd_ctx *ctx, int pidfd)
 {
-	struct userfaultfd_ctx *ctx;
-	int fd;
+	struct task_struct *task;
+	struct pid *pid;
+	struct fd f;
+	int ret;
+
+	f = fdget(pidfd);
+	if (!f.file)
+		return -EBADF;
+
+	pid = pidfd_pid(f.file);
+
+	task = get_pid_task(pid, PIDTYPE_PID);
+	ret = -ESRCH;
+	if (!task)
+		goto err_out;
+
+	ctx->mm = task->mm;
+	mmgrab(ctx->mm);
+	put_task_struct(task);
+	ret = 0;
+out:
+	return ret;
+err_out:
+	fdput(f);
+	goto out;
+}
+
+SYSCALL_DEFINE2(userfaultfd, int, flags, int, pidfd)
+{
+	struct userfaultfd_ctx *ctx = NULL;
+	int ret;
 
 	if (!sysctl_unprivileged_userfaultfd &&
 	    (flags & UFFD_USER_MODE_ONLY) == 0 &&
@@ -2067,14 +2096,19 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
 		return -EPERM;
 	}
 
+	if ((flags & UFFD_REMOTE_PID) && !capable(CAP_SYS_PTRACE))
+		return -EPERM;
+
 	BUG_ON(!current->mm);
 
 	/* Check the UFFD_* constants for consistency.  */
+	BUILD_BUG_ON(UFFD_REMOTE_PID & UFFD_SHARED_FCNTL_FLAGS);
 	BUILD_BUG_ON(UFFD_USER_MODE_ONLY & UFFD_SHARED_FCNTL_FLAGS);
 	BUILD_BUG_ON(UFFD_CLOEXEC != O_CLOEXEC);
 	BUILD_BUG_ON(UFFD_NONBLOCK != O_NONBLOCK);
 
-	if (flags & ~(UFFD_SHARED_FCNTL_FLAGS | UFFD_USER_MODE_ONLY))
+	if (flags & ~(UFFD_SHARED_FCNTL_FLAGS | UFFD_USER_MODE_ONLY |
+		      UFFD_REMOTE_PID))
 		return -EINVAL;
 
 	ctx = kmem_cache_alloc(userfaultfd_ctx_cachep, GFP_KERNEL);
@@ -2086,17 +2120,30 @@ SYSCALL_DEFINE1(userfaultfd, int, flags)
 	ctx->features = 0;
 	ctx->released = false;
 	atomic_set(&ctx->mmap_changing, 0);
-	ctx->mm = current->mm;
-	/* prevent the mm struct to be freed */
-	mmgrab(ctx->mm);
+	ctx->mm = NULL;
+
+	if (flags & UFFD_REMOTE_PID) {
+		/* the remote mm is grabbed by the following call */
+		ret = userfaultfd_get_remote_mm(ctx, pidfd);
+		if (ret)
+			goto err_out;
+	} else {
+		ctx->mm = current->mm;
+		/* prevent the mm struct to be freed */
+		mmgrab(ctx->mm);
+	}
 
-	fd = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
+	ret = anon_inode_getfd_secure("[userfaultfd]", &userfaultfd_fops, ctx,
 			O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
-	if (fd < 0) {
+	if (ret < 0)
+		goto err_out;
+out:
+	return ret;
+err_out:
+	if (ctx->mm)
 		mmdrop(ctx->mm);
-		kmem_cache_free(userfaultfd_ctx_cachep, ctx);
-	}
-	return fd;
+	kmem_cache_free(userfaultfd_ctx_cachep, ctx);
+	goto out;
 }
 
 static int __init userfaultfd_init(void)
-- 
2.25.1



             reply	other threads:[~2021-09-27  0:37 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-26 17:06 Nadav Amit [this message]
2021-09-27  9:29 ` David Hildenbrand
2021-09-27 10:19   ` Nadav Amit
2021-09-27 17:06     ` David Hildenbrand
2021-09-27 20:08       ` Nadav Amit
2021-09-27 20:11         ` David Hildenbrand
2021-10-13  2:18 ` Peter Xu
2021-10-13 16:02   ` Nadav Amit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210926170637.245699-1-namit@vmware.com \
    --to=nadav.amit@gmail.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=namit@vmware.com \
    --cc=peterx@redhat.com \
    --cc=rppt@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox