From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>,
"Liam R . Howlett" <Liam.Howlett@oracle.com>,
Suren Baghdasaryan <surenb@google.com>,
Arnd Bergmann <arnd@arndb.de>,
Shakeel Butt <shakeel.butt@linux.dev>,
linux-api@vger.kernel.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org, Minchan Kim <minchan@kernel.org>
Subject: [PATCH 1/2] mm/madvise: introduce PR_MADV_SELF flag to process_madvise()
Date: Mon, 23 Sep 2024 17:03:56 +0100 [thread overview]
Message-ID: <077be0d59cb1047870a84c87c62e7b027af1c75d.1727106751.git.lorenzo.stoakes@oracle.com> (raw)
In-Reply-To: <cover.1727106751.git.lorenzo.stoakes@oracle.com>
process_madvise() was conceived as a useful means for performing a vector
of madvise() operations on a remote process's address space.
However, it's useful to be able to do so on the current process also. Doing
this is currently rather clunky (requiring a pidfd to be opened for the
current process) and introduces unnecessary overhead in incrementing
reference counts for the task and mm.
Avoid all of this by providing a PR_MADV_SELF flag, which causes
process_madvise() to simply ignore the pidfd parameter and instead apply
the operation to the current process.
Since we are operating on our own process, no restrictions need be applied
on behaviors we can perform, so do not limit these in that case.
Also extend this to the case of a user specifying the current process via
pidfd, so that the behaviors which can be performed are not restricted there
either.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
include/uapi/asm-generic/mman-common.h | 2 +
mm/madvise.c | 58 +++++++++++++++++++-------
2 files changed, 44 insertions(+), 16 deletions(-)
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 6ce1f1ceb432..8f59f23dee09 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -87,4 +87,6 @@
#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
PKEY_DISABLE_WRITE)
+#define PR_MADV_SELF (1<<0) /* process_madvise() flag - apply to self */
+
#endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/mm/madvise.c b/mm/madvise.c
index ff139e57cca2..549b36d1463c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1208,7 +1208,8 @@ madvise_behavior_valid(int behavior)
}
}
-static bool process_madvise_behavior_valid(int behavior)
+/* Can we invoke process_madvise() on a remote mm for the specified behavior? */
+static bool process_madvise_remote_valid(int behavior)
{
switch (behavior) {
case MADV_COLD:
@@ -1477,6 +1478,28 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
return do_madvise(current->mm, start, len_in, behavior);
}
+/* Perform an madvise operation over a vector of addresses and lengths. */
+static ssize_t vector_madvise(struct mm_struct *mm, struct iov_iter *iter,
+ int behavior)
+{
+ ssize_t ret = 0;
+ size_t total_len;
+
+ total_len = iov_iter_count(iter);
+
+ while (iov_iter_count(iter)) {
+ ret = do_madvise(mm, (unsigned long)iter_iov_addr(iter),
+ iter_iov_len(iter), behavior);
+ if (ret < 0)
+ break;
+ iov_iter_advance(iter, iter_iov_len(iter));
+ }
+
+ ret = (total_len - iov_iter_count(iter)) ? : ret;
+
+ return ret;
+}
+
SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
size_t, vlen, int, behavior, unsigned int, flags)
{
@@ -1486,10 +1509,9 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
struct iov_iter iter;
struct task_struct *task;
struct mm_struct *mm;
- size_t total_len;
unsigned int f_flags;
- if (flags != 0) {
+ if (flags & ~PR_MADV_SELF) {
ret = -EINVAL;
goto out;
}
@@ -1498,13 +1520,26 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
if (ret < 0)
goto out;
+ /*
+ * Perform an madvise operation on the current process. No restrictions
+ * need be applied, nor do we need to pin the task or mm_struct.
+ */
+ if (flags & PR_MADV_SELF) {
+ ret = vector_madvise(current->mm, &iter, behavior);
+ goto free_iov;
+ }
+
task = pidfd_get_task(pidfd, &f_flags);
if (IS_ERR(task)) {
ret = PTR_ERR(task);
goto free_iov;
}
- if (!process_madvise_behavior_valid(behavior)) {
+ /*
+ * We need only perform this check if we are attempting to manipulate a
+ * remote process's address space.
+ */
+ if (mm != current->mm && !process_madvise_remote_valid(behavior)) {
ret = -EINVAL;
goto release_task;
}
@@ -1518,24 +1553,15 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
/*
* Require CAP_SYS_NICE for influencing process performance. Note that
- * only non-destructive hints are currently supported.
+ * only non-destructive hints are currently supported for remote
+ * processes.
*/
if (mm != current->mm && !capable(CAP_SYS_NICE)) {
ret = -EPERM;
goto release_mm;
}
- total_len = iov_iter_count(&iter);
-
- while (iov_iter_count(&iter)) {
- ret = do_madvise(mm, (unsigned long)iter_iov_addr(&iter),
- iter_iov_len(&iter), behavior);
- if (ret < 0)
- break;
- iov_iter_advance(&iter, iter_iov_len(&iter));
- }
-
- ret = (total_len - iov_iter_count(&iter)) ? : ret;
+ ret = vector_madvise(mm, &iter, behavior);
release_mm:
mmput(mm);
--
2.46.0
next prev parent reply other threads:[~2024-09-23 16:04 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-23 16:03 [PATCH 0/2] unrestrict process_madvise() for current process Lorenzo Stoakes
2024-09-23 16:03 ` Lorenzo Stoakes [this message]
2024-09-23 18:56 ` [PATCH 1/2] mm/madvise: introduce PR_MADV_SELF flag to process_madvise() Shakeel Butt
2024-09-23 19:34 ` Lorenzo Stoakes
2024-09-23 21:49 ` Arnd Bergmann
2024-09-24 7:49 ` Lorenzo Stoakes
2024-09-23 21:20 ` kernel test robot
2024-09-23 21:30 ` kernel test robot
2024-09-24 3:15 ` kernel test robot
2024-09-24 8:47 ` Lorenzo Stoakes
2024-09-23 16:03 ` [PATCH 2/2] selftests/mm: add test for process_madvise PR_MADV_SELF flag use Lorenzo Stoakes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=077be0d59cb1047870a84c87c62e7b027af1c75d.1727106751.git.lorenzo.stoakes@oracle.com \
--to=lorenzo.stoakes@oracle.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=arnd@arndb.de \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=minchan@kernel.org \
--cc=shakeel.butt@linux.dev \
--cc=surenb@google.com \
--cc=vbabka@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox