From: Gregory Price <gourry.memverge@gmail.com>
To: linux-mm@kvack.org
Cc: linux-doc@vger.kernel.org, linux-fsdevel@vger.kernel.org,
linux-api@vger.kernel.org, linux-arch@vger.kernel.org,
linux-kernel@vger.kernel.org, akpm@linux-foundation.org,
arnd@arndb.de, tglx@linutronix.de, luto@kernel.org,
mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
x86@kernel.org, hpa@zytor.com, mhocko@kernel.org, tj@kernel.org,
ying.huang@intel.com, Gregory Price <gregory.price@memverge.com>
Subject: [RFC PATCH 04/11] mm/mempolicy: modify get_mempolicy call stack to take a task argument
Date: Wed, 22 Nov 2023 16:11:53 -0500 [thread overview]
Message-ID: <20231122211200.31620-5-gregory.price@memverge.com> (raw)
In-Reply-To: <20231122211200.31620-1-gregory.price@memverge.com>
To make a task's mempolicy fetchable by external tasks, we must first
change the call stack to take a task as an argument.
Modify the following functions to require a task argument:
do_get_mempolicy
kernel_get_mempolicy
The way task->mm is acquired must change slightly to make this
possible. Originally, do_get_mempolicy accessed the mm directly via
current->mm, which is unsafe to do for a non-current task. However,
using get_task_mm unconditionally would break the original behavior
of do_get_mempolicy because of the following check in get_task_mm:
	if (mm) {
		if (task->flags & PF_KTHREAD)
			mm = NULL;
		else
			mmget(mm);
	}
To retain the original behavior, when task == current we access
task->mm directly; when task != current we use get_task_mm to
safely acquire the mm.
We simplify the get/put mechanics by always taking a reference to
the mm, even if we are in the context of (task == current).
Additionally, since the mempolicy will become externally modifiable,
we need to take the task lock to acquire task->mempolicy safely,
regardless of whether we are operating on current or not.
Signed-off-by: Gregory Price <gregory.price@memverge.com>
---
mm/mempolicy.c | 43 +++++++++++++++++++++++++++++--------------
1 file changed, 29 insertions(+), 14 deletions(-)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9ea3e1bfc002..4519f39b1a07 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -899,8 +899,9 @@ static int lookup_node(struct mm_struct *mm, unsigned long addr)
}
/* Retrieve NUMA policy */
-static long do_get_mempolicy(int *policy, nodemask_t *nmask,
- unsigned long addr, unsigned long flags)
+static long do_get_mempolicy(struct task_struct *task, int *policy,
+ nodemask_t *nmask, unsigned long addr,
+ unsigned long flags)
{
int err;
struct mm_struct *mm;
@@ -915,9 +916,9 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
if (flags & (MPOL_F_NODE|MPOL_F_ADDR))
return -EINVAL;
*policy = 0; /* just so it's initialized */
- task_lock(current);
- *nmask = cpuset_current_mems_allowed;
- task_unlock(current);
+ task_lock(task);
+ *nmask = task->mems_allowed;
+ task_unlock(task);
return 0;
}
@@ -928,7 +929,16 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
* vma/shared policy at addr is NULL. We
* want to return MPOL_DEFAULT in this case.
*/
- mm = current->mm;
+ if (task == current) {
+ /*
+ * original behavior allows a kernel task changing its
+ * own policy to avoid the condition in get_task_mm,
+ * so we'll directly access
+ */
+ mm = task->mm;
+ mmget(mm);
+ } else
+ mm = get_task_mm(task);
mmap_read_lock(mm);
vma = vma_lookup(mm, addr);
if (!vma) {
@@ -947,8 +957,10 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
return -EINVAL;
else {
/* take a reference of the task policy now */
- pol = current->mempolicy;
+ task_lock(task);
+ pol = task->mempolicy;
mpol_get(pol);
+ task_unlock(task);
}
if (!pol) {
@@ -962,12 +974,13 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
vma = NULL;
mmap_read_unlock(mm);
err = lookup_node(mm, addr);
+ mmput(mm);
if (err < 0)
goto out;
*policy = err;
- } else if (pol == current->mempolicy &&
+ } else if (pol == task->mempolicy &&
pol->mode == MPOL_INTERLEAVE) {
- *policy = next_node_in(current->il_prev, pol->nodes);
+ *policy = next_node_in(task->il_prev, pol->nodes);
} else {
err = -EINVAL;
goto out;
@@ -987,9 +1000,9 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
if (mpol_store_user_nodemask(pol)) {
*nmask = pol->w.user_nodemask;
} else {
- task_lock(current);
+ task_lock(task);
get_policy_nodemask(pol, nmask);
- task_unlock(current);
+ task_unlock(task);
}
}
@@ -1704,7 +1717,8 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
}
/* Retrieve NUMA policy */
-static int kernel_get_mempolicy(int __user *policy,
+static int kernel_get_mempolicy(struct task_struct *task,
+ int __user *policy,
unsigned long __user *nmask,
unsigned long maxnode,
unsigned long addr,
@@ -1719,7 +1733,7 @@ static int kernel_get_mempolicy(int __user *policy,
addr = untagged_addr(addr);
- err = do_get_mempolicy(&pval, &nodes, addr, flags);
+ err = do_get_mempolicy(task, &pval, &nodes, addr, flags);
if (err)
return err;
@@ -1737,7 +1751,8 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
unsigned long __user *, nmask, unsigned long, maxnode,
unsigned long, addr, unsigned long, flags)
{
- return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags);
+ return kernel_get_mempolicy(current, policy, nmask, maxnode, addr,
+ flags);
}
bool vma_migratable(struct vm_area_struct *vma)
--
2.39.1
next prev parent reply other threads:[~2023-11-22 21:12 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-22 21:11 [RFC PATCH 00/11] mm/mempolicy: Make task->mempolicy externally modifiable via syscall and procfs Gregory Price
2023-11-22 21:11 ` [RFC PATCH 01/11] mm/mempolicy: refactor do_set_mempolicy for code re-use Gregory Price
2023-11-22 21:11 ` Gregory Price [this message]
2023-11-28 14:07 ` [RFC PATCH 04/11] mm/mempolicy: modify get_mempolicy call stack to take a task argument Michal Hocko
[not found] ` <ZWX1U1gCTXC+lFXn@memverge.com>
2023-11-28 14:49 ` Michal Hocko
2023-11-22 21:11 ` [RFC PATCH 07/11] mm/mempolicy: add task mempolicy syscall variants Gregory Price
2023-11-22 21:11 ` [RFC PATCH 08/11] mm/mempolicy: export replace_mempolicy for use by procfs Gregory Price
2023-11-22 21:11 ` [RFC PATCH 10/11] mm/mempolicy: mpol_parse_str should ignore trailing characters in nodelist Gregory Price
2023-11-22 21:12 ` [RFC PATCH 11/11] fs/proc: Add mempolicy attribute to allow read/write of task mempolicy Gregory Price
2023-11-22 21:33 ` [RFC PATCH 00/11] mm/mempolicy: Make task->mempolicy externally modifiable via syscall and procfs Andrew Morton
2023-11-22 21:35 ` Andrew Morton
2023-11-22 22:24 ` Gregory Price
2023-11-27 15:29 ` Michal Hocko
2023-11-27 16:14 ` Gregory Price
2023-11-28 9:45 ` Michal Hocko
2023-11-28 13:15 ` Gregory Price
[not found] ` <20231122211200.31620-3-gregory.price@memverge.com>
2023-11-28 14:07 ` [RFC PATCH 02/11] mm/mempolicy: swap cond reference counting logic in do_get_mempolicy Michal Hocko
[not found] ` <ZWX0ytAwmOdooHdZ@memverge.com>
2023-11-28 14:28 ` Michal Hocko
[not found] ` <20231122211200.31620-6-gregory.price@memverge.com>
2023-11-28 14:07 ` [RFC PATCH 05/11] mm/mempolicy: modify set_mempolicy_home_node to take a task argument Michal Hocko
2023-11-28 14:14 ` Gregory Price
[not found] ` <20231122211200.31620-7-gregory.price@memverge.com>
2023-11-28 14:11 ` [RFC PATCH 06/11] mm/mempolicy: modify do_mbind to operate on task argument instead of current Michal Hocko
2023-11-28 14:51 ` Gregory Price
2023-11-28 18:08 ` Gregory Price
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231122211200.31620-5-gregory.price@memverge.com \
--to=gourry.memverge@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=arnd@arndb.de \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=gregory.price@memverge.com \
--cc=hpa@zytor.com \
--cc=linux-api@vger.kernel.org \
--cc=linux-arch@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=luto@kernel.org \
--cc=mhocko@kernel.org \
--cc=mingo@redhat.com \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=x86@kernel.org \
--cc=ying.huang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox