From: Christoph Lameter <cl@linux.com>
To: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
akpm@linux-foundation.org
Subject: Re: [RFC][PATCH] fix move/migrate_pages() race on task struct
Date: Tue, 28 Feb 2012 13:30:19 -0600 (CST) [thread overview]
Message-ID: <alpine.DEB.2.00.1202281329190.25590@router.home> (raw)
In-Reply-To: <alpine.DEB.2.00.1202271636230.6435@router.home>
Here is a cleaned up version of the patch. No longer rely on the
task_struct pointer to be NULL for release of the refcount.
Subject: migration: Fix rcu and task refcounting
Migration functions perform the rcu_read_unlock too early. As a result the
task pointed to may change from under us.
The following patch extend the period of the rcu_read_lock until after the
permissions checks are done. We also take a refcount so that the task
reference is stable when calling security check functions and performing
cpuset node validation (which takes a mutex).
The refcount is dropped before actual page migration occurs so there is no
change to the refcounts held during page migration.
Also move the determination of the mm of the task struct to immediately
before the do_migrate*() calls so that it is clear that we switch from
handling the task during permission checks to the mm for the actual
migration. Since the determination is only done once and we then no longer
use the task_struct we can be sure that we operate on a specific address
space that will not change from under us.
Signed-off-by: Christoph Lameter <cl@linux.com>
---
mm/mempolicy.c | 32 +++++++++++++++++++-------------
mm/migrate.c | 36 +++++++++++++++++++-----------------
2 files changed, 38 insertions(+), 30 deletions(-)
Index: linux-2.6/mm/mempolicy.c
===================================================================
--- linux-2.6.orig/mm/mempolicy.c 2012-02-28 03:56:41.236184783 -0600
+++ linux-2.6/mm/mempolicy.c 2012-02-28 07:22:35.245552282 -0600
@@ -1322,12 +1322,9 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
err = -ESRCH;
goto out;
}
- mm = get_task_mm(task);
- rcu_read_unlock();
+ get_task_struct(task);
err = -EINVAL;
- if (!mm)
- goto out;
/*
* Check if this process has the right to modify the specified
@@ -1335,14 +1332,13 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- rcu_read_lock();
tcred = __task_cred(task);
if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
cred->uid != tcred->suid && cred->uid != tcred->uid &&
!capable(CAP_SYS_NICE)) {
rcu_read_unlock();
err = -EPERM;
- goto out;
+ goto out_put;
}
rcu_read_unlock();
@@ -1350,26 +1346,36 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
/* Is the user allowed to access the target nodes? */
if (!nodes_subset(*new, task_nodes) && !capable(CAP_SYS_NICE)) {
err = -EPERM;
- goto out;
+ goto out_put;
}
if (!nodes_subset(*new, node_states[N_HIGH_MEMORY])) {
err = -EINVAL;
- goto out;
+ goto out_put;
}
err = security_task_movememory(task);
if (err)
- goto out;
+ goto out_put;
- err = do_migrate_pages(mm, old, new,
- capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
-out:
+ mm = get_task_mm(task);
+ put_task_struct(task);
if (mm)
- mmput(mm);
+ err = do_migrate_pages(mm, old, new,
+ capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
+ else
+ err = -EINVAL;
+
+ mmput(mm);
+out:
NODEMASK_SCRATCH_FREE(scratch);
return err;
+
+out_put:
+ put_task_struct(task);
+ goto out;
+
}
Index: linux-2.6/mm/migrate.c
===================================================================
--- linux-2.6.orig/mm/migrate.c 2012-02-28 06:15:38.239956766 -0600
+++ linux-2.6/mm/migrate.c 2012-02-28 07:18:08.237559577 -0600
@@ -1176,20 +1176,17 @@ set_status:
* Migrate an array of page address onto an array of nodes and fill
* the corresponding array of status.
*/
-static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
unsigned long nr_pages,
const void __user * __user *pages,
const int __user *nodes,
int __user *status, int flags)
{
struct page_to_node *pm;
- nodemask_t task_nodes;
unsigned long chunk_nr_pages;
unsigned long chunk_start;
int err;
- task_nodes = cpuset_mems_allowed(task);
-
err = -ENOMEM;
pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
if (!pm)
@@ -1351,6 +1348,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
struct task_struct *task;
struct mm_struct *mm;
int err;
+ nodemask_t task_nodes;
/* Check flags */
if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1366,11 +1364,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
rcu_read_unlock();
return -ESRCH;
}
- mm = get_task_mm(task);
- rcu_read_unlock();
-
- if (!mm)
- return -EINVAL;
+ get_task_struct(task);
/*
* Check if this process has the right to modify the specified
@@ -1378,7 +1372,6 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- rcu_read_lock();
tcred = __task_cred(task);
if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
cred->uid != tcred->suid && cred->uid != tcred->uid &&
@@ -1393,16 +1386,25 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
if (err)
goto out;
- if (nodes) {
- err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
- flags);
- } else {
- err = do_pages_stat(mm, nr_pages, pages, status);
- }
+ task_nodes = cpuset_mems_allowed(task);
+ mm = get_task_mm(task);
+ put_task_struct(task);
+
+ if (mm) {
+ if (nodes)
+ err = do_pages_move(mm, task_nodes, nr_pages, pages, nodes,
+ status, flags);
+ else
+ err = do_pages_stat(mm, nr_pages, pages, status);
+ } else
+ err = -EINVAL;
-out:
mmput(mm);
return err;
+
+out:
+ put_task_struct(task);
+ return err;
}
/*
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2012-02-28 19:30 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-23 18:07 Dave Hansen
2012-02-23 18:45 ` Andi Kleen
2012-02-23 18:45 ` Christoph Lameter
2012-02-23 19:10 ` Dave Hansen
2012-02-23 19:40 ` Christoph Lameter
2012-02-23 20:04 ` Dave Hansen
2012-02-23 21:41 ` Christoph Lameter
2012-02-24 3:14 ` Eric W. Biederman
2012-02-24 15:20 ` Christoph Lameter
2012-02-24 15:41 ` Eric W. Biederman
2012-02-24 16:48 ` Dave Hansen
2012-02-24 16:54 ` Christoph Lameter
2012-02-24 17:04 ` Dave Hansen
2012-02-24 17:08 ` Christoph Lameter
2012-02-24 17:25 ` Dave Hansen
2012-02-24 17:32 ` Christoph Lameter
2012-02-24 21:37 ` Dave Hansen
2012-02-24 23:12 ` Eric W. Biederman
2012-02-27 16:43 ` Christoph Lameter
2012-02-25 12:13 ` Eric W. Biederman
2012-02-27 19:01 ` Christoph Lameter
2012-02-27 20:15 ` Eric W. Biederman
2012-02-27 22:39 ` Christoph Lameter
2012-02-28 19:30 ` Christoph Lameter [this message]
2012-02-29 20:31 ` Andrew Morton
2012-02-29 20:33 ` Christoph Lameter
2012-02-29 20:36 ` Dave Hansen
2012-02-24 17:07 ` KOSAKI Motohiro
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=alpine.DEB.2.00.1202281329190.25590@router.home \
--to=cl@linux.com \
--cc=akpm@linux-foundation.org \
--cc=dave@linux.vnet.ibm.com \
--cc=ebiederm@xmission.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox