From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
To: Andrew Morton <akpm@linux-foundation.org>,
Linus Torvalds <torvalds@linux-foundation.org>
Cc: kosaki.motohiro@jp.fujitsu.com,
LKML <linux-kernel@vger.kernel.org>,
linux-mm <linux-mm@kvack.org>,
pageexec@freemail.hu, Solar Designer <solar@openwall.com>,
Eugene Teo <eteo@redhat.com>,
Brad Spengler <spender@grsecurity.net>,
Oleg Nesterov <oleg@redhat.com>,
Roland McGrath <roland@redhat.com>
Subject: [resend][PATCH 4/4] oom: don't ignore rss in nascent mm
Date: Mon, 25 Oct 2010 12:29:50 +0900 (JST) [thread overview]
Message-ID: <20101025122914.9173.A69D9226@jp.fujitsu.com> (raw)
In-Reply-To: <20101025122538.9167.A69D9226@jp.fujitsu.com>
ChangeLog
o since v2
- Move ->in_exec_mm from task_struct to signal_struct
- clean up oom_rss_swap_usage()
o since v1
- Always use thread group leader's ->in_exec_mm.
This makes the OOM killer slightly more efficient when a process has many threads.
- Add the link of Brad's explanation to the description.
-----------------------------------------------------------
Brad Spengler published a local memory-allocation DoS that
evades the OOM-killer (though not the virtual memory RLIMIT):
http://www.grsecurity.net/~spender/64bit_dos.c
execve() creates a new mm struct, sets up the stack, and copies
argv into it. This means the task temporarily has two mms during execve().
Unfortunately, no task points to this nascent mm, so the
OOM-killer can't detect its memory usage. Therefore, the OOM-killer
may kill the wrong task.
Thus, this patch adds a signal->in_exec_mm member and tracks
nascent mm usage.
Cc: stable@kernel.org
Cc: pageexec@freemail.hu
Cc: Roland McGrath <roland@redhat.com>
Cc: Solar Designer <solar@openwall.com>
Cc: Eugene Teo <eteo@redhat.com>
Reported-by: Brad Spengler <spender@grsecurity.net>
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
---
fs/compat.c | 4 +++-
fs/exec.c | 16 +++++++++++++++-
include/linux/binfmts.h | 1 +
include/linux/sched.h | 1 +
mm/oom_kill.c | 26 +++++++++++++++++++-------
5 files changed, 39 insertions(+), 9 deletions(-)
diff --git a/fs/compat.c b/fs/compat.c
index 0644a15..a85b196 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1567,8 +1567,10 @@ int compat_do_execve(char * filename,
return retval;
out:
- if (bprm->mm)
+ if (bprm->mm) {
+ set_exec_mm(NULL);
mmput(bprm->mm);
+ }
out_file:
if (bprm->file) {
diff --git a/fs/exec.c b/fs/exec.c
index 94dabd2..2395d10 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -347,6 +347,8 @@ int bprm_mm_init(struct linux_binprm *bprm)
if (err)
goto err;
+ set_exec_mm(mm);
+
return 0;
err:
@@ -759,6 +761,7 @@ static int exec_mmap(struct mm_struct *mm)
tsk->mm = mm;
tsk->active_mm = mm;
activate_mm(active_mm, mm);
+ tsk->signal->in_exec_mm = NULL;
task_unlock(tsk);
arch_pick_mmap_layout(mm);
if (old_mm) {
@@ -1328,6 +1331,15 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
EXPORT_SYMBOL(search_binary_handler);
+void set_exec_mm(struct mm_struct *mm)
+{
+ struct task_struct *leader = current->group_leader;
+
+ task_lock(leader);
+ leader->signal->in_exec_mm = mm;
+ task_unlock(leader);
+}
+
/*
* sys_execve() executes a new program.
*/
@@ -1416,8 +1428,10 @@ int do_execve(const char * filename,
return retval;
out:
- if (bprm->mm)
+ if (bprm->mm) {
+ set_exec_mm(NULL);
mmput (bprm->mm);
+ }
out_file:
if (bprm->file) {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a065612..2fde1ba 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -133,6 +133,7 @@ extern void install_exec_creds(struct linux_binprm *bprm);
extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
extern void set_binfmt(struct linux_binfmt *new);
extern void free_bprm(struct linux_binprm *);
+extern void set_exec_mm(struct mm_struct *mm);
#endif /* __KERNEL__ */
#endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ac65605..b880931 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -630,6 +630,7 @@ struct signal_struct {
struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations
* (notably. ptrace) */
+ struct mm_struct *in_exec_mm; /* temporary nascent mm in execve */
};
/* Context switch must be unlocked if interrupts are to be enabled */
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index d58925e..830065f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -120,6 +120,15 @@ struct task_struct *find_lock_task_mm(struct task_struct *p)
return NULL;
}
+/*
+ * The baseline for the badness score is the proportion of RAM that each
+ * task's rss and swap space use.
+ */
+static unsigned long oom_rss_swap_usage(struct mm_struct *mm)
+{
+ return get_mm_rss(mm) + get_mm_counter(mm, MM_SWAPENTS);
+}
+
/* return true if the task is not adequate as candidate victim task. */
static bool oom_unkillable_task(struct task_struct *p,
const struct mem_cgroup *mem, const nodemask_t *nodemask)
@@ -151,7 +160,7 @@ static bool oom_unkillable_task(struct task_struct *p,
unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *mem,
const nodemask_t *nodemask)
{
- unsigned long points;
+ unsigned long points = 0;
unsigned long points_orig;
int oom_adj = p->signal->oom_adj;
long oom_score_adj = p->signal->oom_score_adj;
@@ -169,15 +178,18 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *mem,
if (p->flags & PF_OOM_ORIGIN)
return ULONG_MAX;
+ /* The task is now processing execve(). then it has second mm */
+ if (unlikely(p->signal->in_exec_mm)) {
+ task_lock(p->group_leader);
+ if (p->signal->in_exec_mm)
+ points = oom_rss_swap_usage(p->signal->in_exec_mm);
+ task_unlock(p->group_leader);
+ }
+
p = find_lock_task_mm(p);
if (!p)
return 0;
-
- /*
- * The baseline for the badness score is the proportion of RAM that each
- * task's rss and swap space use.
- */
- points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS));
+ points += oom_rss_swap_usage(p->mm);
task_unlock(p);
/*
--
1.6.5.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2010-10-25 3:29 UTC|newest]
Thread overview: 109+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-10-25 3:26 [resend][PATCH 1/4] oom: remove totalpage normalization from oom_badness() KOSAKI Motohiro
2010-10-25 3:27 ` [resend][PATCH 2/4] Revert "oom: deprecate oom_adj tunable" KOSAKI Motohiro
2010-10-25 20:40 ` David Rientjes
2010-10-26 13:01 ` KOSAKI Motohiro
2010-10-26 19:37 ` David Rientjes
2010-11-01 7:06 ` KOSAKI Motohiro
2010-11-01 19:36 ` David Rientjes
2010-11-09 2:26 ` KOSAKI Motohiro
2010-11-09 3:28 ` KOSAKI Motohiro
2010-11-15 0:24 ` KOSAKI Motohiro
2010-11-15 9:59 ` David Rientjes
2010-11-09 23:33 ` David Rientjes
2010-11-09 23:35 ` Alan Cox
2010-11-09 23:48 ` David Rientjes
2010-11-09 23:55 ` [patch] oom: document obsolete oom_adj tunable David Rientjes
2010-11-15 0:22 ` KOSAKI Motohiro
2010-11-15 10:38 ` David Rientjes
2010-11-23 7:16 ` KOSAKI Motohiro
2010-11-14 5:07 ` [resend][PATCH 2/4] Revert "oom: deprecate oom_adj tunable" KOSAKI Motohiro
2010-11-14 21:39 ` David Rientjes
2010-11-23 7:16 ` KOSAKI Motohiro
2010-11-28 1:41 ` David Rientjes
2010-11-30 13:03 ` KOSAKI Motohiro
2010-11-30 20:07 ` David Rientjes
2010-10-25 3:28 ` [resend][PATCH 3/4] move cred_guard_mutex from task_struct to signal_struct KOSAKI Motohiro
2010-10-25 17:26 ` Roland McGrath
2010-10-25 17:42 ` Oleg Nesterov
2010-10-25 17:51 ` Roland McGrath
2010-10-26 13:04 ` KOSAKI Motohiro
2010-10-26 13:18 ` Roland McGrath
2010-10-25 3:29 ` KOSAKI Motohiro [this message]
2010-10-25 11:28 ` [resend][PATCH 4/4] oom: don't ignore rss in nascent mm pageexec
2010-10-26 7:25 ` KOSAKI Motohiro
2010-11-23 14:34 ` Oleg Nesterov
2010-11-24 0:24 ` KOSAKI Motohiro
2010-11-24 11:09 ` Oleg Nesterov
2010-11-25 11:06 ` KOSAKI Motohiro
2010-11-25 14:02 ` Oleg Nesterov
2010-11-25 19:36 ` Oleg Nesterov
2010-11-29 5:25 ` KOSAKI Motohiro
2010-11-29 11:33 ` Oleg Nesterov
2010-11-29 18:23 ` Oleg Nesterov
2010-11-30 19:54 ` [PATCH 0/2] exec: more excessive argument size fixes for 2.6.37/stable Oleg Nesterov
2010-11-30 19:55 ` [PATCH 1/2] exec: make argv/envp memory visible to oom-killer Oleg Nesterov
2010-12-01 0:12 ` KOSAKI Motohiro
2010-12-01 18:07 ` Oleg Nesterov
2010-11-30 19:56 ` [PATCH 2/2] exec: copy-and-paste the fixes into compat_do_execve() paths Oleg Nesterov
2010-12-01 3:04 ` KOSAKI Motohiro
2010-11-30 20:00 ` [PATCH 0/4] exec: unify compat/non-compat code Oleg Nesterov
2010-11-30 20:00 ` [PATCH 1/4] exec: introduce get_arg_ptr() helper Oleg Nesterov
2010-11-30 20:01 ` [PATCH 2/4] exec: introduce "bool compat" argument Oleg Nesterov
2010-11-30 20:01 ` [PATCH 3/4] exec: unify compat_do_execve() code Oleg Nesterov
2010-12-01 17:37 ` (No subject header) Milton Miller
2010-12-01 18:27 ` Oleg Nesterov
2011-02-25 17:52 ` [PATCH 0/4 RESEND] exec: unify compat/non-compat code Oleg Nesterov
2011-02-25 17:52 ` [PATCH 1/5] exec: introduce get_arg_ptr() helper Oleg Nesterov
2011-02-25 17:52 ` [PATCH 2/5] exec: introduce "bool compat" argument Oleg Nesterov
2011-02-25 18:57 ` Linus Torvalds
2011-02-26 12:37 ` Oleg Nesterov
2011-02-25 17:53 ` [PATCH 3/5] exec: unify compat_do_execve() code Oleg Nesterov
2011-02-25 19:10 ` Linus Torvalds
2011-02-26 12:37 ` Oleg Nesterov
2011-02-26 12:57 ` Oleg Nesterov
2011-02-26 15:55 ` Linus Torvalds
2011-02-26 17:44 ` Oleg Nesterov
2011-03-01 20:47 ` [PATCH v2 0/5] exec: unify native/compat code Oleg Nesterov
2011-03-01 20:48 ` [PATCH v2 1/5] exec: introduce get_arg_ptr() helper Oleg Nesterov
2011-03-01 20:48 ` [PATCH v2 2/5] exec: introduce "bool compat" argument Oleg Nesterov
2011-03-01 20:48 ` [PATCH v2 3/5] exec: introduce conditional_user_ptr_t Oleg Nesterov
2011-03-01 20:49 ` [PATCH v2 4/5] exec: unify do_execve/compat_do_execve code Oleg Nesterov
2011-03-01 20:49 ` [PATCH v2 5/5] exec: document acct_arg_size() Oleg Nesterov
2011-03-01 21:39 ` [PATCH v2 0/5] exec: unify native/compat code Linus Torvalds
2011-03-02 16:26 ` [PATCH v3 0/4] " Oleg Nesterov
2011-03-02 16:27 ` [PATCH v3 1/4] exec: introduce get_arg_ptr() helper Oleg Nesterov
2011-03-03 3:01 ` KOSAKI Motohiro
2011-03-03 15:47 ` Oleg Nesterov
2011-03-03 16:07 ` Linus Torvalds
2011-03-05 20:30 ` [PATCH v4 0/4] exec: unify native/compat code Oleg Nesterov
2011-03-05 20:31 ` [PATCH v4 1/4] exec: introduce get_user_arg_ptr() helper Oleg Nesterov
2011-03-05 20:31 ` [PATCH v4 2/4] exec: introduce struct user_arg_ptr Oleg Nesterov
2011-03-05 20:31 ` [PATCH v4 3/4] exec: unify do_execve/compat_do_execve code Oleg Nesterov
2011-03-05 20:52 ` Linus Torvalds
2011-03-05 21:20 ` Oleg Nesterov
2011-03-05 20:31 ` [PATCH v4 4/4] exec: document acct_arg_size() Oleg Nesterov
2011-03-06 12:04 ` [PATCH v4 0/4] exec: unify native/compat code KOSAKI Motohiro
2011-03-06 17:01 ` [PATCH v5 " Oleg Nesterov
2011-03-06 17:02 ` [PATCH v5 1/4] exec: introduce get_user_arg_ptr() helper Oleg Nesterov
2011-03-06 17:02 ` [PATCH v5 2/4] exec: introduce struct user_arg_ptr Oleg Nesterov
2011-03-06 17:02 ` [PATCH v5 3/4] exec: unify do_execve/compat_do_execve code Oleg Nesterov
2011-03-06 17:03 ` [PATCH v5 4/4] exec: document acct_arg_size() Oleg Nesterov
2011-03-02 16:27 ` [PATCH v3 2/4] exec: introduce struct conditional_ptr Oleg Nesterov
2011-03-03 3:08 ` KOSAKI Motohiro
2011-03-02 16:27 ` [PATCH v3 3/4] exec: unify do_execve/compat_do_execve code Oleg Nesterov
2011-03-03 3:13 ` KOSAKI Motohiro
2011-03-02 16:28 ` [PATCH v3 4/4] exec: document acct_arg_size() Oleg Nesterov
2011-03-03 3:09 ` KOSAKI Motohiro
2011-03-02 16:44 ` [PATCH v3 0/4] exec: unify native/compat code Oleg Nesterov
2011-03-02 18:00 ` Linus Torvalds
2011-03-02 19:40 ` David Miller
2011-03-02 19:48 ` Linus Torvalds
2011-03-02 19:54 ` David Miller
2011-02-25 17:53 ` [PATCH 4/5] exec: unexport acct_arg_size() and get_arg_page() Oleg Nesterov
2011-02-25 17:54 ` [PATCH 5/5] exec: document acct_arg_size() Oleg Nesterov
2011-02-25 18:54 ` [PATCH 0/4 RESEND] exec: unify compat/non-compat code Linus Torvalds
2011-02-26 12:35 ` Oleg Nesterov
2010-11-30 20:01 ` [PATCH 4/4] exec: unexport acct_arg_size() and get_arg_page() Oleg Nesterov
2010-12-01 3:09 ` [PATCH 0/4] exec: unify compat/non-compat code KOSAKI Motohiro
2010-11-30 0:06 ` [resend][PATCH 4/4] oom: don't ignore rss in nascent mm KOSAKI Motohiro
2010-10-25 20:37 ` [resend][PATCH 1/4] oom: remove totalpage normalization from oom_badness() David Rientjes
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101025122914.9173.A69D9226@jp.fujitsu.com \
--to=kosaki.motohiro@jp.fujitsu.com \
--cc=akpm@linux-foundation.org \
--cc=eteo@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=oleg@redhat.com \
--cc=pageexec@freemail.hu \
--cc=roland@redhat.com \
--cc=solar@openwall.com \
--cc=spender@grsecurity.net \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox