From: Dave Peterson <dsp@llnl.gov>
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org, riel@surriel.com, akpm@osdl.org
Subject: [PATCH 1/2] mm: serialize OOM kill operations
Date: Tue, 25 Apr 2006 17:01:31 -0700 [thread overview]
Message-ID: <200604251701.31899.dsp@llnl.gov> (raw)
The patch below modifies the behavior of the OOM killer so that only
one OOM kill operation can be in progress at a time. When running a
test program that eats lots of memory, I was observing behavior where
the OOM killer gets impatient and shoots one or more system daemons
in addition to the program that is eating lots of memory. This patch fixes
that by refusing to start a new OOM kill until mmput() has finished cleaning
out the previous victim's mm_struct.
Signed-off-by: David S. Peterson <dsp@llnl.gov>
---
This patch applies to kernel 2.6.17-rc2-git7.
Index: git7-oom/include/linux/sched.h
===================================================================
--- git7-oom.orig/include/linux/sched.h 2006-04-25 16:19:40.000000000 -0700
+++ git7-oom/include/linux/sched.h 2006-04-25 16:21:48.000000000 -0700
@@ -350,6 +350,8 @@ struct mm_struct {
/* aio bits */
rwlock_t ioctx_list_lock;
struct kioctx *ioctx_list;
+
+ int oom_notify;
};
struct sighand_struct {
Index: git7-oom/include/linux/swap.h
===================================================================
--- git7-oom.orig/include/linux/swap.h 2006-04-25 16:18:06.000000000 -0700
+++ git7-oom/include/linux/swap.h 2006-04-25 16:21:48.000000000 -0700
@@ -147,6 +147,7 @@ struct swap_list_t {
#define vm_swap_full() (nr_swap_pages*2 < total_swap_pages)
/* linux/mm/oom_kill.c */
+extern void oom_kill_finish(void);
extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
/* linux/mm/memory.c */
Index: git7-oom/kernel/fork.c
===================================================================
--- git7-oom.orig/kernel/fork.c 2006-04-25 16:19:40.000000000 -0700
+++ git7-oom/kernel/fork.c 2006-04-25 16:21:48.000000000 -0700
@@ -328,6 +328,7 @@ static struct mm_struct * mm_init(struct
mm->ioctx_list = NULL;
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
+ mm->oom_notify = 0;
if (likely(!mm_alloc_pgd(mm))) {
mm->def_flags = 0;
@@ -379,6 +380,10 @@ void mmput(struct mm_struct *mm)
spin_unlock(&mmlist_lock);
}
put_swap_token(mm);
+
+ if (unlikely(mm->oom_notify))
+ oom_kill_finish();
+
mmdrop(mm);
}
}
Index: git7-oom/mm/oom_kill.c
===================================================================
--- git7-oom.orig/mm/oom_kill.c 2006-04-25 16:19:40.000000000 -0700
+++ git7-oom/mm/oom_kill.c 2006-04-25 16:21:48.000000000 -0700
@@ -21,9 +21,34 @@
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/cpuset.h>
+#include <asm/bitops.h>
/* #define DEBUG */
+volatile unsigned long oom_kill_in_progress = 0; /* bit 0: OOM kill running */
+
+/*
+ * Test-and-set bit 0 of oom_kill_in_progress to claim the OOM kill. Return 0
+ * on success, or 1 if an OOM kill is already in progress.
+ */
+static inline int oom_kill_start(void)
+{
+ return test_and_set_bit(0, &oom_kill_in_progress);
+}
+
+/*
+ * Terminate an OOM kill operation by clearing bit 0 of oom_kill_in_progress.
+ *
+ * When the OOM killer chooses a victim, it sets the oom_notify flag of the
+ * victim's mm_struct. mmput() then calls this function when the mm_users
+ * count has reached 0 and the contents of the mm_struct have been cleaned
+ * out. out_of_memory() also calls it directly when it cancels an operation.
+ */
+void oom_kill_finish(void)
+{
+ clear_bit(0, &oom_kill_in_progress);
+}
+
/**
* oom_badness - calculate a numeric value for how bad this task has been
* @p: task struct of which task we should calculate
@@ -259,27 +284,31 @@ static int oom_kill_task(task_t *p, cons
struct mm_struct *mm;
task_t * g, * q;
+ task_lock(p);
mm = p->mm;
- /* WARNING: mm may not be dereferenced since we did not obtain its
- * value from get_task_mm(p). This is OK since all we need to do is
- * compare mm to q->mm below.
+ if (mm == NULL || mm == &init_mm) {
+ task_unlock(p);
+ return 1;
+ }
+
+ mm->oom_notify = 1;
+ task_unlock(p);
+
+ /* WARNING: mm must not be dereferenced from here on, because we hold
+ * no reference to it (we did not obtain it from get_task_mm(p)). This
+ * is OK since all we need to do is compare mm to q->mm below.
*
* Furthermore, even if mm contains a non-NULL value, p->mm may
- * change to NULL at any time since we do not hold task_lock(p).
+ * change to NULL at any time since we no longer hold task_lock(p).
* However, this is of no concern to us.
*/
- if (mm == NULL || mm == &init_mm)
- return 1;
-
- __oom_kill_task(p, message);
/*
- * kill all processes that share the ->mm (i.e. all threads),
- * but are in a different thread group
+ * kill all processes that share the ->mm (i.e. all threads)
*/
do_each_thread(g, q)
- if (q->mm == mm && q->tgid != p->tgid)
+ if (q->mm == mm)
__oom_kill_task(q, message);
while_each_thread(g, q);
@@ -317,13 +346,14 @@ void out_of_memory(struct zonelist *zone
{
task_t *p;
unsigned long points = 0;
+ int cancel = 0;
- if (printk_ratelimit()) {
- printk("oom-killer: gfp_mask=0x%x, order=%d\n",
- gfp_mask, order);
- dump_stack();
- show_mem();
- }
+ if (oom_kill_start())
+ return; /* OOM kill already in progress */
+
+ printk("oom-killer: gfp_mask=0x%x, order=%d\n", gfp_mask, order);
+ dump_stack();
+ show_mem();
cpuset_lock();
read_lock(&tasklist_lock);
@@ -334,12 +364,12 @@ void out_of_memory(struct zonelist *zone
*/
switch (constrained_alloc(zonelist, gfp_mask)) {
case CONSTRAINT_MEMORY_POLICY:
- oom_kill_process(current, points,
+ cancel = oom_kill_process(current, points,
"No available memory (MPOL_BIND)");
break;
case CONSTRAINT_CPUSET:
- oom_kill_process(current, points,
+ cancel = oom_kill_process(current, points,
"No available memory in cpuset");
break;
@@ -351,8 +381,10 @@ retry:
*/
p = select_bad_process(&points);
- if (PTR_ERR(p) == -1UL)
+ if (PTR_ERR(p) == -1UL) {
+ cancel = 1;
goto out;
+ }
/* Found nothing?!?! Either we hang forever, or we panic. */
if (!p) {
@@ -371,6 +403,9 @@ out:
read_unlock(&tasklist_lock);
cpuset_unlock();
+ if (cancel)
+ oom_kill_finish(); /* cancel OOM kill operation */
+
/*
* Give "p" a good chance of killing itself before we
* retry to allocate memory unless "p" is current
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next reply other threads:[~2006-04-26 0:01 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-04-26 0:01 Dave Peterson [this message]
2006-04-26 4:10 ` Nick Piggin
2006-04-26 17:14 ` Dave Peterson
2006-04-27 3:33 ` Nick Piggin
2006-04-27 16:56 ` Dave Peterson
2006-04-28 5:00 ` Nick Piggin
2006-04-28 5:05 ` Nick Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200604251701.31899.dsp@llnl.gov \
--to=dsp@llnl.gov \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=riel@surriel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox