linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	cl@linux-foundation.org, rientjes@google.com
Subject: [BUGFIX][PATCH] oom-kill: fix NUMA consraint check with nodemask v3
Date: Wed, 11 Nov 2009 11:24:04 +0900	[thread overview]
Message-ID: <20091111112404.0026e601.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20091110171704.3800f081.kamezawa.hiroyu@jp.fujitsu.com>

From: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>

Fixing node-oriented allocation handling in oom-kill.c
I myself think this as bugfix not as ehnancement.

In these days, things are changed as
  - alloc_pages() eats nodemask as its arguments, __alloc_pages_nodemask().
  - mempolicy don't maintain its own private zonelists.
  (And cpuset doesn't use nodemask for __alloc_pages_nodemask())

But, current oom-killer's doesn't handle nodemask and it's wrong.

This patch does
  - check nodemask, if nodemask && nodemask doesn't cover all
    node_states[N_HIGH_MEMORY], this is CONSTRAINT_MEMORY_POLICY.
  - Scan all zonelist under nodemask, if it hits cpuset's wall
    this faiulre is from cpuset.
And
  - modifies the caller of out_of_memory not to call oom if __GFP_THISNODE.
    This doesn't change "current" behavior.
    If callers use __GFP_THISNODE, it should handle "page allocation failure"
    by itself.

Changelog: 2009/11/11
 - fixed nodes_equal() calculation.
 - return CONSTRAINT_MEMPOLICY always if given nodemask is not enough big.

Changelog: 2009/11/06
 - fixed lack of oom.h

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com>
---
 drivers/char/sysrq.c |    2 +-
 include/linux/oom.h  |    4 +++-
 mm/oom_kill.c        |   44 ++++++++++++++++++++++++++++++++------------
 mm/page_alloc.c      |   10 ++++++++--
 4 files changed, 44 insertions(+), 16 deletions(-)

Index: mm-test-kernel/drivers/char/sysrq.c
===================================================================
--- mm-test-kernel.orig/drivers/char/sysrq.c
+++ mm-test-kernel/drivers/char/sysrq.c
@@ -339,7 +339,7 @@ static struct sysrq_key_op sysrq_term_op
 
 static void moom_callback(struct work_struct *ignored)
 {
-	out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0);
+	out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0, NULL);
 }
 
 static DECLARE_WORK(moom_work, moom_callback);
Index: mm-test-kernel/mm/oom_kill.c
===================================================================
--- mm-test-kernel.orig/mm/oom_kill.c
+++ mm-test-kernel/mm/oom_kill.c
@@ -196,27 +196,45 @@ unsigned long badness(struct task_struct
 /*
  * Determine the type of allocation constraint.
  */
+#ifdef CONFIG_NUMA
 static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
-						    gfp_t gfp_mask)
+				    gfp_t gfp_mask, nodemask_t *nodemask)
 {
-#ifdef CONFIG_NUMA
 	struct zone *zone;
 	struct zoneref *z;
 	enum zone_type high_zoneidx = gfp_zone(gfp_mask);
-	nodemask_t nodes = node_states[N_HIGH_MEMORY];
+	int ret = CONSTRAINT_NONE;
 
-	for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
-		if (cpuset_zone_allowed_softwall(zone, gfp_mask))
-			node_clear(zone_to_nid(zone), nodes);
-		else
+	/*
+ 	 * The nodemask here is a nodemask passed to alloc_pages(). Now,
+ 	 * cpuset doesn't use this nodemask for its hardwall/softwall/hierarchy
+ 	 * feature. mempolicy is an only user of nodemask here.
+ 	 */
+	if (nodemask) {
+		nodemask_t mask;
+		/* check mempolicy's nodemask contains all N_HIGH_MEMORY */
+		nodes_and(mask, *nodemask, node_states[N_HIGH_MEMORY]);
+		if (!nodes_equal(mask, node_states[N_HIGH_MEMORY]))
+			return CONSTRAINT_MEMORY_POLICY;
+	}
+
+	/* Check this allocation failure is caused by cpuset's wall function */
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+			high_zoneidx, nodemask)
+		if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
 			return CONSTRAINT_CPUSET;
 
-	if (!nodes_empty(nodes))
-		return CONSTRAINT_MEMORY_POLICY;
-#endif
+	/* __GFP_THISNODE never calls OOM.*/
 
 	return CONSTRAINT_NONE;
 }
+#else
+static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
+				gfp_t gfp_mask, nodemask_t *nodemask)
+{
+	return CONSTRAINT_NONE;
+}
+#endif
 
 /*
  * Simple selection loop. We chose the process with the highest
@@ -603,7 +621,8 @@ rest_and_return:
  * OR try to be smart about which process to kill. Note that we
  * don't have to be perfect here, we just have to be good.
  */
-void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
+void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *nodemask)
 {
 	unsigned long freed = 0;
 	enum oom_constraint constraint;
@@ -622,11 +641,12 @@ void out_of_memory(struct zonelist *zone
 	 * Check if there were limitations on the allocation (only relevant for
 	 * NUMA) that may require different handling.
 	 */
-	constraint = constrained_alloc(zonelist, gfp_mask);
+	constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
 	read_lock(&tasklist_lock);
 
 	switch (constraint) {
 	case CONSTRAINT_MEMORY_POLICY:
+		/* kill by process's its own memory alloc limitation */
 		oom_kill_process(current, gfp_mask, order, 0, NULL,
 				"No available memory (MPOL_BIND)");
 		break;
Index: mm-test-kernel/mm/page_alloc.c
===================================================================
--- mm-test-kernel.orig/mm/page_alloc.c
+++ mm-test-kernel/mm/page_alloc.c
@@ -1667,9 +1667,15 @@ __alloc_pages_may_oom(gfp_t gfp_mask, un
 	/* The OOM killer will not help higher order allocs */
 	if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
 		goto out;
-
+	/*
+	 * In usual, GFP_THISNODE contains __GFP_NORETRY and we never hit this.
+	 * Sanity check for bare calls of __GFP_THISNODE, not real OOM.
+	 * Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
+	 */
+	if (gfp_mask & __GFP_THISNODE)
+		goto out;
 	/* Exhausted what can be done so it's blamo time */
-	out_of_memory(zonelist, gfp_mask, order);
+	out_of_memory(zonelist, gfp_mask, order, nodemask);
 
 out:
 	clear_zonelist_oom(zonelist, gfp_mask);
Index: mm-test-kernel/include/linux/oom.h
===================================================================
--- mm-test-kernel.orig/include/linux/oom.h
+++ mm-test-kernel/include/linux/oom.h
@@ -10,6 +10,7 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
+#include <linux/nodemask.h>
 
 struct zonelist;
 struct notifier_block;
@@ -26,7 +27,8 @@ enum oom_constraint {
 extern int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
 
-extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
+extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
+		int order, nodemask_t *mask);
 extern int register_oom_notifier(struct notifier_block *nb);
 extern int unregister_oom_notifier(struct notifier_block *nb);
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2009-11-11  2:26 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-04  8:09 [BUGFIX][PATCH] oom-kill: fix NUMA consraint check with nodemask KAMEZAWA Hiroyuki
2009-11-06  0:02 ` [BUGFIX][PATCH] oom-kill: fix NUMA consraint check with nodemask v2 KAMEZAWA Hiroyuki
2009-11-10  7:24   ` KOSAKI Motohiro
2009-11-10  7:24     ` KAMEZAWA Hiroyuki
2009-11-10  7:39       ` KOSAKI Motohiro
2009-11-10  7:40         ` KAMEZAWA Hiroyuki
2009-11-10  8:03           ` Daisuke Nishimura
2009-11-10  8:17             ` KAMEZAWA Hiroyuki
2009-11-11  2:24               ` KAMEZAWA Hiroyuki [this message]
2009-11-11  2:36                 ` [BUGFIX][PATCH] oom-kill: fix NUMA consraint check with nodemask v3 KOSAKI Motohiro
2009-11-11  2:49                 ` David Rientjes
2009-11-11  3:02                   ` KOSAKI Motohiro
2009-11-11  3:10                     ` KAMEZAWA Hiroyuki
2009-11-11  3:14                     ` David Rientjes
2009-11-11  3:23                       ` KOSAKI Motohiro
2009-11-11  3:27                         ` David Rientjes
2009-11-11  3:04                   ` KAMEZAWA Hiroyuki
2009-11-11  4:45                 ` [BUGFIX][PATCH] oom-kill: fix NUMA consraint check with nodemask v4 KAMEZAWA Hiroyuki
2009-11-11  5:28                   ` [BUGFIX][PATCH] oom-kill: fix NUMA consraint check with nodemask v4.1 KAMEZAWA Hiroyuki
2009-11-11  5:58                     ` David Rientjes
2009-11-11  6:20                       ` KAMEZAWA Hiroyuki
2009-11-11  6:26                         ` David Rientjes
2009-11-11  6:34                           ` [BUGFIX][PATCH] oom-kill: fix NUMA consraint check with nodemask v4.2 KAMEZAWA Hiroyuki
2009-11-11  7:32                             ` David Rientjes
2009-11-18  0:11                             ` David Rientjes
2009-11-18  0:58                               ` KAMEZAWA Hiroyuki
2009-11-18  2:13                                 ` David Rientjes
2009-12-15  1:16                                   ` Andrew Morton
2009-12-15  1:32                                     ` KAMEZAWA Hiroyuki
2009-12-15  1:38                                       ` KOSAKI Motohiro
2009-12-15  4:30                                       ` David Rientjes
2009-12-15  4:35                                         ` KAMEZAWA Hiroyuki
2009-12-15  4:54                                           ` David Rientjes
2009-12-15  5:19                                             ` KOSAKI Motohiro
2009-12-17 22:21                                               ` David Rientjes
2009-12-18  4:30                                                 ` KOSAKI Motohiro
2009-12-18 10:04                                                   ` David Rientjes
2009-12-15  4:57                                           ` KAMEZAWA Hiroyuki
2009-12-15  4:43                                         ` KAMEZAWA Hiroyuki
2009-12-15  4:57                                           ` David Rientjes
2009-12-15  5:09                                             ` KAMEZAWA Hiroyuki
2009-12-17 22:23                                               ` David Rientjes
2009-12-17 23:33                                                 ` KAMEZAWA Hiroyuki
2009-12-15  4:47                                         ` KOSAKI Motohiro
2009-12-15  5:03                                           ` David Rientjes
2009-11-18  1:41                               ` Daisuke Nishimura

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091111112404.0026e601.kamezawa.hiroyu@jp.fujitsu.com \
    --to=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=akpm@linux-foundation.org \
    --cc=cl@linux-foundation.org \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nishimura@mxp.nes.nec.co.jp \
    --cc=rientjes@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox