linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [patch 1/3] oom: suppress nodes that are not allowed from meminfo on oom kill
@ 2011-01-12  1:13 David Rientjes
  2011-01-12  1:13 ` [patch 2/3] oom: suppress show_mem() for many nodes in irq context on page alloc failure David Rientjes
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: David Rientjes @ 2011-01-12  1:13 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Mel Gorman, KAMEZAWA Hiroyuki, linux-mm

The oom killer is extremely verbose for machines with a large number of
cpus and/or nodes.  This verbosity can often be harmful if it causes
other important messages to be scrolled from the kernel log and incurs a
significant time delay, specifically for kernels with
CONFIG_NODES_SHIFT > 8.

This patch causes memory information to be displayed only for nodes that
are allowed by current's cpuset when dumping the VM state.  Information
for all other nodes is irrelevant to the oom condition; we don't care if
there's an abundance of memory elsewhere if we can't access it.

This only affects the behavior of dumping memory information when an oom
is triggered.  Other dumps, such as for sysrq+m, still display the
unfiltered form when using the existing show_mem() interface.

Additionally, the per-cpu pageset statistics are extremely verbose in oom
killer output, so they are now suppressed.  This removes

	nodes_weight(current->mems_allowed) * (1 + nr_cpus)

lines from the oom killer output.

Callers may use __show_mem(SHOW_MEM_FILTER_NODES) to filter disallowed
nodes.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 include/linux/mm.h |    8 ++++++++
 lib/show_mem.c     |    9 +++++++--
 mm/oom_kill.c      |    2 +-
 mm/page_alloc.c    |   34 +++++++++++++++++++++++++++++++++-
 4 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -742,7 +742,14 @@ extern void pagefault_out_of_memory(void);
 
 #define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
 
+/*
+ * Flags passed to __show_mem() and __show_free_areas() to suppress output in
+ * various contexts.
+ */
+#define SHOW_MEM_FILTER_NODES	(0x0001u)	/* filter disallowed nodes */
+
 extern void show_free_areas(void);
+extern void __show_free_areas(unsigned int flags);
 
 int shmem_lock(struct file *file, int lock, struct user_struct *user);
 struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags);
@@ -1226,6 +1233,7 @@ extern void calculate_zone_inactive_ratio(struct zone *zone);
 extern void mem_init(void);
 extern void __init mmap_init(void);
 extern void show_mem(void);
+extern void __show_mem(unsigned int flags);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
 extern int after_bootmem;
diff --git a/lib/show_mem.c b/lib/show_mem.c
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -9,14 +9,14 @@
 #include <linux/nmi.h>
 #include <linux/quicklist.h>
 
-void show_mem(void)
+void __show_mem(unsigned int filter)
 {
 	pg_data_t *pgdat;
 	unsigned long total = 0, reserved = 0, shared = 0,
 		nonshared = 0, highmem = 0;
 
 	printk("Mem-Info:\n");
-	show_free_areas();
+	__show_free_areas(filter);
 
 	for_each_online_pgdat(pgdat) {
 		unsigned long i, flags;
@@ -61,3 +61,8 @@ void show_mem(void)
 		quicklist_total_size());
 #endif
 }
+
+void show_mem(void)
+{
+	__show_mem(0);
+}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -396,7 +396,7 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
 	task_unlock(current);
 	dump_stack();
 	mem_cgroup_print_oom_info(mem, p);
-	show_mem();
+	__show_mem(SHOW_MEM_FILTER_NODES);
 	if (sysctl_oom_dump_tasks)
 		dump_tasks(mem, nodemask);
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2358,19 +2358,42 @@ void si_meminfo_node(struct sysinfo *val, int nid)
 }
 #endif
 
+/*
+ * Determine whether the zone's node should be displayed or not, depending on
+ * whether SHOW_MEM_FILTER_NODES was passed to __show_free_areas().
+ */
+static bool skip_free_areas_zone(unsigned int flags, const struct zone *zone)
+{
+	bool ret = false;
+
+	if (!(flags & SHOW_MEM_FILTER_NODES))
+		goto out;
+
+	get_mems_allowed();
+	ret = !node_isset(zone->zone_pgdat->node_id,
+				cpuset_current_mems_allowed);
+	put_mems_allowed();
+out:
+	return ret;
+}
+
 #define K(x) ((x) << (PAGE_SHIFT-10))
 
 /*
  * Show free area list (used inside shift_scroll-lock stuff)
  * We also calculate the percentage fragmentation. We do this by counting the
  * memory on each free list with the exception of the first item on the list.
+ * Suppresses nodes that are not allowed by current's cpuset if
+ * SHOW_MEM_FILTER_NODES is passed.
  */
-void show_free_areas(void)
+void __show_free_areas(unsigned int filter)
 {
 	int cpu;
 	struct zone *zone;
 
 	for_each_populated_zone(zone) {
+		if (skip_free_areas_zone(filter, zone))
+			continue;
 		show_node(zone);
 		printk("%s per-cpu:\n", zone->name);
 
@@ -2412,6 +2435,8 @@ void show_free_areas(void)
 	for_each_populated_zone(zone) {
 		int i;
 
+		if (skip_free_areas_zone(filter, zone))
+			continue;
 		show_node(zone);
 		printk("%s"
 			" free:%lukB"
@@ -2479,6 +2504,8 @@ void show_free_areas(void)
 	for_each_populated_zone(zone) {
 		unsigned long nr[MAX_ORDER], flags, order, total = 0;
 
+		if (skip_free_areas_zone(filter, zone))
+			continue;
 		show_node(zone);
 		printk("%s: ", zone->name);
 
@@ -2498,6 +2525,11 @@ void show_free_areas(void)
 	show_swap_cache_info();
 }
 
+void show_free_areas(void)
+{
+	__show_free_areas(0);
+}
+
 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
 {
 	zoneref->zone = zone;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [patch 2/3] oom: suppress show_mem() for many nodes in irq context on page alloc failure
  2011-01-12  1:13 [patch 1/3] oom: suppress nodes that are not allowed from meminfo on oom kill David Rientjes
@ 2011-01-12  1:13 ` David Rientjes
  2011-01-12  1:13 ` [patch 3/3] oom: suppress nodes that are not allowed from meminfo " David Rientjes
  2011-01-20  2:58 ` [patch 1/3] oom: suppress nodes that are not allowed from meminfo on oom kill David Rientjes
  2 siblings, 0 replies; 4+ messages in thread
From: David Rientjes @ 2011-01-12  1:13 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Mel Gorman, KAMEZAWA Hiroyuki, linux-mm

When a page allocation failure occurs, show_mem() is called to dump the
state of the VM so users may understand what happened to get into that
condition.

This output, however, can be extremely verbose.  In irq context, it may
result in significant delays that incur NMI watchdog timeouts when the
machine is large (we use CONFIG_NODES_SHIFT > 8 here to define a "large"
machine since the length of the show_mem() output is proportional to the
number of possible nodes).

This patch suppresses the show_mem() call in irq context when the kernel
has CONFIG_NODES_SHIFT > 8.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 mm/page_alloc.c |   17 ++++++++++++++++-
 1 files changed, 16 insertions(+), 1 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1700,6 +1700,20 @@ try_next_zone:
 	return page;
 }
 
+/*
+ * Large machines with many possible nodes should not always dump per-node
+ * meminfo in irq context.
+ */
+static inline bool should_suppress_show_mem(void)
+{
+	bool ret = false;
+
+#if NODES_SHIFT > 8
+	ret = in_interrupt();
+#endif
+	return ret;
+}
+
 static inline int
 should_alloc_retry(gfp_t gfp_mask, unsigned int order,
 				unsigned long pages_reclaimed)
@@ -2110,7 +2124,8 @@ nopage:
 			" order:%d, mode:0x%x\n",
 			p->comm, order, gfp_mask);
 		dump_stack();
-		show_mem();
+		if (!should_suppress_show_mem())
+			show_mem();
 	}
 	return page;
 got_pg:

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [patch 3/3] oom: suppress nodes that are not allowed from meminfo on page alloc failure
  2011-01-12  1:13 [patch 1/3] oom: suppress nodes that are not allowed from meminfo on oom kill David Rientjes
  2011-01-12  1:13 ` [patch 2/3] oom: suppress show_mem() for many nodes in irq context on page alloc failure David Rientjes
@ 2011-01-12  1:13 ` David Rientjes
  2011-01-20  2:58 ` [patch 1/3] oom: suppress nodes that are not allowed from meminfo on oom kill David Rientjes
  2 siblings, 0 replies; 4+ messages in thread
From: David Rientjes @ 2011-01-12  1:13 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Mel Gorman, KAMEZAWA Hiroyuki, linux-mm

Displaying extremely verbose meminfo for all nodes on the system is
overkill for page allocation failures when the context restricts that
allocation to only a subset of nodes.  We don't particularly care about
the state of all nodes when some are not allowed in the current context;
they can have an abundance of memory, but we can't allocate from that part
of memory.

This patch suppresses disallowed nodes from the meminfo dump on a page
allocation failure if the context requires it.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 mm/page_alloc.c |   19 ++++++++++++++++---
 1 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2120,12 +2120,25 @@ rebalance:
 
 nopage:
 	if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit()) {
-		printk(KERN_WARNING "%s: page allocation failure."
-			" order:%d, mode:0x%x\n",
+		unsigned int filter = SHOW_MEM_FILTER_NODES;
+
+		/*
+		 * This documents exceptions given to allocations in certain
+		 * contexts that are allowed to allocate outside current's set
+		 * of allowed nodes.
+		 */
+		if (!(gfp_mask & __GFP_NOMEMALLOC))
+			if (test_thread_flag(TIF_MEMDIE) ||
+			    (current->flags & (PF_MEMALLOC | PF_EXITING)))
+				filter &= ~SHOW_MEM_FILTER_NODES;
+		if (in_interrupt() || !wait)
+			filter &= ~SHOW_MEM_FILTER_NODES;
+
+		pr_warning("%s: page allocation failure. order:%d, mode:0x%x\n",
 			p->comm, order, gfp_mask);
 		dump_stack();
 		if (!should_suppress_show_mem())
-			show_mem();
+			__show_mem(filter);
 	}
 	return page;
 got_pg:

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch 1/3] oom: suppress nodes that are not allowed from meminfo on oom kill
  2011-01-12  1:13 [patch 1/3] oom: suppress nodes that are not allowed from meminfo on oom kill David Rientjes
  2011-01-12  1:13 ` [patch 2/3] oom: suppress show_mem() for many nodes in irq context on page alloc failure David Rientjes
  2011-01-12  1:13 ` [patch 3/3] oom: suppress nodes that are not allowed from meminfo " David Rientjes
@ 2011-01-20  2:58 ` David Rientjes
  2 siblings, 0 replies; 4+ messages in thread
From: David Rientjes @ 2011-01-20  2:58 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Mel Gorman, KAMEZAWA Hiroyuki, linux-mm

On Tue, 11 Jan 2011, David Rientjes wrote:

> The oom killer is extremely verbose for machines with a large number of
> cpus and/or nodes.  This verbosity can often be harmful if it causes
> other important messages to be scrolled from the kernel log and incurs a
> significant time delay, specifically for kernels with
> CONFIG_NODES_SHIFT > 8.
> 
> This patch causes only memory information to be displayed for nodes that
> are allowed by current's cpuset when dumping the VM state.  Information
> for all other nodes is irrelevant to the oom condition; we don't care if
> there's an abundance of memory elsewhere if we can't access it.
> 
> This only affects the behavior of dumping memory information when an oom
> is triggered.  Other dumps, such as for sysrq+m, still display the
> unfiltered form when using the existing show_mem() interface.
> 
> Additionally, the per-cpu pageset statistics are extremely verbose in oom
> killer output, so it is now suppressed.  This removes
> 
> 	nodes_weight(current->mems_allowed) * (1 + nr_cpus)
> 
> lines from the oom killer output.
> 
> Callers may use __show_mem(SHOW_MEM_FILTER_NODES) to filter disallowed
> nodes.

Are there any objections to merging this series in -mm?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-01-20  2:58 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-01-12  1:13 [patch 1/3] oom: suppress nodes that are not allowed from meminfo on oom kill David Rientjes
2011-01-12  1:13 ` [patch 2/3] oom: suppress show_mem() for many nodes in irq context on page alloc failure David Rientjes
2011-01-12  1:13 ` [patch 3/3] oom: suppress nodes that are not allowed from meminfo " David Rientjes
2011-01-20  2:58 ` [patch 1/3] oom: suppress nodes that are not allowed from meminfo on oom kill David Rientjes

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox