linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: William Lee Irwin III <wli@holomorphy.com>
To: Andrew Morton <akpm@digeo.com>
Cc: lkml <linux-kernel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>
Subject: Re: 2.5.42-mm2
Date: Sun, 13 Oct 2002 03:19:49 -0700	[thread overview]
Message-ID: <20021013101949.GB2032@holomorphy.com> (raw)
In-Reply-To: <3DA7C3A5.98FCC13E@digeo.com>

On Fri, Oct 11, 2002 at 11:39:33PM -0700, Andrew Morton wrote:
> url: http://www.zip.com.au/~akpm/linux/patches/2.5/2.5.42/2.5.42-mm2/

This patch does 5 things:

(1) when the OOM killer fails and the system panics, calls
	show_free_areas()
(2) reorganizes show_free_areas() to use for_each_zone()
(3) adds per-cpu stats to show_free_areas()
(4) tags output from show_free_areas() with node and zone information
(5) initializes zone->per_cpu_pageset[cpu].pcp[temperature].reserved
	in free_area_init_core()

The net effect is better reporting of where memory went, which was
essential to determining the cause of this failure, and that kernels
with the reserved page stuff can actually boot. Prior to this, it was
getting total garbage in ->reserved after free_area_init_core():

Node 0, Zone DMA: per-cpu:
cpu 0 hot: low 32, high 96, batch 16, reserved 1683971840
cpu 0 cold: low 0, high 32, batch 16, reserved 1953719651
cpu 1 hot: low 32, high 96, batch 16, reserved 1702256479
cpu 1 cold: low 0, high 32, batch 16, reserved 825241951

And this caused a false bootmem OOM. It would have been impossible to
determine the cause of failure without show_free_areas() modifications,
and this is a box-killing bug that wipes out a significant fraction of
the high-end developer base from 2.5.x contributions as well as
preventing all i386 NUMA boxen, which are the highest volume high-end
configurations, from booting. Furthermore, the patch also cleans up
show_free_areas() in a very straightforward fashion.

Against 2.5.42-mm2.


diff -urpN mm-2.5.42/mm/oom_kill.c virgin-2.5.42/mm/oom_kill.c
--- mm-2.5.42/mm/oom_kill.c	2002-10-11 21:22:08.000000000 -0700
+++ virgin-2.5.42/mm/oom_kill.c	2002-10-13 01:35:51.000000000 -0700
@@ -172,8 +172,10 @@ static void oom_kill(void)
 	p = select_bad_process();
 
 	/* Found nothing?!?! Either we hang forever, or we panic. */
-	if (p == NULL)
+	if (!p) {
+		show_free_areas();
 		panic("Out of memory and no killable processes...\n");
+	}
 
 	/* kill all processes that share the ->mm (i.e. all threads) */
 	do_each_thread(g, q)
diff -urpN mm-2.5.42/mm/page_alloc.c virgin-2.5.42/mm/page_alloc.c
--- mm-2.5.42/mm/page_alloc.c	2002-10-13 02:37:25.000000000 -0700
+++ virgin-2.5.42/mm/page_alloc.c	2002-10-13 02:05:12.000000000 -0700
@@ -830,11 +830,11 @@ void si_meminfo(struct sysinfo *val)
  */
 void show_free_areas(void)
 {
-	pg_data_t *pgdat;
 	struct page_state ps;
-	int type;
+	int cpu, temperature;
 	unsigned long active;
 	unsigned long inactive;
+	struct zone *zone;
 
 	get_page_state(&ps);
 	get_zone_counts(&active, &inactive);
@@ -843,26 +843,24 @@ void show_free_areas(void)
 		K(nr_free_pages()),
 		K(nr_free_highpages()));
 
-	for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next)
-		for (type = 0; type < MAX_NR_ZONES; ++type) {
-			struct zone *zone = &pgdat->node_zones[type];
-			printk("Zone:%s"
-				" freepages:%6lukB"
-				" min:%6lukB"
-				" low:%6lukB"
-				" high:%6lukB"
-				" active:%6lukB"
-				" inactive:%6lukB"
-				"\n",
-				zone->name,
-				K(zone->free_pages),
-				K(zone->pages_min),
-				K(zone->pages_low),
-				K(zone->pages_high),
-				K(zone->nr_active),
-				K(zone->nr_inactive)
-				);
-		}
+	for_each_zone(zone)
+		printk("Node %d, Zone:%s"
+			" freepages:%6lukB"
+			" min:%6lukB"
+			" low:%6lukB"
+			" high:%6lukB"
+			" active:%6lukB"
+			" inactive:%6lukB"
+			"\n",
+			zone->zone_pgdat->node_id,
+			zone->name,
+			K(zone->free_pages),
+			K(zone->pages_min),
+			K(zone->pages_low),
+			K(zone->pages_high),
+			K(zone->nr_active),
+			K(zone->nr_inactive)
+			);
 
 	printk("( Active:%lu inactive:%lu dirty:%lu writeback:%lu free:%u )\n",
 		active,
@@ -871,26 +869,49 @@ void show_free_areas(void)
 		ps.nr_writeback,
 		nr_free_pages());
 
-	for (pgdat = pgdat_list; pgdat; pgdat = pgdat->pgdat_next)
-		for (type = 0; type < MAX_NR_ZONES; type++) {
-			struct list_head *elem;
-			struct zone *zone = &pgdat->node_zones[type];
- 			unsigned long nr, flags, order, total = 0;
+	for_each_zone(zone) {
+		struct list_head *elem;
+ 		unsigned long nr, flags, order, total = 0;
+
+		printk("Node %d, Zone %s: ", zone->zone_pgdat->node_id, zone->name);
+		if (!zone->present_pages) {
+			printk("empty\n");
+			continue;
+		}
 
-			if (!zone->present_pages)
-				continue;
+		spin_lock_irqsave(&zone->lock, flags);
+		for (order = 0; order < MAX_ORDER; order++) {
+			nr = 0;
+			list_for_each(elem, &zone->free_area[order].free_list)
+				++nr;
+			total += nr << order;
+			printk("%lu*%lukB ", nr, K(1UL) << order);
+		}
+		spin_unlock_irqrestore(&zone->lock, flags);
+		printk("= %lukB)\n", K(total));
+	}
 
-			spin_lock_irqsave(&zone->lock, flags);
-			for (order = 0; order < MAX_ORDER; order++) {
-				nr = 0;
-				list_for_each(elem, &zone->free_area[order].free_list)
-					++nr;
-				total += nr << order;
-				printk("%lu*%lukB ", nr, K(1UL) << order);
-			}
-			spin_unlock_irqrestore(&zone->lock, flags);
-			printk("= %lukB)\n", K(total));
+	for_each_zone(zone) {
+		printk("Node %d, Zone %s: per-cpu:", zone->zone_pgdat->node_id, zone->name);
+
+		if (!zone->present_pages) {
+			printk(" empty\n");
+			continue;
+		} else
+			printk("\n");
+
+		for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+			struct per_cpu_pageset *pageset = zone->pageset + cpu;
+			for (temperature = 0; temperature < 2; temperature++)
+				printk("cpu %d %s: low %d, high %d, batch %d, reserved %d\n",
+					cpu,
+					temperature ? "cold" : "hot",
+					pageset->pcp[temperature].low,
+					pageset->pcp[temperature].high,
+					pageset->pcp[temperature].batch,
+					pageset->pcp[temperature].reserved);
 		}
+	}
 
 	show_swap_cache_info();
 }
@@ -1097,6 +1118,7 @@ static void __init free_area_init_core(s
 			pcp->low = 32;
 			pcp->high = 96;
 			pcp->batch = 16;
+			pcp->reserved = 0;
 			INIT_LIST_HEAD(&pcp->list);
 
 			pcp = &zone->pageset[cpu].pcp[1];	/* cold */
@@ -1104,6 +1126,7 @@ static void __init free_area_init_core(s
 			pcp->low = 0;
 			pcp->high = 32;
 			pcp->batch = 16;
+			pcp->reserved = 0;
 			INIT_LIST_HEAD(&pcp->list);
 		}
 		INIT_LIST_HEAD(&zone->active_list);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

  parent reply	other threads:[~2002-10-13 10:19 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-10-12  6:39 2.5.42-mm2 Andrew Morton
2002-10-12 13:19 ` 2.5.42-mm2 Ed Tomlinson
2002-10-12 16:22 ` 2.5.42-mm2 Ingo Oeser
2002-10-12 17:26   ` 2.5.42-mm2 Andrew Morton
2002-10-13 10:56     ` 2.5.42-mm2 Ingo Oeser
2002-10-13 16:43       ` 2.5.42-mm2 Kai Makisara
2002-10-13 10:19 ` William Lee Irwin III [this message]
2002-10-13 17:47   ` 2.5.42-mm2 Andrew Morton
2002-10-13 19:52     ` 2.5.42-mm2 William Lee Irwin III
2002-10-13 20:04       ` 2.5.42-mm2 Rik van Riel
2002-10-13 20:42         ` 2.5.42-mm2 William Lee Irwin III
2002-10-27  7:32   ` 2.5.42-mm2 Andrew Morton
2002-10-13 21:22 ` 2.5.42-mm2 William Lee Irwin III

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20021013101949.GB2032@holomorphy.com \
    --to=wli@holomorphy.com \
    --cc=akpm@digeo.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox