linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Dave Hansen <haveblue@us.ibm.com>
To: linux-mm@kvack.org
Cc: colpatch@us.ibm.com, kravetz@us.ibm.com, mbligh@aracnet.com,
	anton@samba.org, Dave Hansen <haveblue@us.ibm.com>,
	ygoto@us.fujitsu.com, apw@shadowen.org, kmannth@us.ibm.com
Subject: [PATCH 4/5] allow SRAT to parse empty nodes
Date: Thu, 24 Feb 2005 09:29:28 -0800	[thread overview]
Message-ID: <E1D4Mnp-00078t-00@kernel.beaverton.ibm.com> (raw)

This patch allows a NUMA SRAT-based i386 system to boot without
requiring memory to be present in all of its nodes.  It breaks the
assumption that all nodes have memory during bootup.

Signed-off-by: Keith Mannthey <kmannth@us.ibm.com>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 sparse-dave/arch/i386/kernel/numaq.c    |    4 +---
 sparse-dave/arch/i386/kernel/srat.c     |   14 ++++++++++++--
 sparse-dave/arch/i386/mm/discontig.c    |   32 +++++++++++++++++++-------------
 sparse-dave/include/asm-i386/topology.h |    6 ++++++
 sparse-dave/include/linux/topology.h    |    5 ++++-
 5 files changed, 42 insertions(+), 19 deletions(-)

diff -puN arch/i386/kernel/srat.c~A3.2-fix_nomem_on_node arch/i386/kernel/srat.c
--- sparse/arch/i386/kernel/srat.c~A3.2-fix_nomem_on_node	2005-02-24 08:56:40.000000000 -0800
+++ sparse-dave/arch/i386/kernel/srat.c	2005-02-24 08:56:40.000000000 -0800
@@ -30,6 +30,7 @@
 #include <linux/acpi.h>
 #include <linux/nodemask.h>
 #include <asm/srat.h>
+#include <asm/topology.h>
 
 /*
  * proximity macros and definitions
@@ -58,8 +59,6 @@ static int num_memory_chunks;		/* total 
 static int zholes_size_init;
 static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
 
-extern unsigned long node_start_pfn[], node_end_pfn[], node_remap_size[];
-
 extern void * boot_ioremap(unsigned long, unsigned long);
 
 /* Identify CPU proximity domains */
@@ -273,6 +272,17 @@ static int __init acpi20_parse_srat(stru
 		int been_here_before = 0;
 
 		for (j = 0; j < num_memory_chunks; j++){
+			/*
+			 * Only add present memory to node_end/start_pfn
+			 * There is no guarantee from the srat that the memory
+			 * is present at boot time.
+			 */
+			if (node_memory_chunk[j].start_pfn >= max_pfn) {
+				printk (KERN_INFO "Ignoring chunk of memory reported in the SRAT (could be hot-add zone?)\n");
+				printk (KERN_INFO "chunk is reported from pfn %04x to %04x\n",
+					node_memory_chunk[j].start_pfn, node_memory_chunk[j].end_pfn);
+				continue;
+			}
 			if (node_memory_chunk[j].nid == nid) {
 				if (been_here_before == 0) {
 					node_start_pfn[nid] = node_memory_chunk[j].start_pfn;
diff -puN arch/i386/mm/discontig.c~A3.2-fix_nomem_on_node arch/i386/mm/discontig.c
--- sparse/arch/i386/mm/discontig.c~A3.2-fix_nomem_on_node	2005-02-24 08:56:40.000000000 -0800
+++ sparse-dave/arch/i386/mm/discontig.c	2005-02-24 08:56:40.000000000 -0800
@@ -154,7 +154,7 @@ static void __init find_max_pfn_node(int
  */
 static void __init allocate_pgdat(int nid)
 {
-	if (nid)
+	if (nid && node_has_online_mem(nid))
 		NODE_DATA(nid) = (pg_data_t *)node_remap_start_vaddr[nid];
 	else {
 		NODE_DATA(nid) = (pg_data_t *)(__va(min_low_pfn << PAGE_SHIFT));
@@ -188,6 +188,9 @@ static unsigned long calculate_numa_rema
 	for_each_online_node(nid) {
 		if (nid == 0)
 			continue;
+		if (!node_remap_size[nid])
+			continue;
+
 		/* ensure the remap includes space for the pgdat. */
 		size = node_remap_size[nid] + sizeof(pg_data_t);
 
@@ -299,24 +302,27 @@ void __init zone_sizes_init(void)
 
 		max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
 
-		if (start > low) {
+		if (node_has_online_mem(nid)){
+			if (start > low) {
 #ifdef CONFIG_HIGHMEM
-			BUG_ON(start > high);
-			zones_size[ZONE_HIGHMEM] = high - start;
+				BUG_ON(start > high);
+				zones_size[ZONE_HIGHMEM] = high - start;
 #endif
-		} else {
-			if (low < max_dma)
-				zones_size[ZONE_DMA] = low;
-			else {
-				BUG_ON(max_dma > low);
-				BUG_ON(low > high);
-				zones_size[ZONE_DMA] = max_dma;
-				zones_size[ZONE_NORMAL] = low - max_dma;
+			} else {
+				if (low < max_dma)
+					zones_size[ZONE_DMA] = low;
+				else {
+					BUG_ON(max_dma > low);
+					BUG_ON(low > high);
+					zones_size[ZONE_DMA] = max_dma;
+					zones_size[ZONE_NORMAL] = low - max_dma;
 #ifdef CONFIG_HIGHMEM
-				zones_size[ZONE_HIGHMEM] = high - low;
+					zones_size[ZONE_HIGHMEM] = high - low;
 #endif
+				}
 			}
 		}
+
 		zholes_size = get_zholes_size(nid);
 		/*
 		 * We let the lmem_map for node 0 be allocated from the
diff -puN include/asm-i386/topology.h~A3.2-fix_nomem_on_node include/asm-i386/topology.h
--- sparse/include/asm-i386/topology.h~A3.2-fix_nomem_on_node	2005-02-24 08:56:40.000000000 -0800
+++ sparse-dave/include/asm-i386/topology.h	2005-02-24 08:56:40.000000000 -0800
@@ -88,6 +88,12 @@ static inline cpumask_t pcibus_to_cpumas
 	.nr_balance_failed	= 0,			\
 }
 
+extern unsigned long node_start_pfn[];
+extern unsigned long node_end_pfn[];
+extern unsigned long node_remap_size[];
+
+#define node_has_online_mem(nid) (node_start_pfn[nid] != node_end_pfn[nid])
+
 #else /* !CONFIG_NUMA */
 /*
  * Other i386 platforms should define their own version of the 
diff -puN include/linux/topology.h~A3.2-fix_nomem_on_node include/linux/topology.h
--- sparse/include/linux/topology.h~A3.2-fix_nomem_on_node	2005-02-24 08:56:40.000000000 -0800
+++ sparse-dave/include/linux/topology.h	2005-02-24 08:56:40.000000000 -0800
@@ -31,9 +31,12 @@
 #include <linux/bitops.h>
 #include <linux/mmzone.h>
 #include <linux/smp.h>
-
 #include <asm/topology.h>
 
+#ifndef node_has_online_mem
+#define node_has_online_mem(nid) (1)
+#endif
+
 #ifndef nr_cpus_node
 #define nr_cpus_node(node)							\
 	({									\
diff -puN arch/i386/kernel/numaq.c~A3.2-fix_nomem_on_node arch/i386/kernel/numaq.c
--- sparse/arch/i386/kernel/numaq.c~A3.2-fix_nomem_on_node	2005-02-24 08:56:40.000000000 -0800
+++ sparse-dave/arch/i386/kernel/numaq.c	2005-02-24 08:56:40.000000000 -0800
@@ -30,9 +30,7 @@
 #include <linux/module.h>
 #include <linux/nodemask.h>
 #include <asm/numaq.h>
-
-/* These are needed before the pgdat's are created */
-extern long node_start_pfn[], node_end_pfn[], node_remap_size[];
+#include <asm/topology.h>
 
 #define	MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
 
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

             reply	other threads:[~2005-02-24 17:29 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-02-24 17:29 Dave Hansen [this message]
2005-02-28 18:54 Dave Hansen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=E1D4Mnp-00078t-00@kernel.beaverton.ibm.com \
    --to=haveblue@us.ibm.com \
    --cc=anton@samba.org \
    --cc=apw@shadowen.org \
    --cc=colpatch@us.ibm.com \
    --cc=kmannth@us.ibm.com \
    --cc=kravetz@us.ibm.com \
    --cc=linux-mm@kvack.org \
    --cc=mbligh@aracnet.com \
    --cc=ygoto@us.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox