linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: keith mannthey <kmannth@us.ibm.com>
To: lkml <linux-kernel@vger.kernel.org>
Cc: linux-mm <linux-mm@kvack.org>, mbligh@mbligh.org, akpm@osdl.org
Subject: [Patch] convert i386 NUMA KVA space to bootmem
Date: Wed, 05 Jul 2006 14:26:13 -0700	[thread overview]
Message-ID: <1152134773.5799.34.camel@keithlap> (raw)

[-- Attachment #1: Type: text/plain, Size: 1218 bytes --]

Hello Andrew,
  I posted this patch a while ago but I didn't get any feedback.  I
would like to submit this patch to your tree.  
  
  The patch itself addresses a long-standing issue of booting with an
initrd on an i386 NUMA system.  Currently (and historically) the NUMA KVA
area is mapped into low memory by finding the end of low memory and moving
that mark down (thus creating space for the KVA).  The issue with this
is that GRUB loads initrds into this same region, so when the kernel
checks the initrd it finds it outside max_low_pfn and disables it (it
thinks the initrd is not mapped into usable memory).  Thus initrd-enabled
kernels can't boot on i386 NUMA :(

  My solution to the problem just converts the NUMA KVA area to use the
bootmem allocator to reserve its area (instead of moving the end of low
memory).  Using bootmem allows the KVA area to be placed at more
diverse addresses (not just the end of low memory) and enables the KVA
area to be mapped below the initrd if one is present.

  I have tested this patch on numaq (no initrd) and summit (initrd) i386
NUMA-based systems.  It was diffed on 2.6.17-git26 but should apply to
just about any recent kernel.


Signed-off-by:  Keith Mannthey <kmannth@us.ibm.com>


[-- Attachment #2: patch-2.6.17-numa-kva-v3 --]
[-- Type: text/x-patch, Size: 3331 bytes --]

diff -urN linux-2.6.17/arch/i386/kernel/setup.c linux-2.6.17-git24/arch/i386/kernel/setup.c
--- linux-2.6.17/arch/i386/kernel/setup.c	2006-07-04 22:35:20.000000000 -0700
+++ linux-2.6.17-git24/arch/i386/kernel/setup.c	2006-07-04 22:27:31.000000000 -0700
@@ -1203,6 +1203,9 @@
 extern void zone_sizes_init(void);
 #endif /* !CONFIG_NEED_MULTIPLE_NODES */
 
+#ifdef CONFIG_NUMA
+extern void numa_kva_reserve(void);
+#endif
 void __init setup_bootmem_allocator(void)
 {
 	unsigned long bootmap_size;
@@ -1258,7 +1261,9 @@
 	 */
 	find_smp_config();
 #endif
-
+#ifdef CONFIG_NUMA
+	numa_kva_reserve();
+#endif 
 #ifdef CONFIG_BLK_DEV_INITRD
 	if (LOADER_TYPE && INITRD_START) {
 		if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
diff -urN linux-2.6.17/arch/i386/mm/discontig.c linux-2.6.17-git24/arch/i386/mm/discontig.c
--- linux-2.6.17/arch/i386/mm/discontig.c	2006-07-04 22:35:20.000000000 -0700
+++ linux-2.6.17-git24/arch/i386/mm/discontig.c	2006-07-04 22:27:31.000000000 -0700
@@ -117,7 +117,8 @@
 
 void *node_remap_end_vaddr[MAX_NUMNODES];
 void *node_remap_alloc_vaddr[MAX_NUMNODES];
-
+static unsigned long kva_start_pfn;
+static unsigned long kva_pages;
 /*
  * FLAT - support for basic PC memory model with discontig enabled, essentially
  *        a single node with all available processors in it with a flat
@@ -286,7 +287,6 @@
 {
 	int nid;
 	unsigned long system_start_pfn, system_max_low_pfn;
-	unsigned long reserve_pages;
 
 	/*
 	 * When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -298,14 +298,23 @@
 	find_max_pfn();
 	get_memcfg_numa();
 
-	reserve_pages = calculate_numa_remap_pages();
+	kva_pages = calculate_numa_remap_pages();
 
 	/* partially used pages are not usable - thus round upwards */
 	system_start_pfn = min_low_pfn = PFN_UP(init_pg_tables_end);
 
-	system_max_low_pfn = max_low_pfn = find_max_low_pfn() - reserve_pages;
-	printk("reserve_pages = %ld find_max_low_pfn() ~ %ld\n",
-			reserve_pages, max_low_pfn + reserve_pages);
+	kva_start_pfn = find_max_low_pfn() - kva_pages;
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	/* Numa kva area is below the initrd */
+	if (LOADER_TYPE && INITRD_START) 
+		kva_start_pfn = PFN_DOWN(INITRD_START)  - kva_pages;
+#endif 
+	kva_start_pfn -= kva_start_pfn & (PTRS_PER_PTE-1);
+
+	system_max_low_pfn = max_low_pfn = find_max_low_pfn();
+	printk("kva_start_pfn ~ %ld find_max_low_pfn() ~ %ld\n", 
+		kva_start_pfn, max_low_pfn);
 	printk("max_pfn = %ld\n", max_pfn);
 #ifdef CONFIG_HIGHMEM
 	highstart_pfn = highend_pfn = max_pfn;
@@ -323,7 +332,7 @@
 			(ulong) pfn_to_kaddr(max_low_pfn));
 	for_each_online_node(nid) {
 		node_remap_start_vaddr[nid] = pfn_to_kaddr(
-				highstart_pfn + node_remap_offset[nid]);
+				kva_start_pfn + node_remap_offset[nid]);
 		/* Init the node remap allocator */
 		node_remap_end_vaddr[nid] = node_remap_start_vaddr[nid] +
 			(node_remap_size[nid] * PAGE_SIZE);
@@ -338,7 +347,6 @@
 	}
 	printk("High memory starts at vaddr %08lx\n",
 			(ulong) pfn_to_kaddr(highstart_pfn));
-	vmalloc_earlyreserve = reserve_pages * PAGE_SIZE;
 	for_each_online_node(nid)
 		find_max_pfn_node(nid);
 
@@ -348,6 +356,12 @@
 	return max_low_pfn;
 }
 
+void __init numa_kva_reserve (void) 
+{
+	reserve_bootmem(PFN_PHYS(kva_start_pfn),PFN_PHYS(kva_pages));
+
+}
+
 void __init zone_sizes_init(void)
 {
 	int nid;

                 reply	other threads:[~2006-07-05 21:26 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1152134773.5799.34.camel@keithlap \
    --to=kmannth@us.ibm.com \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mbligh@mbligh.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox