From: keith mannthey <kmannth@us.ibm.com>
To: lhms-devel <lhms-devel@lists.sourceforge.net>
Cc: linux-mm <linux-mm@kvack.org>, konrad <darnok@us.ibm.com>,
Prarit Bhargava--redhat <prarit@redhat.com>,
ak@suse.de
Subject: [RFC] patch [1/1] x86_64 numa aware sparsemem add_memory functinality
Date: Tue, 20 Jun 2006 22:43:01 -0700 [thread overview]
Message-ID: <1150868581.8518.28.camel@keithlap> (raw)
[-- Attachment #1: Type: text/plain, Size: 1179 bytes --]
Hello all,
This patch is an attempt to add NUMA-aware add_memory functionality
to x86_64 using CONFIG_SPARSEMEM. The add_memory function today just
grabs the pgdat from node 0 and adds the memory there. On a NUMA system
this is functional but not optimal/correct.
The SRAT can expose future memory locality. This information is
already tracked by the nodes_add data structure (it keeps the
memory/node locality information) from the SRAT code. The code in
srat.c is built around RESERVE_HOTADD. This patch is a little subtle in
the way it uses the existing code for use with sparsemem. Perhaps
acpi_numa_memory_affinity_init needs a larger refactor to fit both
RESERVE_HOTADD and sparsemem.
This patch still uses hotadd_percent as a flag to the whole SRAT parsing
code to disable and contain broken BIOSes. Its functionality is retained,
and it also acts as an on/off switch for sparsemem hot-add. Without changing
the safety mechanisms built into the current SRAT code, I have provided a
path for the sparsemem hot-add code to get to the nodes_add data for use at
runtime.
This is a first run at the patch; it works with 2.6.17.
Signed-off-by: Keith Mannthey <kmannth@us.ibm.com>
[-- Attachment #2: patch-2.6.17-nodes-add-v1.patch --]
[-- Type: text/x-patch, Size: 3550 bytes --]
diff -urN linux-2.6.17/arch/x86_64/mm/init.c linux-2.6.17-work/arch/x86_64/mm/init.c
--- linux-2.6.17/arch/x86_64/mm/init.c 2006-06-17 21:49:35.000000000 -0400
+++ linux-2.6.17-work/arch/x86_64/mm/init.c 2006-06-20 21:41:30.000000000 -0400
@@ -553,7 +553,7 @@
*/
int add_memory(u64 start, u64 size)
{
- struct pglist_data *pgdat = NODE_DATA(0);
+ struct pglist_data *pgdat = NODE_DATA(new_memory_to_node(start,start+size));
struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
diff -urN linux-2.6.17/arch/x86_64/mm/srat.c linux-2.6.17-work/arch/x86_64/mm/srat.c
--- linux-2.6.17/arch/x86_64/mm/srat.c 2006-06-20 20:25:33.000000000 -0400
+++ linux-2.6.17-work/arch/x86_64/mm/srat.c 2006-06-20 21:44:54.000000000 -0400
@@ -32,10 +32,10 @@
static nodemask_t nodes_parsed __initdata;
static nodemask_t nodes_found __initdata;
static struct bootnode nodes[MAX_NUMNODES] __initdata;
-static struct bootnode nodes_add[MAX_NUMNODES] __initdata;
+static struct bootnode nodes_add[MAX_NUMNODES];
static int found_add_area __initdata;
int hotadd_percent __initdata = 0;
-#ifndef RESERVE_HOTADD
+#if !defined(RESERVE_HOTADD) && !defined(CONFIG_MEMORY_HOTPLUG)
#define hotadd_percent 0 /* Ignore all settings */
#endif
static u8 pxm2node[256] = { [0 ... 255] = 0xff };
@@ -219,9 +219,9 @@
allocated += mem;
return 1;
}
-
+#endif
/*
- * It is fine to add this area to the nodes data it will be used later
+ * It is fine to add this area to the nodes_add data it will be used later
* This code supports one contigious hot add area per node.
*/
static int reserve_hotadd(int node, unsigned long start, unsigned long end)
@@ -247,15 +247,14 @@
printk(KERN_ERR "SRAT: Hotplug area has existing memory\n");
return -1;
}
-
+#ifdef RESERVE_HOTADD
if (!hotadd_enough_memory(&nodes_add[node])) {
printk(KERN_ERR "SRAT: Hotplug area too large\n");
return -1;
}
-
+#endif
/* Looks good */
- found_add_area = 1;
if (nd->start == nd->end) {
nd->start = start;
nd->end = end;
@@ -273,14 +272,16 @@
printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n");
}
- if ((nd->end >> PAGE_SHIFT) > end_pfn)
- end_pfn = nd->end >> PAGE_SHIFT;
-
if (changed)
printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end);
+#ifdef RESERVE_HOTADD
+ found_add_area = 1;
+ if ((nd->end >> PAGE_SHIFT) > end_pfn)
+ end_pfn = nd->end >> PAGE_SHIFT;
return 0;
+#endif
+ return -1;
}
-#endif
/* Callback for parsing of the Proximity Domain <-> Memory Area mappings */
void __init
@@ -338,7 +339,6 @@
printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm,
nd->start, nd->end);
-#ifdef RESERVE_HOTADD
if (ma->flags.hot_pluggable && reserve_hotadd(node, start, end) < 0) {
/* Ignore hotadd region. Undo damage */
printk(KERN_NOTICE "SRAT: Hotplug region ignored\n");
@@ -346,7 +346,6 @@
if ((nd->start | nd->end) == 0)
node_clear(node, nodes_parsed);
}
-#endif
}
/* Sanity check to catch more bad SRATs (they are amazingly common).
@@ -479,5 +478,15 @@
index = acpi_slit->localities * node_to_pxm(a);
return acpi_slit->entry[index + node_to_pxm(b)];
}
-
EXPORT_SYMBOL(__node_distance);
+
+int new_memory_to_node(unsigned long start, unsigned long end) {
+ int i,ret;
+ ret=0;
+ for_each_node(i){
+ if (nodes_add[i].start <= start && nodes_add[i].end >= end)
+ ret = i;
+ }
+ return ret;
+}
+EXPORT_SYMBOL(new_memory_to_node);
next reply other threads:[~2006-06-21 5:43 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-06-21 5:43 keith mannthey [this message]
2006-06-21 6:06 ` [Lhms-devel] " KAMEZAWA Hiroyuki
2006-06-21 6:25 ` keith mannthey
2006-06-21 6:37 ` KAMEZAWA Hiroyuki
2006-06-21 6:31 ` Yasunori Goto
2006-06-23 17:13 ` Dave Hansen
2006-06-23 17:57 ` [Lhms-devel] " keith mannthey
2006-06-24 2:05 ` [RFC] Patch [1/4] x86_64 sparsmem add- save nodes_add data for later keith mannthey
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1150868581.8518.28.camel@keithlap \
--to=kmannth@us.ibm.com \
--cc=ak@suse.de \
--cc=darnok@us.ibm.com \
--cc=lhms-devel@lists.sourceforge.net \
--cc=linux-mm@kvack.org \
--cc=prarit@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox