* [PATCH 3/5] abstract discontigmem setup
@ 2005-02-28 18:54 Dave Hansen
2005-02-28 23:30 ` Dave Hansen
2005-03-01 6:21 ` Andrew Morton
0 siblings, 2 replies; 5+ messages in thread
From: Dave Hansen @ 2005-02-28 18:54 UTC (permalink / raw)
To: linux-mm; +Cc: akpm, kmannth, linux-kernel, Dave Hansen, ygoto, apw
memory_present() is how each arch/subarch will tell sparsemem
and discontigmem where all of its memory is. This is what
triggers sparse to go out and create its mappings for the memory,
as well as allocate the mem_map[].
By: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
sparse-dave/arch/i386/Kconfig | 10 +++++++
sparse-dave/arch/i386/kernel/numaq.c | 6 +++-
sparse-dave/arch/i386/kernel/srat.c | 9 +++++-
sparse-dave/arch/i386/mm/discontig.c | 49 ++++++++++++++++++++++-------------
sparse-dave/arch/ppc64/mm/numa.c | 19 ++++++++++---
sparse-dave/include/linux/mmzone.h | 11 +++++++
6 files changed, 79 insertions(+), 25 deletions(-)
diff -puN arch/i386/kernel/numaq.c~A3.1-abstract-discontig arch/i386/kernel/numaq.c
--- sparse/arch/i386/kernel/numaq.c~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/i386/kernel/numaq.c 2005-02-24 08:56:39.000000000 -0800
@@ -32,7 +32,7 @@
#include <asm/numaq.h>
/* These are needed before the pgdat's are created */
-extern long node_start_pfn[], node_end_pfn[];
+extern long node_start_pfn[], node_end_pfn[], node_remap_size[];
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
@@ -59,6 +59,10 @@ static void __init smp_dump_qct(void)
eq->hi_shrd_mem_start - eq->priv_mem_size);
node_end_pfn[node] = MB_TO_PAGES(
eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
+
+ memory_present(node,
+ node_start_pfn[node], node_end_pfn[node]);
+ node_remap_size[node] = node_memmap_size_bytes(node);
}
}
}
diff -puN arch/i386/kernel/srat.c~A3.1-abstract-discontig arch/i386/kernel/srat.c
--- sparse/arch/i386/kernel/srat.c~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/i386/kernel/srat.c 2005-02-24 08:56:39.000000000 -0800
@@ -58,7 +58,7 @@ static int num_memory_chunks; /* total
static int zholes_size_init;
static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
-extern unsigned long node_start_pfn[], node_end_pfn[];
+extern unsigned long node_start_pfn[], node_end_pfn[], node_remap_size[];
extern void * boot_ioremap(unsigned long, unsigned long);
@@ -286,6 +286,13 @@ static int __init acpi20_parse_srat(stru
}
}
}
+ for_each_online_node(nid) {
+ unsigned long start = node_start_pfn[nid];
+ unsigned long end = node_end_pfn[nid];
+
+ memory_present(nid, start, end);
+ node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
+ }
return 1;
out_fail:
return 0;
diff -puN arch/i386/mm/discontig.c~A3.1-abstract-discontig arch/i386/mm/discontig.c
--- sparse/arch/i386/mm/discontig.c~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/i386/mm/discontig.c 2005-02-24 08:56:39.000000000 -0800
@@ -60,6 +60,32 @@ bootmem_data_t node0_bdata;
*/
s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1};
+void memory_present(int nid, unsigned long start, unsigned long end)
+{
+ unsigned long pfn;
+
+ printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n",
+ nid, start, end);
+ printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
+ printk(KERN_DEBUG " ");
+ for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
+ physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
+ printk(KERN_DEBUG "%ld ", pfn);
+ }
+ printk(KERN_DEBUG "\n");
+}
+
+unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long nr_pages = end_pfn - start_pfn;
+
+ if (!nr_pages)
+ return 0;
+
+ return (nr_pages + 1) * sizeof(struct page);
+}
+
unsigned long node_start_pfn[MAX_NUMNODES];
unsigned long node_end_pfn[MAX_NUMNODES];
@@ -162,9 +188,9 @@ static unsigned long calculate_numa_rema
for_each_online_node(nid) {
if (nid == 0)
continue;
- /* calculate the size of the mem_map needed in bytes */
- size = (node_end_pfn[nid] - node_start_pfn[nid] + 1)
- * sizeof(struct page) + sizeof(pg_data_t);
+ /* ensure the remap includes space for the pgdat. */
+ size = node_remap_size[nid] + sizeof(pg_data_t);
+
/* convert size to large (pmd size) pages, rounding up */
size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
/* now the roundup is correct, convert to PAGE_SIZE pages */
@@ -189,7 +215,7 @@ unsigned long __init setup_memory(void)
{
int nid;
unsigned long system_start_pfn, system_max_low_pfn;
- unsigned long reserve_pages, pfn;
+ unsigned long reserve_pages;
/*
* When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -198,22 +224,9 @@ unsigned long __init setup_memory(void)
* this space and use it to adjust the boundry between ZONE_NORMAL
* and ZONE_HIGHMEM.
*/
+ find_max_pfn();
get_memcfg_numa();
- /* Fill in the physnode_map */
- for_each_online_node(nid) {
- printk("Node: %d, start_pfn: %ld, end_pfn: %ld\n",
- nid, node_start_pfn[nid], node_end_pfn[nid]);
- printk(" Setting physnode_map array to node %d for pfns:\n ",
- nid);
- for (pfn = node_start_pfn[nid]; pfn < node_end_pfn[nid];
- pfn += PAGES_PER_ELEMENT) {
- physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
- printk("%ld ", pfn);
- }
- printk("\n");
- }
-
reserve_pages = calculate_numa_remap_pages();
/* partially used pages are not usable - thus round upwards */
diff -puN arch/ppc64/mm/numa.c~A3.1-abstract-discontig arch/ppc64/mm/numa.c
--- sparse/arch/ppc64/mm/numa.c~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/ppc64/mm/numa.c 2005-02-24 08:56:39.000000000 -0800
@@ -58,6 +58,17 @@ EXPORT_SYMBOL(numa_memory_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(nr_cpus_in_node);
+void memory_present(int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long i;
+ unsigned long start_addr = start << PAGE_SHIFT;
+ unsigned long end_addr = end << PAGE_SHIFT;
+
+ for (i = start ; i < end; i += MEMORY_INCREMENT)
+ numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = nid;
+}
+
static inline void map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
@@ -378,9 +389,8 @@ new_range:
size / PAGE_SIZE;
}
- for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
- numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
- numa_domain;
+ memory_present(numa_domain, start >> PAGE_SHIFT,
+ (start + size) >> PAGE_SHIFT);
ranges--;
if (ranges)
@@ -428,8 +438,7 @@ static void __init setup_nonnuma(void)
init_node_data[0].node_start_pfn = 0;
init_node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE;
- for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
- numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
+ memory_present(0, 0, init_node_data[0].node_spanned_pages);
node0_io_hole_size = top_of_ram - total_ram;
}
diff -puN include/linux/mmzone.h~A3.1-abstract-discontig include/linux/mmzone.h
--- sparse/include/linux/mmzone.h~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/include/linux/mmzone.h 2005-02-24 08:56:39.000000000 -0800
@@ -11,6 +11,7 @@
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/numa.h>
+#include <linux/init.h>
#include <asm/atomic.h>
/* Free memory management - zoned buddy allocator. */
@@ -278,6 +279,16 @@ void wakeup_kswapd(struct zone *zone, in
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int alloc_type, int can_try_harder, int gfp_high);
+#ifdef CONFIG_HAVE_MEMORY_PRESENT
+void memory_present(int nid, unsigned long start, unsigned long end);
+#else
+static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
+#endif
+
+#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
+unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
+#endif
+
/*
* zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
*/
diff -puN arch/i386/Kconfig~A3.1-abstract-discontig arch/i386/Kconfig
--- sparse/arch/i386/Kconfig~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/i386/Kconfig 2005-02-24 08:56:39.000000000 -0800
@@ -769,6 +769,16 @@ config HAVE_ARCH_BOOTMEM_NODE
depends on NUMA
default y
+config HAVE_MEMORY_PRESENT
+ bool
+ depends on DISCONTIGMEM
+ default y
+
+config NEED_NODE_MEMMAP_SIZE
+ bool
+ depends on DISCONTIGMEM
+ default y
+
config HIGHPTE
bool "Allocate 3rd-level pagetables from highmem"
depends on HIGHMEM4G || HIGHMEM64G
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 3/5] abstract discontigmem setup
2005-02-28 18:54 [PATCH 3/5] abstract discontigmem setup Dave Hansen
@ 2005-02-28 23:30 ` Dave Hansen
2005-03-01 6:21 ` Andrew Morton
1 sibling, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2005-02-28 23:30 UTC (permalink / raw)
To: linux-mm
Cc: Andrew Morton, kmannth, Linux Kernel Mailing List, Andy Whitcroft
[-- Attachment #1: Type: text/plain, Size: 169 bytes --]
The $SUBJECT patch has a small, obvious, compile bug in it on the
NUMA-Q, which I introduced while cleaning it up. Please apply this
patch on top of that one.
-- Dave
[-- Attachment #2: A3.2.1-fix-numaq.patch --]
[-- Type: text/x-patch, Size: 889 bytes --]
The "abstract discontigmem setup" patch has a small compile bug in
it on the NUMA-Q, which I introduced while "cleaning it up."
Please apply after that patch.
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
memhotplug-dave/arch/i386/kernel/numaq.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletion(-)
diff -puN arch/i386/kernel/numaq.c~A3.2.1-fix-numaq arch/i386/kernel/numaq.c
--- memhotplug/arch/i386/kernel/numaq.c~A3.2.1-fix-numaq 2005-02-28 14:16:23.000000000 -0800
+++ memhotplug-dave/arch/i386/kernel/numaq.c 2005-02-28 14:16:59.000000000 -0800
@@ -62,7 +62,9 @@ static void __init smp_dump_qct(void)
memory_present(node,
node_start_pfn[node], node_end_pfn[node]);
- node_remap_size[node] = node_memmap_size_bytes(node);
+ node_remap_size[node] = node_memmap_size_bytes(node,
+ node_start_pfn[node],
+ node_end_pfn[node]);
}
}
}
_
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 3/5] abstract discontigmem setup
2005-02-28 18:54 [PATCH 3/5] abstract discontigmem setup Dave Hansen
2005-02-28 23:30 ` Dave Hansen
@ 2005-03-01 6:21 ` Andrew Morton
2005-03-01 7:16 ` Dave Hansen
1 sibling, 1 reply; 5+ messages in thread
From: Andrew Morton @ 2005-03-01 6:21 UTC (permalink / raw)
To: Dave Hansen; +Cc: linux-mm, kmannth, linux-kernel, ygoto, apw
Dave Hansen <haveblue@us.ibm.com> wrote:
>
> memory_present() is how each arch/subarch will tell sparsemem
> and discontigmem where all of its memory is. This is what
> triggers sparse to go out and create its mappings for the memory,
> as well as allocate the mem_map[].
There are cross-compilers at http://developer.osdl.org/dev/plm/cross_compile/
This also needs runtime testing on ppc64, does it not?
arch/ppc64/mm/numa.c:63: error: redefinition of `memory_present'
include/linux/mmzone.h:285: error: `memory_present' previously defined here
arch/ppc64/mm/numa.c: In function `memory_present':
arch/ppc64/mm/numa.c:65: error: `start' undeclared (first use in this function)
arch/ppc64/mm/numa.c:65: error: (Each undeclared identifier is reported only once
arch/ppc64/mm/numa.c:65: error: for each function it appears in.)
arch/ppc64/mm/numa.c:66: error: `end' undeclared (first use in this function)
arch/ppc64/mm/numa.c:65: warning: unused variable `start_addr'
arch/ppc64/mm/numa.c:66: warning: unused variable `end_addr'
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
25-akpm/arch/ppc64/Kconfig | 10 ++++++++++
25-akpm/arch/ppc64/mm/numa.c | 6 +++---
2 files changed, 13 insertions(+), 3 deletions(-)
diff -puN arch/ppc64/Kconfig~x86-abstract-discontigmem-setup-ppc64-fix arch/ppc64/Kconfig
--- 25/arch/ppc64/Kconfig~x86-abstract-discontigmem-setup-ppc64-fix 2005-03-01 03:58:15.000000000 -0700
+++ 25-akpm/arch/ppc64/Kconfig 2005-03-01 03:58:15.000000000 -0700
@@ -203,6 +203,16 @@ config DISCONTIGMEM
bool "Discontiguous Memory Support"
depends on SMP && PPC_PSERIES
+config HAVE_MEMORY_PRESENT
+ bool
+ depends on DISCONTIGMEM
+ default y
+
+config NEED_NODE_MEMMAP_SIZE
+ bool
+ depends on DISCONTIGMEM
+ default y
+
config NUMA
bool "NUMA support"
depends on DISCONTIGMEM
diff -puN arch/ppc64/mm/numa.c~x86-abstract-discontigmem-setup-ppc64-fix arch/ppc64/mm/numa.c
--- 25/arch/ppc64/mm/numa.c~x86-abstract-discontigmem-setup-ppc64-fix 2005-03-01 03:58:37.000000000 -0700
+++ 25-akpm/arch/ppc64/mm/numa.c 2005-03-01 03:59:15.000000000 -0700
@@ -62,10 +62,10 @@ void memory_present(int nid, unsigned lo
unsigned long end_pfn)
{
unsigned long i;
- unsigned long start_addr = start << PAGE_SHIFT;
- unsigned long end_addr = end << PAGE_SHIFT;
+ unsigned long start_addr = start_pfn << PAGE_SHIFT;
+ unsigned long end_addr = end_pfn << PAGE_SHIFT;
- for (i = start ; i < end; i += MEMORY_INCREMENT)
+ for (i = start_addr; i < end_addr; i += MEMORY_INCREMENT)
numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = nid;
}
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 3/5] abstract discontigmem setup
2005-03-01 6:21 ` Andrew Morton
@ 2005-03-01 7:16 ` Dave Hansen
0 siblings, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2005-03-01 7:16 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-mm, kmannth, Linux Kernel Mailing List, Yasunori Goto,
Andy Whitcroft
[-- Attachment #1: Type: text/plain, Size: 845 bytes --]
On Mon, 2005-02-28 at 22:21 -0800, Andrew Morton wrote:
> Dave Hansen <haveblue@us.ibm.com> wrote:
> >
> > memory_present() is how each arch/subarch will tell sparsemem
> > and discontigmem where all of its memory is. This is what
> > triggers sparse to go out and create its mappings for the memory,
> > as well as allocate the mem_map[].
>
> There are cross-compilers at http://developer.osdl.org/dev/plm/cross_compile/
>
> This also needs runtime testing on ppc64, does it not?
It does, indeed. Because the i386 and ppc64 portions are independent, we
can drop the ppc64 portion for now, and we'll submit the tested ppc64
changes no later than the sparsemem merge for that arch.
I've attached the i386-only version with the NUMA-Q fix folded in; it
replaces the original patch. I can also wait until the next -mm to
ensure that it is tested properly.
-- Dave
[-- Attachment #2: A3.1-abstract-discontig.patch --]
[-- Type: text/x-patch, Size: 7081 bytes --]
memory_present() is how each arch/subarch will tell sparsemem
and discontigmem where all of its memory is. This is what
triggers sparse to go out and create its mappings for the memory,
as well as allocate the mem_map[].
By: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
memhotplug-dave/arch/i386/Kconfig | 10 ++++++
memhotplug-dave/arch/i386/kernel/numaq.c | 8 ++++-
memhotplug-dave/arch/i386/kernel/srat.c | 9 +++++
memhotplug-dave/arch/i386/mm/discontig.c | 49 +++++++++++++++++++------------
memhotplug-dave/arch/ppc64/mm/numa.c | 19 ++++++++----
memhotplug-dave/include/linux/mmzone.h | 11 ++++++
6 files changed, 81 insertions(+), 25 deletions(-)
diff -puN arch/i386/kernel/numaq.c~A3.1-abstract-discontig arch/i386/kernel/numaq.c
--- memhotplug/arch/i386/kernel/numaq.c~A3.1-abstract-discontig 2005-02-28 22:42:18.000000000 -0800
+++ memhotplug-dave/arch/i386/kernel/numaq.c 2005-02-28 22:45:48.000000000 -0800
@@ -32,7 +32,7 @@
#include <asm/numaq.h>
/* These are needed before the pgdat's are created */
-extern long node_start_pfn[], node_end_pfn[];
+extern long node_start_pfn[], node_end_pfn[], node_remap_size[];
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
@@ -59,6 +59,12 @@ static void __init smp_dump_qct(void)
eq->hi_shrd_mem_start - eq->priv_mem_size);
node_end_pfn[node] = MB_TO_PAGES(
eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
+
+ memory_present(node,
+ node_start_pfn[node], node_end_pfn[node]);
+ node_remap_size[node] = node_memmap_size_bytes(node,
+ node_start_pfn[node],
+ node_end_pfn[node]);
}
}
}
diff -puN arch/i386/kernel/srat.c~A3.1-abstract-discontig arch/i386/kernel/srat.c
--- memhotplug/arch/i386/kernel/srat.c~A3.1-abstract-discontig 2005-02-28 22:42:18.000000000 -0800
+++ memhotplug-dave/arch/i386/kernel/srat.c 2005-02-28 22:45:48.000000000 -0800
@@ -58,7 +58,7 @@ static int num_memory_chunks; /* total
static int zholes_size_init;
static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
-extern unsigned long node_start_pfn[], node_end_pfn[];
+extern unsigned long node_start_pfn[], node_end_pfn[], node_remap_size[];
extern void * boot_ioremap(unsigned long, unsigned long);
@@ -286,6 +286,13 @@ static int __init acpi20_parse_srat(stru
}
}
}
+ for_each_online_node(nid) {
+ unsigned long start = node_start_pfn[nid];
+ unsigned long end = node_end_pfn[nid];
+
+ memory_present(nid, start, end);
+ node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
+ }
return 1;
out_fail:
return 0;
diff -puN arch/i386/mm/discontig.c~A3.1-abstract-discontig arch/i386/mm/discontig.c
--- memhotplug/arch/i386/mm/discontig.c~A3.1-abstract-discontig 2005-02-28 22:42:18.000000000 -0800
+++ memhotplug-dave/arch/i386/mm/discontig.c 2005-02-28 22:45:48.000000000 -0800
@@ -60,6 +60,32 @@ bootmem_data_t node0_bdata;
*/
s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1};
+void memory_present(int nid, unsigned long start, unsigned long end)
+{
+ unsigned long pfn;
+
+ printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n",
+ nid, start, end);
+ printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
+ printk(KERN_DEBUG " ");
+ for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
+ physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
+ printk(KERN_DEBUG "%ld ", pfn);
+ }
+ printk(KERN_DEBUG "\n");
+}
+
+unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long nr_pages = end_pfn - start_pfn;
+
+ if (!nr_pages)
+ return 0;
+
+ return (nr_pages + 1) * sizeof(struct page);
+}
+
unsigned long node_start_pfn[MAX_NUMNODES];
unsigned long node_end_pfn[MAX_NUMNODES];
@@ -162,9 +188,9 @@ static unsigned long calculate_numa_rema
for_each_online_node(nid) {
if (nid == 0)
continue;
- /* calculate the size of the mem_map needed in bytes */
- size = (node_end_pfn[nid] - node_start_pfn[nid] + 1)
- * sizeof(struct page) + sizeof(pg_data_t);
+ /* ensure the remap includes space for the pgdat. */
+ size = node_remap_size[nid] + sizeof(pg_data_t);
+
/* convert size to large (pmd size) pages, rounding up */
size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
/* now the roundup is correct, convert to PAGE_SIZE pages */
@@ -189,7 +215,7 @@ unsigned long __init setup_memory(void)
{
int nid;
unsigned long system_start_pfn, system_max_low_pfn;
- unsigned long reserve_pages, pfn;
+ unsigned long reserve_pages;
/*
* When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -198,22 +224,9 @@ unsigned long __init setup_memory(void)
* this space and use it to adjust the boundry between ZONE_NORMAL
* and ZONE_HIGHMEM.
*/
+ find_max_pfn();
get_memcfg_numa();
- /* Fill in the physnode_map */
- for_each_online_node(nid) {
- printk("Node: %d, start_pfn: %ld, end_pfn: %ld\n",
- nid, node_start_pfn[nid], node_end_pfn[nid]);
- printk(" Setting physnode_map array to node %d for pfns:\n ",
- nid);
- for (pfn = node_start_pfn[nid]; pfn < node_end_pfn[nid];
- pfn += PAGES_PER_ELEMENT) {
- physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
- printk("%ld ", pfn);
- }
- printk("\n");
- }
-
reserve_pages = calculate_numa_remap_pages();
/* partially used pages are not usable - thus round upwards */
diff -puN include/linux/mmzone.h~A3.1-abstract-discontig include/linux/mmzone.h
--- memhotplug/include/linux/mmzone.h~A3.1-abstract-discontig 2005-02-28 22:42:18.000000000 -0800
+++ memhotplug-dave/include/linux/mmzone.h 2005-02-28 22:43:10.000000000 -0800
@@ -11,6 +11,7 @@
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/numa.h>
+#include <linux/init.h>
#include <asm/atomic.h>
/* Free memory management - zoned buddy allocator. */
@@ -278,6 +279,16 @@ void wakeup_kswapd(struct zone *zone, in
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int alloc_type, int can_try_harder, int gfp_high);
+#ifdef CONFIG_HAVE_MEMORY_PRESENT
+void memory_present(int nid, unsigned long start, unsigned long end);
+#else
+static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
+#endif
+
+#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
+unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
+#endif
+
/*
* zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
*/
diff -puN arch/i386/Kconfig~A3.1-abstract-discontig arch/i386/Kconfig
--- memhotplug/arch/i386/Kconfig~A3.1-abstract-discontig 2005-02-28 22:42:18.000000000 -0800
+++ memhotplug-dave/arch/i386/Kconfig 2005-02-28 22:43:10.000000000 -0800
@@ -769,6 +769,16 @@ config HAVE_ARCH_BOOTMEM_NODE
depends on NUMA
default y
+config HAVE_MEMORY_PRESENT
+ bool
+ depends on DISCONTIGMEM
+ default y
+
+config NEED_NODE_MEMMAP_SIZE
+ bool
+ depends on DISCONTIGMEM
+ default y
+
config HIGHPTE
bool "Allocate 3rd-level pagetables from highmem"
depends on HIGHMEM4G || HIGHMEM64G
_
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 3/5] abstract discontigmem setup
@ 2005-02-24 17:29 Dave Hansen
0 siblings, 0 replies; 5+ messages in thread
From: Dave Hansen @ 2005-02-24 17:29 UTC (permalink / raw)
To: linux-mm; +Cc: colpatch, kravetz, mbligh, anton, Dave Hansen, ygoto, apw
memory_present() is how each arch/subarch will tell sparsemem
and discontigmem where all of its memory is. This is what
triggers sparse to go out and create its mappings for the memory,
as well as allocate the mem_map[].
By: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---
sparse-dave/arch/i386/Kconfig | 10 +++++++
sparse-dave/arch/i386/kernel/numaq.c | 6 +++-
sparse-dave/arch/i386/kernel/srat.c | 9 +++++-
sparse-dave/arch/i386/mm/discontig.c | 49 ++++++++++++++++++++++-------------
sparse-dave/arch/ppc64/mm/numa.c | 19 ++++++++++---
sparse-dave/include/linux/mmzone.h | 11 +++++++
6 files changed, 79 insertions(+), 25 deletions(-)
diff -puN arch/i386/kernel/numaq.c~A3.1-abstract-discontig arch/i386/kernel/numaq.c
--- sparse/arch/i386/kernel/numaq.c~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/i386/kernel/numaq.c 2005-02-24 08:56:39.000000000 -0800
@@ -32,7 +32,7 @@
#include <asm/numaq.h>
/* These are needed before the pgdat's are created */
-extern long node_start_pfn[], node_end_pfn[];
+extern long node_start_pfn[], node_end_pfn[], node_remap_size[];
#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
@@ -59,6 +59,10 @@ static void __init smp_dump_qct(void)
eq->hi_shrd_mem_start - eq->priv_mem_size);
node_end_pfn[node] = MB_TO_PAGES(
eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
+
+ memory_present(node,
+ node_start_pfn[node], node_end_pfn[node]);
+ node_remap_size[node] = node_memmap_size_bytes(node);
}
}
}
diff -puN arch/i386/kernel/srat.c~A3.1-abstract-discontig arch/i386/kernel/srat.c
--- sparse/arch/i386/kernel/srat.c~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/i386/kernel/srat.c 2005-02-24 08:56:39.000000000 -0800
@@ -58,7 +58,7 @@ static int num_memory_chunks; /* total
static int zholes_size_init;
static unsigned long zholes_size[MAX_NUMNODES * MAX_NR_ZONES];
-extern unsigned long node_start_pfn[], node_end_pfn[];
+extern unsigned long node_start_pfn[], node_end_pfn[], node_remap_size[];
extern void * boot_ioremap(unsigned long, unsigned long);
@@ -286,6 +286,13 @@ static int __init acpi20_parse_srat(stru
}
}
}
+ for_each_online_node(nid) {
+ unsigned long start = node_start_pfn[nid];
+ unsigned long end = node_end_pfn[nid];
+
+ memory_present(nid, start, end);
+ node_remap_size[nid] = node_memmap_size_bytes(nid, start, end);
+ }
return 1;
out_fail:
return 0;
diff -puN arch/i386/mm/discontig.c~A3.1-abstract-discontig arch/i386/mm/discontig.c
--- sparse/arch/i386/mm/discontig.c~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/i386/mm/discontig.c 2005-02-24 08:56:39.000000000 -0800
@@ -60,6 +60,32 @@ bootmem_data_t node0_bdata;
*/
s8 physnode_map[MAX_ELEMENTS] = { [0 ... (MAX_ELEMENTS - 1)] = -1};
+void memory_present(int nid, unsigned long start, unsigned long end)
+{
+ unsigned long pfn;
+
+ printk(KERN_INFO "Node: %d, start_pfn: %ld, end_pfn: %ld\n",
+ nid, start, end);
+ printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid);
+ printk(KERN_DEBUG " ");
+ for (pfn = start; pfn < end; pfn += PAGES_PER_ELEMENT) {
+ physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
+ printk(KERN_DEBUG "%ld ", pfn);
+ }
+ printk(KERN_DEBUG "\n");
+}
+
+unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long nr_pages = end_pfn - start_pfn;
+
+ if (!nr_pages)
+ return 0;
+
+ return (nr_pages + 1) * sizeof(struct page);
+}
+
unsigned long node_start_pfn[MAX_NUMNODES];
unsigned long node_end_pfn[MAX_NUMNODES];
@@ -162,9 +188,9 @@ static unsigned long calculate_numa_rema
for_each_online_node(nid) {
if (nid == 0)
continue;
- /* calculate the size of the mem_map needed in bytes */
- size = (node_end_pfn[nid] - node_start_pfn[nid] + 1)
- * sizeof(struct page) + sizeof(pg_data_t);
+ /* ensure the remap includes space for the pgdat. */
+ size = node_remap_size[nid] + sizeof(pg_data_t);
+
/* convert size to large (pmd size) pages, rounding up */
size = (size + LARGE_PAGE_BYTES - 1) / LARGE_PAGE_BYTES;
/* now the roundup is correct, convert to PAGE_SIZE pages */
@@ -189,7 +215,7 @@ unsigned long __init setup_memory(void)
{
int nid;
unsigned long system_start_pfn, system_max_low_pfn;
- unsigned long reserve_pages, pfn;
+ unsigned long reserve_pages;
/*
* When mapping a NUMA machine we allocate the node_mem_map arrays
@@ -198,22 +224,9 @@ unsigned long __init setup_memory(void)
* this space and use it to adjust the boundry between ZONE_NORMAL
* and ZONE_HIGHMEM.
*/
+ find_max_pfn();
get_memcfg_numa();
- /* Fill in the physnode_map */
- for_each_online_node(nid) {
- printk("Node: %d, start_pfn: %ld, end_pfn: %ld\n",
- nid, node_start_pfn[nid], node_end_pfn[nid]);
- printk(" Setting physnode_map array to node %d for pfns:\n ",
- nid);
- for (pfn = node_start_pfn[nid]; pfn < node_end_pfn[nid];
- pfn += PAGES_PER_ELEMENT) {
- physnode_map[pfn / PAGES_PER_ELEMENT] = nid;
- printk("%ld ", pfn);
- }
- printk("\n");
- }
-
reserve_pages = calculate_numa_remap_pages();
/* partially used pages are not usable - thus round upwards */
diff -puN arch/ppc64/mm/numa.c~A3.1-abstract-discontig arch/ppc64/mm/numa.c
--- sparse/arch/ppc64/mm/numa.c~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/ppc64/mm/numa.c 2005-02-24 08:56:39.000000000 -0800
@@ -58,6 +58,17 @@ EXPORT_SYMBOL(numa_memory_lookup_table);
EXPORT_SYMBOL(numa_cpumask_lookup_table);
EXPORT_SYMBOL(nr_cpus_in_node);
+void memory_present(int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long i;
+ unsigned long start_addr = start << PAGE_SHIFT;
+ unsigned long end_addr = end << PAGE_SHIFT;
+
+ for (i = start ; i < end; i += MEMORY_INCREMENT)
+ numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = nid;
+}
+
static inline void map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
@@ -378,9 +389,8 @@ new_range:
size / PAGE_SIZE;
}
- for (i = start ; i < (start+size); i += MEMORY_INCREMENT)
- numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] =
- numa_domain;
+ memory_present(numa_domain, start >> PAGE_SHIFT,
+ (start + size) >> PAGE_SHIFT);
ranges--;
if (ranges)
@@ -428,8 +438,7 @@ static void __init setup_nonnuma(void)
init_node_data[0].node_start_pfn = 0;
init_node_data[0].node_spanned_pages = lmb_end_of_DRAM() / PAGE_SIZE;
- for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT)
- numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0;
+ memory_present(0, 0, init_node_data[0].node_spanned_pages);
node0_io_hole_size = top_of_ram - total_ram;
}
diff -puN include/linux/mmzone.h~A3.1-abstract-discontig include/linux/mmzone.h
--- sparse/include/linux/mmzone.h~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/include/linux/mmzone.h 2005-02-24 08:56:39.000000000 -0800
@@ -11,6 +11,7 @@
#include <linux/cache.h>
#include <linux/threads.h>
#include <linux/numa.h>
+#include <linux/init.h>
#include <asm/atomic.h>
/* Free memory management - zoned buddy allocator. */
@@ -278,6 +279,16 @@ void wakeup_kswapd(struct zone *zone, in
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int alloc_type, int can_try_harder, int gfp_high);
+#ifdef CONFIG_HAVE_MEMORY_PRESENT
+void memory_present(int nid, unsigned long start, unsigned long end);
+#else
+static inline void memory_present(int nid, unsigned long start, unsigned long end) {}
+#endif
+
+#ifdef CONFIG_NEED_NODE_MEMMAP_SIZE
+unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
+#endif
+
/*
* zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
*/
diff -puN arch/i386/Kconfig~A3.1-abstract-discontig arch/i386/Kconfig
--- sparse/arch/i386/Kconfig~A3.1-abstract-discontig 2005-02-24 08:56:39.000000000 -0800
+++ sparse-dave/arch/i386/Kconfig 2005-02-24 08:56:39.000000000 -0800
@@ -769,6 +769,16 @@ config HAVE_ARCH_BOOTMEM_NODE
depends on NUMA
default y
+config HAVE_MEMORY_PRESENT
+ bool
+ depends on DISCONTIGMEM
+ default y
+
+config NEED_NODE_MEMMAP_SIZE
+ bool
+ depends on DISCONTIGMEM
+ default y
+
config HIGHPTE
bool "Allocate 3rd-level pagetables from highmem"
depends on HIGHMEM4G || HIGHMEM64G
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2005-03-01 7:16 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-02-28 18:54 [PATCH 3/5] abstract discontigmem setup Dave Hansen
2005-02-28 23:30 ` Dave Hansen
2005-03-01 6:21 ` Andrew Morton
2005-03-01 7:16 ` Dave Hansen
-- strict thread matches above, loose matches on Subject: below --
2005-02-24 17:29 Dave Hansen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox