From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: linux-mm@kvack.org, clameter@engr.sgi.com, apw@shadowen.org
Subject: [RFC][PATCH] vmemmap on sparsemem v2 [1/5] generic vmemmap on sparsemem
Date: Tue, 5 Dec 2006 21:49:02 +0900
Message-ID: <20061205214902.b8454d67.kamezawa.hiroyu@jp.fujitsu.com>
In-Reply-To: <20061205214517.5ad924f6.kamezawa.hiroyu@jp.fujitsu.com>
This patch implements the arch-independent part of a virtual mem_map for sparsemem.
Memory hotplug is not supported yet. (It is added by a later patch.)
To use a virtual mem_map with sparsemem, an arch has to
* declare 'struct page *vmem_map' (or vmem_map[]) and set up its value, and
* set ARCH_SPARSEMEM_VMEMMAP in Kconfig.
asm/sparsemem.h is probably a suitable place for the declaration, as the later ia64 patch does.
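As a rough illustration of the arch side (not part of this patch; the names
below are made up, and the real ia64 version appears later in this series),
it could look like:

    /* include/asm-foo/sparsemem.h -- illustrative only */
    #ifdef CONFIG_SPARSEMEM_VMEMMAP
    /* start of the virtually contiguous mem_map; the arch reserves this range */
    extern struct page *vmem_map;
    #endif

    /* arch/foo/Kconfig -- illustrative only */
    config ARCH_SPARSEMEM_VMEMMAP
            def_bool y

The arch points vmem_map at the start of a reserved virtual address range
early in boot, before sparse_init() runs.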
We can assume that the total size of the mem_map for one section is aligned to PAGE_SIZE.
Because of this, pfn_valid() (of sparsemem) works fine.
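(For illustration only, with made-up numbers: with 4KB pages, a 64-byte
struct page and 128MB sections, PAGES_PER_SECTION is 32768, so a section's
mem_map is 32768 * 64 bytes = 2MB, a multiple of PAGE_SIZE. Each section
therefore owns whole virtual pages of the vmem_map.)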
This code has its own page-table mapping routine only because it has to run
before struct page is available.
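For reference, the "simple and fast" pfn_to_page()/page_to_pfn() mentioned in
the Kconfig help below come from the fact that, with one virtually contiguous
vmem_map, the existing sparsemem macros collapse into plain pointer arithmetic
once __section_mem_map_addr() returns vmem_map. As a sketch (this is not code
the patch adds):

    /* illustrative only -- what the lookups effectively reduce to */
    #define pfn_to_page(pfn)   (vmem_map + (pfn))
    #define page_to_pfn(page)  ((unsigned long)((page) - vmem_map))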
Consideration:
I know some people are trying to use large pages for the vmem_map. That looks
attractive, but this patch does not provide hooks for it.
Maybe rewriting map_virtual_mem_map() is enough (if you don't consider
memory hotplug).
IMO, a generic interface for mapping large pages in the kernel should be
discussed before doing such a special hack.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
include/linux/mmzone.h | 8 +++
mm/Kconfig | 9 ++++
mm/sparse.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++---
3 files changed, 113 insertions(+), 5 deletions(-)
Index: devel-2.6.19-rc6-mm2/mm/Kconfig
===================================================================
--- devel-2.6.19-rc6-mm2.orig/mm/Kconfig 2006-12-05 17:24:30.000000000 +0900
+++ devel-2.6.19-rc6-mm2/mm/Kconfig 2006-12-05 17:24:58.000000000 +0900
@@ -112,6 +112,15 @@
def_bool y
depends on SPARSEMEM && !SPARSEMEM_STATIC
+config SPARSEMEM_VMEMMAP
+ bool "virtual memmap support for sparsemem"
+ depends on SPARSEMEM && !SPARSEMEM_STATIC && ARCH_SPARSEMEM_VMEMMAP
+ help
+ If selected, sparsemem uses a virtually contiguous address range for mem_map.
+ Some sparsemem functions (pfn_to_page/page_to_pfn) then become very
+ simple and fast, but this consumes a large amount of virtual
+ address space.
+
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
Index: devel-2.6.19-rc6-mm2/include/linux/mmzone.h
===================================================================
--- devel-2.6.19-rc6-mm2.orig/include/linux/mmzone.h 2006-12-05 17:24:28.000000000 +0900
+++ devel-2.6.19-rc6-mm2/include/linux/mmzone.h 2006-12-05 19:53:41.000000000 +0900
@@ -714,12 +714,23 @@
#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
#define SECTION_NID_SHIFT 2
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * sparse_vmem_map_start is defined by each arch.
+ * vmem_map is declared by each arch.
+ */
+static inline struct page *__section_mem_map_addr(struct mem_section *section)
+{
+ return vmem_map;
+}
+#else
static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
unsigned long map = section->section_mem_map;
map &= SECTION_MAP_MASK;
return (struct page *)map;
}
+#endif
static inline int valid_section(struct mem_section *section)
{
Index: devel-2.6.19-rc6-mm2/mm/sparse.c
===================================================================
--- devel-2.6.19-rc6-mm2.orig/mm/sparse.c 2006-12-05 17:24:30.000000000 +0900
+++ devel-2.6.19-rc6-mm2/mm/sparse.c 2006-12-05 19:53:13.000000000 +0900
@@ -9,6 +9,7 @@
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <asm/dma.h>
+#include <asm/pgalloc.h>
/*
* Permanent SPARSEMEM data:
@@ -99,6 +100,105 @@
}
#endif
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+
+static void* __init pte_alloc_vmem_map(int node)
+{
+ return alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE);
+}
+
+/*
+ * The mem_map of a section is expected to be physically contiguous.
+ */
+static unsigned long
+__init sparse_phys_mem_map_get(unsigned long section,
+ unsigned long vmap,
+ int node)
+{
+ struct mem_section *ms = __nr_to_section(section);
+ unsigned long map = ms->section_mem_map & SECTION_MAP_MASK;
+ unsigned long vmap_start;
+
+ vmap_start = (unsigned long)pfn_to_page(section_nr_to_pfn(section));
+
+ if (system_state == SYSTEM_BOOTING) {
+ unsigned long offset;
+ map = (unsigned long)((struct page*)(map) +
+ section_nr_to_pfn(section));
+ offset = (vmap - vmap_start) >> PAGE_SHIFT;
+ map = __pa(map);
+ return (map >> PAGE_SHIFT) + offset;
+ }
+ BUG(); /* handled by memory hotplug */
+}
+
+/*
+ * map_virtual_mem_map(section, node) maps the already-allocated mem_map of
+ * 'section' into its virtual vmem_map range, allocating page tables from
+ * bootmem on 'node'. Returns 1 on success, -ENOMEM on failure.
+ */
+static int __meminit map_virtual_mem_map(unsigned long section, int node)
+{
+ unsigned long vmap_start, vmap_end, vmap;
+ void *pg;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ vmap_start = (unsigned long)pfn_to_page(section_nr_to_pfn(section));
+ vmap_end = vmap_start + PAGES_PER_SECTION * sizeof(struct page);
+
+ for (vmap = vmap_start;
+ vmap != vmap_end;
+ vmap += PAGE_SIZE)
+ {
+ pgd = pgd_offset_k(vmap);
+ if (pgd_none(*pgd)) {
+ pg = pte_alloc_vmem_map(node);
+ if (!pg)
+ goto error_out;
+ pgd_populate(&init_mm, pgd, pg);
+ }
+ pud = pud_offset(pgd, vmap);
+ if (pud_none(*pud)) {
+ pg = pte_alloc_vmem_map(node);
+ if (!pg)
+ goto error_out;
+ pud_populate(&init_mm, pud, pg);
+ }
+ pmd = pmd_offset(pud, vmap);
+ if (pmd_none(*pmd)) {
+ pg = pte_alloc_vmem_map(node);
+ if (!pg)
+ goto error_out;
+ pmd_populate_kernel(&init_mm, pmd, pg);
+ }
+ pte = pte_offset_kernel(pmd, vmap);
+ if (pte_none(*pte)) {
+ unsigned long pfn;
+ pfn = sparse_phys_mem_map_get(section, vmap, node);
+ if (!pfn)
+ goto error_out;
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+ }
+ }
+ flush_cache_vmap(vmap_start, vmap_end);
+ return 1;
+error_out:
+ return -ENOMEM;
+}
+
+#else
+
+static inline int map_virtual_mem_map(unsigned long section, int node)
+{
+ return 1;
+}
+
+#endif
+
/*
* Although written for the SPARSEMEM_EXTREME case, this happens
* to also work for the flat array case becase
@@ -198,15 +298,14 @@
}
static int sparse_init_one_section(struct mem_section *ms,
- unsigned long pnum, struct page *mem_map)
+ unsigned long pnum, struct page *mem_map, int nid)
{
if (!valid_section(ms))
return -EINVAL;
ms->section_mem_map &= ~SECTION_MAP_MASK;
ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum);
-
- return 1;
+ return map_virtual_mem_map(pnum, nid);
}
static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
@@ -284,7 +383,8 @@
map = sparse_early_mem_map_alloc(pnum);
if (!map)
continue;
- sparse_init_one_section(__nr_to_section(pnum), pnum, map);
+ sparse_init_one_section(__nr_to_section(pnum), pnum, map,
+ sparse_early_nid(__nr_to_section(pnum)));
}
}
@@ -319,7 +419,7 @@
}
ms->section_mem_map |= SECTION_MARKED_PRESENT;
- ret = sparse_init_one_section(ms, section_nr, memmap);
+ ret = sparse_init_one_section(ms, section_nr, memmap, pgdat->node_id);
out:
pgdat_resize_unlock(pgdat, &flags);