linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: linux-mm@kvack.org, clameter@engr.sgi.com, apw@shadowen.org
Subject: [RFC][PATCH] vmemmap on sparsemem v2 [1/5] generic vmemmap on sparsemem
Date: Tue, 5 Dec 2006 21:49:02 +0900	[thread overview]
Message-ID: <20061205214902.b8454d67.kamezawa.hiroyu@jp.fujitsu.com> (raw)
In-Reply-To: <20061205214517.5ad924f6.kamezawa.hiroyu@jp.fujitsu.com>

This patch implements the arch-independent part of virtual mem_map for sparsemem.
Memory hotplug is not supported here (it is supported by a later patch).

Declarations which an arch has to add to use vmem_map/sparsemem are:

* declare 'struct page *vmem_map' or 'vmem_map[]' and set up its value.
* set ARCH_SPARSEMEM_VMEMMAP in Kconfig

Maybe asm/sparsemem.h is a suitable place for these, as the (later) ia64 patch does.

We can assume that the total size of mem_map per section is aligned to PAGE_SIZE.
Because of this, sparsemem's pfn_valid() works fine.

This code has its own page-mapping routine just because it has to be called
before page struct is available.

Consideration:
I know some people are trying to use large pages for vmem_map. It seems attractive
but this patch doesn't provide hooks for that.
Maybe rewriting map_virtual_mem_map() is enough (if you don't consider
memory hotplug).
IMO, a generic interface to map large pages in the kernel should be discussed
before doing such a special hack.

Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

 include/linux/mmzone.h |    8 +++
 mm/Kconfig             |    9 ++++
 mm/sparse.c            |  101 ++++++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 113 insertions(+), 5 deletions(-)

Index: devel-2.6.19-rc6-mm2/mm/Kconfig
===================================================================
--- devel-2.6.19-rc6-mm2.orig/mm/Kconfig	2006-12-05 17:24:30.000000000 +0900
+++ devel-2.6.19-rc6-mm2/mm/Kconfig	2006-12-05 17:24:58.000000000 +0900
@@ -112,6 +112,15 @@
 	def_bool y
 	depends on SPARSEMEM && !SPARSEMEM_STATIC
 
+config SPARSEMEM_VMEMMAP
+	bool	"virtual memmap support for sparsemem"
+	depends on SPARSEMEM && !SPARSEMEM_STATIC && ARCH_SPARSEMEM_VMEMMAP
+	help
+	  If selected, sparsemem uses virtually contiguous address for mem_map.
+	  Some functions of sparsemem (pfn_to_page/page_to_pfn) can be very
+	  simple and fast. But this will consume a huge amount of virtual
+	  address space.
+
 # eventually, we can have this option just 'select SPARSEMEM'
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
Index: devel-2.6.19-rc6-mm2/include/linux/mmzone.h
===================================================================
--- devel-2.6.19-rc6-mm2.orig/include/linux/mmzone.h	2006-12-05 17:24:28.000000000 +0900
+++ devel-2.6.19-rc6-mm2/include/linux/mmzone.h	2006-12-05 19:53:41.000000000 +0900
@@ -714,12 +714,23 @@
 #define SECTION_MAP_MASK	(~(SECTION_MAP_LAST_BIT-1))
 #define SECTION_NID_SHIFT	2
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+/*
+ * sparse_vmem_map_start is defined by each arch.
+ * vmem_map is declared by each arch.
+ */
+static inline struct page *__section_mem_map_addr(struct mem_section *section)
+{
+	return vmem_map;
+}
+#else
 static inline struct page *__section_mem_map_addr(struct mem_section *section)
 {
 	unsigned long map = section->section_mem_map;
 	map &= SECTION_MAP_MASK;
 	return (struct page *)map;
 }
+#endif
 
 static inline int valid_section(struct mem_section *section)
 {
Index: devel-2.6.19-rc6-mm2/mm/sparse.c
===================================================================
--- devel-2.6.19-rc6-mm2.orig/mm/sparse.c	2006-12-05 17:24:30.000000000 +0900
+++ devel-2.6.19-rc6-mm2/mm/sparse.c	2006-12-05 19:53:13.000000000 +0900
@@ -9,6 +9,7 @@
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <asm/dma.h>
+#include <asm/pgalloc.h>
 
 /*
  * Permanent SPARSEMEM data:
@@ -99,6 +100,105 @@
 }
 #endif
 
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+
+static void* __init pte_alloc_vmem_map(int node)
+{
+	return alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE);
+}
+
+/*
+ * We can expect that the mem_map in a section is always contiguous.
+ */
+static unsigned long
+__init sparse_phys_mem_map_get(unsigned long section,
+				    unsigned long vmap,
+				    int node)
+{
+	struct mem_section *ms = __nr_to_section(section);
+	unsigned long map = ms->section_mem_map & SECTION_MAP_MASK;
+	unsigned long vmap_start;
+
+	vmap_start = (unsigned long)pfn_to_page(section_nr_to_pfn(section));
+
+	if (system_state == SYSTEM_BOOTING) {
+		unsigned long offset;
+		map = (unsigned long)((struct page*)(map) +
+				       section_nr_to_pfn(section));
+		offset = (vmap - vmap_start) >> PAGE_SHIFT;
+		map = __pa(map);
+		return (map >> PAGE_SHIFT) + offset;
+	}
+	BUG(); /* handled by memory hotplug */
+}
+
+/*
+ * map_pos(section,offset) returns pfn of physical address of mem_map
+ * in section at index. (see boot_memmap_pos()).
+ * Returns 1 on success, -ENOMEM on failure.
+ */
+static int __meminit map_virtual_mem_map(unsigned long section, int node)
+{
+	unsigned long vmap_start, vmap_end, vmap;
+	void *pg;
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	vmap_start = (unsigned long)pfn_to_page(section_nr_to_pfn(section));
+	vmap_end = vmap_start + PAGES_PER_SECTION * sizeof(struct page);
+
+	for (vmap = vmap_start;
+	     vmap != vmap_end;
+	     vmap += PAGE_SIZE)
+	{
+		pgd = pgd_offset_k(vmap);
+		if (pgd_none(*pgd)) {
+			pg = pte_alloc_vmem_map(node);
+			if (!pg)
+				goto error_out;
+			pgd_populate(&init_mm, pgd, pg);
+		}
+		pud = pud_offset(pgd, vmap);
+		if (pud_none(*pud)) {
+			pg = pte_alloc_vmem_map(node);
+			if (!pg)
+				goto error_out;
+			pud_populate(&init_mm, pud, pg);
+		}
+		pmd = pmd_offset(pud, vmap);
+		if (pmd_none(*pmd)) {
+			pg = pte_alloc_vmem_map(node);
+			if (!pg)
+				goto error_out;
+			pmd_populate_kernel(&init_mm, pmd, pg);
+		}
+		pte = pte_offset_kernel(pmd, vmap);
+		if (pte_none(*pte)) {
+			unsigned long pfn;
+			pfn = sparse_phys_mem_map_get(section, vmap, node);
+			if (!pfn)
+				goto error_out;
+			set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
+		}
+	}
+	flush_cache_vmap(vmap_start, vmap_end);
+	return 1;
+error_out:
+	return -ENOMEM;
+}
+
+#else
+
+static inline int map_virtual_mem_map(int section, int node)
+{
+	return 1;
+}
+
+#endif
+
 /*
  * Although written for the SPARSEMEM_EXTREME case, this happens
  * to also work for the flat array case becase
@@ -198,15 +298,14 @@
 }
 
 static int sparse_init_one_section(struct mem_section *ms,
-		unsigned long pnum, struct page *mem_map)
+		unsigned long pnum, struct page *mem_map, int nid)
 {
 	if (!valid_section(ms))
 		return -EINVAL;
 
 	ms->section_mem_map &= ~SECTION_MAP_MASK;
 	ms->section_mem_map |= sparse_encode_mem_map(mem_map, pnum);
-
-	return 1;
+	return map_virtual_mem_map(pnum, nid);
 }
 
 static struct page *sparse_early_mem_map_alloc(unsigned long pnum)
@@ -284,7 +383,8 @@
 		map = sparse_early_mem_map_alloc(pnum);
 		if (!map)
 			continue;
-		sparse_init_one_section(__nr_to_section(pnum), pnum, map);
+		sparse_init_one_section(__nr_to_section(pnum), pnum, map,
+				sparse_early_nid(__nr_to_section(pnum)));
 	}
 }
 
@@ -319,7 +419,7 @@
 	}
 	ms->section_mem_map |= SECTION_MARKED_PRESENT;
 
-	ret = sparse_init_one_section(ms, section_nr, memmap);
+	ret = sparse_init_one_section(ms, section_nr, memmap, pgdat->node_id);
 
 out:
 	pgdat_resize_unlock(pgdat, &flags);

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2006-12-05 12:49 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-12-05 12:45 [RFC][PATCH] vmemmap on sparsemem v2 KAMEZAWA Hiroyuki
2006-12-05 12:49 ` KAMEZAWA Hiroyuki [this message]
2006-12-06 18:13   ` [RFC][PATCH] vmemmap on sparsemem v2 [1/5] generic vmemmap on sparsemem Heiko Carstens
2006-12-06 18:17     ` Christoph Lameter
2006-12-07  0:20       ` KAMEZAWA Hiroyuki
2006-12-07  0:20         ` Christoph Lameter
2006-12-07 10:11         ` Heiko Carstens
2006-12-07 10:50           ` KAMEZAWA Hiroyuki
2006-12-07 10:06       ` Heiko Carstens
2006-12-07 10:17         ` KAMEZAWA Hiroyuki
2006-12-08  3:06   ` KAMEZAWA Hiroyuki
2006-12-05 12:53 ` [RFC][PATCH] vmemmap on sparsemem v2 [2/5] memory hotplug support KAMEZAWA Hiroyuki
2006-12-05 12:59 ` [RFC][PATCH] vmemmap on sparsemem v2 [3/5] ia64 vmemamp on sparsemem KAMEZAWA Hiroyuki
2006-12-08  1:09   ` KAMEZAWA Hiroyuki
2006-12-05 13:09 ` [RFC][PATCH] vmemmap on sparsemem v2 [4/5] optimized pfn_valid KAMEZAWA Hiroyuki
2006-12-05 13:10 ` [RFC][PATCH] vmemmap on sparsemem v2 [5/5] optimzied pfn_valid support for ia64 KAMEZAWA Hiroyuki
2006-12-10 13:37 ` [RFC][PATCH] vmemmap on sparsemem v2 Andy Whitcroft
2006-12-10 15:19   ` Heiko Carstens
2006-12-11  1:09     ` KAMEZAWA Hiroyuki
2006-12-11 17:23     ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061205214902.b8454d67.kamezawa.hiroyu@jp.fujitsu.com \
    --to=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=apw@shadowen.org \
    --cc=clameter@engr.sgi.com \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox