* [Patch 001/005](memory hotplug) register section/node id to free
2008-04-03 5:37 [Patch 000/005](memory hotplug) freeing pages allocated by bootmem for hotremove v2 Yasunori Goto
@ 2008-04-03 5:39 ` Yasunori Goto
2008-04-03 5:40 ` [Patch 002/005](memory hotplug) align memmap to page size Yasunori Goto
` (3 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Yasunori Goto @ 2008-04-03 5:39 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: Andrew Morton, Linux Kernel ML, Yinghai Lu, linux-mm
This is to register information about which node or section the pages
belong to. The kernel can then distinguish which node/section uses the pages
allocated by bootmem. This is the basis for hot-removing sections or nodes.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
include/linux/memory_hotplug.h | 18 +++++++
include/linux/mmzone.h | 1
mm/bootmem.c | 1
mm/memory_hotplug.c | 97 ++++++++++++++++++++++++++++++++++++++++-
mm/sparse.c | 3 -
5 files changed, 117 insertions(+), 3 deletions(-)
Index: current/mm/bootmem.c
===================================================================
--- current.orig/mm/bootmem.c 2008-03-28 20:04:34.000000000 +0900
+++ current/mm/bootmem.c 2008-04-01 20:02:01.000000000 +0900
@@ -458,6 +458,7 @@
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
+ register_page_bootmem_info_node(pgdat);
return free_all_bootmem_core(pgdat);
}
Index: current/include/linux/memory_hotplug.h
===================================================================
--- current.orig/include/linux/memory_hotplug.h 2008-03-28 20:04:34.000000000 +0900
+++ current/include/linux/memory_hotplug.h 2008-03-28 20:04:37.000000000 +0900
@@ -11,6 +11,15 @@
struct mem_section;
#ifdef CONFIG_MEMORY_HOTPLUG
+
+/*
+ * Magic number for free bootmem.
+ * The normal smallest mapcount is -1. Here is smaller value than it.
+ */
+#define SECTION_INFO 0xfffffffe
+#define MIX_INFO 0xfffffffd
+#define NODE_INFO 0xfffffffc
+
/*
* pgdat resizing functions
*/
@@ -145,6 +154,9 @@
#endif /* CONFIG_NUMA */
#endif /* CONFIG_HAVE_ARCH_NODEDATA_EXTENSION */
+extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
+extern void put_page_bootmem(struct page *page);
+
#else /* ! CONFIG_MEMORY_HOTPLUG */
/*
* Stub functions for when hotplug is off
@@ -172,6 +184,10 @@
return -ENOSYS;
}
+static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
+{
+}
+
#endif /* ! CONFIG_MEMORY_HOTPLUG */
#ifdef CONFIG_MEMORY_HOTREMOVE
@@ -192,5 +208,7 @@
extern int sparse_add_one_section(struct zone *zone, unsigned long start_pfn,
int nr_pages);
extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms);
+extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
+ unsigned long pnum);
#endif /* __LINUX_MEMORY_HOTPLUG_H */
Index: current/include/linux/mmzone.h
===================================================================
--- current.orig/include/linux/mmzone.h 2008-03-28 20:04:34.000000000 +0900
+++ current/include/linux/mmzone.h 2008-03-28 20:04:37.000000000 +0900
@@ -938,6 +938,7 @@
return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
}
extern int __section_nr(struct mem_section* ms);
+extern unsigned long usemap_size(void);
/*
* We use the lower bits of the mem_map pointer to store
Index: current/mm/memory_hotplug.c
===================================================================
--- current.orig/mm/memory_hotplug.c 2008-03-28 20:04:34.000000000 +0900
+++ current/mm/memory_hotplug.c 2008-04-01 18:03:02.000000000 +0900
@@ -59,8 +59,103 @@
return;
}
-
#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
+static void get_page_bootmem(unsigned long info, struct page *page, int magic)
+{
+ atomic_set(&page->_mapcount, magic);
+ SetPagePrivate(page);
+ set_page_private(page, info);
+ atomic_inc(&page->_count);
+}
+
+void put_page_bootmem(struct page *page)
+{
+ int magic;
+
+ magic = atomic_read(&page->_mapcount);
+ BUG_ON(magic >= -1);
+
+ if (atomic_dec_return(&page->_count) == 1) {
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ reset_page_mapcount(page);
+ __free_pages_bootmem(page, 0);
+ }
+
+}
+
+void register_page_bootmem_info_section(unsigned long start_pfn)
+{
+ unsigned long *usemap, mapsize, section_nr, i;
+ struct mem_section *ms;
+ struct page *page, *memmap;
+
+ if (!pfn_valid(start_pfn))
+ return;
+
+ section_nr = pfn_to_section_nr(start_pfn);
+ ms = __nr_to_section(section_nr);
+
+ /* Get section's memmap address */
+ memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
+
+ /*
+ * Get page for the memmap's phys address
+ * XXX: need more consideration for sparse_vmemmap...
+ */
+ page = virt_to_page(memmap);
+ mapsize = sizeof(struct page) * PAGES_PER_SECTION;
+ mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
+
+ /* remember memmap's page */
+ for (i = 0; i < mapsize; i++, page++)
+ get_page_bootmem(section_nr, page, SECTION_INFO);
+
+ usemap = __nr_to_section(section_nr)->pageblock_flags;
+ page = virt_to_page(usemap);
+
+ mapsize = PAGE_ALIGN(usemap_size()) >> PAGE_SHIFT;
+
+ for (i = 0; i < mapsize; i++, page++)
+ get_page_bootmem(section_nr, page, MIX_INFO);
+
+}
+
+void register_page_bootmem_info_node(struct pglist_data *pgdat)
+{
+ unsigned long i, pfn, end_pfn, nr_pages;
+ int node = pgdat->node_id;
+ struct page *page;
+ struct zone *zone;
+
+ nr_pages = PAGE_ALIGN(sizeof(struct pglist_data)) >> PAGE_SHIFT;
+ page = virt_to_page(pgdat);
+
+ for (i = 0; i < nr_pages; i++, page++)
+ get_page_bootmem(node, page, NODE_INFO);
+
+ zone = &pgdat->node_zones[0];
+ for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
+ if (zone->wait_table) {
+ nr_pages = zone->wait_table_hash_nr_entries
+ * sizeof(wait_queue_head_t);
+ nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
+ page = virt_to_page(zone->wait_table);
+
+ for (i = 0; i < nr_pages; i++, page++)
+ get_page_bootmem(node, page, NODE_INFO);
+ }
+ }
+
+ pfn = pgdat->node_start_pfn;
+ end_pfn = pfn + pgdat->node_spanned_pages;
+
+ /* register_section info */
+ for (; pfn < end_pfn; pfn += PAGES_PER_SECTION)
+ register_page_bootmem_info_section(pfn);
+
+}
+
static int __add_zone(struct zone *zone, unsigned long phys_start_pfn)
{
struct pglist_data *pgdat = zone->zone_pgdat;
Index: current/mm/sparse.c
===================================================================
--- current.orig/mm/sparse.c 2008-03-28 20:04:34.000000000 +0900
+++ current/mm/sparse.c 2008-04-01 20:02:05.000000000 +0900
@@ -200,7 +200,6 @@
/*
* Decode mem_map from the coded memmap
*/
-static
struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum)
{
/* mask off the extra low bits of information */
@@ -223,7 +222,7 @@
return 1;
}
-static unsigned long usemap_size(void)
+unsigned long usemap_size(void)
{
unsigned long size_bytes;
size_bytes = roundup(SECTION_BLOCKFLAGS_BITS, 8) / 8;
--
Yasunori Goto
^ permalink raw reply [flat|nested] 11+ messages in thread* [Patch 002/005](memory hotplug) align memmap to page size
2008-04-03 5:37 [Patch 000/005](memory hotplug) freeing pages allocated by bootmem for hotremove v2 Yasunori Goto
2008-04-03 5:39 ` [Patch 001/005](memory hotplug) register section/node id to free Yasunori Goto
@ 2008-04-03 5:40 ` Yasunori Goto
2008-04-03 5:41 ` [Patch 003/005](memory hotplug) make alloc_bootmem_section() Yasunori Goto
` (2 subsequent siblings)
4 siblings, 0 replies; 11+ messages in thread
From: Yasunori Goto @ 2008-04-03 5:40 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: Andrew Morton, Linux Kernel ML, Yinghai Lu, linux-mm
To make freeing the memmap easier, this patch aligns it to page size.
The bootmem allocator may mix several objects in one page.
That is not good for freeing the memmap during memory hot-remove.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
mm/sparse.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: current/mm/sparse.c
===================================================================
--- current.orig/mm/sparse.c 2008-04-01 20:56:45.000000000 +0900
+++ current/mm/sparse.c 2008-04-01 20:58:52.000000000 +0900
@@ -263,8 +263,8 @@
if (map)
return map;
- map = alloc_bootmem_node(NODE_DATA(nid),
- sizeof(struct page) * PAGES_PER_SECTION);
+ map = alloc_bootmem_pages_node(NODE_DATA(nid),
+ PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION));
return map;
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
--
Yasunori Goto
^ permalink raw reply [flat|nested] 11+ messages in thread* [Patch 003/005](memory hotplug) make alloc_bootmem_section()
2008-04-03 5:37 [Patch 000/005](memory hotplug) freeing pages allocated by bootmem for hotremove v2 Yasunori Goto
2008-04-03 5:39 ` [Patch 001/005](memory hotplug) register section/node id to free Yasunori Goto
2008-04-03 5:40 ` [Patch 002/005](memory hotplug) align memmap to page size Yasunori Goto
@ 2008-04-03 5:41 ` Yasunori Goto
2008-04-03 5:52 ` Yinghai Lu
2008-04-03 5:44 ` [Patch 004/005](memory hotplug)allocate usemap on the section with pgdat Yasunori Goto
2008-04-03 5:45 ` [Patch 005/005](memory hotplug) free memmaps allocated by bootmem Yasunori Goto
4 siblings, 1 reply; 11+ messages in thread
From: Yasunori Goto @ 2008-04-03 5:41 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: Andrew Morton, Linux Kernel ML, Yinghai Lu, linux-mm
alloc_bootmem_section() can allocate a specified section's area.
This is used by a later patch to keep the usemap on the same section as the pgdat.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
include/linux/bootmem.h | 2 ++
mm/bootmem.c | 25 +++++++++++++++++++++++++
2 files changed, 27 insertions(+)
Index: current/include/linux/bootmem.h
===================================================================
--- current.orig/include/linux/bootmem.h 2008-04-01 20:56:39.000000000 +0900
+++ current/include/linux/bootmem.h 2008-04-01 20:59:02.000000000 +0900
@@ -101,6 +101,8 @@
extern void free_bootmem_node(pg_data_t *pgdat,
unsigned long addr,
unsigned long size);
+extern void *alloc_bootmem_section(unsigned long size,
+ unsigned long section_nr);
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
#define alloc_bootmem_node(pgdat, x) \
Index: current/mm/bootmem.c
===================================================================
--- current.orig/mm/bootmem.c 2008-04-01 20:56:39.000000000 +0900
+++ current/mm/bootmem.c 2008-04-01 20:59:02.000000000 +0900
@@ -540,6 +540,31 @@
return __alloc_bootmem(size, align, goal);
}
+void * __init alloc_bootmem_section(unsigned long size,
+ unsigned long section_nr)
+{
+ void *ptr;
+ unsigned long limit, goal, start_nr, end_nr, pfn;
+ struct pglist_data *pgdat;
+
+ pfn = section_nr_to_pfn(section_nr);
+ goal = PFN_PHYS(pfn);
+ limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
+ pgdat = NODE_DATA(early_pfn_to_nid(pfn));
+ ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
+ limit);
+
+ start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr)));
+ end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size));
+ if (start_nr != section_nr || end_nr != section_nr) {
+ printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n",
+ section_nr);
+ free_bootmem_core(pgdat->bdata, __pa(ptr), size);
+ ptr = NULL;
+ }
+
+ return ptr;
+}
#ifndef ARCH_LOW_ADDRESS_LIMIT
#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
#endif
--
Yasunori Goto
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [Patch 003/005](memory hotplug) make alloc_bootmem_section()
2008-04-03 5:41 ` [Patch 003/005](memory hotplug) make alloc_bootmem_section() Yasunori Goto
@ 2008-04-03 5:52 ` Yinghai Lu
2008-04-03 6:07 ` Yasunori Goto
0 siblings, 1 reply; 11+ messages in thread
From: Yinghai Lu @ 2008-04-03 5:52 UTC (permalink / raw)
To: Yasunori Goto; +Cc: Badari Pulavarty, Andrew Morton, Linux Kernel ML, linux-mm
On Wed, Apr 2, 2008 at 10:41 PM, Yasunori Goto <y-goto@jp.fujitsu.com> wrote:
> alloc_bootmem_section() can allocate specified section's area.
> This is used for usemap to keep same section with pgdat by later patch.
>
> Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
>
> include/linux/bootmem.h | 2 ++
> mm/bootmem.c | 25 +++++++++++++++++++++++++
> 2 files changed, 27 insertions(+)
>
> Index: current/include/linux/bootmem.h
> ===================================================================
> --- current.orig/include/linux/bootmem.h 2008-04-01 20:56:39.000000000 +0900
> +++ current/include/linux/bootmem.h 2008-04-01 20:59:02.000000000 +0900
> @@ -101,6 +101,8 @@
> extern void free_bootmem_node(pg_data_t *pgdat,
> unsigned long addr,
> unsigned long size);
> +extern void *alloc_bootmem_section(unsigned long size,
> + unsigned long section_nr);
>
> #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
> #define alloc_bootmem_node(pgdat, x) \
> Index: current/mm/bootmem.c
> ===================================================================
> --- current.orig/mm/bootmem.c 2008-04-01 20:56:39.000000000 +0900
> +++ current/mm/bootmem.c 2008-04-01 20:59:02.000000000 +0900
> @@ -540,6 +540,31 @@
> return __alloc_bootmem(size, align, goal);
> }
>
> +void * __init alloc_bootmem_section(unsigned long size,
> + unsigned long section_nr)
> +{
> + void *ptr;
> + unsigned long limit, goal, start_nr, end_nr, pfn;
> + struct pglist_data *pgdat;
> +
> + pfn = section_nr_to_pfn(section_nr);
> + goal = PFN_PHYS(pfn);
> + limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
> + pgdat = NODE_DATA(early_pfn_to_nid(pfn));
> + ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
> + limit);
> +
> + start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr)));
> + end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size));
> + if (start_nr != section_nr || end_nr != section_nr) {
> + printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n",
> + section_nr);
> + free_bootmem_core(pgdat->bdata, __pa(ptr), size);
> + ptr = NULL;
> + }
how about __alloc_bootmem_core return NULL?
YH
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [Patch 003/005](memory hotplug) make alloc_bootmem_section()
2008-04-03 5:52 ` Yinghai Lu
@ 2008-04-03 6:07 ` Yasunori Goto
0 siblings, 0 replies; 11+ messages in thread
From: Yasunori Goto @ 2008-04-03 6:07 UTC (permalink / raw)
To: Yinghai Lu; +Cc: Badari Pulavarty, Andrew Morton, Linux Kernel ML, linux-mm
> On Wed, Apr 2, 2008 at 10:41 PM, Yasunori Goto <y-goto@jp.fujitsu.com> wrote:
> > alloc_bootmem_section() can allocate specified section's area.
> > This is used for usemap to keep same section with pgdat by later patch.
> >
> > Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
> >
> > include/linux/bootmem.h | 2 ++
> > mm/bootmem.c | 25 +++++++++++++++++++++++++
> > 2 files changed, 27 insertions(+)
> >
> > Index: current/include/linux/bootmem.h
> > ===================================================================
> > --- current.orig/include/linux/bootmem.h 2008-04-01 20:56:39.000000000 +0900
> > +++ current/include/linux/bootmem.h 2008-04-01 20:59:02.000000000 +0900
> > @@ -101,6 +101,8 @@
> > extern void free_bootmem_node(pg_data_t *pgdat,
> > unsigned long addr,
> > unsigned long size);
> > +extern void *alloc_bootmem_section(unsigned long size,
> > + unsigned long section_nr);
> >
> > #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
> > #define alloc_bootmem_node(pgdat, x) \
> > Index: current/mm/bootmem.c
> > ===================================================================
> > --- current.orig/mm/bootmem.c 2008-04-01 20:56:39.000000000 +0900
> > +++ current/mm/bootmem.c 2008-04-01 20:59:02.000000000 +0900
> > @@ -540,6 +540,31 @@
> > return __alloc_bootmem(size, align, goal);
> > }
> >
> > +void * __init alloc_bootmem_section(unsigned long size,
> > + unsigned long section_nr)
> > +{
> > + void *ptr;
> > + unsigned long limit, goal, start_nr, end_nr, pfn;
> > + struct pglist_data *pgdat;
> > +
> > + pfn = section_nr_to_pfn(section_nr);
> > + goal = PFN_PHYS(pfn);
> > + limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
> > + pgdat = NODE_DATA(early_pfn_to_nid(pfn));
> > + ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
> > + limit);
> > +
> > + start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr)));
> > + end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size));
> > + if (start_nr != section_nr || end_nr != section_nr) {
> > + printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n",
> > + section_nr);
> > + free_bootmem_core(pgdat->bdata, __pa(ptr), size);
> > + ptr = NULL;
> > + }
>
> how about __alloc_bootmem_core return NULL?
Oops. Yes, it should be checked.
Thanks for your review.
Here is fixed version.
----
alloc_bootmem_section() can allocate a specified section's area.
This is used by a later patch to keep the usemap on the same section as the pgdat.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
---
include/linux/bootmem.h | 2 ++
mm/bootmem.c | 31 +++++++++++++++++++++++++++++++
2 files changed, 33 insertions(+)
Index: current/include/linux/bootmem.h
===================================================================
--- current.orig/include/linux/bootmem.h 2008-04-03 12:54:06.000000000 +0900
+++ current/include/linux/bootmem.h 2008-04-03 14:48:57.000000000 +0900
@@ -101,6 +101,8 @@
extern void free_bootmem_node(pg_data_t *pgdat,
unsigned long addr,
unsigned long size);
+extern void *alloc_bootmem_section(unsigned long size,
+ unsigned long section_nr);
#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
#define alloc_bootmem_node(pgdat, x) \
Index: current/mm/bootmem.c
===================================================================
--- current.orig/mm/bootmem.c 2008-04-03 14:48:55.000000000 +0900
+++ current/mm/bootmem.c 2008-04-03 15:44:04.000000000 +0900
@@ -540,6 +540,37 @@
return __alloc_bootmem(size, align, goal);
}
+#ifdef CONFIG_SPARSEMEM
+void * __init alloc_bootmem_section(unsigned long size,
+ unsigned long section_nr)
+{
+ void *ptr;
+ unsigned long limit, goal, start_nr, end_nr, pfn;
+ struct pglist_data *pgdat;
+
+ pfn = section_nr_to_pfn(section_nr);
+ goal = PFN_PHYS(pfn);
+ limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1;
+ pgdat = NODE_DATA(early_pfn_to_nid(pfn));
+ ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal,
+ limit);
+
+ if (!ptr)
+ return NULL;
+
+ start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr)));
+ end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size));
+ if (start_nr != section_nr || end_nr != section_nr) {
+ printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n",
+ section_nr);
+ free_bootmem_core(pgdat->bdata, __pa(ptr), size);
+ ptr = NULL;
+ }
+
+ return ptr;
+}
+#endif
+
#ifndef ARCH_LOW_ADDRESS_LIMIT
#define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
#endif
--
Yasunori Goto
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Patch 004/005](memory hotplug)allocate usemap on the section with pgdat
2008-04-03 5:37 [Patch 000/005](memory hotplug) freeing pages allocated by bootmem for hotremove v2 Yasunori Goto
` (2 preceding siblings ...)
2008-04-03 5:41 ` [Patch 003/005](memory hotplug) make alloc_bootmem_section() Yasunori Goto
@ 2008-04-03 5:44 ` Yasunori Goto
2008-04-03 5:45 ` [Patch 005/005](memory hotplug) free memmaps allocated by bootmem Yasunori Goto
4 siblings, 0 replies; 11+ messages in thread
From: Yasunori Goto @ 2008-04-03 5:44 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: Andrew Morton, Linux Kernel ML, Yinghai Lu, linux-mm
With this patch, usemaps are allocated on the section which has the pgdat.
Because the usemap size is very small, many usemaps for sections
are allocated on only one page. That page will be quite hard to
remove until all other sections are removed.
This dependency is not desirable for memory removal.
Pgdat has a similar feature. If a section has the pgdat area, it
must be the last section to be removed on the node.
This patch collects the pages causing such dependencies onto one section.
If the other sections don't have any dependency, this section will
finally be removable.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
mm/sparse.c | 16 +++++++++++++---
1 file changed, 13 insertions(+), 3 deletions(-)
Index: current/mm/sparse.c
===================================================================
--- current.orig/mm/sparse.c 2008-04-01 20:59:07.000000000 +0900
+++ current/mm/sparse.c 2008-04-01 20:59:09.000000000 +0900
@@ -238,13 +238,23 @@
}
#endif /* CONFIG_MEMORY_HOTPLUG */
-static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
+static unsigned long *__init sparse_early_usemap_alloc(int pnum)
{
- unsigned long *usemap;
+ unsigned long *usemap, section_nr;
struct mem_section *ms = __nr_to_section(pnum);
int nid = sparse_early_nid(ms);
+ struct pglist_data *pgdat = NODE_DATA(nid);
+
+ /*
+ * This is allocated on same section of pgdat.
+ * It will not be freed until other sections hot-removing on the node.
+ * Pgdat has same feature. This collects all usemap on the same
+ * section.
+ */
+
+ section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
+ usemap = alloc_bootmem_section(usemap_size(), section_nr);
- usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
if (usemap)
return usemap;
--
Yasunori Goto
^ permalink raw reply [flat|nested] 11+ messages in thread* [Patch 005/005](memory hotplug) free memmaps allocated by bootmem
2008-04-03 5:37 [Patch 000/005](memory hotplug) freeing pages allocated by bootmem for hotremove v2 Yasunori Goto
` (3 preceding siblings ...)
2008-04-03 5:44 ` [Patch 004/005](memory hotplug)allocate usemap on the section with pgdat Yasunori Goto
@ 2008-04-03 5:45 ` Yasunori Goto
4 siblings, 0 replies; 11+ messages in thread
From: Yasunori Goto @ 2008-04-03 5:45 UTC (permalink / raw)
To: Badari Pulavarty; +Cc: Andrew Morton, Linux Kernel ML, Yinghai Lu, linux-mm
This patch frees memmaps which were allocated by bootmem.
Freeing the usemap is not necessary. The usemap's pages may be needed
by other sections. If the section being removed is the last section on the
node, its page must be isolated from the page allocator in order to remove it.
So it shouldn't be freed, and is kept as it is.
Signed-off-by: Yasunori Goto <y-goto@jp.fujitsu.com>
mm/internal.h | 3 +--
mm/page_alloc.c | 2 +-
mm/sparse.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++----
3 files changed, 48 insertions(+), 7 deletions(-)
Index: current/mm/sparse.c
===================================================================
--- current.orig/mm/sparse.c 2008-04-01 20:58:52.000000000 +0900
+++ current/mm/sparse.c 2008-04-01 20:59:07.000000000 +0900
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
+#include "internal.h"
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
@@ -348,6 +349,10 @@
{
return; /* XXX: Not implemented yet */
}
+static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+{
+ return; /* XXX: Not implemented yet */
+}
#else
static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
{
@@ -385,17 +390,45 @@
free_pages((unsigned long)memmap,
get_order(sizeof(struct page) * nr_pages));
}
+
+static void free_map_bootmem(struct page *page, unsigned long nr_pages)
+{
+ unsigned long maps_section_nr, removing_section_nr, i;
+ int magic;
+
+ for (i = 0; i < nr_pages; i++, page++) {
+ magic = atomic_read(&page->_mapcount);
+
+ BUG_ON(magic == NODE_INFO);
+
+ maps_section_nr = pfn_to_section_nr(page_to_pfn(page));
+ removing_section_nr = page->private;
+
+ /*
+ * If removing section's memmap is placed on other section,
+ * it must be free.
+ * Else, nothing is necessary. the memmap is already isolated
+ * against page allocator, and it is not used any more.
+ */
+ if (maps_section_nr != removing_section_nr)
+ put_page_bootmem(page);
+ }
+}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
static void free_section_usemap(struct page *memmap, unsigned long *usemap)
{
+ struct page *usemap_page;
+ unsigned long nr_pages;
+
if (!usemap)
return;
+ usemap_page = virt_to_page(usemap);
/*
* Check to see if allocation came from hot-plug-add
*/
- if (PageSlab(virt_to_page(usemap))) {
+ if (PageSlab(usemap_page)) {
kfree(usemap);
if (memmap)
__kfree_section_memmap(memmap, PAGES_PER_SECTION);
@@ -403,10 +436,19 @@
}
/*
- * TODO: Allocations came from bootmem - how do I free up ?
+ * The usemap came from bootmem. This is packed with other usemaps
+ * on the section which has pgdat at boot time. Just keep it as is now.
*/
- printk(KERN_WARNING "Not freeing up allocations from bootmem "
- "- leaking memory\n");
+
+ if (memmap) {
+ struct page *memmap_page;
+ memmap_page = virt_to_page(memmap);
+
+ nr_pages = PAGE_ALIGN(PAGES_PER_SECTION * sizeof(struct page))
+ >> PAGE_SHIFT;
+
+ free_map_bootmem(memmap_page, nr_pages);
+ }
}
/*
Index: current/mm/page_alloc.c
===================================================================
--- current.orig/mm/page_alloc.c 2008-04-01 20:56:45.000000000 +0900
+++ current/mm/page_alloc.c 2008-04-01 20:59:07.000000000 +0900
@@ -564,7 +564,7 @@
/*
* permit the bootmem allocator to evade page validation on high-order frees
*/
-void __init __free_pages_bootmem(struct page *page, unsigned int order)
+void __free_pages_bootmem(struct page *page, unsigned int order)
{
if (order == 0) {
__ClearPageReserved(page);
Index: current/mm/internal.h
===================================================================
--- current.orig/mm/internal.h 2008-04-01 20:56:45.000000000 +0900
+++ current/mm/internal.h 2008-04-01 20:59:07.000000000 +0900
@@ -34,8 +34,7 @@
atomic_dec(&page->_count);
}
-extern void __init __free_pages_bootmem(struct page *page,
- unsigned int order);
+extern void __free_pages_bootmem(struct page *page, unsigned int order);
/*
* function for dealing with page's order in buddy system.
--
Yasunori Goto
^ permalink raw reply [flat|nested] 11+ messages in thread