* [RFC} memory unplug patchset prep [1/16] zone ids cleanup
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
@ 2007-03-06 4:42 ` KAMEZAWA Hiroyuki
2007-03-06 15:36 ` David Rientjes
2007-03-06 4:43 ` [RFC} memory unplug patchset prep [2/16] gathering alloc_zeroed_user_highpage() KAMEZAWA Hiroyuki
` (15 subsequent siblings)
16 siblings, 1 reply; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:42 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
This patch defines ZONE_DMA, ZONE_DMA32 and ZONE_HIGHMEM on *any* config.
MAX_NR_ZONES is unchanged, and a not-configured zone's id is greater than it.
Now, you can check whether a zone is configured by (zone_id < MAX_NR_ZONES).
Goodbye, #ifdefs. The compiler will do enough work, I think.
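To illustrate the resulting layout (a sketch for one possible config, CONFIG_ZONE_DMA=y with CONFIG_ZONE_DMA32 and CONFIG_HIGHMEM unset; the concrete values are implied by the enum below, not spelled out in the patch):

	ZONE_DMA           = 0,	/* configured */
	ZONE_NORMAL        = 1,	/* configured */
	MAX_NR_ZONES       = 2,
	ZONE_DMA32         = 3,	/* not configured */
	ZONE_HIGHMEM       = 4,	/* not configured */
	MAX_POSSIBLE_ZONES = 5

so is_configured_zone(ZONE_HIGHMEM) evaluates to 0 at compile time and the #ifdef can go away.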
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/mmzone.h | 33 ++++++++++++------------
mm/page_alloc.c | 65 ++++++++++++++++++++++++++-----------------------
2 files changed, 51 insertions(+), 47 deletions(-)
Index: devel-tree-2.6.20-mm2/include/linux/mmzone.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/mmzone.h
+++ devel-tree-2.6.20-mm2/include/linux/mmzone.h
@@ -142,9 +142,24 @@ enum zone_type {
*/
ZONE_HIGHMEM,
#endif
- MAX_NR_ZONES
+ MAX_NR_ZONES,
+#ifndef CONFIG_ZONE_DMA
+ ZONE_DMA,
+#endif
+#ifndef CONFIG_ZONE_DMA32
+ ZONE_DMA32,
+#endif
+#ifndef CONFIG_HIGHMEM
+ ZONE_HIGHMEM,
+#endif
+ MAX_POSSIBLE_ZONES
};
+static inline int is_configured_zone(enum zone_type type)
+{
+ return (type < MAX_NR_ZONES);
+}
+
/*
* When a memory allocation must conform to specific limitations (such
* as being suitable for DMA) the caller will pass in hints to the
@@ -500,11 +515,7 @@ static inline int populated_zone(struct
static inline int is_highmem_idx(enum zone_type idx)
{
-#ifdef CONFIG_HIGHMEM
return (idx == ZONE_HIGHMEM);
-#else
- return 0;
-#endif
}
static inline int is_normal_idx(enum zone_type idx)
@@ -520,11 +531,7 @@ static inline int is_normal_idx(enum zon
*/
static inline int is_highmem(struct zone *zone)
{
-#ifdef CONFIG_HIGHMEM
return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
-#else
- return 0;
-#endif
}
static inline int is_normal(struct zone *zone)
@@ -534,20 +541,12 @@ static inline int is_normal(struct zone
static inline int is_dma32(struct zone *zone)
{
-#ifdef CONFIG_ZONE_DMA32
return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
-#else
- return 0;
-#endif
}
static inline int is_dma(struct zone *zone)
{
-#ifdef CONFIG_ZONE_DMA
return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
-#else
- return 0;
-#endif
}
/* These two functions are used to setup the per zone pages min values */
Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
+++ devel-tree-2.6.20-mm2/mm/page_alloc.c
@@ -72,32 +72,34 @@ static void __free_pages_ok(struct page
* TBD: should special case ZONE_DMA32 machines here - in those we normally
* don't need any ZONE_NORMAL reservation
*/
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
-#ifdef CONFIG_ZONE_DMA
- 256,
-#endif
-#ifdef CONFIG_ZONE_DMA32
- 256,
-#endif
-#ifdef CONFIG_HIGHMEM
- 32
-#endif
-};
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
EXPORT_SYMBOL(totalram_pages);
-static char * const zone_names[MAX_NR_ZONES] = {
-#ifdef CONFIG_ZONE_DMA
- "DMA",
-#endif
-#ifdef CONFIG_ZONE_DMA32
- "DMA32",
-#endif
- "Normal",
-#ifdef CONFIG_HIGHMEM
- "HighMem"
-#endif
-};
+static char *zone_names[MAX_POSSIBLE_ZONES];
+
+static char name_dma[] = "DMA";
+static char name_dma32[] = "DMA32";
+static char name_normal[] = "Normal";
+static char name_highmem[] = "Highmem";
+
+static inline void __meminit zone_variables_init(void)
+{
+ if (zone_names[0] != NULL)
+ return;
+ zone_names[ZONE_DMA] = name_dma;
+ zone_names[ZONE_DMA32] = name_dma32;
+ zone_names[ZONE_NORMAL] = name_normal;
+ zone_names[ZONE_HIGHMEM] = name_highmem;
+
+ /* ZONE below NORAML has ratio 256 */
+ if (is_configured_zone(ZONE_DMA))
+ sysctl_lowmem_reserve_ratio[ZONE_DMA] = 256;
+ if (is_configured_zone(ZONE_DMA32))
+ sysctl_lowmem_reserve_ratio[ZONE_DMA32] = 256;
+ if (is_configured_zone(ZONE_HIGHMEM))
+ sysctl_lowmem_reserve_ratio[ZONE_HIGHMEM] = 32;
+}
int min_free_kbytes = 1024;
@@ -1638,14 +1640,16 @@ void si_meminfo_node(struct sysinfo *val
val->totalram = pgdat->node_present_pages;
val->freeram = node_page_state(nid, NR_FREE_PAGES);
-#ifdef CONFIG_HIGHMEM
- val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].present_pages;
- val->freehigh = zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
+ if (is_configured_zone(ZONE_HIGHMEM)) {
+ val->totalhigh =
+ pgdat->node_zones[ZONE_HIGHMEM].present_pages;
+ val->freehigh =
+ zone_page_state(&pgdat->node_zones[ZONE_HIGHMEM],
NR_FREE_PAGES);
-#else
- val->totalhigh = 0;
- val->freehigh = 0;
-#endif
+ } else {
+ val->totalhigh = 0;
+ val->freehigh = 0;
+ }
val->mem_unit = PAGE_SIZE;
}
#endif
@@ -3048,7 +3052,8 @@ void __init free_area_init_nodes(unsigne
{
unsigned long nid;
enum zone_type i;
-
+ /* Parameter Setup */
+ zone_variables_init();
/* Sort early_node_map as initialisation assumes it is sorted */
sort_node_map();
@@ -3119,6 +3124,7 @@ EXPORT_SYMBOL(contig_page_data);
void __init free_area_init(unsigned long *zones_size)
{
+ zone_variables_init();
free_area_init_node(0, NODE_DATA(0), zones_size,
__pa(PAGE_OFFSET) >> PAGE_SHIFT, NULL);
}
* Re: [RFC} memory unplug patchset prep [1/16] zone ids cleanup
2007-03-06 4:42 ` [RFC} memory unplug patchset prep [1/16] zone ids cleanup KAMEZAWA Hiroyuki
@ 2007-03-06 15:36 ` David Rientjes
2007-03-07 1:21 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 34+ messages in thread
From: David Rientjes @ 2007-03-06 15:36 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
> OThis patch defines ZONE_DMA,DMA32,HIGHMEM on *any* config.
> MAX_NR_ZONES is unchanged and not-configured zones's id is greater than it.
> Now, you can check zone is configured or not by (zone_id < MAX_NR_ZONES).
>
> Good bye #ifdefs. Compiler will do enough work, I think.
>
Eliminating the abundance of #ifdef's certainly seems like a worthwhile
goal.
Few comments below.
> Index: devel-tree-2.6.20-mm2/include/linux/mmzone.h
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/include/linux/mmzone.h
> +++ devel-tree-2.6.20-mm2/include/linux/mmzone.h
> @@ -142,9 +142,24 @@ enum zone_type {
> */
> ZONE_HIGHMEM,
> #endif
> - MAX_NR_ZONES
> + MAX_NR_ZONES,
> +#ifndef CONFIG_ZONE_DMA
> + ZONE_DMA,
> +#endif
> +#ifndef CONFIG_ZONE_DMA32
> + ZONE_DMA32,
> +#endif
> +#ifndef CONFIG_HIGHMEM
> + ZONE_HIGHMEM,
> +#endif
> + MAX_POSSIBLE_ZONES
> };
>
> +static inline int is_configured_zone(enum zone_type type)
> +{
> + return (type < MAX_NR_ZONES);
> +}
> +
> /*
> * When a memory allocation must conform to specific limitations (such
> * as being suitable for DMA) the caller will pass in hints to the
> @@ -500,11 +515,7 @@ static inline int populated_zone(struct
>
> static inline int is_highmem_idx(enum zone_type idx)
> {
> -#ifdef CONFIG_HIGHMEM
> return (idx == ZONE_HIGHMEM);
> -#else
> - return 0;
> -#endif
> }
>
Doesn't this need a check for is_configured_zone(idx) as well since this
will return 1 if we pass in idx == ZONE_HIGHMEM even though it's above
MAX_NR_ZONES?
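For example, a guarded form could look like this (a sketch of the idea only):

	static inline int is_highmem_idx(enum zone_type idx)
	{
		return is_configured_zone(idx) && (idx == ZONE_HIGHMEM);
	}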
> static inline int is_normal_idx(enum zone_type idx)
> @@ -520,11 +531,7 @@ static inline int is_normal_idx(enum zon
> */
> static inline int is_highmem(struct zone *zone)
> {
> -#ifdef CONFIG_HIGHMEM
> return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
> -#else
> - return 0;
> -#endif
> }
>
The only call site for this after your patchset is applied is in i386 code
which you can probably remove with the identity idx.
> static inline int is_normal(struct zone *zone)
> @@ -534,20 +541,12 @@ static inline int is_normal(struct zone
>
> static inline int is_dma32(struct zone *zone)
> {
> -#ifdef CONFIG_ZONE_DMA32
> return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
> -#else
> - return 0;
> -#endif
> }
>
> static inline int is_dma(struct zone *zone)
> {
> -#ifdef CONFIG_ZONE_DMA
> return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
> -#else
> - return 0;
> -#endif
> }
>
Neither is_dma32() nor is_dma() are even used anymore.
> /* These two functions are used to setup the per zone pages min values */
> Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
> +++ devel-tree-2.6.20-mm2/mm/page_alloc.c
> @@ -72,32 +72,34 @@ static void __free_pages_ok(struct page
> * TBD: should special case ZONE_DMA32 machines here - in those we normally
> * don't need any ZONE_NORMAL reservation
> */
> -int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
> -#ifdef CONFIG_ZONE_DMA
> - 256,
> -#endif
> -#ifdef CONFIG_ZONE_DMA32
> - 256,
> -#endif
> -#ifdef CONFIG_HIGHMEM
> - 32
> -#endif
> -};
> +int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
>
Probably an easier way to initialize these instead of
zone_variables_init() is like this:
int sysctl_lowmem_reserve_ratio[MAX_POSSIBLE_ZONES-1] = {
[ZONE_DMA] = 256,
[ZONE_DMA32] = 256,
[ZONE_HIGHMEM] = 32 };
> EXPORT_SYMBOL(totalram_pages);
>
> -static char * const zone_names[MAX_NR_ZONES] = {
> -#ifdef CONFIG_ZONE_DMA
> - "DMA",
> -#endif
> -#ifdef CONFIG_ZONE_DMA32
> - "DMA32",
> -#endif
> - "Normal",
> -#ifdef CONFIG_HIGHMEM
> - "HighMem"
> -#endif
> -};
> +static char *zone_names[MAX_POSSIBLE_ZONES];
> +
Likewise:
static const char *zone_names[MAX_POSSIBLE_ZONES-1] = {
[ZONE_DMA] = "DMA",
[ZONE_DMA32] = "DMA32",
[ZONE_NORMAL] = "Normal",
[ZONE_HIGHMEM] = "HighMem" };
> +static char name_dma[] = "DMA";
> +static char name_dma32[] = "DMA32";
> +static char name_normal[] = "Normal";
> +static char name_highmem[] = "Highmem";
> +
> +static inline void __meminit zone_variables_init(void)
> +{
> + if (zone_names[0] != NULL)
> + return;
> + zone_names[ZONE_DMA] = name_dma;
> + zone_names[ZONE_DMA32] = name_dma32;
> + zone_names[ZONE_NORMAL] = name_normal;
> + zone_names[ZONE_HIGHMEM] = name_highmem;
> +
> + /* ZONE below NORAML has ratio 256 */
> + if (is_configured_zone(ZONE_DMA))
> + sysctl_lowmem_reserve_ratio[ZONE_DMA] = 256;
> + if (is_configured_zone(ZONE_DMA32))
> + sysctl_lowmem_reserve_ratio[ZONE_DMA32] = 256;
> + if (is_configured_zone(ZONE_HIGHMEM))
> + sysctl_lowmem_reserve_ratio[ZONE_HIGHMEM] = 32;
> +}
>
Then you can avoid this.
David
* Re: [RFC} memory unplug patchset prep [1/16] zone ids cleanup
2007-03-06 15:36 ` David Rientjes
@ 2007-03-07 1:21 ` KAMEZAWA Hiroyuki
0 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-07 1:21 UTC (permalink / raw)
To: David Rientjes; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007 07:36:30 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:
> > static inline int is_highmem_idx(enum zone_type idx)
> > {
> > -#ifdef CONFIG_HIGHMEM
> > return (idx == ZONE_HIGHMEM);
> > -#else
> > - return 0;
> > -#endif
> > }
> >
>
> Doesn't this need a check for is_configured_zone(idx) as well since this
> will return 1 if we pass in idx == ZONE_HIGHMEM even though it's above
> MAX_NR_ZONES?
Hmm, I'll add
==
BUG_ON(idx >= MAX_NR_ZONES)
==
here.
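That would make the helper look roughly like this (a sketch, assuming the assertion simply precedes the existing comparison):

	static inline int is_highmem_idx(enum zone_type idx)
	{
		BUG_ON(idx >= MAX_NR_ZONES);
		return (idx == ZONE_HIGHMEM);
	}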
>
> > static inline int is_normal_idx(enum zone_type idx)
> > @@ -520,11 +531,7 @@ static inline int is_normal_idx(enum zon
> > */
> > static inline int is_highmem(struct zone *zone)
> > {
> > -#ifdef CONFIG_HIGHMEM
> > return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
> > -#else
> > - return 0;
> > -#endif
> > }
> >
>
> The only call site for this after your patchset is applied is in i386 code
> which you can probably remove with the identity idx.
Ok, I'll look into it.
>
> > static inline int is_normal(struct zone *zone)
> > @@ -534,20 +541,12 @@ static inline int is_normal(struct zone
> >
> > static inline int is_dma32(struct zone *zone)
> > {
> > -#ifdef CONFIG_ZONE_DMA32
> > return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
> > -#else
> > - return 0;
> > -#endif
> > }
> >
> > static inline int is_dma(struct zone *zone)
> > {
> > -#ifdef CONFIG_ZONE_DMA
> > return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
> > -#else
> > - return 0;
> > -#endif
> > }
> >
>
> Neither is_dma32() nor is_dma() are even used anymore.
I see. Maybe a patch removing them entirely should be applied before this one.
>
> > /* These two functions are used to setup the per zone pages min values */
> > Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
> > +++ devel-tree-2.6.20-mm2/mm/page_alloc.c
> > @@ -72,32 +72,34 @@ static void __free_pages_ok(struct page
> > * TBD: should special case ZONE_DMA32 machines here - in those we normally
> > * don't need any ZONE_NORMAL reservation
> > */
> > -int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = {
> > -#ifdef CONFIG_ZONE_DMA
> > - 256,
> > -#endif
> > -#ifdef CONFIG_ZONE_DMA32
> > - 256,
> > -#endif
> > -#ifdef CONFIG_HIGHMEM
> > - 32
> > -#endif
> > -};
> > +int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1];
> >
>
> Probably an easier way to initialize these instead of
> zone_variables_init() is like this:
>
> int sysctl_lowmem_reserve_ratio[MAX_POSSIBLE_ZONES-1] = {
> [ZONE_DMA] = 256,
> [ZONE_DMA32] = 256,
> [ZONE_HIGHMEM] = 32 };
>
Ah, I didn't know about this initialization method. Thanks, I will try it.
-Kame
* [RFC} memory unplug patchset prep [2/16] gathering alloc_zeroed_user_highpage()
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
2007-03-06 4:42 ` [RFC} memory unplug patchset prep [1/16] zone ids cleanup KAMEZAWA Hiroyuki
@ 2007-03-06 4:43 ` KAMEZAWA Hiroyuki
2007-03-06 15:54 ` David Rientjes
2007-03-06 4:44 ` [RFC} memory unplug patchset prep [3/16] define is_identity_mapped KAMEZAWA Hiroyuki
` (14 subsequent siblings)
16 siblings, 1 reply; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:43 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
The definitions of alloc_zeroed_user_highpage() are scattered.
This patch gathers them into linux/highmem.h.
To do so, it adds CONFIG_ARCH_HAS_PREZERO_USERPAGE and
CONFIG_ARCH_HAS_FLUSH_USERNEWZEROPAGE.
If you know better config names, please tell me.
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
arch/alpha/Kconfig | 3 +++
arch/cris/Kconfig | 3 +++
arch/h8300/Kconfig | 4 ++++
arch/i386/Kconfig | 3 +++
arch/ia64/Kconfig | 6 ++++++
arch/m32r/Kconfig | 3 +++
arch/m68knommu/Kconfig | 3 +++
arch/s390/Kconfig | 3 +++
arch/x86_64/Kconfig | 3 +++
include/asm-alpha/page.h | 3 ---
include/asm-cris/page.h | 3 ---
include/asm-h8300/page.h | 3 ---
include/asm-i386/page.h | 3 ---
include/asm-ia64/page.h | 10 +---------
include/asm-m32r/page.h | 3 ---
include/asm-m68knommu/page.h | 3 ---
include/asm-s390/page.h | 2 --
include/asm-x86_64/page.h | 2 --
include/linux/highmem.h | 16 +++++++++++++++-
19 files changed, 47 insertions(+), 32 deletions(-)
Index: devel-tree-2.6.20-mm2/arch/alpha/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/alpha/Kconfig
+++ devel-tree-2.6.20-mm2/arch/alpha/Kconfig
@@ -551,6 +551,9 @@ config ARCH_DISCONTIGMEM_ENABLE
or have huge holes in the physical address space for other reasons.
See <file:Documentation/vm/numa> for more.
+config ARCH_HAS_PREZERO_USERPAGE
+ def_bool y
+
source "mm/Kconfig"
config NUMA
Index: devel-tree-2.6.20-mm2/include/asm-alpha/page.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/asm-alpha/page.h
+++ devel-tree-2.6.20-mm2/include/asm-alpha/page.h
@@ -17,9 +17,6 @@
extern void clear_page(void *page);
#define clear_user_page(page, vaddr, pg) clear_page(page)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vmaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-
extern void copy_page(void * _to, void * _from);
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
Index: devel-tree-2.6.20-mm2/arch/cris/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/cris/Kconfig
+++ devel-tree-2.6.20-mm2/arch/cris/Kconfig
@@ -97,6 +97,9 @@ config PREEMPT
Say Y here if you are building a kernel for a desktop, embedded
or real-time system. Say N if you are unsure.
+config ARCH_HAS_PREZERO_USERPAGE
+ def_bool y
+
source mm/Kconfig
endmenu
Index: devel-tree-2.6.20-mm2/include/asm-cris/page.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/asm-cris/page.h
+++ devel-tree-2.6.20-mm2/include/asm-cris/page.h
@@ -20,9 +20,6 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-
/*
* These are used to make use of C type-checking..
*/
Index: devel-tree-2.6.20-mm2/arch/h8300/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/h8300/Kconfig
+++ devel-tree-2.6.20-mm2/arch/h8300/Kconfig
@@ -68,6 +68,10 @@ config PCI
bool
default n
+config ARCH_HAS_PREZERO_USERPAGE
+ bool
+ default y
+
source "init/Kconfig"
source "arch/h8300/Kconfig.cpu"
Index: devel-tree-2.6.20-mm2/include/asm-h8300/page.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/asm-h8300/page.h
+++ devel-tree-2.6.20-mm2/include/asm-h8300/page.h
@@ -22,9 +22,6 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-
/*
* These are used to make use of C type-checking..
*/
Index: devel-tree-2.6.20-mm2/arch/i386/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/i386/Kconfig
+++ devel-tree-2.6.20-mm2/arch/i386/Kconfig
@@ -675,6 +675,9 @@ config ARCH_SELECT_MEMORY_MODEL
config ARCH_POPULATES_NODE_MAP
def_bool y
+config ARCH_HAS_PREZERO_USERPAGE
+ def_bool y
+
source "mm/Kconfig"
config HIGHPTE
Index: devel-tree-2.6.20-mm2/include/asm-i386/page.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/asm-i386/page.h
+++ devel-tree-2.6.20-mm2/include/asm-i386/page.h
@@ -34,9 +34,6 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-
/*
* These are used to make use of C type-checking..
*/
Index: devel-tree-2.6.20-mm2/arch/ia64/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/ia64/Kconfig
+++ devel-tree-2.6.20-mm2/arch/ia64/Kconfig
@@ -329,6 +329,12 @@ config PREEMPT
Say Y here if you are building a kernel for a desktop, embedded
or real-time system. Say N if you are unsure.
+config ARCH_HAS_PREZERO_USERPAGE
+ def_bool y
+
+config ARCH_HAS_FLUSH_USERNEWZEROPAGE
+ def_bool y
+
source "mm/Kconfig"
config ARCH_SELECT_MEMORY_MODEL
Index: devel-tree-2.6.20-mm2/include/asm-ia64/page.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/asm-ia64/page.h
+++ devel-tree-2.6.20-mm2/include/asm-ia64/page.h
@@ -87,15 +87,7 @@ do { \
} while (0)
-#define alloc_zeroed_user_highpage(vma, vaddr) \
-({ \
- struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \
- if (page) \
- flush_dcache_page(page); \
- page; \
-})
-
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#define flush_user_newzeropage(page) flush_dcache_page(page)
#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
Index: devel-tree-2.6.20-mm2/arch/m32r/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/m32r/Kconfig
+++ devel-tree-2.6.20-mm2/arch/m32r/Kconfig
@@ -193,6 +193,9 @@ config ARCH_DISCONTIGMEM_ENABLE
depends on CHIP_M32700 || CHIP_M32102 || CHIP_VDEC2 || CHIP_OPSP || CHIP_M32104
default y
+config ARCH_HAS_PREZERO_USERPAGE
+ def_bool y
+
source "mm/Kconfig"
config IRAM_START
Index: devel-tree-2.6.20-mm2/include/asm-m32r/page.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/asm-m32r/page.h
+++ devel-tree-2.6.20-mm2/include/asm-m32r/page.h
@@ -15,9 +15,6 @@ extern void copy_page(void *to, void *fr
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-
/*
* These are used to make use of C type-checking..
*/
Index: devel-tree-2.6.20-mm2/arch/m68knommu/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/m68knommu/Kconfig
+++ devel-tree-2.6.20-mm2/arch/m68knommu/Kconfig
@@ -627,6 +627,9 @@ config ROMKERNEL
endchoice
+config ARCH_HAS_PREZERO_USERPAGE
+ def_bool y
+
source "mm/Kconfig"
endmenu
Index: devel-tree-2.6.20-mm2/include/asm-m68knommu/page.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/asm-m68knommu/page.h
+++ devel-tree-2.6.20-mm2/include/asm-m68knommu/page.h
@@ -22,9 +22,6 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
-
/*
* These are used to make use of C type-checking..
*/
Index: devel-tree-2.6.20-mm2/arch/s390/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/s390/Kconfig
+++ devel-tree-2.6.20-mm2/arch/s390/Kconfig
@@ -272,6 +272,9 @@ config WARN_STACK_SIZE
config ARCH_POPULATES_NODE_MAP
def_bool y
+config ARCH_HAS_PREZERO_USERPAGE
+ def_bool y
+
source "mm/Kconfig"
config HOLES_IN_ZONE
Index: devel-tree-2.6.20-mm2/include/asm-s390/page.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/asm-s390/page.h
+++ devel-tree-2.6.20-mm2/include/asm-s390/page.h
@@ -64,8 +64,6 @@ static inline void copy_page(void *to, v
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
* These are used to make use of C type-checking..
Index: devel-tree-2.6.20-mm2/arch/x86_64/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/x86_64/Kconfig
+++ devel-tree-2.6.20-mm2/arch/x86_64/Kconfig
@@ -400,6 +400,9 @@ config ARCH_FLATMEM_ENABLE
def_bool y
depends on !NUMA
+config ARCH_HAS_PREZERO_USERPAGE
+ def_bool y
+
source "mm/Kconfig"
config MEMORY_HOTPLUG_RESERVE
Index: devel-tree-2.6.20-mm2/include/asm-x86_64/page.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/asm-x86_64/page.h
+++ devel-tree-2.6.20-mm2/include/asm-x86_64/page.h
@@ -51,8 +51,6 @@ void copy_page(void *, void *);
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
-#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
* These are used to make use of C type-checking..
*/
Index: devel-tree-2.6.20-mm2/include/linux/highmem.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/highmem.h
+++ devel-tree-2.6.20-mm2/include/linux/highmem.h
@@ -60,8 +60,22 @@ static inline void clear_user_highpage(s
/* Make sure this page is cleared on other CPU's too before using it */
smp_wmb();
}
+#ifndef CONFIG_ARCH_HAS_FLUSH_USER_NEWZEROPAGE
+#define flush_user_newzeroapge(page) do{}while(0);
+#endif
-#ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+#ifdef CONFIG_ARCH_HAS_PREZERO_USERPAGE
+static inline struct page *
+alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
+{
+ struct page *page;
+ page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr);
+ if (page)
+ flush_user_newzeropage(page);
+ return page;
+}
+
+#else
static inline struct page *
alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
{
* Re: [RFC} memory unplug patchset prep [2/16] gathering alloc_zeroed_user_highpage()
2007-03-06 4:43 ` [RFC} memory unplug patchset prep [2/16] gathering alloc_zeroed_user_highpage() KAMEZAWA Hiroyuki
@ 2007-03-06 15:54 ` David Rientjes
2007-03-07 1:46 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 34+ messages in thread
From: David Rientjes @ 2007-03-06 15:54 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, Andrew Morton
On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
> Definitions of alloc_zeroed_user_highpage() is scattered.
> This patch gathers them to linux/highmem.h
>
> To do so, added CONFIG_ARCH_HAS_PREZERO_USERPAGE and
> CONFIG_ARCH_HAS_FLUSH_USERNEWZEROPAGE.
>
Prior to this patch, __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE was never
configurable by the user and was totally dependent on the architecture,
which seems appropriate. Are there cases when a user would actually
prefer to disable the new CONFIG_ARCH_HAS_PREZERO_USERPAGE to avoid
__GFP_ZERO allocations?
> Index: devel-tree-2.6.20-mm2/include/linux/highmem.h
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/include/linux/highmem.h
> +++ devel-tree-2.6.20-mm2/include/linux/highmem.h
> @@ -60,8 +60,22 @@ static inline void clear_user_highpage(s
> /* Make sure this page is cleared on other CPU's too before using it */
> smp_wmb();
> }
> +#ifndef CONFIG_ARCH_HAS_FLUSH_USER_NEWZEROPAGE
> +#define flush_user_newzeroapge(page) do{}while(0);
> +#endif
>
Well, I guess this supports my point. It doesn't appear as if this was ever
tested with __GFP_ZERO allocations disabled, because
flush_user_newzeropage() is misspelled above, so it wouldn't even compile.
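Presumably the intended fallback is a no-op, i.e. something like this (a sketch; it assumes the guard uses the same ARCH_HAS_FLUSH_USERNEWZEROPAGE name that the Kconfig entries define):

	#ifndef CONFIG_ARCH_HAS_FLUSH_USERNEWZEROPAGE
	#define flush_user_newzeropage(page) do { } while (0)
	#endif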
David
* Re: [RFC} memory unplug patchset prep [2/16] gathering alloc_zeroed_user_highpage()
2007-03-06 15:54 ` David Rientjes
@ 2007-03-07 1:46 ` KAMEZAWA Hiroyuki
0 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-07 1:46 UTC (permalink / raw)
To: David Rientjes; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007 07:54:29 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:
> On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
>
> > Definitions of alloc_zeroed_user_highpage() is scattered.
> > This patch gathers them to linux/highmem.h
> >
> > To do so, added CONFIG_ARCH_HAS_PREZERO_USERPAGE and
> > CONFIG_ARCH_HAS_FLUSH_USERNEWZEROPAGE.
> >
>
> Previous to this patch, __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE was never
> configurable by the user and was totally dependant on the architecture,
> which seems appropriate. Are there cases when a user would actually
> prefer to disable the new CONFIG_ARCH_HAS_PREZERO_USERPAGE to avoid
> __GFP_ZERO allocations?
>
No case. I prefer CONFIG_ARCH_xx over a #define in a header file.
> > Index: devel-tree-2.6.20-mm2/include/linux/highmem.h
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/include/linux/highmem.h
> > +++ devel-tree-2.6.20-mm2/include/linux/highmem.h
> > @@ -60,8 +60,22 @@ static inline void clear_user_highpage(s
> > /* Make sure this page is cleared on other CPU's too before using it */
> > smp_wmb();
> > }
> > +#ifndef CONFIG_ARCH_HAS_FLUSH_USER_NEWZEROPAGE
> > +#define flush_user_newzeroapge(page) do{}while(0);
> > +#endif
> >
>
> Well, I guess this supports my point. It doesn't appear as it was ever
> tested in disabling __GFP_ZERO allocations because
> flush_user_newzeropage() is misspelled above so it wouldn't even compile.
>
Ah, okay. I'll add i386 to my testset, at least.
Thanks,
-Kame
* [RFC} memory unplug patchset prep [3/16] define is_identity_mapped
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
2007-03-06 4:42 ` [RFC} memory unplug patchset prep [1/16] zone ids cleanup KAMEZAWA Hiroyuki
2007-03-06 4:43 ` [RFC} memory unplug patchset prep [2/16] gathering alloc_zeroed_user_highpage() KAMEZAWA Hiroyuki
@ 2007-03-06 4:44 ` KAMEZAWA Hiroyuki
2007-03-06 15:55 ` David Rientjes
2007-03-06 4:45 ` [RFC} memory unplug patchset prep [4/16] ZONE_MOVABLE KAMEZAWA Hiroyuki
` (13 subsequent siblings)
16 siblings, 1 reply; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:44 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Add an is_identity_map() function and rewrite is_highmem() users
to use is_identity_map().
(*) This prepares for adding the extra zone ZONE_MOVABLE.
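At this point in the series (before ZONE_MOVABLE is introduced) the intended semantics are roughly (a sketch, not part of the patch):

	is_identity_map(zone) == !is_highmem(zone)	/* CONFIG_HIGHMEM=y */
	is_identity_map(zone) == 1			/* CONFIG_HIGHMEM=n */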
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/mmzone.h | 10 ++++++++++
include/linux/page-flags.h | 2 +-
kernel/power/snapshot.c | 12 ++++++------
kernel/power/swsusp.c | 2 +-
mm/page_alloc.c | 8 ++++----
5 files changed, 22 insertions(+), 12 deletions(-)
Index: devel-tree-2.6.20-mm2/include/linux/mmzone.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/mmzone.h
+++ devel-tree-2.6.20-mm2/include/linux/mmzone.h
@@ -523,6 +523,13 @@ static inline int is_normal_idx(enum zon
return (idx == ZONE_NORMAL);
}
+static inline int is_identity_map_idx(enum zone_type idx)
+{
+ if (is_configured_zone(ZONE_HIGHMEM))
+ return (idx < ZONE_HIGHMEM);
+ else
+ return 1;
+}
/**
* is_highmem - helper function to quickly check if a struct zone is a
* highmem zone or not. This is an attempt to keep references
@@ -549,6 +556,14 @@ static inline int is_dma(struct zone *zo
return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
}
+static inline int is_identity_map(struct zone *zone)
+{
+ if (is_configured_zone(ZONE_HIGHMEM)
+ return zone_idx(zone) < ZONE_HIGHMEM;
+ else
+ return 1;
+}
+
/* These two functions are used to setup the per zone pages min values */
struct ctl_table;
struct file;
Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
+++ devel-tree-2.6.20-mm2/mm/page_alloc.c
@@ -2090,7 +2090,7 @@ void __meminit memmap_init_zone(unsigned
INIT_LIST_HEAD(&page->lru);
#ifdef WANT_PAGE_VIRTUAL
/* The shift won't overflow because ZONE_NORMAL is below 4G. */
- if (!is_highmem_idx(zone))
+ if (is_identity_map_idx(zone))
set_page_address(page, __va(pfn << PAGE_SHIFT));
#endif
#ifdef CONFIG_PAGE_OWNER
@@ -2769,7 +2769,7 @@ static void __meminit free_area_init_cor
zone_names[0], dma_reserve);
}
- if (!is_highmem_idx(j))
+ if (is_identity_map_idx(j))
nr_kernel_pages += realsize;
nr_all_pages += realsize;
@@ -3235,7 +3235,7 @@ void setup_per_zone_pages_min(void)
/* Calculate total number of !ZONE_HIGHMEM pages */
for_each_zone(zone) {
- if (!is_highmem(zone))
+ if (is_identity_map(zone))
lowmem_pages += zone->present_pages;
}
@@ -3245,7 +3245,7 @@ void setup_per_zone_pages_min(void)
spin_lock_irqsave(&zone->lru_lock, flags);
tmp = (u64)pages_min * zone->present_pages;
do_div(tmp, lowmem_pages);
- if (is_highmem(zone)) {
+ if (!is_identity_map(zone)) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
* need highmem pages, so cap pages_min to a small
Index: devel-tree-2.6.20-mm2/include/linux/page-flags.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/page-flags.h
+++ devel-tree-2.6.20-mm2/include/linux/page-flags.h
@@ -162,7 +162,7 @@ static inline void SetPageUptodate(struc
#define __ClearPageSlab(page) __clear_bit(PG_slab, &(page)->flags)
#ifdef CONFIG_HIGHMEM
-#define PageHighMem(page) is_highmem(page_zone(page))
+#define PageHighMem(page) (!is_identitiy_map(page_zone(page)))
#else
#define PageHighMem(page) 0 /* needed to optimize away at compile time */
#endif
Index: devel-tree-2.6.20-mm2/kernel/power/snapshot.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/kernel/power/snapshot.c
+++ devel-tree-2.6.20-mm2/kernel/power/snapshot.c
@@ -590,7 +590,7 @@ static unsigned int count_free_highmem_p
unsigned int cnt = 0;
for_each_zone(zone)
- if (populated_zone(zone) && is_highmem(zone))
+ if (populated_zone(zone) && !is_identity_map(zone))
cnt += zone_page_state(zone, NR_FREE_PAGES);
return cnt;
@@ -634,7 +634,7 @@ unsigned int count_highmem_pages(void)
for_each_zone(zone) {
unsigned long pfn, max_zone_pfn;
- if (!is_highmem(zone))
+ if (is_identity_map(zone))
continue;
mark_free_pages(zone);
@@ -702,7 +702,7 @@ unsigned int count_data_pages(void)
unsigned int n = 0;
for_each_zone(zone) {
- if (is_highmem(zone))
+ if (!is_identity_map(zone))
continue;
mark_free_pages(zone);
@@ -729,8 +729,8 @@ static inline void do_copy_page(long *ds
static inline struct page *
page_is_saveable(struct zone *zone, unsigned long pfn)
{
- return is_highmem(zone) ?
- saveable_highmem_page(pfn) : saveable_page(pfn);
+ return is_identity_map(zone) ?
+ saveable_page(pfn) : savable_highmem_page(pfn);
}
static inline void
@@ -868,7 +868,7 @@ static int enough_free_mem(unsigned int
for_each_zone(zone) {
meta += snapshot_additional_pages(zone);
- if (!is_highmem(zone))
+ if (is_identity_map(zone))
free += zone_page_state(zone, NR_FREE_PAGES);
}
Index: devel-tree-2.6.20-mm2/kernel/power/swsusp.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/kernel/power/swsusp.c
+++ devel-tree-2.6.20-mm2/kernel/power/swsusp.c
@@ -229,7 +229,7 @@ int swsusp_shrink_memory(void)
size += highmem_size;
for_each_zone (zone)
if (populated_zone(zone)) {
- if (is_highmem(zone)) {
+ if (!is_identity_map(zone)) {
highmem_size -=
zone_page_state(zone, NR_FREE_PAGES);
} else {
* Re: [RFC} memory unplug patchset prep [3/16] define is_identity_mapped
2007-03-06 4:44 ` [RFC} memory unplug patchset prep [3/16] define is_identity_mapped KAMEZAWA Hiroyuki
@ 2007-03-06 15:55 ` David Rientjes
2007-03-07 1:48 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 34+ messages in thread
From: David Rientjes @ 2007-03-06 15:55 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
> Index: devel-tree-2.6.20-mm2/include/linux/mmzone.h
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/include/linux/mmzone.h
> +++ devel-tree-2.6.20-mm2/include/linux/mmzone.h
> @@ -523,6 +523,13 @@ static inline int is_normal_idx(enum zon
> return (idx == ZONE_NORMAL);
> }
>
> +static inline int is_identity_map_idx(enum zone_type idx)
> +{
> + if (is_configured_zone(ZONE_HIGHMEM))
> + return (idx < ZONE_HIGHMEM);
> + else
> + return 1;
> +}
> /**
> * is_highmem - helper function to quickly check if a struct zone is a
> * highmem zone or not. This is an attempt to keep references
> @@ -549,6 +556,14 @@ static inline int is_dma(struct zone *zo
> return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
> }
>
> +static inline int is_identity_map(struct zone *zone)
> +{
> + if (is_configured_zone(ZONE_HIGHMEM)
> + return zone_idx(zone) < ZONE_HIGHMEM;
> + else
> + return 1;
> +}
> +
is_identity_map() isn't specific to any particular architecture nor is it
dependent on a configuration option. Since there's a missing ) in its
conditional, I'm wondering how this entire patch was ever tested.
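With the parenthesis restored, the helper would presumably read (sketch):

	static inline int is_identity_map(struct zone *zone)
	{
		if (is_configured_zone(ZONE_HIGHMEM))
			return zone_idx(zone) < ZONE_HIGHMEM;
		else
			return 1;
	}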
> Index: devel-tree-2.6.20-mm2/include/linux/page-flags.h
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/include/linux/page-flags.h
> +++ devel-tree-2.6.20-mm2/include/linux/page-flags.h
> @@ -162,7 +162,7 @@ static inline void SetPageUptodate(struc
> #define __ClearPageSlab(page) __clear_bit(PG_slab, &(page)->flags)
>
> #ifdef CONFIG_HIGHMEM
> -#define PageHighMem(page) is_highmem(page_zone(page))
> +#define PageHighMem(page) (!is_identitiy_map(page_zone(page)))
> #else
> #define PageHighMem(page) 0 /* needed to optimize away at compile time */
> #endif
I assume this should be defined to !is_identity_map(page_zone(page)).
David
* Re: [RFC} memory unplug patchset prep [3/16] define is_identity_mapped
2007-03-06 15:55 ` David Rientjes
@ 2007-03-07 1:48 ` KAMEZAWA Hiroyuki
0 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-07 1:48 UTC (permalink / raw)
To: David Rientjes; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007 07:55:54 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:
> On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
>
> > Index: devel-tree-2.6.20-mm2/include/linux/mmzone.h
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/include/linux/mmzone.h
> > +++ devel-tree-2.6.20-mm2/include/linux/mmzone.h
> > @@ -523,6 +523,13 @@ static inline int is_normal_idx(enum zon
> > return (idx == ZONE_NORMAL);
> > }
> >
> > +static inline int is_identity_map_idx(enum zone_type idx)
> > +{
> > + if (is_configured_zone(ZONE_HIGHMEM))
> > + return (idx < ZONE_HIGHMEM);
> > + else
> > + return 1;
> > +}
> > /**
> > * is_highmem - helper function to quickly check if a struct zone is a
> > * highmem zone or not. This is an attempt to keep references
> > @@ -549,6 +556,14 @@ static inline int is_dma(struct zone *zo
> > return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
> > }
> >
> > +static inline int is_identity_map(struct zone *zone)
> > +{
> > + if (is_configured_zone(ZONE_HIGHMEM)
> > + return zone_idx(zone) < ZONE_HIGHMEM;
> > + else
> > + return 1;
> > +}
> > +
>
> is_identity_map() isn't specific to any particular architecture nor is it
> dependent on a configuration option. Since there's a missing ) in its
> conditional, I'm wondering how this entire patch was ever tested.
>
I tested it and my tree looks fine... maybe this is a patch-refresh miss... sorry.
> > Index: devel-tree-2.6.20-mm2/include/linux/page-flags.h
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/include/linux/page-flags.h
> > +++ devel-tree-2.6.20-mm2/include/linux/page-flags.h
> > @@ -162,7 +162,7 @@ static inline void SetPageUptodate(struc
> > #define __ClearPageSlab(page) __clear_bit(PG_slab, &(page)->flags)
> >
> > #ifdef CONFIG_HIGHMEM
> > -#define PageHighMem(page) is_highmem(page_zone(page))
> > +#define PageHighMem(page) (!is_identitiy_map(page_zone(page)))
> > #else
> > #define PageHighMem(page) 0 /* needed to optimize away at compile time */
> > #endif
>
> I assume this should be defined to !is_identity_map(page_zone(page)).
>
ok.
Thanks,
-Kame
* [RFC} memory unplug patchset prep [4/16] ZONE_MOVABLE
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (2 preceding siblings ...)
2007-03-06 4:44 ` [RFC} memory unplug patchset prep [3/16] define is_identity_mapped KAMEZAWA Hiroyuki
@ 2007-03-06 4:45 ` KAMEZAWA Hiroyuki
2007-03-06 16:06 ` David Rientjes
2007-03-06 4:47 ` [RFC} memory unplug patchset prep [5/16] GFP_MOVABLE KAMEZAWA Hiroyuki
` (12 subsequent siblings)
16 siblings, 1 reply; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:45 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Add ZONE_MOVABLE.
This zone is only used for migratable/reclaimable pages.
zone order is
[ZONE_DMA],
[ZONE_DMA32],
ZONE_NORMAL,
[ZONE_HIGHMEM],
[ZONE_MOVABLE],
MAX_NR_ZONES
If highmem is configured, the movable zone is not identity-mapped.
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/mmzone.h | 29 +++++++
mm/Kconfig | 4 +
mm/page_alloc.c | 180 ++++++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 211 insertions(+), 2 deletions(-)
Index: devel-tree-2.6.20-mm2/include/linux/mmzone.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/mmzone.h
+++ devel-tree-2.6.20-mm2/include/linux/mmzone.h
@@ -142,6 +142,16 @@ enum zone_type {
*/
ZONE_HIGHMEM,
#endif
+#ifdef CONFIG_ZONE_MOVABLE
+ /*
+ * This memory area is used only for migratable pages.
+ * We have a chance to hot-remove memory in this zone.
+ * Currently, anonymous memory and usual page cache etc. are included.
+ * if HIGHMEM is configured, MOVABLE zone is treated as
+ * not-direct-mapped-memory for kernel;.
+ */
+ ZONE_MOVABLE,
+#endif
MAX_NR_ZONES,
#ifndef CONFIG_ZONE_DMA
ZONE_DMA,
@@ -152,6 +162,9 @@ enum zone_type {
#ifndef CONFIG_HIGHMEM
ZONE_HIGHMEM,
#endif
+#ifndef CONFIG_ZONE_MOVABLE
+ ZONE_MOVABLE,
+#endif
MAX_POSSIBLE_ZONES
};
@@ -172,13 +185,18 @@ static inline int is_configured_zone(enu
* Count the active zones. Note that the use of defined(X) outside
* #if and family is not necessarily defined so ensure we cannot use
* it later. Use __ZONE_COUNT to work out how many shift bits we need.
+ *
+ * Assumes ZONE_DMA32,ZONE_HIGHMEM, ZONE_MOVABLE can't be configured at
+ * the same time.
*/
#define __ZONE_COUNT ( \
defined(CONFIG_ZONE_DMA) \
+ defined(CONFIG_ZONE_DMA32) \
+ 1 \
+ defined(CONFIG_HIGHMEM) \
+ + defined(CONFIG_ZONE_MOVABLE) \
)
+
#if __ZONE_COUNT < 2
#define ZONES_SHIFT 0
#elif __ZONE_COUNT <= 2
@@ -513,6 +531,11 @@ static inline int populated_zone(struct
return (!!zone->present_pages);
}
+static inline int is_movable_dix(enum zone_type idx)
+{
+ return (idx == ZONE_MOVABLE);
+}
+
static inline int is_highmem_idx(enum zone_type idx)
{
return (idx == ZONE_HIGHMEM);
@@ -536,6 +559,12 @@ static inline int is_identity_map_idx(en
* to ZONE_{DMA/NORMAL/HIGHMEM/etc} in general code to a minimum.
* @zone - pointer to struct zone variable
*/
+
+static inline int is_movable(struct zone *zone)
+{
+ return zone == zone->zone_pgdat->node_zones + ZONE_MOVABLE;
+}
+
static inline int is_highmem(struct zone *zone)
{
return zone == zone->zone_pgdat->node_zones + ZONE_HIGHMEM;
Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
+++ devel-tree-2.6.20-mm2/mm/page_alloc.c
@@ -82,6 +82,7 @@ static char name_dma[] = "DMA";
static char name_dma32[] = "DMA32";
static char name_normal[] = "Normal";
static char name_highmem[] = "Highmem";
+static char name_movable[] = "Movable";
static inline void __meminit zone_variables_init(void)
{
@@ -91,6 +92,7 @@ static inline void __meminit zone_variab
zone_names[ZONE_DMA32] = name_dma32;
zone_names[ZONE_NORMAL] = name_normal;
zone_names[ZONE_HIGHMEM] = name_highmem;
+ zone_names[ZONE_MOVABLE] = name_movable;
/* ZONE below NORAML has ratio 256 */
if (is_configured_zone(ZONE_DMA))
@@ -99,6 +101,8 @@ static inline void __meminit zone_variab
sysctl_lowmem_reserve_ratio[ZONE_DMA32] = 256;
if (is_configured_zone(ZONE_HIGHMEM))
sysctl_lowmem_reserve_ratio[ZONE_HIGHMEM] = 32;
+ if (is_configured_zone(ZONE_MOVABLE))
+ sysctl_lowmem_reserve_ratio[ZONE_MOVABLE] = 32;
}
int min_free_kbytes = 1024;
@@ -3065,11 +3069,17 @@ void __init free_area_init_nodes(unsigne
arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
for (i = 1; i < MAX_NR_ZONES; i++) {
+ if (i == ZONE_MOVABLE)
+ continue;
arch_zone_lowest_possible_pfn[i] =
arch_zone_highest_possible_pfn[i-1];
arch_zone_highest_possible_pfn[i] =
max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
}
+ if (is_configured_zone(ZONE_MOVABLE)) {
+ arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
+ arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
+ }
/* Print out the page size for debugging meminit problems */
printk(KERN_DEBUG "sizeof(struct page) = %zd\n", sizeof(struct page));
@@ -3097,6 +3107,7 @@ void __init free_area_init_nodes(unsigne
find_min_pfn_for_node(nid), NULL);
}
}
+
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
/**
Index: devel-tree-2.6.20-mm2/mm/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/Kconfig
+++ devel-tree-2.6.20-mm2/mm/Kconfig
@@ -163,6 +163,10 @@ config ZONE_DMA_FLAG
default "0" if !ZONE_DMA
default "1"
+config ZONE_MOVABLE
+ bool "Create zones for MOVABLE pages"
+ depends on ARCH_POPULATES_NODE_MAP
+ depends on MIGRATION
#
# Adaptive file readahead
#
* Re: [RFC} memory unplug patchset prep [4/16] ZONE_MOVABLE
2007-03-06 4:45 ` [RFC} memory unplug patchset prep [4/16] ZONE_MOVABLE KAMEZAWA Hiroyuki
@ 2007-03-06 16:06 ` David Rientjes
2007-03-07 1:51 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 34+ messages in thread
From: David Rientjes @ 2007-03-06 16:06 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
> Index: devel-tree-2.6.20-mm2/include/linux/mmzone.h
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/include/linux/mmzone.h
> +++ devel-tree-2.6.20-mm2/include/linux/mmzone.h
> @@ -142,6 +142,16 @@ enum zone_type {
> */
> ZONE_HIGHMEM,
> #endif
> +#ifdef CONFIG_ZONE_MOVABLE
> + /*
> + * This memory area is used only for migratable pages.
> + * We have a chance to hot-remove memory in this zone.
> + * Currently, anonymous memory and usual page cache etc. are included.
> + * if HIGHMEM is configured, MOVABLE zone is treated as
> + * not-direct-mapped-memory for kernel;.
> + */
> + ZONE_MOVABLE,
> +#endif
> MAX_NR_ZONES,
> #ifndef CONFIG_ZONE_DMA
> ZONE_DMA,
> @@ -152,6 +162,9 @@ enum zone_type {
> #ifndef CONFIG_HIGHMEM
> ZONE_HIGHMEM,
> #endif
> +#ifndef CONFIG_ZONE_MOVABLE
> + ZONE_MOVABLE,
> +#endif
> MAX_POSSIBLE_ZONES
> };
>
> @@ -172,13 +185,18 @@ static inline int is_configured_zone(enu
> * Count the active zones. Note that the use of defined(X) outside
> * #if and family is not necessarily defined so ensure we cannot use
> * it later. Use __ZONE_COUNT to work out how many shift bits we need.
> + *
> + * Assumes ZONE_DMA32,ZONE_HIGHMEM, ZONE_MOVABLE can't be configured at
> + * the same time.
> */
> #define __ZONE_COUNT ( \
> defined(CONFIG_ZONE_DMA) \
> + defined(CONFIG_ZONE_DMA32) \
> + 1 \
> + defined(CONFIG_HIGHMEM) \
> + + defined(CONFIG_ZONE_MOVABLE) \
> )
> +
> #if __ZONE_COUNT < 2
> #define ZONES_SHIFT 0
> #elif __ZONE_COUNT <= 2
> @@ -513,6 +531,11 @@ static inline int populated_zone(struct
> return (!!zone->present_pages);
> }
>
> +static inline int is_movable_dix(enum zone_type idx)
> +{
> + return (idx == ZONE_MOVABLE);
> +}
> +
Should be is_movable_idx() maybe? I assume this function is here for
completeness since it's never referenced in the patchset.
> Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
> +++ devel-tree-2.6.20-mm2/mm/page_alloc.c
> @@ -82,6 +82,7 @@ static char name_dma[] = "DMA";
> static char name_dma32[] = "DMA32";
> static char name_normal[] = "Normal";
> static char name_highmem[] = "Highmem";
> +static char name_movable[] = "Movable";
>
> static inline void __meminit zone_variables_init(void)
> {
> @@ -91,6 +92,7 @@ static inline void __meminit zone_variab
> zone_names[ZONE_DMA32] = name_dma32;
> zone_names[ZONE_NORMAL] = name_normal;
> zone_names[ZONE_HIGHMEM] = name_highmem;
> + zone_names[ZONE_MOVABLE] = name_movable;
>
> /* ZONE below NORAML has ratio 256 */
> if (is_configured_zone(ZONE_DMA))
> @@ -99,6 +101,8 @@ static inline void __meminit zone_variab
> sysctl_lowmem_reserve_ratio[ZONE_DMA32] = 256;
> if (is_configured_zone(ZONE_HIGHMEM))
> sysctl_lowmem_reserve_ratio[ZONE_HIGHMEM] = 32;
> + if (is_configured_zone(ZONE_MOVABLE))
> + sysctl_lowmem_reserve_ratio[ZONE_MOVABLE] = 32;
> }
>
> int min_free_kbytes = 1024;
> @@ -3065,11 +3069,17 @@ void __init free_area_init_nodes(unsigne
> arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
> arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
> for (i = 1; i < MAX_NR_ZONES; i++) {
> + if (i == ZONE_MOVABLE)
> + continue;
> arch_zone_lowest_possible_pfn[i] =
> arch_zone_highest_possible_pfn[i-1];
> arch_zone_highest_possible_pfn[i] =
> max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
> }
> + if (is_configured_zone(ZONE_MOVABLE)) {
> + arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
> + arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
> + }
>
> /* Print out the page size for debugging meminit problems */
> printk(KERN_DEBUG "sizeof(struct page) = %zd\n", sizeof(struct page));
Aren't the arch_zone_{lowest|highest}_possible_pfn's for ZONE_MOVABLE
already at 0? If not, it should definitely be memset early on to avoid
any possible assignment mistakes amongst all these conditionals.
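For example (a sketch, assuming both pfn arrays are zeroed before the loop that fills them):

	memset(arch_zone_lowest_possible_pfn, 0,
			sizeof(arch_zone_lowest_possible_pfn));
	memset(arch_zone_highest_possible_pfn, 0,
			sizeof(arch_zone_highest_possible_pfn));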
> Index: devel-tree-2.6.20-mm2/mm/Kconfig
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/mm/Kconfig
> +++ devel-tree-2.6.20-mm2/mm/Kconfig
> @@ -163,6 +163,10 @@ config ZONE_DMA_FLAG
> default "0" if !ZONE_DMA
> default "1"
>
> +config ZONE_MOVABLE
> + bool "Create zones for MOVABLE pages"
> + depends on ARCH_POPULATES_NODE_MAP
> + depends on MIGRATION
> #
> # Adaptive file readahead
> #
>
This patchset is heavily dependent on Mel Gorman's work with ZONE_MOVABLE
so perhaps it would be better to base it off of the latest -mm with his
patchset applied? And if CONFIG_ZONE_MOVABLE wasn't documented in Kconfig
prior to this, it might be a good opportunity to do so if you're going to
get community adoption.
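A documented entry might look roughly like this (a sketch of possible wording, not text from the posted patch):

	config ZONE_MOVABLE
		bool "Create a zone for movable pages"
		depends on ARCH_POPULATES_NODE_MAP
		depends on MIGRATION
		help
		  Group pages that the kernel can migrate or reclaim
		  (anonymous memory, ordinary page cache) into a separate
		  zone, so that the memory backing that zone can later be
		  hot-removed.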
David
* Re: [RFC} memory unplug patchset prep [4/16] ZONE_MOVABLE
2007-03-06 16:06 ` David Rientjes
@ 2007-03-07 1:51 ` KAMEZAWA Hiroyuki
0 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-07 1:51 UTC (permalink / raw)
To: David Rientjes; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007 08:06:33 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:
> > +static inline int is_movable_dix(enum zone_type idx)
> > +{
> > + return (idx == ZONE_MOVABLE);
> > +}
> > +
>
> Should be is_movable_idx() maybe? I assume this function is here for
> completeness since it's never referenced in the patchset.
>
yes.. this is never called. I may drop this function.
> > Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
> > +++ devel-tree-2.6.20-mm2/mm/page_alloc.c
> > @@ -82,6 +82,7 @@ static char name_dma[] = "DMA";
> > static char name_dma32[] = "DMA32";
> > static char name_normal[] = "Normal";
> > static char name_highmem[] = "Highmem";
> > +static char name_movable[] = "Movable";
> >
> > static inline void __meminit zone_variables_init(void)
> > {
> > @@ -91,6 +92,7 @@ static inline void __meminit zone_variab
> > zone_names[ZONE_DMA32] = name_dma32;
> > zone_names[ZONE_NORMAL] = name_normal;
> > zone_names[ZONE_HIGHMEM] = name_highmem;
> > + zone_names[ZONE_MOVABLE] = name_movable;
> >
> > /* ZONE below NORAML has ratio 256 */
> > if (is_configured_zone(ZONE_DMA))
> > @@ -99,6 +101,8 @@ static inline void __meminit zone_variab
> > sysctl_lowmem_reserve_ratio[ZONE_DMA32] = 256;
> > if (is_configured_zone(ZONE_HIGHMEM))
> > sysctl_lowmem_reserve_ratio[ZONE_HIGHMEM] = 32;
> > + if (is_configured_zone(ZONE_MOVABLE))
> > + sysctl_lowmem_reserve_ratio[ZONE_MOVABLE] = 32;
> > }
> >
> > int min_free_kbytes = 1024;
> > @@ -3065,11 +3069,17 @@ void __init free_area_init_nodes(unsigne
> > arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
> > arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
> > for (i = 1; i < MAX_NR_ZONES; i++) {
> > + if (i == ZONE_MOVABLE)
> > + continue;
> > arch_zone_lowest_possible_pfn[i] =
> > arch_zone_highest_possible_pfn[i-1];
> > arch_zone_highest_possible_pfn[i] =
> > max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
> > }
> > + if (is_configured_zone(ZONE_MOVABLE)) {
> > + arch_zone_lowest_possible_pfn[ZONE_MOVABLE] = 0;
> > + arch_zone_highest_possible_pfn[ZONE_MOVABLE] = 0;
> > + }
> >
> > /* Print out the page size for debugging meminit problems */
> > printk(KERN_DEBUG "sizeof(struct page) = %zd\n", sizeof(struct page));
>
> Aren't the arch_zone_{lowest|highest}_possible_pfn's for ZONE_MOVABLE
> already at 0? If not, it should definitely be memset early on to avoid
> any possible assignment mistakes amongst all these conditionals.
>
ok.
> > Index: devel-tree-2.6.20-mm2/mm/Kconfig
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/mm/Kconfig
> > +++ devel-tree-2.6.20-mm2/mm/Kconfig
> > @@ -163,6 +163,10 @@ config ZONE_DMA_FLAG
> > default "0" if !ZONE_DMA
> > default "1"
> >
> > +config ZONE_MOVABLE
> > + bool "Create zones for MOVABLE pages"
> > + depends on ARCH_POPULATES_NODE_MAP
> > + depends on MIGRATION
> > #
> > # Adaptive file readahead
> > #
> >
>
> This patchset is heavily dependent on Mel Gorman's work with ZONE_MOVABLE
> so perhaps it would be better to base it off of the latest -mm with his
> patchset applied? And if CONFIG_ZONE_MOVABLE wasn't documented in Kconfig
> prior to this, it might be a good opportunity to do so if you're going to
> get community adoption.
>
I'm very glad to see the latest -mm includes Mel's work.
And yes, I'll use his patch set.
-Kame
* [RFC} memory unplug patchset prep [5/16] GFP_MOVABLE
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (3 preceding siblings ...)
2007-03-06 4:45 ` [RFC} memory unplug patchset prep [4/16] ZONE_MOVABLE KAMEZAWA Hiroyuki
@ 2007-03-06 4:47 ` KAMEZAWA Hiroyuki
2007-03-06 4:48 ` [RFC} memory unplug patchset prep [6/16] alloc_zeroed_user_high_movable KAMEZAWA Hiroyuki
` (11 subsequent siblings)
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:47 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Add GFP_MOVABLE support
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/gfp.h | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
Index: devel-tree-2.6.20-mm2/include/linux/gfp.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/gfp.h
+++ devel-tree-2.6.20-mm2/include/linux/gfp.h
@@ -46,6 +46,7 @@ struct vm_area_struct;
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
+#define __GFP_MOVABLE ((__force gfp_t)0x80000u) /* Movable page allocation */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +55,7 @@ struct vm_area_struct;
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
+ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE | __GFP_MOVABLE)
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
@@ -66,6 +67,8 @@ struct vm_area_struct;
#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
__GFP_HIGHMEM)
+#define GFP_HIGH_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
+ __GFP_HIGHMEM | __GFP_MOVABLE)
#ifdef CONFIG_NUMA
#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
@@ -93,6 +96,10 @@ static inline enum zone_type gfp_zone(gf
if (flags & __GFP_DMA32)
return ZONE_DMA32;
#endif
+#ifdef CONFIG_ZONE_MOVABLE
+ if (flags & __GFP_MOVABLE) /* we can try to alloc movable pages. */
+ return ZONE_MOVABLE;
+#endif
#ifdef CONFIG_HIGHMEM
if (flags & __GFP_HIGHMEM)
return ZONE_HIGHMEM;
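(For illustration only: with this patch a caller asking for a movable highmem page
would do something like

	struct page *page = alloc_page(GFP_HIGH_MOVABLE);

and gfp_zone() above maps it to ZONE_MOVABLE, because the __GFP_MOVABLE check
sits before the __GFP_HIGHMEM one.)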
* [RFC} memory unplug patchset prep [6/16] alloc_zeroed_user_high_movable
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (4 preceding siblings ...)
2007-03-06 4:47 ` [RFC} memory unplug patchset prep [5/16] GFP_MOVABLE KAMEZAWA Hiroyuki
@ 2007-03-06 4:48 ` KAMEZAWA Hiroyuki
2007-03-06 4:49 ` [RFC} memory unplug patchset prep [7/16] change caller's gfp_mask KAMEZAWA Hiroyuki
` (10 subsequent siblings)
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:48 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Add function alloc_zeroed_user_highmovable() for allocating user pages
from ZONE_MOVABLE.
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/highmem.h | 25 +++++++++++++++++++++----
1 file changed, 21 insertions(+), 4 deletions(-)
Index: devel-tree-2.6.20-mm2/include/linux/highmem.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/highmem.h
+++ devel-tree-2.6.20-mm2/include/linux/highmem.h
@@ -66,10 +66,11 @@ static inline void clear_user_highpage(s
#ifdef CONFIG_ARCH_HAS_PREZERO_USERPAGE
static inline struct page *
-alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
+__alloc_zeroed_user_highpage(gfp_t movable,
+ struct vm_area_struct *vma, unsigned long vaddr)
{
struct page *page;
- page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr);
+ page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movable, vma, vaddr);
if (page)
flush_user_newzeropage(page);
return page;
@@ -77,9 +78,10 @@ alloc_zeroed_user_highpage(struct vm_are
#else
static inline struct page *
-alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
+__alloc_zeroed_user_highpage(gfp_t movable,
+ struct vm_area_struct *vma, unsigned long vaddr)
{
- struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
+ struct page *page = alloc_page_vma(GFP_HIGHUSER | movable, vma, vaddr);
if (page)
clear_user_highpage(page, vaddr);
@@ -88,6 +90,21 @@ alloc_zeroed_user_highpage(struct vm_are
}
#endif
+
+static inline struct page *
+alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
+{
+ return __alloc_zeroed_user_highpage(0, vma, vaddr);
+}
+
+static inline struct page *
+alloc_zeroed_user_highmovable(struct vm_area_struct *vma, unsigned long vaddr)
+{
+ return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
+}
+
+
+
static inline void clear_highpage(struct page *page)
{
void *kaddr = kmap_atomic(page, KM_USER0);
* [RFC} memory unplug patchset prep [7/16] change caller's gfp_mask
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (5 preceding siblings ...)
2007-03-06 4:48 ` [RFC} memory unplug patchset prep [6/16] alloc_zeroed_user_high_movable KAMEZAWA Hiroyuki
@ 2007-03-06 4:49 ` KAMEZAWA Hiroyuki
2007-03-06 4:50 ` [RFC} memory unplug patchset prep [8/16] counter for ZONE_MOVABLE KAMEZAWA Hiroyuki
` (9 subsequent siblings)
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:49 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Change callers of GFP_HIGHUSER to use GFP_HIGH_MOVABLE where they can.
Some callers of alloc_zeroed_user_highpage() are changed to
alloc_zeroed_user_highmovable().
I think I need more study in this area.
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
fs/inode.c | 6 +++++-
fs/namei.c | 1 +
fs/ramfs/inode.c | 1 +
mm/filemap.c | 2 +-
mm/memory.c | 8 ++++----
mm/mempolicy.c | 4 ++--
mm/migrate.c | 2 +-
mm/shmem.c | 5 ++++-
mm/swap_prefetch.c | 2 +-
mm/swap_state.c | 2 +-
10 files changed, 21 insertions(+), 12 deletions(-)
Index: devel-tree-2.6.20-mm2/fs/inode.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/fs/inode.c
+++ devel-tree-2.6.20-mm2/fs/inode.c
@@ -145,7 +145,7 @@ static struct inode *alloc_inode(struct
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
+ mapping_set_gfp_mask(mapping, GFP_HIGH_MOVABLE);
mapping->assoc_mapping = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
@@ -522,6 +522,10 @@ repeat:
* @sb: superblock
*
* Allocates a new inode for given superblock.
+ * Newly allocated inode's gfp_flag is set to GFP_HIGH_MOVABLE(default).
+ * If the fs doesn't support page migration, it should be overridden
+ * by GFP_HIGHUSER.
+ * mapping_set_gfp_mask() can be used for this purpose.
*/
struct inode *new_inode(struct super_block *sb)
{
Index: devel-tree-2.6.20-mm2/fs/ramfs/inode.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/fs/ramfs/inode.c
+++ devel-tree-2.6.20-mm2/fs/ramfs/inode.c
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct sup
inode->i_blocks = 0;
inode->i_mapping->a_ops = &ramfs_aops;
inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch (mode & S_IFMT) {
default:
Index: devel-tree-2.6.20-mm2/mm/memory.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/memory.c
+++ devel-tree-2.6.20-mm2/mm/memory.c
@@ -1761,11 +1761,11 @@ gotten:
if (unlikely(anon_vma_prepare(vma)))
goto oom;
if (old_page == ZERO_PAGE(address)) {
- new_page = alloc_zeroed_user_highpage(vma, address);
+ new_page = alloc_zeroed_user_highmovable(vma, address);
if (!new_page)
goto oom;
} else {
- new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+ new_page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, address);
if (!new_page)
goto oom;
cow_user_page(new_page, old_page, address, vma);
@@ -2283,7 +2283,7 @@ static int do_anonymous_page(struct mm_s
if (unlikely(anon_vma_prepare(vma)))
goto oom;
- page = alloc_zeroed_user_highpage(vma, address);
+ page = alloc_zeroed_user_highmovable(vma, address);
if (!page)
goto oom;
@@ -2384,7 +2384,7 @@ retry:
if (unlikely(anon_vma_prepare(vma)))
goto oom;
- page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+ page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, address);
if (!page)
goto oom;
copy_user_highpage(page, new_page, address, vma);
Index: devel-tree-2.6.20-mm2/mm/mempolicy.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/mempolicy.c
+++ devel-tree-2.6.20-mm2/mm/mempolicy.c
@@ -603,7 +603,7 @@ static void migrate_page_add(struct page
static struct page *new_node_page(struct page *page, unsigned long node, int **x)
{
- return alloc_pages_node(node, GFP_HIGHUSER, 0);
+ return alloc_pages_node(node, GFP_HIGH_MOVABLE, 0);
}
/*
@@ -719,7 +719,7 @@ static struct page *new_vma_page(struct
{
struct vm_area_struct *vma = (struct vm_area_struct *)private;
- return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma));
+ return alloc_page_vma(GFP_HIGH_MOVABLE, vma, page_address_in_vma(page, vma));
}
#else
Index: devel-tree-2.6.20-mm2/mm/migrate.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/migrate.c
+++ devel-tree-2.6.20-mm2/mm/migrate.c
@@ -755,7 +755,7 @@ static struct page *new_page_node(struct
*result = &pm->status;
- return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0);
+ return alloc_pages_node(pm->node, GFP_HIGH_MOVABLE | GFP_THISNODE, 0);
}
/*
Index: devel-tree-2.6.20-mm2/mm/shmem.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/shmem.c
+++ devel-tree-2.6.20-mm2/mm/shmem.c
@@ -93,8 +93,11 @@ static inline struct page *shmem_dir_all
* The above definition of ENTRIES_PER_PAGE, and the use of
* BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
* might be reconsidered if it ever diverges from PAGE_SIZE.
+ *
+ * shmem's dir pages are not movable.
*/
- return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
+ return alloc_pages(gfp_mask & ~__GFP_MOVABLE,
+ PAGE_CACHE_SHIFT-PAGE_SHIFT);
}
static inline void shmem_dir_free(struct page *page)
Index: devel-tree-2.6.20-mm2/mm/swap_prefetch.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/swap_prefetch.c
+++ devel-tree-2.6.20-mm2/mm/swap_prefetch.c
@@ -204,7 +204,7 @@ static enum trickle_return trickle_swap_
* Get a new page to read from swap. We have already checked the
* watermarks so __alloc_pages will not call on reclaim.
*/
- page = alloc_pages_node(node, GFP_HIGHUSER & ~__GFP_WAIT, 0);
+ page = alloc_pages_node(node, GFP_HIGH_MOVABLE & ~__GFP_WAIT, 0);
if (unlikely(!page)) {
ret = TRICKLE_DELAY;
goto out;
Index: devel-tree-2.6.20-mm2/mm/swap_state.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/swap_state.c
+++ devel-tree-2.6.20-mm2/mm/swap_state.c
@@ -340,7 +340,7 @@ struct page *read_swap_cache_async(swp_e
* Get a new page to read into from swap.
*/
if (!new_page) {
- new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+ new_page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, addr);
if (!new_page)
break; /* Out of memory */
}
Index: devel-tree-2.6.20-mm2/fs/namei.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/fs/namei.c
+++ devel-tree-2.6.20-mm2/fs/namei.c
@@ -2691,6 +2691,7 @@ int __page_symlink(struct inode *inode,
int err;
char *kaddr;
+ gfp_mask &= ~(__GFP_MOVABLE);
retry:
err = -ENOMEM;
page = find_or_create_page(mapping, 0, gfp_mask);
Index: devel-tree-2.6.20-mm2/mm/filemap.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/filemap.c
+++ devel-tree-2.6.20-mm2/mm/filemap.c
@@ -423,7 +423,7 @@ int filemap_write_and_wait_range(struct
int add_to_page_cache(struct page *page, struct address_space *mapping,
pgoff_t offset, gfp_t gfp_mask)
{
- int error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+ int error = radix_tree_preload(gfp_mask & ~(__GFP_HIGHMEM | __GFP_MOVABLE));
if (error == 0) {
write_lock_irq(&mapping->tree_lock);
* [RFC} memory unplug patchset prep [8/16] counter for ZONE_MOVABLE
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (6 preceding siblings ...)
2007-03-06 4:49 ` [RFC} memory unplug patchset prep [7/16] change caller's gfp_mask KAMEZAWA Hiroyuki
@ 2007-03-06 4:50 ` KAMEZAWA Hiroyuki
2007-03-06 16:11 ` David Rientjes
2007-03-06 4:52 ` [RFC} memory unplug patchset prep [9/16] create movable zone at boot KAMEZAWA Hiroyuki
` (8 subsequent siblings)
16 siblings, 1 reply; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:50 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Show the number of Movable pages in meminfo and vmstat.
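(With CONFIG_ZONE_MOVABLE enabled, /proc/meminfo grows two lines of the form

	MovableTotal:  4194304 kB
	MovableFree:   4190208 kB

where the numbers above are made up, shown only to illustrate the format.)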
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
fs/proc/proc_misc.c | 8 ++++++++
include/linux/kernel.h | 2 ++
include/linux/vmstat.h | 8 +++++++-
mm/page_alloc.c | 28 +++++++++++++++++++++++++++-
mm/vmstat.c | 8 +++++++-
5 files changed, 51 insertions(+), 3 deletions(-)
Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
+++ devel-tree-2.6.20-mm2/mm/page_alloc.c
@@ -58,6 +58,7 @@ unsigned long totalram_pages __read_most
unsigned long totalreserve_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;
+unsigned long total_movable_pages __read_mostly;
static void __free_pages_ok(struct page *page, unsigned int order);
@@ -1571,6 +1572,20 @@ static unsigned int nr_free_zone_pages(i
return sum;
}
+unsigned int nr_free_movable_pages(void)
+{
+ unsigned long nr_pages = 0;
+ struct zone *zone;
+ int nid;
+ if (is_configured_zone(ZONE_MOVABLE)) {
+ /* we want to count *only* pages in movable zone */
+ for_each_online_node(nid) {
+ zone = &(NODE_DATA(nid)->node_zones[ZONE_MOVABLE]);
+ nr_pages += zone_page_state(zone, NR_FREE_PAGES);
+ }
+ }
+ return nr_pages;
+}
/*
* Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
*/
@@ -1584,7 +1599,7 @@ unsigned int nr_free_buffer_pages(void)
*/
unsigned int nr_free_pagecache_pages(void)
{
- return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
+ return nr_free_zone_pages(gfp_zone(GFP_HIGH_MOVABLE));
}
/*
@@ -1633,6 +1648,8 @@ void si_meminfo(struct sysinfo *val)
val->totalhigh = totalhigh_pages;
val->freehigh = nr_free_highpages();
val->mem_unit = PAGE_SIZE;
+ val->movable = total_movable_pages;
+ val->free_movable = nr_free_movable_pages();
}
EXPORT_SYMBOL(si_meminfo);
@@ -1654,6 +1671,13 @@ void si_meminfo_node(struct sysinfo *val
val->totalhigh = 0;
val->freehigh = 0;
}
+ if (is_configured_zone(ZONE_MOVABLE)) {
+ val->movable +=
+ pgdat->node_zones[ZONE_MOVABLE].present_pages;
+ val->free_movable +=
+ zone_page_state(&pgdat->node_zones[ZONE_MOVABLE],
+ NR_FREE_PAGES);
+ }
val->mem_unit = PAGE_SIZE;
}
#endif
@@ -2779,6 +2803,8 @@ static void __meminit free_area_init_cor
zone->spanned_pages = size;
zone->present_pages = realsize;
+ if (j == ZONE_MOVABLE)
+ total_movable_pages += realsize;
#ifdef CONFIG_NUMA
zone->node = nid;
zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
Index: devel-tree-2.6.20-mm2/include/linux/kernel.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/kernel.h
+++ devel-tree-2.6.20-mm2/include/linux/kernel.h
@@ -329,6 +329,8 @@ struct sysinfo {
unsigned short pad; /* explicit padding for m68k */
unsigned long totalhigh; /* Total high memory size */
unsigned long freehigh; /* Available high memory size */
+ unsigned long movable; /* pages used only for data */
+ unsigned long free_movable; /* Available pages in movable */
unsigned int mem_unit; /* Memory unit size in bytes */
char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */
};
Index: devel-tree-2.6.20-mm2/fs/proc/proc_misc.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/fs/proc/proc_misc.c
+++ devel-tree-2.6.20-mm2/fs/proc/proc_misc.c
@@ -160,6 +160,10 @@ static int meminfo_read_proc(char *page,
"LowTotal: %8lu kB\n"
"LowFree: %8lu kB\n"
#endif
+#ifdef CONFIG_ZONE_MOVABLE
+ "MovableTotal: %8lu kB\n"
+ "MovableFree: %8lu kB\n"
+#endif
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n"
"Dirty: %8lu kB\n"
@@ -191,6 +195,10 @@ static int meminfo_read_proc(char *page,
K(i.totalram-i.totalhigh),
K(i.freeram-i.freehigh),
#endif
+#ifdef CONFIG_ZONE_MOVABLE
+ K(i.movable),
+ K(i.free_movable),
+#endif
K(i.totalswap),
K(i.freeswap),
K(global_page_state(NR_FILE_DIRTY)),
Index: devel-tree-2.6.20-mm2/include/linux/vmstat.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/vmstat.h
+++ devel-tree-2.6.20-mm2/include/linux/vmstat.h
@@ -25,7 +25,13 @@
#define HIGHMEM_ZONE(xx)
#endif
-#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx)
+#ifdef CONFIG_ZONE_MOVABLE
+#define MOVABLE_ZONE(xx) , xx##_MOVABLE
+#else
+#define MOVABLE_ZONE(xx)
+#endif
+
+#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) MOVABLE_ZONE(xx)
enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
FOR_ALL_ZONES(PGALLOC),
Index: devel-tree-2.6.20-mm2/mm/vmstat.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/vmstat.c
+++ devel-tree-2.6.20-mm2/mm/vmstat.c
@@ -426,8 +426,14 @@ const struct seq_operations fragmentatio
#define TEXT_FOR_HIGHMEM(xx)
#endif
+#ifdef CONFIG_ZONE_MOVABLE
+#define TEXT_FOR_MOVABLE(xx) xx "_movable",
+#else
+#define TXT_FOR_MOVABLE(xx)
+#endif
+
#define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
- TEXT_FOR_HIGHMEM(xx)
+ TEXT_FOR_HIGHMEM(xx) TEXT_FOR_MOVABLE(xx)
static const char * const vmstat_text[] = {
/* Zoned VM counters */
* Re: [RFC} memory unplug patchset prep [8/16] counter for ZONE_MOVABLE
2007-03-06 4:50 ` [RFC} memory unplug patchset prep [8/16] counter for ZONE_MOVABLE KAMEZAWA Hiroyuki
@ 2007-03-06 16:11 ` David Rientjes
2007-03-07 1:55 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 34+ messages in thread
From: David Rientjes @ 2007-03-06 16:11 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
> Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
> +++ devel-tree-2.6.20-mm2/mm/page_alloc.c
> @@ -58,6 +58,7 @@ unsigned long totalram_pages __read_most
> unsigned long totalreserve_pages __read_mostly;
> long nr_swap_pages;
> int percpu_pagelist_fraction;
> +unsigned long total_movable_pages __read_mostly;
>
> static void __free_pages_ok(struct page *page, unsigned int order);
>
> @@ -1571,6 +1572,20 @@ static unsigned int nr_free_zone_pages(i
> return sum;
> }
>
> +unsigned int nr_free_movable_pages(void)
> +{
> + unsigned long nr_pages = 0;
> + struct zone *zone;
> + int nid;
> + if (is_configured_zone(ZONE_MOVABLE)) {
> + /* we want to count *only* pages in movable zone */
> + for_each_online_node(nid) {
> + zone = &(NODE_DATA(nid)->node_zones[ZONE_MOVABLE]);
> + nr_pages += zone_page_state(zone, NR_FREE_PAGES);
> + }
> + }
> + return nr_pages;
> +}
> /*
> * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
> */
On each online node, zone should be
zone = NODE_DATA(nid)->node_zones + ZONE_MOVABLE;
Also, you should probably only declare this function on #ifdef
CONFIG_ZONE_MOVABLE and #define it to "do {} while(0)" otherwise.
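(A sketch of that arrangement; since nr_free_movable_pages() returns a count, a
static inline returning 0 seems more natural than the do {} while (0) form, and
which header it would live in is left open here:

#ifdef CONFIG_ZONE_MOVABLE
extern unsigned int nr_free_movable_pages(void);
#else
static inline unsigned int nr_free_movable_pages(void)
{
	return 0;
}
#endif
)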
> @@ -1584,7 +1599,7 @@ unsigned int nr_free_buffer_pages(void)
> */
> unsigned int nr_free_pagecache_pages(void)
> {
> - return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
> + return nr_free_zone_pages(gfp_zone(GFP_HIGH_MOVABLE));
> }
>
> /*
> @@ -1633,6 +1648,8 @@ void si_meminfo(struct sysinfo *val)
> val->totalhigh = totalhigh_pages;
> val->freehigh = nr_free_highpages();
> val->mem_unit = PAGE_SIZE;
> + val->movable = total_movable_pages;
> + val->free_movable = nr_free_movable_pages();
> }
>
> EXPORT_SYMBOL(si_meminfo);
> @@ -1654,6 +1671,13 @@ void si_meminfo_node(struct sysinfo *val
> val->totalhigh = 0;
> val->freehigh = 0;
> }
> + if (is_configured_zone(ZONE_MOVABLE)) {
> + val->movable +=
> + pgdat->node_zones[ZONE_MOVABLE].present_pages;
> + val->free_movable +=
> + zone_page_state(&pgdat->node_zones[ZONE_MOVABLE],
> + NR_FREE_PAGES);
> + }
> val->mem_unit = PAGE_SIZE;
> }
> #endif
Don't you want assignments here instead of accumulations? val->movable
and val->free_movable probably shouldn't be the only members in
si_meminfo_node() that accumulate.
Your first patch in this patchset actually sets val->totalhigh and
val->freehigh both to 0 in the !is_configured_zone(ZONE_HIGHMEM) case. Do
these need the same assignments for movable and free_movable in the
!is_configured_zone(ZONE_MOVABLE) case?
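(For illustration, the assignment form being asked about would look roughly like:

	if (is_configured_zone(ZONE_MOVABLE)) {
		val->movable = pgdat->node_zones[ZONE_MOVABLE].present_pages;
		val->free_movable =
			zone_page_state(&pgdat->node_zones[ZONE_MOVABLE],
					NR_FREE_PAGES);
	} else {
		val->movable = 0;
		val->free_movable = 0;
	}

mirroring what the totalhigh/freehigh code referred to above already does.)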
> Index: devel-tree-2.6.20-mm2/include/linux/kernel.h
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/include/linux/kernel.h
> +++ devel-tree-2.6.20-mm2/include/linux/kernel.h
> @@ -329,6 +329,8 @@ struct sysinfo {
> unsigned short pad; /* explicit padding for m68k */
> unsigned long totalhigh; /* Total high memory size */
> unsigned long freehigh; /* Available high memory size */
> + unsigned long movable; /* pages used only for data */
> > + unsigned long free_movable; /* Available pages in movable */
> unsigned int mem_unit; /* Memory unit size in bytes */
> char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */
> };
Please add #ifdef's to CONFIG_ZONE_MOVABLE around these members in struct
sysinfo so we incur no penalty if we choose not to enable this option.
> Index: devel-tree-2.6.20-mm2/mm/vmstat.c
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/mm/vmstat.c
> +++ devel-tree-2.6.20-mm2/mm/vmstat.c
> @@ -426,8 +426,14 @@ const struct seq_operations fragmentatio
> #define TEXT_FOR_HIGHMEM(xx)
> #endif
>
> +#ifdef CONFIG_ZONE_MOVABLE
> +#define TEXT_FOR_MOVABLE(xx) xx "_movable",
> +#else
> +#define TXT_FOR_MOVABLE(xx)
> +#endif
> +
> #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
> - TEXT_FOR_HIGHMEM(xx)
> + TEXT_FOR_HIGHMEM(xx) TEXT_FOR_MOVABLE(xx)
>
> static const char * const vmstat_text[] = {
> /* Zoned VM counters */
>
This broke my build because TEXT_FOR_MOVABLE() is misspelled on
!CONFIG_ZONE_MOVABLE.
David
* Re: [RFC} memory unplug patchset prep [8/16] counter for ZONE_MOVABLE
2007-03-06 16:11 ` David Rientjes
@ 2007-03-07 1:55 ` KAMEZAWA Hiroyuki
0 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-07 1:55 UTC (permalink / raw)
To: David Rientjes; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007 08:11:22 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:
> On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
>
> > Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
> > +++ devel-tree-2.6.20-mm2/mm/page_alloc.c
> > @@ -58,6 +58,7 @@ unsigned long totalram_pages __read_most
> > unsigned long totalreserve_pages __read_mostly;
> > long nr_swap_pages;
> > int percpu_pagelist_fraction;
> > +unsigned long total_movable_pages __read_mostly;
> >
> > static void __free_pages_ok(struct page *page, unsigned int order);
> >
> > @@ -1571,6 +1572,20 @@ static unsigned int nr_free_zone_pages(i
> > return sum;
> > }
> >
> > +unsigned int nr_free_movable_pages(void)
> > +{
> > + unsigned long nr_pages = 0;
> > + struct zone *zone;
> > + int nid;
> > + if (is_configured_zone(ZONE_MOVABLE)) {
> > + /* we want to count *only* pages in movable zone */
> > + for_each_online_node(nid) {
> > + zone = &(NODE_DATA(nid)->node_zones[ZONE_MOVABLE]);
> > + nr_pages += zone_page_state(zone, NR_FREE_PAGES);
> > + }
> > + }
> > + return nr_pages;
> > +}
> > /*
> > * Amount of free RAM allocatable within ZONE_DMA and ZONE_NORMAL
> > */
>
> On each online node, zone should be
>
> zone = NODE_DATA(nid)->node_zones + ZONE_MOVABLE;
>
> Also, you should probably only declare this function on #ifdef
> CONFIG_ZONE_MOVABLE and #define it to "do {} while(0)" otherwise.
>
is_configured_zone() does enough work. (But I'll move to the latest -mm.)
> > @@ -1584,7 +1599,7 @@ unsigned int nr_free_buffer_pages(void)
> > */
> > unsigned int nr_free_pagecache_pages(void)
> > {
> > - return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
> > + return nr_free_zone_pages(gfp_zone(GFP_HIGH_MOVABLE));
> > }
> >
> > /*
> > @@ -1633,6 +1648,8 @@ void si_meminfo(struct sysinfo *val)
> > val->totalhigh = totalhigh_pages;
> > val->freehigh = nr_free_highpages();
> > val->mem_unit = PAGE_SIZE;
> > + val->movable = total_movable_pages;
> > + val->free_movable = nr_free_movable_pages();
> > }
> >
> > EXPORT_SYMBOL(si_meminfo);
> > @@ -1654,6 +1671,13 @@ void si_meminfo_node(struct sysinfo *val
> > val->totalhigh = 0;
> > val->freehigh = 0;
> > }
> > + if (is_configured_zone(ZONE_MOVABLE)) {
> > + val->movable +=
> > + pgdat->node_zones[ZONE_MOVABLE].present_pages;
> > + val->free_movable +=
> > + zone_page_state(&pgdat->node_zones[ZONE_MOVABLE],
> > + NR_FREE_PAGES);
> > + }
> > val->mem_unit = PAGE_SIZE;
> > }
> > #endif
>
> Don't you want assignments here instead of accumulations? val->movable
> and val->free_movable probably shouldn't be the only members in
> si_meminfo_node() that accumulate.
>
> Your first patch in this patchset actually sets val->totalhigh and
> val->freehigh both to 0 in the !is_configured_zone(ZONE_HIGHMEM) case. Do
> these need the same assignments for movable and free_movable in the
> !is_configured_zone(ZONE_MOVABLE) case?
>
> > Index: devel-tree-2.6.20-mm2/include/linux/kernel.h
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/include/linux/kernel.h
> > +++ devel-tree-2.6.20-mm2/include/linux/kernel.h
> > @@ -329,6 +329,8 @@ struct sysinfo {
> > unsigned short pad; /* explicit padding for m68k */
> > unsigned long totalhigh; /* Total high memory size */
> > unsigned long freehigh; /* Available high memory size */
> > + unsigned long movable; /* pages used only for data */
> > > + unsigned long free_movable; /* Available pages in movable */
> > unsigned int mem_unit; /* Memory unit size in bytes */
> > char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */
> > };
>
> Please add #ifdef's to CONFIG_ZONE_MOVABLE around these members in struct
> sysinfo so we incur no penalty if we choose not to enable this option.
>
I just did this because the highmem fields are not covered by CONFIG_HIGHMEM either.
> > Index: devel-tree-2.6.20-mm2/mm/vmstat.c
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/mm/vmstat.c
> > +++ devel-tree-2.6.20-mm2/mm/vmstat.c
> > @@ -426,8 +426,14 @@ const struct seq_operations fragmentatio
> > #define TEXT_FOR_HIGHMEM(xx)
> > #endif
> >
> > +#ifdef CONFIG_ZONE_MOVABLE
> > +#define TEXT_FOR_MOVABLE(xx) xx "_movable",
> > +#else
> > +#define TXT_FOR_MOVABLE(xx)
> > +#endif
> > +
> > #define TEXTS_FOR_ZONES(xx) TEXT_FOR_DMA(xx) TEXT_FOR_DMA32(xx) xx "_normal", \
> > - TEXT_FOR_HIGHMEM(xx)
> > + TEXT_FOR_HIGHMEM(xx) TEXT_FOR_MOVABLE(xx)
> >
> > static const char * const vmstat_text[] = {
> > /* Zoned VM counters */
> >
>
> This broke my build because TEXT_FOR_MOVABLE() is misspelled on
> !CONFIG_ZONE_MOVABLE.
>
Ah, sigh. OK. I'll do better testing next time.
-Kame
* [RFC} memory unplug patchset prep [9/16] create movable zone at boot
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (7 preceding siblings ...)
2007-03-06 4:50 ` [RFC} memory unplug patchset prep [8/16] counter for ZONE_MOVABLE KAMEZAWA Hiroyuki
@ 2007-03-06 4:52 ` KAMEZAWA Hiroyuki
2007-03-06 16:06 ` David Rientjes
2007-03-06 4:53 ` [RFC} memory unplug patchset prep [10/16] ia64 support KAMEZAWA Hiroyuki
` (7 subsequent siblings)
16 siblings, 1 reply; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:52 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
This patch adds code for creating movable zones.
Add 2 kernel parameters.
- kernel_core_pages=XXX[KMG]
- kernel_core_ratio=xx
When kernel_core_pages is specified, create zone(s) for not-movable pages
starting from the lower addresses, sized to the specified amount.
Maybe good for non-NUMA environments and node hot-remove.
When kernel_core_ratio is specified, create zone(s) for not-movable pages
on each node. The amount of the not-movable zone is calculated as
memory_on_node * kernel_core_ratio / 100.
Maybe good for NUMA environments where you just want to use the MOVABLE zone.
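(Example: booting with kernel_core_ratio=25 on a NUMA machine whose nodes each
hold 8GB keeps roughly the lowest 2GB of every node in the kernel-core zones and
puts the remaining ~6GB per node into ZONE_MOVABLE, subject to the alignment done
by calc_zone_alignment() below.)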
Note:
The changes to zone_spanned_pages_in_node()/absent_pages_in_node() look ugly...
And this boot option handling is just a sample. I'll change it when I find a better
way to go.
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
Documentation/kernel-parameters.txt | 11 ++
include/linux/mmzone.h | 3
mm/page_alloc.c | 198 +++++++++++++++++++++++++++++++++---
3 files changed, 199 insertions(+), 13 deletions(-)
Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
+++ devel-tree-2.6.20-mm2/mm/page_alloc.c
@@ -137,12 +137,16 @@ static unsigned long __initdata dma_rese
int __initdata nr_nodemap_entries;
unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+ unsigned long __initdata lowest_movable_pfn[MAX_NUMNODES];
+ unsigned long kernel_core_ratio;
+ unsigned long kernel_core_pages;
#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
unsigned long __initdata node_boundary_start_pfn[MAX_NUMNODES];
unsigned long __initdata node_boundary_end_pfn[MAX_NUMNODES];
#endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
#ifdef CONFIG_DEBUG_VM
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{
@@ -2604,6 +2608,8 @@ void __init get_pfn_range_for_nid(unsign
*/
unsigned long __init zone_spanned_pages_in_node(int nid,
unsigned long zone_type,
+ unsigned long *start_pfn,
+ unsigned long *end_pfn,
unsigned long *ignored)
{
unsigned long node_start_pfn, node_end_pfn;
@@ -2611,8 +2617,30 @@ unsigned long __init zone_spanned_pages_
/* Get the start and end of the node and zone */
get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
- zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
- zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+ if (start_pfn)
+ *start_pfn = 0;
+ if (end_pfn)
+ *end_pfn = 0;
+ if (!is_configured_zone(ZONE_MOVABLE) ||
+ lowest_movable_pfn[nid] == 0) {
+ /* we don't use ZONE_MOVABLE */
+ zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
+ zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+ } else if (zone_type == ZONE_MOVABLE) {
+ zone_start_pfn = lowest_movable_pfn[nid];
+ zone_end_pfn = node_end_pfn;
+ } else {
+ /* adjust range to lowest_movable_pfn[] */
+ zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
+ zone_start_pfn = max(zone_start_pfn, node_start_pfn);
+
+ if (zone_start_pfn >= lowest_movable_pfn[nid])
+ return 0;
+ zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+ zone_end_pfn = min(zone_end_pfn, node_end_pfn);
+ if (zone_end_pfn > lowest_movable_pfn[nid])
+ zone_end_pfn = lowest_movable_pfn[nid];
+ }
/* Check that this node has pages within the zone's required range */
if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
@@ -2621,8 +2649,11 @@ unsigned long __init zone_spanned_pages_
/* Move the zone boundaries inside the node if necessary */
zone_end_pfn = min(zone_end_pfn, node_end_pfn);
zone_start_pfn = max(zone_start_pfn, node_start_pfn);
-
/* Return the spanned pages */
+ if (start_pfn)
+ *start_pfn = zone_start_pfn;
+ if (end_pfn)
+ *end_pfn = zone_end_pfn;
return zone_end_pfn - zone_start_pfn;
}
@@ -2692,16 +2723,24 @@ unsigned long __init absent_pages_in_ran
/* Return the number of page frames in holes in a zone on a node */
unsigned long __init zone_absent_pages_in_node(int nid,
unsigned long zone_type,
+ unsigned long start,
+ unsigned long end,
unsigned long *ignored)
{
unsigned long node_start_pfn, node_end_pfn;
unsigned long zone_start_pfn, zone_end_pfn;
get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
- zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type],
- node_start_pfn);
- zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
- node_end_pfn);
+ if (start == 0 && end == 0) {
+ zone_start_pfn = max(arch_zone_lowest_possible_pfn[zone_type],
+ node_start_pfn);
+ zone_end_pfn = min(arch_zone_highest_possible_pfn[zone_type],
+ node_end_pfn);
+ } else {
+ /* ZONE_MOVABLE always use passed params */
+ zone_start_pfn = start;
+ zone_end_pfn = end;
+ }
return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
}
@@ -2709,13 +2748,22 @@ unsigned long __init zone_absent_pages_i
#else
static inline unsigned long zone_spanned_pages_in_node(int nid,
unsigned long zone_type,
+ unsigned long *start_pfn,
+ unsigned long *end_pfn,
unsigned long *zones_size)
{
+ /* this will not be used by caller*/
+ if (start_pfn)
+ *start_pfn = 0;
+ if (end_pfn)
+ *end_pfn = 0;
return zones_size[zone_type];
}
static inline unsigned long zone_absent_pages_in_node(int nid,
unsigned long zone_type,
+ unsigned long start,
+ unsigned long end,
unsigned long *zholes_size)
{
if (!zholes_size)
@@ -2733,20 +2781,115 @@ static void __init calculate_node_totalp
enum zone_type i;
for (i = 0; i < MAX_NR_ZONES; i++)
- totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
+ totalpages += zone_spanned_pages_in_node(pgdat->node_id, i, NULL, NULL,
zones_size);
pgdat->node_spanned_pages = totalpages;
realtotalpages = totalpages;
for (i = 0; i < MAX_NR_ZONES; i++)
realtotalpages -=
- zone_absent_pages_in_node(pgdat->node_id, i,
+ zone_absent_pages_in_node(pgdat->node_id, i, 0, 0,
zholes_size);
pgdat->node_present_pages = realtotalpages;
printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
realtotalpages);
}
+#ifdef CONFIG_ZONE_MOVABLE
+
+unsigned long calc_zone_alignment(unsigned long pfn)
+{
+#ifdef CONFIG_SPARSEMEM
+ return (pfn + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK;
+#else
+ return (pfn + MAX_ORDER_NR_PAGES - 1) & ~(MAX_ORDER_NR_PAGES - 1)
+#endif
+}
+
+
+static void alloc_core_pages_from_low(void)
+{
+ unsigned long nr_pages, start_pfn, end_pfn, pfn;
+ int i, nid;
+ long kcore_pages = kernel_core_pages;
+ for_each_online_node(nid) {
+ for_each_active_range_index_in_nid(i, nid) {
+ start_pfn = early_node_map[i].start_pfn;
+ end_pfn = early_node_map[i].end_pfn;
+ nr_pages = end_pfn - start_pfn;
+ if (nr_pages > kcore_pages) {
+ pfn = start_pfn + kcore_pages;
+ pfn = calc_zone_alignment(pfn);
+ if (pfn < end_pfn) {
+ lowest_movable_pfn[nid] = pfn;
+ kcore_pages = 0;
+ break;
+ } else {
+ kcore_pages = 0;
+ }
+ } else {
+ kcore_pages -= nr_pages;
+ }
+ }
+ }
+ return;
+}
+
+static void split_movable_pages(void)
+{
+ int i, nid;
+ unsigned long total_pages, nr_pages, start_pfn, end_pfn, pfn;
+ long core;
+ for_each_online_node(nid) {
+ lowest_movable_pfn[nid] = 0;
+ pfn = 0;
+ total_pages = 0;
+ for_each_active_range_index_in_nid(i, nid) {
+ start_pfn = early_node_map[i].start_pfn;
+ end_pfn = early_node_map[i].end_pfn;
+ total_pages += end_pfn - start_pfn;
+ }
+ core = total_pages * kernel_core_ratio/100;
+ for_each_active_range_index_in_nid(i, nid) {
+ start_pfn = early_node_map[i].start_pfn;
+ end_pfn = early_node_map[i].end_pfn;
+ nr_pages = end_pfn - start_pfn;
+ if (nr_pages > core) {
+ pfn = start_pfn + core;
+ pfn = calc_zone_alignment(pfn);
+ if (pfn < end_pfn) {
+ lowest_movable_pfn[nid] = pfn;
+ break;
+ } else {
+ core -= nr_pages;
+ if (core < 0)
+ core = 0;
+ }
+ } else {
+ core -= nr_pages;
+ }
+ }
+ }
+ return;
+}
+
+
+static void reserve_movable_pages(void)
+{
+ memset(lowest_movable_pfn, 0, sizeof(lowest_movable_pfn));
+ if (kernel_core_pages) {
+ alloc_core_pages_from_low();
+ } else if (kernel_core_ratio) {
+ split_movable_pages();
+ }
+ return;
+}
+#else
+static void reserve_movable_pages(void)
+{
+ return;
+}
+#endif
/*
* Set up the zone data structures:
* - mark all pages reserved
@@ -2768,10 +2911,10 @@ static void __meminit free_area_init_cor
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
- unsigned long size, realsize, memmap_pages;
+ unsigned long size, realsize, memmap_pages, start, end;
- size = zone_spanned_pages_in_node(nid, j, zones_size);
- realsize = size - zone_absent_pages_in_node(nid, j,
+ size = zone_spanned_pages_in_node(nid, j, &start, &end, zones_size);
+ realsize = size - zone_absent_pages_in_node(nid, j, start, end,
zholes_size);
/*
@@ -3065,6 +3208,7 @@ unsigned long __init find_max_pfn_with_a
return max_pfn;
}
+
/**
* free_area_init_nodes - Initialise all pg_data_t and zone data
* @max_zone_pfn: an array of max PFNs for each zone
@@ -3127,6 +3271,8 @@ void __init free_area_init_nodes(unsigne
/* Initialise every node */
setup_nr_node_ids();
+ /* setup movable pages */
+ reserve_movable_pages();
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
free_area_init_node(nid, pgdat, NULL,
@@ -3542,6 +3688,33 @@ void *__init alloc_large_system_hash(con
return table;
}
+#ifdef CONFIG_ZONE_MOVABLE
+
+char * __init parse_kernel_core_pages(char *p)
+{
+ unsigned long long coremem;
+ if (!p)
+ return NULL;
+ coremem = memparse(p, &p);
+ kernel_core_pages = coremem >> PAGE_SHIFT;
+ return p;
+}
+
+char * __init parse_kernel_core_ratio(char *p)
+{
+ int ratio[1];
+ ratio[0] = 0;
+ if (!p)
+ return NULL;
+ p = get_options(p, 1, ratio);
+ if (ratio[0])
+ kernel_core_ratio = ratio[0];
+ if (kernel_core_ratio > 100)
+ kernel_core_ratio = 0; /* all memory is not movable */
+ return p;
+}
+#endif /* CONFIG_ZONE_MOVABLE */
+
#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
struct page *pfn_to_page(unsigned long pfn)
{
@@ -3555,4 +3728,3 @@ EXPORT_SYMBOL(pfn_to_page);
EXPORT_SYMBOL(page_to_pfn);
#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
-
Index: devel-tree-2.6.20-mm2/Documentation/kernel-parameters.txt
===================================================================
--- devel-tree-2.6.20-mm2.orig/Documentation/kernel-parameters.txt
+++ devel-tree-2.6.20-mm2/Documentation/kernel-parameters.txt
@@ -764,6 +764,17 @@ and is between 256 and 4096 characters.
keepinitrd [HW,ARM]
+ kernel_core_pages=nn[KMG] [KNL, BOOT] Divide the whole memory into
+ not-movable and movable. Movable memory can be
+ used only for page cache and user data. This option
+ specifies the amount of not-movable pages, called core
+ pages. Core pages are allocated from the lower addresses.
+
+ kernel_core_ratio=nn [KNL, BOOT] Specifies the amount of core
+ pages (see kernel_core_pages) as a ratio of
+ total memory. On NUMA, core pages are allocated on
+ each node by this ratio. "0" is not allowed.
+
kstack=N [IA-32,X86-64] Print N words from the kernel stack
in oops dumps.
Index: devel-tree-2.6.20-mm2/include/linux/mmzone.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/mmzone.h
+++ devel-tree-2.6.20-mm2/include/linux/mmzone.h
@@ -608,6 +608,9 @@ int sysctl_min_unmapped_ratio_sysctl_han
int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
struct file *, void __user *, size_t *, loff_t *);
+extern char* parse_kernel_core_pages(char *cp);
+extern char* parse_kernel_core_ratio(char *cp);
+
#include <linux/topology.h>
/* Returns the number of the current Node. */
#ifndef numa_node_id
* Re: [RFC} memory unplug patchset prep [9/16] create movable zone at boot
2007-03-06 4:52 ` [RFC} memory unplug patchset prep [9/16] create movable zone at boot KAMEZAWA Hiroyuki
@ 2007-03-06 16:06 ` David Rientjes
2007-03-07 2:02 ` KAMEZAWA Hiroyuki
0 siblings, 1 reply; 34+ messages in thread
From: David Rientjes @ 2007-03-06 16:06 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
> This patch adds codes for creating movable zones.
>
> Add 2 kernel paramers.
> - kernel_core_pages=XXX[KMG]
> - kernel_core_ratio=xx
>
These would never be specified together, right?
> Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
> +++ devel-tree-2.6.20-mm2/mm/page_alloc.c
> @@ -137,12 +137,16 @@ static unsigned long __initdata dma_rese
> int __initdata nr_nodemap_entries;
> unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
> unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
> + unsigned long __initdata lowest_movable_pfn[MAX_NUMNODES];
> + unsigned long kernel_core_ratio;
> + unsigned long kernel_core_pages;
> #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
> unsigned long __initdata node_boundary_start_pfn[MAX_NUMNODES];
> unsigned long __initdata node_boundary_end_pfn[MAX_NUMNODES];
> #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
> #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
>
> +
> #ifdef CONFIG_DEBUG_VM
> static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
> {
You could probably get away with:
union {
unsigned long kernel_core_ratio;
unsigned long kernel_core_pages;
};
> @@ -2604,6 +2608,8 @@ void __init get_pfn_range_for_nid(unsign
> */
> unsigned long __init zone_spanned_pages_in_node(int nid,
> unsigned long zone_type,
> + unsigned long *start_pfn,
> + unsigned long *end_pfn,
> unsigned long *ignored)
> {
> unsigned long node_start_pfn, node_end_pfn;
> @@ -2611,8 +2617,30 @@ unsigned long __init zone_spanned_pages_
>
> /* Get the start and end of the node and zone */
> get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
> - zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
> - zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
> + if (start_pfn)
> + *start_pfn = 0;
> + if (end_pfn)
> + *end_pfn = 0;
> + if (!is_configured_zone(ZONE_MOVABLE) ||
> + lowest_movable_pfn[nid] == 0) {
> + /* we don't use ZONE_MOVABLE */
> + zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
> + zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
> + } else if (zone_type == ZONE_MOVABLE) {
> + zone_start_pfn = lowest_movable_pfn[nid];
> + zone_end_pfn = node_end_pfn;
> + } else {
> + /* adjust range to lowest_movable_pfn[] */
> + zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
> + zone_start_pfn = max(zone_start_pfn, node_start_pfn);
> +
> + if (zone_start_pfn >= lowest_movable_pfn[nid])
> + return 0;
> + zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
> + zone_end_pfn = min(zone_end_pfn, node_end_pfn);
> + if (zone_end_pfn > lowest_movable_pfn[nid])
> + zone_end_pfn = lowest_movable_pfn[nid];
> + }
>
> /* Check that this node has pages within the zone's required range */
> if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
These hacks of returning start_pfn and end_pfn depending on where it was
called from and testing for things like start_pfn == end_pfn don't make
much sense. It'd probably be better to separate this logic out into a
helper function and then call it from zone_absent_pages_in_node() and
zone_spanned_pages_in_node(), respectively.
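(One possible shape for such a helper; this is only a sketch with a hypothetical
name, not code from the patchset:

static void adjust_zone_range_for_movable(int nid, unsigned long zone_type,
					unsigned long node_end_pfn,
					unsigned long *zone_start_pfn,
					unsigned long *zone_end_pfn)
{
	/* Nothing to do when no movable boundary is set for this node */
	if (!is_configured_zone(ZONE_MOVABLE) || lowest_movable_pfn[nid] == 0)
		return;

	if (zone_type == ZONE_MOVABLE) {
		/* ZONE_MOVABLE covers everything above the boundary */
		*zone_start_pfn = lowest_movable_pfn[nid];
		*zone_end_pfn = node_end_pfn;
	} else {
		/*
		 * Kernel-core zones are clipped at the movable boundary;
		 * a clipped-to-empty range means the zone has no pages here.
		 */
		*zone_start_pfn = min(*zone_start_pfn, lowest_movable_pfn[nid]);
		*zone_end_pfn = min(*zone_end_pfn, lowest_movable_pfn[nid]);
	}
}

Both zone_spanned_pages_in_node() and zone_absent_pages_in_node() could then call
it after computing their usual start/end, instead of passing pfns back and forth.)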
> @@ -2733,20 +2781,115 @@ static void __init calculate_node_totalp
> enum zone_type i;
>
> for (i = 0; i < MAX_NR_ZONES; i++)
> - totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
> + totalpages += zone_spanned_pages_in_node(pgdat->node_id, i, NULL, NULL,
> zones_size);
> pgdat->node_spanned_pages = totalpages;
>
> realtotalpages = totalpages;
> for (i = 0; i < MAX_NR_ZONES; i++)
> realtotalpages -=
> - zone_absent_pages_in_node(pgdat->node_id, i,
> + zone_absent_pages_in_node(pgdat->node_id, i, 0, 0,
> zholes_size);
> pgdat->node_present_pages = realtotalpages;
> printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
> realtotalpages);
> }
>
> +#ifdef CONFIG_ZONE_MOVABLE
> +
> +unsigned long calc_zone_alignment(unsigned long pfn)
> +{
> +#ifdef CONFIG_SPARSEMEM
> + return (pfn + PAGES_PER_SECTION - 1) & PAGE_SECTION_MASK;
> +#else
> + return (pfn + MAX_ORDER_NR_PAGES - 1) & ~(MAX_ORDER_NR_PAGES - 1)
> +#endif
> +}
> +
Another missing semicolon.
> +static void split_movable_pages(void)
> +{
> + int i, nid;
> + unsigned long total_pages, nr_pages, start_pfn, end_pfn, pfn;
> + long core;
> + for_each_online_node(nid) {
> + lowest_movable_pfn[nid] = 0;
> + pfn = 0;
> + total_pages = 0;
> + for_each_active_range_index_in_nid(i, nid) {
> + start_pfn = early_node_map[i].start_pfn;
> + end_pfn = early_node_map[i].end_pfn;
> + total_pages += end_pfn - start_pfn;
> + }
> + core = total_pages * kernel_core_ratio/100;
> + for_each_active_range_index_in_nid(i, nid) {
> + start_pfn = early_node_map[i].start_pfn;
> + end_pfn = early_node_map[i].end_pfn;
> + nr_pages = end_pfn - start_pfn;
> + if (nr_pages > core) {
> + pfn = start_pfn + core;
> + pfn = calc_zone_alignment(pfn);
> + if (pfn < end_pfn) {
> + lowest_movable_pfn[nid] = pfn;
> + break;
> + } else {
> + core -= nr_pages;
> + if (core < 0)
> + core = 0;
> + }
> + } else {
> + core -= nr_pages;
> + }
> + }
> + }
> + return;
> +}
> +
> +
> +static void reserve_movable_pages(void)
> +{
> + memset(lowest_movable_pfn, 0, MAX_NUMNODES);
> + if (kernel_core_pages) {
> + alloc_core_pages_from_low();
> + } else if (kernel_core_ratio) {
> + split_movable_pages();
> + }
> + return;
> +}
> +#else
> +static void reserve_movable_pages(void)
> +{
> + return;
> +}
> +#endif
> /*
> * Set up the zone data structures:
> * - mark all pages reserved
reserve_movable_pages() and its two helper functions,
alloc_core_pages_from_low() and split_movable_pages(), can be __init?
If so, then both kernel_core_pages and kernel_core_ratio should be
__initdata.
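(That is, roughly:

	unsigned long __initdata kernel_core_pages;
	unsigned long __initdata kernel_core_ratio;

	static void __init reserve_movable_pages(void);

with the same __init tag on alloc_core_pages_from_low() and split_movable_pages(),
so the boot-only code and data are discarded after init.)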
> Index: devel-tree-2.6.20-mm2/Documentation/kernel-parameters.txt
> ===================================================================
> --- devel-tree-2.6.20-mm2.orig/Documentation/kernel-parameters.txt
> +++ devel-tree-2.6.20-mm2/Documentation/kernel-parameters.txt
> @@ -764,6 +764,17 @@ and is between 256 and 4096 characters.
>
> keepinitrd [HW,ARM]
>
> + kernel_core_pages=nn[KMG] [KNL, BOOT] divide the whole memory into
> + not-movable and movable. movable memory can be
> + used only for page cache and user data. This option
> + specifies the amount of not-movable pages, called core
> + pages. core pages are allocated from the lower address.
> +
> + kernel_core_ratio=nn [KND, BOOT] specifies the amount of the core
> + pages(see kernel_core_pages) by the ratio against
> + total memory. If NUMA, core pages are allocated for
> + each node by this ratio. "0" is not allowed.
> +
> kstack=N [IA-32,X86-64] Print N words from the kernel stack
> in oops dumps.
>
This documentation doesn't specify that we can't use both parameters
together even though we can't.
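(A single added sentence along the lines of "kernel_core_pages and
kernel_core_ratio are mutually exclusive; specify at most one of them." would
cover that.)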
David
* Re: [RFC} memory unplug patchset prep [9/16] create movable zone at boot
2007-03-06 16:06 ` David Rientjes
@ 2007-03-07 2:02 ` KAMEZAWA Hiroyuki
0 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-07 2:02 UTC (permalink / raw)
To: David Rientjes; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007 08:06:48 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:
> On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
>
> > This patch adds codes for creating movable zones.
> >
> > Add 2 kernel paramers.
> > - kernel_core_pages=XXX[KMG]
> > - kernel_core_ratio=xx
> >
>
> These would never be specified together, right?
>
Right, they are never specified together.
> > Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
> > ===================================================================
> > --- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
> > +++ devel-tree-2.6.20-mm2/mm/page_alloc.c
> > @@ -137,12 +137,16 @@ static unsigned long __initdata dma_rese
> > int __initdata nr_nodemap_entries;
> > unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
> > unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
> > + unsigned long __initdata lowest_movable_pfn[MAX_NUMNODES];
> > + unsigned long kernel_core_ratio;
> > + unsigned long kernel_core_pages;
> > #ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
> > unsigned long __initdata node_boundary_start_pfn[MAX_NUMNODES];
> > unsigned long __initdata node_boundary_end_pfn[MAX_NUMNODES];
> > #endif /* CONFIG_MEMORY_HOTPLUG_RESERVE */
> > #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
> >
> > +
> > #ifdef CONFIG_DEBUG_VM
> > static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
> > {
>
> You could probably get away with:
>
> union {
> unsigned long kernel_core_ratio;
> unsigned long kernel_core_pages;
> };
>
> > @@ -2604,6 +2608,8 @@ void __init get_pfn_range_for_nid(unsign
> > */
> > unsigned long __init zone_spanned_pages_in_node(int nid,
> > unsigned long zone_type,
> > + unsigned long *start_pfn,
> > + unsigned long *end_pfn,
> > unsigned long *ignored)
> > {
> > unsigned long node_start_pfn, node_end_pfn;
> > @@ -2611,8 +2617,30 @@ unsigned long __init zone_spanned_pages_
> >
> > /* Get the start and end of the node and zone */
> > get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
> > - zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
> > - zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
> > + if (start_pfn)
> > + *start_pfn = 0;
> > + if (end_pfn)
> > + *end_pfn = 0;
> > + if (!is_configured_zone(ZONE_MOVABLE) ||
> > + lowest_movable_pfn[nid] == 0) {
> > + /* we don't use ZONE_MOVABLE */
> > + zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
> > + zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
> > + } else if (zone_type == ZONE_MOVABLE) {
> > + zone_start_pfn = lowest_movable_pfn[nid];
> > + zone_end_pfn = node_end_pfn;
> > + } else {
> > + /* adjust range to lowest_movable_pfn[] */
> > + zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
> > + zone_start_pfn = max(zone_start_pfn, node_start_pfn);
> > +
> > + if (zone_start_pfn >= lowest_movable_pfn[nid])
> > + return 0;
> > + zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
> > + zone_end_pfn = min(zone_end_pfn, node_end_pfn);
> > + if (zone_end_pfn > lowest_movable_pfn[nid])
> > + zone_end_pfn = lowest_movable_pfn[nid];
> > + }
> >
> > /* Check that this node has pages within the zone's required range */
> > if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
>
> These hacks of returning start_pfn and end_pfn depending on where it was
> called from and testing for things like start_pfn == end_pfn doesn't make
> much sense. It'd probably be better to separate this logic out into a
> helper function and then call it from zone_absent_pages_in_node() and
> zone_spanned_pages_in_node(), respectively.
>
Hmm, this whole logic is different from Mel's.
I'll look into his approach and reconsider.
-Kame
* [RFC} memory unplug patchset prep [10/16] ia64 support
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (8 preceding siblings ...)
2007-03-06 4:52 ` [RFC} memory unplug patchset prep [9/16] create movable zone at boot KAMEZAWA Hiroyuki
@ 2007-03-06 4:53 ` KAMEZAWA Hiroyuki
2007-03-06 4:55 ` [RFC} memory unplug patchset prep [11/16] page isolation core KAMEZAWA Hiroyuki
` (6 subsequent siblings)
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:53 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Add ia64 support for kernel_core_pages/kernel_core_ratio.
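(Usage is the same as documented in patch 9: something like "kernel_core_pages=2G"
or "kernel_core_ratio=30" on the boot command line; the values here are only
examples.)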
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
arch/ia64/kernel/efi.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
Index: devel-tree-2.6.20-mm2/arch/ia64/kernel/efi.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/ia64/kernel/efi.c
+++ devel-tree-2.6.20-mm2/arch/ia64/kernel/efi.c
@@ -424,7 +424,11 @@ efi_init (void)
max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
} else if (memcmp(cp, "min_addr=", 9) == 0) {
min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
- } else {
+ } else if (memcmp(cp, "kernel_core_pages=",18) == 0) {
+ cp = parse_kernel_core_pages(cp + 18);
+ } else if (memcmp(cp, "kernel_core_ratio=", 18) == 0) {
+ cp = parse_kernel_core_ratio(cp + 18);
+ } else {
while (*cp != ' ' && *cp)
++cp;
while (*cp == ' ')
* [RFC} memory unplug patchset prep [11/16] page isolation core
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (9 preceding siblings ...)
2007-03-06 4:53 ` [RFC} memory unplug patchset prep [10/16] ia64 support KAMEZAWA Hiroyuki
@ 2007-03-06 4:55 ` KAMEZAWA Hiroyuki
2007-03-06 4:56 ` [RFC} memory unplug patchset prep [12/16] drain all pages KAMEZAWA Hiroyuki
` (5 subsequent siblings)
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:55 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
This patch adds support for making pages unused.
Pages are isolated by capturing freed pages before they are inserted into
free_area[], i.e. into the buddy allocator.
If you have an idea for avoiding the spin_lock(), please advise me.
Isolating pages that are already in free_area[] is implemented in another patch.
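A rough usage sketch of the interface added below (not part of the patch itself;
start_pfn/end_pfn and the really_remove flag are just placeholders):

	struct isolation_info *info;

	/* start and end must be MAX_ORDER-aligned pfns within one zone */
	info = register_isolation(start_pfn, end_pfn);
	if (IS_ERR(info))
		return PTR_ERR(info);
	/* ... pages freed inside [start_pfn, end_pfn) are now captured ... */
	detach_isolation_info_zone(info);
	if (really_remove)
		unuse_all_isolated_pages(info);	/* keep them out of the allocator */
	else
		free_all_isolated_pages(info);	/* put them back on the free lists */
	free_isolation_info(info);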
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/mmzone.h | 8 +
include/linux/page_isolation.h | 52 +++++++++++
mm/Kconfig | 7 +
mm/page_alloc.c | 184 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 251 insertions(+)
Index: devel-tree-2.6.20-mm2/include/linux/mmzone.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/mmzone.h
+++ devel-tree-2.6.20-mm2/include/linux/mmzone.h
@@ -315,6 +315,14 @@ struct zone {
/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
unsigned long zone_start_pfn;
+#ifdef CONFIG_PAGE_ISOLATION
+ /*
+ * For pages which are not used but not free.
+ * See include/linux/page_isolation.h
+ */
+ spinlock_t isolation_lock;
+ struct list_head isolation_list;
+#endif
/*
* zone_start_pfn, spanned_pages and present_pages are all
* protected by span_seqlock. It is a seqlock because it has
Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
+++ devel-tree-2.6.20-mm2/mm/page_alloc.c
@@ -41,6 +41,7 @@
#include <linux/pfn.h>
#include <linux/backing-dev.h>
#include <linux/fault-inject.h>
+#include <linux/page_isolation.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -421,6 +422,9 @@ static inline void __free_one_page(struc
if (unlikely(PageCompound(page)))
destroy_compound_page(page, order);
+ if (page_under_isolation(zone, page, order))
+ return;
+
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
VM_BUG_ON(page_idx & (order_size - 1));
@@ -2969,6 +2973,10 @@ static void __meminit free_area_init_cor
zone->nr_scan_inactive = 0;
zap_zone_vm_stats(zone);
atomic_set(&zone->reclaim_in_progress, 0);
+#ifdef CONFIG_PAGE_ISOLATION
+ spin_lock_init(&zone->isolation_lock);
+ INIT_LIST_HEAD(&zone->isolation_list);
+#endif
if (!size)
continue;
@@ -3728,3 +3736,179 @@ EXPORT_SYMBOL(pfn_to_page);
EXPORT_SYMBOL(page_to_pfn);
#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
+#ifdef CONFIG_PAGE_ISOLATION
+/*
+ * Page Isolation.
+ *
+ * If a page is removed from the usual free_list and will never be used,
+ * it is linked to a "struct isolation_info" and its Reserved and Private
+ * bits are set. page->private points to the isolation_info it belongs to,
+ * and page_count(page) is 0.
+ *
+ * This can be used for creating a chunk of contiguous *unused* memory.
+ *
+ * The current user is memory hot-remove.
+ * Maybe moving this to some other file would be better.
+ */
+static void
+isolate_page_nolock(struct isolation_info *info, struct page *page, int order)
+{
+ int pagenum;
+ pagenum = 1 << order;
+ while (pagenum > 0) {
+ SetPageReserved(page);
+ SetPagePrivate(page);
+ page->private = (unsigned long)info;
+ list_add(&page->lru, &info->pages);
+ page++;
+ pagenum--;
+ }
+}
+
+/*
+ * This function is called from page_under_isolation().
+ */
+
+int __page_under_isolation(struct zone *zone, struct page *page, int order)
+{
+ struct isolation_info *info;
+ unsigned long pfn = page_to_pfn(page);
+ unsigned long flags;
+ int found = 0;
+
+ spin_lock_irqsave(&zone->isolation_lock,flags);
+ list_for_each_entry(info, &zone->isolation_list, list) {
+ if (info->start_pfn <= pfn && pfn < info->end_pfn) {
+ found = 1;
+ break;
+ }
+ }
+ if (found) {
+ isolate_page_nolock(info, page, order);
+ }
+ spin_unlock_irqrestore(&zone->isolation_lock, flags);
+ return found;
+}
+
+/*
+ * start and end must be in the same zone.
+ *
+ */
+struct isolation_info *
+register_isolation(unsigned long start, unsigned long end)
+{
+ struct zone *zone;
+ struct isolation_info *info = NULL, *tmp;
+ unsigned long flags;
+ unsigned long last_pfn = end - 1;
+
+ if (!pfn_valid(start) || !pfn_valid(last_pfn) || (start >= end))
+ return ERR_PTR(-EINVAL);
+ /* check that start and end are in the same zone */
+ zone = page_zone(pfn_to_page(start));
+
+ if (zone != page_zone(pfn_to_page(last_pfn)))
+ return ERR_PTR(-EINVAL);
+ /* target range has to match MAX_ORDER alignment */
+ if ((start & (MAX_ORDER_NR_PAGES - 1)) ||
+ (end & (MAX_ORDER_NR_PAGES - 1)))
+ return ERR_PTR(-EINVAL);
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return ERR_PTR(-ENOMEM);
+ spin_lock_irqsave(&zone->isolation_lock, flags);
+ /* we don't allow overlap among isolation areas */
+ if (!list_empty(&zone->isolation_list)) {
+ list_for_each_entry(tmp, &zone->isolation_list, list) {
+ if (start < tmp->end_pfn && end > tmp->start_pfn) {
+ goto out_free;
+ }
+ }
+ }
+ info->start_pfn = start;
+ info->end_pfn = end;
+ info->zone = zone;
+ INIT_LIST_HEAD(&info->list);
+ INIT_LIST_HEAD(&info->pages);
+ list_add(&info->list, &zone->isolation_list);
+out_unlock:
+ spin_unlock_irqrestore(&zone->isolation_lock, flags);
+ return info;
+out_free:
+ kfree(info);
+ info = ERR_PTR(-EBUSY);
+ goto out_unlock;
+}
+/*
+ * Remove IsolationInfo from zone.
+ * After this, we can unuse memory in info or
+ * free back to freelist.
+ */
+
+void
+detach_isolation_info_zone(struct isolation_info *info)
+{
+ unsigned long flags;
+ struct zone *zone = info->zone;
+ spin_lock_irqsave(&zone->isolation_lock,flags);
+ list_del(&info->list);
+ info->zone = NULL;
+ spin_unlock_irqrestore(&zone->isolation_lock,flags);
+}
+
+/*
+ * All pages in info->pages should be removed before calling this.
+ * And info should be detached from zone.
+ */
+void
+free_isolation_info(struct isolation_info *info)
+{
+ BUG_ON(!list_empty(&info->pages));
+ BUG_ON(info->zone);
+ kfree(info);
+ return;
+}
+
+/*
+ * Mark all pages in the isolation_info as Reserved.
+ * When onlining these pages again, a user must check
+ * which pages are usable via IORESOURCE_RAM;
+ * please see memory_hotplug.c/online_pages() if unclear.
+ *
+ * info should be detached from zone before calling this.
+ */
+void
+unuse_all_isolated_pages(struct isolation_info *info)
+{
+ struct page *page, *n;
+ BUG_ON(info->zone);
+ list_for_each_entry_safe(page, n, &info->pages, lru) {
+ SetPageReserved(page);
+ page->private = 0;
+ ClearPagePrivate(page);
+ list_del(&page->lru);
+ }
+}
+
+/*
+ * Free all pages connected in isolation list.
+ * pages are moved back to free_list.
+ */
+void
+free_all_isolated_pages(struct isolation_info *info)
+{
+ struct page *page, *n;
+ BUG_ON(info->zone);
+ list_for_each_entry_safe(page, n ,&info->pages, lru) {
+ ClearPagePrivate(page);
+ ClearPageReserved(page);
+ page->private = 0;
+ list_del(&page->lru);
+ set_page_count(page, 0);
+ set_page_refcounted(page);
+ /* This is safe because info is detached from the zone */
+ __free_page(page);
+ }
+}
+
+#endif
Index: devel-tree-2.6.20-mm2/mm/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/Kconfig
+++ devel-tree-2.6.20-mm2/mm/Kconfig
@@ -224,3 +224,10 @@ config DEBUG_READAHEAD
Say N for production servers.
+config PAGE_ISOLATION
+ bool "Page Isolation Framework"
+ help
+ This option adds a page isolation framework to mm.
+ It is used to isolate an amount of contiguous pages from Linux
+ memory management.
+ Say N if unsure.
Index: devel-tree-2.6.20-mm2/include/linux/page_isolation.h
===================================================================
--- /dev/null
+++ devel-tree-2.6.20-mm2/include/linux/page_isolation.h
@@ -0,0 +1,52 @@
+#ifndef __LINUX_PAGE_ISOLATION_H
+#define __LINUX_PAGE_ISOLATION_H
+
+#ifdef CONFIG_PAGE_ISOLATION
+
+struct isolation_info {
+ struct list_head list;
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+ struct zone *zone;
+ struct list_head pages;
+};
+
+extern int
+__page_under_isolation(struct zone *zone, struct page *page, int order);
+
+static inline int
+page_under_isolation(struct zone *zone, struct page *page, int order)
+{
+ if (likely(list_empty(&zone->isolation_list)))
+ return 0;
+ return __page_under_isolation(zone, page, order);
+}
+
+static inline int
+is_page_isolated(struct isolation_info *info, struct page *page)
+{
+ if (PageReserved(page) && PagePrivate(page) &&
+ page_count(page) == 0 &&
+ page->private == (unsigned long)info)
+ return 1;
+ return 0;
+}
+
+extern struct isolation_info *
+register_isolation(unsigned long start, unsigned long end);
+
+extern void detach_isolation_info_zone(struct isolation_info *info);
+extern void free_isolation_info(struct isolation_info *info);
+extern void unuse_all_isolated_pages(struct isolation_info *info);
+extern void free_all_isolated_pages(struct isolation_info *info);
+
+#else
+
+static inline int
+page_under_isolation(struct zone *zone, struct page *page, int order)
+{
+ return 0;
+}
+
+#endif
+#endif
--
* [RFC} memory unplug patchset prep [12/16] drain all pages
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (10 preceding siblings ...)
2007-03-06 4:55 ` [RFC} memory unplug patchset prep [11/16] page isolation core KAMEZAWA Hiroyuki
@ 2007-03-06 4:56 ` KAMEZAWA Hiroyuki
2007-03-06 4:57 ` [RFC} memory unplug patchset prep [13/16] isolate freed pages KAMEZAWA Hiroyuki
` (4 subsequent siblings)
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:56 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
This patch adds a function, drain_all_pages(void), to drain all
pages on the per-cpu free lists.
Page isolation will then catch them in free_one_page().
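A minimal sketch of how a caller is expected to combine this with the
isolation hook from the previous patch (illustrative only):

	info = register_isolation(start_pfn, end_pfn);
	/*
	 * Flush the per-cpu lists so those pages pass through
	 * free_one_page(), where page_under_isolation() can capture them.
	 */
	drain_all_pages();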
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/page_isolation.h | 1 +
mm/page_alloc.c | 17 ++++++++++++++++-
2 files changed, 17 insertions(+), 1 deletion(-)
Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
+++ devel-tree-2.6.20-mm2/mm/page_alloc.c
@@ -822,6 +822,9 @@ void mark_free_pages(struct zone *zone)
spin_unlock_irqrestore(&zone->lock, flags);
}
+#endif /* CONFIG_PM */
+
+#if defined(CONFIG_PM) || defined(CONFIG_PAGE_ISOLATION)
/*
* Spill all of this CPU's per-cpu pages back into the buddy allocator.
@@ -834,8 +837,20 @@ void drain_local_pages(void)
__drain_pages(smp_processor_id());
local_irq_restore(flags);
}
-#endif /* CONFIG_PM */
+#endif
+#ifdef CONFIG_PAGE_ISOLATION
+static void drain_local_zone_pages(struct work_struct *work)
+{
+ drain_local_pages();
+}
+
+void drain_all_pages(void)
+{
+ schedule_on_each_cpu(drain_local_zone_pages);
+}
+
+#endif
/*
* Free a 0-order page
*/
Index: devel-tree-2.6.20-mm2/include/linux/page_isolation.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/page_isolation.h
+++ devel-tree-2.6.20-mm2/include/linux/page_isolation.h
@@ -39,6 +39,7 @@ extern void detach_isolation_info_zone(s
extern void free_isolation_info(struct isolation_info *info);
extern void unuse_all_isolated_pages(struct isolation_info *info);
extern void free_all_isolated_pages(struct isolation_info *info);
+extern void drain_all_pages(void);
#else
--
* [RFC} memory unplug patchset prep [13/16] isolate freed pages.
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (11 preceding siblings ...)
2007-03-06 4:56 ` [RFC} memory unplug patchset prep [12/16] drain all pages KAMEZAWA Hiroyuki
@ 2007-03-06 4:57 ` KAMEZAWA Hiroyuki
2007-03-06 4:59 ` [RFC} memory unplug patchset prep [14/16] memory unplug core KAMEZAWA Hiroyuki
` (3 subsequent siblings)
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:57 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Isolate all freed pages (i.e. pages already on the buddy free lists) in the range.
See the page_is_buddy() and free_one_page() functions if unsure.
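A small usage sketch, assuming an isolation_info has already been registered
for the range:

	int nr;

	nr = capture_isolate_freed_pages(info);
	if (nr < 0)
		return nr;	/* range or zone mismatch */
	/* nr pages were already free and are now held by the isolation_info */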
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/page_isolation.h | 2 +
mm/page_alloc.c | 48 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+)
Index: devel-tree-2.6.20-mm2/mm/page_alloc.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/page_alloc.c
+++ devel-tree-2.6.20-mm2/mm/page_alloc.c
@@ -3927,10 +3929,59 @@ free_all_isolated_pages(struct isolation
list_del(&page->lru);
set_page_count(page, 0);
set_page_refcounted(page);
/* This is safe because info is detached from the zone */
__free_page(page);
}
}
+
+/*
+ * Isolate already freed pages.
+ */
+int
+capture_isolate_freed_pages(struct isolation_info *info)
+{
+ struct zone *zone;
+ unsigned long pfn;
+ struct page *page;
+ int order, order_size;
+ int nr_pages = 0;
+ unsigned long last_pfn = info->end_pfn - 1;
+ pfn = info->start_pfn;
+ if (!pfn_valid(pfn))
+ return -EINVAL;
+ zone = info->zone;
+ if ((zone != page_zone(pfn_to_page(pfn))) ||
+ (zone != page_zone(pfn_to_page(last_pfn))))
+ return -EINVAL;
+ drain_all_pages();
+ spin_lock(&zone->lock);
+ while (pfn < info->end_pfn) {
+ if (!pfn_valid(pfn)) {
+ pfn++;
+ continue;
+ }
+ page = pfn_to_page(pfn);
+ /* See page_is_buddy() */
+ if (page_count(page) == 0 && PageBuddy(page)) {
+ order = page_order(page);
+ order_size = 1 << order;
+ zone->free_area[order].nr_free--;
+ __mod_zone_page_state(zone, NR_FREE_PAGES, -order_size);
+ list_del(&page->lru);
+ rmv_page_order(page);
+ isolate_page_nolock(info, page, order);
+ nr_pages += order_size;
+ pfn += order_size;
+ } else {
+ pfn++;
+ }
+ }
+ spin_unlock(&zone->lock);
+ return nr_pages;
+}
+
+
#endif
--
* [RFC} memory unplug patchset prep [14/16] memory unplug core
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (12 preceding siblings ...)
2007-03-06 4:57 ` [RFC} memory unplug patchset prep [13/16] isolate freed pages KAMEZAWA Hiroyuki
@ 2007-03-06 4:59 ` KAMEZAWA Hiroyuki
2007-03-06 5:00 ` [RFC} memory unplug patchset prep [15/16] hot-unplug interface for ia64 KAMEZAWA Hiroyuki
` (2 subsequent siblings)
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 4:59 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Add the MEMORY_HOTREMOVE config and implement the basic algorithm.
This config selects ZONE_MOVABLE and PAGE_ISOLATION.
How it works:
1. register an isolation area over the specified section
2. search the mem_map and migrate pages (see the sketch below)
3. detach the isolation and make the pages unused.
This works in my simple test, but I think the loop algorithm and policy
need more work.
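Roughly, the three steps map onto the interfaces from the earlier patches as
in this sketch (simplified; all_isolated() is a hypothetical check, the real
loop is offline_pages() in this patch):

	info = register_isolation(start_pfn, end_pfn);		/* step 1 */
	capture_isolate_freed_pages(info);			/* grab already-free pages */
	while (!all_isolated(info))				/* step 2 */
		do_migrate_and_isolate_pages(info, start_pfn, end_pfn);
	detach_isolation_info_zone(info);			/* step 3 */
	unuse_all_isolated_pages(info);
	free_isolation_info(info);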
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/memory_hotplug.h | 1
mm/Kconfig | 9 +
mm/memory_hotplug.c | 219 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 228 insertions(+), 1 deletion(-)
Index: devel-tree-2.6.20-mm2/mm/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/Kconfig
+++ devel-tree-2.6.20-mm2/mm/Kconfig
@@ -126,6 +126,13 @@ config MEMORY_HOTPLUG_SPARSE
def_bool y
depends on SPARSEMEM && MEMORY_HOTPLUG
+config MEMORY_HOTREMOVE
+ bool "Allow for memory hot-remvoe"
+ depends on MEMORY_HOTPLUG_SPARSE
+ select ZONE_MOVABLE
+ select MIGRATION
+ select PAGE_ISOLATION
+
# Heavily threaded applications may benefit from splitting the mm-wide
# page_table_lock, so that faults on different parts of the user address
# space can be handled with less contention: split it at this NR_CPUS.
@@ -145,7 +152,7 @@ config SPLIT_PTLOCK_CPUS
config MIGRATION
bool "Page migration"
def_bool y
- depends on NUMA
+ depends on NUMA || MEMORY_HOTREMOVE
help
Allows the migration of the physical location of pages of processes
while the virtual addresses are not changed. This is useful for
Index: devel-tree-2.6.20-mm2/mm/memory_hotplug.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/memory_hotplug.c
+++ devel-tree-2.6.20-mm2/mm/memory_hotplug.c
@@ -23,6 +23,9 @@
#include <linux/vmalloc.h>
#include <linux/ioport.h>
#include <linux/cpuset.h>
+#include <linux/page_isolation.h>
+#include <linux/delay.h>
+#include <linux/migrate.h>
#include <asm/tlbflush.h>
@@ -308,3 +311,221 @@ error:
return ret;
}
EXPORT_SYMBOL_GPL(add_memory);
+
+
+
+#ifdef CONFIG_MEMORY_HOTREMOVE
+
+/*
+ * Just an easy implementation.
+ */
+static struct page *
+hotremove_migrate_alloc(struct page *page,
+ unsigned long private,
+ int **x)
+{
+ return alloc_page(GFP_HIGH_MOVABLE);
+}
+
+/* number of pages scanned per iteration */
+#define HOTREMOVE_UNIT (1024)
+
+static int do_migrate_and_isolate_pages(struct isolation_info *info,
+ unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ int move_pages = HOTREMOVE_UNIT;
+ int ret, managed, not_managed;
+ unsigned long pfn;
+ struct page *page;
+ LIST_HEAD(source);
+
+ managed = not_managed = 0;
+ for (pfn = start_pfn; pfn < end_pfn && move_pages > 0; pfn++) {
+ if (!pfn_valid(pfn)) /* never happens in sparsemem */
+ continue;
+ page = pfn_to_page(pfn);
+ if (is_page_isolated(info,page))
+ continue;
+ ret = isolate_lru_page(page, &source);
+
+ if (ret == 1) {
+ move_pages--;
+ managed++;
+ } else {
+ if (page_count(page))
+ not_managed++; /* someone uses this */
+ }
+ }
+ ret = -EBUSY;
+ if (not_managed) {
+ if (!list_empty(&source))
+ putback_lru_pages(&source);
+ goto out;
+ }
+ ret = 0;
+ if (list_empty(&source))
+ goto out;
+ /* this function returns # of failed pages */
+ ret = migrate_pages(&source, hotremove_migrate_alloc,
+ (unsigned long)info);
+out:
+ return ret;
+}
+
+
+/*
+ * Check whether all pages registered as IORESOURCE_RAM are isolated.
+ */
+static int check_removal_success(struct isolation_info *info)
+{
+ struct resource res;
+ unsigned long section_end;
+ unsigned long start_pfn, i, nr_pages;
+ struct page *page;
+ int removed = 0;
+ res.start = info->start_pfn << PAGE_SHIFT;
+ res.end = (info->end_pfn - 1) << PAGE_SHIFT;
+ res.flags = IORESOURCE_MEM;
+ section_end = res.end;
+ while ((res.start < res.end) && (find_next_system_ram(&res) >= 0)) {
+ start_pfn =(res.start >> PAGE_SHIFT);
+ nr_pages = (res.end + 1UL - res.start) >> PAGE_SHIFT;
+ for (i = 0; i < nr_pages; i++) {
+ page = pfn_to_page(start_pfn + i);
+ if (!is_page_isolated(info,page))
+ return -EBUSY;
+ removed++;
+ }
+ res.start = res.end + 1;
+ res.end = section_end;
+ }
+ return removed;
+}
+/*
+ * start_pfn and end_pfn must be aligned to SECTION_SIZE.
+ * start_pfn and end_pfn must be in the same zone.
+ * The target page range must be in ZONE_MOVABLE.
+ *
+ * Under these conditions, the pages in [start_pfn, end_pfn) are isolated.
+ * All freed pages in the range are captured into the isolation_info.
+ *
+ * If all pages in the range are isolated, offline_pages() returns 0.
+ *
+ * Note: memory holes in a section are marked as Reserved memory.
+ * So we ignore Reserved pages in the first check.
+ * But bootmem is also marked as Reserved.
+ * We check the memory resource information and confirm that we freed
+ * all necessary pages.
+ */
+
+int offline_pages(unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long timeout)
+{
+ struct isolation_info *info;
+ struct page *page;
+ LIST_HEAD(pagelist);
+ int ret, nr_pages;
+ unsigned long expire = jiffies + timeout;
+ struct zone *zone;
+ unsigned long pfn, offlined_pages;
+
+ if (start_pfn & (PAGES_PER_SECTION - 1))
+ return -EINVAL;
+ if (end_pfn & (PAGES_PER_SECTION - 1))
+ return -EINVAL;
+
+ zone = page_zone(pfn_to_page(start_pfn));
+
+ if (!is_movable(zone))
+ return -EBUSY;
+
+ for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ if (!pfn_valid(pfn))
+ continue;
+ page = pfn_to_page(pfn);
+ if (PageSlab(page) ||
+ PageUncached(page) ||
+ PageCompound(page))
+ break;
+ }
+ if (pfn < end_pfn)
+ return -EBUSY;
+
+ info = register_isolation(start_pfn, end_pfn);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+ /* start memory hot removal */
+
+ ret = capture_isolate_freed_pages(info);
+ if(ret < 0)
+ goto failed_removal;
+
+ nr_pages = end_pfn - start_pfn;
+ pfn = start_pfn;
+repeat:
+ ret = -EAGAIN;
+ if (time_after(jiffies, expire))
+ goto failed_removal;
+ ret = -EINTR;
+ if (signal_pending(current))
+ goto failed_removal;
+
+ lru_add_drain_all();
+
+ for(;pfn < end_pfn;pfn++) {
+ if (!pfn_valid(pfn))
+ continue;
+ page = pfn_to_page(pfn);
+ if (PageReserved(page)) /* ignore Reserved pages for now */
+ continue;
+ if (!is_page_isolated(info,page))
+ break;
+ }
+
+ if (pfn != end_pfn) {
+ ret = do_migrate_and_isolate_pages(info, pfn, end_pfn);
+ if (!ret) {
+ cond_resched();
+ goto repeat;
+ } else if (ret < 0) {
+ ret = -EBUSY;
+ goto failed_removal;
+ } else if (ret > 0) {
+ /* some congestion found. sleep a bit */
+ msleep(10);
+ goto repeat;
+ }
+ }
+ /* check memory holes and bootmem */
+ ret = check_removal_success(info);
+ if (ret < 0) {
+ goto failed_removal;
+ }
+ offlined_pages = ret;
+ /* all pages are isolated */
+ detach_isolation_info_zone(info);
+ unuse_all_isolated_pages(info);
+ free_isolation_info(info);
+ zone->present_pages -= offlined_pages;
+ zone->zone_pgdat->node_present_pages -= offlined_pages;
+ totalram_pages -= offlined_pages;
+ num_physpages -= offlined_pages;
+ vm_total_pages = nr_free_pagecache_pages();
+ writeback_set_ratelimit();
+ return 0;
+
+failed_removal:
+ if (ret == -EBUSY) {
+ printk("some unremovable pages are included in %lx to %lx\n",
+ info->start_pfn, info->end_pfn);
+ }
+ /* push back to free_list */
+ detach_isolation_info_zone(info);
+ free_all_isolated_pages(info);
+ free_isolation_info(info);
+ return ret;
+}
+
+#endif
Index: devel-tree-2.6.20-mm2/include/linux/memory_hotplug.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/memory_hotplug.h
+++ devel-tree-2.6.20-mm2/include/linux/memory_hotplug.h
@@ -59,6 +59,7 @@ extern int add_one_highpage(struct page
extern void online_page(struct page *page);
/* VM interface that may be used by firmware interface */
extern int online_pages(unsigned long, unsigned long);
+extern int offline_pages(unsigned long, unsigned long, unsigned long);
/* reasonably generic interface to expand the physical pages in a zone */
extern int __add_pages(struct zone *zone, unsigned long start_pfn,
--
* [RFC} memory unplug patchset prep [15/16] hot-unplug interface for ia64
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (13 preceding siblings ...)
2007-03-06 4:59 ` [RFC} memory unplug patchset prep [14/16] memory unplug core KAMEZAWA Hiroyuki
@ 2007-03-06 5:00 ` KAMEZAWA Hiroyuki
2007-03-06 5:02 ` [RFC} memory unplug patchset prep [16/16] migration nocontext KAMEZAWA Hiroyuki
2007-03-06 15:24 ` [RFC} memory unplug patchset prep [0/16] David Rientjes
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 5:00 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Call offline_pages() from remove_memory().
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
arch/ia64/mm/init.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
Index: devel-tree-2.6.20-mm2/arch/ia64/mm/init.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/arch/ia64/mm/init.c
+++ devel-tree-2.6.20-mm2/arch/ia64/mm/init.c
@@ -759,7 +759,18 @@ int arch_add_memory(int nid, u64 start,
int remove_memory(u64 start, u64 size)
{
- return -EINVAL;
+ unsigned long start_pfn, end_pfn;
+ unsigned long timeout = 120 * HZ;
+ int ret;
+ start_pfn = start >> PAGE_SHIFT;
+ end_pfn = start_pfn + (size >> PAGE_SHIFT);
+ ret = offline_pages(start_pfn, end_pfn, timeout);
+ if (ret)
+ goto out;
+ /* we can free mem_map at this point */
+out:
+ return ret;
}
+
EXPORT_SYMBOL_GPL(remove_memory);
#endif
--
* [RFC} memory unplug patchset prep [16/16] migration nocontext
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (14 preceding siblings ...)
2007-03-06 5:00 ` [RFC} memory unplug patchset prep [15/16] hot-unplug interface for ia64 KAMEZAWA Hiroyuki
@ 2007-03-06 5:02 ` KAMEZAWA Hiroyuki
2007-03-06 15:24 ` [RFC} memory unplug patchset prep [0/16] David Rientjes
16 siblings, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-06 5:02 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, akpm
Delay freeing the anon_vma until migration finishes.
We cannot trust page->mapping (of an ANON page) when page_mapcount(page) == 0,
and page migration drives page_mapcount(page) down to 0. So we have to
guarantee that the anon_vma pointed to by page->mapping stays valid by some hook.
Usual page migration guarantees this via mm->sem, but we can't do that here.
So, just delay freeing the anon_vma.
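The intended pairing looks roughly like this (simplified sketch; the real
calls are in unmap_and_move() below):

	anon_vma_hold(page);		/* pin the anon_vma before mapcount drops */
	try_to_unmap(page, 1);		/* establish migration ptes, mapcount -> 0 */
	rc = move_to_new_page(newpage, page);
	if (rc)
		remove_migration_ptes(page, page);
	anon_vma_release(page);		/* the anon_vma may be freed from here on */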
Signed-Off-By: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/migrate.h | 2 ++
include/linux/rmap.h | 21 +++++++++++++++++++++
mm/Kconfig | 12 ++++++++++++
mm/memory_hotplug.c | 4 ++--
mm/migrate.c | 35 +++++++++++++++++++++++++++++------
mm/rmap.c | 36 +++++++++++++++++++++++++++++++++++-
6 files changed, 101 insertions(+), 9 deletions(-)
Index: devel-tree-2.6.20-mm2/mm/migrate.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/migrate.c
+++ devel-tree-2.6.20-mm2/mm/migrate.c
@@ -601,7 +601,7 @@ static int move_to_new_page(struct page
* to the newly allocated page in newpage.
*/
static int unmap_and_move(new_page_t get_new_page, unsigned long private,
- struct page *page, int force)
+ struct page *page, int force, int nocontext)
{
int rc = 0;
int *result = NULL;
@@ -626,7 +626,10 @@ static int unmap_and_move(new_page_t get
goto unlock;
wait_on_page_writeback(page);
}
-
+ if (PageAnon(page) && nocontext) {
+ /* hold this anon_vma until remove_migration_ptes() finishes */
+ anon_vma_hold(page);
+ }
/*
* Establish migration ptes or remove ptes
*/
@@ -634,8 +637,14 @@ static int unmap_and_move(new_page_t get
if (!page_mapped(page))
rc = move_to_new_page(newpage, page);
- if (rc)
+ if (rc) {
remove_migration_ptes(page, page);
+ if (PageAnon(page) && nocontext)
+ anon_vma_release(page);
+ } else {
+ if (PageAnon(newpage) && nocontext)
+ anon_vma_release(page);
+ }
unlock:
unlock_page(page);
@@ -680,8 +689,8 @@ move_newpage:
*
* Return: Number of pages not migrated or error code.
*/
-int migrate_pages(struct list_head *from,
- new_page_t get_new_page, unsigned long private)
+static int __migrate_pages(struct list_head *from,
+ new_page_t get_new_page, unsigned long private, int nocontext)
{
int retry = 1;
int nr_failed = 0;
@@ -701,7 +710,7 @@ int migrate_pages(struct list_head *from
cond_resched();
rc = unmap_and_move(get_new_page, private,
- page, pass > 2);
+ page, pass > 2, nocontext);
switch(rc) {
case -ENOMEM:
@@ -731,6 +740,20 @@ out:
return nr_failed + retry;
}
+int migrate_pages(struct list_head *from,
+ new_page_t get_new_page, unsigned long private)
+{
+ return __migrate_pages(from, get_new_page, private, 0);
+}
+
+#ifdef CONFIG_MIGRATION_NOCONTEXT
+int migrate_pages_nocontext(struct list_head *from,
+ new_page_t get_new_page, unsigned long private)
+{
+ return __migrate_pages(from, get_new_page, private, 1);
+}
+#endif
+
#ifdef CONFIG_NUMA
/*
* Move a list of individual pages
Index: devel-tree-2.6.20-mm2/include/linux/rmap.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/rmap.h
+++ devel-tree-2.6.20-mm2/include/linux/rmap.h
@@ -26,6 +26,9 @@
struct anon_vma {
spinlock_t lock; /* Serialize access to vma list */
struct list_head head; /* List of private "related" vmas */
+#ifdef CONFIG_MIGRATION_NOCONTEXT
+ atomic_t hold; /* == 0 if we can free this immediately */
+#endif
};
#ifdef CONFIG_MMU
@@ -37,10 +40,14 @@ static inline struct anon_vma *anon_vma_
return kmem_cache_alloc(anon_vma_cachep, GFP_KERNEL);
}
+#ifndef CONFIG_MIGRATION_NOCONTEXT
static inline void anon_vma_free(struct anon_vma *anon_vma)
{
kmem_cache_free(anon_vma_cachep, anon_vma);
}
+#else
+extern void anon_vma_free(struct anon_vma *anon_vma);
+#endif
static inline void anon_vma_lock(struct vm_area_struct *vma)
{
@@ -74,6 +81,20 @@ void page_add_new_anon_rmap(struct page
void page_add_file_rmap(struct page *);
void page_remove_rmap(struct page *, struct vm_area_struct *);
+#ifdef CONFIG_MIGRATION_NOCONTEXT
+/*
+ * When doing page migration without any process context, we don't have
+ * mm->sem. Because page->mapcount goes down to 0 during migration,
+ * we cannot trust the page->mapping value.
+ * These two functions prevent the anon_vma from being freed during
+ * migration.
+ */
+void anon_vma_hold(struct page *page);
+void anon_vma_release(struct page *page);
+#else
+#define anon_vma_hold(page) do{}while(0)
+#define anon_vma_release(page) do{}while(0)
+#endif
/**
* page_dup_rmap - duplicate pte mapping to a page
* @page: the page to add the mapping to
Index: devel-tree-2.6.20-mm2/mm/rmap.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/rmap.c
+++ devel-tree-2.6.20-mm2/mm/rmap.c
@@ -155,8 +155,9 @@ void anon_vma_unlink(struct vm_area_stru
empty = list_empty(&anon_vma->head);
spin_unlock(&anon_vma->lock);
- if (empty)
+ if (empty) {
anon_vma_free(anon_vma);
+ }
}
static void anon_vma_ctor(void *data, struct kmem_cache *cachep,
@@ -939,3 +940,36 @@ int try_to_unmap(struct page *page, int
return ret;
}
+#ifdef CONFIG_MIGRATION_NOCONTEXT
+
+void anon_vma_free(struct anon_vma *anon)
+{
+ if (atomic_read(&anon->hold) == 0) {
+ kmem_cache_free(anon_vma_cachep, anon);
+ }
+}
+
+void anon_vma_hold(struct page *page)
+{
+ struct anon_vma *anon_vma;
+ anon_vma = page_lock_anon_vma(page);
+ if (!anon_vma)
+ return;
+ atomic_set(&anon_vma->hold, 1);
+ spin_unlock(&anon_vma->lock);
+}
+
+void anon_vma_release(struct page *page)
+{
+ struct anon_vma *anon_vma;
+ int empty;
+ anon_vma = page_lock_anon_vma(page);
+ if (!anon_vma)
+ return;
+ atomic_set(&anon_vma->hold, 0);
+ empty = list_empty(&anon_vma->head);
+ spin_unlock(&anon_vma->lock);
+ if (empty)
+ anon_vma_free(anon_vma);
+}
+#endif
Index: devel-tree-2.6.20-mm2/mm/Kconfig
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/Kconfig
+++ devel-tree-2.6.20-mm2/mm/Kconfig
@@ -132,6 +132,7 @@ config MEMORY_HOTREMOVE
select ZONE_MOVABLE
select MIGRATION
select PAGE_ISOLATION
+ select MIGRATION_NOCONTEXT
# Heavily threaded applications may benefit from splitting the mm-wide
# page_table_lock, so that faults on different parts of the user address
@@ -159,6 +160,17 @@ config MIGRATION
example on NUMA systems to put pages nearer to the processors accessing
the page.
+config MIGRATION_NOCONTEXT
+ bool "Page migration without process context"
+ def_bool y
+ depends on MEMORY_HOTREMOVE
+ help
+ When memory hot-remove is executed, page migration runs.
+ But the process which does the page migration does not own the context
+ of the migration target pages, so there is a small race condition.
+ If this config is selected, a workaround to fix it is enabled.
+ This may add a slight performance overhead.
+
config RESOURCES_64BIT
bool "64 bit Memory and IO resources (EXPERIMENTAL)" if (!64BIT && EXPERIMENTAL)
default 64BIT
Index: devel-tree-2.6.20-mm2/include/linux/migrate.h
===================================================================
--- devel-tree-2.6.20-mm2.orig/include/linux/migrate.h
+++ devel-tree-2.6.20-mm2/include/linux/migrate.h
@@ -11,6 +11,8 @@ extern int putback_lru_pages(struct list
extern int migrate_page(struct address_space *,
struct page *, struct page *);
extern int migrate_pages(struct list_head *l, new_page_t x, unsigned long);
+extern int migrate_pages_nocontext(struct list_head *l,
+ new_page_t x, unsigned long);
extern int fail_migrate_page(struct address_space *,
struct page *, struct page *);
Index: devel-tree-2.6.20-mm2/mm/memory_hotplug.c
===================================================================
--- devel-tree-2.6.20-mm2.orig/mm/memory_hotplug.c
+++ devel-tree-2.6.20-mm2/mm/memory_hotplug.c
@@ -345,7 +345,7 @@ static int do_migrate_and_isolate_pages(
if (!pfn_valid(pfn)) /* never happens in sparsemem */
continue;
page = pfn_to_page(pfn);
- if (is_page_isolated(info,page))
+ if (PageReserved(page))
continue;
ret = isolate_lru_page(page, &source);
@@ -367,7 +367,7 @@ static int do_migrate_and_isolate_pages(
if (list_empty(&source))
goto out;
/* this function returns # of failed pages */
- ret = migrate_pages(&source, hotremove_migrate_alloc,
+ ret = migrate_pages_nocontext(&source, hotremove_migrate_alloc,
(unsigned long)info);
out:
return ret;
--
* Re: [RFC} memory unplug patchset prep [0/16]
2007-03-06 4:32 [RFC} memory unplug patchset prep [0/16] KAMEZAWA Hiroyuki
` (15 preceding siblings ...)
2007-03-06 5:02 ` [RFC} memory unplug patchset prep [16/16] migration nocontext KAMEZAWA Hiroyuki
@ 2007-03-06 15:24 ` David Rientjes
2007-03-07 2:24 ` KAMEZAWA Hiroyuki
16 siblings, 1 reply; 34+ messages in thread
From: David Rientjes @ 2007-03-06 15:24 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: Linux-MM, mel, clameter, akpm
On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
> My purpose is to show how memory-unplug can be implemented on ZONE_MOVABLE.
> *Any* comments are welcome. This patch just support ia64, which I can test.
> If you want me to add arch support, please mail me.
>
It's great to see progress being made in the memory hot-unplug direction.
The implementation seems to be following the plan you had in our
conversations from the end of last year.
> This patch is a bit old and against 2.6.20-mm2. I'll rebase this and reflect
> your comments in the next post (may not soon).
> Well booted on ia64 and passed *quick* memory offline test.
>
When it's rebased, it might be better to apply it to the latest -mm with
Mel Gorman's patch series merged. They're in 2.6.21-rc2-mm2.
It appears as though you're using a subset of the ZONE_MOVABLE patches as
posted from March 1. What about the additional capabilities of
ZONE_MOVABLE that aren't included in your patchset, such as allowing
hugetlb pages to be allocated under GFP_HIGH_MOVABLE? Are those going
to need to be changed to support memory hot-unplug? Since your patchset
wasn't based on the entire ZONE_MOVABLE set, it leads me to believe that
some of what it does diverges from the memory hot-unplug use case.
Are you aiming to target both ia64 and x86_64 with this patchset or are
you focusing on ia64 exclusively at the moment?
--
* Re: [RFC} memory unplug patchset prep [0/16]
2007-03-06 15:24 ` [RFC} memory unplug patchset prep [0/16] David Rientjes
@ 2007-03-07 2:24 ` KAMEZAWA Hiroyuki
2007-03-07 2:31 ` David Rientjes
0 siblings, 1 reply; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-07 2:24 UTC (permalink / raw)
To: David Rientjes; +Cc: linux-mm, mel, clameter, akpm
On Tue, 6 Mar 2007 07:24:52 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:
> On Tue, 6 Mar 2007, KAMEZAWA Hiroyuki wrote:
>
> > My purpose is to show how memory-unplug can be implemented on ZONE_MOVABLE.
> > *Any* comments are welcome. This patch just support ia64, which I can test.
> > If you want me to add arch support, please mail me.
> >
>
> It's great to see progress being made in the memory hot-unplug direction.
> The implementation seems to be following the plan you had in our
> coversations from the end of last year.
>
> > This patch is a bit old and against 2.6.20-mm2. I'll rebase this and reflect
> > your comments in the next post (may not soon).
> > Well booted on ia64 and passed *quick* memory offline test.
> >
>
> When it's rebased, it might be better to apply it to the latest -mm with
> Mel Gorman's patch series merged. They're in 2.6.21-rc2-mm2.
>
I will do.
> It appears as though you're using a subset of the ZONE_MOVABLE patches as
> posted from March 1. What about the additional capabilities of
> ZONE_MOVABLE that aren't included in your patchset, such as allowing
> hugetlb pages to be allocated under GFP_HIGH_MOVABLE? Are those going
> to need to be changed to support memory hot-unplug?
We need extra patches for "migrating HUGEPAGE". It's in my plan but not
scheduled yet.
> Since your patchset
> wasn't based on the entire ZONE_MOVABLE set, it leads me to believe that
> some of what it does diverges with the memory hot-unplug use case.
>
> Are you aiming to target both ia64 and x86_64 with this patchset or are
> you focusing on ia64 exclusively at the moment?
>
Just because a machine, which I can use as much as I want, is ia64.
I don't have x86_64 now. I'll add i386 in the next post.
I think all arch which support MEMORY_HOTPLUG will support unplug at last.
-Kame
--
* Re: [RFC} memory unplug patchset prep [0/16]
2007-03-07 2:24 ` KAMEZAWA Hiroyuki
@ 2007-03-07 2:31 ` David Rientjes
2007-03-07 2:44 ` KAMEZAWA Hiroyuki
2007-03-07 19:44 ` Mark Gross
0 siblings, 2 replies; 34+ messages in thread
From: David Rientjes @ 2007-03-07 2:31 UTC (permalink / raw)
To: KAMEZAWA Hiroyuki; +Cc: linux-mm, mel, clameter, Andrew Morton, mgross
On Wed, 7 Mar 2007, KAMEZAWA Hiroyuki wrote:
> > Are you aiming to target both ia64 and x86_64 with this patchset or are
> > you focusing on ia64 exclusively at the moment?
> >
> Just because a machine, which I can use as much as I want, is ia64.
> I don't have x86_64 now. I'll add i386 in the next post.
> I think all arch which support MEMORY_HOTPLUG will support unplug at last.
>
Ok, sounds good. I can offer quite extensive x86_64 testing coverage. I
think it's going to be much better to base this patchset on 2.6.21-rc2-mm2
so we don't have a couple different GFP_MOVABLE implementations floating
around.
I'll await your next patchset and then I'll play around with it for
x86_64. I'd like to eventually combine your memory unplug work with Mark
Gross's PM-memory enabling node flags (cc'd). We can wire it up through a
sysfs interface for userspace manipulation and see it working in action.
Looking forward to the next series.
David
--
* Re: [RFC} memory unplug patchset prep [0/16]
2007-03-07 2:31 ` David Rientjes
@ 2007-03-07 2:44 ` KAMEZAWA Hiroyuki
2007-03-07 19:44 ` Mark Gross
1 sibling, 0 replies; 34+ messages in thread
From: KAMEZAWA Hiroyuki @ 2007-03-07 2:44 UTC (permalink / raw)
To: David Rientjes; +Cc: linux-mm, mel, clameter, akpm, mgross
On Tue, 6 Mar 2007 18:31:35 -0800 (PST)
David Rientjes <rientjes@google.com> wrote:
> On Wed, 7 Mar 2007, KAMEZAWA Hiroyuki wrote:
>
> > > Are you aiming to target both ia64 and x86_64 with this patchset or are
> > > you focusing on ia64 exclusively at the moment?
> > >
> > Just because a machine, which I can use as much as I want, is ia64.
> > I don't have x86_64 now. I'll add i386 in the next post.
> > I think all arch which support MEMORY_HOTPLUG will support unplug at last.
> >
>
> Ok, sounds good. I can offer quite extensive x86_64 testing coverage. I
> think it's going to be much better to base this patchset on 2.6.21-rc2-mm2
> so we don't have a couple different GFP_MOVABLE implementations floating
> around.
>
Thank you, and I'll rebase to -mm and write the next patch set.
(But I sometimes get interrupted by other work, so please wait.)
> I'll await your next patchset and then I'll play around with it for
> x86_64. I'd like to eventually combine your memory unplug work with Mark
> Gross's PM-memory enabling node flags (cc'd). We can wire it up through a
> sysfs interface for userspace manipulation and see it working in action.
>
looks interesting :)
Thanks,
-Kame
--
* Re: [RFC} memory unplug patchset prep [0/16]
2007-03-07 2:31 ` David Rientjes
2007-03-07 2:44 ` KAMEZAWA Hiroyuki
@ 2007-03-07 19:44 ` Mark Gross
1 sibling, 0 replies; 34+ messages in thread
From: Mark Gross @ 2007-03-07 19:44 UTC (permalink / raw)
To: David Rientjes; +Cc: KAMEZAWA Hiroyuki, linux-mm, mel, clameter, Andrew Morton
On Tue, Mar 06, 2007 at 06:31:35PM -0800, David Rientjes wrote:
> On Wed, 7 Mar 2007, KAMEZAWA Hiroyuki wrote:
>
> > > Are you aiming to target both ia64 and x86_64 with this patchset or are
> > > you focusing on ia64 exclusively at the moment?
> > >
> > Just because a machine, which I can use as much as I want, is ia64.
> > I don't have x86_64 now. I'll add i386 in the next post.
> > I think all arch which support MEMORY_HOTPLUG will support unplug at last.
> >
>
> Ok, sounds good. I can offer quite extensive x86_64 testing coverage. I
> think it's going to be much better to base this patchset on 2.6.21-rc2-mm2
> so we don't have a couple different GFP_MOVABLE implementations floating
> around.
>
> I'll await your next patchset and then I'll play around with it for
> x86_64. I'd like to eventually combine your memory unplug work with Mark
> Gross's PM-memory enabling node flags (cc'd). We can wire it up through a
> sysfs interface for userspace manipulation and see it working in action.
>
> Looking forward to the next series.
Me too!
--mgross
--