* [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
@ 2006-11-30 17:07 Mel Gorman
2006-12-01 1:31 ` Andrew Morton
0 siblings, 1 reply; 38+ messages in thread
From: Mel Gorman @ 2006-11-30 17:07 UTC (permalink / raw)
To: akpm; +Cc: clameter, linux-mm, linux-kernel
Am reposting this patch after there were no further comments on the last
version.
It is often known at allocation time when a page may be migrated or not. This
patch adds a flag called __GFP_MOVABLE and GFP_HIGH_MOVABLE. Allocations using
__GFP_MOVABLE can be either migrated using the page migration mechanism
or reclaimed by syncing with backing storage and discarding.
Additional credit goes to Christoph Lameter and Linus Torvalds for shaping
the concept. Credit to Hugh Dickins for catching issues with shmem swap
vector and ramfs allocations.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
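As a quick illustration of how a caller is expected to use the new flags (the
two helper functions below are hypothetical and not part of this patch; only
the GFP names and alloc_page_vma()/alloc_page() exist in the kernel):
#include <linux/gfp.h>
#include <linux/mm.h>
/* Data mapped into userspace that can later be migrated or reclaimed:
 * GFP_HIGH_MOVABLE is GFP_HIGHUSER with __GFP_MOVABLE added. */
static struct page *example_alloc_movable(struct vm_area_struct *vma,
					  unsigned long addr)
{
	return alloc_page_vma(GFP_HIGH_MOVABLE, vma, addr);
}
/* A page whose physical location must never change (e.g. one handed to
 * hardware) must not set __GFP_MOVABLE. */
static struct page *example_alloc_pinned(void)
{
	return alloc_page(GFP_HIGHUSER);
}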
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/fs/compat.c linux-2.6.19-rc6-mm2-mark_highmovable/fs/compat.c
--- linux-2.6.19-rc6-mm2-clean/fs/compat.c 2006-11-29 10:31:09.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/fs/compat.c 2006-11-29 10:34:12.000000000 +0000
@@ -1419,7 +1419,7 @@ static int compat_copy_strings(int argc,
page = bprm->page[i];
new = 0;
if (!page) {
- page = alloc_page(GFP_HIGHUSER);
+ page = alloc_page(GFP_HIGH_MOVABLE);
bprm->page[i] = page;
if (!page) {
ret = -ENOMEM;
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/fs/exec.c linux-2.6.19-rc6-mm2-mark_highmovable/fs/exec.c
--- linux-2.6.19-rc6-mm2-clean/fs/exec.c 2006-11-29 10:31:09.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/fs/exec.c 2006-11-29 10:34:12.000000000 +0000
@@ -239,7 +239,7 @@ static int copy_strings(int argc, char _
page = bprm->page[i];
new = 0;
if (!page) {
- page = alloc_page(GFP_HIGHUSER);
+ page = alloc_page(GFP_HIGH_MOVABLE);
bprm->page[i] = page;
if (!page) {
ret = -ENOMEM;
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/fs/inode.c linux-2.6.19-rc6-mm2-mark_highmovable/fs/inode.c
--- linux-2.6.19-rc6-mm2-clean/fs/inode.c 2006-11-29 10:31:09.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/fs/inode.c 2006-11-29 10:34:12.000000000 +0000
@@ -146,7 +146,7 @@ static struct inode *alloc_inode(struct
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
+ mapping_set_gfp_mask(mapping, GFP_HIGH_MOVABLE);
mapping->assoc_mapping = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/fs/ramfs/inode.c linux-2.6.19-rc6-mm2-mark_highmovable/fs/ramfs/inode.c
--- linux-2.6.19-rc6-mm2-clean/fs/ramfs/inode.c 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/fs/ramfs/inode.c 2006-11-29 10:34:12.000000000 +0000
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct sup
inode->i_blocks = 0;
inode->i_mapping->a_ops = &ramfs_aops;
inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch (mode & S_IFMT) {
default:
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-alpha/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-alpha/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-alpha/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-alpha/page.h 2006-11-29 10:34:12.000000000 +0000
@@ -17,7 +17,7 @@
extern void clear_page(void *page);
#define clear_user_page(page, vaddr, pg) clear_page(page)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vmaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGH_MOVABLE | __GFP_ZERO, vma, vmaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
extern void copy_page(void * _to, void * _from);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-cris/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-cris/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-cris/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-cris/page.h 2006-11-29 10:34:12.000000000 +0000
@@ -20,7 +20,7 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGH_MOVABLE | __GFP_ZERO, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-h8300/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-h8300/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-h8300/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-h8300/page.h 2006-11-29 10:34:12.000000000 +0000
@@ -22,7 +22,7 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGH_MOVABLE | __GFP_ZERO, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-i386/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-i386/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-i386/page.h 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-i386/page.h 2006-11-29 10:34:12.000000000 +0000
@@ -35,7 +35,7 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGH_MOVABLE|__GFP_ZERO, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-ia64/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-ia64/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-ia64/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-ia64/page.h 2006-11-29 10:34:12.000000000 +0000
@@ -89,7 +89,7 @@ do { \
#define alloc_zeroed_user_highpage(vma, vaddr) \
({ \
- struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \
+ struct page *page = alloc_page_vma(GFP_HIGH_MOVABLE | __GFP_ZERO, vma, vaddr); \
if (page) \
flush_dcache_page(page); \
page; \
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-m32r/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-m32r/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-m32r/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-m32r/page.h 2006-11-29 10:34:12.000000000 +0000
@@ -16,7 +16,7 @@ extern void copy_page(void *to, void *fr
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGH_MOVABLE | __GFP_ZERO, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-s390/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-s390/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-s390/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-s390/page.h 2006-11-29 10:34:12.000000000 +0000
@@ -64,7 +64,7 @@ static inline void copy_page(void *to, v
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGH_MOVABLE | __GFP_ZERO, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-x86_64/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-x86_64/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-x86_64/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-x86_64/page.h 2006-11-29 10:34:12.000000000 +0000
@@ -51,7 +51,7 @@ void copy_page(void *, void *);
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGH_MOVABLE|__GFP_ZERO, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
* These are used to make use of C type-checking..
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/linux/gfp.h linux-2.6.19-rc6-mm2-mark_highmovable/include/linux/gfp.h
--- linux-2.6.19-rc6-mm2-clean/include/linux/gfp.h 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/linux/gfp.h 2006-11-29 10:34:12.000000000 +0000
@@ -30,6 +30,9 @@ struct vm_area_struct;
* cannot handle allocation failures.
*
* __GFP_NORETRY: The VM implementation must not retry indefinitely.
+ *
+ * __GFP_MOVABLE: Flag that this page will be movable by the page migration
+ * mechanism
*/
#define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? */
#define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? */
@@ -46,6 +49,7 @@ struct vm_area_struct;
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
+#define __GFP_MOVABLE ((__force gfp_t)0x80000u) /* Page is movable */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +58,8 @@ struct vm_area_struct;
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
+ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE|\
+ __GFP_MOVABLE)
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
@@ -66,6 +71,9 @@ struct vm_area_struct;
#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
__GFP_HIGHMEM)
+#define GFP_HIGH_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
+ __GFP_HARDWALL | __GFP_HIGHMEM | \
+ __GFP_MOVABLE)
#ifdef CONFIG_NUMA
#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/linux/highmem.h linux-2.6.19-rc6-mm2-mark_highmovable/include/linux/highmem.h
--- linux-2.6.19-rc6-mm2-clean/include/linux/highmem.h 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/linux/highmem.h 2006-11-29 10:34:12.000000000 +0000
@@ -65,7 +65,7 @@ static inline void clear_user_highpage(s
static inline struct page *
alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
{
- struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
+ struct page *page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, vaddr);
if (page)
clear_user_highpage(page, vaddr);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/memory.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/memory.c
--- linux-2.6.19-rc6-mm2-clean/mm/memory.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/memory.c 2006-11-29 10:34:12.000000000 +0000
@@ -1564,7 +1564,7 @@ gotten:
if (!new_page)
goto oom;
} else {
- new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+ new_page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, address);
if (!new_page)
goto oom;
cow_user_page(new_page, old_page, address);
@@ -2188,7 +2188,7 @@ retry:
if (unlikely(anon_vma_prepare(vma)))
goto oom;
- page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+ page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, address);
if (!page)
goto oom;
copy_user_highpage(page, new_page, address);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/mempolicy.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/mempolicy.c
--- linux-2.6.19-rc6-mm2-clean/mm/mempolicy.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/mempolicy.c 2006-11-29 10:34:12.000000000 +0000
@@ -598,7 +598,7 @@ static void migrate_page_add(struct page
static struct page *new_node_page(struct page *page, unsigned long node, int **x)
{
- return alloc_pages_node(node, GFP_HIGHUSER, 0);
+ return alloc_pages_node(node, GFP_HIGH_MOVABLE, 0);
}
/*
@@ -714,7 +714,7 @@ static struct page *new_vma_page(struct
{
struct vm_area_struct *vma = (struct vm_area_struct *)private;
- return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma));
+ return alloc_page_vma(GFP_HIGH_MOVABLE, vma, page_address_in_vma(page, vma));
}
#else
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/migrate.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/migrate.c
--- linux-2.6.19-rc6-mm2-clean/mm/migrate.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/migrate.c 2006-11-29 10:34:12.000000000 +0000
@@ -748,7 +748,7 @@ static struct page *new_page_node(struct
*result = &pm->status;
- return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0);
+ return alloc_pages_node(pm->node, GFP_HIGH_MOVABLE | GFP_THISNODE, 0);
}
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/shmem.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/shmem.c
--- linux-2.6.19-rc6-mm2-clean/mm/shmem.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/shmem.c 2006-11-29 10:34:12.000000000 +0000
@@ -93,8 +93,11 @@ static inline struct page *shmem_dir_all
* The above definition of ENTRIES_PER_PAGE, and the use of
* BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
* might be reconsidered if it ever diverges from PAGE_SIZE.
+ *
+ * __GFP_MOVABLE is masked out as swap vectors cannot move
*/
- return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
+ return alloc_pages((gfp_mask & ~__GFP_MOVABLE) | __GFP_ZERO,
+ PAGE_CACHE_SHIFT-PAGE_SHIFT);
}
static inline void shmem_dir_free(struct page *page)
@@ -372,7 +375,7 @@ static swp_entry_t *shmem_swp_alloc(stru
}
spin_unlock(&info->lock);
- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
+ page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
if (page)
set_page_private(page, 0);
spin_lock(&info->lock);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/swap_prefetch.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/swap_prefetch.c
--- linux-2.6.19-rc6-mm2-clean/mm/swap_prefetch.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/swap_prefetch.c 2006-11-29 10:34:12.000000000 +0000
@@ -204,7 +204,7 @@ static enum trickle_return trickle_swap_
* Get a new page to read from swap. We have already checked the
* watermarks so __alloc_pages will not call on reclaim.
*/
- page = alloc_pages_node(node, GFP_HIGHUSER & ~__GFP_WAIT, 0);
+ page = alloc_pages_node(node, GFP_HIGH_MOVABLE & ~__GFP_WAIT, 0);
if (unlikely(!page)) {
ret = TRICKLE_DELAY;
goto out;
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/swap_state.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/swap_state.c
--- linux-2.6.19-rc6-mm2-clean/mm/swap_state.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/swap_state.c 2006-11-29 10:34:12.000000000 +0000
@@ -343,7 +343,7 @@ struct page *read_swap_cache_async(swp_e
* Get a new page to read into from swap.
*/
if (!new_page) {
- new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+ new_page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, addr);
if (!new_page)
break; /* Out of memory */
}
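One consequence worth noting: with the definitions above, GFP_HIGH_MOVABLE
differs from GFP_HIGHUSER only by the __GFP_MOVABLE bit, which is why a call
site such as shmem_dir_alloc() can simply mask the bit back out when a page
must stay put. A hypothetical helper (not part of the patch) expressing that:
#include <linux/gfp.h>
/* Illustrative only: strip the movability hint for allocations that must
 * remain pinned, mirroring the masking done in shmem_dir_alloc() above. */
static inline gfp_t example_drop_movable(gfp_t gfp_mask)
{
	return gfp_mask & ~__GFP_MOVABLE;
}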
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-11-30 17:07 [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated Mel Gorman
@ 2006-12-01 1:31 ` Andrew Morton
2006-12-01 9:54 ` Mel Gorman
0 siblings, 1 reply; 38+ messages in thread
From: Andrew Morton @ 2006-12-01 1:31 UTC (permalink / raw)
To: Mel Gorman; +Cc: clameter, linux-mm, linux-kernel
On Thu, 30 Nov 2006 17:07:46 +0000
mel@skynet.ie (Mel Gorman) wrote:
> Am reposting this patch after there were no further comments on the last
> version.
Am not sure what to do with it - nothing actually uses __GFP_MOVABLE.
> It is often known at allocation time when a page may be migrated or not.
"often", yes.
> This
> patch adds a flag called __GFP_MOVABLE and GFP_HIGH_MOVABLE. Allocations using
> __GFP_MOVABLE can be either migrated using the page migration mechanism
> or reclaimed by syncing with backing storage and discarding.
>
> Additional credit goes to Christoph Lameter and Linus Torvalds for shaping
> the concept. Credit to Hugh Dickins for catching issues with shmem swap
> vector and ramfs allocations.
>
> ...
>
> @@ -65,7 +65,7 @@ static inline void clear_user_highpage(s
> static inline struct page *
> alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
> {
> - struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
> + struct page *page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, vaddr);
>
> if (page)
> clear_user_highpage(page, vaddr);
But this change is presumptuous. alloc_zeroed_user_highpage() doesn't know
that its caller is going to use the page for moveable purposes. (Ditto lots
of other places in this patch).
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-01 1:31 ` Andrew Morton
@ 2006-12-01 9:54 ` Mel Gorman
2006-12-01 19:01 ` Andrew Morton
0 siblings, 1 reply; 38+ messages in thread
From: Mel Gorman @ 2006-12-01 9:54 UTC (permalink / raw)
To: Andrew Morton
Cc: clameter, Linux Memory Management List, Linux Kernel Mailing List
On Thu, 30 Nov 2006, Andrew Morton wrote:
> On Thu, 30 Nov 2006 17:07:46 +0000
> mel@skynet.ie (Mel Gorman) wrote:
>
>> Am reposting this patch after there were no further comments on the last
>> version.
>
> Am not sure what to do with it - nothing actually uses __GFP_MOVABLE.
>
Nothing yet. To begin with, this is just a documentation mechanism. I'll
be trying to push page clustering one piece at a time which will need
this. The markings may also be of interest to containers and to pagesets
because it will clearly flag what are allocations in use by userspace.
>> It is often known at allocation time when a page may be migrated or not.
>
> "often", yes.
>
>> This
>> patch adds a flag called __GFP_MOVABLE and GFP_HIGH_MOVABLE. Allocations using
>> __GFP_MOVABLE can be either migrated using the page migration mechanism
>> or reclaimed by syncing with backing storage and discarding.
>>
>> Additional credit goes to Christoph Lameter and Linus Torvalds for shaping
>> the concept. Credit to Hugh Dickins for catching issues with shmem swap
>> vector and ramfs allocations.
>>
>> ...
>>
>> @@ -65,7 +65,7 @@ static inline void clear_user_highpage(s
>> static inline struct page *
>> alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
>> {
>> - struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
>> + struct page *page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, vaddr);
>>
>> if (page)
>> clear_user_highpage(page, vaddr);
>
> But this change is presumptuous. alloc_zeroed_user_highpage() doesn't know
> that its caller is going to use the page for moveable purposes. (Ditto lots
> of other places in this patch).
>
according to grep -r, alloc_zeroed_user_highpage() is only used in two
places, do_wp_page() (when write faulting the zero page)[1] and
do_anonymous_page() (when mapping the zero page for the first time and
writing). In these cases, they are known to be movable. What am I missing?
[1] I missed a call to GFP_HIGHUSER in do_wp_page() that should have been
GFP_HIGH_MOVABLE.
--
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-01 9:54 ` Mel Gorman
@ 2006-12-01 19:01 ` Andrew Morton
2006-12-04 14:07 ` Mel Gorman
0 siblings, 1 reply; 38+ messages in thread
From: Andrew Morton @ 2006-12-01 19:01 UTC (permalink / raw)
To: Mel Gorman
Cc: clameter, Linux Memory Management List, Linux Kernel Mailing List
On Fri, 1 Dec 2006 09:54:11 +0000 (GMT)
Mel Gorman <mel@csn.ul.ie> wrote:
> >> @@ -65,7 +65,7 @@ static inline void clear_user_highpage(s
> >> static inline struct page *
> >> alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
> >> {
> >> - struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
> >> + struct page *page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, vaddr);
> >>
> >> if (page)
> >> clear_user_highpage(page, vaddr);
> >
> > But this change is presumptuous. alloc_zeroed_user_highpage() doesn't know
> > that its caller is going to use the page for moveable purposes. (Ditto lots
> > of other places in this patch).
> >
>
> according to grep -r, alloc_zeroed_user_highpage() is only used in two
> places, do_wp_page() (when write faulting the zero page)[1] and
> do_anonymous_page() (when mapping the zero page for the first time and
> writing). In these cases, they are known to be movable. What am I missing?
We shouldn't implement a function which "knows" how its callers are using
it in this manner.
You've gone and changed alloc_zeroed_user_highpage() into
alloc_user_zeroed_highpage_which_you_must_use_in_an_application_where_it_is_movable().
Now, if we want to put a big fat comment over these functions saying that
the caller must honour the promise we've made on the caller's behalf then
OK(ish). But it'd be better (albeit perhaps bloaty) to require the caller
to pass in the gfp-flags.
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-01 19:01 ` Andrew Morton
@ 2006-12-04 14:07 ` Mel Gorman
2006-12-04 19:30 ` Andrew Morton
0 siblings, 1 reply; 38+ messages in thread
From: Mel Gorman @ 2006-12-04 14:07 UTC (permalink / raw)
To: Andrew Morton
Cc: clameter, Linux Memory Management List, Linux Kernel Mailing List
On (01/12/06 11:01), Andrew Morton didst pronounce:
> On Fri, 1 Dec 2006 09:54:11 +0000 (GMT)
> Mel Gorman <mel@csn.ul.ie> wrote:
>
> > >> @@ -65,7 +65,7 @@ static inline void clear_user_highpage(s
> > >> static inline struct page *
> > >> alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
> > >> {
> > >> - struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
> > >> + struct page *page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, vaddr);
> > >>
> > >> if (page)
> > >> clear_user_highpage(page, vaddr);
> > >
> > > But this change is presumptuous. alloc_zeroed_user_highpage() doesn't know
> > > that its caller is going to use the page for moveable purposes. (Ditto lots
> > > of other places in this patch).
> > >
> >
> > according to grep -r, alloc_zeroed_user_highpage() is only used in two
> > places, do_wp_page() (when write faulting the zero page)[1] and
> > do_anonymous_page() (when mapping the zero page for the first time and
> > writing). In these cases, they are known to be movable. What am I missing?
>
> We shouldn't implement a function which "knows" how its callers are using
> it in this manner.
>
I see.
> You've gone and changed alloc_zeroed_user_highpage() into
> alloc_user_zeroed_highpage_which_you_must_use_in_an_application_where_it_is_movable().
> Now, if we want to put a big fat comment over these functions saying that
> the caller must honour the promise we've made on the caller's behalf then
> OK(ish). But it'd be better (albeit perhaps bloaty) to require the caller
> to pass in the gfp-flags.
It is a bit more bloaty all right but it makes more sense so I've made some
changes to the patch and posted it below. There are three important
differences from the first patch.
o copy_strings() and variants are no longer setting the flag as the pages
  are not obviously movable when I took a much closer look.
o The arch function alloc_zeroed_user_highpage() is now called
  __alloc_zeroed_user_highpage and takes flags related to
  movability that will be applied. alloc_zeroed_user_highpage()
  calls __alloc_zeroed_user_highpage() with no additional flags to
  preserve existing behavior of the API for out-of-tree users and
  alloc_zeroed_user_highpage_movable() sets the __GFP_MOVABLE flag.
o new_inode() documents that it uses GFP_HIGH_MOVABLE and callers are expected
  to call mapping_set_gfp_mask() if that is not suitable.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/fs/inode.c linux-2.6.19-rc6-mm2-mark_highmovable/fs/inode.c
--- linux-2.6.19-rc6-mm2-clean/fs/inode.c 2006-11-29 10:31:09.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/fs/inode.c 2006-12-04 11:44:24.000000000 +0000
@@ -146,7 +146,7 @@ static struct inode *alloc_inode(struct
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
+ mapping_set_gfp_mask(mapping, GFP_HIGH_MOVABLE);
mapping->assoc_mapping = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
@@ -527,7 +527,13 @@ repeat:
* new_inode - obtain an inode
* @sb: superblock
*
- * Allocates a new inode for given superblock.
+ * Allocates a new inode for given superblock. The gfp_mask used for
+ * allocations related to inode->i_mapping is GFP_HIGH_MOVABLE. If
+ * HIGHMEM pages are unsuitable or it is known that pages allocated
+ * for the page cache are not reclaimable on demand,
+ * mapping_set_gfp_mask() must be called with suitable flags after
+ * new_inode()
+ *
*/
struct inode *new_inode(struct super_block *sb)
{
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/fs/ramfs/inode.c linux-2.6.19-rc6-mm2-mark_highmovable/fs/ramfs/inode.c
--- linux-2.6.19-rc6-mm2-clean/fs/ramfs/inode.c 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/fs/ramfs/inode.c 2006-12-04 10:18:33.000000000 +0000
@@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct sup
inode->i_blocks = 0;
inode->i_mapping->a_ops = &ramfs_aops;
inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
+ mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
switch (mode & S_IFMT) {
default:
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-alpha/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-alpha/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-alpha/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-alpha/page.h 2006-12-04 11:15:23.000000000 +0000
@@ -17,7 +17,9 @@
extern void clear_page(void *page);
#define clear_user_page(page, vaddr, pg) clear_page(page)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vmaddr)
+#define __alloc_zeroed_user_highpage(gfp_movableflags, vma, vaddr) alloc_page_vma(\
+ set_movable_flags(GFP_HIGHUSER | __GFP_ZERO, gfp_movableflags), \
+ vma, vmaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
extern void copy_page(void * _to, void * _from);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-cris/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-cris/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-cris/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-cris/page.h 2006-12-04 11:16:59.000000000 +0000
@@ -20,7 +20,9 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define __alloc_zeroed_user_highpage(gfp_movableflags, vma, vaddr) alloc_page_vma(\
+ set_movable_flags(GFP_HIGHUSER | __GFP_ZERO, gfp_movableflags), \
+ vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-h8300/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-h8300/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-h8300/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-h8300/page.h 2006-12-04 11:17:29.000000000 +0000
@@ -22,7 +22,9 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define __alloc_zeroed_user_highpage(gfp_movableflags, vma, vaddr) alloc_page_vma(\
+ set_movable_flags(GFP_HIGHUSER | __GFP_ZERO, gfp_movableflags), \
+ vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-i386/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-i386/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-i386/page.h 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-i386/page.h 2006-12-04 11:15:31.000000000 +0000
@@ -35,7 +35,9 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define __alloc_zeroed_user_highpage(gfp_movableflags, vma, vaddr) alloc_page_vma(\
+ set_movable_flags(GFP_HIGHUSER | __GFP_ZERO, gfp_movableflags), \
+ vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-ia64/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-ia64/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-ia64/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-ia64/page.h 2006-12-04 11:18:45.000000000 +0000
@@ -87,9 +87,11 @@ do { \
} while (0)
-#define alloc_zeroed_user_highpage(vma, vaddr) \
+#define __alloc_zeroed_user_highpage(gfp_movableflags, vma, vaddr) \
({ \
- struct page *page = alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr); \
+ struct page *page = alloc_page_vma( \
+ set_movable_flags(GFP_HIGHUSER | __GFP_ZERO, gfp_movableflags), \
+ vma, vaddr); \
if (page) \
flush_dcache_page(page); \
page; \
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-m32r/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-m32r/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-m32r/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-m32r/page.h 2006-12-04 11:19:05.000000000 +0000
@@ -16,7 +16,9 @@ extern void copy_page(void *to, void *fr
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define __alloc_zeroed_user_highpage(gfp_movableflags, vma, vaddr) alloc_page_vma(\
+ set_movable_flags(GFP_HIGHUSER | __GFP_ZERO, gfp_movableflags), \
+ vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-s390/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-s390/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-s390/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-s390/page.h 2006-12-04 11:19:35.000000000 +0000
@@ -64,7 +64,9 @@ static inline void copy_page(void *to, v
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define __alloc_zeroed_user_highpage(gfp_movableflags, vma, vaddr) alloc_page_vma(\
+ set_movable_flags(GFP_HIGHUSER | __GFP_ZERO, gfp_movableflags), \
+ vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/asm-x86_64/page.h linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-x86_64/page.h
--- linux-2.6.19-rc6-mm2-clean/include/asm-x86_64/page.h 2006-11-16 04:03:40.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/asm-x86_64/page.h 2006-12-04 11:19:58.000000000 +0000
@@ -51,7 +51,9 @@ void copy_page(void *, void *);
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define __alloc_zeroed_user_highpage(gfp_movableflags, vma, vaddr) alloc_page_vma(\
+ set_movable_flags(GFP_HIGHUSER | __GFP_ZERO, gfp_movableflags), \
+ vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
* These are used to make use of C type-checking..
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/linux/gfp.h linux-2.6.19-rc6-mm2-mark_highmovable/include/linux/gfp.h
--- linux-2.6.19-rc6-mm2-clean/include/linux/gfp.h 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/linux/gfp.h 2006-12-04 11:04:36.000000000 +0000
@@ -30,6 +30,9 @@ struct vm_area_struct;
* cannot handle allocation failures.
*
* __GFP_NORETRY: The VM implementation must not retry indefinitely.
+ *
+ * __GFP_MOVABLE: Flag that this page will be movable by the page migration
+ * mechanism
*/
#define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? */
#define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? */
@@ -46,6 +49,7 @@ struct vm_area_struct;
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE ((__force gfp_t)0x40000u)/* No fallback, no policies */
+#define __GFP_MOVABLE ((__force gfp_t)0x80000u) /* Page is movable */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -54,7 +58,17 @@ struct vm_area_struct;
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE)
+ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_THISNODE|\
+ __GFP_MOVABLE)
+
+/* Mask of GFP flags related to migration or reclaimation */
+#define GFP_MOVABLE_FLAGS (__GFP_MOVABLE)
+
+static inline gfp_t set_movable_flags(gfp_t gfp, gfp_t migrate_flags)
+{
+ return (gfp & ~(GFP_MOVABLE_FLAGS)) |
+ (migrate_flags & GFP_MOVABLE_FLAGS);
+}
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
@@ -66,6 +80,9 @@ struct vm_area_struct;
#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
__GFP_HIGHMEM)
+#define GFP_HIGH_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
+ __GFP_HARDWALL | __GFP_HIGHMEM | \
+ __GFP_MOVABLE)
#ifdef CONFIG_NUMA
#define GFP_THISNODE (__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/include/linux/highmem.h linux-2.6.19-rc6-mm2-mark_highmovable/include/linux/highmem.h
--- linux-2.6.19-rc6-mm2-clean/include/linux/highmem.h 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/include/linux/highmem.h 2006-12-04 11:10:35.000000000 +0000
@@ -62,10 +62,28 @@ static inline void clear_user_highpage(s
}
#ifndef __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
+/**
+ * __alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA with caller-specified movable GFP flags
+ * @gfp_movableflags: The GFP flags related to the pages future ability to move like __GFP_MOVABLE
+ * @vma: The VMA the page is to be allocated for
+ * @vaddr: The virtual address the page will be inserted into
+ *
+ * This function will allocate a page for a VMA but the caller is expected
+ * to specify via gfp_movableflags whether the page will be movable in the
+ * future or not
+ *
+ * An architecture may override this function by defining
+ * __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE and providing their own
+ * implementation.
+ */
static inline struct page *
-alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
+__alloc_zeroed_user_highpage(gfp_t gfp_movableflags,
+ struct vm_area_struct *vma,
+ unsigned long vaddr)
{
- struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
+ struct page *page = alloc_page_vma(
+ set_movable_flags(GFP_HIGHUSER, gfp_movableflags),
+ vma, vaddr);
if (page)
clear_user_highpage(page, vaddr);
@@ -74,6 +92,36 @@ alloc_zeroed_user_highpage(struct vm_are
}
#endif
+/**
+ * alloc_zeroed_user_highpage - Allocate a zeroed HIGHMEM page for a VMA
+ * @vma: The VMA the page is to be allocated for
+ * @vaddr: The virtual address the page will be inserted into
+ *
+ * This function will allocate a page for a VMA that the caller knows will
+ * not be able to move in the future using move_pages() or reclaim. If it
+ * is known that the page can move, use alloc_zeroed_user_highpage_movable
+ */
+static inline struct page *
+alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
+{
+ return __alloc_zeroed_user_highpage(0, vma, vaddr);
+}
+
+/**
+ * alloc_zeroed_user_highpage_movable - Allocate a zeroed HIGHMEM page for a VMA that the caller knows can move
+ * @vma: The VMA the page is to be allocated for
+ * @vaddr: The virtual address the page will be inserted into
+ *
+ * This function will allocate a page for a VMA that the caller knows will
+ * be able to migrate in the future using move_pages() or reclaimed
+ */
+static inline struct page *
+alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
+ unsigned long vaddr)
+{
+ return __alloc_zeroed_user_highpage(__GFP_MOVABLE, vma, vaddr);
+}
+
static inline void clear_highpage(struct page *page)
{
void *kaddr = kmap_atomic(page, KM_USER0);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/memory.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/memory.c
--- linux-2.6.19-rc6-mm2-clean/mm/memory.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/memory.c 2006-12-04 10:57:13.000000000 +0000
@@ -1560,11 +1560,11 @@ gotten:
if (unlikely(anon_vma_prepare(vma)))
goto oom;
if (old_page == ZERO_PAGE(address)) {
- new_page = alloc_zeroed_user_highpage(vma, address);
+ new_page = alloc_zeroed_user_highpage_movable(vma, address);
if (!new_page)
goto oom;
} else {
- new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+ new_page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, address);
if (!new_page)
goto oom;
cow_user_page(new_page, old_page, address);
@@ -2085,7 +2085,7 @@ static int do_anonymous_page(struct mm_s
if (unlikely(anon_vma_prepare(vma)))
goto oom;
- page = alloc_zeroed_user_highpage(vma, address);
+ page = alloc_zeroed_user_highpage_movable(vma, address);
if (!page)
goto oom;
@@ -2188,7 +2188,7 @@ retry:
if (unlikely(anon_vma_prepare(vma)))
goto oom;
- page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+ page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, address);
if (!page)
goto oom;
copy_user_highpage(page, new_page, address);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/mempolicy.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/mempolicy.c
--- linux-2.6.19-rc6-mm2-clean/mm/mempolicy.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/mempolicy.c 2006-12-04 10:18:33.000000000 +0000
@@ -598,7 +598,7 @@ static void migrate_page_add(struct page
static struct page *new_node_page(struct page *page, unsigned long node, int **x)
{
- return alloc_pages_node(node, GFP_HIGHUSER, 0);
+ return alloc_pages_node(node, GFP_HIGH_MOVABLE, 0);
}
/*
@@ -714,7 +714,7 @@ static struct page *new_vma_page(struct
{
struct vm_area_struct *vma = (struct vm_area_struct *)private;
- return alloc_page_vma(GFP_HIGHUSER, vma, page_address_in_vma(page, vma));
+ return alloc_page_vma(GFP_HIGH_MOVABLE, vma, page_address_in_vma(page, vma));
}
#else
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/migrate.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/migrate.c
--- linux-2.6.19-rc6-mm2-clean/mm/migrate.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/migrate.c 2006-12-04 10:18:33.000000000 +0000
@@ -748,7 +748,7 @@ static struct page *new_page_node(struct
*result = &pm->status;
- return alloc_pages_node(pm->node, GFP_HIGHUSER | GFP_THISNODE, 0);
+ return alloc_pages_node(pm->node, GFP_HIGH_MOVABLE | GFP_THISNODE, 0);
}
/*
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/shmem.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/shmem.c
--- linux-2.6.19-rc6-mm2-clean/mm/shmem.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/shmem.c 2006-12-04 10:18:33.000000000 +0000
@@ -93,8 +93,11 @@ static inline struct page *shmem_dir_all
* The above definition of ENTRIES_PER_PAGE, and the use of
* BLOCKS_PER_PAGE on indirect pages, assume PAGE_CACHE_SIZE:
* might be reconsidered if it ever diverges from PAGE_SIZE.
+ *
+ * __GFP_MOVABLE is masked out as swap vectors cannot move
*/
- return alloc_pages(gfp_mask, PAGE_CACHE_SHIFT-PAGE_SHIFT);
+ return alloc_pages((gfp_mask & ~__GFP_MOVABLE) | __GFP_ZERO,
+ PAGE_CACHE_SHIFT-PAGE_SHIFT);
}
static inline void shmem_dir_free(struct page *page)
@@ -372,7 +375,7 @@ static swp_entry_t *shmem_swp_alloc(stru
}
spin_unlock(&info->lock);
- page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping) | __GFP_ZERO);
+ page = shmem_dir_alloc(mapping_gfp_mask(inode->i_mapping));
if (page)
set_page_private(page, 0);
spin_lock(&info->lock);
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/swap_prefetch.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/swap_prefetch.c
--- linux-2.6.19-rc6-mm2-clean/mm/swap_prefetch.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/swap_prefetch.c 2006-12-04 10:18:33.000000000 +0000
@@ -204,7 +204,7 @@ static enum trickle_return trickle_swap_
* Get a new page to read from swap. We have already checked the
* watermarks so __alloc_pages will not call on reclaim.
*/
- page = alloc_pages_node(node, GFP_HIGHUSER & ~__GFP_WAIT, 0);
+ page = alloc_pages_node(node, GFP_HIGH_MOVABLE & ~__GFP_WAIT, 0);
if (unlikely(!page)) {
ret = TRICKLE_DELAY;
goto out;
diff -rup -X /usr/src/patchset-0.6/bin//dontdiff linux-2.6.19-rc6-mm2-clean/mm/swap_state.c linux-2.6.19-rc6-mm2-mark_highmovable/mm/swap_state.c
--- linux-2.6.19-rc6-mm2-clean/mm/swap_state.c 2006-11-29 10:31:10.000000000 +0000
+++ linux-2.6.19-rc6-mm2-mark_highmovable/mm/swap_state.c 2006-12-04 10:18:33.000000000 +0000
@@ -343,7 +343,7 @@ struct page *read_swap_cache_async(swp_e
* Get a new page to read into from swap.
*/
if (!new_page) {
- new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+ new_page = alloc_page_vma(GFP_HIGH_MOVABLE, vma, addr);
if (!new_page)
break; /* Out of memory */
}
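To make the v2 interface concrete, here is a minimal sketch of how a
fault-handling path would choose between the two wrappers defined above (the
calling context is hypothetical; only the wrapper names come from the patch):
#include <linux/highmem.h>
#include <linux/mm.h>
static struct page *example_fault_alloc(struct vm_area_struct *vma,
					unsigned long address, int movable)
{
	/* The _movable variant passes __GFP_MOVABLE down through
	 * __alloc_zeroed_user_highpage(); the plain variant passes no
	 * extra flags and so keeps the old GFP_HIGHUSER behaviour. */
	if (movable)
		return alloc_zeroed_user_highpage_movable(vma, address);
	return alloc_zeroed_user_highpage(vma, address);
}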
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 14:07 ` Mel Gorman
@ 2006-12-04 19:30 ` Andrew Morton
2006-12-04 19:41 ` Christoph Lameter
` (2 more replies)
0 siblings, 3 replies; 38+ messages in thread
From: Andrew Morton @ 2006-12-04 19:30 UTC (permalink / raw)
To: Mel Gorman
Cc: clameter, Linux Memory Management List, Linux Kernel Mailing List
On Mon, 4 Dec 2006 14:07:47 +0000
mel@skynet.ie (Mel Gorman) wrote:
> o copy_strings() and variants are no longer setting the flag as the pages
> are not obviously movable when I took a much closer look.
>
> o The arch function alloc_zeroed_user_highpage() is now called
> __alloc_zeroed_user_highpage and takes flags related to
> movability that will be applied. alloc_zeroed_user_highpage()
> calls __alloc_zeroed_user_highpage() with no additional flags to
> preserve existing behavior of the API for out-of-tree users and
> alloc_zeroed_user_highpage_movable() sets the __GFP_MOVABLE flag.
>
> o new_inode() documents that it uses GFP_HIGH_MOVABLE and callers are expected
> to call mapping_set_gfp_mask() if that is not suitable.
umm, OK. Could we please have some sort of statement pinning down the exact
semantics of __GFP_MOVABLE, and what its envisaged applications are?
My concern is that __GFP_MOVABLE is useful for fragmentation-avoidance, but
useless for memory hot-unplug. So that if/when hot-unplug comes along
we'll add more gunk which is a somewhat-superset of the GFP_MOVABLE
infrastructure, hence we didn't need the GFP_MOVABLE code. Or something.
That depends on how we do hot-unplug, if we do it. I continue to suspect
that it'll be done via memory zones: effectively by resurrecting
GFP_HIGHMEM. In which case there's little overlap with anti-frag. (btw, I
have a suspicion that the most important application of memory hot-unplug
will be power management: destructively turning off DIMMs).
I'd also like to pin down the situation with lumpy-reclaim versus
anti-fragmentation. No offence, but I would of course prefer to avoid
merging the anti-frag patches simply based on their stupendous size.
It seems to me that lumpy-reclaim is suitable for the e1000 problem, but
perhaps not for the hugetlbpage problem. Whereas anti-fragmentation adds
vastly more code, but can address both problems? Or something.
IOW: big-picture where-do-we-go-from-here stuff.
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 19:30 ` Andrew Morton
@ 2006-12-04 19:41 ` Christoph Lameter
2006-12-04 20:06 ` Andrew Morton
2006-12-04 20:34 ` Mel Gorman
2006-12-04 20:37 ` Peter Zijlstra
2 siblings, 1 reply; 38+ messages in thread
From: Christoph Lameter @ 2006-12-04 19:41 UTC (permalink / raw)
To: Andrew Morton
Cc: Mel Gorman, Linux Memory Management List, Linux Kernel Mailing List
On Mon, 4 Dec 2006, Andrew Morton wrote:
> My concern is that __GFP_MOVABLE is useful for fragmentation-avoidance, but
> useless for memory hot-unplug. So that if/when hot-unplug comes along
> we'll add more gunk which is a somewhat-superset of the GFP_MOVABLE
> infrastructure, hence we didn't need the GFP_MOVABLE code. Or something.
It is useless for memory unplug until we implement limits for unmovable
pages in a zone (per MAX_ORDER area? That would fit nicely into the anti
frag scheme) or until we have logic that makes !GFP_MOVABLE allocations
fall back to a node that is not removable.
> That depends on how we do hot-unplug, if we do it. I continue to suspect
> that it'll be done via memory zones: effectively by resurrecting
> GFP_HIGHMEM. In which case there's little overlap with anti-frag. (btw, I
> have a suspicion that the most important application of memory hot-unplug
> will be power management: destructively turning off DIMMs).
There are numerous other uses as well (besides DIMM and node unplug):
1. Faulty DIMM isolation
2. Virtual memory managers can reduce memory without resorting to
   balloons.
3. Physical removal and exchange of memory while a system is running
   (Likely necessary to complement hotplug cpu, cpus usually come
   with memory).
The multi zone approach does not work with NUMA. NUMA only supports a
single zone for memory policy control etc. Also multiple zones carry with
it a management overhead that is unnecessary for the MOVABLE/UNMOVABLE
distinction.
> perhaps not for the hugetlbpage problem. Whereas anti-fragmentation adds
> vastly more code, but can address both problems? Or something.
I'd favor adding full defragmentation.
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 19:41 ` Christoph Lameter
@ 2006-12-04 20:06 ` Andrew Morton
2006-12-04 20:17 ` Christoph Lameter
0 siblings, 1 reply; 38+ messages in thread
From: Andrew Morton @ 2006-12-04 20:06 UTC (permalink / raw)
To: Christoph Lameter
Cc: Mel Gorman, Linux Memory Management List, Linux Kernel Mailing List
On Mon, 4 Dec 2006 11:41:42 -0800 (PST)
Christoph Lameter <clameter@sgi.com> wrote:
> > That depends on how we do hot-unplug, if we do it. I continue to suspect
> > that it'll be done via memory zones: effectively by resurrecting
> > GFP_HIGHMEM. In which case there's little overlap with anti-frag. (btw, I
> > have a suspicion that the most important application of memory hot-unplug
> > will be power management: destructively turning off DIMMs).
>
> There are numerous other uses as well (besides DIMM and node unplug):
>
> 1. Faulty DIMM isolation
> 2. Virtual memory managers can reduce memory without resorting to
>    balloons.
> 3. Physical removal and exchange of memory while a system is running
>    (Likely necessary to complement hotplug cpu, cpus usually come
>    with memory).
>
> The multi zone approach does not work with NUMA. NUMA only supports a
> single zone for memory policy control etc.
Wot? memory policies are a per-vma thing?
Plus NUMA of course supports more than a single zone. Perhaps you meant
one zone per node. If you did, that's a pretty dumb-sounding restriction
and I don't know where you got it from.
> Also multiple zones carry with
> it a management overhead that is unnecessary for the MOVABLE/UNMOVABLE
> distinction.
I suspect you'll have to live with that. I've yet to see a vaguely sane
proposal to otherwise prevent unreclaimable, unmoveable kernel allocations
from landing in a hot-unpluggable physical memory region.
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 20:06 ` Andrew Morton
@ 2006-12-04 20:17 ` Christoph Lameter
2006-12-04 21:19 ` Andrew Morton
0 siblings, 1 reply; 38+ messages in thread
From: Christoph Lameter @ 2006-12-04 20:17 UTC (permalink / raw)
To: Andrew Morton
Cc: Mel Gorman, Linux Memory Management List, Linux Kernel Mailing List
On Mon, 4 Dec 2006, Andrew Morton wrote:
> > The multi zone approach does not work with NUMA. NUMA only supports a
> > single zone for memory policy control etc.
>
> Wot? memory policies are a per-vma thing?
They only apply to "policy_zone" of a node. policy_zone can only take a
single type of zone (has been like it forever).
Multiple zones could become a nightmare with an exploding number of zones
on zonelists. I.e. instead of 1k zones on a nodelist we now have 2k for
two or even 4k if you want to have support for memory policies for 4
zones per node. We will then increase the search time through zonelists
and have to manage all the memory in the different zones. Balancing is
going to be difficult.
> I suspect you'll have to live with that. I've yet to see a vaguely sane
> proposal to otherwise prevent unreclaimable, unmoveable kernel allocations
> from landing in a hot-unpluggable physical memory region.
Mel's approach already manages memory in chunks of MAX_ORDER. It is
easy to just restrict the unmovable types of allocation to a section of
the zone.
Then we should be doing some work to cut down the number of unmovable
allocations.
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 20:17 ` Christoph Lameter
@ 2006-12-04 21:19 ` Andrew Morton
2006-12-04 21:43 ` Christoph Lameter
0 siblings, 1 reply; 38+ messages in thread
From: Andrew Morton @ 2006-12-04 21:19 UTC (permalink / raw)
To: Christoph Lameter
Cc: Mel Gorman, Linux Memory Management List, Linux Kernel Mailing List
On Mon, 4 Dec 2006 12:17:26 -0800 (PST)
Christoph Lameter <clameter@sgi.com> wrote:
> > I suspect you'll have to live with that. I've yet to see a vaguely sane
> > proposal to otherwise prevent unreclaimable, unmoveable kernel allocations
> > from landing in a hot-unpluggable physical memory region.
>
> Mel's approach already manages memory in chunks of MAX_ORDER. It is
> easy to just restrict the unmovable types of allocation to a section of
> the zone.
What happens when we need to run reclaim against just a section of a zone?
Lumpy-reclaim could be used here; perhaps that's Mel's approach too?
We'd need new infrastructure to perform the
section-of-a-zone<->physical-memory-block mapping, and to track various
states of the section-of-a-zone. This will be complex, and buggy. It will
probably require the introduction of some sort of "sub-zone" structure. At
which stage people would be justified in asking "why didn't you just use
zones - that's what they're for?"
> Then we should be doing some work to cut down the number of unmovable
> allocations.
That's rather pointless. A feature is either reliable or it is not. We'll
never be able to make all kernel allocations reclaimable/moveable so we'll
never be reliable with this approach. I don't see any alternative to the
never-allocate-kernel-objects-in-removeable-memory approach.
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 21:19 ` Andrew Morton
@ 2006-12-04 21:43 ` Christoph Lameter
2006-12-04 22:22 ` Andrew Morton
0 siblings, 1 reply; 38+ messages in thread
From: Christoph Lameter @ 2006-12-04 21:43 UTC (permalink / raw)
To: Andrew Morton
Cc: Mel Gorman, Linux Memory Management List, Linux Kernel Mailing List
On Mon, 4 Dec 2006, Andrew Morton wrote:
> What happens when we need to run reclaim against just a section of a zone?
> Lumpy-reclaim could be used here; perhaps that's Mel's approach too?
Why would we run reclaim against a section of a zone?
> We'd need new infrastructure to perform the
> section-of-a-zone<->physical-memory-block mapping, and to track various
> states of the section-of-a-zone. This will be complex, and buggy. It will
> probably require the introduction of some sort of "sub-zone" structure. At
> which stage people would be justified in asking "why didn't you just use
> zones - that's what they're for?"
Mel already has that for anti-frag. The sections are per MAX_ORDER area
and the only states are movable, unmovable and reclaimable. There is
nothing more to it. No other state information should be added. Why would
we need sub zones? For what purpose?
> > Then we should be doing some work to cut down the number of unmovable
> > allocations.
>
> That's rather pointless. A feature is either reliable or it is not. We'll
> never be able to make all kernel allocations reclaimable/moveable so we'll
> never be reliable with this approach. I don't see any alternative to the
> never-allocate-kernel-objects-in-removeable-memory approach.
What feature are you talking about? Why would all allocations need to be
movable when we have a portion for unmovable allocations?
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-04 21:43 ` Christoph Lameter @ 2006-12-04 22:22 ` Andrew Morton 2006-12-05 16:00 ` Christoph Lameter 2006-12-05 18:10 ` Mel Gorman 0 siblings, 2 replies; 38+ messages in thread From: Andrew Morton @ 2006-12-04 22:22 UTC (permalink / raw) To: Christoph Lameter Cc: Mel Gorman, Linux Memory Management List, Linux Kernel Mailing List On Mon, 4 Dec 2006 13:43:44 -0800 (PST) Christoph Lameter <clameter@sgi.com> wrote: > On Mon, 4 Dec 2006, Andrew Morton wrote: > > > What happens when we need to run reclaim against just a section of a zone? > > Lumpy-reclaim could be used here; perhaps that's Mel's approach too? > > Why would we run reclaim against a section of a zone? Strange question. Because all the pages are in use for something else. > > We'd need new infrastructure to perform the > > section-of-a-zone<->physical-memory-block mapping, and to track various > > states of the section-of-a-zone. This will be complex, and buggy. It will > > probably require the introduction of some sort of "sub-zone" structure. At > > which stage people would be justified in asking "why didn't you just use > > zones - that's what they're for?" > > Mel aready has that for anti-frag. The sections are per MAX_ORDER area > and the only states are movable unmovable and reclaimable. There is > nothing more to it. No other state information should be added. Why would > we need sub zones? For what purpose? You're proposing that for memory hot-unplug, we take a single zone and by some means subdivide that into sections which correspond to physically hot-unpluggable memory. That certainly does not map onto MAX_ORDER sections. > > > Then we should be doing some work to cut down the number of unmovable > > > allocations. > > > > That's rather pointless. A feature is either reliable or it is not. We'll > > never be able to make all kernel allocations reclaimable/moveable so we'll > > never be reliable with this approach. I don't see any alternative to the > > never-allocate-kernel-objects-in-removeable-memory approach. > > What feature are you talking about? Memory hot-unplug, of course. > Why would all allocations need to be movable when we have a portion for > unmovable allocations? So you're proposing that we take a single zone, then divide that zone up into two sections. One section is non-hot-unpluggable and is for un-moveable allocations. The other section is hot-unpluggable and only moveable allocations may be performed there. If so, then this will require addition of new infrastructure which will be to some extent duplicative of zones and I see no reason to do that: it'd be simpler to divide the physical memory arena into two separate zones. If that is not what you are proposing then please tell us what you are proposing, completely, and with sufficient detail for us to work out what the heck you're trying to tell us. Please try to avoid uninformative rhetorical questions, for they are starting to get quite irritating. Thanks. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-04 22:22 ` Andrew Morton @ 2006-12-05 16:00 ` Christoph Lameter 2006-12-05 19:25 ` Andrew Morton 2006-12-05 18:10 ` Mel Gorman 1 sibling, 1 reply; 38+ messages in thread From: Christoph Lameter @ 2006-12-05 16:00 UTC (permalink / raw) To: Andrew Morton Cc: Mel Gorman, Linux Memory Management List, Linux Kernel Mailing List On Mon, 4 Dec 2006, Andrew Morton wrote: > > > What happens when we need to run reclaim against just a section of a zone? > > > Lumpy-reclaim could be used here; perhaps that's Mel's approach too? > > > > Why would we run reclaim against a section of a zone? > > Strange question. Because all the pages are in use for something else. We always run reclaim against the whole zone not against parts. Why would we start running reclaim against a portion of a zone? > > Mel aready has that for anti-frag. The sections are per MAX_ORDER area > > and the only states are movable unmovable and reclaimable. There is > > nothing more to it. No other state information should be added. Why would > > we need sub zones? For what purpose? > > You're proposing that for memory hot-unplug, we take a single zone and by > some means subdivide that into sections which correspond to physically > hot-unpluggable memory. That certainly does not map onto MAX_ORDER > sections. Mel's patches are already managing "sections" (if you want to call it that) of a zone in units of MAX_ORDER. If we memorize where the lowest unmovable MAX_ORDER block is then we have the necessary separation and can do memory unplug on the remainder of the zone. > > What feature are you talking about? > > Memory hot-unplug, of course. There are multiple issues that we discuss here. Please be clear. Categorical demands for perfection certainly wont help us. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
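To picture the "memorize where the lowest unmovable MAX_ORDER block is" idea discussed above, here is a minimal sketch, not code from any posted patch: the boundary variable and both helpers (note_unmovable_alloc, pfn_maybe_unpluggable) are invented for illustration, and a real implementation would track one boundary per zone under the zone lock.

#include <linux/mmzone.h>	/* MAX_ORDER */

#define MAX_ORDER_PAGES	(1UL << (MAX_ORDER - 1))

/*
 * Highest PFN that has ever been handed to an unmovable allocation,
 * rounded up to a MAX_ORDER boundary.  A single variable keeps the
 * sketch short; per-zone state would be needed in practice.
 */
static unsigned long unmovable_boundary_pfn;

/* Record that an unmovable allocation was satisfied at 'pfn'. */
static void note_unmovable_alloc(unsigned long pfn)
{
	unsigned long block_end = (pfn | (MAX_ORDER_PAGES - 1)) + 1;

	if (block_end > unmovable_boundary_pfn)
		unmovable_boundary_pfn = block_end;
}

/*
 * Everything above the boundary has only ever held movable or
 * reclaimable pages, so it is the part of the zone that could be
 * offered for unplug.
 */
static int pfn_maybe_unpluggable(unsigned long pfn)
{
	return pfn >= unmovable_boundary_pfn;
}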
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-05 16:00 ` Christoph Lameter @ 2006-12-05 19:25 ` Andrew Morton 2006-12-05 20:01 ` Christoph Lameter 0 siblings, 1 reply; 38+ messages in thread From: Andrew Morton @ 2006-12-05 19:25 UTC (permalink / raw) To: Christoph Lameter Cc: Mel Gorman, Linux Memory Management List, Linux Kernel Mailing List On Tue, 5 Dec 2006 08:00:39 -0800 (PST) Christoph Lameter <clameter@sgi.com> wrote: > On Mon, 4 Dec 2006, Andrew Morton wrote: > > > > > What happens when we need to run reclaim against just a section of a zone? > > > > Lumpy-reclaim could be used here; perhaps that's Mel's approach too? > > > > > > Why would we run reclaim against a section of a zone? > > > > Strange question. Because all the pages are in use for something else. > > We always run reclaim against the whole zone not against parts. Why > would we start running reclaim against a portion of a zone? Oh for gawd's sake. If you want to allocate a page from within the first 1/4 of a zone, and if all those pages are in use for something else then you'll need to run reclaim against the first 1/4 of that zone. Or fail the allocation. Or run reclaim against the entire zone. The second two options are self-evidently dumb. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-05 19:25 ` Andrew Morton @ 2006-12-05 20:01 ` Christoph Lameter 2006-12-05 21:47 ` Mel Gorman 0 siblings, 1 reply; 38+ messages in thread From: Christoph Lameter @ 2006-12-05 20:01 UTC (permalink / raw) To: Andrew Morton Cc: Mel Gorman, Linux Memory Management List, Linux Kernel Mailing List On Tue, 5 Dec 2006, Andrew Morton wrote: > > We always run reclaim against the whole zone not against parts. Why > > would we start running reclaim against a portion of a zone? > > Oh for gawd's sake. Yes indeed. Another failure to answer a simple question. > If you want to allocate a page from within the first 1/4 of a zone, and if > all those pages are in use for something else then you'll need to run > reclaim against the first 1/4 of that zone. Or fail the allocation. Or > run reclaim against the entire zone. The second two options are > self-evidently dumb. Why would one want to allocate from the 1/4th of a zone? (Are we still discussing Mel's antifrag scheme or what is this about?) -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-05 20:01 ` Christoph Lameter @ 2006-12-05 21:47 ` Mel Gorman 2006-12-05 23:33 ` Christoph Lameter 0 siblings, 1 reply; 38+ messages in thread From: Mel Gorman @ 2006-12-05 21:47 UTC (permalink / raw) To: Christoph Lameter Cc: Andrew Morton, Linux Memory Management List, Linux Kernel Mailing List On (05/12/06 12:01), Christoph Lameter didst pronounce: > On Tue, 5 Dec 2006, Andrew Morton wrote: > > > > We always run reclaim against the whole zone not against parts. Why > > > would we start running reclaim against a portion of a zone? > > > > Oh for gawd's sake. > > Yes indeed. Another failure to answer a simple question. > There are times you want to reclaim just part of a zone - specifically satisfying high-order allocations. See situations 1 and 2 from elsewhere in this thread. In a similar vein, there will be times when you want to migrate a PFN range for similar reasons. > > If you want to allocate a page from within the first 1/4 of a zone, and if > > all those pages are in use for something else then you'll need to run > > reclaim against the first 1/4 of that zone. Or fail the allocation. Or > > run reclaim against the entire zone. The second two options are > > self-evidently dumb. > > Why would one want to allocate from the 1/4th of a zone? (Are we still > discussing Mel's antifrag scheme or what is this about?) > Because you wanted contiguous blocks of pages. This is related to anti-frag because with anti-frag, reclaiming memory or migrating memory will free up contiguous blocks. Without it, you're probably wasting your time. -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
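A minimal sketch of the "reclaim or migrate within a PFN range" operation Mel describes, to make the shape of the loop concrete. pfn_valid(), pfn_to_page() and PageLRU() are real kernel interfaces; isolate_page_for_eviction() is a hypothetical stand-in for whatever lumpy reclaim or page migration would actually do with each page, declared here only so the sketch is self-contained.

#include <linux/mm.h>
#include <linux/list.h>
#include <linux/page-flags.h>

/* Hypothetical helper: detach one LRU page so the caller can page it
 * out or migrate it.  Stands in for the real isolation machinery. */
extern int isolate_page_for_eviction(struct page *page, struct list_head *list);

/* Gather every movable page in [start_pfn, end_pfn) onto 'list'. */
static int gather_range(unsigned long start_pfn, unsigned long end_pfn,
			struct list_head *list)
{
	unsigned long pfn;
	int gathered = 0;

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		struct page *page;

		if (!pfn_valid(pfn))
			continue;
		page = pfn_to_page(pfn);
		/* Only pages on the LRU can be paged out or migrated. */
		if (!PageLRU(page))
			continue;
		if (isolate_page_for_eviction(page, list) == 0)
			gathered++;
	}
	return gathered;	/* the caller then reclaims or migrates 'list' */
}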
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-05 21:47 ` Mel Gorman @ 2006-12-05 23:33 ` Christoph Lameter 2006-12-06 9:31 ` Mel Gorman 0 siblings, 1 reply; 38+ messages in thread From: Christoph Lameter @ 2006-12-05 23:33 UTC (permalink / raw) To: Mel Gorman Cc: Andrew Morton, Linux Memory Management List, Linux Kernel Mailing List On Tue, 5 Dec 2006, Mel Gorman wrote: > There are times you want to reclaim just part of a zone - specifically > satisfying a high-order allocations. See sitations 1 and 2 from elsewhere > in this thread. On a similar vein, there will be times when you want to > migrate a PFN range for similar reasons. This is confusing reclaim with defragmentation. I think we are in conceptually unclean territory because we mix the two. If you must use reclaim to get a portion of contiguous memory free then yes, we have this problem. If you can migrate pages then no, there is no need for reclaiming a part of a zone. You can occasionally shuffle pages around to get a large contiguous chunk. If there is not enough memory then an independent reclaim subsystem can take care of freeing a sufficient amount of memory. Marrying the two seems to be getting a bit complex and maybe very difficult to get right. The classification of the memory allocations is useful for finding a starting point that minimises the number of pages that must be moved to open up a hole. > > Why would one want to allocate from the 1/4th of a zone? (Are we still > > discussing Mel's antifrag scheme or what is this about?) > Because you wanted contiguous blocks of pages. This is related to anti-frag > because with anti-frag, reclaiming memory or migration memory will free up > contiguous blocks. Without it, you're probably wasting your time. I am still not sure how this should work. Reclaim in a portion of the reclaimable/movable portion of the zone? Or pick a huge page and simply reclaim all the pages in that range? This is required for anti-frag regardless of additional zones, right? BTW, if one could successfully do this partial reclaim thing then we would also no longer need DMA zones, because we could free up memory in the DMA area of a zone at will if we run short on memory there. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-05 23:33 ` Christoph Lameter @ 2006-12-06 9:31 ` Mel Gorman 2006-12-06 17:31 ` Christoph Lameter 0 siblings, 1 reply; 38+ messages in thread From: Mel Gorman @ 2006-12-06 9:31 UTC (permalink / raw) To: Christoph Lameter Cc: Andrew Morton, Linux Memory Management List, Linux Kernel Mailing List On Tue, 5 Dec 2006, Christoph Lameter wrote: > On Tue, 5 Dec 2006, Mel Gorman wrote: > >> There are times you want to reclaim just part of a zone - specifically >> satisfying a high-order allocations. See sitations 1 and 2 from elsewhere >> in this thread. On a similar vein, there will be times when you want to >> migrate a PFN range for similar reasons. > > This is confusing reclaim with defragmentation. No, I'm not. What is important is the objective. Objective: Get contiguous block of free pages Required: Pages that can move Move means: Migrating them or reclaiming How we do it for high-order allocations: Take a page from the LRU, move the pages within that high-order block How we do it for unplug: Take the pages within the range of interest, move all the pages out of that range In both cases, you are taking a subsection of a zone and doing something to it. In the beginning, we'll be reclaiming because it's easier and it's relatively well understood. Once stable, then work can start on defrag properly. > I think we are in > conceptually unclean territory because we mix the two. If you must use > reclaim to get a portion of contiguous memory free then yes we have this > problem. The way I see it working is that defragmentation is a kernel thread starts compacting memory (possibly kswapd) when external fragmentation gets above a watermark. This is to avoid multiple defragment processes migrating into each others area of interest which would be locking hilarity. When a process fails to allocate a high-order block, it's because defragmentation was ineffective, probably due to low memory, and it enters direct reclaim as normal - just like a process enters direct reclaim because kswapd was not able to keep enough free memory. > If you can migrate pages then no there is no need for reclaiming > a part of a zone. You can occasionally shuffle pages around to > get a large continous chunk. If there is not enough memory then an > independent reclaim subsystem can take care of freeing a sufficient amount > of memory. Marrying the two seems to be getting a bit complex and maybe > very difficult to get right. > I don't intend to marry the two. However, I intend to handle reclaim first because it's needed whether defrag exists or not. > The classification of the memory allocations is useful > to find a potential starting point to reduce the minimum number of pages > to move to open up that hole. > Agreed. >>> Why would one want to allocate from the 1/4th of a zone? (Are we still >>> discussing Mel's antifrag scheme or what is this about?) >> Because you wanted contiguous blocks of pages. This is related to anti-frag >> because with anti-frag, reclaiming memory or migration memory will free up >> contiguous blocks. Without it, you're probably wasting your time. > > I am still not sure how this should work. Reclaim in a portion of the > reclaimable/movable portion of the zone? Or pick a huge page and simply > reclaim all the pages in that range? > Reclaim in a portion of the reclaimable/movable portion of the zone by; 1. Take a leader page from the LRU lists 2. 
Move the pages within that order-aligned block > This is required for anti-frag regardless of additonal zones right? > Right. > BTW If one would successfully do this partial reclaim thing then we also > have no need anymore DMA zones because we can free up memory in the DMA > area of a zone at will if we run short on memory there. > Possibly, but probably not. As well as providing an easy way to reclaim within a PFN range and have range-specific LRU lists, zones help keep pages from a PFN range that could have used a different PFN range. If the DMA range got filled with kmalloc() slab pages that could have been allocated from ZONE_NORMAL, directed reclaim won't help you. -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
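The two steps at the end of the message above (take a leader page from the LRU, then move the pages within its order-aligned block) could be sketched as follows. This is only an illustration of the shape of the operation; evict_page() is a made-up placeholder rather than an existing kernel function.

#include <linux/mm.h>
#include <linux/page-flags.h>

/* Assumed helper: reclaim or migrate a single page. */
extern int evict_page(struct page *page);

/*
 * Given one page taken from the LRU, try to empty the whole
 * order-aligned block that contains it.
 */
static void empty_block_around(struct page *leader, unsigned int order)
{
	unsigned long pfn = page_to_pfn(leader);
	unsigned long start = pfn & ~((1UL << order) - 1);
	unsigned long end = start + (1UL << order);

	for (pfn = start; pfn < end; pfn++) {
		struct page *page;

		if (!pfn_valid(pfn))
			continue;
		page = pfn_to_page(pfn);
		if (PageLRU(page))
			evict_page(page);
		/* pages that are already free need no work */
	}
}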
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-06 9:31 ` Mel Gorman @ 2006-12-06 17:31 ` Christoph Lameter 2006-12-08 1:21 ` Jeremy Fitzhardinge 0 siblings, 1 reply; 38+ messages in thread From: Christoph Lameter @ 2006-12-06 17:31 UTC (permalink / raw) To: Mel Gorman Cc: Andrew Morton, Linux Memory Management List, Linux Kernel Mailing List On Wed, 6 Dec 2006, Mel Gorman wrote: > Objective: Get contiguous block of free pages > Required: Pages that can move > Move means: Migrating them or reclaiming > How we do it for high-order allocations: Take a page from the LRU, move > the pages within that high-order block > How we do it for unplug: Take the pages within the range of interest, move > all the pages out of that range This is mostly the same. For unplug we would clear the freelists of page in the unplug range and take the pages off the LRU that are in the range of interest and then move them. Page migration takes pages off the LRU. > In both cases, you are taking a subsection of a zone and doing something to > it. In the beginning, we'll be reclaiming because it's easier and it's > relatively well understood. Once stable, then work can start on defrag > properly. Both cases require a scanning of the LRU or freelists for pages in that range. We are not actually doing reclaim since we do not age the pages. We evict them all and are not doing reclaim in the usual way. > I don't intend to marry the two. However, I intend to handle reclaim first > because it's needed whether defrag exists or not. Yes and we already have reclaim implemented. It can be used for freeing up memory in a zone. But if you want to open up a specific range then what we do may look a bit like reclaim but its fundamentally different since we unconditionally clear the range regardless of aging. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
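For the "clear the freelists of pages in the unplug range" step above, a heavily simplified sketch of the intent. page_is_free() and page_free_order() are assumptions standing in for the buddy allocator's internal predicates, the zone lock is omitted, and blocks straddling the range boundaries are ignored; it is meant only to show what "capturing" free pages in a range would involve.

#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/list.h>

/* Assumed helpers: is this page the head of a free buddy block, and
 * of what order?  The real tests live inside mm/page_alloc.c. */
extern int page_is_free(struct page *page);
extern unsigned int page_free_order(struct page *page);

/*
 * Pull free buddy blocks inside [start_pfn, end_pfn) off the freelists
 * and park them on 'captured' so they cannot be reallocated while the
 * range is being emptied for unplug.
 */
static void capture_free_pages(struct zone *zone, unsigned long start_pfn,
			       unsigned long end_pfn, struct list_head *captured)
{
	unsigned long pfn = start_pfn;

	while (pfn < end_pfn) {
		struct page *page;
		unsigned int order;

		if (!pfn_valid(pfn)) {
			pfn++;
			continue;
		}
		page = pfn_to_page(pfn);
		if (!page_is_free(page)) {
			pfn++;
			continue;
		}
		order = page_free_order(page);
		list_del(&page->lru);			/* off the buddy freelist */
		zone->free_area[order].nr_free--;
		list_add(&page->lru, captured);		/* held until offline completes */
		pfn += 1UL << order;
	}
}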
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-06 17:31 ` Christoph Lameter @ 2006-12-08 1:21 ` Jeremy Fitzhardinge 2006-12-08 2:20 ` Christoph Lameter 0 siblings, 1 reply; 38+ messages in thread From: Jeremy Fitzhardinge @ 2006-12-08 1:21 UTC (permalink / raw) To: Christoph Lameter Cc: Mel Gorman, Andrew Morton, Linux Memory Management List, Linux Kernel Mailing List Christoph Lameter wrote: > On Wed, 6 Dec 2006, Mel Gorman wrote: > >> Objective: Get contiguous block of free pages >> Required: Pages that can move >> Move means: Migrating them or reclaiming >> How we do it for high-order allocations: Take a page from the LRU, move >> the pages within that high-order block >> How we do it for unplug: Take the pages within the range of interest, move >> all the pages out of that range >> > > This is mostly the same. For unplug we would clear the freelists of > page in the unplug range and take the pages off the LRU that are in the > range of interest and then move them. Page migration takes pages off the > LRU. > You can also deal with memory hotplug by adding a Xen-style pseudo-physical vs machine address abstraction. This doesn't help with making space for contiguous allocations, but it does allow you to move "physical" pages from one machine page to another if you want to. The paravirt ops infrastructure has already appeared in -git, and I'll soon have patches to allow Xen's paravirtualized mmu mode to work with it, which is a superset of what would be required to implement movable pages for hotpluggable memory. (I don't know if you actually want to consider this approach; I'm just pointing out that it definitely a bad idea to conflate the two problems of memory fragmentation and hotplug.) J -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
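For readers unfamiliar with the Xen model Jeremy refers to, the pseudo-physical versus machine split is essentially one extra level of indirection per page frame. The toy sketch below is not Xen's actual interface (its real p2m/m2p machinery differs); pfn_to_mfn_table, remap_pfn and the copy_frame callback are invented names used only to show the principle.

#include <linux/types.h>

/*
 * One entry per pseudo-physical frame; the value is the machine frame
 * currently backing it.  Ballooning or hot-unplug can rewrite entries.
 */
static unsigned long *pfn_to_mfn_table;

static unsigned long pfn_to_mfn(unsigned long pfn)
{
	return pfn_to_mfn_table[pfn];
}

/*
 * "Moving" a page then becomes a copy plus a table update; the
 * kernel-virtual address of the pseudo-physical page never changes.
 */
static void remap_pfn(unsigned long pfn, unsigned long new_mfn,
		      void (*copy_frame)(unsigned long from, unsigned long to))
{
	copy_frame(pfn_to_mfn_table[pfn], new_mfn);
	pfn_to_mfn_table[pfn] = new_mfn;
	/* real code must also fix up any page tables and TLB entries
	 * that still refer to the old machine frame */
}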
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-08 1:21 ` Jeremy Fitzhardinge @ 2006-12-08 2:20 ` Christoph Lameter 2006-12-08 6:11 ` Jeremy Fitzhardinge 0 siblings, 1 reply; 38+ messages in thread From: Christoph Lameter @ 2006-12-08 2:20 UTC (permalink / raw) To: Jeremy Fitzhardinge Cc: Mel Gorman, Andrew Morton, Linux Memory Management List, Linux Kernel Mailing List On Thu, 7 Dec 2006, Jeremy Fitzhardinge wrote: > You can also deal with memory hotplug by adding a Xen-style > pseudo-physical vs machine address abstraction. This doesn't help with > making space for contiguous allocations, but it does allow you to move > "physical" pages from one machine page to another if you want to. The > paravirt ops infrastructure has already appeared in -git, and I'll soon > have patches to allow Xen's paravirtualized mmu mode to work with it, > which is a superset of what would be required to implement movable pages > for hotpluggable memory. > > (I don't know if you actually want to consider this approach; I'm just > pointing out that it definitely a bad idea to conflate the two problems > of memory fragmentation and hotplug.) The same can be done using the virtual->physical mappings that exist on many platforms for the kernel address space (ia64 dynamically calculates those, x86_64 uses a page table with 2M pages for mapping the kernel). The problem is that the 1-1 mapping between physical and virtual addresses will have to be (at least partially) sacrificed which may lead to complications with DMA devices. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-08 2:20 ` Christoph Lameter @ 2006-12-08 6:11 ` Jeremy Fitzhardinge 0 siblings, 0 replies; 38+ messages in thread From: Jeremy Fitzhardinge @ 2006-12-08 6:11 UTC (permalink / raw) To: Christoph Lameter Cc: Mel Gorman, Andrew Morton, Linux Memory Management List, Linux Kernel Mailing List Christoph Lameter wrote: > The same can be done using the virtual->physical mappings that exist on > many platforms for the kernel address space (ia64 dynamically calculates > those, x86_64 uses a page table with 2M pages for mapping the kernel). Yes, that's basically what Xen does - there's a nonlinear mapping from kernel virtual to machine pages (and usermode pages are put through the same transformation before being mapped). > The > problem is that the 1-1 mapping between physical and virtual addresses > will have to be (at least partially) sacrificed which may lead to > complications with DMA devices. > Yes, any driver which expects contiguous kernel pages to be physically contiguous will be sorely disappointed. This isn't too hard to deal with (since such drivers are often buggy anyway, making poor assumptions about the relationship between physical addresses and bus addresses). An IOMMU could help as well. J -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-04 22:22 ` Andrew Morton 2006-12-05 16:00 ` Christoph Lameter @ 2006-12-05 18:10 ` Mel Gorman 1 sibling, 0 replies; 38+ messages in thread From: Mel Gorman @ 2006-12-05 18:10 UTC (permalink / raw) To: Andrew Morton Cc: Christoph Lameter, Linux Memory Management List, Linux Kernel Mailing List On (04/12/06 14:22), Andrew Morton didst pronounce: > On Mon, 4 Dec 2006 13:43:44 -0800 (PST) > Christoph Lameter <clameter@sgi.com> wrote: > > > On Mon, 4 Dec 2006, Andrew Morton wrote: > > > > > What happens when we need to run reclaim against just a section of a zone? > > > Lumpy-reclaim could be used here; perhaps that's Mel's approach too? > > > > Why would we run reclaim against a section of a zone? > > Strange question. Because all the pages are in use for something else. > Indeed. If trying to get contiguous pages, we would need to reclaim within a PFN range. However, lumpy-reclaim will be doing something like that already so the code can be reused. > > > We'd need new infrastructure to perform the > > > section-of-a-zone<->physical-memory-block mapping, and to track various > > > states of the section-of-a-zone. This will be complex, and buggy. It will > > > probably require the introduction of some sort of "sub-zone" structure. At > > > which stage people would be justified in asking "why didn't you just use > > > zones - that's what they're for?" > > > > Mel aready has that for anti-frag. The sections are per MAX_ORDER area > > and the only states are movable unmovable and reclaimable. There is > > nothing more to it. No other state information should be added. Why would > > we need sub zones? For what purpose? > > You're proposing that for memory hot-unplug, we take a single zone and by > some means subdivide that into sections which correspond to physically > hot-unpluggable memory. That certainly does not map onto MAX_ORDER > sections. Not exactly because implies we are just going to do something zone-like to prevent kernel allocations ever using unpluggable memory in which case we should just use zones. The idea instead would be that unmovable allocations would reside at the lower PFNs only but not 100% strictly enforced. I group MAX_ORDER_NR_PAGES together based on their type of allocation but selecting which block of pages to use for an allocation type is relatively rare because we depend on the free-lists to contain free pages of a specific type. The failure case is where at some point during the lifetime of the system, there were a very large number of active unmovable allocations that scattered throughout the address space (large number of processes with large numbers of page tables might trigger it). When that happens, anti-frag would have failed to offline large portions of memory simply because of the workloads quantity of unmovable pages was too large and it fell back rather than going OOM due to zone restrictions. > > > > > Then we should be doing some work to cut down the number of unmovable > > > > allocations. > > > > > > That's rather pointless. A feature is either reliable or it is not. We'll > > > never be able to make all kernel allocations reclaimable/moveable so we'll > > > never be reliable with this approach. I don't see any alternative to the > > > never-allocate-kernel-objects-in-removeable-memory approach. > > > > What feature are you talking about? > > Memory hot-unplug, of course. 
> > > Why would all allocations need to be movable when we have a portion for > > unmovable allocations? > > So you're proposing that we take a single zone, then divide that zone up > into two sections. One section is non-hot-unpluggable and is for > un-moveable allocations. The other section is hot-unpluggable and only > moveable allocations may be performed there. > > If so, then this will require addition of new infrastructure which will be > to some extent duplicative of zones and I see no reason to do that: it'd be > simpler to divide the physical memory arena into two separate zones. > > If that is not what you are proposing then please tell us what you are > proposing, completely, and with sufficient detail for us to work out what > the heck you're trying to tell us. Please try to avoid uninformative > rhetorical questions, for they are starting to get quite irritating. > Thanks. Anti-frag groups pages based on type (unmovable, reapable, movable). Each type has it's own set of free-lists which we use as much as possible. When those lists deplete, we look at the free-lists for the other types, steal a free block and put it on the requested allocations free lists. For anti-frag to help hotplug-remove of a section, the MAX_ORDER_NR_PAGES needs to be the same as the number of pages in a section. In most cases, this is not unreasonable as sections tend to be fairly small. To hot-remove a DIMM, we need to keep unmovable pages out of DIMMS. The proposal is to keep unmovable pages at the lower PFNs so that DIMMS belonging to the higher DIMMS remain freeable. As anti-frag clusters MAX_ORDER_NR_PAGES, you need to select what blocks of MAX_ORDER_NR_PAGES are used a bit more carefully. You can do this in three ways, depending on how aggressive we're willing to be. Option 1: Search for the largest free block in the other lists. Of those blocks, select the one with the lowest PFN. Pros: Easy, very little work Cons: No guarantee that a large free block is also a low PFN Option 2: Remember what the highest unmovable PFN is. Take all the free pages in the next MAX_ORDER_NR_PAGES block and put them on the unmovable free lists. Pageout or migrate all other pages in that block Pros: Keeps unmovable pages at the lower pfns as much as possible Cons: More code required, more IO and page copying. Workloads with sudden spikes in the number of unmovable pages may cause problems Option 3: Similar to 2, keep track of the highest unmovable PFN is. When the free lists for unmovable pages are depleted, search for all movable and reclaimable pages below that PFN (probably via the LRU) and move them out of the way. Try again. If it still fails, then take the next MAX_ORDER_NR_PAGES as in option 2 Pros: Would be pretty strict about keeping unmovable pages out of the way. The failure case is when there are as many unmovable pages as there is physical memory in which case, you're not unplugging anyway Cons: More scanning when unmovable freelists deplete, more IO and page copying This is not the same as zones which would prevent kernel allocations taking place in ZONE_MOVABLE. That approach leaves you with a kernel that will still fail e1000 allocations due to external fragmentation in the allowable zone and kernel allocations might trigger OOM because all the free memory was in ZONE_MOVABLE. The options above should not require zone infrastructure other than the LRU lists for scanning. Is this sufficient detail? 
-- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
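Option 1 in the message above ("search for the largest free block in the other lists, of those select the one with the lowest PFN") would look roughly like the sketch below. The per-type free_list[] layout follows the anti-fragmentation patches only in spirit; NR_ALLOC_TYPES, struct typed_free_area and find_fallback_block() are names invented for the example.

#include <linux/mm.h>
#include <linux/list.h>

#define NR_ALLOC_TYPES	3	/* assumed: unmovable, reclaimable, movable */

/* Assumed layout: one freelist per (order, allocation type). */
struct typed_free_area {
	struct list_head free_list[NR_ALLOC_TYPES];
	unsigned long nr_free;
};

/*
 * Find a fallback block for 'type': prefer the largest order available,
 * and among blocks of that order take the one with the lowest PFN so
 * that unmovable allocations tend to stay at the bottom of the zone.
 */
static struct page *find_fallback_block(struct typed_free_area *area,
					unsigned int max_order, int type)
{
	struct page *best = NULL;
	int order, other;

	for (order = max_order - 1; order >= 0; order--) {
		for (other = 0; other < NR_ALLOC_TYPES; other++) {
			struct page *page;

			if (other == type)
				continue;
			list_for_each_entry(page, &area[order].free_list[other], lru)
				if (!best || page_to_pfn(page) < page_to_pfn(best))
					best = page;
		}
		if (best)	/* largest order wins; lowest PFN within it */
			break;
	}
	return best;
}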
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-04 19:30 ` Andrew Morton 2006-12-04 19:41 ` Christoph Lameter @ 2006-12-04 20:34 ` Mel Gorman 2006-12-04 22:34 ` Andrew Morton 2006-12-05 15:48 ` Andy Whitcroft 2006-12-04 20:37 ` Peter Zijlstra 2 siblings, 2 replies; 38+ messages in thread From: Mel Gorman @ 2006-12-04 20:34 UTC (permalink / raw) To: Andrew Morton Cc: clameter, Andy Whitcroft, Linux Memory Management List, Linux Kernel Mailing List On Mon, 4 Dec 2006, Andrew Morton wrote: > On Mon, 4 Dec 2006 14:07:47 +0000 > mel@skynet.ie (Mel Gorman) wrote: > >> o copy_strings() and variants are no longer setting the flag as the pages >> are not obviously movable when I took a much closer look. >> >> o The arch function alloc_zeroed_user_highpage() is now called >> __alloc_zeroed_user_highpage and takes flags related to >> movability that will be applied. alloc_zeroed_user_highpage() >> calls __alloc_zeroed_user_highpage() with no additional flags to >> preserve existing behavior of the API for out-of-tree users and >> alloc_zeroed_user_highpage_movable() sets the __GFP_MOVABLE flag. >> >> o new_inode() documents that it uses GFP_HIGH_MOVABLE and callers are expected >> to call mapping_set_gfp_mask() if that is not suitable. > > umm, OK. Could we please have some sort of statement pinning down the > exact semantics of __GFP_MOVABLE, and what its envisaged applications are? > "An allocation marked __GFP_MOVABLE may be moved using either page migration or by paging out." Right now, it's paging out. It isn't smart enough to use page migration. Bottom line, if a __GFP_MOVABLE allocation is in an awkward place, it can be got rid of somewhow. > My concern is that __GFP_MOVABLE is useful for fragmentation-avoidance, but > useless for memory hot-unplug. Anti-fragmentation did allow SPARSEMEM sections to be off-lined when it was tested a long time ago so it's not useless. Where it could help general hotplug remove is by keeping non-movable allocations at the lower PFNs as much as possible. > So that if/when hot-unplug comes along > we'll add more gunk which is a somewhat-superset of the GFP_MOVABLE > infrastructure, hence we didn't need the GFP_MOVABLE code. Or something. > If/when hot-unplug comes along, it's going to need some way of identifying pages that are safe to place in a hot-unpluggable areas so you'll end up with something like __GFP_MOVABLE. > That depends on how we do hot-unplug, if we do it. I continue to suspect > that it'll be done via memory zones: effectively by resurrecting > GFP_HIGHMEM. In which case there's little overlap with anti-frag. And will introduce a zone that must be tuned at boot-time which is undesirable but doable. With arch-independent zone-sizing in place, it's considerably easier to create such a zone and then use __GFP_MOVABLE as a zone modifier within the allocator. I have really old patches that do something like this that I can bring up to date. However, that zone will only be usable by __GFP_MOVABLE pages and will not help the e1000 case for example. On the other hand anti-frag (exists) + keeping non-movable pages at lowest-possible-pfn (doesn't exist yet) would allow some DIMMs to be unplugged without needing additional zones or tuning. > (btw, I > have a suspicion that the most important application of memory hot-unplug > will be power management: destructively turning off DIMMs). > You're probably right. > I'd also like to pin down the situation with lumpy-reclaim versus > anti-fragmentation. 
No offence None taken. >, but I would of course prefer to avoid > merging the anti-frag patches simply based on their stupendous size. > It seems to me that lumpy-reclaim is suitable for the e1000 problem >, but perhaps not for the hugetlbpage problem. I believe you'll hit similar problems even with lumpy-reclaim for the e1000 (I've added Andy to the cc so he can comment more). Lumpy provides a much smarter way of freeing higher-order contiguous blocks without having to reclaim 95%+ of memory - this is good. However, if you are currently seeing situations where the allocations fails even after you page out everything possible, smarter reclaim that eventually pages out everything anyway will not help you (chances are it's something like page tables that are in your way). This is where anti-frag comes in. It clusters pages together based on their type - unmovable, reapable (inode caches, short-lived kernel allocations, skbuffs etc) and movable. When kswapd kicks in, the slab caches will be reaped. As reapable pages are clustered together, that will free some contiguous areas - probably enough for the e1000 allocations to succeed! If that doesn't work, kswapd and direct reclaim will start reclaiming the "movable" pages. Without lumpy reclaim, 95%+ of memory could be paged out which is bad. Lumpy finds the contiguous pages faster and with less IO, that's why it's important. Tests I am aware of show that lumpy-reclaim on it's own makes little or no difference to the hugetlb page problem. However, with anti-frag, hugetlb-sized allocations succeed much more often even when under memory pressure. > Whereas anti-fragmentation adds > vastly more code, but can address both problems? Or something. > Anti-frag goes a long way to addressing both problems. Lumpy-reclaim increases it's success rates under memory pressure and reduces the amount of reclaim that occurs. > IOW: big-picture where-do-we-go-from-here stuff. > Start with lumpy reclaim, then I'd like to merge page clustering piece by piece, ideally with one of the people with e1000 problems testing to see does it make a difference. Assuming they are shown to help, where we'd go from there would be stuff like; 1. Keep non-movable and reapable allocations at the lower PFNs as much as possible. This is so DIMMS for higher PFNs can be removed (doesn't exist) 2. Use page migration to compact memory rather than depending solely on reclaim (doesn't exist) 3. Introduce a mechanism for marking a group of pages as being offlined so that they are not reallocated (code that does something like this exists) 4. Resurrect the hotplug-remove code (exists, but probably very stale) 5. Allow allocations for hugepages outside of the pool as long as the process remains with it's locked_vm limits (patches were posted to libhugetlbfs last Friday. will post to linux-mm tomorrow). -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
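To tie the discussion back to the patch itself: the semantics Mel gives above ("may be moved using either page migration or by paging out") reduce to one new gfp flag plus a convenience combination. The sketch below shows the likely shape of the include/linux/gfp.h change; the bit value and the exact expansion of GFP_HIGH_MOVABLE are assumptions for illustration, not necessarily what the posted patch uses.

/* include/linux/gfp.h (sketch) */
#define __GFP_MOVABLE	((__force gfp_t)0x80000u)  /* contents may be migrated or reclaimed */

/* GFP_HIGHUSER plus the promise that the page can be moved later. */
#define GFP_HIGH_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)

Callers that know their pages can always be moved or discarded, for example page cache mappings set up through mapping_set_gfp_mask(), pass GFP_HIGH_MOVABLE; anything pinned for DMA or long-lived kernel state keeps GFP_KERNEL or GFP_HIGHUSER and is grouped with the unmovable blocks.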
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-04 20:34 ` Mel Gorman @ 2006-12-04 22:34 ` Andrew Morton 2006-12-04 23:45 ` Mel Gorman 2006-12-05 15:52 ` Andy Whitcroft 2006-12-05 15:48 ` Andy Whitcroft 1 sibling, 2 replies; 38+ messages in thread From: Andrew Morton @ 2006-12-04 22:34 UTC (permalink / raw) To: Mel Gorman Cc: clameter, Andy Whitcroft, Linux Memory Management List, Linux Kernel Mailing List On Mon, 4 Dec 2006 20:34:29 +0000 (GMT) Mel Gorman <mel@csn.ul.ie> wrote: > > IOW: big-picture where-do-we-go-from-here stuff. > > > > Start with lumpy reclaim, I had lumpy-reclaim in my todo-queue but it seems to have gone away. I think I need a lumpy-reclaim resend, please. > then I'd like to merge page clustering piece by > piece, ideally with one of the people with e1000 problems testing to see > does it make a difference. > > Assuming they are shown to help, where we'd go from there would be stuff > like; > > 1. Keep non-movable and reapable allocations at the lower PFNs as much as > possible. This is so DIMMS for higher PFNs can be removed (doesn't > exist) "as much as possible" won't suffice, I suspect. If there's any chance at all that a non-moveable page can land in a hot-unpluggable region then there will be failure scenarios. Easy-to-hit ones, I suspect. > 2. Use page migration to compact memory rather than depending solely on > reclaim (doesn't exist) Yup. > 3. Introduce a mechanism for marking a group of pages as being offlined so > that they are not reallocated (code that does something like this > exists) yup. > 4. Resurrect the hotplug-remove code (exists, but probably very stale) I don't even remember what that looks like. > 5. Allow allocations for hugepages outside of the pool as long as the > process remains with it's locked_vm limits (patches were posted to > libhugetlbfs last Friday. will post to linux-mm tomorrow). hm. I'm not saying that we need to do memory hot-unplug immediately. But the overlaps between this and anti-frag and lumpiness are sufficient that I do think that we need to work out how we'll implement hot-unplug, so we don't screw ourselves up later on. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-04 22:34 ` Andrew Morton @ 2006-12-04 23:45 ` Mel Gorman 2006-12-05 1:16 ` KAMEZAWA Hiroyuki 2006-12-05 16:14 ` Christoph Lameter 2006-12-05 15:52 ` Andy Whitcroft 1 sibling, 2 replies; 38+ messages in thread From: Mel Gorman @ 2006-12-04 23:45 UTC (permalink / raw) To: Andrew Morton Cc: clameter, Andy Whitcroft, Linux Memory Management List, Linux Kernel Mailing List On (04/12/06 14:34), Andrew Morton didst pronounce: > On Mon, 4 Dec 2006 20:34:29 +0000 (GMT) > Mel Gorman <mel@csn.ul.ie> wrote: > > > > IOW: big-picture where-do-we-go-from-here stuff. > > > > > > > Start with lumpy reclaim, > > I had lumpy-reclaim in my todo-queue but it seems to have gone away. I > think I need a lumpy-reclaim resend, please. > I believe the patches conflicted with the latest -mm and it was going through another rebase and retest cycle. > > then I'd like to merge page clustering piece by > > piece, ideally with one of the people with e1000 problems testing to see > > does it make a difference. > > > > Assuming they are shown to help, where we'd go from there would be stuff > > like; > > > > 1. Keep non-movable and reapable allocations at the lower PFNs as much as > > possible. This is so DIMMS for higher PFNs can be removed (doesn't > > exist) > > "as much as possible" won't suffice, I suspect. If there's any chance at > all that a non-moveable page can land in a hot-unpluggable region then > there will be failure scenarios. Easy-to-hit ones, I suspect. > There are five situations of interest I can think of 1. Satisfying high-order allocations such as those required for e1000 2. Being able to grow the hugepage pool when the system has been running for any length of time 3. Offlining a SPARSEMEM section of memory 4. Offlining a DIMM 5. Offlining a Node anti-frag + lumpy-reclaim definitly help situation 2 in the test situations I've used. I cannot trigger situation 1 on demand but if situation 2 works at all, I imagine situation 1 does as well. Situation 3 used to be helped by anti-frag, particularly if a MAX_ORDER_NR_PAGES == NUMBER_OF_PAGES_IN_A_SECTION. I was at one point able to offline memory on an x86 although the stability left a lot to be desired. Zones are overkill here. For Situation 4, a zone may be needed because MAX_ORDER_NR_PAGES would have to be set to too high for anti-frag to be effective. However, zones would have to be tuned at boot-time and that would be an annoying restriction. If DIMMs are being offlined for power reasons, it would be sufficient to be best-effort. Situation 5 requires that a hotpluggable node only allows __GFP_MOVABLE allocations in the zonelists. This would probably involving having one zone that only allowed __GFP_MOVABLE. What is particularly important here is that using a zone would solve situation 3, 4 or 5 a reliable fashion but it does not help situations 1 and 2 at all. anti-frag+lumpy-reclaim greatly improve the situation for situations 1-3. By keeping non-movable allocations at lower PFNs, situation 4 will sometimes work but not always. In other words, to properly address all situations, we may need anti-frag and zones, not one or the other. > > 2. Use page migration to compact memory rather than depending solely on > > reclaim (doesn't exist) > > Yup. > > > 3. Introduce a mechanism for marking a group of pages as being offlined so > > that they are not reallocated (code that does something like this > > exists) > > yup. > > > 4. 
Resurrect the hotplug-remove code (exists, but probably very stale) > > I don't even remember what that looks like. > I don't fully recall either. When I last got it working, though, 10 months or so ago, it wasn't in the best of shape. What I recall is that it worked by marking a memory section as going offline. All pages within that section were marked as under "page-capture" and once freed, never allocated again. It would then reap caches and reclaim pages until the section could be marked fully offline. > > 5. Allow allocations for hugepages outside of the pool as long as the > > process remains with it's locked_vm limits (patches were posted to > > libhugetlbfs last Friday. will post to linux-mm tomorrow). > > hm. > > > I'm not saying that we need to do memory hot-unplug immediately. But the > overlaps between this and anti-frag and lumpiness are sufficient that I do > think that we need to work out how we'll implement hot-unplug, so we don't > screw ourselves up later on. Ok, how about this as a rough roadmap. It is mainly concerned with how to place pages. 1. Use lumpy-reclaim to intelligently reclaim contiguous pages. The same logic can be used to reclaim within a PFN range 2. Merge anti-frag to help high-order allocations, hugetlbpage allocations and freeing up SPARSEMEM sections of memory 3. Anti-frag includes support for page flags that affect a MAX_ORDER block of pages. These flags can be used to mark a section of memory that should not be allocated from. This is of interest to both hugetlb page allocation and memory hot-remove. Use the flags to mark a MAX_ORDER_NR_PAGES block that is currently being freed up and shouldn't be allocated. 4. Use anti-frag fallback logic to bias unmovable allocations to the lower PFNs. 5. Add arch support where possible for offlining sections of memory that can be powered down. 6. Add arch support where possible to power down a DIMM when the memory sections that make it up have been offlined. This is an extension of step 5 only. 7. Add a zone that only allows __GFP_MOVABLE allocations so that sections can 100% be reclaimed and powered-down 8. Allow nodes to only have a zone for __GFP_MOVABLE allocations so that whole nodes can be offlined. Does that make any sense? -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
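Roadmap step 3 above (flags covering a whole MAX_ORDER block, including a "do not allocate" state) amounts to a small bitmap indexed by block number. The sketch below only shows the data structure; BLOCK_BITS, BLOCK_NO_ALLOC and the helpers are invented for the example, and the bitmap allocation, locking and zone-relative PFN handling are omitted.

#include <linux/bitops.h>
#include <linux/mmzone.h>	/* MAX_ORDER */

#define BLOCK_BITS	2	/* assumed: allocation type plus a "do not allocate" bit */
#define BLOCK_NO_ALLOC	1	/* assumed position of the "do not allocate" bit */

/* One group of BLOCK_BITS bits per MAX_ORDER block in the zone. */
static unsigned long *block_flags;

static unsigned long block_bit(unsigned long pfn)
{
	/* pfn is assumed to be relative to the start of the zone */
	return (pfn >> (MAX_ORDER - 1)) * BLOCK_BITS + BLOCK_NO_ALLOC;
}

/* Mark the block holding 'pfn' as off-limits while it is emptied. */
static void mark_block_no_alloc(unsigned long pfn)
{
	set_bit(block_bit(pfn), block_flags);
}

static int block_is_no_alloc(unsigned long pfn)
{
	return test_bit(block_bit(pfn), block_flags);
}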
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-04 23:45 ` Mel Gorman @ 2006-12-05 1:16 ` KAMEZAWA Hiroyuki 2006-12-05 10:03 ` Mel Gorman 0 siblings, 1 reply; 38+ messages in thread From: KAMEZAWA Hiroyuki @ 2006-12-05 1:16 UTC (permalink / raw) To: Mel Gorman; +Cc: akpm, clameter, apw, linux-mm, linux-kernel Hi, your plan looks good to me. Some comments. On Mon, 4 Dec 2006 23:45:32 +0000 (GMT) Mel Gorman <mel@csn.ul.ie> wrote: > 1. Use lumpy-reclaim to intelligently reclaim contigous pages. The same > logic can be used to reclaim within a PFN range > 2. Merge anti-frag to help high-order allocations, hugetlbpage > allocations and freeing up SPARSEMEM sections of memory For freeing up SPARSEMEM sections of memory? It looks like you assume MAX_ORDER_NR_PAGES equals PAGES_PER_SECTION. Please don't assume that when you talk about generic arch code. > 3. Anti-frag includes support for page flags that affected a MAX_ORDER block > of pages. These flags can be used to mark a section of memory that should > not be allocated from. This is of interest to both hugetlb page allocatoin > and memory hot-remove. Use the flags to mark a MAX_ORDER_NR_PAGES that > is currently being freed up and shouldn't be allocated. > 4. Use anti-frag fallback logic to bias unmovable allocations to the lower > PFNs. I think this can be one of the most difficult things ;) > 5. Add arch support where possible for offlining sections of memory that > can be powered down. I had a patch for ACPI-memory-hot-unplug, which ties memory sections to memory chunks on ACPI. > 6. Add arch support where possible to power down a DIMM when the memory > sections that make it up have been offlined. This is an extenstion of > step 5 only. > 7. Add a zone that only allows __GFP_MOVABLE allocations so that > sections can 100% be reclaimed and powered-down > 8. Allow nodes to only have a zone for __GFP_MOVABLE allocations so that > whole nodes can be offlined. > I (numa-node-hotplug) need 7 and 8, basically. Other people may not. IMHO: For DIMM unplug, I suspect that we'll finally divide memory into core memory and hot-pluggable memory by pgdat, even on SMP. If we use pgdat for that purpose, almost all the necessary infrastructure (statistics, etc.) is ready. But if we find a better way, that's good. -Kame -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-05 1:16 ` KAMEZAWA Hiroyuki @ 2006-12-05 10:03 ` Mel Gorman 2006-12-05 16:05 ` Christoph Lameter 0 siblings, 1 reply; 38+ messages in thread From: Mel Gorman @ 2006-12-05 10:03 UTC (permalink / raw) To: KAMEZAWA Hiroyuki; +Cc: akpm, clameter, apw, linux-mm, linux-kernel On Tue, 5 Dec 2006, KAMEZAWA Hiroyuki wrote: > Hi, your plan looks good to me. Thanks. > some comments. > > On Mon, 4 Dec 2006 23:45:32 +0000 (GMT) > Mel Gorman <mel@csn.ul.ie> wrote: >> 1. Use lumpy-reclaim to intelligently reclaim contigous pages. The same >> logic can be used to reclaim within a PFN range >> 2. Merge anti-frag to help high-order allocations, hugetlbpage >> allocations and freeing up SPARSEMEM sections of memory > > For freeing up SPARSEMEM sections of memory ? Yes. > It looks that you assumes MAX_ORDER_NR_PAGES equals to PAGES_PER_SECTION. > plz don't assume that when you talk about generic arch code. > Yes, I was making the assumption that MAX_ORDER would be increased when memory hot-remove was possible so that MAX_ORDER_NR_PAGES == PAGES_PER_SECTION. I think it would be a reasonable restriction unless section sizes can get really large. >> 3. Anti-frag includes support for page flags that affected a MAX_ORDER block >> of pages. These flags can be used to mark a section of memory that should >> not be allocated from. This is of interest to both hugetlb page allocatoin >> and memory hot-remove. Use the flags to mark a MAX_ORDER_NR_PAGES that >> is currently being freed up and shouldn't be allocated. > >> 4. Use anti-frag fallback logic to bias unmovable allocations to the lower >> PFNs. > > I think this can be one of the most diffcult things ;) > It depends on how aggressive the bias is. If it's just "try and keep them low but don't work too hard", then it's a simple case of searching the free list at the order we are falling back to. If the lowest MAX_ORDER_NR_PAGES must be reclaimed for use by unmovable allocations, it gets harder but it's not impossible. >> 5. Add arch support where possible for offlining sections of memory that >> can be powered down. > > I had a patch for ACPI-memory-hot-unplug, which ties memory sections to memory > chunk on ACPI. > Great, I had a strong feeling something like that existed. >> 6. Add arch support where possible to power down a DIMM when the memory >> sections that make it up have been offlined. This is an extenstion of >> step 5 only. >> 7. Add a zone that only allows __GFP_MOVABLE allocations so that >> sections can 100% be reclaimed and powered-down >> 8. Allow nodes to only have a zone for __GFP_MOVABLE allocations so that >> whole nodes can be offlined. >> > I (numa-node-hotplug) needs 7 and 8 basically. And Other people may not. Power would be more interested in 2 for SPARSEMEM sections. Also, while getting 7 and 8 right will be important, it won't help stuff like the e1000 problem which is why I put it towards the end. I'm going to relook at the adding-zone patches today and see if they can be brought forward so we can take a proper look. > IMHO: > For DIMM unplug, I suspect that we'll finally divide memory to core-memory and > hot-pluggable by pgdat even on SMP. If we use pgdat for that purpose, almost all > necessary infrastructure(statistics ,etc..) is ready. > But if we find better way, it's good. > That is one possibility. There are people working on fake nodes for containers at the moment. 
If that pans out, the infrastructure would be available to create one node per DIMM. -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
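The assumption Mel states above (MAX_ORDER_NR_PAGES equal to PAGES_PER_SECTION when sections are to be offlined this way) is the kind of thing that is cheap to pin down at build time. A possible guard, purely illustrative and only meaningful on configurations where the hot-remove path really relies on that equality:

#include <linux/kernel.h>
#include <linux/mmzone.h>

#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_MEMORY_HOTPLUG)
/* Called from the (hypothetical) hot-remove setup path. */
static inline void check_section_assumption(void)
{
	/* Fails the build if one MAX_ORDER block no longer covers
	 * exactly one SPARSEMEM section. */
	BUILD_BUG_ON((1UL << (MAX_ORDER - 1)) != PAGES_PER_SECTION);
}
#endif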
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-05 10:03 ` Mel Gorman @ 2006-12-05 16:05 ` Christoph Lameter 2006-12-05 18:26 ` Andrew Morton 0 siblings, 1 reply; 38+ messages in thread From: Christoph Lameter @ 2006-12-05 16:05 UTC (permalink / raw) To: Mel Gorman; +Cc: KAMEZAWA Hiroyuki, akpm, apw, linux-mm, linux-kernel On Tue, 5 Dec 2006, Mel Gorman wrote: > That is one possibility. There are people working on fake nodes for containers > at the moment. If that pans out, the infrastructure would be available to > create one node per DIMM. Right, that is a hack in use for one project. We would be adding huge amounts of VM overhead if we do a node per DIMM. So a desktop system with two DIMMs is to be treated like a NUMA system? Or how else do we deal with the multitude of load balancing situations that the additional nodes will generate? -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated 2006-12-05 16:05 ` Christoph Lameter @ 2006-12-05 18:26 ` Andrew Morton 2006-12-05 19:59 ` Christoph Lameter 0 siblings, 1 reply; 38+ messages in thread From: Andrew Morton @ 2006-12-05 18:26 UTC (permalink / raw) To: Christoph Lameter Cc: Mel Gorman, KAMEZAWA Hiroyuki, apw, linux-mm, linux-kernel On Tue, 5 Dec 2006 08:05:16 -0800 (PST) Christoph Lameter <clameter@sgi.com> wrote: > On Tue, 5 Dec 2006, Mel Gorman wrote: > > > That is one possibility. There are people working on fake nodes for containers > > at the moment. If that pans out, the infrastructure would be available to > > create one node per DIMM. > > Right that is a hack in use for one project. Other projects can use it too. It has the distinct advantage that it works with today's VM. > We would be adding huge > amounts of VM overhead if we do a node per DIMM. No we wouldn't. > So a desktop system with two dimms is to be treated like a NUMA > system? Could do that. Or make them separate zones. > Or how else do we deal with the multitude of load balancing > situations that the additional nodes will generate? No such problems are known. -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a> ^ permalink raw reply [flat|nested] 38+ messages in thread
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-05 18:26 ` Andrew Morton
@ 2006-12-05 19:59 ` Christoph Lameter
0 siblings, 0 replies; 38+ messages in thread
From: Christoph Lameter @ 2006-12-05 19:59 UTC (permalink / raw)
To: Andrew Morton; +Cc: Mel Gorman, KAMEZAWA Hiroyuki, apw, linux-mm, linux-kernel

On Tue, 5 Dec 2006, Andrew Morton wrote:

> On Tue, 5 Dec 2006 08:05:16 -0800 (PST)
> Christoph Lameter <clameter@sgi.com> wrote:
>
>> On Tue, 5 Dec 2006, Mel Gorman wrote:
>>
>>> That is one possibility. There are people working on fake nodes for
>>> containers at the moment. If that pans out, the infrastructure would
>>> be available to create one node per DIMM.
>>
>> Right, that is a hack in use for one project.
>
> Other projects can use it too. It has the distinct advantage that it
> works with today's VM.

I'd be glad to make NUMA the default config. That allows us to completely
get rid of zones; just keep nodes around. Then we have a DMA node and a
DMA32 node (working like a headless memory node) and a highmem node. This
would simplify the VM. But please do not have multiple nodes and multiple
zones. If we do not like the term "nodes" then let's call them zones and
get rid of the nodes.
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 23:45 ` Mel Gorman
2006-12-05  1:16 ` KAMEZAWA Hiroyuki
@ 2006-12-05 16:14 ` Christoph Lameter
2006-12-05 17:17 ` Mel Gorman
1 sibling, 1 reply; 38+ messages in thread
From: Christoph Lameter @ 2006-12-05 16:14 UTC (permalink / raw)
To: Mel Gorman
Cc: Andrew Morton, Andy Whitcroft, Linux Memory Management List, Linux Kernel Mailing List

On Mon, 4 Dec 2006, Mel Gorman wrote:

> 4. Offlining a DIMM
> 5. Offlining a Node
>
> For Situation 4, a zone may be needed because MAX_ORDER_NR_PAGES would
> have to be set too high for anti-frag to be effective. However, zones
> would have to be tuned at boot time and that would be an annoying
> restriction. If DIMMs are being offlined for power reasons, it would be
> sufficient to be best-effort.

We are able to depopulate a portion of the pages in a MAX_ORDER chunk if
the page struct pages on the borders of that portion are not stored on
the DIMM. Set a flag in the page structs of those page struct pages
straddling the border and free the page struct pages describing only
memory in the DIMM.

> Situation 5 requires that a hotpluggable node only allows __GFP_MOVABLE
> allocations in the zonelists. This would probably involve having one
> zone that only allowed __GFP_MOVABLE.

This is *node* hotplug, and we already have a node/zone structure where
we could set some option to require only movable allocations. Note that
NUMA nodes have always had only a single effective zone. There are some
exceptions on some architectures where we have additional DMA zones on
the first or first two nodes, but NUMA memory policies will *not* allow
exercising control over allocations from those zones.

> In other words, to properly address all situations, we may need
> anti-frag and zones, not one or the other.

I still do not see a need for additional zones.
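One reading of "set some option to require only movable allocations" is a
per-node marker consulted during the zonelist walk before the node is used
for anything other than a __GFP_MOVABLE request. The following user-space
sketch only illustrates that filtering; the flag, structure and helper
names are hypothetical, not existing kernel interfaces:

#include <stdbool.h>
#include <stdio.h>

#define GFP_SIM_MOVABLE 0x1u	/* simulated __GFP_MOVABLE bit */

struct sim_node {
	int id;
	bool movable_only;	/* hypothetical "hot-removable node" marker */
};

/* Zonelist-style filter: skip movable-only nodes for unmovable requests. */
static bool node_usable(const struct sim_node *node, unsigned int gfp_flags)
{
	return !node->movable_only || (gfp_flags & GFP_SIM_MOVABLE);
}

int main(void)
{
	struct sim_node nodes[] = { { 0, false }, { 1, true } };
	int i;

	for (i = 0; i < 2; i++)
		printf("node %d usable for an unmovable allocation: %s\n",
		       nodes[i].id, node_usable(&nodes[i], 0) ? "yes" : "no");
	return 0;
}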
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-05 16:14 ` Christoph Lameter
@ 2006-12-05 17:17 ` Mel Gorman
2006-12-05 19:54 ` Christoph Lameter
0 siblings, 1 reply; 38+ messages in thread
From: Mel Gorman @ 2006-12-05 17:17 UTC (permalink / raw)
To: Christoph Lameter
Cc: Andrew Morton, Andy Whitcroft, Linux Memory Management List, Linux Kernel Mailing List

On (05/12/06 08:14), Christoph Lameter didst pronounce:
> On Mon, 4 Dec 2006, Mel Gorman wrote:
>
>> 4. Offlining a DIMM
>> 5. Offlining a Node
>>
>> For Situation 4, a zone may be needed because MAX_ORDER_NR_PAGES would
>> have to be set too high for anti-frag to be effective. However, zones
>> would have to be tuned at boot time and that would be an annoying
>> restriction. If DIMMs are being offlined for power reasons, it would be
>> sufficient to be best-effort.
>
> We are able to depopulate a portion of the pages in a MAX_ORDER chunk if
> the page struct pages on the borders of that portion are not stored on
> the DIMM.

Portions of it, sure, but to offline the DIMM, all pages must be removed
from it. To guarantee the offlining, that means only __GFP_MOVABLE
allocations are allowed within that area, and a zone is the easiest way
to do it.

Now, that said, if anti-fragmentation only uses lower PFNs, the number of
active unmovable pages has to be large enough to span all DIMMs before
the offlining would fail. This problem will be hit in some situations.

> Set a flag in the page structs of those page struct pages straddling the
> border and free the page struct pages describing only memory in the
> DIMM.
>

I'm not sure what you mean by this. If I wanted to offline a DIMM and I
had anti-frag (specifically the portion of it that allows a flag that
affects a whole block of pages), I would mark all the MAX_ORDER_NR_PAGES
blocks there as going offline so that the pages will not be reallocated.
Some time in the future, the DIMM will be offlined, but it could be an
indefinite length of time. If the DIMM consisted of just ZONE_MOVABLE, it
could be offlined in the length of time it takes to migrate all pages
elsewhere or page them out.

>> Situation 5 requires that a hotpluggable node only allows __GFP_MOVABLE
>> allocations in the zonelists. This would probably involve having one
>> zone that only allowed __GFP_MOVABLE.
>
> This is *node* hotplug, and we already have a node/zone structure where
> we could set some option to require only movable allocations.

True. It would be a bit of a hack, but it would work without needing
zones.

> Note that NUMA nodes have always had only a single effective zone. There
> are some exceptions on some architectures where we have additional DMA
> zones on the first or first two nodes, but NUMA memory policies will
> *not* allow exercising control over allocations from those zones.
>
>> In other words, to properly address all situations, we may need
>> anti-frag and zones, not one or the other.
>
> I still do not see a need for additional zones.

It's needed if you want to 100% guarantee the ability to offline a DIMM
under all circumstances. However, ZONE_MOVABLE comes with its own
problems, such as not allowing kernel allocations like network buffers.

--
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab
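The "mark all the MAX_ORDER_NR_PAGES blocks there as going offline so that
the pages will not be reallocated" step can be pictured as a per-block flag
that the allocator consults. The sketch below is a simplified, stand-alone
illustration, not the anti-frag code itself, and the names are hypothetical:

#include <stdbool.h>
#include <stdio.h>

#define SIM_MAX_ORDER_NR_PAGES 1024UL	/* pages per simulated MAX_ORDER block */
#define SIM_NR_BLOCKS 64

/* One flag per MAX_ORDER block: set while the block is being offlined. */
static bool block_going_offline[SIM_NR_BLOCKS];

static unsigned long block_of(unsigned long pfn)
{
	return pfn / SIM_MAX_ORDER_NR_PAGES;
}

/* Allocator-side check: never hand out pages from a block being offlined. */
static bool pfn_allocatable(unsigned long pfn)
{
	return !block_going_offline[block_of(pfn)];
}

int main(void)
{
	unsigned long pfn = 5000;

	block_going_offline[block_of(pfn)] = true;	/* hot-remove marks the block */
	printf("pfn %lu allocatable after marking: %s\n",
	       pfn, pfn_allocatable(pfn) ? "yes" : "no");
	return 0;
}

Once every block in the range is marked like this, the range drains as its
movable pages are migrated or reclaimed, which is why the drain can take an
indefinite length of time if unmovable pages are present.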
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-05 17:17 ` Mel Gorman
@ 2006-12-05 19:54 ` Christoph Lameter
0 siblings, 0 replies; 38+ messages in thread
From: Christoph Lameter @ 2006-12-05 19:54 UTC (permalink / raw)
To: Mel Gorman
Cc: Andrew Morton, Andy Whitcroft, Linux Memory Management List, Linux Kernel Mailing List

[-- Attachment #1: Type: TEXT/PLAIN, Size: 3029 bytes --]

On Tue, 5 Dec 2006, Mel Gorman wrote:

> Portions of it, sure, but to offline the DIMM, all pages must be removed
> from it. To guarantee the offlining, that means only __GFP_MOVABLE
> allocations are allowed within that area, and a zone is the easiest way
> to do it.

We were talking about the memory map (page struct array), not the pages
in the DIMM per se. The memory map could also be made movable to deal
with pages overlapping boundaries. (I am not sure that there is really a
problem at this time; we can probably afford to require the complete
removal of a page full of page structs. This is true in the vmemmap case.
Sparsemem has large memmap chunks that may span multiple pages, but that
could be fixed by changing the chunk size.)

> Now, that said, if anti-fragmentation only uses lower PFNs, the number
> of active unmovable pages has to be large enough to span all DIMMs
> before the offlining would fail. This problem will be hit in some
> situations.
>
>> Set a flag in the page structs of those page struct pages straddling
>> the border and free the page struct pages describing only memory in
>> the DIMM.
>
> I'm not sure what you mean by this. If I wanted to offline a DIMM and I
> had anti-frag (specifically the portion of it that allows a flag that
> affects a whole block of pages), I would mark all the MAX_ORDER_NR_PAGES
> blocks there as going offline so that the pages will not be reallocated.
> Some time in the future, the DIMM will be offlined, but it could be an
> indefinite length of time. If the DIMM consisted of just ZONE_MOVABLE,
> it could be offlined in the length of time it takes to migrate all pages
> elsewhere or page them out.

You have a block full of page structs (that is placed on other memory
than the DIMM). Some of the pages belonging to the page structs are in
the area to be offlined and others are not. Then you can remove the pages
to be offlined from the freelist (if they are on it) and from usage (by
migration or reclaim) and then mark them as unused. Marking them as
unused could then be as simple as setting PG_reserved.

>> This is *node* hotplug, and we already have a node/zone structure where
>> we could set some option to require only movable allocations.
>
> True. It would be a bit of a hack, but it would work without needing
> zones.

We must have some means of marking a node as removable anyway in order to
support node hotplug. What is so hackish about it?

>> I still do not see a need for additional zones.
>
> It's needed if you want to 100% guarantee the ability to offline a DIMM
> under all circumstances. However, ZONE_MOVABLE comes with its own
> problems, such as not allowing kernel allocations like network buffers.

You cannot offline all DIMMs since (at least at this point in time) we
need memory that is not movable. If you have multiple DIMMs then the
additional DIMMs may be placed in areas of a zone that cannot take
unmovable MAX_ORDER_NR_PAGES blocks.
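The sequence described here — take free pages off the free lists, migrate
or reclaim the pages still in use, then mark everything in the range as
unused (e.g. PG_reserved) — can be modelled with a small stand-alone
sketch. The states and helpers below are stand-ins, not the real hotplug
or migration API:

#include <stdbool.h>
#include <stdio.h>

enum page_state { PAGE_FREE, PAGE_MOVABLE, PAGE_PINNED, PAGE_RESERVED };

/*
 * Toy model of offlining a range: free pages are simply withdrawn,
 * movable pages are "migrated" (freed here), pinned pages make the
 * offline fail, and every emptied page ends up marked reserved,
 * standing in for SetPageReserved().
 */
static bool offline_range(enum page_state *pages, unsigned long nr)
{
	unsigned long i;

	for (i = 0; i < nr; i++) {
		if (pages[i] == PAGE_MOVABLE)
			pages[i] = PAGE_FREE;	/* stand-in for migration/reclaim */
		if (pages[i] != PAGE_FREE)
			return false;		/* an unmovable page blocks the offline */
		pages[i] = PAGE_RESERVED;	/* no longer available for allocation */
	}
	return true;
}

int main(void)
{
	enum page_state dimm[4] = { PAGE_FREE, PAGE_MOVABLE, PAGE_FREE, PAGE_MOVABLE };

	printf("offline %s\n", offline_range(dimm, 4) ? "succeeded" : "failed");
	return 0;
}

The point of contention in the thread is visible in the PAGE_PINNED case:
whether that state can ever be guaranteed absent in a range is exactly what
the zone-versus-best-effort argument is about.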
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 22:34 ` Andrew Morton
2006-12-04 23:45 ` Mel Gorman
@ 2006-12-05 15:52 ` Andy Whitcroft
1 sibling, 0 replies; 38+ messages in thread
From: Andy Whitcroft @ 2006-12-05 15:52 UTC (permalink / raw)
To: Andrew Morton
Cc: Mel Gorman, clameter, Linux Memory Management List, Linux Kernel Mailing List

Andrew Morton wrote:
> On Mon, 4 Dec 2006 20:34:29 +0000 (GMT)
> Mel Gorman <mel@csn.ul.ie> wrote:
>
>>> IOW: big-picture where-do-we-go-from-here stuff.
>>>
>> Start with lumpy reclaim,
>
> I had lumpy-reclaim in my todo queue but it seems to have gone away. I
> think I need a lumpy-reclaim resend, please.

There was a clash with it against 2.6.19-rc6-mm2. I've respun it and am
just retesting it. When that's done I'll drop it out to you.

-apw
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 20:34 ` Mel Gorman
2006-12-04 22:34 ` Andrew Morton
@ 2006-12-05 15:48 ` Andy Whitcroft
1 sibling, 0 replies; 38+ messages in thread
From: Andy Whitcroft @ 2006-12-05 15:48 UTC (permalink / raw)
To: Mel Gorman
Cc: Andrew Morton, clameter, Linux Memory Management List, Linux Kernel Mailing List

Mel Gorman wrote:
> On Mon, 4 Dec 2006, Andrew Morton wrote:
>> , but I would of course prefer to avoid
>> merging the anti-frag patches simply based on their stupendous size.
>> It seems to me that lumpy-reclaim is suitable for the e1000 problem
>> , but perhaps not for the hugetlbpage problem.
>
> I believe you'll hit similar problems even with lumpy-reclaim for the
> e1000 (I've added Andy to the cc so he can comment more). Lumpy provides
> a much smarter way of freeing higher-order contiguous blocks without
> having to reclaim 95%+ of memory - this is good. However, if you are
> currently seeing situations where the allocation fails even after you
> page out everything possible, smarter reclaim that eventually pages out
> everything anyway will not help you (chances are it's something like
> page tables that are in your way).

The pre-lumpy algorithm is capable of producing reasonable numbers of
very low-order pages. Lumpy should improve success rates, producing
successful reclaim at higher orders than that. Its success is limited,
however, by the percentage of non-reclaimable pages and their
distribution.

The e1000 problem is that it wants order=3 pages, i.e. 8 pages in size.
For lumpy to have a high chance of success we would need the average
unmovable page count to be significantly less than 1 in 8 pages
(assuming a random distribution), i.e. less than 12% pinned. In stress
testing we find we can reclaim on the order of 70% of memory, which
tends to indicate that the pinned memory is more like 25% than 10%. It
would suggest that we are going to find reclaim rates above order=2 are
poor without explicit placement control.

Obviously this all depends on the workload. Our test workloads are known
to be fairly hostile in terms of fragmentation. So I would love to see
lumpy tested in the problem scenario to get some data on that setup.

> This is where anti-frag comes in. It clusters pages together based on
> their type - unmovable, reapable (inode caches, short-lived kernel
> allocations, skbuffs etc) and movable. When kswapd kicks in, the slab
> caches will be reaped. As reapable pages are clustered together, that
> will free some contiguous areas - probably enough for the e1000
> allocations to succeed!
>
> If that doesn't work, kswapd and direct reclaim will start reclaiming
> the "movable" pages. Without lumpy reclaim, 95%+ of memory could be
> paged out, which is bad. Lumpy finds the contiguous pages faster and
> with less IO; that's why it's important.
>
> Tests I am aware of show that lumpy-reclaim on its own makes little or
> no difference to the hugetlb page problem. However, with anti-frag,
> hugetlb-sized allocations succeed much more often even when under
> memory pressure.

At high order both traditional and lumpy reclaim are next to useless
without placement controls.

>> Whereas anti-fragmentation adds
>> vastly more code, but can address both problems? Or something.
>>
>
> Anti-frag goes a long way to addressing both problems. Lumpy-reclaim
> increases its success rates under memory pressure and reduces the
> amount of reclaim that occurs.
>
>> IOW: big-picture where-do-we-go-from-here stuff.
>>
>
> Start with lumpy reclaim, then I'd like to merge page clustering piece
> by piece, ideally with one of the people with e1000 problems testing to
> see whether it makes a difference.
>
> Assuming they are shown to help, where we'd go from there would be stuff
> like:
>
> 1. Keep non-movable and reapable allocations at the lower PFNs as much
>    as possible. This is so DIMMs for higher PFNs can be removed (doesn't
>    exist)
> 2. Use page migration to compact memory rather than depending solely on
>    reclaim (doesn't exist)
> 3. Introduce a mechanism for marking a group of pages as being offlined
>    so that they are not reallocated (code that does something like this
>    exists)
> 4. Resurrect the hotplug-remove code (exists, but probably very stale)
> 5. Allow allocations for hugepages outside of the pool as long as the
>    process remains within its locked_vm limits (patches were posted to
>    libhugetlbfs last Friday; will post to linux-mm tomorrow).

-apw
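The "less than 1 in 8 pages" figure above follows from a simple model: if
a fraction p of pages is pinned and pinned pages are scattered at random,
an aligned order-3 block of 8 pages is free of pinned pages with
probability (1-p)^8. The short stand-alone program below is just that
back-of-the-envelope check, not anything from the patches:

#include <math.h>
#include <stdio.h>

int main(void)
{
	/* Probability that an order-3 block (8 pages) contains no pinned
	 * page, assuming pinned pages are randomly scattered. */
	double pinned[] = { 0.05, 0.10, 0.125, 0.25 };
	int i;

	for (i = 0; i < 4; i++)
		printf("pinned %5.1f%% -> clean order-3 blocks %5.1f%%\n",
		       pinned[i] * 100.0, pow(1.0 - pinned[i], 8) * 100.0);
	return 0;
}

Built with -lm, this shows roughly 43% of order-3 blocks clean at 10%
pinned but only about 10% clean at 25% pinned, which is in line with the
observation that reclaim above order=2 is poor without placement control.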
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 19:30 ` Andrew Morton
2006-12-04 19:41 ` Christoph Lameter
2006-12-04 20:34 ` Mel Gorman
@ 2006-12-04 20:37 ` Peter Zijlstra
2006-12-06 14:18 ` Andy Whitcroft
2 siblings, 1 reply; 38+ messages in thread
From: Peter Zijlstra @ 2006-12-04 20:37 UTC (permalink / raw)
To: Andrew Morton
Cc: Mel Gorman, clameter, Linux Memory Management List, Linux Kernel Mailing List, Andy Whitcroft

On Mon, 2006-12-04 at 11:30 -0800, Andrew Morton wrote:

> I'd also like to pin down the situation with lumpy-reclaim versus
> anti-fragmentation. No offence, but I would of course prefer to avoid
> merging the anti-frag patches simply based on their stupendous size. It
> seems to me that lumpy-reclaim is suitable for the e1000 problem, but
> perhaps not for the hugetlbpage problem. Whereas anti-fragmentation adds
> vastly more code, but can address both problems? Or something.

From my understanding they complement each other nicely. Without some
form of anti-fragmentation there is no guarantee lumpy reclaim will ever
free really high-order pages, although it might succeed nicely for the
network-sized allocations we now have problems with.

 - Andy, do you have any numbers on non-largepage order allocations?

But anti-fragmentation as per Mel's patches is not good enough to provide
largepage allocations, since we would need to shoot down most of the LRU
to obtain such a large contiguous area. Lumpy reclaim, however, can
quickly achieve these sizes.
* Re: [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated
2006-12-04 20:37 ` Peter Zijlstra
@ 2006-12-06 14:18 ` Andy Whitcroft
0 siblings, 0 replies; 38+ messages in thread
From: Andy Whitcroft @ 2006-12-06 14:18 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Andrew Morton, Mel Gorman, clameter, Linux Memory Management List, Linux Kernel Mailing List

Peter Zijlstra wrote:
> On Mon, 2006-12-04 at 11:30 -0800, Andrew Morton wrote:
>
>> I'd also like to pin down the situation with lumpy-reclaim versus
>> anti-fragmentation. No offence, but I would of course prefer to avoid
>> merging the anti-frag patches simply based on their stupendous size. It
>> seems to me that lumpy-reclaim is suitable for the e1000 problem, but
>> perhaps not for the hugetlbpage problem. Whereas anti-fragmentation adds
>> vastly more code, but can address both problems? Or something.
>
> From my understanding they complement each other nicely. Without some
> form of anti-fragmentation there is no guarantee lumpy reclaim will ever
> free really high-order pages, although it might succeed nicely for the
> network-sized allocations we now have problems with.
>
>  - Andy, do you have any numbers on non-largepage order allocations?

Currently no; we have focused on the worst-case huge pages and assumed
lower orders would be easier and more successful. Though it is (now) on
my todo list to see if we can do the same tests at some lower order,
with the aim of trying that on base+lumpy.

> But anti-fragmentation as per Mel's patches is not good enough to
> provide largepage allocations, since we would need to shoot down most of
> the LRU to obtain such a large contiguous area. Lumpy reclaim, however,
> can quickly achieve these sizes.

-apw
end of thread, other threads:[~2006-12-08  6:11 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-11-30 17:07 [PATCH] Add __GFP_MOVABLE for callers to flag allocations that may be migrated Mel Gorman
2006-12-01  1:31 ` Andrew Morton
2006-12-01  9:54 ` Mel Gorman
2006-12-01 19:01 ` Andrew Morton
2006-12-04 14:07 ` Mel Gorman
2006-12-04 19:30 ` Andrew Morton
2006-12-04 19:41 ` Christoph Lameter
2006-12-04 20:06 ` Andrew Morton
2006-12-04 20:17 ` Christoph Lameter
2006-12-04 21:19 ` Andrew Morton
2006-12-04 21:43 ` Christoph Lameter
2006-12-04 22:22 ` Andrew Morton
2006-12-05 16:00 ` Christoph Lameter
2006-12-05 19:25 ` Andrew Morton
2006-12-05 20:01 ` Christoph Lameter
2006-12-05 21:47 ` Mel Gorman
2006-12-05 23:33 ` Christoph Lameter
2006-12-06  9:31 ` Mel Gorman
2006-12-06 17:31 ` Christoph Lameter
2006-12-08  1:21 ` Jeremy Fitzhardinge
2006-12-08  2:20 ` Christoph Lameter
2006-12-08  6:11 ` Jeremy Fitzhardinge
2006-12-05 18:10 ` Mel Gorman
2006-12-04 20:34 ` Mel Gorman
2006-12-04 22:34 ` Andrew Morton
2006-12-04 23:45 ` Mel Gorman
2006-12-05  1:16 ` KAMEZAWA Hiroyuki
2006-12-05 10:03 ` Mel Gorman
2006-12-05 16:05 ` Christoph Lameter
2006-12-05 18:26 ` Andrew Morton
2006-12-05 19:59 ` Christoph Lameter
2006-12-05 16:14 ` Christoph Lameter
2006-12-05 17:17 ` Mel Gorman
2006-12-05 19:54 ` Christoph Lameter
2006-12-05 15:52 ` Andy Whitcroft
2006-12-05 15:48 ` Andy Whitcroft
2006-12-04 20:37 ` Peter Zijlstra
2006-12-06 14:18 ` Andy Whitcroft