* [PATCH 1/5] Light fragmentation avoidance without usemap: 001_antidefrag_flags
From: Mel Gorman @ 2005-11-22 19:17 UTC
To: linux-mm; +Cc: Mel Gorman, nickpiggin, ak, linux-kernel, lhms-devel, mingo
This patch adds a flag, __GFP_EASYRCLM. Allocations using the __GFP_EASYRCLM
flag are expected to be easily reclaimed, either by syncing with backing
storage (be it a file or swap) or by cleaning the buffers and discarding the
page.
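As a usage sketch (illustrative only, not part of the patch -- the helper
names here are made up), the flag is simply OR-ed into the gfp mask at
allocation sites whose pages are backed by a file or by swap:

	#include <linux/gfp.h>
	#include <linux/mm.h>

	/* Sketch: a page holding user data is cheap to reclaim, so hint
	 * the allocator by adding __GFP_EASYRCLM to the mask. */
	static struct page *alloc_easyrclm_page(void)
	{
		return alloc_page(GFP_HIGHUSER | __GFP_EASYRCLM);
	}

	/* Kernel-internal allocations omit the flag and remain "hard" */
	static struct page *alloc_hard_page(void)
	{
		return alloc_page(GFP_KERNEL);
	}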
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/fs/buffer.c linux-2.6.15-rc1-mm2-001_antidefrag_flags/fs/buffer.c
--- linux-2.6.15-rc1-mm2-clean/fs/buffer.c 2005-11-21 19:44:32.000000000 +0000
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/fs/buffer.c 2005-11-22 16:49:23.000000000 +0000
@@ -1113,7 +1113,8 @@ grow_dev_page(struct block_device *bdev,
struct page *page;
struct buffer_head *bh;
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, index,
+ GFP_NOFS|__GFP_EASYRCLM);
if (!page)
return NULL;
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/fs/compat.c linux-2.6.15-rc1-mm2-001_antidefrag_flags/fs/compat.c
--- linux-2.6.15-rc1-mm2-clean/fs/compat.c 2005-11-21 19:44:32.000000000 +0000
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/fs/compat.c 2005-11-22 16:49:23.000000000 +0000
@@ -1359,7 +1359,7 @@ static int compat_copy_strings(int argc,
page = bprm->page[i];
new = 0;
if (!page) {
- page = alloc_page(GFP_HIGHUSER);
+ page = alloc_page(GFP_HIGHUSER|__GFP_EASYRCLM);
bprm->page[i] = page;
if (!page) {
ret = -ENOMEM;
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/fs/exec.c linux-2.6.15-rc1-mm2-001_antidefrag_flags/fs/exec.c
--- linux-2.6.15-rc1-mm2-clean/fs/exec.c 2005-11-21 19:44:32.000000000 +0000
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/fs/exec.c 2005-11-22 16:49:23.000000000 +0000
@@ -238,7 +238,7 @@ static int copy_strings(int argc, char _
page = bprm->page[i];
new = 0;
if (!page) {
- page = alloc_page(GFP_HIGHUSER);
+ page = alloc_page(GFP_HIGHUSER|__GFP_EASYRCLM);
bprm->page[i] = page;
if (!page) {
ret = -ENOMEM;
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/fs/inode.c linux-2.6.15-rc1-mm2-001_antidefrag_flags/fs/inode.c
--- linux-2.6.15-rc1-mm2-clean/fs/inode.c 2005-11-21 19:44:32.000000000 +0000
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/fs/inode.c 2005-11-22 16:49:23.000000000 +0000
@@ -146,7 +146,7 @@ static struct inode *alloc_inode(struct
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
- mapping_set_gfp_mask(mapping, GFP_HIGHUSER);
+ mapping_set_gfp_mask(mapping, GFP_HIGHUSER|__GFP_EASYRCLM);
mapping->assoc_mapping = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/include/asm-i386/page.h linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/asm-i386/page.h
--- linux-2.6.15-rc1-mm2-clean/include/asm-i386/page.h 2005-10-28 01:02:08.000000000 +0100
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/asm-i386/page.h 2005-11-22 16:49:23.000000000 +0000
@@ -36,7 +36,8 @@
#define clear_user_page(page, vaddr, pg) clear_page(page)
#define copy_user_page(to, from, vaddr, pg) copy_page(to, from)
-#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr)
+#define alloc_zeroed_user_highpage(vma, vaddr) \
+ alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | __GFP_EASYRCLM, vma, vaddr)
#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
/*
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/include/linux/gfp.h linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/linux/gfp.h
--- linux-2.6.15-rc1-mm2-clean/include/linux/gfp.h 2005-11-21 19:44:33.000000000 +0000
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/linux/gfp.h 2005-11-22 16:49:23.000000000 +0000
@@ -47,6 +47,7 @@ struct vm_area_struct;
#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
+#define __GFP_EASYRCLM ((__force gfp_t)0x40000u) /* Easily reclaimed page */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
@@ -55,7 +56,7 @@ struct vm_area_struct;
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_HARDWALL)
+ __GFP_NOMEMALLOC|__GFP_HARDWALL|__GFP_EASYRCLM)
/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
#define GFP_ATOMIC (__GFP_HIGH)
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/include/linux/highmem.h linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/linux/highmem.h
--- linux-2.6.15-rc1-mm2-clean/include/linux/highmem.h 2005-10-28 01:02:08.000000000 +0100
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/linux/highmem.h 2005-11-22 16:49:23.000000000 +0000
@@ -47,7 +47,8 @@ static inline void clear_user_highpage(s
static inline struct page *
alloc_zeroed_user_highpage(struct vm_area_struct *vma, unsigned long vaddr)
{
- struct page *page = alloc_page_vma(GFP_HIGHUSER, vma, vaddr);
+ struct page *page = alloc_page_vma(GFP_HIGHUSER|__GFP_EASYRCLM,
+ vma, vaddr);
if (page)
clear_user_highpage(page, vaddr);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/mm/memory.c linux-2.6.15-rc1-mm2-001_antidefrag_flags/mm/memory.c
--- linux-2.6.15-rc1-mm2-clean/mm/memory.c 2005-11-21 19:44:33.000000000 +0000
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/mm/memory.c 2005-11-22 16:49:23.000000000 +0000
@@ -1378,7 +1378,8 @@ gotten:
if (!new_page)
goto oom;
} else {
- new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+ new_page = alloc_page_vma(GFP_HIGHUSER|__GFP_EASYRCLM,
+ vma, address);
if (!new_page)
goto oom;
copy_user_highpage(new_page, src_page, address);
@@ -1985,7 +1986,8 @@ retry:
if (unlikely(anon_vma_prepare(vma)))
goto oom;
- page = alloc_page_vma(GFP_HIGHUSER, vma, address);
+ page = alloc_page_vma(GFP_HIGHUSER|__GFP_EASYRCLM,
+ vma, address);
if (!page)
goto oom;
copy_user_highpage(page, new_page, address);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/mm/shmem.c linux-2.6.15-rc1-mm2-001_antidefrag_flags/mm/shmem.c
--- linux-2.6.15-rc1-mm2-clean/mm/shmem.c 2005-11-21 19:44:33.000000000 +0000
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/mm/shmem.c 2005-11-22 16:49:23.000000000 +0000
@@ -921,7 +921,7 @@ shmem_alloc_page(gfp_t gfp, struct shmem
pvma.vm_policy = mpol_shared_policy_lookup(&info->policy, idx);
pvma.vm_pgoff = idx;
pvma.vm_end = PAGE_SIZE;
- page = alloc_page_vma(gfp | __GFP_ZERO, &pvma, 0);
+ page = alloc_page_vma(gfp | __GFP_ZERO | __GFP_EASYRCLM, &pvma, 0);
mpol_free(pvma.vm_policy);
return page;
}
@@ -936,7 +936,7 @@ shmem_swapin(struct shmem_inode_info *in
static inline struct page *
shmem_alloc_page(gfp_t gfp,struct shmem_inode_info *info, unsigned long idx)
{
- return alloc_page(gfp | __GFP_ZERO);
+ return alloc_page(gfp | __GFP_ZERO | __GFP_EASYRCLM);
}
#endif
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-clean/mm/swap_state.c linux-2.6.15-rc1-mm2-001_antidefrag_flags/mm/swap_state.c
--- linux-2.6.15-rc1-mm2-clean/mm/swap_state.c 2005-11-21 19:44:33.000000000 +0000
+++ linux-2.6.15-rc1-mm2-001_antidefrag_flags/mm/swap_state.c 2005-11-22 16:49:23.000000000 +0000
@@ -341,7 +341,8 @@ struct page *read_swap_cache_async(swp_e
* Get a new page to read into from swap.
*/
if (!new_page) {
- new_page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+ new_page = alloc_page_vma(GFP_HIGHUSER|__GFP_EASYRCLM,
+ vma, addr);
if (!new_page)
break; /* Out of memory */
}
* [PATCH 2/5] Light fragmentation avoidance without usemap: 002_fragcore
From: Mel Gorman @ 2005-11-22 19:17 UTC
To: linux-mm; +Cc: Mel Gorman, nickpiggin, ak, linux-kernel, lhms-devel, mingo
This patch adds the core of the anti-fragmentation strategy. It works by
grouping related allocation types together, so that large blocks of pages
that may be reclaimed end up adjacent to one another. To achieve this, each
zone->free_area is split into RCLM_TYPES free lists.
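For orientation, a simplified, non-authoritative sketch of the lookup the
modified __rmqueue() below performs (the real code also walks higher orders,
removes the page from the list and splits large blocks):

	/* Sketch only: try the preferred typed free list first, then
	 * fall back to stealing a block of the other type. */
	static struct page *sketch_rmqueue(struct zone *zone, unsigned int order,
					int alloctype)
	{
		struct free_area *area = &zone->free_area[order];

		if (!list_empty(&area->free_list[alloctype]))
			return list_entry(area->free_list[alloctype].next,
					struct page, lru);

		/* Preferred list is empty: take from the other type */
		return __rmqueue_fallback(zone, order, alloctype);
	}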
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/linux/mmzone.h linux-2.6.15-rc1-mm2-002_fragcore/include/linux/mmzone.h
--- linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/linux/mmzone.h 2005-11-21 19:44:33.000000000 +0000
+++ linux-2.6.15-rc1-mm2-002_fragcore/include/linux/mmzone.h 2005-11-22 16:50:09.000000000 +0000
@@ -22,8 +22,16 @@
#define MAX_ORDER CONFIG_FORCE_MAX_ZONEORDER
#endif
+#define RCLM_NORCLM 0
+#define RCLM_EASY 1
+#define RCLM_TYPES 2
+
+#define for_each_rclmtype_order(type, order) \
+ for (order = 0; order < MAX_ORDER; order++) \
+ for (type = 0; type < RCLM_TYPES; type++)
+
struct free_area {
- struct list_head free_list;
+ struct list_head free_list[RCLM_TYPES];
unsigned long nr_free;
};
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/linux/page-flags.h linux-2.6.15-rc1-mm2-002_fragcore/include/linux/page-flags.h
--- linux-2.6.15-rc1-mm2-001_antidefrag_flags/include/linux/page-flags.h 2005-11-21 19:44:33.000000000 +0000
+++ linux-2.6.15-rc1-mm2-002_fragcore/include/linux/page-flags.h 2005-11-22 16:50:09.000000000 +0000
@@ -76,6 +76,7 @@
#define PG_reclaim 17 /* To be reclaimed asap */
#define PG_nosave_free 18 /* Free, should not be written */
#define PG_uncached 19 /* Page has been mapped as uncached */
+#define PG_easyrclm 20 /* Page is in an easy reclaim block */
/*
* Global page accounting. One instance per CPU. Only unsigned longs are
@@ -304,6 +305,12 @@ extern void __mod_page_state(unsigned lo
#define SetPageUncached(page) set_bit(PG_uncached, &(page)->flags)
#define ClearPageUncached(page) clear_bit(PG_uncached, &(page)->flags)
+#define PageEasyRclm(page) test_bit(PG_easyrclm, &(page)->flags)
+#define SetPageEasyRclm(page) set_bit(PG_easyrclm, &(page)->flags)
+#define ClearPageEasyRclm(page) clear_bit(PG_easyrclm, &(page)->flags)
+#define __SetPageEasyRclm(page) __set_bit(PG_easyrclm, &(page)->flags)
+#define __ClearPageEasyRclm(page) __clear_bit(PG_easyrclm, &(page)->flags)
+
struct page; /* forward declaration */
int test_clear_page_dirty(struct page *page);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-001_antidefrag_flags/mm/page_alloc.c linux-2.6.15-rc1-mm2-002_fragcore/mm/page_alloc.c
--- linux-2.6.15-rc1-mm2-001_antidefrag_flags/mm/page_alloc.c 2005-11-21 19:44:33.000000000 +0000
+++ linux-2.6.15-rc1-mm2-002_fragcore/mm/page_alloc.c 2005-11-22 16:50:09.000000000 +0000
@@ -68,6 +68,16 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_Z
EXPORT_SYMBOL(totalram_pages);
+static inline int get_pageblock_type(struct page *page)
+{
+ return (PageEasyRclm(page) != 0);
+}
+
+static inline int gfpflags_to_alloctype(unsigned long gfp_flags)
+{
+ return ((gfp_flags & __GFP_EASYRCLM) != 0);
+}
+
/*
* Used by page_zone() to look up the address of the struct zone whose
* id is encoded in the upper bits of page->flags
@@ -314,11 +324,13 @@ static inline void __free_pages_bulk (st
{
unsigned long page_idx;
int order_size = 1 << order;
+ int alloctype = get_pageblock_type(page);
if (unlikely(order))
destroy_compound_page(page, order);
page_idx = page_to_pfn(page) & ((1 << MAX_ORDER) - 1);
+ __SetPageEasyRclm(page);
BUG_ON(page_idx & (order_size - 1));
BUG_ON(bad_range(zone, page));
@@ -326,7 +338,6 @@ static inline void __free_pages_bulk (st
zone->free_pages += order_size;
while (order < MAX_ORDER-1) {
unsigned long combined_idx;
- struct free_area *area;
struct page *buddy;
combined_idx = __find_combined_index(page_idx, order);
@@ -337,15 +348,14 @@ static inline void __free_pages_bulk (st
if (!page_is_buddy(buddy, order))
break; /* Move the buddy up one level. */
list_del(&buddy->lru);
- area = zone->free_area + order;
- area->nr_free--;
+ zone->free_area[order].nr_free--;
rmv_page_order(buddy);
page = page + (combined_idx - page_idx);
page_idx = combined_idx;
order++;
}
set_page_order(page, order);
- list_add(&page->lru, &zone->free_area[order].free_list);
+ list_add(&page->lru, &zone->free_area[order].free_list[alloctype]);
zone->free_area[order].nr_free++;
}
@@ -450,7 +460,8 @@ void __free_pages_ok(struct page *page,
*/
static inline struct page *
expand(struct zone *zone, struct page *page,
- int low, int high, struct free_area *area)
+ int low, int high, struct free_area *area,
+ int alloctype)
{
unsigned long size = 1 << high;
@@ -459,7 +470,7 @@ expand(struct zone *zone, struct page *p
high--;
size >>= 1;
BUG_ON(bad_range(zone, &page[size]));
- list_add(&page[size].lru, &area->free_list);
+ list_add(&page[size].lru, &area->free_list[alloctype]);
area->nr_free++;
set_page_order(&page[size], high);
}
@@ -520,30 +531,79 @@ static int prep_new_page(struct page *pa
return 0;
}
+/* Remove an element from the fallback list of the buddy allocator */
+static struct page *__rmqueue_fallback(struct zone *zone, int order,
+ int alloctype)
+{
+ struct free_area * area;
+ int current_order;
+ struct page *page;
+
+ /* Find the largest possible block of pages in the other list */
+ alloctype = !alloctype;
+ for (current_order = MAX_ORDER-1; current_order >= order;
+ --current_order) {
+ area = &(zone->free_area[current_order]);
+ if (list_empty(&area->free_list[alloctype]))
+ continue;
+
+ page = list_entry(area->free_list[alloctype].next,
+ struct page, lru);
+ area->nr_free--;
+
+ /*
+ * If breaking a large block of pages, place the buddies
+ * on the preferred allocation list
+ */
+ if (unlikely(current_order >= MAX_ORDER / 2))
+ alloctype = !alloctype;
+
+ list_del(&page->lru);
+ rmv_page_order(page);
+ zone->free_pages -= 1UL << order;
+ return expand(zone, page, order, current_order, area,
+ alloctype);
+
+ }
+
+ return NULL;
+}
+
/*
* Do the hard work of removing an element from the buddy allocator.
* Call me with the zone->lock already held.
*/
-static struct page *__rmqueue(struct zone *zone, unsigned int order)
+static struct page *__rmqueue(struct zone *zone, unsigned int order,
+ int alloctype)
{
struct free_area * area;
unsigned int current_order;
struct page *page;
+ /* Find a page of the appropriate size in the preferred list */
for (current_order = order; current_order < MAX_ORDER; ++current_order) {
- area = zone->free_area + current_order;
- if (list_empty(&area->free_list))
+ area = &(zone->free_area[current_order]);
+ if (list_empty(&area->free_list[alloctype]))
continue;
- page = list_entry(area->free_list.next, struct page, lru);
+ page = list_entry(area->free_list[alloctype].next,
+ struct page, lru);
list_del(&page->lru);
rmv_page_order(page);
area->nr_free--;
zone->free_pages -= 1UL << order;
- return expand(zone, page, order, current_order, area);
+ page = expand(zone, page, order, current_order, area,
+ alloctype);
+ goto got_page;
}
- return NULL;
+ page = __rmqueue_fallback(zone, order, alloctype);
+
+got_page:
+	if (unlikely(alloctype == RCLM_NORCLM && page != NULL))
+ __ClearPageEasyRclm(page);
+
+ return page;
}
/*
@@ -552,7 +612,8 @@ static struct page *__rmqueue(struct zon
* Returns the number of new pages which were placed at *list.
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
- unsigned long count, struct list_head *list)
+ unsigned long count, struct list_head *list,
+ int alloctype)
{
unsigned long flags;
int i;
@@ -561,7 +622,7 @@ static int rmqueue_bulk(struct zone *zon
spin_lock_irqsave(&zone->lock, flags);
for (i = 0; i < count; ++i) {
- page = __rmqueue(zone, order);
+ page = __rmqueue(zone, order, alloctype);
if (page == NULL)
break;
allocated++;
@@ -627,7 +688,7 @@ static void __drain_pages(unsigned int c
void mark_free_pages(struct zone *zone)
{
unsigned long zone_pfn, flags;
- int order;
+ int order, t;
struct list_head *curr;
if (!zone->spanned_pages)
@@ -638,13 +699,15 @@ void mark_free_pages(struct zone *zone)
ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn));
for (order = MAX_ORDER - 1; order >= 0; --order)
- list_for_each(curr, &zone->free_area[order].free_list) {
+ for_each_rclmtype_order(t, order) {
+ list_for_each(curr, &zone->free_area[order].free_list[t]) {
unsigned long start_pfn, i;
start_pfn = page_to_pfn(list_entry(curr, struct page, lru));
for (i=0; i < (1<<order); i++)
SetPageNosaveFree(pfn_to_page(start_pfn+i));
+ }
}
spin_unlock_irqrestore(&zone->lock, flags);
}
@@ -748,6 +811,7 @@ buffered_rmqueue(struct zone *zone, int
unsigned long flags;
struct page *page;
int cold = !!(gfp_flags & __GFP_COLD);
+ int alloctype = gfpflags_to_alloctype(gfp_flags);
again:
if (order == 0) {
@@ -758,7 +822,8 @@ again:
local_irq_save(flags);
if (pcp->count <= pcp->low)
pcp->count += rmqueue_bulk(zone, 0,
- pcp->batch, &pcp->list);
+ pcp->batch, &pcp->list,
+ alloctype);
if (pcp->count) {
page = list_entry(pcp->list.next, struct page, lru);
list_del(&page->lru);
@@ -768,7 +833,7 @@ again:
put_cpu();
} else {
spin_lock_irqsave(&zone->lock, flags);
- page = __rmqueue(zone, order);
+ page = __rmqueue(zone, order, alloctype);
spin_unlock_irqrestore(&zone->lock, flags);
}
@@ -1791,7 +1856,8 @@ void zone_init_free_lists(struct pglist_
{
int order;
for (order = 0; order < MAX_ORDER ; order++) {
- INIT_LIST_HEAD(&zone->free_area[order].free_list);
+ INIT_LIST_HEAD(&zone->free_area[order].free_list[RCLM_NORCLM]);
+ INIT_LIST_HEAD(&zone->free_area[order].free_list[RCLM_EASY]);
zone->free_area[order].nr_free = 0;
}
}
* [PATCH 3/5] Light fragmentation avoidance without usemap: 003_percpu
From: Mel Gorman @ 2005-11-22 19:17 UTC
To: linux-mm; +Cc: Mel Gorman, nickpiggin, ak, linux-kernel, lhms-devel, mingo
The freelists for each allocation type can slowly become corrupted via the
per-cpu lists. Consider what happens in the following scenario:
1. A 2^(MAX_ORDER-1) block of pages is reserved for __GFP_EASYRCLM pages
2. An order-0 page is allocated from the newly reserved block
3. The page is freed and placed on the per-cpu list
4. alloc_page() is called with GFP_KERNEL as the gfp_mask
5. The per-cpu list is used to satisfy the allocation
This results in a kernel page sitting in the middle of a RCLM_EASY region,
which means that over long periods of time, the anti-fragmentation scheme
slowly degrades to the standard allocator.
This patch divides each per-cpu list into RCLM_TYPES lists.
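As a minimal sketch (not the patch itself; irq and watermark handling
omitted), a freed page now lands on the per-cpu list matching its block
type, so a later GFP_KERNEL allocation can no longer be satisfied from an
easy-reclaim block:

	/* Sketch of the typed per-cpu free path */
	static void sketch_pcp_free(struct zone *zone, struct page *page, int cold)
	{
		struct per_cpu_pages *pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
		int type = get_pageblock_type(page);	/* RCLM_NORCLM or RCLM_EASY */

		list_add(&page->lru, &pcp->list[type]);
		pcp->count[type]++;
		put_cpu();
	}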
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-002_fragcore/include/linux/mmzone.h linux-2.6.15-rc1-mm2-003_percpu/include/linux/mmzone.h
--- linux-2.6.15-rc1-mm2-002_fragcore/include/linux/mmzone.h 2005-11-22 16:50:09.000000000 +0000
+++ linux-2.6.15-rc1-mm2-003_percpu/include/linux/mmzone.h 2005-11-22 16:52:10.000000000 +0000
@@ -26,6 +26,8 @@
#define RCLM_EASY 1
#define RCLM_TYPES 2
+#define for_each_rclmtype(type) \
+ for (type = 0; type < RCLM_TYPES; type++)
#define for_each_rclmtype_order(type, order) \
for (order = 0; order < MAX_ORDER; order++) \
for (type = 0; type < RCLM_TYPES; type++)
@@ -53,11 +55,11 @@ struct zone_padding {
#endif
struct per_cpu_pages {
- int count; /* number of pages in the list */
+ int count[RCLM_TYPES]; /* Number of pages on the lists */
int low; /* low watermark, refill needed */
int high; /* high watermark, emptying needed */
int batch; /* chunk size for buddy add/remove */
- struct list_head list; /* the list of pages */
+ struct list_head list[RCLM_TYPES]; /* the lists of pages */
};
struct per_cpu_pageset {
@@ -72,6 +74,11 @@ struct per_cpu_pageset {
#endif
} ____cacheline_aligned_in_smp;
+static inline int pcp_count(struct per_cpu_pages *pcp)
+{
+ return pcp->count[RCLM_NORCLM] + pcp->count[RCLM_EASY];
+}
+
#ifdef CONFIG_NUMA
#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)])
#else
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-002_fragcore/mm/page_alloc.c linux-2.6.15-rc1-mm2-003_percpu/mm/page_alloc.c
--- linux-2.6.15-rc1-mm2-002_fragcore/mm/page_alloc.c 2005-11-22 16:50:09.000000000 +0000
+++ linux-2.6.15-rc1-mm2-003_percpu/mm/page_alloc.c 2005-11-22 16:52:10.000000000 +0000
@@ -637,7 +637,7 @@ static int rmqueue_bulk(struct zone *zon
void drain_remote_pages(void)
{
struct zone *zone;
- int i;
+ int i, pindex;
unsigned long flags;
local_irq_save(flags);
@@ -653,9 +653,16 @@ void drain_remote_pages(void)
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- if (pcp->count)
- pcp->count -= free_pages_bulk(zone, pcp->count,
- &pcp->list, 0);
+ for_each_rclmtype(pindex) {
+ if (!pcp->count[pindex])
+ continue;
+
+ /* Try remove all pages from the pcpu list */
+ pcp->count[pindex] -=
+ free_pages_bulk(zone,
+ pcp->count[pindex],
+ &pcp->list[pindex], 0);
+ }
}
}
local_irq_restore(flags);
@@ -666,7 +673,7 @@ void drain_remote_pages(void)
static void __drain_pages(unsigned int cpu)
{
struct zone *zone;
- int i;
+ int i, pindex;
for_each_zone(zone) {
struct per_cpu_pageset *pset;
@@ -676,8 +683,16 @@ static void __drain_pages(unsigned int c
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- pcp->count -= free_pages_bulk(zone, pcp->count,
- &pcp->list, 0);
+ for_each_rclmtype(pindex) {
+ if (!pcp->count[pindex])
+ continue;
+
+ /* Try remove all pages from the pcpu list */
+ pcp->count[pindex] -=
+ free_pages_bulk(zone,
+ pcp->count[pindex],
+ &pcp->list[pindex], 0);
+ }
}
}
}
@@ -758,6 +773,7 @@ static void FASTCALL(free_hot_cold_page(
static void fastcall free_hot_cold_page(struct page *page, int cold)
{
struct zone *zone = page_zone(page);
+ int pindex = get_pageblock_type(page);
struct per_cpu_pages *pcp;
unsigned long flags;
@@ -773,10 +789,11 @@ static void fastcall free_hot_cold_page(
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
- list_add(&page->lru, &pcp->list);
- pcp->count++;
- if (pcp->count >= pcp->high)
- pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+ list_add(&page->lru, &pcp->list[pindex]);
+ pcp->count[pindex]++;
+ if (pcp->count[pindex] >= pcp->high)
+ pcp->count[pindex] -= free_pages_bulk(zone, pcp->batch,
+ &pcp->list[pindex], 0);
local_irq_restore(flags);
put_cpu();
}
@@ -820,14 +837,16 @@ again:
page = NULL;
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
- if (pcp->count <= pcp->low)
- pcp->count += rmqueue_bulk(zone, 0,
- pcp->batch, &pcp->list,
+ if (pcp->count[alloctype] <= pcp->low)
+ pcp->count[alloctype] += rmqueue_bulk(zone, 0,
+ pcp->batch,
+ &pcp->list[alloctype],
alloctype);
- if (pcp->count) {
- page = list_entry(pcp->list.next, struct page, lru);
+ if (pcp->count[alloctype]) {
+ page = list_entry(pcp->list[alloctype].next,
+ struct page, lru);
list_del(&page->lru);
- pcp->count--;
+ pcp->count[alloctype]--;
}
local_irq_restore(flags);
put_cpu();
@@ -1478,7 +1497,7 @@ void show_free_areas(void)
pageset->pcp[temperature].low,
pageset->pcp[temperature].high,
pageset->pcp[temperature].batch,
- pageset->pcp[temperature].count);
+ pcp_count(&pageset->pcp[temperature]));
}
}
@@ -1920,18 +1939,23 @@ inline void setup_pageset(struct per_cpu
memset(p, 0, sizeof(*p));
pcp = &p->pcp[0]; /* hot */
- pcp->count = 0;
+ pcp->count[RCLM_NORCLM] = 0;
+ pcp->count[RCLM_EASY] = 0;
pcp->low = 0;
pcp->high = 6 * batch;
pcp->batch = max(1UL, 1 * batch);
- INIT_LIST_HEAD(&pcp->list);
+ INIT_LIST_HEAD(&pcp->list[RCLM_NORCLM]);
+ INIT_LIST_HEAD(&pcp->list[RCLM_EASY]);
pcp = &p->pcp[1]; /* cold*/
- pcp->count = 0;
+
+ pcp->count[RCLM_NORCLM] = 0;
+ pcp->count[RCLM_EASY] = 0;
pcp->low = 0;
pcp->high = 2 * batch;
pcp->batch = max(1UL, batch/2);
- INIT_LIST_HEAD(&pcp->list);
+ INIT_LIST_HEAD(&pcp->list[RCLM_NORCLM]);
+ INIT_LIST_HEAD(&pcp->list[RCLM_EASY]);
}
#ifdef CONFIG_NUMA
@@ -2328,7 +2352,7 @@ static int zoneinfo_show(struct seq_file
"\n high: %i"
"\n batch: %i",
i, j,
- pageset->pcp[j].count,
+ pcp_count(&pageset->pcp[j]),
pageset->pcp[j].low,
pageset->pcp[j].high,
pageset->pcp[j].batch);
* [PATCH 4/5] Light fragmentation avoidance without usemap: 004_configurable
From: Mel Gorman @ 2005-11-22 19:17 UTC
To: linux-mm; +Cc: Mel Gorman, nickpiggin, ak, linux-kernel, lhms-devel, mingo
The anti-defragmentation strategy has a memory overhead. This patch allows
the strategy to be disabled for small-memory systems, or when it is known
that the workload suffers because of it. It also serves to show where the
anti-defrag strategy interacts with the standard buddy allocator.
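To try it out, enable the option at configure time (a hypothetical .config
fragment; the option defaults to off):

	CONFIG_PAGEALLOC_ANTIDEFRAG=y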
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-003_percpu/include/linux/mmzone.h linux-2.6.15-rc1-mm2-004_configurable/include/linux/mmzone.h
--- linux-2.6.15-rc1-mm2-003_percpu/include/linux/mmzone.h 2005-11-22 16:52:10.000000000 +0000
+++ linux-2.6.15-rc1-mm2-004_configurable/include/linux/mmzone.h 2005-11-22 16:53:03.000000000 +0000
@@ -74,10 +74,17 @@ struct per_cpu_pageset {
#endif
} ____cacheline_aligned_in_smp;
+#ifdef CONFIG_PAGEALLOC_ANTIDEFRAG
static inline int pcp_count(struct per_cpu_pages *pcp)
{
return pcp->count[RCLM_NORCLM] + pcp->count[RCLM_EASY];
}
+#else
+static inline int pcp_count(struct per_cpu_pages *pcp)
+{
+ return pcp->count[RCLM_NORCLM];
+}
+#endif /* CONFIG_PAGEALLOC_ANTIDEFRAG */
#ifdef CONFIG_NUMA
#define zone_pcp(__z, __cpu) ((__z)->pageset[(__cpu)])
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-003_percpu/init/Kconfig linux-2.6.15-rc1-mm2-004_configurable/init/Kconfig
--- linux-2.6.15-rc1-mm2-003_percpu/init/Kconfig 2005-11-21 19:44:33.000000000 +0000
+++ linux-2.6.15-rc1-mm2-004_configurable/init/Kconfig 2005-11-22 16:53:03.000000000 +0000
@@ -396,6 +396,18 @@ config CC_ALIGN_FUNCTIONS
32-byte boundary only if this can be done by skipping 23 bytes or less.
Zero means use compiler's default.
+config PAGEALLOC_ANTIDEFRAG
+	bool "Avoid fragmentation in the page allocator"
+	default n
+	help
+	  The standard allocator will fragment memory over time, which means
+	  that high-order allocations may fail even if kswapd is running. If
+	  this option is set, the allocator will try to group pages into two
+	  types, kernel and easily reclaimable. The gain is a best-effort
+	  attempt at lowering fragmentation, which a few workloads care about.
+	  The loss is a more complex allocator that is slightly slower.
+	  If unsure, say N.
+
config CC_ALIGN_LABELS
int "Label alignment" if EMBEDDED
default 0
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-003_percpu/mm/page_alloc.c linux-2.6.15-rc1-mm2-004_configurable/mm/page_alloc.c
--- linux-2.6.15-rc1-mm2-003_percpu/mm/page_alloc.c 2005-11-22 16:52:10.000000000 +0000
+++ linux-2.6.15-rc1-mm2-004_configurable/mm/page_alloc.c 2005-11-22 16:53:03.000000000 +0000
@@ -68,6 +68,7 @@ int sysctl_lowmem_reserve_ratio[MAX_NR_Z
EXPORT_SYMBOL(totalram_pages);
+#ifdef CONFIG_PAGEALLOC_ANTIDEFRAG
static inline int get_pageblock_type(struct page *page)
{
return (PageEasyRclm(page) != 0);
@@ -77,6 +78,17 @@ static inline int gfpflags_to_alloctype(
{
return ((gfp_flags & __GFP_EASYRCLM) != 0);
}
+#else
+static inline int get_pageblock_type(struct page *page)
+{
+ return RCLM_NORCLM;
+}
+
+static inline int gfpflags_to_alloctype(unsigned long gfp_flags)
+{
+ return RCLM_NORCLM;
+}
+#endif /* CONFIG_PAGEALLOC_ANTIDEFRAG */
/*
* Used by page_zone() to look up the address of the struct zone whose
@@ -531,6 +543,7 @@ static int prep_new_page(struct page *pa
return 0;
}
+#ifdef CONFIG_PAGEALLOC_ANTIDEFRAG
/* Remove an element from the fallback list of the buddy allocator */
static struct page *__rmqueue_fallback(struct zone *zone, int order,
int alloctype)
@@ -568,6 +581,13 @@ static struct page *__rmqueue_fallback(s
return NULL;
}
+#else
+static struct page *__rmqueue_fallback(struct zone *zone, int order,
+ int alloctype)
+{
+ return NULL;
+}
+#endif /* CONFIG_PAGEALLOC_ANTIDEFRAG */
/*
* Do the hard work of removing an element from the buddy allocator.
* [PATCH 5/5] Light fragmentation avoidance without usemap: 005_drainpercpu
From: Mel Gorman @ 2005-11-22 19:17 UTC
To: linux-mm; +Cc: Mel Gorman, nickpiggin, ak, linux-kernel, lhms-devel, mingo
Per-cpu pages can accidentally cause fragmentation because they are free
but pinned pages sitting in an otherwise contiguous block. With this patch
applied, the per-cpu caches are drained after direct reclaim has run if the
requested order is greater than 3. It simply reuses the code used by suspend
and CPU hotplug, and is only triggered when anti-defragmentation is enabled.
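In simplified form (excerpted from the hunk below; the surrounding retry
logic is omitted), the direct-reclaim path of the allocator becomes:

	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);

	/* Per-cpu pages pin parts of otherwise-free blocks; flush them
	 * so the buddy allocator can merge buddies before a high-order
	 * retry. */
	if (order > 3)
		drain_all_local_pages();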
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.15-rc1-mm2-004_configurable/mm/page_alloc.c linux-2.6.15-rc1-mm2-005_drainpercpu/mm/page_alloc.c
--- linux-2.6.15-rc1-mm2-004_configurable/mm/page_alloc.c 2005-11-22 16:53:03.000000000 +0000
+++ linux-2.6.15-rc1-mm2-005_drainpercpu/mm/page_alloc.c 2005-11-22 16:53:45.000000000 +0000
@@ -689,7 +689,9 @@ void drain_remote_pages(void)
}
#endif
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
+#if defined(CONFIG_PM) || \
+ defined(CONFIG_HOTPLUG_CPU) || \
+ defined(CONFIG_PAGEALLOC_ANTIDEFRAG)
static void __drain_pages(unsigned int cpu)
{
struct zone *zone;
@@ -716,10 +718,9 @@ static void __drain_pages(unsigned int c
}
}
}
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
+#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU || CONFIG_PAGEALLOC_ANTIDEFRAG */
#ifdef CONFIG_PM
-
void mark_free_pages(struct zone *zone)
{
unsigned long zone_pfn, flags;
@@ -746,7 +747,9 @@ void mark_free_pages(struct zone *zone)
}
spin_unlock_irqrestore(&zone->lock, flags);
}
+#endif /* CONFIG_PM */
+#if defined(CONFIG_PM) || defined(CONFIG_PAGEALLOC_ANTIDEFRAG)
/*
* Spill all of this CPU's per-cpu pages back into the buddy allocator.
*/
@@ -758,7 +761,28 @@ void drain_local_pages(void)
__drain_pages(smp_processor_id());
local_irq_restore(flags);
}
-#endif /* CONFIG_PM */
+
+void smp_drain_local_pages(void *arg)
+{
+ drain_local_pages();
+}
+
+/*
+ * Spill all the per-cpu pages from all CPUs back into the buddy allocator
+ */
+void drain_all_local_pages(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __drain_pages(smp_processor_id());
+ local_irq_restore(flags);
+
+ smp_call_function(smp_drain_local_pages, NULL, 0, 1);
+}
+#else
+void drain_all_local_pages(void) {}
+#endif /* CONFIG_PM || CONFIG_PAGEALLOC_ANTIDEFRAG */
void zone_statistics(struct zonelist *zonelist, struct zone *z)
{
@@ -1109,6 +1133,9 @@ rebalance:
did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
+ if (order > 3)
+ drain_all_local_pages();
+
p->reclaim_state = NULL;
p->flags &= ~PF_MEMALLOC;