Index: 2.6.13-joel2/include/linux/mmzone.h
===================================================================
--- 2.6.13-joel2.orig/include/linux/mmzone.h	2005-09-26 13:58:59.%N -0500
+++ 2.6.13-joel2/include/linux/mmzone.h	2005-09-26 13:59:38.%N -0500
@@ -57,13 +57,28 @@ struct zone_padding {
 #else
 #define ZONE_PADDING(name)
 #endif
+/*
+ * The pcpu_list is to keep kernel and userrclm allocations
+ * apart while still allowing all allocation types to have
+ * per-cpu lists
+ */
+struct pcpu_list {
+	int count;
+	struct list_head list;
+} ____cacheline_aligned_in_smp;
+
+
+/* Indices into pcpu_list */
+#define PCPU_KERN 0
+#define PCPU_USER 1
+#define PCPU_LIST_SIZE 2
 
 struct per_cpu_pages {
-	int count;		/* number of pages in the list */
-	int low;		/* low watermark, refill needed */
-	int high;		/* high watermark, emptying needed */
-	int batch;		/* chunk size for buddy add/remove */
-	struct list_head list;	/* the list of pages */
+	int count;		/* number of pages in the list */
+	struct pcpu_list pcpu_list[PCPU_LIST_SIZE];
+	int low;		/* low watermark, refill needed */
+	int high;		/* high watermark, emptying needed */
+	int batch;		/* chunk size for buddy add/remove */
 };
 
 struct per_cpu_pageset {
Index: 2.6.13-joel2/mm/page_alloc.c
===================================================================
--- 2.6.13-joel2.orig/mm/page_alloc.c	2005-09-26 13:59:27.%N -0500
+++ 2.6.13-joel2/mm/page_alloc.c	2005-09-26 13:59:38.%N -0500
@@ -775,9 +775,18 @@ void drain_remote_pages(void)
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
-			if (pcp->count)
-				pcp->count -= free_pages_bulk(zone, pcp->count,
-						&pcp->list, 0);
+			if (pcp->pcpu_list[PCPU_KERN].count)
+				pcp->pcpu_list[PCPU_KERN].count -=
+					free_pages_bulk(zone,
+						pcp->pcpu_list[PCPU_KERN].count,
+						&pcp->pcpu_list[PCPU_KERN].list,
+						0);
+			if (pcp->pcpu_list[PCPU_USER].count)
+				pcp->pcpu_list[PCPU_USER].count -=
+					free_pages_bulk(zone,
+						pcp->pcpu_list[PCPU_USER].count,
+						&pcp->pcpu_list[PCPU_USER].list,
+						0);
 		}
 	}
 	local_irq_restore(flags);
@@ -798,8 +807,18 @@ static void __drain_pages(unsigned int c
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
-			pcp->count -= free_pages_bulk(zone, pcp->count,
-					&pcp->list, 0);
+			pcp->pcpu_list[PCPU_KERN].count -=
+				free_pages_bulk(zone,
+					pcp->pcpu_list[PCPU_KERN].count,
+					&pcp->pcpu_list[PCPU_KERN].list,
+					0);
+
+			pcp->pcpu_list[PCPU_USER].count -=
+				free_pages_bulk(zone,
+					pcp->pcpu_list[PCPU_USER].count,
+					&pcp->pcpu_list[PCPU_USER].list,
+					0);
+
 		}
 	}
 }
@@ -881,6 +900,7 @@ static void fastcall free_hot_cold_page(
 	struct zone *zone = page_zone(page);
 	struct per_cpu_pages *pcp;
 	unsigned long flags;
+	struct pcpu_list *plist;
 
 	arch_free_page(page, 0);
 
@@ -890,11 +910,24 @@ static void fastcall free_hot_cold_page(
 		page->mapping = NULL;
 	free_pages_check(__FUNCTION__, page);
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+
+	/*
+	 * Strictly speaking, we should not be accessing the zone information
+	 * here without the zone lock. In this case, it does not matter if
+	 * the read is incorrect.
+	 */
+	if (get_pageblock_type(zone, page) == RCLM_USER)
+		plist = &pcp->pcpu_list[PCPU_USER];
+	else
+		plist = &pcp->pcpu_list[PCPU_KERN];
+
 	local_irq_save(flags);
-	list_add(&page->lru, &pcp->list);
-	pcp->count++;
-	if (pcp->count >= pcp->high)
-		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
+	if (plist->count >= pcp->high)
+		plist->count -= free_pages_bulk(zone, pcp->batch,
+						&plist->list, 0);
+
+	list_add(&page->lru, &plist->list);
+	plist->count++;
 	local_irq_restore(flags);
 	put_cpu();
 }
@@ -930,19 +963,28 @@ buffered_rmqueue(struct zone *zone, int
 	unsigned long flags;
 	struct page *page = NULL;
 	int cold = !!(gfp_flags & __GFP_COLD);
+	struct pcpu_list *plist;
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
 		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 		local_irq_save(flags);
-		if (pcp->count <= pcp->low)
-			pcp->count += rmqueue_bulk(zone, pcp->batch,
-						&pcp->list, alloctype);
-		if (pcp->count) {
-			page = list_entry(pcp->list.next, struct page, lru);
+
+		if (alloctype == __GFP_USER)
+			plist = &pcp->pcpu_list[PCPU_USER];
+		else
+			plist = &pcp->pcpu_list[PCPU_KERN];
+
+		if (plist->count <= pcp->low)
+			plist->count += rmqueue_bulk(zone,
+						pcp->batch,
+						&plist->list,
+						alloctype);
+		if (plist->count) {
+			page = list_entry(plist->list.next, struct page, lru);
 			list_del(&page->lru);
-			pcp->count--;
+			plist->count--;
 		}
 		local_irq_restore(flags);
 		put_cpu();
@@ -2001,18 +2043,23 @@ inline void setup_pageset(struct per_cpu
 	struct per_cpu_pages *pcp;
 
 	pcp = &p->pcp[0];		/* hot */
-	pcp->count = 0;
+	pcp->pcpu_list[PCPU_KERN].count = 0;
+	pcp->pcpu_list[PCPU_USER].count = 0;
 	pcp->low = 2 * batch;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
+	INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_KERN].list);
+	INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_USER].list);
 
 	pcp = &p->pcp[1];		/* cold*/
-	pcp->count = 0;
+	pcp->pcpu_list[PCPU_KERN].count = 0;
+	pcp->pcpu_list[PCPU_USER].count = 0;
 	pcp->low = 0;
 	pcp->high = 2 * batch;
 	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
+	INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_KERN].list);
+	INIT_LIST_HEAD(&pcp->pcpu_list[PCPU_USER].list);
+
 }
 
 #ifdef CONFIG_NUMA
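
To make the data-structure change easier to follow, the sketch below is a stand-alone, user-space C illustration (not kernel code) of the bookkeeping the patch adds: each per-cpu pageset now carries one free list per allocation type, indexed by PCPU_KERN or PCPU_USER, and a freed page is routed to whichever list matches its pageblock's reclaim type. The simplified types and the pick_list() helper are assumptions made purely for illustration; they are not part of the patch.

/*
 * Stand-alone illustration of the split per-cpu lists (not kernel code).
 * Types are simplified to counts only; pick_list() is a hypothetical
 * helper mirroring the PCPU_USER/PCPU_KERN choice in free_hot_cold_page().
 */
#include <stdio.h>

#define PCPU_KERN	0	/* pages from kernel (non-reclaimable) allocations */
#define PCPU_USER	1	/* pages from user-reclaimable allocations */
#define PCPU_LIST_SIZE	2

struct pcpu_list {
	int count;		/* pages currently held on this list */
};

struct per_cpu_pages {
	struct pcpu_list pcpu_list[PCPU_LIST_SIZE];
};

/* Route a page to the list matching its allocation type. */
static struct pcpu_list *pick_list(struct per_cpu_pages *pcp, int user_rclm)
{
	return &pcp->pcpu_list[user_rclm ? PCPU_USER : PCPU_KERN];
}

int main(void)
{
	struct per_cpu_pages pcp = { { { 0 }, { 0 } } };
	int i;

	/* "Free" three kernel pages and five user-reclaimable pages. */
	for (i = 0; i < 3; i++)
		pick_list(&pcp, 0)->count++;
	for (i = 0; i < 5; i++)
		pick_list(&pcp, 1)->count++;

	printf("kern list: %d pages, user list: %d pages\n",
	       pcp.pcpu_list[PCPU_KERN].count,
	       pcp.pcpu_list[PCPU_USER].count);
	return 0;
}

The split mirrors the comment in the mmzone.h hunk: kernel and user-reclaimable allocations are kept apart while both allocation types still benefit from per-cpu lists.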