* [RFC] Page allocator: Get rid of the list of cold pages
@ 2007-11-13 4:42 Christoph Lameter
2007-11-14 18:41 ` Mel Gorman
0 siblings, 1 reply; 5+ messages in thread
From: Christoph Lameter @ 2007-11-13 4:42 UTC (permalink / raw)
To: linux-mm; +Cc: Mel Gorman
We have repeatedly discussed if the cold pages still have a point. There is
one way to join the two lists: Use a single list and put the cold pages at the
end and the hot pages at the beginning. That way a single list can serve for
both types of allocations.
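
As a minimal sketch of that discipline (an illustrative userspace mock-up only,
not part of the patch; the helpers mirror the kernel's <linux/list.h> API):

	#include <stdio.h>

	struct list_head {
		struct list_head *next, *prev;
	};

	static void __list_add(struct list_head *n, struct list_head *prev,
			       struct list_head *next)
	{
		next->prev = n;
		n->next = next;
		n->prev = prev;
		prev->next = n;
	}

	static void list_add(struct list_head *n, struct list_head *head)
	{
		__list_add(n, head, head->next);	/* hot: head of the list */
	}

	static void list_add_tail(struct list_head *n, struct list_head *head)
	{
		__list_add(n, head->prev, head);	/* cold: tail of the list */
	}

	struct page {
		struct list_head lru;	/* first member, so the casts below are valid */
		int id;
	};

	int main(void)
	{
		struct list_head pcp = { &pcp, &pcp };	/* empty per-cpu list */
		struct page p[3] = { { .id = 0 }, { .id = 1 }, { .id = 2 } };

		list_add(&p[0].lru, &pcp);	/* free a hot page */
		list_add_tail(&p[1].lru, &pcp);	/* free a cold page */
		list_add(&p[2].lru, &pcp);	/* free another hot page */

		/* Hot allocations scan from the head, cold ones from the tail. */
		printf("hot pick: %d, cold pick: %d\n",
		       ((struct page *)pcp.next)->id,	/* 2 */
		       ((struct page *)pcp.prev)->id);	/* 1 */
		return 0;
	}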
Signed-off-by: Christoph Lameter <clameter@sgi.com>
---
 include/linux/mmzone.h |    2 -
 mm/page_alloc.c        |   55 +++++++++++++++++++++++--------------------------
 mm/vmstat.c            |   24 ++++++++-------------
 3 files changed, 36 insertions(+), 45 deletions(-)

Index: linux-2.6/include/linux/mmzone.h
===================================================================
--- linux-2.6.orig/include/linux/mmzone.h	2007-11-12 20:16:24.814260361 -0800
+++ linux-2.6/include/linux/mmzone.h	2007-11-12 20:17:35.267759790 -0800
@@ -113,7 +113,7 @@ struct per_cpu_pages {
 };

 struct per_cpu_pageset {
-	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
+	struct per_cpu_pages pcp;
 #ifdef CONFIG_NUMA
 	s8 expire;
 #endif
Index: linux-2.6/mm/vmstat.c
===================================================================
--- linux-2.6.orig/mm/vmstat.c	2007-11-12 20:16:24.822260116 -0800
+++ linux-2.6/mm/vmstat.c	2007-11-12 20:29:18.912816791 -0800
@@ -332,7 +332,7 @@ void refresh_cpu_vm_stats(int cpu)
 		 * Check if there are pages remaining in this pageset
 		 * if not then there is nothing to expire.
 		 */
-		if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
+		if (!p->expire || !p->pcp.count)
 			continue;

 		/*
@@ -347,11 +347,8 @@ void refresh_cpu_vm_stats(int cpu)
 		if (p->expire)
 			continue;

-		if (p->pcp[0].count)
-			drain_zone_pages(zone, p->pcp + 0);
-
-		if (p->pcp[1].count)
-			drain_zone_pages(zone, p->pcp + 1);
+		if (p->pcp.count)
+			drain_zone_pages(zone, &p->pcp);
 #endif
 	}
 }
@@ -685,20 +682,17 @@ static void zoneinfo_show_print(struct s
 		   "\n  pagesets");
 	for_each_online_cpu(i) {
 		struct per_cpu_pageset *pageset;
-		int j;

 		pageset = CPU_PTR(zone->pageset, i);
-		for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
-			seq_printf(m,
-				   "\n    cpu: %i pcp: %i"
+		seq_printf(m,
+			   "\n    cpu: %i"
 			   "\n              count: %i"
 			   "\n              high:  %i"
 			   "\n              batch: %i",
-				   i, j,
-				   pageset->pcp[j].count,
-				   pageset->pcp[j].high,
-				   pageset->pcp[j].batch);
-		}
+			   i,
+			   pageset->pcp.count,
+			   pageset->pcp.high,
+			   pageset->pcp.batch);
 #ifdef CONFIG_SMP
 		seq_printf(m, "\n  vm stats threshold: %d",
 			   pageset->stat_threshold);
Index: linux-2.6/mm/page_alloc.c
===================================================================
--- linux-2.6.orig/mm/page_alloc.c	2007-11-12 20:16:24.830259956 -0800
+++ linux-2.6/mm/page_alloc.c	2007-11-12 20:26:44.766259839 -0800
@@ -885,24 +885,21 @@ static void __drain_pages(unsigned int c
 {
 	unsigned long flags;
 	struct zone *zone;
-	int i;

 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
+		struct per_cpu_pages *pcp;

 		if (!populated_zone(zone))
 			continue;

 		pset = CPU_PTR(zone->pageset, cpu);
-		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
-			struct per_cpu_pages *pcp;
-
-			pcp = &pset->pcp[i];
-			local_irq_save(flags);
-			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
-			pcp->count = 0;
-			local_irq_restore(flags);
-		}
+		pcp = &pset->pcp;
+		local_irq_save(flags);
+		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+		pcp->count = 0;
+		local_irq_restore(flags);
 	}
 }
@@ -993,9 +990,12 @@ static void fastcall free_hot_cold_page(
 	kernel_map_pages(page, 1, 0);

 	local_irq_save(flags);
-	pcp = &THIS_CPU(zone->pageset)->pcp[cold];
+	pcp = &THIS_CPU(zone->pageset)->pcp;
 	__count_vm_event(PGFREE);
-	list_add(&page->lru, &pcp->list);
+	if (cold)
+		list_add_tail(&page->lru, &pcp->list);
+	else
+		list_add(&page->lru, &pcp->list);
 	set_page_private(page, get_pageblock_migratetype(page));
 	pcp->count++;
 	if (pcp->count >= pcp->high) {
@@ -1051,7 +1051,7 @@ again:
 		struct per_cpu_pages *pcp;

 		local_irq_save(flags);
-		pcp = &THIS_CPU(zone->pageset)->pcp[cold];
+		pcp = &THIS_CPU(zone->pageset)->pcp;
 		if (!pcp->count) {
 			pcp->count = rmqueue_bulk(zone, 0,
 					pcp->batch, &pcp->list, migratetype);
@@ -1060,9 +1060,15 @@ again:
 		}

 		/* Find a page of the appropriate migrate type */
-		list_for_each_entry(page, &pcp->list, lru)
-			if (page_private(page) == migratetype)
-				break;
+		if (cold) {
+			list_for_each_entry_reverse(page, &pcp->list, lru)
+				if (page_private(page) == migratetype)
+					break;
+		} else {
+			list_for_each_entry(page, &pcp->list, lru)
+				if (page_private(page) == migratetype)
+					break;
+		}

 		/* Allocate more to the pcp list if necessary */
 		if (unlikely(&page->lru == &pcp->list)) {
@@ -1787,12 +1793,9 @@ void show_free_areas(void)

 		pageset = CPU_PTR(zone->pageset, cpu);

-		printk("CPU %4d: Hot: hi:%5d, btch:%4d usd:%4d   "
-			"Cold: hi:%5d, btch:%4d usd:%4d\n",
-			cpu, pageset->pcp[0].high,
-			pageset->pcp[0].batch, pageset->pcp[0].count,
-			pageset->pcp[1].high, pageset->pcp[1].batch,
-			pageset->pcp[1].count);
+		printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
+			cpu, pageset->pcp.high,
+			pageset->pcp.batch, pageset->pcp.count);
 	}
 }
@@ -2590,17 +2593,11 @@ inline void setup_pageset(struct per_cpu

 	memset(p, 0, sizeof(*p));

-	pcp = &p->pcp[0];		/* hot */
+	pcp = &p->pcp;
 	pcp->count = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
 	INIT_LIST_HEAD(&pcp->list);
-
-	pcp = &p->pcp[1];		/* cold*/
-	pcp->count = 0;
-	pcp->high = 2 * batch;
-	pcp->batch = max(1UL, batch/2);
-	INIT_LIST_HEAD(&pcp->list);
 }

 /*
@@ -2613,7 +2610,7 @@ static void setup_pagelist_highmark(stru
 {
 	struct per_cpu_pages *pcp;

-	pcp = &p->pcp[0]; /* hot list */
+	pcp = &p->pcp;
 	pcp->high = high;
 	pcp->batch = max(1UL, high/4);
 	if ((high/4) > (PAGE_SHIFT * 8))
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] Page allocator: Get rid of the list of cold pages
  2007-11-13  4:42 [RFC] Page allocator: Get rid of the list of cold pages Christoph Lameter
@ 2007-11-14 18:41 ` Mel Gorman
  2007-11-14 18:51   ` Christoph Lameter
  0 siblings, 1 reply; 5+ messages in thread
From: Mel Gorman @ 2007-11-14 18:41 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, apw

On (12/11/07 20:42), Christoph Lameter didst pronounce:
> We have repeatedly discussed if the cold pages still have a point.

Yep, no one would put their neck out and say whether there is or not.

> There is
> one way to join the two lists: Use a single list and put the cold pages at the
> end and the hot pages at the beginning. That way a single list can serve for
> both types of allocations.
>

For sure.

> Signed-off-by: Christoph Lameter <clameter@sgi.com>
>

What was this based against? It didn't apply cleanly to 2.6.24-rc2 but it
was fairly trivial to fix up the rejects. I tested on a few machines just
to see what happened. The performance results for kernbench, dbench, tbench
and aim9[1] were generally good.

First up is elm3b6 (from tko). It's a 4-way Opteron with 8GiB of RAM. The
results were

KernBench Timing Comparison
---------------------------
                 2.6.24-rc2-clean  2.6.24-rc2-one-percpu   %diff
User   CPU time            445.99                 445.60   0.09%
System CPU time             41.72                  41.37   0.84%
Total  CPU time            487.71                 486.97   0.15%
Elapsed time               125.93                 125.93   0.00%

KernBench Deviation Comparison
------------------------------
User   CPU stddev            0.14                   0.10   28.57%
System CPU stddev            0.22                   0.01   95.45%
Total  CPU stddev            0.09                   0.11  -22.22%
Elapsed stddev               0.07                   0.10  -42.86%

AIM9 Comparison
---------------
                2.6.24-rc2-clean  2.6.24-rc2-one-percpu
 1 creat-clo       139360.11       152057.99     12697.88   9.11% File Creations and Closes/second
 2 page_test       242747.88       272322.95     29575.07  12.18% System Allocations & Pages/second
 3 brk_test        918260.58      1287183.33    368922.75  40.18% System Memory Allocations/second
 4 jmp_test      11710264.96     11718963.51      8698.55   0.07% Non-local gotos/second
 5 signal_test     460523.25       477037.16     16513.91   3.59% Signal Traps/second
 6 exec_test          174.33          176.52         2.19   1.26% Program Loads/second
 7 fork_test         3013.49         3237.25       223.76   7.43% Task Creations/second
 8 link_test        46963.45        51495.15      4531.70   9.65% Link/Unlink Pairs/second

DBench Comparison
-----------------
                  2.6.24-rc2-clean  2.6.24-rc2-one-percpu   %diff
Throughput 1 procs         206.819                206.269  -0.27%

TBench Comparison
-----------------
                  2.6.24-rc2-clean  2.6.24-rc2-one-percpu   %diff
Throughput 1 proc          170.291                147.613 -13.32%

Generally good there. I wouldn't worry too much about dbench and tbench
because they tend to be flaky on this machine anyway. I thought it was
interesting that both the System CPU and User CPU times were improved for
kernbench. It implies that the allocator is doing less work but not getting
punished by worse cache hotness.

Next up is a 2-way Opteron with 2GB of RAM.
KernBench Timing Comparison
---------------------------
                 2.6.24-rc2-clean  2.6.24-rc2-one-percpu   %diff
User   CPU time            268.90                 269.33  -0.16%
System CPU time             37.88                  36.35   4.04%
Total  CPU time            306.78                 305.68   0.36%
Elapsed time               155.99                 155.43   0.36%

KernBench Deviation Comparison
------------------------------
User   CPU stddev            0.02                   0.15  -650.00%
System CPU stddev            0.09                   0.09     0.00%
Total  CPU stddev            0.07                   0.05    28.57%
Elapsed stddev               0.11                   0.11     0.00%

AIM9 Comparison
---------------
                2.6.24-rc2-clean  2.6.24-rc2-one-percpu
 1 creat-clo       149125.15       154624.23      5499.08    3.69% File Creations and Closes/second
 2 page_test       170566.57       169433.43     -1133.14   -0.66% System Allocations & Pages/second
 3 brk_test       2508498.58      1879603.40   -628895.18  -25.07% System Memory Allocations/second
 4 jmp_test      24270950.00     24286333.33     15383.33    0.06% Non-local gotos/second
 5 signal_test     574466.67       500466.67    -74000.00  -12.88% Signal Traps/second
 6 exec_test          230.04          228.63        -1.41   -0.61% Program Loads/second
 7 fork_test         3470.51         3528.82        58.31    1.68% Task Creations/second
 8 link_test        53508.53        53700.65       192.12    0.36% Link/Unlink Pairs/second

DBench Comparison
-----------------
                  2.6.24-rc2-clean  2.6.24-rc2-one-percpu   %diff
Throughput 1 procs         260.708                 209.52 -19.63%

TBench Comparison
-----------------
                  2.6.24-rc2-clean  2.6.24-rc2-one-percpu   %diff
Throughput 1 proc          254.032                293.737  15.63%

The KernBench figures looked ok although the page_test and brk_test results
were of minor concern. These tests can be variable but are very sensitive to
cache effects, which might explain why User CPU time was slightly down for
kernbench. This is worth trying on a few more machines. Unlike elm3b6, this
machine also showed improvements for hackbench, but I'm not sure what sort
of conclusion to draw from that.

I'm still waiting on results to come in from a PPC64 machine but initial
indicators are this is not a bad idea because you are not abandoning the
idea of giving hot pages when requested, just altering a little how they
are found. I suspect your main motivation is reducing the size of a per-cpu
structure?

You should be able to see improvements in a number of cases that have
little to do with hotness/coldness. Currently, if a caller asks for a hot
page and there are no hot pages, they go to the main allocator, acquire
locks etc. It makes a lot more sense that they get a cold per-cpu page
instead.

Where you may get hit is that your combined per-cpu lists are smaller than
the separate ones as you do not update the pcp->high value to be hot+cold.
However, the opposite is also true. Currently, if someone is doing a lot of
file-readahead, they regularly will go to the main allocator as the cold
per-cpu lists get emptied. Now they will be able to take hot pages for a
cold user instead, which may be noticeable in some cases.

However, in the event we cannot prove whether separate hot/cold lists are
worth it or not, we might as well collapse them for smaller per-cpu
structures.

[1] Not exactly comprehensive testing I know, but it is easily available.
> ---
>  include/linux/mmzone.h |    2 -
>  mm/page_alloc.c        |   55 +++++++++++++++++++++++--------------------------
>  mm/vmstat.c            |   24 ++++++++-------------
>  3 files changed, 36 insertions(+), 45 deletions(-)
>
> Index: linux-2.6/include/linux/mmzone.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mmzone.h	2007-11-12 20:16:24.814260361 -0800
> +++ linux-2.6/include/linux/mmzone.h	2007-11-12 20:17:35.267759790 -0800
> @@ -113,7 +113,7 @@ struct per_cpu_pages {
>  };
>
>  struct per_cpu_pageset {
> -	struct per_cpu_pages pcp[2];	/* 0: hot.  1: cold */
> +	struct per_cpu_pages pcp;
>  #ifdef CONFIG_NUMA
>  	s8 expire;
>  #endif
> Index: linux-2.6/mm/vmstat.c
> ===================================================================
> --- linux-2.6.orig/mm/vmstat.c	2007-11-12 20:16:24.822260116 -0800
> +++ linux-2.6/mm/vmstat.c	2007-11-12 20:29:18.912816791 -0800
> @@ -332,7 +332,7 @@ void refresh_cpu_vm_stats(int cpu)
>  		 * Check if there are pages remaining in this pageset
>  		 * if not then there is nothing to expire.
>  		 */
> -	if (!p->expire || (!p->pcp[0].count && !p->pcp[1].count))
> +	if (!p->expire || !p->pcp.count)
>  		continue;
>
>  	/*
> @@ -347,11 +347,8 @@ void refresh_cpu_vm_stats(int cpu)
>  	if (p->expire)
>  		continue;
>
> -	if (p->pcp[0].count)
> -		drain_zone_pages(zone, p->pcp + 0);
> -
> -	if (p->pcp[1].count)
> -		drain_zone_pages(zone, p->pcp + 1);
> +	if (p->pcp.count)
> +		drain_zone_pages(zone, &p->pcp);
> #endif
>  	}
> }
> @@ -685,20 +682,17 @@ static void zoneinfo_show_print(struct s
>  		"\n  pagesets");
>  	for_each_online_cpu(i) {
>  		struct per_cpu_pageset *pageset;
> -		int j;
>
>  		pageset = CPU_PTR(zone->pageset, i);
> -		for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) {
> -			seq_printf(m,
> -				   "\n    cpu: %i pcp: %i"
> +		seq_printf(m,
> +			   "\n    cpu: %i"
>  			   "\n              count: %i"
>  			   "\n              high:  %i"
>  			   "\n              batch: %i",
> -				   i, j,
> -				   pageset->pcp[j].count,
> -				   pageset->pcp[j].high,
> -				   pageset->pcp[j].batch);
> -		}
> +			   i,
> +			   pageset->pcp.count,
> +			   pageset->pcp.high,
> +			   pageset->pcp.batch);
> #ifdef CONFIG_SMP
>  		seq_printf(m, "\n  vm stats threshold: %d",
>  			   pageset->stat_threshold);

All pretty straight-forward.

> Index: linux-2.6/mm/page_alloc.c
> ===================================================================
> --- linux-2.6.orig/mm/page_alloc.c	2007-11-12 20:16:24.830259956 -0800
> +++ linux-2.6/mm/page_alloc.c	2007-11-12 20:26:44.766259839 -0800
> @@ -885,24 +885,21 @@ static void __drain_pages(unsigned int c
>  {
>  	unsigned long flags;
>  	struct zone *zone;
> -	int i;
>
>  	for_each_zone(zone) {
>  		struct per_cpu_pageset *pset;
> +		struct per_cpu_pages *pcp;
>
>  		if (!populated_zone(zone))
>  			continue;
>
>  		pset = CPU_PTR(zone->pageset, cpu);
> -		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
> -			struct per_cpu_pages *pcp;
> -
> -			pcp = &pset->pcp[i];
> -			local_irq_save(flags);
> -			free_pages_bulk(zone, pcp->count, &pcp->list, 0);
> -			pcp->count = 0;
> -			local_irq_restore(flags);
> -		}
> +		pcp = &pset->pcp;
> +		local_irq_save(flags);
> +		free_pages_bulk(zone, pcp->count, &pcp->list, 0);
> +		pcp->count = 0;
> +		local_irq_restore(flags);

Not that this is performance critical or anything, but it's less messing
with IRQ flags.
>  	}
>  }
>
> @@ -993,9 +990,12 @@ static void fastcall free_hot_cold_page(
>  	kernel_map_pages(page, 1, 0);
>
>  	local_irq_save(flags);
> -	pcp = &THIS_CPU(zone->pageset)->pcp[cold];
> +	pcp = &THIS_CPU(zone->pageset)->pcp;
>  	__count_vm_event(PGFREE);
> -	list_add(&page->lru, &pcp->list);
> +	if (cold)
> +		list_add_tail(&page->lru, &pcp->list);
> +	else
> +		list_add(&page->lru, &pcp->list);

There is scope here for a list function that adds to the head or tail
depending on the value of a parameter. I know Andy has the prototype of
such a function lying around so you may be able to share.

>  	set_page_private(page, get_pageblock_migratetype(page));
>  	pcp->count++;
>  	if (pcp->count >= pcp->high) {
> @@ -1051,7 +1051,7 @@ again:
>  		struct per_cpu_pages *pcp;
>
>  		local_irq_save(flags);
> -		pcp = &THIS_CPU(zone->pageset)->pcp[cold];
> +		pcp = &THIS_CPU(zone->pageset)->pcp;
>  		if (!pcp->count) {
>  			pcp->count = rmqueue_bulk(zone, 0,
>  					pcp->batch, &pcp->list, migratetype);
> @@ -1060,9 +1060,15 @@ again:
>  		}
>
>  		/* Find a page of the appropriate migrate type */
> -		list_for_each_entry(page, &pcp->list, lru)
> -			if (page_private(page) == migratetype)
> -				break;
> +		if (cold) {
> +			list_for_each_entry_reverse(page, &pcp->list, lru)
> +				if (page_private(page) == migratetype)
> +					break;
> +		} else {
> +			list_for_each_entry(page, &pcp->list, lru)
> +				if (page_private(page) == migratetype)
> +					break;
> +		}
>

The code looks ok but it might be nice to add a comment explaining how
hot/cold pages are added to the list in the header and point the reader
to it here.

>  		/* Allocate more to the pcp list if necessary */
>  		if (unlikely(&page->lru == &pcp->list)) {
> @@ -1787,12 +1793,9 @@ void show_free_areas(void)
>
>  		pageset = CPU_PTR(zone->pageset, cpu);
>
> -		printk("CPU %4d: Hot: hi:%5d, btch:%4d usd:%4d   "
> -			"Cold: hi:%5d, btch:%4d usd:%4d\n",
> -			cpu, pageset->pcp[0].high,
> -			pageset->pcp[0].batch, pageset->pcp[0].count,
> -			pageset->pcp[1].high, pageset->pcp[1].batch,
> -			pageset->pcp[1].count);
> +		printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
> +			cpu, pageset->pcp.high,
> +			pageset->pcp.batch, pageset->pcp.count);
>  	}
>  }
>
> @@ -2590,17 +2593,11 @@ inline void setup_pageset(struct per_cpu
>
>  	memset(p, 0, sizeof(*p));
>
> -	pcp = &p->pcp[0];		/* hot */
> +	pcp = &p->pcp;
>  	pcp->count = 0;
>  	pcp->high = 6 * batch;
>  	pcp->batch = max(1UL, 1 * batch);
>  	INIT_LIST_HEAD(&pcp->list);
> -
> -	pcp = &p->pcp[1];		/* cold*/
> -	pcp->count = 0;
> -	pcp->high = 2 * batch;
> -	pcp->batch = max(1UL, batch/2);
> -	INIT_LIST_HEAD(&pcp->list);

Before - per-cpu high count was 8 * batch. After, it is 6 * batch. This
may be noticeable in some corner case involving page readahead requesting
cold pages.

>  }
>
>  /*
> @@ -2613,7 +2610,7 @@ static void setup_pagelist_highmark(stru
>  {
>  	struct per_cpu_pages *pcp;
>
> -	pcp = &p->pcp[0]; /* hot list */
> +	pcp = &p->pcp;
>  	pcp->high = high;
>  	pcp->batch = max(1UL, high/4);
>  	if ((high/4) > (PAGE_SHIFT * 8))
>

All in all, pretty straight-forward. I think it's worth wider testing at
least. I think it'll be hard to show for sure whether this is having a
negative performance impact or not but initial results look ok.

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread
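[The helper Mel suggests above might look something like the following --
a hypothetical sketch for kernel context, since Andy's actual prototype is
not shown in this thread; the name list_add_dir() and its signature are
assumptions, built only on the real <linux/list.h> primitives:]

	#include <linux/list.h>

	/*
	 * Add an entry to the head or the tail of a list depending on a
	 * direction flag (hypothetical helper, not an existing kernel API).
	 */
	static inline void list_add_dir(struct list_head *new,
					struct list_head *head, int tail)
	{
		if (tail)
			list_add_tail(new, head);	/* cold page: queue at the end */
		else
			list_add(new, head);		/* hot page: queue at the front */
	}

	/* free_hot_cold_page() could then collapse its branch to: */
	/*	list_add_dir(&page->lru, &pcp->list, cold);	*/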
* Re: [RFC] Page allocator: Get rid of the list of cold pages
  2007-11-14 18:41 ` Mel Gorman
@ 2007-11-14 18:51   ` Christoph Lameter
  2007-11-15 10:40     ` Mel Gorman
  0 siblings, 1 reply; 5+ messages in thread
From: Christoph Lameter @ 2007-11-14 18:51 UTC (permalink / raw)
To: Mel Gorman; +Cc: linux-mm, apw

On Wed, 14 Nov 2007, Mel Gorman wrote:

> What was this based against? It didn't apply cleanly to 2.6.24-rc2 but it
> was fairly trivial to fix up the rejects. I tested on a few machines just
> to see what happened. The performance results for kernbench, dbench, tbench
> and aim9[1] were generally good.

It was against git current (hmm.... Maybe one or the other patchset was in
there too). Thanks for the evaluation.

> I'm still waiting on results to come in from a PPC64 machine but initial
> indicators are this is not a bad idea because you are not abandoning the
> idea of giving hot pages when requested, just altering a little how they
> are found. I suspect your main motivation is reducing the size of a per-cpu
> structure?

Yes. I can put more pagesets into a single cacheline if the cpu_alloc
patchset is also applied. The major benefit will only be reached together
with another patchset.

> However, the opposite is also true. Currently, if someone is doing a lot of
> file-readahead, they regularly will go to the main allocator as the cold
> per-cpu lists get emptied. Now they will be able to take hot pages for a
> cold user instead, which may be noticeable in some cases.

This means that they will be able to use large batch sizes. This may
actually improve that situation.

> However, in the event we cannot prove whether separate hot/cold lists are
> worth it or not, we might as well collapse them for smaller per-cpu
> structures.

If we cannot prove that they are worth it then we should take them out.

> >  	local_irq_save(flags);
> > -	pcp = &THIS_CPU(zone->pageset)->pcp[cold];
> > +	pcp = &THIS_CPU(zone->pageset)->pcp;
> >  	__count_vm_event(PGFREE);
> > -	list_add(&page->lru, &pcp->list);
> > +	if (cold)
> > +		list_add_tail(&page->lru, &pcp->list);
> > +	else
> > +		list_add(&page->lru, &pcp->list);
>
> There is scope here for a list function that adds to the head or tail
> depending on the value of a parameter. I know Andy has the prototype of
> such a function lying around so you may be able to share.

I use a similar thing in SLUB. So if Andy has something then we may be
able to use it in both places.

> > +	pcp = &p->pcp;
> >  	pcp->count = 0;
> >  	pcp->high = 6 * batch;
> >  	pcp->batch = max(1UL, 1 * batch);
> >  	INIT_LIST_HEAD(&pcp->list);
> > -
> > -	pcp = &p->pcp[1];		/* cold*/
> > -	pcp->count = 0;
> > -	pcp->high = 2 * batch;
> > -	pcp->batch = max(1UL, batch/2);
> > -	INIT_LIST_HEAD(&pcp->list);
>
> Before - per-cpu high count was 8 * batch. After, it is 6 * batch. This
> may be noticeable in some corner case involving page readahead requesting
> cold pages.

Actually it is the other way around. Readahead used the 2 * batch size.
Now it uses 6 * batch. So the queue size is improved 3-fold. Should be
better.

> All in all, pretty straight-forward. I think it's worth wider testing at
> least. I think it'll be hard to show for sure whether this is having a
> negative performance impact or not but initial results look ok.

Thanks for the thorough evaluation.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread
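[For concreteness, the queue sizes being debated in the two messages above,
worked through as illustrative arithmetic only; the batch value is
zone-size dependent, so the 31 used here is an assumption, not a figure
from the thread:]

	#include <stdio.h>

	int main(void)
	{
		unsigned long batch = 31;	/* assumed typical zone batch */

		/* Old scheme: separate hot and cold lists. */
		unsigned long hot_high   = 6 * batch;	/* 186 */
		unsigned long cold_high  = 2 * batch;	/*  62 */
		unsigned long cold_batch = batch / 2;	/*  15 */

		/* New scheme: one list sized like the old hot list. */
		unsigned long high = 6 * batch;		/* 186 */

		/* Mel's point: total capacity drops from 8*batch to 6*batch. */
		printf("old combined high: %lu, new high: %lu\n",
		       hot_high + cold_high, high);	/* 248 vs 186 */

		/*
		 * Christoph's point: a cold consumer now sees a 6*batch queue
		 * refilled 31 pages at a time instead of a 2*batch queue
		 * refilled 15 at a time.
		 */
		printf("cold queue: %lu -> %lu, refill: %lu -> %lu\n",
		       cold_high, high, cold_batch, batch);
		return 0;
	}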
* Re: [RFC] Page allocator: Get rid of the list of cold pages
  2007-11-14 18:51   ` Christoph Lameter
@ 2007-11-15 10:40     ` Mel Gorman
  2007-11-15 18:16       ` Christoph Lameter
  0 siblings, 1 reply; 5+ messages in thread
From: Mel Gorman @ 2007-11-15 10:40 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-mm, apw

On (14/11/07 10:51), Christoph Lameter didst pronounce:
> On Wed, 14 Nov 2007, Mel Gorman wrote:
>
> > What was this based against? It didn't apply cleanly to 2.6.24-rc2 but it
> > was fairly trivial to fix up the rejects. I tested on a few machines just
> > to see what happened. The performance results for kernbench, dbench, tbench
> > and aim9[1] were generally good.
>
> It was against git current (hmm.... Maybe one or the other patchset was in
> there too). Thanks for the evaluation.
>

Ok, makes sense.

> > I'm still waiting on results to come in from a PPC64 machine but initial
> > indicators are this is not a bad idea because you are not abandoning the
> > idea of giving hot pages when requested, just altering a little how they
> > are found. I suspect your main motivation is reducing the size of a per-cpu
> > structure?
>
> Yes. I can put more pagesets into a single cacheline if the cpu_alloc
> patchset is also applied. The major benefit will only be reached together
> with another patchset.
>

Sounds promising.

> > However, the opposite is also true. Currently, if someone is doing a lot of
> > file-readahead, they regularly will go to the main allocator as the cold
> > per-cpu lists get emptied. Now they will be able to take hot pages for a
> > cold user instead, which may be noticeable in some cases.
>
> This means that they will be able to use large batch sizes. This may
> actually improve that situation.
>

It would improve readahead but if there are active processes looking
for hot pages, they could be impacted because readahead has used up hot
pages. Basically, it could go either way, but justifying that splitting the
lists is the right thing to do in all situations is difficult too. I think
you could justify either approach with about the same amount of hand-waving
and not be able to prove anything conclusively.

> > However, in the event we cannot prove whether separate hot/cold lists are
> > worth it or not, we might as well collapse them for smaller per-cpu
> > structures.
>
> If we cannot prove that they are worth it then we should take them out.
>
> > >  	local_irq_save(flags);
> > > -	pcp = &THIS_CPU(zone->pageset)->pcp[cold];
> > > +	pcp = &THIS_CPU(zone->pageset)->pcp;
> > >  	__count_vm_event(PGFREE);
> > > -	list_add(&page->lru, &pcp->list);
> > > +	if (cold)
> > > +		list_add_tail(&page->lru, &pcp->list);
> > > +	else
> > > +		list_add(&page->lru, &pcp->list);
> >
> > There is scope here for a list function that adds to the head or tail
> > depending on the value of a parameter. I know Andy has the prototype of
> > such a function lying around so you may be able to share.
>
> I use a similar thing in SLUB. So if Andy has something then we may be
> able to use it in both places.
>
> > > +	pcp = &p->pcp;
> > >  	pcp->count = 0;
> > >  	pcp->high = 6 * batch;
> > >  	pcp->batch = max(1UL, 1 * batch);
> > >  	INIT_LIST_HEAD(&pcp->list);
> > > -
> > > -	pcp = &p->pcp[1];		/* cold*/
> > > -	pcp->count = 0;
> > > -	pcp->high = 2 * batch;
> > > -	pcp->batch = max(1UL, batch/2);
> > > -	INIT_LIST_HEAD(&pcp->list);
> >
> > Before - per-cpu high count was 8 * batch. After, it is 6 * batch. This
> > may be noticeable in some corner case involving page readahead requesting
> > cold pages.
>
> Actually it is the other way around. Readahead used the 2 * batch size.
> Now it uses 6 * batch. So the queue size is improved 3-fold. Should be
> better.
>

I was referring to the size of the two lists combined rather than each
list individually, but point taken.

> > All in all, pretty straight-forward. I think it's worth wider testing at
> > least. I think it'll be hard to show for sure whether this is having a
> > negative performance impact or not but initial results look ok.
>
> Thanks for the thorough evaluation.
>

You're welcome. The PPC64 results came through as well. The difference
between the two kernels is negligible. There are very slight improvements
with your patch but it's in the noise.

What I have seen so far is that things are no worse with your patch than
without, which is the important thing.

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread
* Re: [RFC] Page allocator: Get rid of the list of cold pages
  2007-11-15 10:40     ` Mel Gorman
@ 2007-11-15 18:16       ` Christoph Lameter
  0 siblings, 0 replies; 5+ messages in thread
From: Christoph Lameter @ 2007-11-15 18:16 UTC (permalink / raw)
To: Mel Gorman; +Cc: linux-mm, apw

On Thu, 15 Nov 2007, Mel Gorman wrote:

> It would improve readahead but if there are active processes looking
> for hot pages, they could be impacted because readahead has used up hot
> pages. Basically, it could go either way, but justifying that splitting the
> lists is the right thing to do in all situations is difficult too. I think
> you could justify either approach with about the same amount of hand-waving
> and not be able to prove anything conclusively.

Readahead is a rather slow process, so it's likely that the competing
fast-allocating process that is faulting in anonymous pages will replenish
the pcp pages multiple times between accesses of readahead to the pcp
pages. I guess this is some handwaving. However, if there is no conclusive
proof either way then let's remove it.

> You're welcome. The PPC64 results came through as well. The difference
> between the two kernels is negligible. There are very slight improvements
> with your patch but it's in the noise.
>
> What I have seen so far is that things are no worse with your patch than
> without, which is the important thing.

Right.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread
end of thread, other threads:[~2007-11-15 18:16 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-11-13  4:42 [RFC] Page allocator: Get rid of the list of cold pages Christoph Lameter
2007-11-14 18:41 ` Mel Gorman
2007-11-14 18:51   ` Christoph Lameter
2007-11-15 10:40     ` Mel Gorman
2007-11-15 18:16       ` Christoph Lameter