* [PATCH] page coloring for 2.4.17 kernel
@ 2002-01-14 1:46 Jason Papadopoulos
2002-01-14 22:46 ` Stephen C. Tweedie
0 siblings, 1 reply; 5+ messages in thread
From: Jason Papadopoulos @ 2002-01-14 1:46 UTC (permalink / raw)
To: linux-mm
Hello. Please be patient with this, my first post to linux-mm.
The included patch modifies the free list in the 2.4.17 kernel
to support round-robin page coloring. It seems to work okay
on an Alpha and speeds up a lot of number-crunching code I
have lying around (lmbench reports some higher bandwidths too).
The patch is a port of the 2.2.20 version that I recently posted
to the linux kernelmailing list.
I'd be grateful if the folks on this list can try out other
architectures and benchmarks. I've also been told that the code
which generates a random color needs to be more portable and cannot
rely on 64-bit data types. The coloring scheme is a little simplistic,
and it would be nice to use another scheme that is more efficient
but involves more extensive changes to the kernel.
Thanks in advance for your help.
jasonp
PS: Note that three of the "diff" command lines below wrap around
to the next line. Stupid Eudora...
-----------------------------------------------------------------
diff -ruN linux-2.4.17/drivers/char/Config.in linux-2.4.17a/drivers/char/Config.in
--- linux-2.4.17/drivers/char/Config.in Mon Nov 12 12:34:16 2001
+++ linux-2.4.17a/drivers/char/Config.in Sat Jan 12 09:35:03 2002
@@ -174,6 +174,8 @@
fi
endmenu
+tristate 'Page Coloring' CONFIG_PAGE_COLORING
+
if [ "$CONFIG_ARCH_NETWINDER" = "y" ]; then
tristate 'NetWinder thermometer support' CONFIG_DS1620
tristate 'NetWinder Button' CONFIG_NWBUTTON
diff -ruN linux-2.4.17/drivers/char/Makefile linux-2.4.17a/drivers/char/Makefile
--- linux-2.4.17/drivers/char/Makefile Sun Nov 11 13:09:32 2001
+++ linux-2.4.17a/drivers/char/Makefile Sat Jan 12 09:42:23 2002
@@ -240,6 +240,11 @@
obj-y += mwave/mwave.o
endif
+ifeq ($(CONFIG_PAGE_COLORING),m)
+ CONFIG_PAGE_COLORING_MODULE=y
+ obj-m += page_color.o
+endif
+
include $(TOPDIR)/Rules.make
fastdep:
diff -ruN linux-2.4.17/drivers/char/page_color.c linux-2.4.17a/drivers/char/page_color.c
--- linux-2.4.17/drivers/char/page_color.c Wed Dec 31 19:00:00 1969
+++ linux-2.4.17a/drivers/char/page_color.c Sun Jan 13 17:47:44 2002
@@ -0,0 +1,167 @@
+/*
+ * This module implements page coloring, a systematic way
+ * to get the most performance out of the expensive cache
+ * memory your computer has. At present the code is *only*
+ * to be built as a loadable kernel module.
+ *
+ * After building the kernel and rebooting, load the module
+ * and specify the cache size to use, like so:
+ *
+ * insmod <path to page_color.o> cache_size=X
+ *
+ * where X is the size of the largest cache your system has.
+ * For machines with three cache levels (Alpha 21164, AMD K6-III)
+ * this will be the size in bytes of the L3 cache, and for all
+ * others it will be the size of the L2 cache. If your system
+ * doesn't have at least L2 cache, fer cryin' out loud GET SOME!
+ * When specifying the cache size you can use 'K' or 'M' to signify
+ * kilobytes or megabytes, respectively. In any case, the cache
+ * size *must* be a power of two.
+ *
+ * insmod will create a module called 'page_color' which changes
+ * the way Linux allocates pages from the free list. It is always
+ * safe to start and stop the module while other processes are running.
+ *
+ * If linux is configured for a /proc filesystem, the module will
+ * also create /proc/page_color as a means of reporting statistics.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/version.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+#include <linux/mm.h>
+#include <linux/proc_fs.h>
+
+extern unsigned int page_miss_count;
+extern unsigned int page_hit_count;
+extern unsigned int page_colors;
+extern unsigned int page_alloc_count;
+extern struct list_head *page_color_table;
+
+void page_color_start(void);
+void page_color_stop(void);
+
+#if defined(__alpha__)
+#define CACHE_SIZE_GUESS (4*1024*1024)
+#elif defined(__i386__)
+#define CACHE_SIZE_GUESS (256*1024)
+#else
+#define CACHE_SIZE_GUESS (1*1024*1024)
+#endif
+
+#ifdef CONFIG_PROC_FS
+
+int page_color_getinfo(char *buf, char **start, off_t fpos, int length)
+{
+ int i, j, k, count, num_colors;
+ struct list_head *queue, *curr;
+ char *p = buf;
+
+ p += sprintf(p, "colors: %d\n", page_colors);
+ p += sprintf(p, "hits: %d\n", page_hit_count);
+ p += sprintf(p, "misses: %d\n", page_miss_count);
+ p += sprintf(p, "pages allocated: %d\n", page_alloc_count);
+
+ queue = page_color_table;
+ for(i=0; i<MAX_NR_ZONES; i++) {
+ num_colors = page_colors;
+
+ for(j=0; j<MAX_ORDER; j++) {
+ for(k=0; k<num_colors; k++) {
+ count = 0;
+ if (!queue->next)
+ goto getinfo_done;
+
+ list_for_each(curr, queue) {
+ count++;
+ }
+ p += sprintf(p, "%d ", count);
+ queue++;
+ }
+
+ p += sprintf(p, "\n");
+ if (num_colors > 1)
+ num_colors >>= 1;
+ }
+ }
+
+getinfo_done:
+ return p - buf;
+}
+
+#endif
+
+void cleanup_module(void)
+{
+ printk("page_color: terminating page coloring\n");
+
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("page_color", NULL);
+#endif
+
+ page_color_stop();
+ kfree(page_color_table);
+}
+
+static char *cache_size;
+MODULE_PARM(cache_size, "s");
+
+int init_module(void)
+{
+ unsigned int cache_size_int;
+ unsigned int alloc_size;
+
+ if (cache_size) {
+ cache_size_int = simple_strtoul(cache_size,
+ (char **)NULL, 10);
+ if ( strchr(cache_size, 'M') ||
+ strchr(cache_size, 'm') )
+ cache_size_int *= 1024*1024;
+
+ if ( strchr(cache_size, 'K') ||
+ strchr(cache_size, 'k') )
+ cache_size_int *= 1024;
+ }
+ else {
+ cache_size_int = CACHE_SIZE_GUESS;
+ }
+
+ if( (-cache_size_int & cache_size_int) != cache_size_int ) {
+ printk ("page_color: cache size is not a power of two\n");
+ return 1;
+ }
+
+ page_colors = cache_size_int / PAGE_SIZE;
+ page_hit_count = 0;
+ page_miss_count = 0;
+ page_alloc_count = 0;
+ alloc_size = MAX_NR_ZONES * sizeof(struct list_head) *
+ (2 * page_colors + MAX_ORDER);
+ page_color_table = (struct list_head *)kmalloc(alloc_size, GFP_KERNEL);
+ if (!page_color_table) {
+ printk("page_color: memory allocation failed\n");
+ return 1;
+ }
+ memset(page_color_table, 0, alloc_size);
+
+ page_color_start();
+
+#ifdef CONFIG_PROC_FS
+ create_proc_info_entry("page_color", 0, NULL, page_color_getinfo);
+#endif
+
+ printk("page_color: starting with %d colors\n", page_colors );
+ return 0;
+}
diff -ruN linux-2.4.17/include/linux/mmzone.h linux-2.4.17a/include/linux/mmzone.h
--- linux-2.4.17/include/linux/mmzone.h Sat Jan 12 10:21:34 2002
+++ linux-2.4.17a/include/linux/mmzone.h Sat Jan 12 23:34:28 2002
@@ -21,6 +21,12 @@
typedef struct free_area_struct {
struct list_head free_list;
unsigned long *map;
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+ unsigned long count;
+ struct list_head *color_list;
+#endif
+
} free_area_t;
struct pglist_data;
diff -ruN linux-2.4.17/include/linux/sched.h linux-2.4.17a/include/linux/sched.h
--- linux-2.4.17/include/linux/sched.h Sat Jan 12 10:21:34 2002
+++ linux-2.4.17a/include/linux/sched.h Sat Jan 12 23:34:28 2002
@@ -410,6 +410,11 @@
/* journalling filesystem info */
void *journal_info;
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+ unsigned int color_init;
+ unsigned int target_color;
+#endif
};
/*
diff -ruN linux-2.4.17/kernel/ksyms.c linux-2.4.17a/kernel/ksyms.c
--- linux-2.4.17/kernel/ksyms.c Fri Dec 28 21:51:25 2001
+++ linux-2.4.17a/kernel/ksyms.c Sun Jan 13 17:46:28 2002
@@ -559,3 +559,21 @@
EXPORT_SYMBOL(tasklist_lock);
EXPORT_SYMBOL(pidhash);
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+extern unsigned int page_miss_count;
+extern unsigned int page_hit_count;
+extern unsigned int page_alloc_count;
+extern unsigned int page_colors;
+extern struct list_head *page_color_table;
+void page_color_start(void);
+void page_color_stop(void);
+
+EXPORT_SYMBOL_NOVERS(page_miss_count);
+EXPORT_SYMBOL_NOVERS(page_hit_count);
+EXPORT_SYMBOL_NOVERS(page_alloc_count);
+EXPORT_SYMBOL_NOVERS(page_colors);
+EXPORT_SYMBOL_NOVERS(page_color_table);
+EXPORT_SYMBOL_NOVERS(page_color_start);
+EXPORT_SYMBOL_NOVERS(page_color_stop);
+#endif
diff -ruN linux-2.4.17/mm/page_alloc.c linux-2.4.17a/mm/page_alloc.c
--- linux-2.4.17/mm/page_alloc.c Mon Nov 19 19:35:40 2001
+++ linux-2.4.17a/mm/page_alloc.c Sun Jan 13 18:46:31 2002
@@ -56,6 +56,288 @@
*/
#define BAD_RANGE(zone,x) (((zone) != (x)->zone) || (((x)-mem_map) < (zone)->zone_start_mapnr) || (((x)-mem_map) >= (zone)->zone_start_mapnr+(zone)->size))
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+
+#ifdef CONFIG_DISCONTIGMEM
+#error "Page coloring implementation cannot handle NUMA architectures"
+#endif
+
+unsigned int page_coloring = 0;
+unsigned int page_miss_count;
+unsigned int page_hit_count;
+unsigned int page_alloc_count;
+unsigned int page_colors = 0;
+struct list_head *page_color_table;
+
+#define COLOR(x) ((x) & cache_mask)
+
+void page_color_start(void)
+{
+ /* Empty the free list in each zone. For each
+ queue in the free list, transfer the entries
+ in the queue over to another set of queues
+ (the destination queue is determined by the
+ color of each entry). */
+
+ int i, j, k;
+ unsigned int num_colors, cache_mask;
+ unsigned long index;
+ unsigned long flags[MAX_NR_ZONES];
+ struct list_head *color_list_start, *head, *curr;
+ free_area_t *area;
+ struct page *page;
+ zone_t *zone;
+ pg_data_t *pgdata;
+
+ cache_mask = page_colors - 1;
+ color_list_start = page_color_table;
+ pgdata = &contig_page_data;
+
+ /* Stop all allocation of free pages while the
+ reshuffling is taking place */
+
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone = pgdata->node_zones + i;
+ if (zone->size)
+ spin_lock_irqsave(&zone->lock, flags[i]);
+ }
+
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ num_colors = page_colors;
+ zone = pgdata->node_zones + i;
+
+ if (!zone->size)
+ continue;
+
+ for(j = 0; j < MAX_ORDER; j++) {
+ area = zone->free_area + j;
+ area->count = 0;
+ area->color_list = color_list_start;
+ head = &area->free_list;
+ curr = memlist_next(head);
+
+ for(k = 0; k < num_colors; k++)
+ memlist_init(color_list_start + k);
+
+ while(curr != head) {
+ page = memlist_entry(curr, struct page, list);
+ memlist_del(curr);
+ index = page - zone->zone_mem_map;
+ memlist_add_head(curr, area->color_list +
+ (COLOR(index) >> j));
+ area->count++;
+ curr = memlist_next(head);
+ }
+
+ color_list_start += num_colors;
+ if (num_colors > 1)
+ num_colors >>= 1;
+ }
+ }
+
+ /* Allocation of free pages can continue */
+
+ page_coloring = 1;
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone = pgdata->node_zones + i;
+ if (zone->size)
+ spin_unlock_irqrestore(&zone->lock, flags[i]);
+ }
+}
+
+void page_color_stop(void)
+{
+ /* Reverse the operation of page_color_start(). */
+
+ int i, j, k;
+ unsigned int num_colors;
+ unsigned long flags[MAX_NR_ZONES];
+ struct list_head *head, *curr;
+ free_area_t *area;
+ zone_t *zone;
+ pg_data_t *pgdata;
+
+ pgdata = &contig_page_data;
+
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone = pgdata->node_zones + i;
+ if (zone->size)
+ spin_lock_irqsave(&zone->lock, flags[i]);
+ }
+
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ num_colors = page_colors;
+ zone = pgdata->node_zones + i;
+
+ if (!zone->size)
+ continue;
+
+ for(j = 0; j<MAX_ORDER; j++) {
+ area = zone->free_area + j;
+ area->count = 0;
+
+ for(k = 0; k < num_colors; k++) {
+ head = area->color_list + k;
+ curr = memlist_next(head);
+ while(curr != head) {
+ memlist_del(curr);
+ memlist_add_head(curr,
+ &area->free_list);
+ curr = memlist_next(head);
+ }
+ }
+
+ if (num_colors > 1)
+ num_colors >>= 1;
+ }
+ }
+
+ page_coloring = 0;
+ for(i = 0; i < MAX_NR_ZONES; i++) {
+ zone = pgdata->node_zones + i;
+ if (zone->size)
+ spin_unlock_irqrestore(&zone->lock, flags[i]);
+ }
+}
+
+unsigned int rand_carry = 0x01234567;
+unsigned int rand_seed = 0x89abcdef;
+
+#define MULT 2131995753
+
+static inline unsigned int get_rand(void)
+{
+ /* A multiply-with-carry random number generator by
+ George Marsaglia. The period is about 1<<63, and
+ each call to get_rand() returns 32 random bits */
+
+ unsigned long long prod;
+
+ prod = (unsigned long long)rand_seed *
+ (unsigned long long)MULT +
+ (unsigned long long)rand_carry;
+ rand_seed = (unsigned int)prod;
+ rand_carry = (unsigned int)(prod >> 32);
+
+ return rand_seed;
+}
+
+static struct page *alloc_pages_by_color(zone_t *zone, unsigned int order)
+{
+ unsigned int i;
+ unsigned int mask, color;
+ unsigned long page_idx;
+ free_area_t *area;
+ struct list_head *curr, *head;
+ struct page *page;
+ unsigned int cache_mask = page_colors - 1;
+
+ /* If this process hasn't asked for free pages
+ before, assign it a random starting color. */
+
+ if (current->color_init != current->pid) {
+ current->color_init = current->pid;
+ current->target_color = COLOR(get_rand());
+ }
+
+ /* Round the target color to look for up to the
+ next 1<<order boundary. */
+
+ mask = (1 << order) - 1;
+ color = current->target_color;
+ color = COLOR((color + mask) & ~mask);
+
+ /* Find out early if there are no free pages at all. */
+
+ for(i = order; i < MAX_ORDER; i++)
+ if (zone->free_area[i].count)
+ break;
+
+ if (i == MAX_ORDER)
+ return NULL;
+
+ /* The memory allocation is guaranteed to succeed
+ (although we may not find the correct color) */
+
+ while(1) {
+ area = zone->free_area + order;
+ for(i = order; i < MAX_ORDER; i++) {
+ head = area->color_list + (color >> i);
+ curr = memlist_next(head);
+ if (curr != head)
+ goto alloc_page_done;
+ area++;
+ }
+
+ page_miss_count++;
+ color = COLOR(color + (1<<order));
+ }
+
+alloc_page_done:
+ page = memlist_entry(curr, struct page, list);
+ if (BAD_RANGE(zone,page))
+ BUG();
+
+ memlist_del(curr);
+ page_idx = page - zone->zone_mem_map;
+ zone->free_pages -= 1 << order;
+ area->count--;
+
+ if (i < (MAX_ORDER - 1))
+ __change_bit(page_idx >> (1+i), area->map);
+
+ while (i > order) {
+
+ /* Return 1<<order contiguous pages out of
+ the 1<<i available now. Without page coloring
+ it would suffice to keep chopping the number of
+ pages in half and return the last 1<<order of
+ them. Here, the bottom bits of the index to
+ return must match the target color. We have to
+ keep chopping 1<<i in half but we can
+ only ignore the halves that don't match the
+ bit pattern of the target color. */
+
+ i--;
+ area--;
+ mask = 1 << i;
+ area->count++;
+ __change_bit(page_idx >> (1+i), area->map);
+ if (color & mask) {
+ if (BAD_RANGE(zone,page + mask))
+ BUG();
+
+ memlist_add_head(&page->list, area->color_list +
+ (COLOR(page_idx) >> i));
+ page_idx += mask;
+ page += mask;
+ }
+ else {
+ memlist_add_head(&(page + mask)->list,
+ area->color_list +
+ (COLOR(page_idx + mask) >> i));
+ }
+ }
+
+ set_page_count(page, 1);
+
+ if (BAD_RANGE(zone,page))
+ BUG();
+ if (PageLRU(page))
+ BUG();
+ if (PageActive(page))
+ BUG();
+
+ current->target_color = COLOR(color + (1<<order));
+ page_hit_count++;
+ page_alloc_count += 1 << order;
+ return page;
+}
+
+#endif /* CONFIG_PAGE_COLORING_MODULE */
+
+
/*
* Buddy system. Hairy. You really aren't expected to understand this
*
@@ -125,12 +407,26 @@
if (BAD_RANGE(zone,buddy2))
BUG();
+#ifdef CONFIG_PAGE_COLORING_MODULE
+ area->count--;
+ order++;
+#endif
memlist_del(&buddy1->list);
mask <<= 1;
area++;
index >>= 1;
page_idx &= mask;
}
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+ if (page_coloring == 1) {
+ unsigned long cache_mask = page_colors - 1;
+ memlist_add_head(&(base + page_idx)->list,
+ area->color_list + (COLOR(page_idx) >> order));
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return;
+ }
+#endif
memlist_add_head(&(base + page_idx)->list, &area->free_list);
spin_unlock_irqrestore(&zone->lock, flags);
@@ -181,6 +477,15 @@
struct page *page;
spin_lock_irqsave(&zone->lock, flags);
+
+#ifdef CONFIG_PAGE_COLORING_MODULE
+ if (page_coloring == 1) {
+ page = alloc_pages_by_color(zone, order);
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return page;
+ }
+#endif
+
do {
head = &area->free_list;
curr = memlist_next(head);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] page coloring for 2.4.17 kernel
2002-01-14 1:46 [PATCH] page coloring for 2.4.17 kernel Jason Papadopoulos
@ 2002-01-14 22:46 ` Stephen C. Tweedie
2002-01-15 4:55 ` Jason Papadopoulos
0 siblings, 1 reply; 5+ messages in thread
From: Stephen C. Tweedie @ 2002-01-14 22:46 UTC (permalink / raw)
To: Jason Papadopoulos; +Cc: linux-mm
Hi,
On Sun, Jan 13, 2002 at 08:46:10PM -0500, Jason Papadopoulos wrote:
> Hello. Please be patient with this, my first post to linux-mm.
> The included patch modifies the free list in the 2.4.17 kernel
> to support round-robin page coloring. It seems to work okay
> on an Alpha and speeds up a lot of number-crunching code I
> have lying around (lmbench reports some higher bandwidths too).
> The patch is a port of the 2.2.20 version that I recently posted
> to the linux kernelmailing list.
Do you have numbers to show the sort of performance difference it
makes?
Cheers,
Stephen
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] page coloring for 2.4.17 kernel
2002-01-14 22:46 ` Stephen C. Tweedie
@ 2002-01-15 4:55 ` Jason Papadopoulos
2002-01-15 11:22 ` Stephen C. Tweedie
0 siblings, 1 reply; 5+ messages in thread
From: Jason Papadopoulos @ 2002-01-15 4:55 UTC (permalink / raw)
To: Stephen C. Tweedie; +Cc: linux-mm
At 10:46 PM 1/14/02 +0000, you wrote:
>> Hello. Please be patient with this, my first post to linux-mm.
>> The included patch modifies the free list in the 2.4.17 kernel
>> to support round-robin page coloring. It seems to work okay
>> on an Alpha and speeds up a lot of number-crunching code I
>> have lying around (lmbench reports some higher bandwidths too).
>> The patch is a port of the 2.2.20 version that I recently posted
>> to the linux kernelmailing list.
>
>Do you have numbers to show the sort of performance difference it
>makes?
It's a little difficult to tell with lmbench, since results can vary
slightly from run to run. The following numbers seem to be fairly
repeatable (DS10 Alphaserver with 466MHz ev6 processor and 2MB L2 cache).
The 2.2.20 numbers are with the original page coloring patch, the first
2.4.17 numbers are for the unpatched kernel and the second set uses
page coloring.
*Local* Communication latencies in microseconds - smaller is better
-------------------------------------------------------------------
Host OS 2p/0K Pipe AF UDP RPC/ TCP RPC/ TCP
ctxsw UNIX UDP TCP conn
--------- ------------- ----- ----- ---- ----- ----- ----- ----- ----
alpha-lin Linux 2.2.20 1 6 15 22 39
alpha-lin Linux 2.4.17 1 8 20 38 64 212
alpha-lin Linux 2.4.17 1 11 19 28 37 163
File & VM system latencies in microseconds - smaller is better
--------------------------------------------------------------
Host OS 0K File 10K File Mmap Prot Page
Create Delete Create Delete Latency Fault Fault
--------- ------------- ------ ------ ------ ------ ------- ----- -----
alpha-lin Linux 2.2.20 6 1 13 1 9699 1 0.7K
alpha-lin Linux 2.4.17 3 0 9 2 619 0 0.0K
alpha-lin Linux 2.4.17 3 0 10 2 624 0 0.0K
*Local* Communication bandwidths in MB/s - bigger is better
-----------------------------------------------------------
Host OS Pipe AF TCP File Mmap Bcopy Bcopy Mem Mem
UNIX reread reread (libc) (hand) read write
--------- ------------- ---- ---- ---- ------ ------ ------ ------ ---- -----
alpha-lin Linux 2.2.20 261 255 -1 189 370 262 209 370 330
alpha-lin Linux 2.4.17 373 327 137 203 370 268 208 371 335
alpha-lin Linux 2.4.17 362 371 196 259 371 262 203 370 332
Memory latencies in nanoseconds - smaller is better
(WARNING - may not be correct, check graphs)
---------------------------------------------------
Host OS Mhz L1 $ L2 $ Main mem Guesses
--------- ------------- --- ---- ---- -------- -------
alpha-lin Linux 2.2.20 461 6 32 199
alpha-lin Linux 2.4.17 462 6 75 199
alpha-lin Linux 2.4.17 462 6 32 199
For computational workloads whose working set fit in L2 it makes a
huge difference (I've seen 30% speedups in FFT benchmarks). Kernel
compiles also seem to go 1-2% faster. I also have a memory latency
tester written in assembly language for the ev6, and without the
patch it seems you can only get high bandwidth out of L2 for working
sets of about 256k in size. With page coloring turned on you get
constant high memory bandwidth all the way out to the full L2 cache
size.
It would be interesting to see how i386 machines benefit from page
coloring, since they have very fast but somewhat tiny L2 caches with
high associativity.
jasonp
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] page coloring for 2.4.17 kernel
2002-01-15 4:55 ` Jason Papadopoulos
@ 2002-01-15 11:22 ` Stephen C. Tweedie
0 siblings, 0 replies; 5+ messages in thread
From: Stephen C. Tweedie @ 2002-01-15 11:22 UTC (permalink / raw)
To: Jason Papadopoulos; +Cc: Stephen C. Tweedie, linux-mm
Hi,
On Mon, Jan 14, 2002 at 11:55:55PM -0500, Jason Papadopoulos wrote:
> At 10:46 PM 1/14/02 +0000, you wrote:
>
> >> Hello. Please be patient with this, my first post to linux-mm.
> >> The included patch modifies the free list in the 2.4.17 kernel
> >> to support round-robin page coloring. It seems to work okay
> >> on an Alpha and speeds up a lot of number-crunching code I
> >> have lying around (lmbench reports some higher bandwidths too).
> >> The patch is a port of the 2.2.20 version that I recently posted
> >> to the linux kernelmailing list.
> >
> >Do you have numbers to show the sort of performance difference it
> >makes?
>
> It's a little difficult to tell with lmbench, since results can vary
> slightly from run to run.
That's an important aspect. Page colouring can often drastically
reduce the variance in run times for cache-intensive tasks. Average
and variance are both very much worth reporting.
Cheers,
Stephen
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] page coloring for 2.4.17 kernel
@ 2002-01-15 6:16 Jason Papadopoulos
0 siblings, 0 replies; 5+ messages in thread
From: Jason Papadopoulos @ 2002-01-15 6:16 UTC (permalink / raw)
To: linux-mm
I know it's bad form to respond to my own post, but I've done some
more careful lmbench runs and many of the glaring differences in the
2.4.17 kernel with and without page coloring seem to damp out. The
AF UNIX and File reread bandwidths are consistently better across multiple
runs, though.
Sorry about that,
jasonp
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2002-01-15 11:22 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-01-14 1:46 [PATCH] page coloring for 2.4.17 kernel Jason Papadopoulos
2002-01-14 22:46 ` Stephen C. Tweedie
2002-01-15 4:55 ` Jason Papadopoulos
2002-01-15 11:22 ` Stephen C. Tweedie
2002-01-15 6:16 Jason Papadopoulos
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox