linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Pageset Localization V2
@ 2005-03-30  5:51 Christoph Lameter
  2005-03-30 11:14 ` Christoph Hellwig
  2005-03-30 13:40 ` Matthew Wilcox
  0 siblings, 2 replies; 10+ messages in thread
From: Christoph Lameter @ 2005-03-30  5:51 UTC (permalink / raw)
  To: Manfred Spraul, Andrew Morton; +Cc: linux-kernel, linux-ia64, linux-mm, shai

This patch modifies the way pagesets in struct zone are allocated. It relocates
the pagesets contained in a zone for each cpu to the node that is nearest to
the cpu instead of keeping the pagesets in the (possibly remote) target zone.
This means that the operations to manage caches of pages on remote zones can
be done with information available in the local zone.

The patch depends on the API changes to the slab allocator posted before
this patch.

AIM7 benchmark on a 32 CPU SMP system:

w/o patches:
Tasks    jobs/min  jti  jobs/min/task      real       cpu
    1      484.68  100       484.6769     12.01      1.97   Fri Mar 25 11:01:42 2005
  100    27140.46   89       271.4046     21.44    148.71   Fri Mar 25 11:02:04 2005
  200    30792.02   82       153.9601     37.80    296.72   Fri Mar 25 11:02:42 2005
  300    32209.27   81       107.3642     54.21    451.34   Fri Mar 25 11:03:37 2005
  400    34962.83   78        87.4071     66.59    588.97   Fri Mar 25 11:04:44 2005
  500    31676.92   75        63.3538     91.87    742.71   Fri Mar 25 11:06:16 2005
  600    36032.69   73        60.0545     96.91    885.44   Fri Mar 25 11:07:54 2005
  700    35540.43   77        50.7720    114.63   1024.28   Fri Mar 25 11:09:49 2005
  800    33906.70   74        42.3834    137.32   1181.65   Fri Mar 25 11:12:06 2005
  900    34120.67   73        37.9119    153.51   1325.26   Fri Mar 25 11:14:41 2005
 1000    34802.37   74        34.8024    167.23   1465.26   Fri Mar 25 11:17:28 2005

with Slab API changes and pageset patch:

Tasks    jobs/min  jti  jobs/min/task      real       cpu
    1      485.00  100       485.0000     12.00      1.96   Fri Mar 25 11:46:18 2005
  100    28000.96   89       280.0096     20.79    150.45   Fri Mar 25 11:46:39 2005
  200    32285.80   79       161.4290     36.05    293.37   Fri Mar 25 11:47:16 2005
  300    40424.15   84       134.7472     43.19    438.42   Fri Mar 25 11:47:59 2005
  400    39155.01   79        97.8875     59.46    590.05   Fri Mar 25 11:48:59 2005
  500    37881.25   82        75.7625     76.82    730.19   Fri Mar 25 11:50:16 2005
  600    39083.14   78        65.1386     89.35    872.79   Fri Mar 25 11:51:46 2005
  700    38627.83   77        55.1826    105.47   1022.46   Fri Mar 25 11:53:32 2005
  800    39631.94   78        49.5399    117.48   1169.94   Fri Mar 25 11:55:30 2005
  900    36903.70   79        41.0041    141.94   1310.78   Fri Mar 25 11:57:53 2005
 1000    36201.23   77        36.2012    160.77   1458.31   Fri Mar 25 12:00:34 2005

The major improvement is in the mid range when running 100-600 tasks. For 1 task
there is barely any improvement since most data will be locally allocated. In the high
range other factors seem to become important.

Patch against 2.6.11.6-bk3

Signed-off-by: Christoph Lameter <christoph@lameter.com>
Signed-off-by: Shobhit Dayal <shobhit@calsoftinc.com>
Signed-off-by: Shai Fultheim <Shai@Scalex86.org>

Index: linux-2.6.11/drivers/base/node.c
===================================================================
--- linux-2.6.11.orig/drivers/base/node.c	2005-03-21 13:18:06.000000000 -0800
+++ linux-2.6.11/drivers/base/node.c	2005-03-21 13:22:06.000000000 -0800
@@ -87,7 +87,7 @@ static ssize_t node_read_numastat(struct
 	for (i = 0; i < MAX_NR_ZONES; i++) {
 		struct zone *z = &pg->node_zones[i];
 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
-			struct per_cpu_pageset *ps = &z->pageset[cpu];
+			struct per_cpu_pageset *ps = z->pageset[cpu];
 			numa_hit += ps->numa_hit;
 			numa_miss += ps->numa_miss;
 			numa_foreign += ps->numa_foreign;
Index: linux-2.6.11/include/linux/mm.h
===================================================================
--- linux-2.6.11.orig/include/linux/mm.h	2005-03-21 13:18:06.000000000 -0800
+++ linux-2.6.11/include/linux/mm.h	2005-03-21 13:22:06.000000000 -0800
@@ -691,6 +691,7 @@ extern void mem_init(void);
 extern void show_mem(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
+extern void setup_per_cpu_pageset(void);

 /* prio_tree.c */
 void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
Index: linux-2.6.11/include/linux/mmzone.h
===================================================================
--- linux-2.6.11.orig/include/linux/mmzone.h	2005-03-21 13:21:59.000000000 -0800
+++ linux-2.6.11/include/linux/mmzone.h	2005-03-21 13:22:06.000000000 -0800
@@ -122,7 +122,7 @@ struct zone {
 	 */
 	unsigned long		lowmem_reserve[MAX_NR_ZONES];

-	struct per_cpu_pageset	pageset[NR_CPUS];
+	struct per_cpu_pageset	*pageset[NR_CPUS];

 	/*
 	 * free areas of different sizes
Index: linux-2.6.11/init/main.c
===================================================================
--- linux-2.6.11.orig/init/main.c	2005-03-21 13:18:06.000000000 -0800
+++ linux-2.6.11/init/main.c	2005-03-21 13:22:06.000000000 -0800
@@ -490,6 +490,7 @@ asmlinkage void __init start_kernel(void
 	vfs_caches_init_early();
 	mem_init();
 	kmem_cache_init();
+	setup_per_cpu_pageset();
 	numa_policy_init();
 	if (late_time_init)
 		late_time_init();
Index: linux-2.6.11/mm/mempolicy.c
===================================================================
--- linux-2.6.11.orig/mm/mempolicy.c	2005-03-21 13:18:06.000000000 -0800
+++ linux-2.6.11/mm/mempolicy.c	2005-03-21 13:22:06.000000000 -0800
@@ -721,7 +721,7 @@ static struct page *alloc_page_interleav
 	zl = NODE_DATA(nid)->node_zonelists + (gfp & GFP_ZONEMASK);
 	page = __alloc_pages(gfp, order, zl);
 	if (page && page_zone(page) == zl->zones[0]) {
-		zl->zones[0]->pageset[get_cpu()].interleave_hit++;
+		zl->zones[0]->pageset[get_cpu()]->interleave_hit++;
 		put_cpu();
 	}
 	return page;
Index: linux-2.6.11/mm/page_alloc.c
===================================================================
--- linux-2.6.11.orig/mm/page_alloc.c	2005-03-21 13:18:06.000000000 -0800
+++ linux-2.6.11/mm/page_alloc.c	2005-03-21 13:22:06.000000000 -0800
@@ -68,6 +68,7 @@ EXPORT_SYMBOL(nr_swap_pages);
  */
 struct zone *zone_table[1 << (ZONES_SHIFT + NODES_SHIFT)];
 EXPORT_SYMBOL(zone_table);
+struct per_cpu_pageset pageset_table[MAX_NR_ZONES*MAX_NUMNODES*NR_CPUS] __initdata;

 static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
@@ -518,7 +519,7 @@ static void __drain_pages(unsigned int c
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;

-		pset = &zone->pageset[cpu];
+		pset = zone->pageset[cpu];
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;

@@ -581,12 +582,12 @@ static void zone_statistics(struct zonel

 	local_irq_save(flags);
 	cpu = smp_processor_id();
-	p = &z->pageset[cpu];
+	p = z->pageset[cpu];
 	if (pg == orig) {
-		z->pageset[cpu].numa_hit++;
+		z->pageset[cpu]->numa_hit++;
 	} else {
 		p->numa_miss++;
-		zonelist->zones[0]->pageset[cpu].numa_foreign++;
+		zonelist->zones[0]->pageset[cpu]->numa_foreign++;
 	}
 	if (pg == NODE_DATA(numa_node_id()))
 		p->local_node++;
@@ -613,7 +614,7 @@ static void fastcall free_hot_cold_page(
 	if (PageAnon(page))
 		page->mapping = NULL;
 	free_pages_check(__FUNCTION__, page);
-	pcp = &zone->pageset[get_cpu()].pcp[cold];
+	pcp = &zone->pageset[get_cpu()]->pcp[cold];
 	local_irq_save(flags);
 	if (pcp->count >= pcp->high)
 		pcp->count -= free_pages_bulk(zone, pcp->batch, &pcp->list, 0);
@@ -657,7 +658,7 @@ buffered_rmqueue(struct zone *zone, int
 	if (order == 0) {
 		struct per_cpu_pages *pcp;

-		pcp = &zone->pageset[get_cpu()].pcp[cold];
+		pcp = &zone->pageset[get_cpu()]->pcp[cold];
 		local_irq_save(flags);
 		if (pcp->count <= pcp->low)
 			pcp->count += rmqueue_bulk(zone, 0,
@@ -1228,7 +1229,7 @@ void show_free_areas(void)
 			if (!cpu_possible(cpu))
 				continue;

-			pageset = zone->pageset + cpu;
+			pageset = zone->pageset[cpu];

 			for (temperature = 0; temperature < 2; temperature++)
 				printk("cpu %d %s: low %d, high %d, batch %d\n",
@@ -1612,6 +1613,122 @@ void zone_init_free_lists(struct pglist_
 	memmap_init_zone((size), (nid), (zone), (start_pfn))
 #endif

+#define MAKE_LIST(list, nlist)  \
+	do {    \
+		if(list_empty(&list))      \
+			INIT_LIST_HEAD(nlist);          \
+		else {  nlist->next->prev = nlist;      \
+			nlist->prev->next = nlist;      \
+		}                                       \
+	}while(0)
+
+/*
+ * Dynamicaly allocate memory for the
+ * per cpu pageset array in struct zone.
+ */
+static inline int __devinit process_zones(int cpu)
+{
+	struct zone *zone, *dzone;
+
+	for_each_zone(zone) {
+		struct per_cpu_pageset *npageset = NULL;
+
+		npageset = kmalloc_node(sizeof(struct per_cpu_pageset),
+					 GFP_KERNEL, cpu_to_node(cpu));
+		if(!npageset) {
+			zone->pageset[cpu] = NULL;
+			goto bad;
+		}
+
+		if(zone->pageset[cpu]) {
+			memcpy(npageset, zone->pageset[cpu], sizeof(struct per_cpu_pageset));
+			MAKE_LIST(zone->pageset[cpu]->pcp[0].list, (&npageset->pcp[0].list));
+			MAKE_LIST(zone->pageset[cpu]->pcp[1].list, (&npageset->pcp[1].list));
+		}
+		else {
+			struct per_cpu_pages *pcp;
+			unsigned long batch;
+
+			batch = zone->present_pages / 1024;
+			if (batch * PAGE_SIZE > 256 * 1024)
+				batch = (256 * 1024) / PAGE_SIZE;
+			batch /= 4;             /* We effectively *= 4 below */
+			if (batch < 1)
+				batch = 1;
+
+			pcp = &npageset->pcp[0];		/* hot */
+			pcp->count = 0;
+			pcp->low = 2 * batch;
+			pcp->high = 6 * batch;
+			pcp->batch = 1 * batch;
+			INIT_LIST_HEAD(&pcp->list);
+
+			pcp = &npageset->pcp[1];		/* cold*/
+			pcp->count = 0;
+			pcp->low = 0;
+			pcp->high = 2 * batch;
+			pcp->batch = 1 * batch;
+			INIT_LIST_HEAD(&pcp->list);
+		}
+		zone->pageset[cpu] = npageset;
+	}
+
+	return 0;
+bad:
+	for_each_zone(dzone) {
+		if(dzone == zone)
+			break;
+		kfree(dzone->pageset[cpu]);
+		dzone->pageset[cpu] = NULL;
+	}
+	return -ENOBUFS;
+}
+
+static int __devinit pageset_cpuup_callback(struct notifier_block *nfb,
+		unsigned long action,
+		void *hcpu)
+{
+	int cpu = (long)hcpu;
+
+	switch(action) {
+		case CPU_UP_PREPARE:
+			if(process_zones(cpu))
+				goto bad;
+			break;
+#ifdef CONFIG_HOTPLUG_CPU
+		case CPU_DEAD:
+			{
+				struct zone *zone;
+				for_each_zone(zone) {
+					struct per_cpu_pageset *pset;
+
+					pset = zone->pageset[cpu];
+					zone->pageset[cpu] = NULL;
+
+					kfree(pset);
+				}
+			}
+			break;
+#endif
+		default:
+			break;
+	}
+	return NOTIFY_OK;
+bad:
+	return NOTIFY_BAD;
+}
+struct notifier_block pageset_notifier = { &pageset_cpuup_callback, NULL, 0 };
+
+void __init setup_per_cpu_pageset()
+{
+	/*Iintialize per_cpu_pageset for cpu 0.
+	  A cpuup callback will do this for every cpu
+	  as it comes online
+	 */
+	BUG_ON(process_zones(smp_processor_id()));
+	register_cpu_notifier(&pageset_notifier);
+}
+
 /*
  * Set up the zone data structures:
  *   - mark all pages reserved
@@ -1670,15 +1787,17 @@ static void __init free_area_init_core(s

 		for (cpu = 0; cpu < NR_CPUS; cpu++) {
 			struct per_cpu_pages *pcp;
+			struct per_cpu_pageset *pgset = &pageset_table[nid*MAX_NR_ZONES*NR_CPUS + (j * NR_CPUS) + cpu];

-			pcp = &zone->pageset[cpu].pcp[0];	/* hot */
+			zone->pageset[cpu] = pgset;
+			pcp = &pgset->pcp[0];			/* hot */
 			pcp->count = 0;
 			pcp->low = 2 * batch;
 			pcp->high = 6 * batch;
 			pcp->batch = 1 * batch;
 			INIT_LIST_HEAD(&pcp->list);

-			pcp = &zone->pageset[cpu].pcp[1];	/* cold */
+			pcp = &pgset->pcp[1];			/* cold */
 			pcp->count = 0;
 			pcp->low = 0;
 			pcp->high = 2 * batch;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Pageset Localization V2
  2005-03-30  5:51 [PATCH] Pageset Localization V2 Christoph Lameter
@ 2005-03-30 11:14 ` Christoph Hellwig
       [not found]   ` <bab4333005033003295f487e3d@mail.gmail.com>
  2005-03-30 13:40 ` Matthew Wilcox
  1 sibling, 1 reply; 10+ messages in thread
From: Christoph Hellwig @ 2005-03-30 11:14 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Manfred Spraul, Andrew Morton, linux-kernel, linux-ia64, linux-mm, shai

> +#define MAKE_LIST(list, nlist)  \
> +	do {    \
> +		if(list_empty(&list))      \
> +			INIT_LIST_HEAD(nlist);          \
> +		else {  nlist->next->prev = nlist;      \
> +			nlist->prev->next = nlist;      \
> +		}                                       \
> +	}while(0)

This is horrible.  Where are the nlist pointers supposed to point to?
What's so magic you need the INIT_LIST_HEAD only conditionally?

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fwd: [PATCH] Pageset Localization V2
       [not found]   ` <bab4333005033003295f487e3d@mail.gmail.com>
@ 2005-03-30 13:06     ` shobhit dayal
  2005-03-31 14:32       ` Christoph Hellwig
  0 siblings, 1 reply; 10+ messages in thread
From: shobhit dayal @ 2005-03-30 13:06 UTC (permalink / raw)
  To: hch
  Cc: christoph, manfred, akpm, linux-kernel, linux-ia64, linux-mm,
	Shai Fultheim

The goal here is to replace the head of an existing list pointed to by
'list' with a new head pointed to by 'nlist'.
First there is a memcpy that copies the contents of list to nlist, then
this macro is called.
The macro makes sure that if the old head was empty, INIT_LIST_HEAD is
called on 'nlist'; if not, it makes sure that the nodes before and after
the head now correctly point to nlist instead of list.

regards
shobhit


> ---------- Forwarded message ----------
> From: Christoph Hellwig <hch@infradead.org>
> Date: Wed, 30 Mar 2005 12:14:39 +0100
> Subject: Re: [PATCH] Pageset Localization V2
> To: Christoph Lameter <christoph@lameter.com>
> Cc: Manfred Spraul <manfred@colorfullife.com>, Andrew Morton
> <akpm@osdl.org>, linux-kernel@vger.kernel.org,
> linux-ia64@vger.kernel.org, linux-mm@kvack.org, shai@scalex86.org
> 
> 
> > +#define MAKE_LIST(list, nlist)  \
> > +     do {    \
> > +             if(list_empty(&list))      \
> > +                     INIT_LIST_HEAD(nlist);          \
> > +             else {  nlist->next->prev = nlist;      \
> > +                     nlist->prev->next = nlist;      \
> > +             }                                       \
> > +     }while(0)
> 
> This is horrible.  Where are the nlist pointers supposed to point to?
> What's so magic you need the INIT_LIST_HEAD only conditionally?
> 
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Pageset Localization V2
  2005-03-30  5:51 [PATCH] Pageset Localization V2 Christoph Lameter
  2005-03-30 11:14 ` Christoph Hellwig
@ 2005-03-30 13:40 ` Matthew Wilcox
  2005-03-31  3:50   ` Christoph Lameter
  1 sibling, 1 reply; 10+ messages in thread
From: Matthew Wilcox @ 2005-03-30 13:40 UTC (permalink / raw)
  To: Christoph Lameter
  Cc: Manfred Spraul, Andrew Morton, linux-kernel, linux-ia64, linux-mm, shai

On Tue, Mar 29, 2005 at 09:51:08PM -0800, Christoph Lameter wrote:
> +	BUG_ON(process_zones(smp_processor_id()));

No.  Who told you this was a good idea?  This is the *worst* kind of
assert, calling a function with side-effects.

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] Pageset Localization V2
  2005-03-30 13:40 ` Matthew Wilcox
@ 2005-03-31  3:50   ` Christoph Lameter
  0 siblings, 0 replies; 10+ messages in thread
From: Christoph Lameter @ 2005-03-31  3:50 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Manfred Spraul, Andrew Morton, linux-kernel, linux-ia64,
	linux-mm, shai, Christoph Hellwig

Patch to fix the issues mentioned so far. The MAKE_LIST macro would also
not be good for some things that I have planned, so let's drop it.

Index: linux-2.6.11/mm/page_alloc.c
===================================================================
--- linux-2.6.11.orig/mm/page_alloc.c	2005-03-30 19:45:23.000000000 -0800
+++ linux-2.6.11/mm/page_alloc.c	2005-03-30 19:46:23.000000000 -0800
@@ -1613,15 +1613,6 @@ void zone_init_free_lists(struct pglist_
 	memmap_init_zone((size), (nid), (zone), (start_pfn))
 #endif

-#define MAKE_LIST(list, nlist)  \
-	do {    \
-		if(list_empty(&list))      \
-			INIT_LIST_HEAD(nlist);          \
-		else {  nlist->next->prev = nlist;      \
-			nlist->prev->next = nlist;      \
-		}                                       \
-	}while(0)
-
 /*
  * Dynamicaly allocate memory for the
  * per cpu pageset array in struct zone.
@@ -1629,6 +1620,7 @@ void zone_init_free_lists(struct pglist_
 static inline int __devinit process_zones(int cpu)
 {
 	struct zone *zone, *dzone;
+	int i;

 	for_each_zone(zone) {
 		struct per_cpu_pageset *npageset = NULL;
@@ -1642,9 +1634,17 @@ static inline int __devinit process_zone

 		if(zone->pageset[cpu]) {
 			memcpy(npageset, zone->pageset[cpu], sizeof(struct per_cpu_pageset));
-			MAKE_LIST(zone->pageset[cpu]->pcp[0].list, (&npageset->pcp[0].list));
-			MAKE_LIST(zone->pageset[cpu]->pcp[1].list, (&npageset->pcp[1].list));
-		}
+
+			/* Fix up the list pointers */
+			for(i = 0; i<2; i++) {
+				if (list_empty(&zone->pageset[cpu]->pcp[i].list))
+					INIT_LIST_HEAD(&npageset->pcp[i].list);
+				else {
+					npageset->pcp[i].list.next->prev = &npageset->pcp[i].list;
+					npageset->pcp[i].list.prev->next = &npageset->pcp[i].list;
+				}
+			}
+ 		}
 		else {
 			struct per_cpu_pages *pcp;
 			unsigned long batch;
@@ -1721,11 +1721,14 @@ struct notifier_block pageset_notifier =

 void __init setup_per_cpu_pageset()
 {
-	/*Iintialize per_cpu_pageset for cpu 0.
-	  A cpuup callback will do this for every cpu
-	  as it comes online
+	int err;
+
+	/* Initialize per_cpu_pageset for cpu 0.
+	 * A cpuup callback will do this for every cpu
+	 * as it comes online
 	 */
-	BUG_ON(process_zones(smp_processor_id()));
+	err = process_zones(smp_processor_id());
+	BUG_ON(err);
 	register_cpu_notifier(&pageset_notifier);
 }

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fwd: [PATCH] Pageset Localization V2
  2005-03-30 13:06     ` Fwd: " shobhit dayal
@ 2005-03-31 14:32       ` Christoph Hellwig
  2005-03-31 14:47         ` Matthew Wilcox
  2005-03-31 15:35         ` Christoph Lameter
  0 siblings, 2 replies; 10+ messages in thread
From: Christoph Hellwig @ 2005-03-31 14:32 UTC (permalink / raw)
  To: shobhit dayal
  Cc: hch, christoph, manfred, akpm, linux-kernel, linux-ia64,
	linux-mm, Shai Fultheim

On Wed, Mar 30, 2005 at 06:36:18PM +0530, shobhit dayal wrote:
> The goal here is to replace the head of a existing list pointed to by
> 'list' with a new head pointed to by 'nlist'. 
> First there is a memcpy that copies the contents of list to nlist then
> this macro is called.
> The macro makes sure that if the old head was empty then INIT_LIST_HEAD
> the 'nlist', if not then make sure that the nodes before and after the
> head now correclty point to nlist instead of list.

Which would be much nicer done using INIT_LIST_HEAD on the new head
always and then calling list_replace (of which currently only a _rcu variant
exists).

Note to Christoph:  Just duplicating the code doesn't make it better ;-)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fwd: [PATCH] Pageset Localization V2
  2005-03-31 14:32       ` Christoph Hellwig
@ 2005-03-31 14:47         ` Matthew Wilcox
  2005-03-31 15:36           ` Christoph Lameter
  2005-03-31 15:53           ` Christoph Lameter
  2005-03-31 15:35         ` Christoph Lameter
  1 sibling, 2 replies; 10+ messages in thread
From: Matthew Wilcox @ 2005-03-31 14:47 UTC (permalink / raw)
  To: Christoph Hellwig, shobhit dayal, christoph, manfred, akpm,
	linux-kernel, linux-ia64, linux-mm, Shai Fultheim

On Thu, Mar 31, 2005 at 03:32:35PM +0100, Christoph Hellwig wrote:
> Which would be much nicer done using INIT_LIST_HEAD on the new head
> always and then calling list_replace (of which currently only a _rcu variant
> exists).

INIT_LIST_HEAD followed by list_splice() should do the trick, I think.
BTW, is it a problem that the list head which the list was copied from
still points into the list?

-- 
"Next the statesmen will invent cheap lies, putting the blame upon 
the nation that is attacked, and every man will be glad of those
conscience-soothing falsities, and will diligently study them, and refuse
to examine any refutations of them; and thus he will by and by convince 
himself that the war is just, and will thank God for the better sleep 
he enjoys after this process of grotesque self-deception." -- Mark Twain
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fwd: [PATCH] Pageset Localization V2
  2005-03-31 14:32       ` Christoph Hellwig
  2005-03-31 14:47         ` Matthew Wilcox
@ 2005-03-31 15:35         ` Christoph Lameter
  1 sibling, 0 replies; 10+ messages in thread
From: Christoph Lameter @ 2005-03-31 15:35 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: shobhit dayal, manfred, akpm, linux-kernel, linux-ia64, linux-mm,
	Shai Fultheim

On Thu, 31 Mar 2005, Christoph Hellwig wrote:

> On Wed, Mar 30, 2005 at 06:36:18PM +0530, shobhit dayal wrote:
> > The goal here is to replace the head of a existing list pointed to by
> > 'list' with a new head pointed to by 'nlist'.
> > First there is a memcpy that copies the contents of list to nlist then
> > this macro is called.
> > The macro makes sure that if the old head was empty then INIT_LIST_HEAD
> > the 'nlist', if not then make sure that the nodes before and after the
> > head now correclty point to nlist instead of list.
>
> Which would be much nicer done using INIT_LIST_HEAD on the new head
> always and then calling list_replace (of which currently only a _rcu variant
> exists).
>
> Note to Christoph:  Just duplicating the code doesn't make it better ;-)

I will need the loop there for the prezeroing stuff later.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fwd: [PATCH] Pageset Localization V2
  2005-03-31 14:47         ` Matthew Wilcox
@ 2005-03-31 15:36           ` Christoph Lameter
  2005-03-31 15:53           ` Christoph Lameter
  1 sibling, 0 replies; 10+ messages in thread
From: Christoph Lameter @ 2005-03-31 15:36 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christoph Hellwig, shobhit dayal, manfred, akpm, linux-kernel,
	linux-ia64, linux-mm, Shai Fultheim

On Thu, 31 Mar 2005, Matthew Wilcox wrote:

> On Thu, Mar 31, 2005 at 03:32:35PM +0100, Christoph Hellwig wrote:
> > Which would be much nicer done using INIT_LIST_HEAD on the new head
> > always and then calling list_replace (of which currently only a _rcu variant
> > exists).
>
> INIT_LIST_HEAD followed by list_splice() should do the trick, I think.
> BTW, is it a problem that the list head which the list was copied from
> still points into the list?

The code runs during startup and the section containing the old pointers
is discarded at the end of the boot process.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: Fwd: [PATCH] Pageset Localization V2
  2005-03-31 14:47         ` Matthew Wilcox
  2005-03-31 15:36           ` Christoph Lameter
@ 2005-03-31 15:53           ` Christoph Lameter
  1 sibling, 0 replies; 10+ messages in thread
From: Christoph Lameter @ 2005-03-31 15:53 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Christoph Hellwig, shobhit dayal, manfred, akpm, linux-kernel,
	linux-ia64, linux-mm, Shai Fultheim

On Thu, 31 Mar 2005, Matthew Wilcox wrote:

> On Thu, Mar 31, 2005 at 03:32:35PM +0100, Christoph Hellwig wrote:
> > Which would be much nicer done using INIT_LIST_HEAD on the new head
> > always and then calling list_replace (of which currently only a _rcu variant
> > exists).
>
> INIT_LIST_HEAD followed by list_splice() should do the trick, I think.
> BTW, is it a problem that the list head which the list was copied from
> still points into the list?

New patch replacing the old fix patch following your recipe:

Index: linux-2.6.11/mm/page_alloc.c
===================================================================
--- linux-2.6.11.orig/mm/page_alloc.c	2005-03-30 19:45:23.000000000 -0800
+++ linux-2.6.11/mm/page_alloc.c	2005-03-31 07:52:10.000000000 -0800
@@ -1613,15 +1613,6 @@ void zone_init_free_lists(struct pglist_
 	memmap_init_zone((size), (nid), (zone), (start_pfn))
 #endif

-#define MAKE_LIST(list, nlist)  \
-	do {    \
-		if(list_empty(&list))      \
-			INIT_LIST_HEAD(nlist);          \
-		else {  nlist->next->prev = nlist;      \
-			nlist->prev->next = nlist;      \
-		}                                       \
-	}while(0)
-
 /*
  * Dynamicaly allocate memory for the
  * per cpu pageset array in struct zone.
@@ -1629,6 +1620,7 @@ void zone_init_free_lists(struct pglist_
 static inline int __devinit process_zones(int cpu)
 {
 	struct zone *zone, *dzone;
+	int i;

 	for_each_zone(zone) {
 		struct per_cpu_pageset *npageset = NULL;
@@ -1642,10 +1634,13 @@ static inline int __devinit process_zone

 		if(zone->pageset[cpu]) {
 			memcpy(npageset, zone->pageset[cpu], sizeof(struct per_cpu_pageset));
-			MAKE_LIST(zone->pageset[cpu]->pcp[0].list, (&npageset->pcp[0].list));
-			MAKE_LIST(zone->pageset[cpu]->pcp[1].list, (&npageset->pcp[1].list));
-		}
-		else {
+
+			/* Relocate lists */
+			for(i = 0; i<2; i++) {
+				INIT_LIST_HEAD(&npageset->pcp[i].list);
+				list_splice(&zone->pageset[cpu]->pcp[i].list, &npageset->pcp[i].list);
+			}
+ 		} else {
 			struct per_cpu_pages *pcp;
 			unsigned long batch;

@@ -1721,11 +1716,14 @@ struct notifier_block pageset_notifier =

 void __init setup_per_cpu_pageset()
 {
-	/*Iintialize per_cpu_pageset for cpu 0.
-	  A cpuup callback will do this for every cpu
-	  as it comes online
+	int err;
+
+	/* Initialize per_cpu_pageset for cpu 0.
+	 * A cpuup callback will do this for every cpu
+	 * as it comes online
 	 */
-	BUG_ON(process_zones(smp_processor_id()));
+	err = process_zones(smp_processor_id());
+	BUG_ON(err);
 	register_cpu_notifier(&pageset_notifier);
 }

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2005-03-31 15:53 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-03-30  5:51 [PATCH] Pageset Localization V2 Christoph Lameter
2005-03-30 11:14 ` Christoph Hellwig
     [not found]   ` <bab4333005033003295f487e3d@mail.gmail.com>
2005-03-30 13:06     ` Fwd: " shobhit dayal
2005-03-31 14:32       ` Christoph Hellwig
2005-03-31 14:47         ` Matthew Wilcox
2005-03-31 15:36           ` Christoph Lameter
2005-03-31 15:53           ` Christoph Lameter
2005-03-31 15:35         ` Christoph Lameter
2005-03-30 13:40 ` Matthew Wilcox
2005-03-31  3:50   ` Christoph Lameter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox