linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] bootmem speedup from the IA64 tree
@ 2003-04-10 10:24 Christoph Hellwig
  2003-04-10 10:35 ` Andrew Morton
                   ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Christoph Hellwig @ 2003-04-10 10:24 UTC (permalink / raw)
  To: akpm; +Cc: davidm, linux-mm

This patch is from the IA64 tree, with some minor cleanups by me.
David described it as:

  This is a performance speed-up and some minor indentation fixups.

  The problem is that the bootmem code is (a) hugely slow and (b) has
  execution time that grows quadratically with the size of the bootmap bitmap.
  This causes noticeable slowdowns, especially on machines with (relatively)
  large holes in the physical memory map.  Issue (b) is addressed by
  maintaining the "last_success" cache, so that we start the next search
  from the place where we last found some memory (this part of the patch
  could stand additional reviewing/testing).  Issue (a) is addressed by
  using find_next_zero_bit() instead of the slow bit-by-bit testing.


--- 1.14/mm/bootmem.c	Sat Dec 14 12:42:15 2002
+++ edited/mm/bootmem.c	Thu Apr 10 07:28:20 2003
@@ -135,26 +135,24 @@
  * is not a problem.
  *
  * On low memory boxes we get it right in 100% of the cases.
- */
-
-/*
+ *
  * alignment has to be a power of 2 value.
+ *
+ * NOTE:  This function is _not_ reentrant.
  */
-static void * __init __alloc_bootmem_core (bootmem_data_t *bdata, 
-	unsigned long size, unsigned long align, unsigned long goal)
+static void * __init
+__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
+		unsigned long align, unsigned long goal)
 {
-	unsigned long i, start = 0;
+	unsigned long offset, remaining_size, areasize, preferred;
+	unsigned long i, start = 0, incr, eidx;
+	static unsigned long last_success;
 	void *ret;
-	unsigned long offset, remaining_size;
-	unsigned long areasize, preferred, incr;
-	unsigned long eidx = bdata->node_low_pfn - (bdata->node_boot_start >>
-							PAGE_SHIFT);
-
-	if (!size) BUG();
 
-	if (align & (align-1))
-		BUG();
+	BUG_ON(!size);
+	BUG_ON(align & (align-1));
 
+	eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
 	offset = 0;
 	if (align &&
 	    (bdata->node_boot_start & (align - 1UL)) != 0)
@@ -166,8 +164,11 @@
 	 * first, then we try to allocate lower pages.
 	 */
 	if (goal && (goal >= bdata->node_boot_start) && 
-			((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
+	    ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
 		preferred = goal - bdata->node_boot_start;
+
+		if (last_success >= preferred)
+			preferred = last_success;
 	} else
 		preferred = 0;
 
@@ -179,6 +180,8 @@
 restart_scan:
 	for (i = preferred; i < eidx; i += incr) {
 		unsigned long j;
+		i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+		i = (i + incr - 1) & -incr;
 		if (test_bit(i, bdata->node_bootmem_map))
 			continue;
 		for (j = i + 1; j < i + areasize; ++j) {
@@ -189,31 +192,33 @@
 		}
 		start = i;
 		goto found;
-	fail_block:;
+	fail_block:
+		;
 	}
+
 	if (preferred) {
 		preferred = offset;
 		goto restart_scan;
 	}
 	return NULL;
+
 found:
-	if (start >= eidx)
-		BUG();
+	last_success = start << PAGE_SHIFT;
+	BUG_ON(start >= eidx);
 
 	/*
 	 * Is the next page of the previous allocation-end the start
 	 * of this allocation's buffer? If yes then we can 'merge'
 	 * the previous partial page with this allocation.
 	 */
-	if (align < PAGE_SIZE
-	    && bdata->last_offset && bdata->last_pos+1 == start) {
+	if (align < PAGE_SIZE &&
+	    bdata->last_offset && bdata->last_pos+1 == start) {
 		offset = (bdata->last_offset+align-1) & ~(align-1);
-		if (offset > PAGE_SIZE)
-			BUG();
+		BUG_ON(offset > PAGE_SIZE);
 		remaining_size = PAGE_SIZE-offset;
 		if (size < remaining_size) {
 			areasize = 0;
-			// last_pos unchanged
+			/* last_pos unchanged */
 			bdata->last_offset = offset+size;
 			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
 						bdata->node_boot_start);
@@ -231,11 +236,12 @@
 		bdata->last_offset = size & ~PAGE_MASK;
 		ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
 	}
+
 	/*
 	 * Reserve the area now:
 	 */
 	for (i = start; i < start+areasize; i++)
-		if (test_and_set_bit(i, bdata->node_bootmem_map))
+		if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
 			BUG();
 	memset(ret, 0, size);
 	return ret;
@@ -256,21 +262,21 @@
 	map = bdata->node_bootmem_map;
 	for (i = 0; i < idx; ) {
 		unsigned long v = ~map[i / BITS_PER_LONG];
-		if (v) { 
+		if (v) {
 			unsigned long m;
-			for (m = 1; m && i < idx; m<<=1, page++, i++) { 
+			for (m = 1; m && i < idx; m<<=1, page++, i++) {
 				if (v & m) {
-			count++;
-			ClearPageReserved(page);
-			set_page_count(page, 1);
-			__free_page(page);
-		}
-	}
+					count++;
+					ClearPageReserved(page);
+					set_page_count(page, 1);
+					__free_page(page);
+				}
+			}
 		} else {
 			i+=BITS_PER_LONG;
-			page+=BITS_PER_LONG; 
-		} 	
-	}	
+			page += BITS_PER_LONG;
+		}
+	}
 	total += count;
 
 	/*
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 10:24 [PATCH] bootmem speedup from the IA64 tree Christoph Hellwig
@ 2003-04-10 10:35 ` Andrew Morton
  2003-04-10 14:18   ` Martin J. Bligh
  2003-04-10 16:46   ` Martin J. Bligh
  2003-04-10 13:59 ` Benjamin LaHaise
  2003-04-10 15:25 ` Anton Blanchard
  2 siblings, 2 replies; 14+ messages in thread
From: Andrew Morton @ 2003-04-10 10:35 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: davidm, linux-mm

Christoph Hellwig <hch@lst.de> wrote:
>
> This patch is from the IA64 tree, with some minor cleanups by me.
> David described it as:
> 
>   This is a performance speed up and some minor indendation fixups.

OK, thanks - I'll queue this up for a bit of testing.

Martin, can you please also test this?
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 10:24 [PATCH] bootmem speedup from the IA64 tree Christoph Hellwig
  2003-04-10 10:35 ` Andrew Morton
@ 2003-04-10 13:59 ` Benjamin LaHaise
  2003-04-10 20:43   ` Andrew Morton
  2003-04-10 15:25 ` Anton Blanchard
  2 siblings, 1 reply; 14+ messages in thread
From: Benjamin LaHaise @ 2003-04-10 13:59 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: akpm, davidm, linux-mm

On Thu, Apr 10, 2003 at 12:24:21PM +0200, Christoph Hellwig wrote:
>  	if (goal && (goal >= bdata->node_boot_start) && 
> -			((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
> +	    ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
>  		preferred = goal - bdata->node_boot_start;
> +
> +		if (last_success >= preferred)
> +			preferred = last_success;

I suspect you need a range check on last_success here for machines which have 
multiple nodes of memory, or else store it in bdata.

		-ben
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 10:35 ` Andrew Morton
@ 2003-04-10 14:18   ` Martin J. Bligh
  2003-04-10 16:46   ` Martin J. Bligh
  1 sibling, 0 replies; 14+ messages in thread
From: Martin J. Bligh @ 2003-04-10 14:18 UTC (permalink / raw)
  To: Andrew Morton, Christoph Hellwig; +Cc: davidm, linux-mm

>> This patch is from the IA64 tree, with some minor cleanups by me.
>> David described it as:
>> 
>>   This is a performance speed up and some minor indendation fixups.
> 
> OK, thanks - I'll queue this up for a bit of testing.
> 
> Martin, can you please also test this?

Sure. Will do both of those together.

M.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 10:24 [PATCH] bootmem speedup from the IA64 tree Christoph Hellwig
  2003-04-10 10:35 ` Andrew Morton
  2003-04-10 13:59 ` Benjamin LaHaise
@ 2003-04-10 15:25 ` Anton Blanchard
  2 siblings, 0 replies; 14+ messages in thread
From: Anton Blanchard @ 2003-04-10 15:25 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: akpm, davidm, linux-mm

>   This is a performance speed up and some minor indendation fixups.
> 
>   The problem is that the bootmem code is (a) hugely slow and (b) has
>   execution that grow quadratically with the size of the bootmap bitmap.
>   This causes noticable slowdowns, especially on machines with (relatively)
>   large holes in the physical memory map.  Issue (b) is addressed by
>   maintaining the "last_success" cache, so that we start the next search
>   from the place where we last found some memory (this part of the patch
>   could stand additional reviewing/testing).  Issue (a) is addressed by
>   using find_next_zero_bit() instead of the slow bit-by-bit testing.

FYI I have some ppc64 machines with a memory layout of

1GB MEM
3GB IO
63GB MEM

And see the same problem.

Anton
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 10:35 ` Andrew Morton
  2003-04-10 14:18   ` Martin J. Bligh
@ 2003-04-10 16:46   ` Martin J. Bligh
  1 sibling, 0 replies; 14+ messages in thread
From: Martin J. Bligh @ 2003-04-10 16:46 UTC (permalink / raw)
  To: Andrew Morton, Christoph Hellwig; +Cc: davidm, linux-mm

>> This patch is from the IA64 tree, with some minor cleanups by me.
>> David described it as:
>> 
>>   This is a performance speed up and some minor indendation fixups.
> 
> OK, thanks - I'll queue this up for a bit of testing.
> 
> Martin, can you please also test this?

Compile-tested against every config I had lying around, and run-tested
on my big weirdo-box. Works fine.

M.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 20:43   ` Andrew Morton
@ 2003-04-10 20:34     ` Martin J. Bligh
  2003-04-10 21:07       ` Andrew Morton
  2003-04-10 21:02     ` William Lee Irwin III
  2003-04-11 20:32     ` Rik van Riel
  2 siblings, 1 reply; 14+ messages in thread
From: Martin J. Bligh @ 2003-04-10 20:34 UTC (permalink / raw)
  To: Andrew Morton, Benjamin LaHaise; +Cc: hch, davidm, linux-mm

> Bootmem ignoramus says:
> 
> Do we have a problem with using an `unsigned long' byte address in there on
> ia32 PAE?  Or are we guaranteed that this will only ever be used in the lower
> 4G of physical memory?

IIRC, only ZONE_NORMAL goes into bootmem, so we should be OK.

M.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 13:59 ` Benjamin LaHaise
@ 2003-04-10 20:43   ` Andrew Morton
  2003-04-10 20:34     ` Martin J. Bligh
                       ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: Andrew Morton @ 2003-04-10 20:43 UTC (permalink / raw)
  To: Benjamin LaHaise; +Cc: hch, davidm, linux-mm, Martin J. Bligh

Benjamin LaHaise <bcrl@redhat.com> wrote:
>
> On Thu, Apr 10, 2003 at 12:24:21PM +0200, Christoph Hellwig wrote:
> >  	if (goal && (goal >= bdata->node_boot_start) && 
> > -			((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
> > +	    ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
> >  		preferred = goal - bdata->node_boot_start;
> > +
> > +		if (last_success >= preferred)
> > +			preferred = last_success;
> 
> I suspect you need a range check on last_success here for machines which have 
> multiple nodes of memory, or else store it in bdata.

Agreed.  I've updated the patch thusly.

Bootmem ignoramus says:

Do we have a problem with using an `unsigned long' byte address in there on
ia32 PAE?  Or are we guaranteed that this will only ever be used in the lower
4G of physical memory?

Does the last_success cache ever need to be updated if someone frees some
previously-allocated memory?


From: Christoph Hellwig <hch@lst.de>

This patch is from the IA64 tree, with some minor cleanups by me.
David described it as:

  This is a performance speed up and some minor indendation fixups.

  The problem is that the bootmem code is (a) hugely slow and (b) has
  execution that grow quadratically with the size of the bootmap bitmap.
  This causes noticable slowdowns, especially on machines with (relatively)
  large holes in the physical memory map.  Issue (b) is addressed by
  maintaining the "last_success" cache, so that we start the next search
  from the place where we last found some memory (this part of the patch
  could stand additional reviewing/testing).  Issue (a) is addressed by
  using find_next_zero_bit() instead of the slow bit-by-bit testing.



 25-akpm/include/linux/bootmem.h |    2 +
 25-akpm/mm/bootmem.c            |   75 +++++++++++++++++++++-------------------
 2 files changed, 42 insertions(+), 35 deletions(-)

diff -puN mm/bootmem.c~bootmem-speedup mm/bootmem.c
--- 25/mm/bootmem.c~bootmem-speedup	Thu Apr 10 13:35:15 2003
+++ 25-akpm/mm/bootmem.c	Thu Apr 10 13:40:16 2003
@@ -135,26 +135,23 @@ static void __init free_bootmem_core(boo
  * is not a problem.
  *
  * On low memory boxes we get it right in 100% of the cases.
- */
-
-/*
+ *
  * alignment has to be a power of 2 value.
+ *
+ * NOTE:  This function is _not_ reenetrant.
  */
-static void * __init __alloc_bootmem_core (bootmem_data_t *bdata, 
-	unsigned long size, unsigned long align, unsigned long goal)
+static void * __init
+__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
+		unsigned long align, unsigned long goal)
 {
-	unsigned long i, start = 0;
+	unsigned long offset, remaining_size, areasize, preferred;
+	unsigned long i, start = 0, incr, eidx;
 	void *ret;
-	unsigned long offset, remaining_size;
-	unsigned long areasize, preferred, incr;
-	unsigned long eidx = bdata->node_low_pfn - (bdata->node_boot_start >>
-							PAGE_SHIFT);
 
-	if (!size) BUG();
-
-	if (align & (align-1))
-		BUG();
+	BUG_ON(!size);
+	BUG_ON(align & (align-1));
 
+	eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
 	offset = 0;
 	if (align &&
 	    (bdata->node_boot_start & (align - 1UL)) != 0)
@@ -166,8 +163,11 @@ static void * __init __alloc_bootmem_cor
 	 * first, then we try to allocate lower pages.
 	 */
 	if (goal && (goal >= bdata->node_boot_start) && 
-			((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
+	    ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
 		preferred = goal - bdata->node_boot_start;
+
+		if (bdata->last_success >= preferred)
+			preferred = bdata->last_success;
 	} else
 		preferred = 0;
 
@@ -179,6 +179,8 @@ static void * __init __alloc_bootmem_cor
 restart_scan:
 	for (i = preferred; i < eidx; i += incr) {
 		unsigned long j;
+		i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+		i = (i + incr - 1) & -incr;
 		if (test_bit(i, bdata->node_bootmem_map))
 			continue;
 		for (j = i + 1; j < i + areasize; ++j) {
@@ -189,31 +191,33 @@ restart_scan:
 		}
 		start = i;
 		goto found;
-	fail_block:;
+	fail_block:
+		;
 	}
+
 	if (preferred) {
 		preferred = offset;
 		goto restart_scan;
 	}
 	return NULL;
+
 found:
-	if (start >= eidx)
-		BUG();
+	bdata->last_success = start << PAGE_SHIFT;
+	BUG_ON(start >= eidx);
 
 	/*
 	 * Is the next page of the previous allocation-end the start
 	 * of this allocation's buffer? If yes then we can 'merge'
 	 * the previous partial page with this allocation.
 	 */
-	if (align < PAGE_SIZE
-	    && bdata->last_offset && bdata->last_pos+1 == start) {
+	if (align < PAGE_SIZE &&
+	    bdata->last_offset && bdata->last_pos+1 == start) {
 		offset = (bdata->last_offset+align-1) & ~(align-1);
-		if (offset > PAGE_SIZE)
-			BUG();
+		BUG_ON(offset > PAGE_SIZE);
 		remaining_size = PAGE_SIZE-offset;
 		if (size < remaining_size) {
 			areasize = 0;
-			// last_pos unchanged
+			/* last_pos unchanged */
 			bdata->last_offset = offset+size;
 			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
 						bdata->node_boot_start);
@@ -231,11 +235,12 @@ found:
 		bdata->last_offset = size & ~PAGE_MASK;
 		ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
 	}
+
 	/*
 	 * Reserve the area now:
 	 */
 	for (i = start; i < start+areasize; i++)
-		if (test_and_set_bit(i, bdata->node_bootmem_map))
+		if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
 			BUG();
 	memset(ret, 0, size);
 	return ret;
@@ -256,21 +261,21 @@ static unsigned long __init free_all_boo
 	map = bdata->node_bootmem_map;
 	for (i = 0; i < idx; ) {
 		unsigned long v = ~map[i / BITS_PER_LONG];
-		if (v) { 
+		if (v) {
 			unsigned long m;
-			for (m = 1; m && i < idx; m<<=1, page++, i++) { 
+			for (m = 1; m && i < idx; m<<=1, page++, i++) {
 				if (v & m) {
-			count++;
-			ClearPageReserved(page);
-			set_page_count(page, 1);
-			__free_page(page);
-		}
-	}
+					count++;
+					ClearPageReserved(page);
+					set_page_count(page, 1);
+					__free_page(page);
+				}
+			}
 		} else {
 			i+=BITS_PER_LONG;
-			page+=BITS_PER_LONG; 
-		} 	
-	}	
+			page += BITS_PER_LONG;
+		}
+	}
 	total += count;
 
 	/*
diff -puN include/linux/bootmem.h~bootmem-speedup include/linux/bootmem.h
--- 25/include/linux/bootmem.h~bootmem-speedup	Thu Apr 10 13:38:43 2003
+++ 25-akpm/include/linux/bootmem.h	Thu Apr 10 13:39:17 2003
@@ -32,6 +32,8 @@ typedef struct bootmem_data {
 	void *node_bootmem_map;
 	unsigned long last_offset;
 	unsigned long last_pos;
+	unsigned long last_success;	/* Previous allocation point.  To speed
+					 * up searching */
 } bootmem_data_t;
 
 extern unsigned long __init bootmem_bootmap_pages (unsigned long);

_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 20:43   ` Andrew Morton
  2003-04-10 20:34     ` Martin J. Bligh
@ 2003-04-10 21:02     ` William Lee Irwin III
  2003-04-11 20:32     ` Rik van Riel
  2 siblings, 0 replies; 14+ messages in thread
From: William Lee Irwin III @ 2003-04-10 21:02 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Benjamin LaHaise, hch, davidm, linux-mm, Martin J. Bligh

On Thu, Apr 10, 2003 at 01:43:34PM -0700, Andrew Morton wrote:
> Agreed.  I've updated the patch thusly.
> Bootmem igornamus says:
> Do we have a problem with using an `unsigned long' byte address in there on
> ia32 PAE?  Or are we guaranteed that this will only ever be used in the lower
> 4G of physical memory?

It's only ever used for lowmem on ia32, which is even below 1GB.


On Thu, Apr 10, 2003 at 01:43:34PM -0700, Andrew Morton wrote:
> Does the last_success cache ever need to be updated if someone frees some
> previously-allocated memory?

Setting preferred only puts a finger on where to begin a search. The
search (and validity checking) are still carried out as usual. It could
be suboptimal to set it to somewhere that's not as good as possible
after a free, but it's only advice as to where to start a search and so
doesn't affect correctness so long as it's in-bounds.

I'm just going to grab a barfbag and run now.


-- wli
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 20:34     ` Martin J. Bligh
@ 2003-04-10 21:07       ` Andrew Morton
  0 siblings, 0 replies; 14+ messages in thread
From: Andrew Morton @ 2003-04-10 21:07 UTC (permalink / raw)
  To: Martin J. Bligh; +Cc: bcrl, hch, davidm, linux-mm

"Martin J. Bligh" <mbligh@aracnet.com> wrote:
>
> > Bootmem igornamus says:
> > 
> > Do we have a problem with using an `unsigned long' byte address in there on
> > ia32 PAE?  Or are we guaranteed that this will only ever be used in the lower
> > 4G of physical memory?
> 
> IIRC, only ZONE_NORMAL goes into bootmem, so we should be OK.
> 

OK, thanks.  You didn't answer my other question ;)

I stuck

        if (addr < bdata->last_success)
                bdata->last_success = addr;

in free_bootmem_core().


diff -puN mm/bootmem.c~bootmem-speedup mm/bootmem.c
--- 25/mm/bootmem.c~bootmem-speedup	Thu Apr 10 13:35:15 2003
+++ 25-akpm/mm/bootmem.c	Thu Apr 10 14:06:54 2003
@@ -115,6 +115,9 @@ static void __init free_bootmem_core(boo
 	if (end > bdata->node_low_pfn)
 		BUG();
 
+	if (addr < bdata->last_success)
+		bdata->last_success = addr;
+
 	/*
 	 * Round up the beginning of the address.
 	 */
@@ -135,26 +138,23 @@ static void __init free_bootmem_core(boo
  * is not a problem.
  *
  * On low memory boxes we get it right in 100% of the cases.
- */
-
-/*
+ *
  * alignment has to be a power of 2 value.
+ *
+ * NOTE:  This function is _not_ reenetrant.
  */
-static void * __init __alloc_bootmem_core (bootmem_data_t *bdata, 
-	unsigned long size, unsigned long align, unsigned long goal)
+static void * __init
+__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
+		unsigned long align, unsigned long goal)
 {
-	unsigned long i, start = 0;
+	unsigned long offset, remaining_size, areasize, preferred;
+	unsigned long i, start = 0, incr, eidx;
 	void *ret;
-	unsigned long offset, remaining_size;
-	unsigned long areasize, preferred, incr;
-	unsigned long eidx = bdata->node_low_pfn - (bdata->node_boot_start >>
-							PAGE_SHIFT);
-
-	if (!size) BUG();
 
-	if (align & (align-1))
-		BUG();
+	BUG_ON(!size);
+	BUG_ON(align & (align-1));
 
+	eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
 	offset = 0;
 	if (align &&
 	    (bdata->node_boot_start & (align - 1UL)) != 0)
@@ -166,8 +166,11 @@ static void * __init __alloc_bootmem_cor
 	 * first, then we try to allocate lower pages.
 	 */
 	if (goal && (goal >= bdata->node_boot_start) && 
-			((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
+	    ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
 		preferred = goal - bdata->node_boot_start;
+
+		if (bdata->last_success >= preferred)
+			preferred = bdata->last_success;
 	} else
 		preferred = 0;
 
@@ -179,6 +182,8 @@ static void * __init __alloc_bootmem_cor
 restart_scan:
 	for (i = preferred; i < eidx; i += incr) {
 		unsigned long j;
+		i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+		i = (i + incr - 1) & -incr;
 		if (test_bit(i, bdata->node_bootmem_map))
 			continue;
 		for (j = i + 1; j < i + areasize; ++j) {
@@ -189,31 +194,33 @@ restart_scan:
 		}
 		start = i;
 		goto found;
-	fail_block:;
+	fail_block:
+		;
 	}
+
 	if (preferred) {
 		preferred = offset;
 		goto restart_scan;
 	}
 	return NULL;
+
 found:
-	if (start >= eidx)
-		BUG();
+	bdata->last_success = start << PAGE_SHIFT;
+	BUG_ON(start >= eidx);
 
 	/*
 	 * Is the next page of the previous allocation-end the start
 	 * of this allocation's buffer? If yes then we can 'merge'
 	 * the previous partial page with this allocation.
 	 */
-	if (align < PAGE_SIZE
-	    && bdata->last_offset && bdata->last_pos+1 == start) {
+	if (align < PAGE_SIZE &&
+	    bdata->last_offset && bdata->last_pos+1 == start) {
 		offset = (bdata->last_offset+align-1) & ~(align-1);
-		if (offset > PAGE_SIZE)
-			BUG();
+		BUG_ON(offset > PAGE_SIZE);
 		remaining_size = PAGE_SIZE-offset;
 		if (size < remaining_size) {
 			areasize = 0;
-			// last_pos unchanged
+			/* last_pos unchanged */
 			bdata->last_offset = offset+size;
 			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
 						bdata->node_boot_start);
@@ -231,11 +238,12 @@ found:
 		bdata->last_offset = size & ~PAGE_MASK;
 		ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
 	}
+
 	/*
 	 * Reserve the area now:
 	 */
 	for (i = start; i < start+areasize; i++)
-		if (test_and_set_bit(i, bdata->node_bootmem_map))
+		if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
 			BUG();
 	memset(ret, 0, size);
 	return ret;
@@ -256,21 +264,21 @@ static unsigned long __init free_all_boo
 	map = bdata->node_bootmem_map;
 	for (i = 0; i < idx; ) {
 		unsigned long v = ~map[i / BITS_PER_LONG];
-		if (v) { 
+		if (v) {
 			unsigned long m;
-			for (m = 1; m && i < idx; m<<=1, page++, i++) { 
+			for (m = 1; m && i < idx; m<<=1, page++, i++) {
 				if (v & m) {
-			count++;
-			ClearPageReserved(page);
-			set_page_count(page, 1);
-			__free_page(page);
-		}
-	}
+					count++;
+					ClearPageReserved(page);
+					set_page_count(page, 1);
+					__free_page(page);
+				}
+			}
 		} else {
 			i+=BITS_PER_LONG;
-			page+=BITS_PER_LONG; 
-		} 	
-	}	
+			page += BITS_PER_LONG;
+		}
+	}
 	total += count;
 
 	/*
diff -puN include/linux/bootmem.h~bootmem-speedup include/linux/bootmem.h
--- 25/include/linux/bootmem.h~bootmem-speedup	Thu Apr 10 13:38:43 2003
+++ 25-akpm/include/linux/bootmem.h	Thu Apr 10 13:39:17 2003
@@ -32,6 +32,8 @@ typedef struct bootmem_data {
 	void *node_bootmem_map;
 	unsigned long last_offset;
 	unsigned long last_pos;
+	unsigned long last_success;	/* Previous allocation point.  To speed
+					 * up searching */
 } bootmem_data_t;
 
 extern unsigned long __init bootmem_bootmap_pages (unsigned long);

_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-10 20:43   ` Andrew Morton
  2003-04-10 20:34     ` Martin J. Bligh
  2003-04-10 21:02     ` William Lee Irwin III
@ 2003-04-11 20:32     ` Rik van Riel
  2003-04-11 20:57       ` Andrew Morton
  2003-04-11 20:58       ` David Mosberger
  2 siblings, 2 replies; 14+ messages in thread
From: Rik van Riel @ 2003-04-11 20:32 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Benjamin LaHaise, hch, davidm, linux-mm, Martin J. Bligh

On Thu, 10 Apr 2003, Andrew Morton wrote:

> Does the last_success cache ever need to be updated if someone frees
> some previously-allocated memory?

I've heard rumours that some IA64 trees can't boot without
this "optimisation", suggesting that they use bootmem after
freeing it.

Doesn't make the optimisation any less valid, though ...

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-11 20:32     ` Rik van Riel
@ 2003-04-11 20:57       ` Andrew Morton
  2003-04-11 21:27         ` David Mosberger
  2003-04-11 20:58       ` David Mosberger
  1 sibling, 1 reply; 14+ messages in thread
From: Andrew Morton @ 2003-04-11 20:57 UTC (permalink / raw)
  To: Rik van Riel; +Cc: bcrl, hch, davidm, linux-mm, mbligh

Rik van Riel <riel@redhat.com> wrote:
>
> On Thu, 10 Apr 2003, Andrew Morton wrote:
> 
> > Does the last_success cache ever need to be updated if someone frees
> > some previously-allocated memory?
> 
> I've heard rumours that some IA64 trees can't boot without
> this "optimisation", suggesting that they use bootmem after
> freeing it.

hm.  Well I assume there's only one functional ia64 2.5 tree at present, and
that's David.

David, could you please test this?


diff -puN mm/bootmem.c~bootmem-speedup mm/bootmem.c
--- 25/mm/bootmem.c~bootmem-speedup	Thu Apr 10 13:35:15 2003
+++ 25-akpm/mm/bootmem.c	Thu Apr 10 14:06:54 2003
@@ -115,6 +115,9 @@ static void __init free_bootmem_core(boo
 	if (end > bdata->node_low_pfn)
 		BUG();
 
+	if (addr < bdata->last_success)
+		bdata->last_success = addr;
+
 	/*
 	 * Round up the beginning of the address.
 	 */
@@ -135,26 +138,23 @@ static void __init free_bootmem_core(boo
  * is not a problem.
  *
  * On low memory boxes we get it right in 100% of the cases.
- */
-
-/*
+ *
  * alignment has to be a power of 2 value.
+ *
+ * NOTE:  This function is _not_ reenetrant.
  */
-static void * __init __alloc_bootmem_core (bootmem_data_t *bdata, 
-	unsigned long size, unsigned long align, unsigned long goal)
+static void * __init
+__alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
+		unsigned long align, unsigned long goal)
 {
-	unsigned long i, start = 0;
+	unsigned long offset, remaining_size, areasize, preferred;
+	unsigned long i, start = 0, incr, eidx;
 	void *ret;
-	unsigned long offset, remaining_size;
-	unsigned long areasize, preferred, incr;
-	unsigned long eidx = bdata->node_low_pfn - (bdata->node_boot_start >>
-							PAGE_SHIFT);
-
-	if (!size) BUG();
 
-	if (align & (align-1))
-		BUG();
+	BUG_ON(!size);
+	BUG_ON(align & (align-1));
 
+	eidx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
 	offset = 0;
 	if (align &&
 	    (bdata->node_boot_start & (align - 1UL)) != 0)
@@ -166,8 +166,11 @@ static void * __init __alloc_bootmem_cor
 	 * first, then we try to allocate lower pages.
 	 */
 	if (goal && (goal >= bdata->node_boot_start) && 
-			((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
+	    ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) {
 		preferred = goal - bdata->node_boot_start;
+
+		if (bdata->last_success >= preferred)
+			preferred = bdata->last_success;
 	} else
 		preferred = 0;
 
@@ -179,6 +182,8 @@ static void * __init __alloc_bootmem_cor
 restart_scan:
 	for (i = preferred; i < eidx; i += incr) {
 		unsigned long j;
+		i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+		i = (i + incr - 1) & -incr;
 		if (test_bit(i, bdata->node_bootmem_map))
 			continue;
 		for (j = i + 1; j < i + areasize; ++j) {
@@ -189,31 +194,33 @@ restart_scan:
 		}
 		start = i;
 		goto found;
-	fail_block:;
+	fail_block:
+		;
 	}
+
 	if (preferred) {
 		preferred = offset;
 		goto restart_scan;
 	}
 	return NULL;
+
 found:
-	if (start >= eidx)
-		BUG();
+	bdata->last_success = start << PAGE_SHIFT;
+	BUG_ON(start >= eidx);
 
 	/*
 	 * Is the next page of the previous allocation-end the start
 	 * of this allocation's buffer? If yes then we can 'merge'
 	 * the previous partial page with this allocation.
 	 */
-	if (align < PAGE_SIZE
-	    && bdata->last_offset && bdata->last_pos+1 == start) {
+	if (align < PAGE_SIZE &&
+	    bdata->last_offset && bdata->last_pos+1 == start) {
 		offset = (bdata->last_offset+align-1) & ~(align-1);
-		if (offset > PAGE_SIZE)
-			BUG();
+		BUG_ON(offset > PAGE_SIZE);
 		remaining_size = PAGE_SIZE-offset;
 		if (size < remaining_size) {
 			areasize = 0;
-			// last_pos unchanged
+			/* last_pos unchanged */
 			bdata->last_offset = offset+size;
 			ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset +
 						bdata->node_boot_start);
@@ -231,11 +238,12 @@ found:
 		bdata->last_offset = size & ~PAGE_MASK;
 		ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
 	}
+
 	/*
 	 * Reserve the area now:
 	 */
 	for (i = start; i < start+areasize; i++)
-		if (test_and_set_bit(i, bdata->node_bootmem_map))
+		if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
 			BUG();
 	memset(ret, 0, size);
 	return ret;
@@ -256,21 +264,21 @@ static unsigned long __init free_all_boo
 	map = bdata->node_bootmem_map;
 	for (i = 0; i < idx; ) {
 		unsigned long v = ~map[i / BITS_PER_LONG];
-		if (v) { 
+		if (v) {
 			unsigned long m;
-			for (m = 1; m && i < idx; m<<=1, page++, i++) { 
+			for (m = 1; m && i < idx; m<<=1, page++, i++) {
 				if (v & m) {
-			count++;
-			ClearPageReserved(page);
-			set_page_count(page, 1);
-			__free_page(page);
-		}
-	}
+					count++;
+					ClearPageReserved(page);
+					set_page_count(page, 1);
+					__free_page(page);
+				}
+			}
 		} else {
 			i+=BITS_PER_LONG;
-			page+=BITS_PER_LONG; 
-		} 	
-	}	
+			page += BITS_PER_LONG;
+		}
+	}
 	total += count;
 
 	/*
diff -puN include/linux/bootmem.h~bootmem-speedup include/linux/bootmem.h
--- 25/include/linux/bootmem.h~bootmem-speedup	Thu Apr 10 13:38:43 2003
+++ 25-akpm/include/linux/bootmem.h	Thu Apr 10 13:39:17 2003
@@ -32,6 +32,8 @@ typedef struct bootmem_data {
 	void *node_bootmem_map;
 	unsigned long last_offset;
 	unsigned long last_pos;
+	unsigned long last_success;	/* Previous allocation point.  To speed
+					 * up searching */
 } bootmem_data_t;
 
 extern unsigned long __init bootmem_bootmap_pages (unsigned long);

_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-11 20:32     ` Rik van Riel
  2003-04-11 20:57       ` Andrew Morton
@ 2003-04-11 20:58       ` David Mosberger
  1 sibling, 0 replies; 14+ messages in thread
From: David Mosberger @ 2003-04-11 20:58 UTC (permalink / raw)
  To: Rik van Riel
  Cc: Andrew Morton, Benjamin LaHaise, hch, davidm, linux-mm, Martin J. Bligh

>>>>> On Fri, 11 Apr 2003 16:32:09 -0400 (EDT), Rik van Riel <riel@redhat.com> said:

  Rik> On Thu, 10 Apr 2003, Andrew Morton wrote:
  >> Does the last_success cache ever need to be updated if someone frees
  >> some previously-allocated memory?

  Rik> I've heard rumours that some IA64 trees can't boot without
  Rik> this "optimisation", suggesting that they use bootmem after
  Rik> freeing it.

Huh?  Where do you hear such rumors?

	--david
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] bootmem speedup from the IA64 tree
  2003-04-11 20:57       ` Andrew Morton
@ 2003-04-11 21:27         ` David Mosberger
  0 siblings, 0 replies; 14+ messages in thread
From: David Mosberger @ 2003-04-11 21:27 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Rik van Riel, bcrl, hch, davidm, linux-mm, mbligh

>>>>> On Fri, 11 Apr 2003 13:57:07 -0700, Andrew Morton <akpm@digeo.com> said:

  Andrew> Rik van Riel <riel@redhat.com> wrote:

  >> On Thu, 10 Apr 2003, Andrew Morton wrote:

  >> > Does the last_success cache ever need to be updated if someone frees
  >> > some previously-allocated memory?

  >> I've heard rumours that some IA64 trees can't boot without
  >> this "optimisation", suggesting that they use bootmem after
  >> freeing it.

  Andrew> hm.  Well I assume there's only one functional ia64 2.5 tree at present, and
  Andrew> that's David.

  Andrew> David, could you please test this?

I tried the patch with the Ski simulator (simulating a 4GB hole) and
it booted as fast as ever.  Looks great to me.

The new code is in my tree now so it will be exposed to real hardware
today and over the next couple of days.  I don't anticipate any
problems, but if something unexpected crops up, I'll let you know.

Thanks,

	--david
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2003-04-11 21:27 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-04-10 10:24 [PATCH] bootmem speedup from the IA64 tree Christoph Hellwig
2003-04-10 10:35 ` Andrew Morton
2003-04-10 14:18   ` Martin J. Bligh
2003-04-10 16:46   ` Martin J. Bligh
2003-04-10 13:59 ` Benjamin LaHaise
2003-04-10 20:43   ` Andrew Morton
2003-04-10 20:34     ` Martin J. Bligh
2003-04-10 21:07       ` Andrew Morton
2003-04-10 21:02     ` William Lee Irwin III
2003-04-11 20:32     ` Rik van Riel
2003-04-11 20:57       ` Andrew Morton
2003-04-11 21:27         ` David Mosberger
2003-04-11 20:58       ` David Mosberger
2003-04-10 15:25 ` Anton Blanchard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox