[RFC] buddy allocator without bitmap(2) [1/3]

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

* [RFC] buddy allocator without bitmap(2) [1/3]
@ 2004-08-31 10:41 Hiroyuki KAMEZAWA
  2004-08-31 16:30 ` Dave Hansen
  0 siblings, 1 reply; 5+ messages in thread
From: Hiroyuki KAMEZAWA @ 2004-08-31 10:41 UTC (permalink / raw)
  To: Linux Kernel ML; +Cc: linux-mm, LHMS

This is the 2nd file.
It implements the initialization code for the buddy allocator.


- Kame

------------

This patch removes the bitmap allocation in zone_init_free_lists() and
removes pages_to_bitmap_size().

calculate_aligned_end() works as follows:
(a) it detects whether mem_map is aligned or not.
(b) if the start of mem_map is not aligned, it sets the PG_buddyend flag on
     pages which have no lower-address buddy.
(c) if the end of mem_map is not aligned, it reserves the last page with
     reserve_bootmem().



-- Kame


---

  linux-2.6.9-rc1-mm1-k-kamezawa/mm/page_alloc.c |  112 ++++++++++++++++---------
  1 files changed, 73 insertions(+), 39 deletions(-)

diff -puN mm/page_alloc.c~eliminate-bitmap-init mm/page_alloc.c
--- linux-2.6.9-rc1-mm1-k/mm/page_alloc.c~eliminate-bitmap-init	2004-08-31 18:37:14.596519040 +0900
+++ linux-2.6.9-rc1-mm1-k-kamezawa/mm/page_alloc.c	2004-08-31 18:43:30.723339072 +0900
@@ -1499,6 +1499,70 @@ static void __init calculate_zone_totalp
  	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
  }

+/*
+ * (1) checks a mem_map is aligned to max_order or not.
+ * (2) if mem_map is not aligned in its start address, find pages which are
+ * lower-end of buddy.
+ * (3) if mem_map is not aligned in end address, find a page which is higher
+ * end of buddy and remove it from buddy allocator.
+ * All found pages are marked as PG_buddyend. These marked pages have special
+ * meaning in free_pages().
+ */
+
+
+static void __init calculate_aligned_end(struct zone *zone,
+					 unsigned long start_pfn,
+					 int nr_pages)
+{
+	struct page *base;
+	unsigned long mask;
+	long start_idx, end_idx;
+	
+	start_idx = start_pfn - zone->zone_start_pfn;
+	end_idx = start_idx + nr_pages - 1;
+	mask = (1 << MAX_ORDER) - 1;
+	base = zone->zone_mem_map;
+	
+	if (start_idx & mask) {
+		long edge_idx, buddy_idx;
+		int order;
+		edge_idx = start_idx;
+		/*
+		 * Mark all pages which can be  higher half of buddy in
+		 * its index, but its lower half is never available.
+		 */
+		for (edge_idx = start_idx,order = 0;
+		     order < MAX_ORDER;
+		     order++) {
+			if (edge_idx > end_idx)
+				break;
+			buddy_idx = edge_idx ^ (1 << order);
+			if (buddy_idx < edge_idx) {
+				SetPageBuddyend(base + edge_idx);
+				edge_idx += (1 << order);
+			}
+		}
+	}
+	if ((end_idx & mask) != mask) {
+		unsigned long end_address;
+		/*
+		 * Reserve the last page as the stopper for buddy allocator.
+		 * This page is a victim to make buddy allocator work fine.
+		 *
+		 * Note:
+                 * We are using reserve_bootmem() here, is this correct ?
+		 */
+		SetPageBuddyend(base + end_idx);
+		SetPagePrivate(base + end_idx);
+		end_address = (zone->zone_start_pfn + end_idx) << PAGE_SHIFT;
+#ifndef CONFIG_DISCONTIGMEM
+		reserve_bootmem(end_address,PAGE_SIZE);
+#else
+		reserve_bootmem_node(zone->zone_pgdat,end_address,PAGE_SIZE);
+#endif
+	}
+	return;
+}

  /*
   * Initially all pages are reserved - free ones are freed
@@ -1510,7 +1574,9 @@ void __init memmap_init_zone(unsigned lo
  {
  	struct page *start = pfn_to_page(start_pfn);
  	struct page *page;
-
+	unsigned long saved_start_pfn = start_pfn;
+	struct zone *zonep = zone_table[NODEZONE(nid, zone)];
+	
  	for (page = start; page < (start + size); page++) {
  		set_page_zone(page, NODEZONE(nid, zone));
  		set_page_count(page, 0);
@@ -1524,51 +1590,19 @@ void __init memmap_init_zone(unsigned lo
  #endif
  		start_pfn++;
  	}
-}
-
-/*
- * Page buddy system uses "index >> (i+1)", where "index" is
- * at most "size-1".
- *
- * The extra "+3" is to round down to byte size (8 bits per byte
- * assumption). Thus we get "(size-1) >> (i+4)" as the last byte
- * we can access.
- *
- * The "+1" is because we want to round the byte allocation up
- * rather than down. So we should have had a "+7" before we shifted
- * down by three. Also, we have to add one as we actually _use_ the
- * last bit (it's [0,n] inclusive, not [0,n[).
- *
- * So we actually had +7+1 before we shift down by 3. But
- * (n+8) >> 3 == (n >> 3) + 1 (modulo overflows, which we do not have).
- *
- * Finally, we LONG_ALIGN because all bitmap operations are on longs.
- */
-unsigned long pages_to_bitmap_size(unsigned long order, unsigned long nr_pages)
-{
-	unsigned long bitmap_size;
-
-	bitmap_size = (nr_pages-1) >> (order+4);
-	bitmap_size = LONG_ALIGN(bitmap_size+1);
+	/* Because memmap_init_zone() is called in suitable way
+	 * even if zone has memory holes,
+	 * calling calculate_aligned_end(zone) here is reasonable
+	 */
+	calculate_aligned_end(zonep, saved_start_pfn, size);

-	return bitmap_size;
  }

  void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, unsigned long size)
  {
  	int order;
-	for (order = 0; ; order++) {
-		unsigned long bitmap_size;
-
+	for (order = 0 ; order < MAX_ORDER ; order++) {
  		INIT_LIST_HEAD(&zone->free_area[order].free_list);
-		if (order == MAX_ORDER-1) {
-			zone->free_area[order].map = NULL;
-			break;
-		}
-
-		bitmap_size = pages_to_bitmap_size(order, size);
-		zone->free_area[order].map =
-		  (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
  	}
  }


_

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] buddy allocator without bitmap(2) [1/3]
  2004-08-31 10:41 [RFC] buddy allocator without bitmap(2) [1/3] Hiroyuki KAMEZAWA
@ 2004-08-31 16:30 ` Dave Hansen
  2004-08-31 22:55   ` [Lhms-devel] " Hiroyuki KAMEZAWA
  0 siblings, 1 reply; 5+ messages in thread
From: Dave Hansen @ 2004-08-31 16:30 UTC (permalink / raw)
  To: Hiroyuki KAMEZAWA; +Cc: Linux Kernel ML, linux-mm, lhms

On Tue, 2004-08-31 at 03:41, Hiroyuki KAMEZAWA wrote:
> +static void __init calculate_aligned_end(struct zone *zone,
> +					 unsigned long start_pfn,
> +					 int nr_pages)
...
> +		end_address = (zone->zone_start_pfn + end_idx) << PAGE_SHIFT;
> +#ifndef CONFIG_DISCONTIGMEM
> +		reserve_bootmem(end_address,PAGE_SIZE);
> +#else
> +		reserve_bootmem_node(zone->zone_pgdat,end_address,PAGE_SIZE);
> +#endif
> +	}
> +	return;
> +}

What if someone has already reserved that address?  You might not be
able to grow the zone, right?

>   /*
>    * Initially all pages are reserved - free ones are freed
> @@ -1510,7 +1574,9 @@ void __init memmap_init_zone(unsigned lo
>   {
>   	struct page *start = pfn_to_page(start_pfn);
>   	struct page *page;
> -
> +	unsigned long saved_start_pfn = start_pfn;
> +	struct zone *zonep = zone_table[NODEZONE(nid, zone)];

If you're going to calculate NODEZONE() twice, you might as well just
move it into its own variable.  

> +	/* Because memmap_init_zone() is called in suitable way
> +	 * even if zone has memory holes,
> +	 * calling calculate_aligned_end(zone) here is reasonable
> +	 */
> +	calculate_aligned_end(zonep, saved_start_pfn, size);

Could you please elaborate on "suitable way".  That comment really
doesn't say anything.

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Lhms-devel] Re: [RFC] buddy allocator without bitmap(2) [1/3]
  2004-08-31 16:30 ` Dave Hansen
@ 2004-08-31 22:55   ` Hiroyuki KAMEZAWA
  2004-08-31 23:12     ` Dave Hansen
  0 siblings, 1 reply; 5+ messages in thread
From: Hiroyuki KAMEZAWA @ 2004-08-31 22:55 UTC (permalink / raw)
  To: Dave Hansen; +Cc: Linux Kernel ML, linux-mm, lhms

Dave Hansen wrote:

> On Tue, 2004-08-31 at 03:41, Hiroyuki KAMEZAWA wrote:
> 
>>+static void __init calculate_aligned_end(struct zone *zone,
>>+					 unsigned long start_pfn,
>>+					 int nr_pages)
> 
> ...
> 
>>+		end_address = (zone->zone_start_pfn + end_idx) << PAGE_SHIFT;
>>+#ifndef CONFIG_DISCONTIGMEM
>>+		reserve_bootmem(end_address,PAGE_SIZE);
>>+#else
>>+		reserve_bootmem_node(zone->zone_pgdat,end_address,PAGE_SIZE);
>>+#endif
>>+	}
>>+	return;
>>+}
> 
> 
> What if someone has already reserved that address?  You might not be
> able to grow the zone, right?
> 
1) If someone has already reserved that address,  it (the page) will not join to
   buddy allocator and it's no problem.

2) No, I can grow the zone.
   A reserved page is the last page of "not aligned contiguous mem_map", not zone.

Does this answer your question?

I know this patch contains a bug: if a page is allocated when calculate_aligned_end()
is called, and is freed after that call, it is never reserved and joins the buddy system.

> 
>>+	/* Because memmap_init_zone() is called in suitable way
>>+	 * even if zone has memory holes,
>>+	 * calling calculate_aligned_end(zone) here is reasonable
>>+	 */
>>+	calculate_aligned_end(zonep, saved_start_pfn, size);
> 
> 
> Could you please elaborate on "suitable way".  That comment really
> doesn't say anything. 
I'll rewrite this.
/*
 *  calculate_aligned_end() has to be called by each contiguous mem_map.
 */




-- 
--the clue is these footmarks leading to the door.--
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Lhms-devel] Re: [RFC] buddy allocator without bitmap(2) [1/3]
  2004-08-31 22:55   ` [Lhms-devel] " Hiroyuki KAMEZAWA
@ 2004-08-31 23:12     ` Dave Hansen
  2004-08-31 23:36       ` Hiroyuki KAMEZAWA
  0 siblings, 1 reply; 5+ messages in thread
From: Dave Hansen @ 2004-08-31 23:12 UTC (permalink / raw)
  To: Hiroyuki KAMEZAWA; +Cc: Linux Kernel ML, linux-mm, lhms

On Tue, 2004-08-31 at 15:55, Hiroyuki KAMEZAWA wrote:
> Dave Hansen wrote:
> 
> > On Tue, 2004-08-31 at 03:41, Hiroyuki KAMEZAWA wrote:
> > 
> >>+static void __init calculate_aligned_end(struct zone *zone,
> >>+					 unsigned long start_pfn,
> >>+					 int nr_pages)
> > 
> > ...
> > 
> >>+		end_address = (zone->zone_start_pfn + end_idx) << PAGE_SHIFT;
> >>+#ifndef CONFIG_DISCONTIGMEM
> >>+		reserve_bootmem(end_address,PAGE_SIZE);
> >>+#else
> >>+		reserve_bootmem_node(zone->zone_pgdat,end_address,PAGE_SIZE);
> >>+#endif
> >>+	}
> >>+	return;
> >>+}
> > 
> > 
> > What if someone has already reserved that address?  You might not be
> > able to grow the zone, right?
> > 
> 1) If someone has already reserved that address,  it (the page) will not join to
>    buddy allocator and it's no problem.
> 
> 2) No, I can grow the zone.
>    A reserved page is the last page of "not aligned contiguous mem_map", not zone.
> 
> I answer your question ?

If the end of the zone isn't aligned, you simply waste memory until it becomes aligned, right?

> I know this patch contains a bug: if a page is allocated when calculate_aligned_end()
> is called, and is freed after that call, it is never reserved and joins the buddy system.

If you adjust the zone_spanned pages properly, this shouldn't happen.

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [Lhms-devel] Re: [RFC] buddy allocator without bitmap(2) [1/3]
  2004-08-31 23:12     ` Dave Hansen
@ 2004-08-31 23:36       ` Hiroyuki KAMEZAWA
  0 siblings, 0 replies; 5+ messages in thread
From: Hiroyuki KAMEZAWA @ 2004-08-31 23:36 UTC (permalink / raw)
  To: Dave Hansen; +Cc: Linux Kernel ML, linux-mm, lhms

Dave Hansen wrote:

> On Tue, 2004-08-31 at 15:55, Hiroyuki KAMEZAWA wrote:
> 
>>Dave Hansen wrote:
>>
>>
>>>On Tue, 2004-08-31 at 03:41, Hiroyuki KAMEZAWA wrote:
>>>
>>>
>>>>+static void __init calculate_aligned_end(struct zone *zone,
>>>>+					 unsigned long start_pfn,
>>>>+					 int nr_pages)
>>>
>>>...
>>>
>>>
>>>>+		end_address = (zone->zone_start_pfn + end_idx) << PAGE_SHIFT;
>>>>+#ifndef CONFIG_DISCONTIGMEM
>>>>+		reserve_bootmem(end_address,PAGE_SIZE);
>>>>+#else
>>>>+		reserve_bootmem_node(zone->zone_pgdat,end_address,PAGE_SIZE);
>>>>+#endif
>>>>+	}
>>>>+	return;
>>>>+}
>>>
>>>
>>>What if someone has already reserved that address?  You might not be
>>>able to grow the zone, right?
>>>
>>
>>1) If someone has already reserved that address,  it (the page) will not join to
>>   buddy allocator and it's no problem.
>>
>>2) No, I can grow the zone.
>>   A reserved page is the last page of "not aligned contiguous mem_map", not zone.
>>
>>I answer your question ?
> 
> 
> If the end of the zone isn't aligned, you simply waste memory until it becomes aligned, right?
> 
No. I waste just one page, the end page of mem_map.
When the end of mem_map is not aligned, there are 2 cases.

case 1) length of mem_map is even number.
 -------------------------------
 |  |  |  |  |C |  |B |  |A | X|  no-page-area    order=0
 -------------------------------
 |     |     |C    |B    |                        order=1
 -------------------------
 |           |C          |                        order=2
 -------------------------
X is reserved and will not join to buddy system.
By doing this,
page "A" has no buddy in order=0, "X" is reserved.
page "B" has no buddy in order=1, "A" is order 0.
page "C" has no buddy in order=2, "A" is order 0.
..........

case 2) length of mem_map is odd number.
-----------------------------
 |  |  |  |  |C |  |B |  |X |    no-page-area    order=0
 ----------------------------
 |     |     |C    |B    |                       order=1
 -------------------------
 |           |C          |                       order=2
 -------------------------
page "B" has no buddy in order=1, X is reserved.
.........

Access to no-page-area in buddy system does not occur.

-- Kame

-- 
--the clue is these footmarks leading to the door.--
KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2004-08-31 23:31 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2004-08-31 10:41 [RFC] buddy allocator without bitmap(2) [1/3] Hiroyuki KAMEZAWA
2004-08-31 16:30 ` Dave Hansen
2004-08-31 22:55   ` [Lhms-devel] " Hiroyuki KAMEZAWA
2004-08-31 23:12     ` Dave Hansen
2004-08-31 23:36       ` Hiroyuki KAMEZAWA

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox