From: "Martin J. Bligh" <Martin.Bligh@us.ibm.com>
To: Andrew Morton <akpm@zip.com.au>
Cc: Bill Irwin <wli@holomorphy.com>, linux-mm@kvack.org
Subject: alloc_pages_bulk
Date: Mon, 22 Jul 2002 11:40:48 -0700
Message-ID: <1615040000.1027363248@flay>
Below is a first cut at a bulk page allocator. It has had no testing
whatsoever; it hasn't even been compiled. I just want some feedback on the
approach, so that if I get slapped, I'm not too far down a path I have to
back out of.

The __alloc_pages cleanup is tacked on the end as well, because I'm too lazy
to create separate diff trees - sorry ;-)
Comments, opinions, abuse?
M.
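
To make the intended interface concrete, here's roughly how I'd expect a
caller to use it. Illustrative only - the function name and the batch size
of 16 are made up; the interface is the one in gfp.h below, returning the
number of pages actually placed in the array (possibly fewer than asked for):

static unsigned long my_fill_pool(struct page **pool)
{
	unsigned long got;

	/* one call covers the whole batch, so rmqueue() takes
	 * zone->lock once instead of 16 times */
	got = alloc_pages_bulk(GFP_KERNEL, 0, 16, pool);

	/* got may be anything from 0 to 16; the caller has to cope
	 * with a short allocation, e.g. fall back to single-page
	 * allocations, or just use pool[0 .. got-1] */
	return got;
}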
diff -urN virgin-2.5.25/include/linux/gfp.h 2.5.25-A04-alloc_pages_bulk/include/linux/gfp.h
--- virgin-2.5.25/include/linux/gfp.h Fri Jul 5 16:42:19 2002
+++ 2.5.25-A04-alloc_pages_bulk/include/linux/gfp.h Mon Jul 22 11:27:49 2002
@@ -45,12 +45,23 @@
static inline struct page * alloc_pages(unsigned int gfp_mask, unsigned int order)
{
+ struct page *page;
/*
* Gets optimized away by the compiler.
*/
if (order >= MAX_ORDER)
return NULL;
- return _alloc_pages(gfp_mask, order);
+ if (_alloc_pages(gfp_mask, order, 1, &page))
+ return page;
+ return NULL;
+}
+
+static inline unsigned long alloc_pages_bulk(unsigned int gfp_mask, unsigned int order, unsigned long count, struct page **pages)
+{
+ /*
+ * Gets optimized away by the compiler.
+ */
+ if (order >= MAX_ORDER)
+ return 0;
+ return _alloc_pages(gfp_mask, order, count, pages);
}
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
diff -urN virgin-2.5.25/mm/numa.c 2.5.25-A04-alloc_pages_bulk/mm/numa.c
--- virgin-2.5.25/mm/numa.c Fri Jul 5 16:42:20 2002
+++ 2.5.25-A04-alloc_pages_bulk/mm/numa.c Mon Jul 22 11:14:53 2002
@@ -31,32 +31,29 @@
#endif /* !CONFIG_DISCONTIGMEM */
-struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order)
+struct page * alloc_pages_node(int nid, unsigned int gfp_mask,
+ unsigned int order)
{
-#ifdef CONFIG_NUMA
- return __alloc_pages(gfp_mask, order, NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
-#else
- return alloc_pages(gfp_mask, order);
-#endif
+ struct page *page;
+
+ if (__alloc_pages(gfp_mask, order,
+ NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK),
+ 1, &page))
+ return page;
+ return NULL;
}
#ifdef CONFIG_DISCONTIGMEM
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
-static spinlock_t node_lock = SPIN_LOCK_UNLOCKED;
-
void show_free_areas_node(pg_data_t *pgdat)
{
unsigned long flags;
- spin_lock_irqsave(&node_lock, flags);
show_free_areas_core(pgdat);
- spin_unlock_irqrestore(&node_lock, flags);
}
/*
- * Nodes can be initialized parallely, in no particular order.
+ * Nodes can be initialized in parallel, in no particular order.
*/
void __init free_area_init_node(int nid, pg_data_t *pgdat, struct page *pmap,
unsigned long *zones_size, unsigned long zone_start_paddr,
@@ -82,49 +79,69 @@
memset(pgdat->valid_addr_bitmap, 0, size);
}
-static struct page * alloc_pages_pgdat(pg_data_t *pgdat, unsigned int gfp_mask,
- unsigned int order)
+/*
+ * Walk the global list of pg_data_t's starting at specified start point
+ */
+static inline unsigned long _alloc_pages_pgdat_walk(pg_data_t *start,
+ unsigned int gfp_mask, unsigned int order,
+ unsigned long count, struct page **pages)
{
- return __alloc_pages(gfp_mask, order, pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK));
+ unsigned long page_count = 0;
+ pg_data_t *pgdat = start;
+
+ /* walk the second part of the list from start to the list tail */
+ for (pgdat = start; pgdat != NULL; pgdat = pgdat->node_next) {
+ if ((page_count = __alloc_pages(gfp_mask, order,
+ pgdat->node_zonelists +
+ (gfp_mask & GFP_ZONEMASK),
+ count, pages)))
+ return(page_count);
+ }
+ /* walk the first part of the list from the list head to start */
+ for (pgdat = pgdat_list; pgdat != start; pgdat = pgdat->node_next) {
+ if ((page_count = __alloc_pages(gfp_mask, order,
+ pgdat->node_zonelists +
+ (gfp_mask & GFP_ZONEMASK),
+ count, pages)))
+ return(page_count);
+ }
+ return(0);
}
+#ifdef CONFIG_NUMA
+
/*
* This can be refined. Currently, tries to do round robin, instead
* should do concentratic circle search, starting from current node.
*/
-struct page * _alloc_pages(unsigned int gfp_mask, unsigned int order)
+unsigned long _alloc_pages(unsigned int gfp_mask, unsigned int order,
+ unsigned long count, struct page **pages)
+{
+ /* start at the current node, then do round robin */
+ return _alloc_pages_pgdat_walk(NODE_DATA(numa_node_id()),
+ gfp_mask, order, count, pages);
+}
+
+#else /* !CONFIG_NUMA */
+
+unsigned long _alloc_pages(unsigned int gfp_mask, unsigned int order,
+ unsigned long count, struct page **pages)
{
- struct page *ret = 0;
- pg_data_t *start, *temp;
-#ifndef CONFIG_NUMA
- unsigned long flags;
static pg_data_t *next = 0;
-#endif
+ pg_data_t *temp;
- if (order >= MAX_ORDER)
- return NULL;
-#ifdef CONFIG_NUMA
- temp = NODE_DATA(numa_node_id());
-#else
- spin_lock_irqsave(&node_lock, flags);
+ /*
+ * As the next ptr is static, it preserves our position between
+ * calls, and we round-robin across the memory segments to balance
+ * the pressure.
+ */
if (!next) next = pgdat_list;
temp = next;
next = next->node_next;
- spin_unlock_irqrestore(&node_lock, flags);
-#endif
- start = temp;
- while (temp) {
- if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
- return(ret);
- temp = temp->node_next;
- }
- temp = pgdat_list;
- while (temp != start) {
- if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
- return(ret);
- temp = temp->node_next;
- }
- return(0);
+
+ return _alloc_pages_pgdat_walk(temp, gfp_mask, order, count, pages);
}
+
+#endif /* CONFIG_NUMA */
#endif /* CONFIG_DISCONTIGMEM */
diff -urN virgin-2.5.25/mm/page_alloc.c 2.5.25-A04-alloc_pages_bulk/mm/page_alloc.c
--- virgin-2.5.25/mm/page_alloc.c Fri Jul 5 16:42:03 2002
+++ 2.5.25-A04-alloc_pages_bulk/mm/page_alloc.c Mon Jul 22 11:15:00 2002
@@ -187,16 +187,13 @@
set_page_count(page, 1);
}
-static FASTCALL(struct page * rmqueue(zone_t *zone, unsigned int order));
-static struct page * rmqueue(zone_t *zone, unsigned int order)
+static inline struct page * __rmqueue(zone_t *zone, unsigned int order)
{
free_area_t * area = zone->free_area + order;
unsigned int curr_order = order;
struct list_head *head, *curr;
- unsigned long flags;
struct page *page;
- spin_lock_irqsave(&zone->lock, flags);
do {
head = &area->free_list;
curr = head->next;
@@ -213,21 +210,40 @@
zone->free_pages -= 1UL << order;
page = expand(zone, page, index, order, curr_order, area);
- spin_unlock_irqrestore(&zone->lock, flags);
-
- if (bad_range(zone, page))
- BUG();
- prep_new_page(page);
return page;
}
curr_order++;
area++;
} while (curr_order < MAX_ORDER);
- spin_unlock_irqrestore(&zone->lock, flags);
return NULL;
}
+static FASTCALL(unsigned long rmqueue(zone_t *zone, unsigned int order,
+ unsigned long count, struct page *pages[]));
+static unsigned long rmqueue(zone_t *zone, unsigned int order,
+ unsigned long count, struct page **pages)
+{
+ unsigned long flags;
+ int i, allocated = 0;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ for (i = 0; i < count; ++i) {
+ pages[i] = __rmqueue(zone, order);
+ if (pages[i] == NULL)
+ break;
+ ++allocated;
+ }
+ spin_unlock_irqrestore(&zone->lock, flags);
+
+ for (i = 0; i < allocated; ++i) {
+ if (bad_range(zone, pages[i]))
+ BUG();
+ prep_new_page(pages[i]);
+ }
+ return allocated;
+}
+
#ifdef CONFIG_SOFTWARE_SUSPEND
int is_head_of_free_region(struct page *page)
{
@@ -253,10 +269,12 @@
#endif /* CONFIG_SOFTWARE_SUSPEND */
#ifndef CONFIG_DISCONTIGMEM
-struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
+unsigned long _alloc_pages(unsigned int gfp_mask, unsigned int order,
+ unsigned long count, struct page **pages)
{
return __alloc_pages(gfp_mask, order,
- contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
+ contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK),
+ count, pages);
}
#endif
@@ -316,26 +334,27 @@
/*
* This is the 'heart' of the zoned buddy allocator:
*/
-struct page * __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)
+unsigned long __alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist, unsigned long count, struct page **pages)
{
unsigned long min;
- zone_t **zone, * classzone;
+ zone_t **zones, * classzone;
struct page * page;
- int freed;
+ int freed, i;
- zone = zonelist->zones;
- classzone = *zone;
- if (classzone == NULL)
+ zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
+ classzone = zones[0];
+ if (classzone == NULL) /* no zones in the zonelist */
return NULL;
- min = 1UL << order;
- for (;;) {
- zone_t *z = *(zone++);
- if (!z)
- break;
+ /* Go through the zonelist once, looking for a zone with enough free */
+ min = count << order;
+ for (i = 0; zones[i] != NULL; i++) {
+ zone_t *z = zones[i];
+
+ /* the incremental min is allegedly to discourage fallback */
min += z->pages_low;
if (z->free_pages > min) {
- page = rmqueue(z, order);
+ page = rmqueue(z, order, count, pages);
if (page)
return page;
}
@@ -343,23 +362,22 @@
classzone->need_balance = 1;
mb();
+ /* we're somewhat low on memory, failed to find what we needed */
if (waitqueue_active(&kswapd_wait))
wake_up_interruptible(&kswapd_wait);
- zone = zonelist->zones;
- min = 1UL << order;
- for (;;) {
+ /* Go through the zonelist again, taking __GFP_HIGH into account */
+ min = count << order;
+ for (i = 0; zones[i] != NULL; i++) {
unsigned long local_min;
- zone_t *z = *(zone++);
- if (!z)
- break;
+ zone_t *z = zones[i];
local_min = z->pages_min;
if (gfp_mask & __GFP_HIGH)
local_min >>= 2;
min += local_min;
if (z->free_pages > min) {
- page = rmqueue(z, order);
+ page = rmqueue(z, order, count, pages);
if (page)
return page;
}
@@ -369,13 +387,11 @@
rebalance:
if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
- zone = zonelist->zones;
- for (;;) {
- zone_t *z = *(zone++);
- if (!z)
- break;
+ /* go through the zonelist yet again, ignoring mins */
+ for (i = 0; zones[i] != NULL; i++) {
+ zone_t *z = zones[i];
- page = rmqueue(z, order);
+ page = rmqueue(z, order, count, pages);
if (page)
return page;
}
@@ -396,16 +412,14 @@
if (page)
return page;
- zone = zonelist->zones;
- min = 1UL << order;
- for (;;) {
- zone_t *z = *(zone++);
- if (!z)
- break;
+ /* go through the zonelist yet one more time */
+ min = count << order;
+ for (i = 0; zones[i] != NULL; i++) {
+ zone_t *z = zones[i];
min += z->pages_min;
if (z->free_pages > min) {
- page = rmqueue(z, order);
+ page = rmqueue(z, order, count, pages);
if (page)
return page;
}
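
P.S. One behavioural change worth checking my thinking on: the zonelist scans
now start from min = count << order instead of 1UL << order, so the watermark
headroom scales with the whole batch rather than a single allocation. Rough
worked example (the pages_low value is made up):

	/* bulk request: count = 16, order = 0 */
	min = count << order;		/* min = 16                          */
	min += z->pages_low;		/* say pages_low = 255, so min = 271 */
	if (z->free_pages > min)	/* old code needed free_pages > 256  */
		...			/* here; now we need > 271, i.e.     */
					/* headroom for the whole batch      */

So a large count makes us skip marginal zones (and wake kswapd) earlier than
the single-page allocator would. I think that's what we want, but it could use
a second pair of eyes.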