* [PATCH]: Clean up of __alloc_pages
From: Seth, Rohit @ 2005-10-01 19:00 UTC
  To: akpm; +Cc: linux-mm, linux-kernel

	[PATCH]: Below is a cleanup of the __alloc_pages code.  The
		 changes from the original version are:

	1: Remove the initial direct reclaim logic.
	2: Order-0 allocations now look in the pcp lists upfront
	   (a standalone sketch of this fast path follows the list).
	3: __GFP_HIGH allocations may dip below the low watermark sooner.
	4: Search for free pages unconditionally after direct reclaim.
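
	A minimal, standalone C sketch of change 2 follows; this is
	not the patch itself.  pcp_take(), alloc_order0_fast() and the
	array-backed list are made-up stand-ins for the 2.6.14-era
	structures; only the control flow (take a page from a pcp list
	when one is already cached, never refill on the fast path)
	mirrors what buffered_rmqueue()'s new replenish argument does.

#include <stddef.h>

struct page;

struct pcp_sketch {
	int count;			/* pages currently cached on this CPU */
	struct page *pages[64];		/* simplified stand-in for pcp->list */
};

struct zone_sketch {
	struct pcp_sketch pcp;
};

/*
 * Stand-in for the new buffered_rmqueue(..., replenish) behaviour:
 * with replenish == 0 we only take a page that is already cached and
 * never touch the buddy lists.
 */
static struct page *pcp_take(struct zone_sketch *z, int replenish)
{
	if (z->pcp.count == 0) {
		if (!replenish)
			return NULL;	/* fast path: leave buddy lists alone */
		/* the slow path would call rmqueue_bulk() here to refill */
	}
	if (z->pcp.count)
		return z->pcp.pages[--z->pcp.count];
	return NULL;
}

/*
 * Change 2: for order-0 requests, scan every zone's pcp cache before
 * doing any watermark checks or replenishment.
 */
static struct page *alloc_order0_fast(struct zone_sketch **zones)
{
	int i;

	for (i = 0; zones[i] != NULL; i++) {
		struct page *page = pcp_take(zones[i], 0);
		if (page)
			return page;
	}
	return NULL;	/* caller falls through to get_page_from_freelist() */
}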

	Signed-off-by: Rohit Seth <rohit.seth@intel.com>


--- linux-2.6.14-rc2-mm1.org/mm/page_alloc.c	2005-09-27 10:03:51.000000000 -0700
+++ linux-2.6.14-rc2-mm1/mm/page_alloc.c	2005-10-01 10:40:06.000000000 -0700
@@ -722,7 +722,8 @@
  * or two.
  */
 static struct page *
-buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags)
+buffered_rmqueue(struct zone *zone, int order, unsigned int __nocast gfp_flags,
+			int replenish)
 {
 	unsigned long flags;
 	struct page *page = NULL;
@@ -733,7 +734,7 @@
 
 		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 		local_irq_save(flags);
-		if (pcp->count <= pcp->low)
+		if ((pcp->count <= pcp->low) && replenish)
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
 		if (pcp->count) {
@@ -743,9 +744,7 @@
 		}
 		local_irq_restore(flags);
 		put_cpu();
-	}
-
-	if (page == NULL) {
+	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
 		spin_unlock_irqrestore(&zone->lock, flags);
@@ -858,6 +857,44 @@
 }
 #endif /* CONFIG_PAGE_OWNER */
 
+/*
+ * get_page_from_freelist() loops through all the possible zones to
+ * find one it can allocate a page from.  can_try_harder takes these values:
+ * -1 => Don't check the watermarks at all.
+ *  0 => Don't go far below the low watermark (__GFP_HIGH may dip below it).
+ *  1 => Go further below the low watermark; see zone_watermark_ok() (RT tasks).
+ */
+
+static inline struct page *
+get_page_from_freelist(unsigned int __nocast gfp_mask, unsigned int order, 
+			struct zone **zones, int can_try_harder)
+{
+	struct zone *z;
+	struct page *page = NULL;
+	int classzone_idx = zone_idx(zones[0]);
+	int i;
+
+	/*
+	 * Go through the zonelist once, looking for a zone with enough free.
+	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+	 */
+	for (i = 0; (z = zones[i]) != NULL; i++) {
+		if (!cpuset_zone_allowed(z, gfp_mask))
+			continue;
+
+		if ((can_try_harder >= 0) && 
+			(!zone_watermark_ok(z, order, z->pages_low,
+				       classzone_idx, can_try_harder, 
+				       gfp_mask & __GFP_HIGH)))
+			continue;
+
+		page = buffered_rmqueue(z, order, gfp_mask, 1);
+		if (page) 
+			break;
+	}
+	return page;
+}
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -866,15 +903,13 @@
 		struct zonelist *zonelist)
 {
 	const int wait = gfp_mask & __GFP_WAIT;
-	struct zone **zones, *z;
+	struct zone **zones, *z = NULL;
 	struct page *page;
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
-	int classzone_idx;
 	int do_retry;
 	int can_try_harder;
-	int did_some_progress;
 
 	might_sleep_if(wait);
 
@@ -892,45 +927,23 @@
 		return NULL;
 	}
 
-	classzone_idx = zone_idx(zones[0]);
-
-restart:
-	/*
-	 * Go through the zonelist once, looking for a zone with enough free.
-	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+	/* For order-0 requests, first check if we can easily take a page
+	 * from any pcp list.  Don't replenish any pcp list at this time.
 	 */
-	for (i = 0; (z = zones[i]) != NULL; i++) {
-		int do_reclaim = should_reclaim_zone(z, gfp_mask);
-
-		if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-			continue;
-
-		/*
-		 * If the zone is to attempt early page reclaim then this loop
-		 * will try to reclaim pages and check the watermark a second
-		 * time before giving up and falling back to the next zone.
-		 */
-zone_reclaim_retry:
-		if (!zone_watermark_ok(z, order, z->pages_low,
-				       classzone_idx, 0, 0)) {
-			if (!do_reclaim)
-				continue;
-			else {
-				zone_reclaim(z, gfp_mask, order);
-				/* Only try reclaim once */
-				do_reclaim = 0;
-				goto zone_reclaim_retry;
-			}
+	if (order == 0) {
+		for (i = 0; (z = zones[i]) != NULL; i++) {
+			page = buffered_rmqueue(z, 0, gfp_mask, 0);
+			if (page) 
+				goto got_pg;
 		}
-
-		page = buffered_rmqueue(z, order, gfp_mask);
-		if (page)
-			goto got_pg;
 	}
+restart:
+	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, zones, 0);
+	if (page)
+		goto got_pg;
 
 	for (i = 0; (z = zones[i]) != NULL; i++)
 		wakeup_kswapd(z, order);
-
 	/*
 	 * Go through the zonelist again. Let __GFP_HIGH and allocations
 	 * coming from realtime tasks to go deeper into reserves
@@ -939,19 +952,12 @@
 	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	for (i = 0; (z = zones[i]) != NULL; i++) {
-		if (!zone_watermark_ok(z, order, z->pages_min,
-				       classzone_idx, can_try_harder,
-				       gfp_mask & __GFP_HIGH))
-			continue;
 
-		if (wait && !cpuset_zone_allowed(z, gfp_mask))
-			continue;
-
-		page = buffered_rmqueue(z, order, gfp_mask);
-		if (page)
-			goto got_pg;
-	}
+	if (!wait)
+		page = get_page_from_freelist(gfp_mask, order, zones, 
+						can_try_harder);
+	if (page)
+		goto got_pg;
 
 	/* This allocation should allow future memory freeing. */
 
@@ -959,13 +965,9 @@
 			&& !in_interrupt()) {
 		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 			/* go through the zonelist yet again, ignoring mins */
-			for (i = 0; (z = zones[i]) != NULL; i++) {
-				if (!cpuset_zone_allowed(z, gfp_mask))
-					continue;
-				page = buffered_rmqueue(z, order, gfp_mask);
-				if (page)
-					goto got_pg;
-			}
+			page = get_page_from_freelist(gfp_mask, order, zones, -1);
+			if (page)
+				goto got_pg;
 		}
 		goto nopage;
 	}
@@ -982,47 +984,21 @@
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zones, gfp_mask);
+	i = try_to_free_pages(zones, gfp_mask);
 
 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
 
 	cond_resched();
 
-	if (likely(did_some_progress)) {
-		for (i = 0; (z = zones[i]) != NULL; i++) {
-			if (!zone_watermark_ok(z, order, z->pages_min,
-					       classzone_idx, can_try_harder,
-					       gfp_mask & __GFP_HIGH))
-				continue;
-
-			if (!cpuset_zone_allowed(z, gfp_mask))
-				continue;
-
-			page = buffered_rmqueue(z, order, gfp_mask);
-			if (page)
-				goto got_pg;
-		}
-	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
+	page = get_page_from_freelist(gfp_mask, order, zones,
+					can_try_harder);
+	if (page)
+		goto got_pg;
+	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 		/*
-		 * Go through the zonelist yet one more time, keep
-		 * very high watermark here, this is only to catch
-		 * a parallel oom killing, we must fail if we're still
-		 * under heavy pressure.
+		 * Start the OOM killer for this scenario.
 		 */
-		for (i = 0; (z = zones[i]) != NULL; i++) {
-			if (!zone_watermark_ok(z, order, z->pages_high,
-					       classzone_idx, 0, 0))
-				continue;
-
-			if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-				continue;
-
-			page = buffered_rmqueue(z, order, gfp_mask);
-			if (page)
-				goto got_pg;
-		}
-
 		out_of_memory(gfp_mask, order);
 		goto restart;
 	}
@@ -1060,7 +1036,7 @@
 #ifdef CONFIG_PAGE_OWNER
 	set_page_owner(page, order, gfp_mask);
 #endif
-	zone_statistics(zonelist, z);
+	zone_statistics(zonelist, page_zone(page));
 	return page;
 }
 

* [PATCH]: Clean up of __alloc_pages
From: Rohit, Seth @ 2005-10-29  1:33 UTC
  To: akpm; +Cc: linux-mm, linux-kernel

The only changes in this cleanup are:

	1- Remove the initial direct reclaim logic.
	2- __GFP_HIGH allocations may dip below the low watermark sooner
	   (see the watermark sketch below).
	3- Search for free pages unconditionally after direct reclaim.

I have not added the logic of looking into the PCP lists first in this
rev of the patch.  I will send a separate patch adding that support (it
needs extra logic for NUMA).
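
For reference, here is a standalone sketch (illustrative only, not the
patch) of the can_try_harder contract that get_page_from_freelist()
relies on.  The half/quarter scaling follows what zone_watermark_ok()
does in this kernel, but zone_has_room() is a made-up helper name and
the sketch ignores the per-order free-area checks the real function
also performs.

/*
 * Illustrative stand-in for the policy zone_watermark_ok() implements:
 *   can_try_harder == -1: no watermark check at all (ignore mins)
 *   can_try_harder ==  0: stay above the (possibly reduced) low mark
 *   can_try_harder ==  1: RT task, dip a further quarter below it
 */
static int zone_has_room(unsigned long free_pages, unsigned long pages_low,
			 int can_try_harder, int gfp_high)
{
	long min = (long)pages_low;

	if (can_try_harder < 0)
		return 1;		/* PF_MEMALLOC-style: no check */
	if (gfp_high)
		min -= min / 2;		/* __GFP_HIGH may use half the reserve */
	if (can_try_harder > 0)
		min -= min / 4;		/* RT tasks dip another quarter */
	return (long)free_pages >= min;
}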

	Signed-off-by: Rohit Seth <rohit.seth@intel.com>


diff -Naru linux-2.6.14.org/mm/page_alloc.c linux-2.6.14/mm/page_alloc.c
--- linux-2.6.14.org/mm/page_alloc.c	2005-10-27 17:02:08.000000000 -0700
+++ linux-2.6.14/mm/page_alloc.c	2005-10-28 10:11:39.000000000 -0700
@@ -685,8 +685,8 @@
  * we cheat by calling it from here, in the order > 0 path.  Saves a branch
  * or two.
  */
-static struct page *
-buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
+static inline struct page *
+buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags, int replenish)
 {
 	unsigned long flags;
 	struct page *page = NULL;
@@ -697,7 +697,7 @@
 
 		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 		local_irq_save(flags);
-		if (pcp->count <= pcp->low)
+		if ((pcp->count <= pcp->low) && replenish)
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
 		if (pcp->count) {
@@ -707,9 +707,7 @@
 		}
 		local_irq_restore(flags);
 		put_cpu();
-	}
-
-	if (page == NULL) {
+	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
 		spin_unlock_irqrestore(&zone->lock, flags);
@@ -770,6 +768,44 @@
 	return 1;
 }
 
+/*
+ * get_page_from_freelist() loops through all the possible zones to
+ * find one it can allocate a page from.  can_try_harder takes these values:
+ * -1 => Don't check the watermarks at all.
+ *  0 => Don't go far below the low watermark (__GFP_HIGH may dip below it).
+ *  1 => Go further below the low watermark; see zone_watermark_ok() (RT tasks).
+ */
+
+static struct page *
+get_page_from_freelist(unsigned int __nocast gfp_mask, unsigned int order, 
+			struct zone **zones, int can_try_harder)
+{
+	struct zone *z;
+	struct page *page = NULL;
+	int classzone_idx = zone_idx(zones[0]);
+	int i;
+
+	/*
+	 * Go through the zonelist once, looking for a zone with enough free.
+	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+	 */
+	for (i = 0; (z = zones[i]) != NULL; i++) {
+		if (!cpuset_zone_allowed(z, gfp_mask))
+			continue;
+
+		if ((can_try_harder >= 0) && 
+			(!zone_watermark_ok(z, order, z->pages_low,
+				       classzone_idx, can_try_harder, 
+				       gfp_mask & __GFP_HIGH)))
+			continue;
+
+		page = buffered_rmqueue(z, order, gfp_mask, 1);
+		if (page) 
+			break;
+	}
+	return page;
+}
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
@@ -778,15 +814,13 @@
 		struct zonelist *zonelist)
 {
 	const int wait = gfp_mask & __GFP_WAIT;
-	struct zone **zones, *z;
+	struct zone **zones, *z = NULL;
 	struct page *page;
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
-	int classzone_idx;
 	int do_retry;
 	int can_try_harder;
-	int did_some_progress;
 
 	might_sleep_if(wait);
 
@@ -803,42 +837,10 @@
 		/* Should this ever happen?? */
 		return NULL;
 	}
-
-	classzone_idx = zone_idx(zones[0]);
-
 restart:
-	/*
-	 * Go through the zonelist once, looking for a zone with enough free.
-	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
-	 */
-	for (i = 0; (z = zones[i]) != NULL; i++) {
-		int do_reclaim = should_reclaim_zone(z, gfp_mask);
-
-		if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-			continue;
-
-		/*
-		 * If the zone is to attempt early page reclaim then this loop
-		 * will try to reclaim pages and check the watermark a second
-		 * time before giving up and falling back to the next zone.
-		 */
-zone_reclaim_retry:
-		if (!zone_watermark_ok(z, order, z->pages_low,
-				       classzone_idx, 0, 0)) {
-			if (!do_reclaim)
-				continue;
-			else {
-				zone_reclaim(z, gfp_mask, order);
-				/* Only try reclaim once */
-				do_reclaim = 0;
-				goto zone_reclaim_retry;
-			}
-		}
-
-		page = buffered_rmqueue(z, order, gfp_mask);
-		if (page)
-			goto got_pg;
-	}
+	page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, zones, 0);
+	if (page)
+		goto got_pg;
 
 	for (i = 0; (z = zones[i]) != NULL; i++)
 		wakeup_kswapd(z, order);
@@ -851,19 +853,11 @@
 	 * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
 	 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 	 */
-	for (i = 0; (z = zones[i]) != NULL; i++) {
-		if (!zone_watermark_ok(z, order, z->pages_min,
-				       classzone_idx, can_try_harder,
-				       gfp_mask & __GFP_HIGH))
-			continue;
-
-		if (wait && !cpuset_zone_allowed(z, gfp_mask))
-			continue;
-
-		page = buffered_rmqueue(z, order, gfp_mask);
-		if (page)
-			goto got_pg;
-	}
+	if (!wait)
+		page = get_page_from_freelist(gfp_mask, order, zones, 
+						can_try_harder);
+	if (page)
+		goto got_pg;
 
 	/* This allocation should allow future memory freeing. */
 
@@ -871,13 +865,9 @@
 			&& !in_interrupt()) {
 		if (!(gfp_mask & __GFP_NOMEMALLOC)) {
 			/* go through the zonelist yet again, ignoring mins */
-			for (i = 0; (z = zones[i]) != NULL; i++) {
-				if (!cpuset_zone_allowed(z, gfp_mask))
-					continue;
-				page = buffered_rmqueue(z, order, gfp_mask);
-				if (page)
-					goto got_pg;
-			}
+			page = get_page_from_freelist(gfp_mask, order, zones, -1);
+			if (page)
+				goto got_pg;
 		}
 		goto nopage;
 	}
@@ -894,47 +884,20 @@
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zones, gfp_mask);
+	i = try_to_free_pages(zones, gfp_mask);
 
 	p->reclaim_state = NULL;
 	p->flags &= ~PF_MEMALLOC;
 
 	cond_resched();
 
-	if (likely(did_some_progress)) {
-		for (i = 0; (z = zones[i]) != NULL; i++) {
-			if (!zone_watermark_ok(z, order, z->pages_min,
-					       classzone_idx, can_try_harder,
-					       gfp_mask & __GFP_HIGH))
-				continue;
-
-			if (!cpuset_zone_allowed(z, gfp_mask))
-				continue;
-
-			page = buffered_rmqueue(z, order, gfp_mask);
-			if (page)
-				goto got_pg;
-		}
-	} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
+	page = get_page_from_freelist(gfp_mask, order, zones, can_try_harder);
+	if (page)
+		goto got_pg;
+	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
 		/*
-		 * Go through the zonelist yet one more time, keep
-		 * very high watermark here, this is only to catch
-		 * a parallel oom killing, we must fail if we're still
-		 * under heavy pressure.
+		 * Start the OOM killer here.
 		 */
-		for (i = 0; (z = zones[i]) != NULL; i++) {
-			if (!zone_watermark_ok(z, order, z->pages_high,
-					       classzone_idx, 0, 0))
-				continue;
-
-			if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
-				continue;
-
-			page = buffered_rmqueue(z, order, gfp_mask);
-			if (page)
-				goto got_pg;
-		}
-
 		out_of_memory(gfp_mask, order);
 		goto restart;
 	}
@@ -968,7 +931,7 @@
 	}
 	return NULL;
 got_pg:
-	zone_statistics(zonelist, z);
+	zone_statistics(zonelist, page_zone(page));
 	return page;
 }
 

* RE: [PATCH]: Clean up of __alloc_pages
From: Seth, Rohit @ 2005-11-05  1:57 UTC
  To: Nick Piggin; +Cc: akpm, linux-mm, linux-kernel

From: Nick Piggin [mailto:nickpiggin@yahoo.com.au] 


>I really don't want a change of behaviour going in with this,
>especially not one which I would want to revert anyway. But
>don't get hung up with it - when you post your latest patch
>I will make a patch for the changes I would like to see for it
>and synch things up.


Well, those reverts are what I'm trying to avoid ;-)

