From: "Rohit, Seth" <rohit.seth@intel.com>
To: akpm@osdl.org
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org
Subject: [PATCH]: Clean up of __alloc_pages
Date: Fri, 28 Oct 2005 18:33:26 -0700 [thread overview]
Message-ID: <20051028183326.A28611@unix-os.sc.intel.com> (raw)
the only changes in this clean up are:
1- remove the initial direct reclaim logic
2- GFP_HIGH pages are allowed to go a little below the low watermark sooner
3- Search for free pages unconditionally after direct reclaim
I've not added the logic of looking into PCPs first in this rev of patch. I will send a
separate patch for adding that support (needing extra logic for NUMA).
Signed-off-by: Rohit Seth <rohit.seth@intel.com>
diff -Naru linux-2.6.14.org/mm/page_alloc.c linux-2.6.14/mm/page_alloc.c
--- linux-2.6.14.org/mm/page_alloc.c 2005-10-27 17:02:08.000000000 -0700
+++ linux-2.6.14/mm/page_alloc.c 2005-10-28 10:11:39.000000000 -0700
@@ -685,8 +685,8 @@
* we cheat by calling it from here, in the order > 0 path. Saves a branch
* or two.
*/
-static struct page *
-buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
+static inline struct page *
+buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags, int replenish)
{
unsigned long flags;
struct page *page = NULL;
@@ -697,7 +697,7 @@
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
- if (pcp->count <= pcp->low)
+ if ((pcp->count <= pcp->low) && replenish)
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, &pcp->list);
if (pcp->count) {
@@ -707,9 +707,7 @@
}
local_irq_restore(flags);
put_cpu();
- }
-
- if (page == NULL) {
+ } else {
spin_lock_irqsave(&zone->lock, flags);
page = __rmqueue(zone, order);
spin_unlock_irqrestore(&zone->lock, flags);
@@ -770,6 +768,44 @@
return 1;
}
+/* get_page_from_freelist loops through all the possible zones
+ * to find out if it can allocate a page. can_try_harder can have the
+ * following values:
+ * -1 => No need to check the watermarks.
+ * 0 => Don't go too far below the low watermark (GFP_HIGH)
+ * 1 => Go far below the low watermark. See zone_watermark_ok (RT TASK)
+ */
+
+static struct page *
+get_page_from_freelist(unsigned int __nocast gfp_mask, unsigned int order,
+ struct zone **zones, int can_try_harder)
+{
+ struct zone *z;
+ struct page *page = NULL;
+ int classzone_idx = zone_idx(zones[0]);
+ int i;
+
+ /*
+ * Go through the zonelist once, looking for a zone with enough free.
+ * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+ */
+ for (i = 0; (z = zones[i]) != NULL; i++) {
+ if (!cpuset_zone_allowed(z, gfp_mask))
+ continue;
+
+ if ((can_try_harder >= 0) &&
+ (!zone_watermark_ok(z, order, z->pages_low,
+ classzone_idx, can_try_harder,
+ gfp_mask & __GFP_HIGH)))
+ continue;
+
+ page = buffered_rmqueue(z, order, gfp_mask, 1);
+ if (page)
+ break;
+ }
+ return page;
+}
+
/*
* This is the 'heart' of the zoned buddy allocator.
*/
@@ -778,15 +814,13 @@
struct zonelist *zonelist)
{
const int wait = gfp_mask & __GFP_WAIT;
- struct zone **zones, *z;
+ struct zone **zones, *z = NULL;
struct page *page;
struct reclaim_state reclaim_state;
struct task_struct *p = current;
int i;
- int classzone_idx;
int do_retry;
int can_try_harder;
- int did_some_progress;
might_sleep_if(wait);
@@ -803,42 +837,10 @@
/* Should this ever happen?? */
return NULL;
}
-
- classzone_idx = zone_idx(zones[0]);
-
restart:
- /*
- * Go through the zonelist once, looking for a zone with enough free.
- * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
- */
- for (i = 0; (z = zones[i]) != NULL; i++) {
- int do_reclaim = should_reclaim_zone(z, gfp_mask);
-
- if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
- continue;
-
- /*
- * If the zone is to attempt early page reclaim then this loop
- * will try to reclaim pages and check the watermark a second
- * time before giving up and falling back to the next zone.
- */
-zone_reclaim_retry:
- if (!zone_watermark_ok(z, order, z->pages_low,
- classzone_idx, 0, 0)) {
- if (!do_reclaim)
- continue;
- else {
- zone_reclaim(z, gfp_mask, order);
- /* Only try reclaim once */
- do_reclaim = 0;
- goto zone_reclaim_retry;
- }
- }
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order, zones, 0);
+ if (page)
+ goto got_pg;
for (i = 0; (z = zones[i]) != NULL; i++)
wakeup_kswapd(z, order);
@@ -851,19 +853,11 @@
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!zone_watermark_ok(z, order, z->pages_min,
- classzone_idx, can_try_harder,
- gfp_mask & __GFP_HIGH))
- continue;
-
- if (wait && !cpuset_zone_allowed(z, gfp_mask))
- continue;
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
+ if (!wait)
+ page = get_page_from_freelist(gfp_mask, order, zones,
+ can_try_harder);
+ if (page)
+ goto got_pg;
/* This allocation should allow future memory freeing. */
@@ -871,13 +865,9 @@
&& !in_interrupt()) {
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
/* go through the zonelist yet again, ignoring mins */
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!cpuset_zone_allowed(z, gfp_mask))
- continue;
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
+ page = get_page_from_freelist(gfp_mask, order, zones, -1);
+ if (page)
+ goto got_pg;
}
goto nopage;
}
@@ -894,47 +884,20 @@
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
- did_some_progress = try_to_free_pages(zones, gfp_mask);
+ i = try_to_free_pages(zones, gfp_mask);
p->reclaim_state = NULL;
p->flags &= ~PF_MEMALLOC;
cond_resched();
- if (likely(did_some_progress)) {
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!zone_watermark_ok(z, order, z->pages_min,
- classzone_idx, can_try_harder,
- gfp_mask & __GFP_HIGH))
- continue;
-
- if (!cpuset_zone_allowed(z, gfp_mask))
- continue;
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
- } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
+ page = get_page_from_freelist(gfp_mask, order, zones, can_try_harder);
+ if (page)
+ goto got_pg;
+ if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
/*
- * Go through the zonelist yet one more time, keep
- * very high watermark here, this is only to catch
- * a parallel oom killing, we must fail if we're still
- * under heavy pressure.
+ * Start the OOM here.
*/
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!zone_watermark_ok(z, order, z->pages_high,
- classzone_idx, 0, 0))
- continue;
-
- if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
- continue;
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
-
out_of_memory(gfp_mask, order);
goto restart;
}
@@ -968,7 +931,7 @@
}
return NULL;
got_pg:
- zone_statistics(zonelist, z);
+ zone_statistics(zonelist, page_zone(page));
return page;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
next reply other threads:[~2005-10-29 1:33 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-10-29 1:33 Rohit, Seth [this message]
2005-10-29 2:33 ` Nick Piggin
2005-10-31 20:55 ` Rohit Seth
2005-11-01 1:14 ` Nick Piggin
2005-11-04 18:15 ` Rohit Seth
2005-11-05 0:00 ` Nick Piggin
2005-10-30 0:16 ` Paul Jackson
2005-10-31 19:09 ` Rohit Seth
2005-11-05 17:09 ` Andi Kleen
2005-11-06 4:18 ` Paul Jackson
2005-11-06 17:35 ` Andi Kleen
2005-11-06 20:49 ` Paul Jackson
2005-11-07 2:57 ` Nick Piggin
2005-11-07 3:42 ` Andi Kleen
2005-11-07 4:37 ` Paul Jackson
2005-11-07 6:08 ` Nick Piggin
2005-11-07 9:46 ` Paul Jackson
2005-11-07 10:17 ` Nick Piggin
2005-11-07 14:41 ` Paul Jackson
2005-11-07 3:44 ` Paul Jackson
2005-10-30 1:47 ` Paul Jackson
2005-10-30 2:01 ` Nick Piggin
2005-10-30 2:19 ` Paul Jackson
2005-10-30 2:32 ` Nick Piggin
2005-10-30 3:06 ` Paul Jackson
2005-10-30 3:53 ` Nick Piggin
2005-10-30 2:26 ` Paul Jackson
2005-10-30 2:36 ` Nick Piggin
2005-10-30 3:09 ` Paul Jackson
2005-10-30 3:55 ` Nick Piggin
2005-10-30 4:11 ` Paul Jackson
2005-10-31 21:20 ` Rohit Seth
2005-10-31 21:28 ` Paul Jackson
-- strict thread matches above, loose matches on Subject: below --
2005-11-05 1:57 Seth, Rohit
2005-10-01 19:00 Seth, Rohit
2005-10-02 3:09 ` Nick Piggin
2005-10-03 16:50 ` Rohit Seth
2005-10-03 15:34 ` Christoph Lameter
2005-10-03 16:55 ` Rohit Seth
2005-10-03 16:57 ` Christoph Lameter
2005-10-03 17:48 ` Rohit Seth
2005-10-04 13:27 ` Andi Kleen
2005-10-04 16:26 ` Ray Bryant
2005-10-04 16:10 ` Martin J. Bligh
2005-10-04 17:02 ` Ray Bryant
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051028183326.A28611@unix-os.sc.intel.com \
--to=rohit.seth@intel.com \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox