From: Andrew Morton <akpm@osdl.org>
To: Rohit Seth <rohit.seth@intel.com>
Cc: torvalds@osdl.org, linux-mm@kvack.org,
linux-kernel@vger.kernel.org,
Christoph Lameter <christoph@lameter.com>
Subject: Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
Date: Tue, 22 Nov 2005 21:36:12 -0800 [thread overview]
Message-ID: <20051122213612.4adef5d0.akpm@osdl.org> (raw)
In-Reply-To: <20051122161000.A22430@unix-os.sc.intel.com>
Rohit Seth <rohit.seth@intel.com> wrote:
>
> Andrew, Linus,
>
> [PATCH]: This patch frees pages (pcp->batch from each list at a time) from
> local pcp lists when a higher order allocation request is not able to
> get serviced from global free_list.
>
> This should help fix some of the earlier failures seen with order 1 allocations.
>
> I will send separate patches for:
>
> 1- Reducing the remote cpus pcp
> 2- Clean up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropriately
>
> +static int
> +reduce_cpu_pcp(void )
> +{
> + struct zone *zone;
> + unsigned long flags;
> + unsigned int cpu = get_cpu();
> + int i, ret=0;
> +
> + local_irq_save(flags);
> + for_each_zone(zone) {
> + struct per_cpu_pageset *pset;
> +
> + pset = zone_pcp(zone, cpu);
> + for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
> + struct per_cpu_pages *pcp;
> +
> + pcp = &pset->pcp[i];
> + if (pcp->count == 0)
> + continue;
> + pcp->count -= free_pages_bulk(zone, pcp->batch,
> + &pcp->list, 0);
> + ret++;
> + }
> + }
> + local_irq_restore(flags);
> + put_cpu();
> + return ret;
> +}
This significantly duplicates the existing drain_local_pages().
>
> + if (order > 0)
> + while (reduce_cpu_pcp()) {
> + if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))
This forgot to assign to local variable `page'! It'll return NULL and will
leak memory.
The `while' loop worries me for some reason, so I wimped out and just tried
the remote drain once.
> + goto got_pg;
> + }
> + /* FIXME: Add the support for reducing/draining the remote pcps.
This is easy enough to do.
I wanted to call the all-CPU drainer `drain_remote_pages' but that's
already taken by some rather poorly-named NUMA thing which also duplicates
most of __drain_pages().
This patch is against a random selection of the enormous number of mm/
patches in -mm. I haven't runtime-tested it yet.
We need to verify that this patch actually does something useful.
include/linux/gfp.h | 2 +
include/linux/suspend.h | 1
mm/page_alloc.c | 85 ++++++++++++++++++++++++++++++++++++------------
3 files changed, 66 insertions(+), 22 deletions(-)
diff -puN include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/gfp.h
--- devel/include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/gfp.h 2005-11-22 21:32:47.000000000 -0800
@@ -109,6 +109,8 @@ static inline struct page *alloc_pages_n
NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
}
+extern int drain_local_pages(void);
+
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
diff -puN include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/suspend.h
--- devel/include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/suspend.h 2005-11-22 21:32:47.000000000 -0800
@@ -40,7 +40,6 @@ extern dev_t swsusp_resume_device;
extern int shrink_mem(void);
/* mm/page_alloc.c */
-extern void drain_local_pages(void);
extern void mark_free_pages(struct zone *zone);
#ifdef CONFIG_PM
diff -puN mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions mm/page_alloc.c
--- devel/mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions 2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/mm/page_alloc.c 2005-11-22 21:32:47.000000000 -0800
@@ -578,32 +578,71 @@ void drain_remote_pages(void)
}
#endif
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
-static void __drain_pages(unsigned int cpu)
+/*
+ * Drain any cpu-local pages into the buddy lists. Must be called under
+ * local_irq_disable().
+ */
+static int __drain_pages(unsigned int cpu)
{
- unsigned long flags;
struct zone *zone;
- int i;
+ int ret = 0;
for_each_zone(zone) {
struct per_cpu_pageset *pset;
+ int i;
pset = zone_pcp(zone, cpu);
for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- local_irq_save(flags);
+ if (!pcp->count)
+ continue;
pcp->count -= free_pages_bulk(zone, pcp->count,
&pcp->list, 0);
- local_irq_restore(flags);
+ ret++;
}
}
+ return ret;
}
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
-#ifdef CONFIG_PM
+/*
+ * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ */
+int drain_local_pages(void)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __drain_pages(smp_processor_id());
+ local_irq_restore(flags);
+ return ret;
+}
+
+static void drainer(void *p)
+{
+ atomic_add(drain_local_pages(), p);
+}
+
+/*
+ * Drain the per-cpu pages on all CPUs. If called from interrupt context we
+ * can only drain the local CPU's pages, since cross-CPU calls are deadlocky
+ * from interrupt context.
+ */
+static int drain_all_local_pages(void)
+{
+ if (in_interrupt()) {
+ return drain_local_pages();
+ } else {
+ atomic_t ret = ATOMIC_INIT(0);
+
+ on_each_cpu(drainer, &ret, 0, 1);
+ return atomic_read(&ret);
+ }
+}
+#ifdef CONFIG_PM
void mark_free_pages(struct zone *zone)
{
unsigned long zone_pfn, flags;
@@ -629,17 +668,6 @@ void mark_free_pages(struct zone *zone)
spin_unlock_irqrestore(&zone->lock, flags);
}
-/*
- * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- */
-void drain_local_pages(void)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __drain_pages(smp_processor_id());
- local_irq_restore(flags);
-}
#endif /* CONFIG_PM */
static void zone_statistics(struct zonelist *zonelist, struct zone *z)
@@ -913,8 +941,16 @@ nofail_alloc:
}
/* Atomic allocations - we can't balance anything */
- if (!wait)
- goto nopage;
+ if (!wait) {
+ /*
+ * Check if there are pages available on pcp lists that can be
+ * moved to global page list to satisfy higher order allocations
+ */
+ if (order > 0 && drain_all_local_pages())
+ goto restart;
+ else
+ goto nopage;
+ }
rebalance:
cond_resched();
@@ -952,6 +988,13 @@ rebalance:
goto restart;
}
+ if (order > 0 && drain_all_local_pages()) {
+ page = get_page_from_freelist(gfp_mask, order, zonelist,
+ alloc_flags);
+ if (page)
+ goto got_pg;
+ }
+
/*
* Don't let big-order allocations loop unless the caller explicitly
* requests that. Wait for some write requests to complete then retry.
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2005-11-23 5:36 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-11-23 0:10 Rohit Seth
2005-11-23 5:36 ` Andrew Morton [this message]
2005-11-23 5:58 ` Andrew Morton
2005-11-23 18:17 ` Rohit Seth
2005-11-23 6:36 ` Christoph Lameter
2005-11-23 6:42 ` Christoph Lameter
2005-11-23 16:35 ` Linus Torvalds
2005-11-23 17:03 ` Christoph Lameter
2005-11-23 17:54 ` Rohit Seth
2005-11-23 18:06 ` Mel Gorman
2005-11-23 19:41 ` Rohit Seth
2005-11-24 9:25 ` Mel Gorman
2005-11-23 23:26 ` Rohit Seth
2005-11-23 19:30 ` Christoph Lameter
2005-11-23 19:46 ` Rohit Seth
2005-11-23 19:55 ` Andrew Morton
2005-11-23 21:00 ` Rohit Seth
2005-11-23 21:25 ` Christoph Lameter
2005-11-23 22:29 ` Rohit Seth
2005-11-23 21:26 ` Andrew Morton
2005-11-23 21:40 ` Rohit Seth
2005-11-24 3:02 ` Paul Jackson
2005-11-29 23:18 ` Rohit Seth
2005-12-01 14:44 ` Paul Jackson
2005-12-02 0:32 ` Nick Piggin
2005-11-23 22:01 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051122213612.4adef5d0.akpm@osdl.org \
--to=akpm@osdl.org \
--cc=christoph@lameter.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=rohit.seth@intel.com \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox