From: Andrew Morton <akpm@osdl.org>
To: Rohit Seth <rohit.seth@intel.com>
Cc: torvalds@osdl.org, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org,
	Christoph Lameter <christoph@lameter.com>
Subject: Re: [PATCH]: Free pages from local pcp lists under tight memory conditions
Date: Tue, 22 Nov 2005 21:36:12 -0800
Message-ID: <20051122213612.4adef5d0.akpm@osdl.org>
In-Reply-To: <20051122161000.A22430@unix-os.sc.intel.com>

Rohit Seth <rohit.seth@intel.com> wrote:
>
> Andrew, Linus,
> 
> [PATCH]: This patch frees pages (pcp->batch from each list at a time) from
> the local pcp lists when a higher-order allocation request cannot be
> serviced from the global free list.
> 
> This should help fix some of the earlier failures seen with order-1
> allocations.
> 
> I will send separate patches for:
> 
> 1- Reducing the remote CPUs' pcp lists
> 2- Cleaning up page_alloc.c for CONFIG_HOTPLUG_CPU to use this code appropriately
> 
> +static int
> +reduce_cpu_pcp(void)
> +{
> +	struct zone *zone;
> +	unsigned long flags;
> +	unsigned int cpu = get_cpu();
> +	int i, ret = 0;
> +
> +	local_irq_save(flags);
> +	for_each_zone(zone) {
> +		struct per_cpu_pageset *pset;
> +
> +		pset = zone_pcp(zone, cpu);
> +		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
> +			struct per_cpu_pages *pcp;
> +
> +			pcp = &pset->pcp[i];
> +			if (pcp->count == 0)
> +				continue;
> +			pcp->count -= free_pages_bulk(zone, pcp->batch,
> +						&pcp->list, 0);
> +			ret++;
> +		}
> +	}
> +	local_irq_restore(flags);
> +	put_cpu();
> +	return ret;
> +}

This significantly duplicates the existing drain_local_pages().
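
For reference, the loop it duplicates (__drain_pages(), which
drain_local_pages() wraps and which the patch below reworks) differs only
in freeing pcp->count pages, i.e. everything, rather than pcp->batch at a
time:

	static void __drain_pages(unsigned int cpu)
	{
		unsigned long flags;
		struct zone *zone;
		int i;

		for_each_zone(zone) {
			struct per_cpu_pageset *pset;

			pset = zone_pcp(zone, cpu);
			for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
				struct per_cpu_pages *pcp;

				pcp = &pset->pcp[i];
				local_irq_save(flags);
				pcp->count -= free_pages_bulk(zone, pcp->count,
							&pcp->list, 0);
				local_irq_restore(flags);
			}
		}
	}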

>  
> +	if (order > 0) 
> +		while (reduce_cpu_pcp()) {
> +			if (get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags))

This forgot to assign the result to the local variable `page'!  It'll return
NULL and leak memory.

The `while' loop worries me for some reason, so I wimped out and just tried
the remote drain once.
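
What was presumably intended is something like this (it's essentially what
the rebalance path in my patch below ends up doing, without the loop):

	if (order > 0 && drain_all_local_pages()) {
		page = get_page_from_freelist(gfp_mask, order, zonelist,
						alloc_flags);
		if (page)
			goto got_pg;
	}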

> +				goto got_pg;
> +		}
> +	/* FIXME: Add the support for reducing/draining the remote pcps.

This is easy enough to do.

I wanted to call the all-CPU drainer `drain_remote_pages' but that's
already taken by some rather poorly-named NUMA thing which also duplicates
most of __drain_pages().
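
(From memory it looks roughly like the below; the node check is the only
part that isn't a copy of __drain_pages(), so treat this as a sketch, not a
quote:)

	void drain_remote_pages(void)
	{
		struct zone *zone;
		unsigned long flags;
		int i;

		local_irq_save(flags);
		for_each_zone(zone) {
			struct per_cpu_pageset *pset;

			/* Do not drain the local node's pagesets */
			if (zone->zone_pgdat->node_id == numa_node_id())
				continue;

			pset = zone_pcp(zone, smp_processor_id());
			for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
				struct per_cpu_pages *pcp = &pset->pcp[i];

				if (pcp->count)
					pcp->count -= free_pages_bulk(zone,
						pcp->count, &pcp->list, 0);
			}
		}
		local_irq_restore(flags);
	}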

This patch is against a random selection of the enormous number of mm/
patches in -mm.  I haven't runtime-tested it yet.

We need to verify that this patch actually does something useful.
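
A crude way to check: run a workload known to fail order-1 atomic
allocations and watch the order>=1 columns of /proc/buddyinfo (e.g.
`watch -n1 cat /proc/buddyinfo') to see whether the pcp pages coalesce back
into the buddy lists when the drain fires.  Not a real test, but it would
at least show whether the drain buys us anything.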



 include/linux/gfp.h     |    2 +
 include/linux/suspend.h |    1 -
 mm/page_alloc.c         |   85 ++++++++++++++++++++++++++++++++++++------------
 3 files changed, 66 insertions(+), 22 deletions(-)

diff -puN include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/gfp.h
--- devel/include/linux/gfp.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions	2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/gfp.h	2005-11-22 21:32:47.000000000 -0800
@@ -109,6 +109,8 @@ static inline struct page *alloc_pages_n
 		NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
 }
 
+extern int drain_local_pages(void);
+
 #ifdef CONFIG_NUMA
 extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
 
diff -puN include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions include/linux/suspend.h
--- devel/include/linux/suspend.h~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions	2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/include/linux/suspend.h	2005-11-22 21:32:47.000000000 -0800
@@ -40,7 +40,6 @@ extern dev_t swsusp_resume_device;
 extern int shrink_mem(void);
 
 /* mm/page_alloc.c */
-extern void drain_local_pages(void);
 extern void mark_free_pages(struct zone *zone);
 
 #ifdef CONFIG_PM
diff -puN mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions mm/page_alloc.c
--- devel/mm/page_alloc.c~mm-free-pages-from-local-pcp-lists-under-tight-memory-conditions	2005-11-22 21:32:47.000000000 -0800
+++ devel-akpm/mm/page_alloc.c	2005-11-22 21:32:47.000000000 -0800
@@ -578,32 +578,71 @@ void drain_remote_pages(void)
 }
 #endif
 
-#if defined(CONFIG_PM) || defined(CONFIG_HOTPLUG_CPU)
-static void __drain_pages(unsigned int cpu)
+/*
+ * Drain any cpu-local pages into the buddy lists.  Must be called under
+ * local_irq_disable().
+ */
+static int __drain_pages(unsigned int cpu)
 {
-	unsigned long flags;
 	struct zone *zone;
-	int i;
+	int ret = 0;
 
 	for_each_zone(zone) {
 		struct per_cpu_pageset *pset;
+		int i;
 
 		pset = zone_pcp(zone, cpu);
 		for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
 			struct per_cpu_pages *pcp;
 
 			pcp = &pset->pcp[i];
-			local_irq_save(flags);
+			if (!pcp->count)
+				continue;
 			pcp->count -= free_pages_bulk(zone, pcp->count,
 						&pcp->list, 0);
-			local_irq_restore(flags);
+			ret++;
 		}
 	}
+	return ret;
 }
-#endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */
 
-#ifdef CONFIG_PM
+/*
+ * Spill all of this CPU's per-cpu pages back into the buddy allocator.
+ */
+int drain_local_pages(void)
+{
+	unsigned long flags;
+	int ret;
+
+	local_irq_save(flags);
+	ret = __drain_pages(smp_processor_id());
+	local_irq_restore(flags);
+	return ret;
+}
+
+static void drainer(void *p)
+{
+	atomic_add(drain_local_pages(), p);
+}
+
+/*
+ * Drain the per-cpu pages on all CPUs.  If called from interrupt context we
+ * can only drain the local CPU's pages, since cross-CPU calls are deadlocky
+ * from interrupt context.
+ */
+static int drain_all_local_pages(void)
+{
+	if (in_interrupt()) {
+		return drain_local_pages();
+	} else {
+		atomic_t ret = ATOMIC_INIT(0);
+
+		on_each_cpu(drainer, &ret, 0, 1);
+		return atomic_read(&ret);
+	}
+}
 
+#ifdef CONFIG_PM
 void mark_free_pages(struct zone *zone)
 {
 	unsigned long zone_pfn, flags;
@@ -629,17 +668,6 @@ void mark_free_pages(struct zone *zone)
 	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
-/*
- * Spill all of this CPU's per-cpu pages back into the buddy allocator.
- */
-void drain_local_pages(void)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);	
-	__drain_pages(smp_processor_id());
-	local_irq_restore(flags);	
-}
 #endif /* CONFIG_PM */
 
 static void zone_statistics(struct zonelist *zonelist, struct zone *z)
@@ -913,8 +941,16 @@ nofail_alloc:
 	}
 
 	/* Atomic allocations - we can't balance anything */
-	if (!wait)
-		goto nopage;
+	if (!wait) {
+		/*
+		 * Check if there are pages available on pcp lists that can be
+		 * moved to global page list to satisfy higher order allocations
+		 */
+		if (order > 0 && drain_all_local_pages())
+			goto restart;
+		else
+			goto nopage;
+	}
 
 rebalance:
 	cond_resched();
@@ -952,6 +988,13 @@ rebalance:
 		goto restart;
 	}
 
+	if (order > 0 && drain_all_local_pages()) {
+		page = get_page_from_freelist(gfp_mask, order, zonelist,
+						alloc_flags);
+		if (page)
+			goto got_pg;
+	}
+
 	/*
 	 * Don't let big-order allocations loop unless the caller explicitly
 	 * requests that.  Wait for some write requests to complete then retry.
_
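
A note on the in_interrupt() test in drain_all_local_pages(): on_each_cpu()
with wait=1 spins until every CPU has run the callback, and (as I understand
it) two CPUs doing that to each other from interrupt context can each end up
spinning with the other's IPI pending, never making progress.  Hence the
fallback to a local-only drain there.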

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: email@kvack.org
