From: Johannes Weiner <hannes@cmpxchg.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>, linux-mm@kvack.org
Subject: [patch] mm: skip rebalance of hopeless zones
Date: Wed, 8 Dec 2010 16:16:59 +0100 [thread overview]
Message-ID: <1291821419-11213-1-git-send-email-hannes@cmpxchg.org> (raw)
Kswapd tries to rebalance zones persistently until their high
watermarks are restored.
If the amount of unreclaimable pages in a zone makes this impossible
for reclaim, though, kswapd will end up in a busy loop without a
chance of reaching its goal.
This behaviour was observed on a virtual machine with a tiny
Normal-zone that filled up with unreclaimable slab objects.
This patch makes kswapd skip rebalancing on such 'hopeless' zones and
leaves them to direct reclaim.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
include/linux/mmzone.h | 2 ++
mm/page_alloc.c | 4 ++--
mm/vmscan.c | 36 ++++++++++++++++++++++++++++--------
3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4890662..0cc1d63 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -655,6 +655,8 @@ typedef struct pglist_data {
extern struct mutex zonelists_mutex;
void build_all_zonelists(void *data);
void wakeup_kswapd(struct zone *zone, int order);
+bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags, long free_pages);
bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags);
bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1845a97..c7d2b28 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1458,8 +1458,8 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
* Return true if free pages are above 'mark'. This takes into account the order
* of the allocation.
*/
-static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
- int classzone_idx, int alloc_flags, long free_pages)
+bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags, long free_pages)
{
/* free_pages my go negative - that's OK */
long min = mark;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 42a4859..5623f36 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2191,6 +2191,25 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
}
#endif
+static bool zone_needs_scan(struct zone *zone, int order,
+ unsigned long goal, int classzone_idx)
+{
+ unsigned long free, prospect;
+
+ /*
+ * NR_FREE_PAGES can be inaccurate while per-cpu deltas are
+ * unsynced; near the drift mark, take the exact (slower)
+ * snapshot so we don't misjudge the watermark.
+ */
+ free = zone_page_state(zone, NR_FREE_PAGES);
+ if (zone->percpu_drift_mark && free < zone->percpu_drift_mark)
+ free = zone_page_state_snapshot(zone, NR_FREE_PAGES);
+
+ /* Watermark already met: no rebalancing needed for this zone. */
+ if (__zone_watermark_ok(zone, order, goal, classzone_idx, 0, free))
+ return false;
+ /*
+ * Ensure that the watermark is at all restorable through
+ * reclaim. Otherwise, leave the zone to direct reclaim:
+ * even reclaiming every reclaimable page could not lift
+ * free pages up to the goal, so kswapd scanning this zone
+ * would only busy-loop.
+ */
+ prospect = free + zone_reclaimable_pages(zone);
+ return prospect >= goal;
+}
+
/* is kswapd sleeping prematurely? */
static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
{
@@ -2210,8 +2229,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
if (zone->all_unreclaimable)
continue;
- if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
- 0, 0))
+ if (zone_needs_scan(zone, order, high_wmark_pages(zone), 0))
return 1;
}
@@ -2282,6 +2300,7 @@ loop_again:
*/
for (i = pgdat->nr_zones - 1; i >= 0; i--) {
struct zone *zone = pgdat->node_zones + i;
+ unsigned long goal;
if (!populated_zone(zone))
continue;
@@ -2297,8 +2316,8 @@ loop_again:
shrink_active_list(SWAP_CLUSTER_MAX, zone,
&sc, priority, 0);
- if (!zone_watermark_ok_safe(zone, order,
- high_wmark_pages(zone), 0, 0)) {
+ goal = high_wmark_pages(zone);
+ if (zone_needs_scan(zone, order, goal, 0)) {
end_zone = i;
break;
}
@@ -2323,6 +2342,7 @@ loop_again:
*/
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
+ unsigned long goal;
int nr_slab;
if (!populated_zone(zone))
@@ -2339,12 +2359,13 @@ loop_again:
*/
mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
+ goal = high_wmark_pages(zone);
/*
* We put equal pressure on every zone, unless one
* zone has way too many pages free already.
*/
if (!zone_watermark_ok_safe(zone, order,
- 8*high_wmark_pages(zone), end_zone, 0))
+ 8 * goal, end_zone, 0))
shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2373,8 +2394,7 @@ loop_again:
compact_zone_order(zone, sc.order, sc.gfp_mask,
false);
- if (!zone_watermark_ok_safe(zone, order,
- high_wmark_pages(zone), end_zone, 0)) {
+ if (zone_needs_scan(zone, order, goal, end_zone)) {
all_zones_ok = 0;
/*
* We are still under min water mark. This
@@ -2587,7 +2607,7 @@ void wakeup_kswapd(struct zone *zone, int order)
pgdat->kswapd_max_order = order;
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
- if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
+ if (!zone_needs_scan(zone, order, low_wmark_pages(zone), 0))
return;
trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
--
1.7.3.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href="mailto:dont@kvack.org">dont@kvack.org</a>
next reply other threads:[~2010-12-08 15:17 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-12-08 15:16 Johannes Weiner [this message]
2010-12-08 18:05 ` Rik van Riel
2010-12-08 22:19 ` Andrew Morton
2010-12-09 0:04 ` Johannes Weiner
2010-12-09 21:17 ` Andrew Morton
2010-12-10 16:27 ` Johannes Weiner
2011-01-05 11:15 ` Johannes Weiner
2011-01-04 23:56 ` Andrew Morton
2010-12-09 0:47 ` Rik van Riel
2010-12-09 14:34 ` Mel Gorman
2010-12-09 0:36 ` Simon Kirby
2010-12-09 0:49 ` Rik van Riel
2010-12-09 1:08 ` Simon Kirby
2010-12-09 14:42 ` Mel Gorman
2010-12-09 1:23 ` Andrew Morton
2010-12-09 1:55 ` Minchan Kim
2010-12-09 1:57 ` Minchan Kim
2010-12-09 2:01 ` Andrew Morton
2010-12-09 2:19 ` Minchan Kim
2010-12-09 5:18 ` Minchan Kim
2010-12-09 2:05 ` Simon Kirby
2010-12-09 8:55 ` Pekka Enberg
2010-12-09 14:46 ` Mel Gorman
2010-12-09 14:44 ` Mel Gorman
2010-12-09 18:03 ` Andrew Morton
2010-12-09 18:48 ` Ying Han
2010-12-10 11:34 ` Mel Gorman
2010-12-09 18:39 ` Ying Han
2010-12-10 11:37 ` Mel Gorman
2010-12-10 19:46 ` Ying Han
2010-12-09 1:29 ` Minchan Kim
2010-12-09 18:51 ` Ying Han
2010-12-10 7:25 ` KOSAKI Motohiro
2010-12-10 7:37 ` KOSAKI Motohiro
2010-12-10 10:54 ` Johannes Weiner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1291821419-11213-1-git-send-email-hannes@cmpxchg.org \
--to=hannes@cmpxchg.org \
--cc=akpm@linux-foundation.org \
--cc=linux-mm@kvack.org \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox