From: Johannes Weiner <hannes@cmpxchg.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Rik van Riel <riel@redhat.com>, linux-mm@kvack.org
Subject: [patch] mm: skip rebalance of hopeless zones
Date: Wed, 8 Dec 2010 16:16:59 +0100 [thread overview]
Message-ID: <1291821419-11213-1-git-send-email-hannes@cmpxchg.org> (raw)
Kswapd tries to rebalance zones persistently until their high
watermarks are restored.
If the amount of unreclaimable pages in a zone makes this impossible
for reclaim, though, kswapd will end up in a busy loop without a
chance of reaching its goal.
This behaviour was observed on a virtual machine with a tiny
Normal-zone that filled up with unreclaimable slab objects.
This patch makes kswapd skip rebalancing on such 'hopeless' zones and
leaves them to direct reclaim.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
include/linux/mmzone.h | 2 ++
mm/page_alloc.c | 4 ++--
mm/vmscan.c | 36 ++++++++++++++++++++++++++++--------
3 files changed, 32 insertions(+), 10 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 4890662..0cc1d63 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -655,6 +655,8 @@ typedef struct pglist_data {
extern struct mutex zonelists_mutex;
void build_all_zonelists(void *data);
void wakeup_kswapd(struct zone *zone, int order);
+bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags, long free_pages);
bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int alloc_flags);
bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1845a97..c7d2b28 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1458,8 +1458,8 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
* Return true if free pages are above 'mark'. This takes into account the order
* of the allocation.
*/
-static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
- int classzone_idx, int alloc_flags, long free_pages)
+bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
+ int classzone_idx, int alloc_flags, long free_pages)
{
/* free_pages my go negative - that's OK */
long min = mark;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 42a4859..5623f36 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2191,6 +2191,25 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
}
#endif
+static bool zone_needs_scan(struct zone *zone, int order,
+ unsigned long goal, int classzone_idx)
+{
+ unsigned long free, prospect;
+
+ /*
+ * NR_FREE_PAGES can be inaccurate while per-cpu deltas are
+ * unsynced; near the drift mark, take the exact (slower)
+ * snapshot so we don't misjudge the watermark.
+ */
+ free = zone_page_state(zone, NR_FREE_PAGES);
+ if (zone->percpu_drift_mark && free < zone->percpu_drift_mark)
+ free = zone_page_state_snapshot(zone, NR_FREE_PAGES);
+
+ /* Watermark already met: no rebalancing needed for this zone. */
+ if (__zone_watermark_ok(zone, order, goal, classzone_idx, 0, free))
+ return false;
+ /*
+ * Ensure that the watermark is at all restorable through
+ * reclaim. Otherwise, leave the zone to direct reclaim:
+ * even reclaiming every reclaimable page could not lift
+ * free pages up to the goal, so kswapd scanning this zone
+ * would only busy-loop.
+ */
+ prospect = free + zone_reclaimable_pages(zone);
+ return prospect >= goal;
+}
+
/* is kswapd sleeping prematurely? */
static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
{
@@ -2210,8 +2229,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
if (zone->all_unreclaimable)
continue;
- if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
- 0, 0))
+ if (zone_needs_scan(zone, order, high_wmark_pages(zone), 0))
return 1;
}
@@ -2282,6 +2300,7 @@ loop_again:
*/
for (i = pgdat->nr_zones - 1; i >= 0; i--) {
struct zone *zone = pgdat->node_zones + i;
+ unsigned long goal;
if (!populated_zone(zone))
continue;
@@ -2297,8 +2316,8 @@ loop_again:
shrink_active_list(SWAP_CLUSTER_MAX, zone,
&sc, priority, 0);
- if (!zone_watermark_ok_safe(zone, order,
- high_wmark_pages(zone), 0, 0)) {
+ goal = high_wmark_pages(zone);
+ if (zone_needs_scan(zone, order, goal, 0)) {
end_zone = i;
break;
}
@@ -2323,6 +2342,7 @@ loop_again:
*/
for (i = 0; i <= end_zone; i++) {
struct zone *zone = pgdat->node_zones + i;
+ unsigned long goal;
int nr_slab;
if (!populated_zone(zone))
@@ -2339,12 +2359,13 @@ loop_again:
*/
mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
+ goal = high_wmark_pages(zone);
/*
* We put equal pressure on every zone, unless one
* zone has way too many pages free already.
*/
if (!zone_watermark_ok_safe(zone, order,
- 8*high_wmark_pages(zone), end_zone, 0))
+ 8 * goal, end_zone, 0))
shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
@@ -2373,8 +2394,7 @@ loop_again:
compact_zone_order(zone, sc.order, sc.gfp_mask,
false);
- if (!zone_watermark_ok_safe(zone, order,
- high_wmark_pages(zone), end_zone, 0)) {
+ if (zone_needs_scan(zone, order, goal, end_zone)) {
all_zones_ok = 0;
/*
* We are still under min water mark. This
@@ -2587,7 +2607,7 @@ void wakeup_kswapd(struct zone *zone, int order)
pgdat->kswapd_max_order = order;
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
- if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
+ if (!zone_needs_scan(zone, order, low_wmark_pages(zone), 0))
return;
trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
--
1.7.3.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href="mailto:dont@kvack.org">dont@kvack.org</a>
next reply other threads:[~2010-12-08 15:17 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-12-08 15:16 Johannes Weiner [this message]
2010-12-08 18:05 ` Rik van Riel
2010-12-08 22:19 ` Andrew Morton
2010-12-09 0:04 ` Johannes Weiner
2010-12-09 21:17 ` Andrew Morton
2010-12-10 16:27 ` Johannes Weiner
2011-01-05 11:15 ` Johannes Weiner
2011-01-04 23:56 ` Andrew Morton
2010-12-09 0:47 ` Rik van Riel
2010-12-09 14:34 ` Mel Gorman
2010-12-09 0:36 ` Simon Kirby
2010-12-09 0:49 ` Rik van Riel
2010-12-09 1:08 ` Simon Kirby
2010-12-09 14:42 ` Mel Gorman
2010-12-09 1:23 ` Andrew Morton
2010-12-09 1:55 ` Minchan Kim
2010-12-09 1:57 ` Minchan Kim
2010-12-09 2:01 ` Andrew Morton
2010-12-09 2:19 ` Minchan Kim
2010-12-09 5:18 ` Minchan Kim
2010-12-09 2:05 ` Simon Kirby
2010-12-09 8:55 ` Pekka Enberg
2010-12-09 14:46 ` Mel Gorman
2010-12-09 14:44 ` Mel Gorman
2010-12-09 18:03 ` Andrew Morton
2010-12-09 18:48 ` Ying Han
2010-12-10 11:34 ` Mel Gorman
2010-12-09 18:39 ` Ying Han
2010-12-10 11:37 ` Mel Gorman
2010-12-10 19:46 ` Ying Han
2010-12-09 1:29 ` Minchan Kim
2010-12-09 18:51 ` Ying Han
2010-12-10 7:25 ` KOSAKI Motohiro
2010-12-10 7:37 ` KOSAKI Motohiro
2010-12-10 10:54 ` Johannes Weiner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1291821419-11213-1-git-send-email-hannes@cmpxchg.org \
--to=hannes@cmpxchg.org \
--cc=akpm@linux-foundation.org \
--cc=linux-mm@kvack.org \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox