From: Ying Han <yinghan@google.com>
To: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
Rik van Riel <riel@redhat.com>,
linux-mm@kvack.org
Subject: Re: [patch] mm: skip rebalance of hopeless zones
Date: Thu, 9 Dec 2010 10:51:40 -0800 [thread overview]
Message-ID: <AANLkTikOgkGBn9AbEDAM4KegsnwuXqF2jg7icu0yc8Kh@mail.gmail.com> (raw)
In-Reply-To: <1291821419-11213-1-git-send-email-hannes@cmpxchg.org>
On Wed, Dec 8, 2010 at 7:16 AM, Johannes Weiner <hannes@cmpxchg.org> wrote:
> Kswapd tries to rebalance zones persistently until their high
> watermarks are restored.
>
> If the amount of unreclaimable pages in a zone makes this impossible
> for reclaim, though, kswapd will end up in a busy loop without a
> chance of reaching its goal.
>
> This behaviour was observed on a virtual machine with a tiny
> Normal-zone that filled up with unreclaimable slab objects.
>
> This patch makes kswapd skip rebalancing on such 'hopeless' zones and
> leaves them to direct reclaim.
>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
> include/linux/mmzone.h | 2 ++
> mm/page_alloc.c | 4 ++--
> mm/vmscan.c | 36 ++++++++++++++++++++++++++++--------
> 3 files changed, 32 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 4890662..0cc1d63 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -655,6 +655,8 @@ typedef struct pglist_data {
> extern struct mutex zonelists_mutex;
> void build_all_zonelists(void *data);
> void wakeup_kswapd(struct zone *zone, int order);
> +bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> + int classzone_idx, int alloc_flags, long free_pages);
> bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> int classzone_idx, int alloc_flags);
> bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 1845a97..c7d2b28 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1458,8 +1458,8 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
> * Return true if free pages are above 'mark'. This takes into account the order
> * of the allocation.
> */
> -static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> - int classzone_idx, int alloc_flags, long free_pages)
> +bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> + int classzone_idx, int alloc_flags, long free_pages)
> {
> /* free_pages my go negative - that's OK */
> long min = mark;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 42a4859..5623f36 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2191,6 +2191,25 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
> }
> #endif
>
> +static bool zone_needs_scan(struct zone *zone, int order,
> + unsigned long goal, int classzone_idx)
> +{
> + unsigned long free, prospect;
> +
> + free = zone_page_state(zone, NR_FREE_PAGES);
> + if (zone->percpu_drift_mark && free < zone->percpu_drift_mark)
> + free = zone_page_state_snapshot(zone, NR_FREE_PAGES);
> +
> + if (__zone_watermark_ok(zone, order, goal, classzone_idx, 0, free))
> + return false;
> + /*
> + * Ensure that the watermark is at all restorable through
> + * reclaim. Otherwise, leave the zone to direct reclaim.
> + */
> + prospect = free + zone_reclaimable_pages(zone);
> + return prospect >= goal;
> +}
> +
> /* is kswapd sleeping prematurely? */
> static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
> {
> @@ -2210,8 +2229,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
> if (zone->all_unreclaimable)
> continue;
>
> - if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
> - 0, 0))
> + if (zone_needs_scan(zone, order, high_wmark_pages(zone), 0))
> return 1;
> }
>
> @@ -2282,6 +2300,7 @@ loop_again:
> */
> for (i = pgdat->nr_zones - 1; i >= 0; i--) {
> struct zone *zone = pgdat->node_zones + i;
> + unsigned long goal;
>
> if (!populated_zone(zone))
> continue;
> @@ -2297,8 +2316,8 @@ loop_again:
> shrink_active_list(SWAP_CLUSTER_MAX, zone,
> &sc, priority, 0);
>
> - if (!zone_watermark_ok_safe(zone, order,
> - high_wmark_pages(zone), 0, 0)) {
> + goal = high_wmark_pages(zone);
> + if (zone_needs_scan(zone, order, goal, 0)) {
> end_zone = i;
> break;
> }
> @@ -2323,6 +2342,7 @@ loop_again:
> */
> for (i = 0; i <= end_zone; i++) {
> struct zone *zone = pgdat->node_zones + i;
> + unsigned long goal;
> int nr_slab;
>
> if (!populated_zone(zone))
> @@ -2339,12 +2359,13 @@ loop_again:
> */
> mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
>
> + goal = high_wmark_pages(zone);
> /*
> * We put equal pressure on every zone, unless one
> * zone has way too many pages free already.
> */
> if (!zone_watermark_ok_safe(zone, order,
> - 8*high_wmark_pages(zone), end_zone, 0))
> + 8 * goal, end_zone, 0))
> shrink_zone(priority, zone, &sc);
> reclaim_state->reclaimed_slab = 0;
> nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
> @@ -2373,8 +2394,7 @@ loop_again:
> compact_zone_order(zone, sc.order, sc.gfp_mask,
> false);
>
> - if (!zone_watermark_ok_safe(zone, order,
> - high_wmark_pages(zone), end_zone, 0)) {
> + if (zone_needs_scan(zone, order, goal, end_zone)) {
> all_zones_ok = 0;
> /*
> * We are still under min water mark. This
> @@ -2587,7 +2607,7 @@ void wakeup_kswapd(struct zone *zone, int order)
> pgdat->kswapd_max_order = order;
> if (!waitqueue_active(&pgdat->kswapd_wait))
> return;
> - if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
> + if (!zone_needs_scan(zone, order, low_wmark_pages(zone), 0))
> return;
>
> trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
So we look only at zone_reclaimable_pages() to determine whether to
proceed with reclaim or not. What if I have tons of unused dentry and
inode caches, and we end up skipping the shrinker here?
--Ying
> 1.7.3.2
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2010-12-09 18:51 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-12-08 15:16 Johannes Weiner
2010-12-08 18:05 ` Rik van Riel
2010-12-08 22:19 ` Andrew Morton
2010-12-09 0:04 ` Johannes Weiner
2010-12-09 21:17 ` Andrew Morton
2010-12-10 16:27 ` Johannes Weiner
2011-01-05 11:15 ` Johannes Weiner
2011-01-04 23:56 ` Andrew Morton
2010-12-09 0:47 ` Rik van Riel
2010-12-09 14:34 ` Mel Gorman
2010-12-09 0:36 ` Simon Kirby
2010-12-09 0:49 ` Rik van Riel
2010-12-09 1:08 ` Simon Kirby
2010-12-09 14:42 ` Mel Gorman
2010-12-09 1:23 ` Andrew Morton
2010-12-09 1:55 ` Minchan Kim
2010-12-09 1:57 ` Minchan Kim
2010-12-09 2:01 ` Andrew Morton
2010-12-09 2:19 ` Minchan Kim
2010-12-09 5:18 ` Minchan Kim
2010-12-09 2:05 ` Simon Kirby
2010-12-09 8:55 ` Pekka Enberg
2010-12-09 14:46 ` Mel Gorman
2010-12-09 14:44 ` Mel Gorman
2010-12-09 18:03 ` Andrew Morton
2010-12-09 18:48 ` Ying Han
2010-12-10 11:34 ` Mel Gorman
2010-12-09 18:39 ` Ying Han
2010-12-10 11:37 ` Mel Gorman
2010-12-10 19:46 ` Ying Han
2010-12-09 1:29 ` Minchan Kim
2010-12-09 18:51 ` Ying Han [this message]
2010-12-10 7:25 ` KOSAKI Motohiro
2010-12-10 7:37 ` KOSAKI Motohiro
2010-12-10 10:54 ` Johannes Weiner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=AANLkTikOgkGBn9AbEDAM4KegsnwuXqF2jg7icu0yc8Kh@mail.gmail.com \
--to=yinghan@google.com \
--cc=akpm@linux-foundation.org \
--cc=hannes@cmpxchg.org \
--cc=linux-mm@kvack.org \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox