linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Ying Han <yinghan@google.com>
To: Johannes Weiner <hannes@cmpxchg.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Rik van Riel <riel@redhat.com>,
	linux-mm@kvack.org
Subject: Re: [patch] mm: skip rebalance of hopeless zones
Date: Thu, 9 Dec 2010 10:51:40 -0800	[thread overview]
Message-ID: <AANLkTikOgkGBn9AbEDAM4KegsnwuXqF2jg7icu0yc8Kh@mail.gmail.com> (raw)
In-Reply-To: <1291821419-11213-1-git-send-email-hannes@cmpxchg.org>

On Wed, Dec 8, 2010 at 7:16 AM, Johannes Weiner <hannes@cmpxchg.org> wrote:
> Kswapd tries to rebalance zones persistently until their high
> watermarks are restored.
>
> If the amount of unreclaimable pages in a zone makes this impossible
> for reclaim, though, kswapd will end up in a busy loop without a
> chance of reaching its goal.
>
> This behaviour was observed on a virtual machine with a tiny
> Normal-zone that filled up with unreclaimable slab objects.
>
> This patch makes kswapd skip rebalancing on such 'hopeless' zones and
> leaves them to direct reclaim.
>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
>  include/linux/mmzone.h |    2 ++
>  mm/page_alloc.c        |    4 ++--
>  mm/vmscan.c            |   36 ++++++++++++++++++++++++++++--------
>  3 files changed, 32 insertions(+), 10 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 4890662..0cc1d63 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -655,6 +655,8 @@ typedef struct pglist_data {
>  extern struct mutex zonelists_mutex;
>  void build_all_zonelists(void *data);
>  void wakeup_kswapd(struct zone *zone, int order);
> +bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> +                        int classzone_idx, int alloc_flags, long free_pages);
>  bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,
>                int classzone_idx, int alloc_flags);
>  bool zone_watermark_ok_safe(struct zone *z, int order, unsigned long mark,
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 1845a97..c7d2b28 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1458,8 +1458,8 @@ static inline int should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
>  * Return true if free pages are above 'mark'. This takes into account the order
>  * of the allocation.
>  */
> -static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> -                     int classzone_idx, int alloc_flags, long free_pages)
> +bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
> +                        int classzone_idx, int alloc_flags, long free_pages)
>  {
>        /* free_pages my go negative - that's OK */
>        long min = mark;
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 42a4859..5623f36 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -2191,6 +2191,25 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
>  }
>  #endif
>
> +static bool zone_needs_scan(struct zone *zone, int order,
> +                           unsigned long goal, int classzone_idx)
> +{
> +       unsigned long free, prospect;
> +
> +       free = zone_page_state(zone, NR_FREE_PAGES);
> +       if (zone->percpu_drift_mark && free < zone->percpu_drift_mark)
> +               free = zone_page_state_snapshot(zone, NR_FREE_PAGES);
> +
> +       if (__zone_watermark_ok(zone, order, goal, classzone_idx, 0, free))
> +               return false;
> +       /*
> +        * Ensure that the watermark is at all restorable through
> +        * reclaim.  Otherwise, leave the zone to direct reclaim.
> +        */
> +       prospect = free + zone_reclaimable_pages(zone);
> +       return prospect >= goal;
> +}
> +
>  /* is kswapd sleeping prematurely? */
>  static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
>  {
> @@ -2210,8 +2229,7 @@ static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
>                if (zone->all_unreclaimable)
>                        continue;
>
> -               if (!zone_watermark_ok_safe(zone, order, high_wmark_pages(zone),
> -                                                               0, 0))
> +               if (zone_needs_scan(zone, order, high_wmark_pages(zone), 0))
>                        return 1;
>        }
>
> @@ -2282,6 +2300,7 @@ loop_again:
>                 */
>                for (i = pgdat->nr_zones - 1; i >= 0; i--) {
>                        struct zone *zone = pgdat->node_zones + i;
> +                       unsigned long goal;
>
>                        if (!populated_zone(zone))
>                                continue;
> @@ -2297,8 +2316,8 @@ loop_again:
>                                shrink_active_list(SWAP_CLUSTER_MAX, zone,
>                                                        &sc, priority, 0);
>
> -                       if (!zone_watermark_ok_safe(zone, order,
> -                                       high_wmark_pages(zone), 0, 0)) {
> +                       goal = high_wmark_pages(zone);
> +                       if (zone_needs_scan(zone, order, goal, 0)) {
>                                end_zone = i;
>                                break;
>                        }
> @@ -2323,6 +2342,7 @@ loop_again:
>                 */
>                for (i = 0; i <= end_zone; i++) {
>                        struct zone *zone = pgdat->node_zones + i;
> +                       unsigned long goal;
>                        int nr_slab;
>
>                        if (!populated_zone(zone))
> @@ -2339,12 +2359,13 @@ loop_again:
>                         */
>                        mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
>
> +                       goal = high_wmark_pages(zone);
>                        /*
>                         * We put equal pressure on every zone, unless one
>                         * zone has way too many pages free already.
>                         */
>                        if (!zone_watermark_ok_safe(zone, order,
> -                                       8*high_wmark_pages(zone), end_zone, 0))
> +                                                   8 * goal, end_zone, 0))
>                                shrink_zone(priority, zone, &sc);
>                        reclaim_state->reclaimed_slab = 0;
>                        nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
> @@ -2373,8 +2394,7 @@ loop_again:
>                                compact_zone_order(zone, sc.order, sc.gfp_mask,
>                                                        false);
>
> -                       if (!zone_watermark_ok_safe(zone, order,
> -                                       high_wmark_pages(zone), end_zone, 0)) {
> +                       if (zone_needs_scan(zone, order, goal, end_zone)) {
>                                all_zones_ok = 0;
>                                /*
>                                 * We are still under min water mark.  This
> @@ -2587,7 +2607,7 @@ void wakeup_kswapd(struct zone *zone, int order)
>                pgdat->kswapd_max_order = order;
>        if (!waitqueue_active(&pgdat->kswapd_wait))
>                return;
> -       if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
> +       if (!zone_needs_scan(zone, order, low_wmark_pages(zone), 0))
>                return;
>
>        trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);


So we look only at zone_reclaimable_pages() to determine whether to
proceed with reclaim or not. What if I have tons of unused dentry and
inode caches, and we end up skipping the shrinker here?

--Ying


> 1.7.3.2
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
>

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-12-09 18:51 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-08 15:16 Johannes Weiner
2010-12-08 18:05 ` Rik van Riel
2010-12-08 22:19 ` Andrew Morton
2010-12-09  0:04   ` Johannes Weiner
2010-12-09 21:17     ` Andrew Morton
2010-12-10 16:27       ` Johannes Weiner
2011-01-05 11:15         ` Johannes Weiner
2011-01-04 23:56     ` Andrew Morton
2010-12-09  0:47   ` Rik van Riel
2010-12-09 14:34   ` Mel Gorman
2010-12-09  0:36 ` Simon Kirby
2010-12-09  0:49   ` Rik van Riel
2010-12-09  1:08     ` Simon Kirby
2010-12-09 14:42       ` Mel Gorman
2010-12-09  1:23   ` Andrew Morton
2010-12-09  1:55     ` Minchan Kim
2010-12-09  1:57       ` Minchan Kim
2010-12-09  2:01       ` Andrew Morton
2010-12-09  2:19         ` Minchan Kim
2010-12-09  5:18         ` Minchan Kim
2010-12-09  2:05     ` Simon Kirby
2010-12-09  8:55     ` Pekka Enberg
2010-12-09 14:46       ` Mel Gorman
2010-12-09 14:44     ` Mel Gorman
2010-12-09 18:03       ` Andrew Morton
2010-12-09 18:48       ` Ying Han
2010-12-10 11:34         ` Mel Gorman
2010-12-09 18:39     ` Ying Han
2010-12-10 11:37       ` Mel Gorman
2010-12-10 19:46         ` Ying Han
2010-12-09  1:29 ` Minchan Kim
2010-12-09 18:51 ` Ying Han [this message]
2010-12-10  7:25   ` KOSAKI Motohiro
2010-12-10  7:37     ` KOSAKI Motohiro
2010-12-10 10:54   ` Johannes Weiner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=AANLkTikOgkGBn9AbEDAM4KegsnwuXqF2jg7icu0yc8Kh@mail.gmail.com \
    --to=yinghan@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-mm@kvack.org \
    --cc=riel@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox