linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Ying Han <yinghan@google.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	"kosaki.motohiro@jp.fujitsu.com" <kosaki.motohiro@jp.fujitsu.com>,
	"balbir@linux.vnet.ibm.com" <balbir@linux.vnet.ibm.com>,
	"nishimura@mxp.nes.nec.co.jp" <nishimura@mxp.nes.nec.co.jp>,
	"akpm@linux-foundation.org" <akpm@linux-foundation.org>,
	Johannes Weiner <jweiner@redhat.com>,
	"minchan.kim@gmail.com" <minchan.kim@gmail.com>,
	Michal Hocko <mhocko@suse.cz>
Subject: Re: [PATCH 8/7] memcg : reclaim statistics
Date: Mon, 25 Apr 2011 22:35:31 -0700	[thread overview]
Message-ID: <BANLkTin6kD_JKcRkmDGbGrk=N7LNW2bvDw@mail.gmail.com> (raw)
In-Reply-To: <20110425184318.07e717ef.kamezawa.hiroyu@jp.fujitsu.com>

[-- Attachment #1: Type: text/plain, Size: 14994 bytes --]

On Mon, Apr 25, 2011 at 2:43 AM, KAMEZAWA Hiroyuki <
kamezawa.hiroyu@jp.fujitsu.com> wrote:

> When tuning memcg background reclaim, the cpu usage of each memcg's work
> is interesting information because some amount of shared resource is used
> (i.e. background reclaim uses a workqueue). Other information such as
> pgscan and pgreclaim is also important.
>
> This patch shows them via memory.stat with cpu usage for direct reclaim
> and softlimit reclaim and page scan statistics.
>
>
>  # cat /cgroup/memory/A/memory.stat
>  ....
>  direct_elapsed_ns 0
>  soft_elapsed_ns 0
>  wmark_elapsed_ns 103566424
>  direct_scanned 0
>  soft_scanned 0
>  wmark_scanned 29303
>  direct_freed 0
>  soft_freed 0
>  wmark_freed 29290
>
>
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> ---
>  Documentation/cgroups/memory.txt |   18 +++++++++
>  include/linux/memcontrol.h       |    6 +++
>  include/linux/swap.h             |    7 +++
>  mm/memcontrol.c                  |   77
> +++++++++++++++++++++++++++++++++++++--
>  mm/vmscan.c                      |   15 +++++++
>  5 files changed, 120 insertions(+), 3 deletions(-)
>
> Index: memcg/mm/memcontrol.c
> ===================================================================
> --- memcg.orig/mm/memcontrol.c
> +++ memcg/mm/memcontrol.c
> @@ -274,6 +274,17 @@ struct mem_cgroup {
>        bool                    bgreclaim_resched;
>        struct delayed_work     bgreclaim_work;
>        /*
> +        * reclaim statistics (not per zone, node)
> +        */
> +       spinlock_t              elapsed_lock;
> +       u64                     bgreclaim_elapsed;
> +       u64                     direct_elapsed;
> +       u64                     soft_elapsed;
> +
> +       u64                     reclaim_scan[NR_RECLAIM_CONTEXTS];
> +       u64                     reclaim_freed[NR_RECLAIM_CONTEXTS];
> +
> +       /*
>         * Should we move charges of a task when a task is moved into this
>         * mem_cgroup ? And what type of charges should we move ?
>         */
> @@ -1346,6 +1357,18 @@ void mem_cgroup_clear_unreclaimable(stru
>        return;
>  }
>
> +void mem_cgroup_reclaim_statistics(struct mem_cgroup *mem,
> +               int context, unsigned long scanned,
> +               unsigned long freed)
> +{
> +       if (!mem)
> +               return;
> +       spin_lock(&mem->elapsed_lock);
> +       mem->reclaim_scan[context] += scanned;
> +       mem->reclaim_freed[context] += freed;
> +       spin_unlock(&mem->elapsed_lock);
> +}
> +
>  unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
>                                        struct list_head *dst,
>                                        unsigned long *scanned, int order,
> @@ -1692,6 +1715,7 @@ static int mem_cgroup_hierarchical_recla
>        bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
>        unsigned long excess;
>        unsigned long nr_scanned;
> +       s64 start, end;
>
>        excess = res_counter_soft_limit_excess(&root_mem->res) >>
> PAGE_SHIFT;
>
> @@ -1735,16 +1759,27 @@ static int mem_cgroup_hierarchical_recla
>                }
>                /* we use swappiness of local cgroup */
>                if (check_soft) {
> +                       start = sched_clock();
>                        ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
>                                noswap, mem_cgroup_swappiness(victim), zone,
>                                &nr_scanned);
>                        *total_scanned += nr_scanned;
> +                       end = sched_clock();
> +                       spin_lock(&victim->elapsed_lock);
> +                       victim->soft_elapsed += end - start;
> +                       spin_unlock(&victim->elapsed_lock);
>                        mem_cgroup_soft_steal(victim, ret);
>                        mem_cgroup_soft_scan(victim, nr_scanned);
> -               } else
> +               } else {
> +                       start = sched_clock();
>                        ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
>                                                noswap,
>
>  mem_cgroup_swappiness(victim));
> +                       end = sched_clock();
> +                       spin_lock(&victim->elapsed_lock);
> +                       victim->direct_elapsed += end - start;
> +                       spin_unlock(&victim->elapsed_lock);
> +               }
>                css_put(&victim->css);
>                /*
>                 * At shrinking usage, we can't check we should stop here or
> @@ -3702,15 +3737,22 @@ static void memcg_bgreclaim(struct work_
>        struct delayed_work *dw = to_delayed_work(work);
>        struct mem_cgroup *mem =
>                container_of(dw, struct mem_cgroup, bgreclaim_work);
> -       int delay = 0;
> +       int delay;
>        unsigned long long required, usage, hiwat;
>
> +       delay = 0;
>        hiwat = res_counter_read_u64(&mem->res, RES_HIGH_WMARK_LIMIT);
>        usage = res_counter_read_u64(&mem->res, RES_USAGE);
>        required = usage - hiwat;
>        if (required >= 0)  {
> +               u64 start, end;
>                required = ((usage - hiwat) >> PAGE_SHIFT) + 1;
> +               start = sched_clock();
>                delay = shrink_mem_cgroup(mem, (long)required);
> +               end = sched_clock();
> +               spin_lock(&mem->elapsed_lock);
> +               mem->bgreclaim_elapsed += end - start;
> +               spin_unlock(&mem->elapsed_lock);
>        }
>        if (!mem->bgreclaim_resched  ||
>                mem_cgroup_watermark_ok(mem, CHARGE_WMARK_HIGH)) {
> @@ -4152,6 +4194,15 @@ enum {
>        MCS_INACTIVE_FILE,
>        MCS_ACTIVE_FILE,
>        MCS_UNEVICTABLE,
> +       MCS_DIRECT_ELAPSED,
> +       MCS_SOFT_ELAPSED,
> +       MCS_WMARK_ELAPSED,
> +       MCS_DIRECT_SCANNED,
> +       MCS_SOFT_SCANNED,
> +       MCS_WMARK_SCANNED,
> +       MCS_DIRECT_FREED,
> +       MCS_SOFT_FREED,
> +       MCS_WMARK_FREED,
>        NR_MCS_STAT,
>  };
>
> @@ -4177,7 +4228,16 @@ struct {
>        {"active_anon", "total_active_anon"},
>        {"inactive_file", "total_inactive_file"},
>        {"active_file", "total_active_file"},
> -       {"unevictable", "total_unevictable"}
> +       {"unevictable", "total_unevictable"},
> +       {"direct_elapsed_ns", "total_direct_elapsed_ns"},
> +       {"soft_elapsed_ns", "total_soft_elapsed_ns"},
> +       {"wmark_elapsed_ns", "total_wmark_elapsed_ns"},
> +       {"direct_scanned", "total_direct_scanned"},
> +       {"soft_scanned", "total_soft_scanned"},
> +       {"wmark_scanned", "total_wmark_scanned"},
> +       {"direct_freed", "total_direct_freed"},
> +       {"soft_freed", "total_soft_freed"},
> +       {"wmark_freed", "total_wamrk_freed"}
>  };
>
>
> @@ -4185,6 +4245,7 @@ static void
>  mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat
> *s)
>  {
>        s64 val;
> +       int i;
>
>        /* per cpu stat */
>        val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE);
> @@ -4221,6 +4282,15 @@ mem_cgroup_get_local_stat(struct mem_cgr
>        s->stat[MCS_ACTIVE_FILE] += val * PAGE_SIZE;
>        val = mem_cgroup_get_local_zonestat(mem, LRU_UNEVICTABLE);
>        s->stat[MCS_UNEVICTABLE] += val * PAGE_SIZE;
> +
> +       /* reclaim stats */
> +       s->stat[MCS_DIRECT_ELAPSED] += mem->direct_elapsed;
> +       s->stat[MCS_SOFT_ELAPSED] += mem->soft_elapsed;
> +       s->stat[MCS_WMARK_ELAPSED] += mem->bgreclaim_elapsed;
> +       for (i = 0; i < NR_RECLAIM_CONTEXTS; i++) {
> +               s->stat[i + MCS_DIRECT_SCANNED] += mem->reclaim_scan[i];
> +               s->stat[i + MCS_DIRECT_FREED] += mem->reclaim_freed[i];
> +       }
>  }
>
>  static void
> @@ -4889,6 +4959,7 @@ static struct mem_cgroup *mem_cgroup_all
>                goto out_free;
>        spin_lock_init(&mem->pcp_counter_lock);
>        INIT_DELAYED_WORK(&mem->bgreclaim_work, memcg_bgreclaim);
> +       spin_lock_init(&mem->elapsed_lock);
>        mem->bgreclaim_resched = true;
>        return mem;
>
> Index: memcg/include/linux/memcontrol.h
> ===================================================================
> --- memcg.orig/include/linux/memcontrol.h
> +++ memcg/include/linux/memcontrol.h
> @@ -90,6 +90,8 @@ extern int mem_cgroup_select_victim_node
>                                        const nodemask_t *nodes);
>
>  int shrink_mem_cgroup(struct mem_cgroup *mem, long required);
> +void mem_cgroup_reclaim_statistics(struct mem_cgroup *mem, int context,
> +                       unsigned long scanned, unsigned long freed);
>
>  static inline
>  int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup
> *cgroup)
> @@ -423,6 +425,10 @@ static inline
>  void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item
> idx)
>  {
>  }
> +void mem_cgroup_reclaim_statistics(struct mem_cgroup *mem, int context,
> +                               unsigned long scanned, unsigned long freed)
> +{
> +}
>  #endif /* CONFIG_CGROUP_MEM_CONT */
>
>  #if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM)
> Index: memcg/include/linux/swap.h
> ===================================================================
> --- memcg.orig/include/linux/swap.h
> +++ memcg/include/linux/swap.h
> @@ -250,6 +250,13 @@ static inline void lru_cache_add_file(st
>  #define ISOLATE_ACTIVE 1       /* Isolate active pages. */
>  #define ISOLATE_BOTH 2         /* Isolate both active and inactive pages.
> */
>
> +/* context for memory reclaim (comes from memory cgroup). */
> +enum {
> +       RECLAIM_DIRECT,         /* under direct reclaim */
> +       RECLAIM_KSWAPD,         /* under global kswapd's soft limit */
> +       RECLAIM_WMARK,          /* under background reclaim by watermark */
> +       NR_RECLAIM_CONTEXTS
> +};
>  /* linux/mm/vmscan.c */
>  extern unsigned long try_to_free_pages(struct zonelist *zonelist, int
> order,
>                                        gfp_t gfp_mask, nodemask_t *mask);
> Index: memcg/mm/vmscan.c
> ===================================================================
> --- memcg.orig/mm/vmscan.c
> +++ memcg/mm/vmscan.c
> @@ -72,6 +72,9 @@ typedef unsigned __bitwise__ reclaim_mod
>  #define RECLAIM_MODE_LUMPYRECLAIM      ((__force reclaim_mode_t)0x08u)
>  #define RECLAIM_MODE_COMPACTION                ((__force
> reclaim_mode_t)0x10u)
>
> +/* 3 reclaim contexts for memcg statistics. */
> +enum {DIRECT_RECLAIM, KSWAPD_RECLAIM, WMARK_RECLAIM};
> +
>  struct scan_control {
>        /* Incremented by the number of inactive pages that were scanned */
>        unsigned long nr_scanned;
> @@ -107,6 +110,7 @@ struct scan_control {
>
>        /* Which cgroup do we reclaim from */
>        struct mem_cgroup *mem_cgroup;
> +       int     reclaim_context;
>
>        /*
>         * Nodemask of nodes allowed by the caller. If NULL, all nodes
> @@ -2116,6 +2120,10 @@ out:
>        delayacct_freepages_end();
>        put_mems_allowed();
>
> +       if (!scanning_global_lru(sc))
> +               mem_cgroup_reclaim_statistics(sc->mem_cgroup,
> +                       sc->reclaim_context, total_scanned,
> sc->nr_reclaimed);
> +
>        if (sc->nr_reclaimed)
>                return sc->nr_reclaimed;
>
> @@ -2178,6 +2186,7 @@ unsigned long mem_cgroup_shrink_node_zon
>                .swappiness = swappiness,
>                .order = 0,
>                .mem_cgroup = mem,
> +               .reclaim_context = RECLAIM_KSWAPD,
>        };
>
>        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
> @@ -2198,6 +2207,8 @@ unsigned long mem_cgroup_shrink_node_zon
>
>        trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
>
> +       mem_cgroup_reclaim_statistics(sc.mem_cgroup,
> +                       sc.reclaim_context, sc.nr_scanned,
> sc.nr_reclaimed);
>        *nr_scanned = sc.nr_scanned;
>        return sc.nr_reclaimed;
>  }
> @@ -2217,6 +2228,7 @@ unsigned long try_to_free_mem_cgroup_pag
>                .swappiness = swappiness,
>                .order = 0,
>                .mem_cgroup = mem_cont,
> +               .reclaim_context = RECLAIM_DIRECT,
>                .nodemask = NULL, /* we don't care the placement */
>        };
>
> @@ -2384,6 +2396,7 @@ int shrink_mem_cgroup(struct mem_cgroup
>                .may_swap = 1,
>                .order = 0,
>                .mem_cgroup = mem,
> +               .reclaim_context = RECLAIM_WMARK,
>        };
>        /* writepage will be set later per zone */
>        sc.may_writepage = 0;
> @@ -2434,6 +2447,8 @@ int shrink_mem_cgroup(struct mem_cgroup
>        if (sc.nr_reclaimed > sc.nr_to_reclaim/2)
>                delay = 0;
>  out:
> +       mem_cgroup_reclaim_statistics(sc.mem_cgroup, sc.reclaim_context,
> +                       total_scanned, sc.nr_reclaimed);
>        current->flags &= ~PF_SWAPWRITE;
>        return delay;
>  }
> Index: memcg/Documentation/cgroups/memory.txt
> ===================================================================
> --- memcg.orig/Documentation/cgroups/memory.txt
> +++ memcg/Documentation/cgroups/memory.txt
> @@ -398,6 +398,15 @@ active_anon        - # of bytes of anonymous an
>  inactive_file  - # of bytes of file-backed memory on inactive LRU list.
>  active_file    - # of bytes of file-backed memory on active LRU list.
>  unevictable    - # of bytes of memory that cannot be reclaimed (mlocked
> etc).
> +direct_elapsed_ns  - # of elapsed cpu time at hard limit reclaim (ns)
> +soft_elapsed_ns  - # of elapsed cpu time at soft limit reclaim (ns)
> +wmark_elapsed_ns  - # of elapsed cpu time at hi/low watermark reclaim (ns)
> +direct_scanned - # of page scans at hard limit reclaim
> +soft_scanned   - # of page scans at soft limit reclaim
> +wmark_scanned  - # of page scans at hi/low watermark reclaim
> +direct_freed   - # of page freeing at hard limit reclaim
> +soft_freed     - # of page freeing at soft limit reclaim
> +wmark_freed    - # of page freeing at hi/low watermark reclaim
>
>  # status considering hierarchy (see memory.use_hierarchy settings)
>
> @@ -421,6 +430,15 @@ total_active_anon  - sum of all children'
>  total_inactive_file    - sum of all children's "inactive_file"
>  total_active_file      - sum of all children's "active_file"
>  total_unevictable      - sum of all children's "unevictable"
> +total_direct_elapsed_ns - sum of all children's "direct_elapsed_ns"
> +total_soft_elapsed_ns  - sum of all children's "soft_elapsed_ns"
> +total_wmark_elapsed_ns - sum of all children's "wmark_elapsed_ns"
> +total_direct_scanned   - sum of all children's "direct_scanned"
> +total_soft_scanned     - sum of all children's "soft_scanned"
> +total_wmark_scanned    - sum of all children's "wmark_scanned"
> +total_direct_freed     - sum of all children's "direct_freed"
> +total_soft_freed       - sum of all children's "soft_freed"
> +total_wamrk_freed      - sum of all children's "wmark_freed"
>
>  # The following additional stats are dependent on CONFIG_DEBUG_VM.
>
> Those stats look good to me. Thanks

--Ying

[-- Attachment #2: Type: text/html, Size: 17218 bytes --]

  reply	other threads:[~2011-04-26  5:35 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-25  9:25 [PATCH 0/7] memcg background reclaim , yet another one KAMEZAWA Hiroyuki
2011-04-25  9:28 ` [PATCH 1/7] memcg: add high/low watermark to res_counter KAMEZAWA Hiroyuki
2011-04-26 17:54   ` Ying Han
2011-04-29 13:33   ` Michal Hocko
2011-05-01  6:06     ` KOSAKI Motohiro
2011-05-03  6:49       ` Michal Hocko
2011-05-03  7:45         ` KOSAKI Motohiro
2011-05-03  8:25           ` Michal Hocko
2011-05-03 17:01             ` Ying Han
2011-05-04  8:58               ` Michal Hocko
2011-05-04 17:16                 ` Ying Han
2011-05-05  6:59                   ` Michal Hocko
2011-05-06  5:28                     ` KAMEZAWA Hiroyuki
2011-05-06 14:22                       ` Johannes Weiner
2011-05-09  0:21                         ` KAMEZAWA Hiroyuki
2011-05-09  5:47                           ` Ying Han
2011-05-09  9:58                           ` Johannes Weiner
2011-05-09  9:59                             ` KAMEZAWA Hiroyuki
2011-05-10  4:43                             ` Ying Han
2011-05-09  5:40                       ` Ying Han
2011-05-09  7:10                         ` KAMEZAWA Hiroyuki
2011-05-09 10:18                           ` Johannes Weiner
2011-05-09 12:49                             ` Michal Hocko
2011-05-09 23:49                               ` KAMEZAWA Hiroyuki
2011-05-10  4:39                                 ` Ying Han
2011-05-10  4:51                             ` Ying Han
2011-05-10  6:27                               ` Johannes Weiner
2011-05-10  7:09                                 ` Ying Han
2011-05-04  3:55             ` KOSAKI Motohiro
2011-05-04  8:55               ` Michal Hocko
2011-05-09  3:24                 ` KOSAKI Motohiro
2011-05-02  9:07   ` Balbir Singh
2011-05-06  5:30     ` KAMEZAWA Hiroyuki
2011-04-25  9:29 ` [PATCH 2/7] memcg high watermark interface KAMEZAWA Hiroyuki
2011-04-25 22:36   ` Ying Han
2011-04-25  9:31 ` [PATCH 3/7] memcg: select victim node in round robin KAMEZAWA Hiroyuki
2011-04-25  9:34 ` [PATCH 4/7] memcg fix scan ratio with small memcg KAMEZAWA Hiroyuki
2011-04-25 17:35   ` Ying Han
2011-04-26  1:43     ` KAMEZAWA Hiroyuki
2011-04-25  9:36 ` [PATCH 5/7] memcg bgreclaim core KAMEZAWA Hiroyuki
2011-04-26  4:59   ` Ying Han
2011-04-26  5:08     ` KAMEZAWA Hiroyuki
2011-04-26 23:15       ` Ying Han
2011-04-27  0:10         ` KAMEZAWA Hiroyuki
2011-04-27  1:01           ` KAMEZAWA Hiroyuki
2011-04-26 18:37   ` Ying Han
2011-04-25  9:40 ` [PATCH 6/7] memcg add zone_all_unreclaimable KAMEZAWA Hiroyuki
2011-04-25  9:42 ` [PATCH 7/7] memcg watermark reclaim workqueue KAMEZAWA Hiroyuki
2011-04-26 23:19   ` Ying Han
2011-04-27  0:31     ` KAMEZAWA Hiroyuki
2011-04-27  3:40       ` Ying Han
2011-04-25  9:43 ` [PATCH 8/7] memcg : reclaim statistics KAMEZAWA Hiroyuki
2011-04-26  5:35   ` Ying Han [this message]
2011-04-25  9:49 ` [PATCH 0/7] memcg background reclaim , yet another one KAMEZAWA Hiroyuki
2011-04-25 10:14 ` KAMEZAWA Hiroyuki
2011-04-25 22:21   ` Ying Han
2011-04-26  1:38     ` KAMEZAWA Hiroyuki
2011-04-26  7:19       ` Ying Han
2011-04-26  7:43         ` KAMEZAWA Hiroyuki
2011-04-26  8:43           ` Ying Han
2011-04-26  8:47             ` KAMEZAWA Hiroyuki
2011-04-26 23:08               ` Ying Han
2011-04-27  0:34                 ` KAMEZAWA Hiroyuki
2011-04-27  1:19                   ` Ying Han
2011-04-28  3:55               ` Ying Han
2011-04-28  4:05                 ` KAMEZAWA Hiroyuki
2011-05-02  7:02     ` Balbir Singh
2011-05-02  6:09 ` Balbir Singh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='BANLkTin6kD_JKcRkmDGbGrk=N7LNW2bvDw@mail.gmail.com' \
    --to=yinghan@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=balbir@linux.vnet.ibm.com \
    --cc=jweiner@redhat.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.cz \
    --cc=minchan.kim@gmail.com \
    --cc=nishimura@mxp.nes.nec.co.jp \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox