linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Johannes Weiner <hannes@cmpxchg.org>
To: Vladimir Davydov <vdavydov@parallels.com>
Cc: akpm@linux-foundation.org, mhocko@suse.cz, glommer@openvz.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	cgroups@vger.kernel.org, devel@openvz.org
Subject: Re: [PATCH v11 07/15] memcg: scan cache objects hierarchically
Date: Mon, 25 Nov 2013 11:51:13 -0500	[thread overview]
Message-ID: <20131125165113.GC22729@cmpxchg.org> (raw)
In-Reply-To: <840647939662771e06c375350f3ccb11dd4c6dc1.1385377616.git.vdavydov@parallels.com>

On Mon, Nov 25, 2013 at 04:07:40PM +0400, Vladimir Davydov wrote:
> From: Glauber Costa <glommer@openvz.org>
> 
> When reaching shrink_slab, we should descend into child memcgs searching
> for objects that could be shrunk. This is true even if the memcg does
> not have kmem limits on, since the kmem res_counter will also be billed
> against the user res_counter of the parent.
> 
> It is possible that we will free objects and not free any pages, which
> will just harm the child groups without helping the parent group at all.
> But at this point, we basically are prepared to pay the price.
> 
> Signed-off-by: Glauber Costa <glommer@openvz.org>
> Cc: Dave Chinner <dchinner@redhat.com>
> Cc: Mel Gorman <mgorman@suse.de>
> Cc: Rik van Riel <riel@redhat.com>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Michal Hocko <mhocko@suse.cz>
> Cc: Hugh Dickins <hughd@google.com>
> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> ---
>  include/linux/memcontrol.h |    6 ++++
>  mm/memcontrol.c            |   13 +++++++++
>  mm/vmscan.c                |   65 ++++++++++++++++++++++++++++++++++++--------
>  3 files changed, 73 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index d16ba51..a513fad 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -488,6 +488,7 @@ static inline bool memcg_kmem_enabled(void)
>  	return static_key_false(&memcg_kmem_enabled_key);
>  }
>  
> +bool memcg_kmem_should_reclaim(struct mem_cgroup *memcg);
>  bool memcg_kmem_is_active(struct mem_cgroup *memcg);
>  
>  /*
> @@ -624,6 +625,11 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
>  }
>  #else
>  
> +static inline bool memcg_kmem_should_reclaim(struct mem_cgroup *memcg)
> +{
> +	return false;
> +}
> +
>  static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
>  {
>  	return false;
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 9be1e8b..f5d7128 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2995,6 +2995,19 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
>  }
>  
>  #ifdef CONFIG_MEMCG_KMEM
> +bool memcg_kmem_should_reclaim(struct mem_cgroup *memcg)
> +{
> +	struct mem_cgroup *iter;
> +
> +	for_each_mem_cgroup_tree(iter, memcg) {
> +		if (memcg_kmem_is_active(iter)) {
> +			mem_cgroup_iter_break(memcg, iter);
> +			return true;
> +		}
> +	}
> +	return false;
> +}
> +
>  static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
>  {
>  	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index cdfc364..36fc133 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -149,7 +149,7 @@ static bool global_reclaim(struct scan_control *sc)
>  static bool has_kmem_reclaim(struct scan_control *sc)
>  {
>  	return !sc->target_mem_cgroup ||
> -		memcg_kmem_is_active(sc->target_mem_cgroup);
> +		memcg_kmem_should_reclaim(sc->target_mem_cgroup);
>  }
>  
>  static unsigned long
> @@ -360,12 +360,35 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
>   *
>   * Returns the number of slab objects which we shrunk.
>   */
> +static unsigned long
> +shrink_slab_one(struct shrink_control *shrinkctl, struct shrinker *shrinker,
> +		unsigned long nr_pages_scanned, unsigned long lru_pages)

one what?

> +{
> +	unsigned long freed = 0;
> +
> +	for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
> +		if (!node_online(shrinkctl->nid))
> +			continue;
> +
> +		if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
> +		    (shrinkctl->nid != 0))
> +			break;
> +
> +		freed += shrink_slab_node(shrinkctl, shrinker,
> +			 nr_pages_scanned, lru_pages);
> +
> +	}
> +
> +	return freed;
> +}
> +
>  unsigned long shrink_slab(struct shrink_control *shrinkctl,
>  			  unsigned long nr_pages_scanned,
>  			  unsigned long lru_pages)
>  {
>  	struct shrinker *shrinker;
>  	unsigned long freed = 0;
> +	struct mem_cgroup *root = shrinkctl->target_mem_cgroup;
>  
>  	if (nr_pages_scanned == 0)
>  		nr_pages_scanned = SWAP_CLUSTER_MAX;
> @@ -390,19 +413,39 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
>  		if (shrinkctl->target_mem_cgroup &&
>  		    !(shrinker->flags & SHRINKER_MEMCG_AWARE))
>  			continue;
> +		/*
> +		 * In a hierarchical chain, it might be that not all memcgs are
> +		 * kmem active. kmemcg design mandates that when one memcg is
> +		 * active, its children will be active as well. But it is
> +		 * perfectly possible that its parent is not.
> +		 *
> +		 * We also need to make sure we scan at least once, for the
> +		 * global case. So if we don't have a target memcg (saved in
> +		 * root), we proceed normally and expect to break in the next
> +		 * round.
> +		 */
> +		do {
> +			struct mem_cgroup *memcg = shrinkctl->target_mem_cgroup;
>  
> -		for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
> -			if (!node_online(shrinkctl->nid))
> -				continue;
> -
> -			if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
> -			    (shrinkctl->nid != 0))
> +			if (!memcg || memcg_kmem_is_active(memcg))
> +				freed += shrink_slab_one(shrinkctl, shrinker,
> +					 nr_pages_scanned, lru_pages);
> +			/*
> +			 * For non-memcg aware shrinkers, we will arrive here
> +			 * at first pass because we need to scan the root
> +			 * memcg.  We need to bail out: precisely because they
> +			 * are not memcg aware, instead of noticing that they
> +			 * have nothing to shrink, they would just shrink again
> +			 * and deplete too many objects.
> +			 */

I actually found the code easier to understand without this comment.

> +			if (!(shrinker->flags & SHRINKER_MEMCG_AWARE))
>  				break;
> +			shrinkctl->target_mem_cgroup =
> +				mem_cgroup_iter(root, memcg, NULL);

The target memcg is always the same, don't change this.  Look at the
lru scan code for reference.  Iterate zones (nodes in this case)
first, then iterate the memcgs in each zone (node), look up the lruvec
and then call shrink_slab_lruvec(lruvec, ...).

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2013-11-25 16:51 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-25 12:07 [PATCH v11 00/15] kmemcg shrinkers Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 01/15] memcg: make cache index determination more robust Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 02/15] memcg: consolidate callers of memcg_cache_id Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 03/15] vmscan: also shrink slab in memcg pressure Vladimir Davydov
2013-11-25 16:27   ` Johannes Weiner
2013-11-25 12:07 ` [PATCH v11 04/15] memcg: move initialization to memcg creation Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 05/15] memcg: move stop and resume accounting functions Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 06/15] memcg: per-memcg kmem shrinking Vladimir Davydov
2013-11-25 16:44   ` Johannes Weiner
2013-11-25 12:07 ` [PATCH v11 07/15] memcg: scan cache objects hierarchically Vladimir Davydov
2013-11-25 16:51   ` Johannes Weiner [this message]
2013-11-25 12:07 ` [PATCH v11 08/15] vmscan: take at least one pass with shrinkers Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 09/15] memcg,list_lru: add per-memcg LRU list infrastructure Vladimir Davydov
2013-11-25 16:56   ` Johannes Weiner
2013-11-25 12:07 ` [PATCH v11 10/15] memcg,list_lru: add function walking over all lists of a per-memcg LRU Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 11/15] super: make icache, dcache shrinkers memcg-aware Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 12/15] memcg: allow kmem limit to be resized down Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 13/15] vmpressure: in-kernel notifications Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 14/15] memcg: reap dead memcgs upon global memory pressure Vladimir Davydov
2013-11-25 12:07 ` [PATCH v11 15/15] memcg: flush memcg items upon memcg destruction Vladimir Davydov
2013-11-25 17:41 ` [PATCH v11 00/15] kmemcg shrinkers Johannes Weiner
2013-11-26  6:47   ` Vladimir Davydov
2013-11-26 12:55     ` [Devel] " Vladimir Davydov
2013-11-26 16:46       ` Andrew Morton
2013-11-26 22:47     ` Dave Chinner
2013-11-27  6:26       ` Vladimir Davydov
  -- strict thread matches above, loose matches on Subject: below --
2013-10-24 12:04 Vladimir Davydov
2013-10-24 12:04 ` [PATCH v11 07/15] memcg: scan cache objects hierarchically Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131125165113.GC22729@cmpxchg.org \
    --to=hannes@cmpxchg.org \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=devel@openvz.org \
    --cc=glommer@openvz.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.cz \
    --cc=vdavydov@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox