linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Glauber Costa <glommer@parallels.com>
To: linux-kernel@vger.kernel.org
Cc: cgroups@vger.kernel.org, linux-mm@kvack.org,
	kamezawa.hiroyu@jp.fujitsu.com, Tejun Heo <tj@kernel.org>,
	Li Zefan <lizefan@huawei.com>, Greg Thelen <gthelen@google.com>,
	Suleiman Souhlal <suleiman@google.com>,
	Michal Hocko <mhocko@suse.cz>,
	Johannes Weiner <hannes@cmpxchg.org>,
	devel@openvz.org, David Rientjes <rientjes@google.com>,
	Glauber Costa <glommer@parallels.com>,
	Christoph Lameter <cl@linux.com>,
	Pekka Enberg <penberg@cs.helsinki.fi>
Subject: [PATCH v3 22/28] memcg/slub: shrink dead caches
Date: Fri, 25 May 2012 17:03:42 +0400	[thread overview]
Message-ID: <1337951028-3427-23-git-send-email-glommer@parallels.com> (raw)
In-Reply-To: <1337951028-3427-1-git-send-email-glommer@parallels.com>

In the slub allocator, when the last object of a page goes away, we
don't necessarily free it - there is no test for an empty page in
any slab_free path.

This means that when we destroy a memcg cache that happened to be empty,
such caches may take a long time to go away: removing the memcg
reference won't destroy them - because there are pending references -
and the empty pages will stay there until a shrinker is called upon
for any reason.

This patch marks all memcg caches as dead. kmem_cache_shrink is called
for the ones that are not yet dead - this will force internal cache
reorganization, and then all references to empty pages will be removed.

An unlikely branch is used to make sure this case does not affect
performance in the usual slab_free path.

Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: Christoph Lameter <cl@linux.com>
CC: Pekka Enberg <penberg@cs.helsinki.fi>
CC: Michal Hocko <mhocko@suse.cz>
CC: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: Johannes Weiner <hannes@cmpxchg.org>
CC: Suleiman Souhlal <suleiman@google.com>
---
 include/linux/slab.h     |    4 +++
 include/linux/slub_def.h |    8 +++++++
 mm/memcontrol.c          |   49 +++++++++++++++++++++++++++++++++++++++++++--
 mm/slub.c                |    1 +
 4 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/include/linux/slab.h b/include/linux/slab.h
index c81a5d3..25f073e 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -154,10 +154,14 @@ unsigned int kmem_cache_size(struct kmem_cache *);
 #endif
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#include <linux/workqueue.h>
+
 struct mem_cgroup_cache_params {
 	struct mem_cgroup *memcg;
 	int id;
 	atomic_t refcnt;
+	bool dead;
+	struct work_struct cache_shrinker;
 
 #ifdef CONFIG_SLAB
 	/* Original cache parameters, used when creating a memcg cache */
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index ba9c68b..c1428ee 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -135,6 +135,14 @@ static inline bool slab_is_parent(struct kmem_cache *s,
 #endif
 }
 
+static inline void kmem_cache_verify_dead(struct kmem_cache *cachep)
+{
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	if (unlikely(cachep->memcg_params.dead))
+		schedule_work(&cachep->memcg_params.cache_shrinker);
+#endif
+}
+
 /*
  * Kmalloc subsystem.
  */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e2ba527..e2576c5 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -520,7 +520,7 @@ char *mem_cgroup_cache_name(struct mem_cgroup *memcg, struct kmem_cache *cachep)
 
 	BUG_ON(dentry == NULL);
 
-	name = kasprintf(GFP_KERNEL, "%s(%d:%s)",
+	name = kasprintf(GFP_KERNEL, "%s(%d:%s)dead",
 	    cachep->name, css_id(&memcg->css), dentry->d_name.name);
 
 	return name;
@@ -557,11 +557,24 @@ void mem_cgroup_release_cache(struct kmem_cache *cachep)
 		ida_simple_remove(&cache_types, cachep->memcg_params.id);
 }
 
+static void cache_shrinker_work_func(struct work_struct *work)
+{
+	struct mem_cgroup_cache_params *params;
+	struct kmem_cache *cachep;
+
+	params = container_of(work, struct mem_cgroup_cache_params,
+			      cache_shrinker);
+	cachep = container_of(params, struct kmem_cache, memcg_params);
+
+	kmem_cache_shrink(cachep);
+}
+
 static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 						  struct kmem_cache *cachep)
 {
 	struct kmem_cache *new_cachep;
 	int idx;
+	char *name;
 
 	BUG_ON(!mem_cgroup_kmem_enabled(memcg));
 
@@ -581,10 +594,21 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 		goto out;
 	}
 
+	/*
+	 * Because the cache is expected to duplicate the string,
+	 * we must make sure it has opportunity to copy its full
+	 * name. Only now we can remove the dead part from it
+	 */
+	name = (char *)new_cachep->name;
+	if (name)
+		name[strlen(name) - 4] = '\0';
+
 	mem_cgroup_get(memcg);
 	memcg->slabs[idx] = new_cachep;
 	new_cachep->memcg_params.memcg = memcg;
 	atomic_set(&new_cachep->memcg_params.refcnt, 1);
+	INIT_WORK(&new_cachep->memcg_params.cache_shrinker,
+		  cache_shrinker_work_func);
 out:
 	mutex_unlock(&memcg_cache_mutex);
 	return new_cachep;
@@ -607,6 +631,21 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
 	struct mem_cgroup_cache_params *p, *tmp;
 	unsigned long flags;
 	LIST_HEAD(del_unlocked);
+	LIST_HEAD(shrinkers);
+
+	spin_lock_irqsave(&cache_queue_lock, flags);
+	list_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {
+		cachep = container_of(p, struct kmem_cache, memcg_params);
+		if (atomic_read(&cachep->memcg_params.refcnt) != 0)
+			list_move(&cachep->memcg_params.destroyed_list, &shrinkers);
+	}
+	spin_unlock_irqrestore(&cache_queue_lock, flags);
+
+	list_for_each_entry_safe(p, tmp, &shrinkers, destroyed_list) {
+		cachep = container_of(p, struct kmem_cache, memcg_params);
+		list_del(&cachep->memcg_params.destroyed_list);
+		kmem_cache_shrink(cachep);
+	}
 
 	spin_lock_irqsave(&cache_queue_lock, flags);
 	list_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {
@@ -682,12 +721,16 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
 
 	spin_lock_irqsave(&cache_queue_lock, flags);
 	for (i = 0; i < MAX_KMEM_CACHE_TYPES; i++) {
+		char *name;
 		cachep = memcg->slabs[i];
 		if (!cachep)
 			continue;
 
-		if (atomic_dec_and_test(&cachep->memcg_params.refcnt))
-			__mem_cgroup_destroy_cache(cachep);
+		atomic_dec(&cachep->memcg_params.refcnt);
+		cachep->memcg_params.dead = true;
+		name = (char *)cachep->name;
+		name[strlen(name)] = 'd';
+		__mem_cgroup_destroy_cache(cachep);
 	}
 	spin_unlock_irqrestore(&cache_queue_lock, flags);
 
diff --git a/mm/slub.c b/mm/slub.c
index eb0ff97..f5fc10c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2658,6 +2658,7 @@ redo:
 	} else
 		__slab_free(s, page, x, addr);
 
+	kmem_cache_verify_dead(s);
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
-- 
1.7.7.6

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

  parent reply	other threads:[~2012-05-25 13:08 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-05-25 13:03 [PATCH v3 00/28] kmem limitation for memcg Glauber Costa
2012-05-25 13:03 ` [PATCH v3 01/28] slab: move FULL state transition to an initcall Glauber Costa
2012-05-25 13:03 ` [PATCH v3 02/28] memcg: Always free struct memcg through schedule_work() Glauber Costa
2012-05-25 13:03 ` [PATCH v3 03/28] slab: rename gfpflags to allocflags Glauber Costa
2012-05-25 13:03 ` [PATCH v3 04/28] memcg: Make it possible to use the stock for more than one page Glauber Costa
2012-05-25 13:03 ` [PATCH v3 05/28] memcg: Reclaim when more than one page needed Glauber Costa
2012-05-29 14:19   ` Christoph Lameter
2012-05-29 14:20     ` Christoph Lameter
2012-05-29 15:45       ` Glauber Costa
2012-05-25 13:03 ` [PATCH v3 06/28] slab: use obj_size field of struct kmem_cache when not debugging Glauber Costa
2012-05-25 13:03 ` [PATCH v3 07/28] memcg: change defines to an enum Glauber Costa
2012-05-25 13:03 ` [PATCH v3 08/28] res_counter: don't force return value checking in res_counter_charge_nofail Glauber Costa
2012-05-25 13:03 ` [PATCH v3 09/28] kmem slab accounting basic infrastructure Glauber Costa
2012-05-25 13:03 ` [PATCH v3 10/28] slab/slub: struct memcg_params Glauber Costa
2012-05-25 13:03 ` [PATCH v3 11/28] slub: consider a memcg parameter in kmem_create_cache Glauber Costa
2012-05-25 13:03 ` [PATCH v3 12/28] slab: pass memcg parameter to kmem_cache_create Glauber Costa
2012-05-29 14:27   ` Christoph Lameter
2012-05-29 15:50     ` Glauber Costa
2012-05-29 16:33       ` Christoph Lameter
2012-05-29 16:36         ` Glauber Costa
2012-05-29 16:52           ` Christoph Lameter
2012-05-29 16:59             ` Glauber Costa
2012-05-30 11:01             ` Frederic Weisbecker
2012-05-25 13:03 ` [PATCH v3 13/28] slub: create duplicate cache Glauber Costa
2012-05-29 14:36   ` Christoph Lameter
2012-05-29 15:56     ` Glauber Costa
2012-05-29 16:05       ` Christoph Lameter
2012-05-29 17:05         ` Glauber Costa
2012-05-29 17:25           ` Christoph Lameter
2012-05-29 17:27             ` Glauber Costa
2012-05-29 19:26               ` Christoph Lameter
2012-05-29 19:40                 ` Glauber Costa
2012-05-29 19:55                   ` Christoph Lameter
2012-05-29 20:08                     ` Glauber Costa
2012-05-29 20:21                       ` Christoph Lameter
2012-05-29 20:25                         ` Glauber Costa
2012-05-30  1:29                           ` Tejun Heo
2012-05-30  7:28                             ` [Devel] " James Bottomley
2012-05-30  7:54                             ` Glauber Costa
2012-05-30  8:02                               ` Tejun Heo
2012-05-30 15:37                                 ` Christoph Lameter
2012-05-29 20:57                         ` Suleiman Souhlal
2012-05-25 13:03 ` [PATCH v3 14/28] slab: " Glauber Costa
2012-05-25 13:03 ` [PATCH v3 15/28] slub: always get the cache from its page in kfree Glauber Costa
2012-05-29 14:42   ` Christoph Lameter
2012-05-29 15:59     ` Glauber Costa
2012-05-25 13:03 ` [PATCH v3 16/28] memcg: kmem controller charge/uncharge infrastructure Glauber Costa
2012-05-29 14:47   ` Christoph Lameter
2012-05-29 16:00     ` Glauber Costa
2012-05-30 12:17   ` Frederic Weisbecker
2012-05-30 12:26     ` Glauber Costa
2012-05-30 12:34   ` Frederic Weisbecker
2012-05-30 12:38     ` Glauber Costa
2012-05-30 13:11       ` Frederic Weisbecker
2012-05-30 13:09         ` Glauber Costa
2012-05-30 13:04   ` Frederic Weisbecker
2012-05-30 13:06     ` Glauber Costa
2012-05-30 13:37       ` Frederic Weisbecker
2012-05-30 13:37         ` Glauber Costa
2012-05-30 13:53           ` Frederic Weisbecker
2012-05-30 13:55             ` Glauber Costa
2012-05-30 15:33               ` Frederic Weisbecker
2012-05-30 16:16                 ` Glauber Costa
2012-05-25 13:03 ` [PATCH v3 17/28] skip memcg kmem allocations in specified code regions Glauber Costa
2012-05-25 13:03 ` [PATCH v3 18/28] slub: charge allocation to a memcg Glauber Costa
2012-05-29 14:51   ` Christoph Lameter
2012-05-29 16:06     ` Glauber Costa
2012-05-25 13:03 ` [PATCH v3 19/28] slab: per-memcg accounting of slab caches Glauber Costa
2012-05-29 14:52   ` Christoph Lameter
2012-05-29 16:07     ` Glauber Costa
2012-05-29 16:13       ` Glauber Costa
2012-05-25 13:03 ` [PATCH v3 20/28] memcg: disable kmem code when not in use Glauber Costa
2012-05-25 13:03 ` [PATCH v3 21/28] memcg: destroy memcg caches Glauber Costa
2012-05-25 13:03 ` Glauber Costa [this message]
2012-05-25 13:03 ` [PATCH v3 23/28] slab: Track all the memcg children of a kmem_cache Glauber Costa
2012-05-25 13:03 ` [PATCH v3 24/28] memcg: Per-memcg memory.kmem.slabinfo file Glauber Costa
2012-05-25 13:03 ` [PATCH v3 25/28] slub: create slabinfo file for memcg Glauber Costa
2012-05-25 13:03 ` [PATCH v3 26/28] slub: track all children of a kmem cache Glauber Costa
2012-05-25 13:03 ` [PATCH v3 27/28] memcg: propagate kmem limiting information to children Glauber Costa
2012-05-25 13:03 ` [PATCH v3 28/28] Documentation: add documentation for slab tracker for memcg Glauber Costa
2012-05-25 13:34 ` [PATCH v3 00/28] kmem limitation " Michal Hocko
2012-05-25 14:34   ` Christoph Lameter
2012-05-28  8:32     ` Glauber Costa
2012-05-29 15:07       ` Christoph Lameter
2012-05-29 15:44         ` Glauber Costa
2012-05-29 16:01           ` Christoph Lameter
2012-06-07 10:26 ` Frederic Weisbecker
2012-06-07 10:53   ` Glauber Costa
2012-06-07 14:00     ` Frederic Weisbecker
2012-06-14  2:24       ` Kamezawa Hiroyuki

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1337951028-3427-23-git-send-email-glommer@parallels.com \
    --to=glommer@parallels.com \
    --cc=cgroups@vger.kernel.org \
    --cc=cl@linux.com \
    --cc=devel@openvz.org \
    --cc=gthelen@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizefan@huawei.com \
    --cc=mhocko@suse.cz \
    --cc=penberg@cs.helsinki.fi \
    --cc=rientjes@google.com \
    --cc=suleiman@google.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox