From: Glauber Costa <glommer@parallels.com>
To: linux-mm@kvack.org
Cc: Pekka Enberg <penberg@kernel.org>,
Cristoph Lameter <cl@linux.com>,
David Rientjes <rientjes@google.com>,
cgroups@vger.kernel.org, devel@openvz.org,
kamezawa.hiroyu@jp.fujitsu.com, linux-kernel@vger.kernel.org,
Frederic Weisbecker <fweisbec@gmail.com>,
Suleiman Souhlal <suleiman@google.com>,
Glauber Costa <glommer@parallels.com>,
Pekka Enberg <penberg@cs.helsinki.fi>,
Michal Hocko <mhocko@suse.cz>,
Johannes Weiner <hannes@cmpxchg.org>
Subject: [PATCH v4 24/25] memcg/slub: shrink dead caches
Date: Mon, 18 Jun 2012 14:28:17 +0400 [thread overview]
Message-ID: <1340015298-14133-25-git-send-email-glommer@parallels.com> (raw)
In-Reply-To: <1340015298-14133-1-git-send-email-glommer@parallels.com>
In the slub allocator, when the last object of a page goes away, we
don't necessarily free it - there is not necessarily a test for empty
page in any slab_free path.
This means that when we destroy a memcg cache that happened to be empty,
those caches may take a lot of time to go away: removing the memcg
reference won't destroy them - because there are pending references,
and the empty pages will stay there, until a shrinker is called upon
for any reason.
This patch marks all memcg caches as dead. kmem_cache_shrink is called
for the ones who are not yet dead - this will force internal cache
reorganization, and then all references to empty pages will be removed.
An unlikely branch is used to make sure this case does not affect
performance in the usual slab_free path.
Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: Christoph Lameter <cl@linux.com>
CC: Pekka Enberg <penberg@cs.helsinki.fi>
CC: Michal Hocko <mhocko@suse.cz>
CC: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: Johannes Weiner <hannes@cmpxchg.org>
CC: Suleiman Souhlal <suleiman@google.com>
---
include/linux/slab.h | 3 +++
include/linux/slub_def.h | 8 ++++++++
mm/memcontrol.c | 44 +++++++++++++++++++++++++++++++++++++++++++-
mm/slub.c | 1 +
4 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 155d19f..7e13055 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -182,6 +182,8 @@ unsigned int kmem_cache_size(struct kmem_cache *);
#endif
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#include <linux/workqueue.h>
+
struct mem_cgroup_cache_params {
struct mem_cgroup *memcg;
struct kmem_cache *parent;
@@ -190,6 +192,7 @@ struct mem_cgroup_cache_params {
atomic_t nr_pages;
struct list_head destroyed_list; /* Used when deleting memcg cache */
struct list_head sibling_list;
+ struct work_struct cache_shrinker;
};
#endif
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 7183596..871f82b 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -116,6 +116,14 @@ struct kmem_cache {
struct kmem_cache_node *node[MAX_NUMNODES];
};
+static inline void kmem_cache_verify_dead(struct kmem_cache *cachep)
+{
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+ if (unlikely(cachep->memcg_params.dead))
+ schedule_work(&cachep->memcg_params.cache_shrinker);
+#endif
+}
+
/*
* Kmalloc subsystem.
*/
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 972e83f..c368480 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -531,7 +531,7 @@ static char *mem_cgroup_cache_name(struct mem_cgroup *memcg, struct kmem_cache *
BUG_ON(dentry == NULL);
- name = kasprintf(GFP_KERNEL, "%s(%d:%s)",
+ name = kasprintf(GFP_KERNEL, "%s(%d:%s)dead",
cachep->name, css_id(&memcg->css), dentry->d_name.name);
return name;
@@ -592,11 +592,24 @@ void mem_cgroup_release_cache(struct kmem_cache *cachep)
}
+static void cache_shrinker_work_func(struct work_struct *work)
+{
+ struct mem_cgroup_cache_params *params;
+ struct kmem_cache *cachep;
+
+ params = container_of(work, struct mem_cgroup_cache_params,
+ cache_shrinker);
+ cachep = container_of(params, struct kmem_cache, memcg_params);
+
+ kmem_cache_shrink(cachep);
+}
+
static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
struct kmem_cache *cachep)
{
struct kmem_cache *new_cachep;
int idx;
+ char *name;
BUG_ON(!mem_cgroup_kmem_enabled(memcg));
@@ -616,10 +629,21 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
goto out;
}
+ /*
+ * Because the cache is expected to duplicate the string,
+ * we must make sure it has opportunity to copy its full
+ * name. Only now we can remove the dead part from it
+ */
+ name = (char *)new_cachep->name;
+ if (name)
+ name[strlen(name) - 4] = '\0';
+
mem_cgroup_get(memcg);
memcg->slabs[idx] = new_cachep;
new_cachep->memcg_params.memcg = memcg;
atomic_set(&new_cachep->memcg_params.nr_pages , 0);
+ INIT_WORK(&new_cachep->memcg_params.cache_shrinker,
+ cache_shrinker_work_func);
out:
mutex_unlock(&memcg_cache_mutex);
return new_cachep;
@@ -642,6 +666,21 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
struct mem_cgroup_cache_params *p, *tmp;
unsigned long flags;
LIST_HEAD(del_unlocked);
+ LIST_HEAD(shrinkers);
+
+ spin_lock_irqsave(&cache_queue_lock, flags);
+ list_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {
+ cachep = container_of(p, struct kmem_cache, memcg_params);
+ if (atomic_read(&cachep->memcg_params.nr_pages) != 0)
+ list_move(&cachep->memcg_params.destroyed_list, &shrinkers);
+ }
+ spin_unlock_irqrestore(&cache_queue_lock, flags);
+
+ list_for_each_entry_safe(p, tmp, &shrinkers, destroyed_list) {
+ cachep = container_of(p, struct kmem_cache, memcg_params);
+ list_del(&cachep->memcg_params.destroyed_list);
+ kmem_cache_shrink(cachep);
+ }
spin_lock_irqsave(&cache_queue_lock, flags);
list_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {
@@ -719,11 +758,14 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
spin_lock_irqsave(&cache_queue_lock, flags);
for (i = 0; i < MAX_KMEM_CACHE_TYPES; i++) {
+ char *name;
cachep = memcg->slabs[i];
if (!cachep)
continue;
cachep->memcg_params.dead = true;
+ name = (char *)cachep->name;
+ name[strlen(name)] = 'd';
__mem_cgroup_destroy_cache(cachep);
}
spin_unlock_irqrestore(&cache_queue_lock, flags);
diff --git a/mm/slub.c b/mm/slub.c
index d5b91f4..37ac548 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2593,6 +2593,7 @@ redo:
} else
__slab_free(s, page, x, addr);
+ kmem_cache_verify_dead(s);
}
void kmem_cache_free(struct kmem_cache *s, void *x)
--
1.7.10.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2012-06-18 10:33 UTC|newest]
Thread overview: 59+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-18 10:27 [PATCH v4 00/25] kmem limitation for memcg Glauber Costa
2012-06-18 10:27 ` [PATCH v4 01/25] slab: rename gfpflags to allocflags Glauber Costa
2012-06-18 10:27 ` [PATCH v4 02/25] provide a common place for initcall processing in kmem_cache Glauber Costa
2012-06-18 10:27 ` [PATCH v4 03/25] slab: move FULL state transition to an initcall Glauber Costa
2012-06-18 10:27 ` [PATCH v4 04/25] Wipe out CFLGS_OFF_SLAB from flags during initial slab creation Glauber Costa
2012-06-18 10:27 ` [PATCH v4 05/25] memcg: Always free struct memcg through schedule_work() Glauber Costa
2012-06-18 12:07 ` Kamezawa Hiroyuki
2012-06-18 12:10 ` Glauber Costa
2012-06-19 0:11 ` Kamezawa Hiroyuki
2012-06-20 7:32 ` Pekka Enberg
2012-06-20 8:40 ` Glauber Costa
2012-06-21 11:39 ` Kamezawa Hiroyuki
2012-06-20 13:20 ` Michal Hocko
2012-06-18 10:27 ` [PATCH v4 06/25] memcg: Make it possible to use the stock for more than one page Glauber Costa
2012-06-20 13:28 ` Michal Hocko
2012-06-20 19:36 ` Glauber Costa
2012-06-21 21:14 ` Michal Hocko
2012-06-25 13:03 ` Glauber Costa
2012-06-18 10:28 ` [PATCH v4 07/25] memcg: Reclaim when more than one page needed Glauber Costa
2012-06-20 13:47 ` Michal Hocko
2012-06-20 19:43 ` Glauber Costa
2012-06-21 21:19 ` Michal Hocko
2012-06-25 13:13 ` Glauber Costa
2012-06-25 14:04 ` Glauber Costa
2012-06-18 10:28 ` [PATCH v4 08/25] memcg: change defines to an enum Glauber Costa
2012-06-20 13:13 ` Michal Hocko
2012-06-18 10:28 ` [PATCH v4 09/25] kmem slab accounting basic infrastructure Glauber Costa
2012-06-18 10:28 ` [PATCH v4 10/25] slab/slub: struct memcg_params Glauber Costa
2012-06-18 10:28 ` [PATCH v4 11/25] consider a memcg parameter in kmem_create_cache Glauber Costa
2012-06-18 10:28 ` [PATCH v4 12/25] sl[au]b: always get the cache from its page in kfree Glauber Costa
2012-06-18 10:28 ` [PATCH v4 13/25] Add a __GFP_SLABMEMCG flag Glauber Costa
2012-06-18 10:28 ` [PATCH v4 14/25] memcg: kmem controller dispatch infrastructure Glauber Costa
2012-06-18 10:28 ` [PATCH v4 15/25] allow enable_cpu_cache to use preset values for its tunables Glauber Costa
2012-06-18 10:28 ` [PATCH v4 16/25] don't do __ClearPageSlab before freeing slab page Glauber Costa
2012-06-18 10:28 ` [PATCH v4 17/25] skip memcg kmem allocations in specified code regions Glauber Costa
2012-06-18 12:19 ` Kamezawa Hiroyuki
2012-06-18 10:28 ` [PATCH v4 18/25] mm: Allocate kernel pages to the right memcg Glauber Costa
2012-06-18 10:28 ` [PATCH v4 19/25] memcg: disable kmem code when not in use Glauber Costa
2012-06-18 12:22 ` Kamezawa Hiroyuki
2012-06-18 12:26 ` Glauber Costa
2012-06-18 10:28 ` [PATCH v4 20/25] memcg: destroy memcg caches Glauber Costa
2012-06-18 10:28 ` [PATCH v4 21/25] Track all the memcg children of a kmem_cache Glauber Costa
2012-06-18 10:28 ` [PATCH v4 22/25] slab: slab-specific propagation changes Glauber Costa
2012-06-18 10:28 ` [PATCH v4 23/25] memcg: propagate kmem limiting information to children Glauber Costa
2012-06-18 12:37 ` Kamezawa Hiroyuki
2012-06-18 12:43 ` Glauber Costa
2012-06-19 0:16 ` Kamezawa Hiroyuki
2012-06-19 8:35 ` Glauber Costa
2012-06-19 8:54 ` Glauber Costa
2012-06-20 8:59 ` Glauber Costa
2012-06-23 4:19 ` Kamezawa Hiroyuki
2012-06-18 10:28 ` Glauber Costa [this message]
2012-07-06 15:16 ` [PATCH v4 24/25] memcg/slub: shrink dead caches Christoph Lameter
2012-07-20 22:16 ` Glauber Costa
2012-07-25 15:23 ` Christoph Lameter
2012-07-25 18:15 ` Glauber Costa
2012-06-18 10:28 ` [PATCH v4 25/25] Documentation: add documentation for slab tracker for memcg Glauber Costa
2012-06-18 12:10 ` [PATCH v4 00/25] kmem limitation " Kamezawa Hiroyuki
2012-06-18 12:14 ` Glauber Costa
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1340015298-14133-25-git-send-email-glommer@parallels.com \
--to=glommer@parallels.com \
--cc=cgroups@vger.kernel.org \
--cc=cl@linux.com \
--cc=devel@openvz.org \
--cc=fweisbec@gmail.com \
--cc=hannes@cmpxchg.org \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.cz \
--cc=penberg@cs.helsinki.fi \
--cc=penberg@kernel.org \
--cc=rientjes@google.com \
--cc=suleiman@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox