[PATCH v3 11/16] memcg: destroy memcg caches

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Glauber Costa <glommer@parallels.com>
To: linux-kernel@vger.kernel.org
Cc: cgroups@vger.kernel.org, kamezawa.hiroyu@jp.fujitsu.com,
	devel@openvz.org, Tejun Heo <tj@kernel.org>,
	linux-mm@kvack.org, Suleiman Souhlal <suleiman@google.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	Mel Gorman <mgorman@suse.de>,
	David Rientjes <rientjes@google.com>,
	Glauber Costa <glommer@parallels.com>,
	Christoph Lameter <cl@linux.com>,
	Pekka Enberg <penberg@cs.helsinki.fi>,
	Michal Hocko <mhocko@suse.cz>,
	Johannes Weiner <hannes@cmpxchg.org>
Subject: [PATCH v3 11/16] memcg: destroy memcg caches
Date: Tue, 18 Sep 2012 18:12:05 +0400	[thread overview]
Message-ID: <1347977530-29755-12-git-send-email-glommer@parallels.com> (raw)
In-Reply-To: <1347977530-29755-1-git-send-email-glommer@parallels.com>

This patch implements destruction of memcg caches. Right now,
only caches where our reference counter is the last remaining are
deleted. If there are any other reference counters around, we just
leave the caches lying around until they go away.

When that happen, a destruction function is called from the cache
code. Caches are only destroyed in process context, so we queue them
up for later processing in the general case.

Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: Christoph Lameter <cl@linux.com>
CC: Pekka Enberg <penberg@cs.helsinki.fi>
CC: Michal Hocko <mhocko@suse.cz>
CC: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
CC: Johannes Weiner <hannes@cmpxchg.org>
CC: Suleiman Souhlal <suleiman@google.com>
---
 include/linux/memcontrol.h |  1 +
 include/linux/slab.h       |  3 ++
 mm/memcontrol.c            | 84 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/slab.c                  |  3 ++
 mm/slab.h                  | 23 +++++++++++++
 mm/slub.c                  |  7 +++-
 6 files changed, 120 insertions(+), 1 deletion(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c44a5f2..204a43a 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -421,6 +421,7 @@ extern void memcg_register_cache(struct mem_cgroup *memcg,
 extern void memcg_release_cache(struct kmem_cache *cachep);
 struct kmem_cache *
 __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
 #else
 
 static inline void memcg_init_kmem_cache(void)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 9d298db..9a41d85 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -186,6 +186,9 @@ struct mem_cgroup_cache_params {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *parent;
 	int id;
+	bool dead;
+	atomic_t nr_pages;
+	struct list_head destroyed_list; /* Used when deleting memcg cache */
 };
 #endif
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0068b7d..a184d42 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -614,6 +614,8 @@ void memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *cachep)
 {
 	int id = -1;
 
+	INIT_LIST_HEAD(&cachep->memcg_params.destroyed_list);
+
 	if (!memcg)
 		id = ida_simple_get(&cache_types, 0, MAX_KMEM_CACHE_TYPES,
 				    GFP_KERNEL);
@@ -758,6 +760,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 	mem_cgroup_get(memcg);
 	memcg->slabs[idx] = new_cachep;
 	new_cachep->memcg_params.memcg = memcg;
+	atomic_set(&new_cachep->memcg_params.nr_pages , 0);
 out:
 	mutex_unlock(&memcg_cache_mutex);
 	return new_cachep;
@@ -772,6 +775,55 @@ struct create_work {
 /* Use a single spinlock for destruction and creation, not a frequent op */
 static DEFINE_SPINLOCK(cache_queue_lock);
 static LIST_HEAD(create_queue);
+static LIST_HEAD(destroyed_caches);
+
+static void kmem_cache_destroy_work_func(struct work_struct *w)
+{
+	struct kmem_cache *cachep;
+	struct mem_cgroup_cache_params *p, *tmp;
+	unsigned long flags;
+	LIST_HEAD(del_unlocked);
+
+	spin_lock_irqsave(&cache_queue_lock, flags);
+	list_for_each_entry_safe(p, tmp, &destroyed_caches, destroyed_list) {
+		cachep = container_of(p, struct kmem_cache, memcg_params);
+		list_move(&cachep->memcg_params.destroyed_list, &del_unlocked);
+	}
+	spin_unlock_irqrestore(&cache_queue_lock, flags);
+
+	list_for_each_entry_safe(p, tmp, &del_unlocked, destroyed_list) {
+		cachep = container_of(p, struct kmem_cache, memcg_params);
+		list_del(&cachep->memcg_params.destroyed_list);
+		if (!atomic_read(&cachep->memcg_params.nr_pages)) {
+			mem_cgroup_put(cachep->memcg_params.memcg);
+			kmem_cache_destroy(cachep);
+		}
+	}
+}
+static DECLARE_WORK(kmem_cache_destroy_work, kmem_cache_destroy_work_func);
+
+static void __mem_cgroup_destroy_cache(struct kmem_cache *cachep)
+{
+	BUG_ON(cachep->memcg_params.id != -1);
+	list_add(&cachep->memcg_params.destroyed_list, &destroyed_caches);
+}
+
+void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
+{
+	unsigned long flags;
+
+	if (!cachep->memcg_params.dead)
+		return;
+	/*
+	 * We have to defer the actual destroying to a workqueue, because
+	 * we might currently be in a context that cannot sleep.
+	 */
+	spin_lock_irqsave(&cache_queue_lock, flags);
+	__mem_cgroup_destroy_cache(cachep);
+	spin_unlock_irqrestore(&cache_queue_lock, flags);
+
+	schedule_work(&kmem_cache_destroy_work);
+}
 
 /*
  * Flush the queue of kmem_caches to create, because we're creating a cgroup.
@@ -793,6 +845,33 @@ void memcg_flush_cache_create_queue(void)
 	spin_unlock_irqrestore(&cache_queue_lock, flags);
 }
 
+static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+	struct kmem_cache *cachep;
+	unsigned long flags;
+	int i;
+
+	/*
+	 * pre_destroy() gets called with no tasks in the cgroup.
+	 * this means that after flushing the create queue, no more caches
+	 * will appear
+	 */
+	memcg_flush_cache_create_queue();
+
+	spin_lock_irqsave(&cache_queue_lock, flags);
+	for (i = 0; i < MAX_KMEM_CACHE_TYPES; i++) {
+		cachep = memcg->slabs[i];
+		if (!cachep)
+			continue;
+
+		cachep->memcg_params.dead = true;
+		__mem_cgroup_destroy_cache(cachep);
+	}
+	spin_unlock_irqrestore(&cache_queue_lock, flags);
+
+	schedule_work(&kmem_cache_destroy_work);
+}
+
 static void memcg_create_cache_work_func(struct work_struct *w)
 {
 	struct create_work *cw, *tmp;
@@ -904,6 +983,10 @@ EXPORT_SYMBOL(__memcg_kmem_get_cache);
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
 }
+
+static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
+{
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
@@ -5590,6 +5673,7 @@ static int mem_cgroup_pre_destroy(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
+	mem_cgroup_destroy_all_caches(memcg);
 	return mem_cgroup_force_empty(memcg, false);
 }
 
diff --git a/mm/slab.c b/mm/slab.c
index f9c7e03..d5c196b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1933,6 +1933,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		if (page->pfmemalloc)
 			SetPageSlabPfmemalloc(page + i);
 	}
+	mem_cgroup_bind_pages(cachep, cachep->gfporder);
 
 	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
 		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1969,6 +1970,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 		__ClearPageSlab(page);
 		page++;
 	}
+
+	mem_cgroup_release_pages(cachep, cachep->gfporder);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += nr_freed;
 	free_accounted_pages((unsigned long)addr, cachep->gfporder);
diff --git a/mm/slab.h b/mm/slab.h
index f2501ab..6024ad1 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -1,5 +1,6 @@
 #ifndef MM_SLAB_H
 #define MM_SLAB_H
+#include <linux/memcontrol.h>
 /*
  * Internal slab definitions
  */
@@ -60,6 +61,20 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 	return cachep->memcg_params.memcg == memcg;
 }
 
+static inline void mem_cgroup_bind_pages(struct kmem_cache *s, int order)
+{
+	if (s->memcg_params.id == -1)
+		atomic_add(1 << order, &s->memcg_params.nr_pages);
+}
+
+static inline void mem_cgroup_release_pages(struct kmem_cache *s, int order)
+{
+	if (s->memcg_params.id != -1)
+		return;
+	if (atomic_sub_and_test((1 << order), &s->memcg_params.nr_pages))
+		mem_cgroup_destroy_cache(s);
+}
+
 static inline bool slab_equal_or_parent(struct kmem_cache *s,
 					struct kmem_cache *p)
 {
@@ -72,6 +87,14 @@ static inline bool cache_match_memcg(struct kmem_cache *cachep,
 	return true;
 }
 
+static inline void mem_cgroup_bind_pages(struct kmem_cache *s, int order)
+{
+}
+
+static inline void mem_cgroup_release_pages(struct kmem_cache *s, int order)
+{
+}
+
 static inline bool slab_equal_or_parent(struct kmem_cache *s,
 					struct kmem_cache *p)
 {
diff --git a/mm/slub.c b/mm/slub.c
index ac79595..f90f612 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1342,6 +1342,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	void *start;
 	void *last;
 	void *p;
+	int order;
 
 	BUG_ON(flags & GFP_SLAB_BUG_MASK);
 
@@ -1350,7 +1351,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	if (!page)
 		goto out;
 
+	order = compound_order(page);
 	inc_slabs_node(s, page_to_nid(page), page->objects);
+	mem_cgroup_bind_pages(s, order);
 	page->slab = s;
 	__SetPageSlab(page);
 	if (page->pfmemalloc)
@@ -1359,7 +1362,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	start = page_address(page);
 
 	if (unlikely(s->flags & SLAB_POISON))
-		memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
+		memset(start, POISON_INUSE, PAGE_SIZE << order);
 
 	last = start;
 	for_each_object(p, s, start, page->objects) {
@@ -1400,6 +1403,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 
 	__ClearPageSlabPfmemalloc(page);
 	__ClearPageSlab(page);
+
+	mem_cgroup_release_pages(s, order);
 	reset_page_mapcount(page);
 	if (current->reclaim_state)
 		current->reclaim_state->reclaimed_slab += pages;
-- 
1.7.11.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

next prev parent reply	other threads:[~2012-09-18 14:16 UTC|newest]

Thread overview: 74+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-18 14:11 [PATCH v3 00/16] slab accounting for memcg Glauber Costa
2012-09-18 14:11 ` [PATCH v3 01/16] slab/slub: struct memcg_params Glauber Costa
2012-09-18 14:11 ` [PATCH v3 02/16] slub: use free_page instead of put_page for freeing kmalloc allocation Glauber Costa
2012-09-18 14:11 ` [PATCH v3 03/16] slab: Ignore the cflgs bit in cache creation Glauber Costa
2012-09-18 15:20   ` Christoph Lameter
2012-09-21 17:33   ` Tejun Heo
2012-09-18 14:11 ` [PATCH v3 04/16] provide a common place for initcall processing in kmem_cache Glauber Costa
2012-09-18 15:22   ` Christoph Lameter
2012-09-18 14:11 ` [PATCH v3 05/16] consider a memcg parameter in kmem_create_cache Glauber Costa
2012-09-21 18:14   ` Tejun Heo
2012-09-24  8:12     ` Glauber Costa
2012-09-24 12:41       ` Christoph
2012-09-24 12:41         ` Glauber Costa
2012-09-24 13:42           ` Christoph Lameter
2012-09-24 13:44             ` Glauber Costa
2012-09-24 13:56               ` Christoph Lameter
2012-09-24 13:57                 ` Glauber Costa
2012-09-24 15:38                   ` Pekka Enberg
2012-09-24 15:36                     ` Glauber Costa
2012-09-24 17:38                       ` Christoph Lameter
2012-09-24 17:39                     ` Christoph Lameter
2012-10-02 14:46   ` Michal Hocko
2012-09-18 14:12 ` [PATCH v3 06/16] memcg: infrastructure to match an allocation to the right cache Glauber Costa
2012-09-21 18:32   ` Tejun Heo
2012-09-24  8:46     ` Glauber Costa
2012-09-24 17:56       ` Tejun Heo
2012-09-25 13:57         ` Glauber Costa
2012-09-25 16:28           ` Christoph Lameter
2012-09-21 20:52   ` Tejun Heo
2012-09-24  8:17     ` Glauber Costa
2012-09-24 17:58       ` Tejun Heo
2012-09-18 14:12 ` [PATCH v3 07/16] memcg: skip memcg kmem allocations in specified code regions Glauber Costa
2012-09-21 19:59   ` Tejun Heo
2012-09-24  9:09     ` Glauber Costa
2012-09-24 17:47       ` Tejun Heo
2012-09-18 14:12 ` [PATCH v3 08/16] slab: allow enable_cpu_cache to use preset values for its tunables Glauber Costa
2012-09-18 15:25   ` Christoph Lameter
2012-09-19  7:44     ` Glauber Costa
2012-09-21  9:29   ` Pekka Enberg
2012-09-18 14:12 ` [PATCH v3 09/16] sl[au]b: always get the cache from its page in kfree Glauber Costa
2012-09-18 15:28   ` Christoph Lameter
2012-09-19  7:42     ` Glauber Costa
2012-09-19 14:14       ` Christoph Lameter
2012-09-21  9:33       ` Pekka Enberg
2012-09-21  9:30         ` Glauber Costa
2012-09-21  9:41           ` Pekka Enberg
2012-09-21 20:07             ` Tejun Heo
2012-09-21 20:14               ` Pekka Enberg
2012-09-21 20:16                 ` Tejun Heo
2012-09-18 14:12 ` [PATCH v3 10/16] sl[au]b: Allocate objects from memcg cache Glauber Costa
2012-09-18 14:12 ` Glauber Costa [this message]
2012-09-21 20:22   ` [PATCH v3 11/16] memcg: destroy memcg caches Tejun Heo
2012-09-18 14:12 ` [PATCH v3 12/16] memcg/sl[au]b Track all the memcg children of a kmem_cache Glauber Costa
2012-09-21 20:31   ` Tejun Heo
2012-09-18 14:12 ` [PATCH v3 13/16] slab: slab-specific propagation changes Glauber Costa
2012-09-18 17:00   ` Christoph Lameter
2012-09-19  7:41     ` Glauber Costa
2012-09-18 14:12 ` [PATCH v3 14/16] slub: slub-specific " Glauber Costa
2012-09-18 14:12 ` [PATCH v3 15/16] memcg/sl[au]b: shrink dead caches Glauber Costa
2012-09-18 17:02   ` Christoph Lameter
2012-09-19  7:40     ` Glauber Costa
2012-09-21  4:48   ` JoonSoo Kim
2012-09-21  8:40     ` Glauber Costa
2012-09-21  9:28       ` JoonSoo Kim
2012-09-21  9:31         ` Glauber Costa
2012-09-21 20:40   ` Tejun Heo
2012-09-24  8:25     ` Glauber Costa
2012-09-24 17:43       ` Tejun Heo
2012-09-18 14:12 ` [PATCH v3 16/16] Add documentation about the kmem controller Glauber Costa
2012-09-21  9:40 ` [PATCH v3 00/16] slab accounting for memcg Pekka Enberg
2012-09-21  9:43   ` Glauber Costa
2012-09-21 20:46 ` Tejun Heo
2012-09-21 20:47   ` Tejun Heo
2012-09-24  8:15   ` Glauber Costa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1347977530-29755-12-git-send-email-glommer@parallels.com \
    --to=glommer@parallels.com \
    --cc=cgroups@vger.kernel.org \
    --cc=cl@linux.com \
    --cc=devel@openvz.org \
    --cc=fweisbec@gmail.com \
    --cc=hannes@cmpxchg.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.cz \
    --cc=penberg@cs.helsinki.fi \
    --cc=rientjes@google.com \
    --cc=suleiman@google.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox