From: Vladimir Davydov <vdavydov@parallels.com>
To: hannes@cmpxchg.org, mhocko@suse.cz, dchinner@redhat.com,
akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
cgroups@vger.kernel.org, devel@openvz.org, glommer@openvz.org,
vdavydov@parallels.com, Anton Vorontsov <anton@enomsg.org>,
John Stultz <john.stultz@linaro.org>,
Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Subject: [PATCH v12 17/18] memcg: reap dead memcgs upon global memory pressure
Date: Mon, 2 Dec 2013 15:19:52 +0400 [thread overview]
Message-ID: <dcbf3fd6336bc44948c89d0abc8f9aff27888f81.1385974612.git.vdavydov@parallels.com> (raw)
In-Reply-To: <cover.1385974612.git.vdavydov@parallels.com>
From: Glauber Costa <glommer@openvz.org>
When we delete kmem-enabled memcgs, they can still linger around as
zombies for a while. The reason is that the objects may still be alive,
so we will not be able to delete them at destruction time.
The only entry points for that, though, are the shrinkers. The
shrinker interface, however, is not exactly tailored to our needs. It
could be made a little bit better by using the API Dave Chinner proposed,
but it is still not ideal, since what we need is not really a
count-and-scan event, but more a one-off flush-all-you-can event that
would have to abuse that interface somehow.
Signed-off-by: Glauber Costa <glommer@openvz.org>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
mm/memcontrol.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 77 insertions(+), 3 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a0b22d7..72db892 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -287,8 +287,16 @@ struct mem_cgroup {
/* thresholds for mem+swap usage. RCU-protected */
struct mem_cgroup_thresholds memsw_thresholds;
- /* For oom notifier event fd */
- struct list_head oom_notify;
+ union {
+ /* For oom notifier event fd */
+ struct list_head oom_notify;
+ /*
+ * we can only trigger an oom event if the memcg is alive.
+ * so we will reuse this field to hook the memcg in the list
+ * of dead memcgs.
+ */
+ struct list_head dead;
+ };
/*
* Should we move charges of a task when a task is moved into this
@@ -338,6 +346,29 @@ struct mem_cgroup {
/* WARNING: nodeinfo must be the last member here */
};
+#if defined(CONFIG_MEMCG_KMEM) || defined(CONFIG_MEMCG_SWAP)
+static LIST_HEAD(dangling_memcgs);
+static DEFINE_MUTEX(dangling_memcgs_mutex);
+
+static inline void memcg_dangling_del(struct mem_cgroup *memcg)
+{
+ mutex_lock(&dangling_memcgs_mutex);
+ list_del(&memcg->dead);
+ mutex_unlock(&dangling_memcgs_mutex);
+}
+
+static inline void memcg_dangling_add(struct mem_cgroup *memcg)
+{
+ INIT_LIST_HEAD(&memcg->dead);
+ mutex_lock(&dangling_memcgs_mutex);
+ list_add(&memcg->dead, &dangling_memcgs);
+ mutex_unlock(&dangling_memcgs_mutex);
+}
+#else
+static inline void memcg_dangling_del(struct mem_cgroup *memcg) {}
+static inline void memcg_dangling_add(struct mem_cgroup *memcg) {}
+#endif
+
static size_t memcg_size(void)
{
return sizeof(struct mem_cgroup) +
@@ -6363,6 +6394,41 @@ static int mem_cgroup_oom_control_write(struct cgroup_subsys_state *css,
}
#ifdef CONFIG_MEMCG_KMEM
+static void memcg_vmpressure_shrink_dead(void)
+{
+ struct memcg_cache_params *params, *tmp;
+ struct kmem_cache *cachep;
+ struct mem_cgroup *memcg;
+
+ mutex_lock(&dangling_memcgs_mutex);
+ list_for_each_entry(memcg, &dangling_memcgs, dead) {
+ mutex_lock(&memcg->slab_caches_mutex);
+ /* The element may go away as an indirect result of shrink */
+ list_for_each_entry_safe(params, tmp,
+ &memcg->memcg_slab_caches, list) {
+ cachep = memcg_params_to_cache(params);
+ /*
+ * the cpu_hotplug lock is taken in kmem_cache_create
+ * outside the slab_caches_mutex manipulation. It will
+ * be taken by kmem_cache_shrink to flush the cache.
+ * So we need to drop the lock. It is all right because
+ * the lock only protects elements moving in and out the
+ * list.
+ */
+ mutex_unlock(&memcg->slab_caches_mutex);
+ kmem_cache_shrink(cachep);
+ mutex_lock(&memcg->slab_caches_mutex);
+ }
+ mutex_unlock(&memcg->slab_caches_mutex);
+ }
+ mutex_unlock(&dangling_memcgs_mutex);
+}
+
+static void memcg_register_kmem_events(struct cgroup_subsys_state *css)
+{
+ vmpressure_register_kernel_event(css, memcg_vmpressure_shrink_dead);
+}
+
static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
int ret;
@@ -6420,6 +6486,10 @@ static void kmem_cgroup_css_offline(struct mem_cgroup *memcg)
css_put(&memcg->css);
}
#else
+static inline void memcg_register_kmem_events(struct cgroup *cont)
+{
+}
+
static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
return 0;
@@ -6758,8 +6828,10 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
if (css->cgroup->id > MEM_CGROUP_ID_MAX)
return -ENOSPC;
- if (!parent)
+ if (!parent) {
+ memcg_register_kmem_events(css);
return 0;
+ }
mutex_lock(&memcg_create_mutex);
@@ -6821,6 +6893,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
mem_cgroup_invalidate_reclaim_iterators(memcg);
mem_cgroup_reparent_charges(memcg);
mem_cgroup_destroy_all_caches(memcg);
+ memcg_dangling_add(memcg);
vmpressure_cleanup(&memcg->vmpressure);
}
@@ -6829,6 +6902,7 @@ static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
memcg_destroy_kmem(memcg);
+ memcg_dangling_del(memcg);
__mem_cgroup_free(memcg);
}
--
1.7.10.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
next prev parent reply other threads:[~2013-12-02 11:20 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-12-02 11:19 [PATCH v12 00/18] kmemcg shrinkers Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 01/18] memcg: make cache index determination more robust Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 02/18] memcg: consolidate callers of memcg_cache_id Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 03/18] memcg: move initialization to memcg creation Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 04/18] memcg: move several kmemcg functions upper Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 05/18] fs: do not use destroy_super() in alloc_super() fail path Vladimir Davydov
2013-12-03 9:00 ` Dave Chinner
2013-12-03 9:23 ` Vladimir Davydov
2013-12-03 13:37 ` Al Viro
2013-12-03 13:48 ` Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 06/18] vmscan: rename shrink_slab() args to make it more generic Vladimir Davydov
2013-12-03 9:33 ` Dave Chinner
2013-12-03 9:44 ` Vladimir Davydov
2013-12-03 10:04 ` Dave Chinner
2013-12-02 11:19 ` [PATCH v12 07/18] vmscan: move call to shrink_slab() to shrink_zones() Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 08/18] vmscan: do_try_to_free_pages(): remove shrink_control argument Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 09/18] vmscan: shrink slab on memcg pressure Vladimir Davydov
2013-12-03 10:48 ` Dave Chinner
2013-12-03 12:15 ` Vladimir Davydov
2013-12-04 4:51 ` Dave Chinner
2013-12-04 6:31 ` Vladimir Davydov
2013-12-05 5:01 ` Dave Chinner
2013-12-05 6:57 ` Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 10/18] memcg,list_lru: add per-memcg LRU list infrastructure Vladimir Davydov
2013-12-03 11:18 ` Dave Chinner
2013-12-03 12:29 ` Vladimir Davydov
2013-12-05 21:19 ` Dave Chinner
2013-12-02 11:19 ` [PATCH v12 11/18] memcg,list_lru: add function walking over all lists of a per-memcg LRU Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 12/18] fs: make icache, dcache shrinkers memcg-aware Vladimir Davydov
2013-12-03 11:45 ` Dave Chinner
2013-12-03 12:34 ` Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 13/18] memcg: per-memcg kmem shrinking Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 14/18] vmscan: take at least one pass with shrinkers Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 15/18] memcg: allow kmem limit to be resized down Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 16/18] vmpressure: in-kernel notifications Vladimir Davydov
2013-12-02 11:19 ` Vladimir Davydov [this message]
2013-12-02 11:19 ` [PATCH v12 18/18] memcg: flush memcg items upon memcg destruction Vladimir Davydov
2013-12-02 11:22 ` [PATCH v12 00/18] kmemcg shrinkers Vladimir Davydov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=dcbf3fd6336bc44948c89d0abc8f9aff27888f81.1385974612.git.vdavydov@parallels.com \
--to=vdavydov@parallels.com \
--cc=akpm@linux-foundation.org \
--cc=anton@enomsg.org \
--cc=cgroups@vger.kernel.org \
--cc=dchinner@redhat.com \
--cc=devel@openvz.org \
--cc=glommer@openvz.org \
--cc=hannes@cmpxchg.org \
--cc=john.stultz@linaro.org \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@suse.cz \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox