From: Vladimir Davydov <vdavydov@parallels.com>
To: akpm@linux-foundation.org
Cc: glommer@openvz.org, khorenko@parallels.com, devel@openvz.org,
linux-mm@kvack.org, cgroups@vger.kernel.org,
linux-kernel@vger.kernel.org, Dave Chinner <dchinner@redhat.com>,
Mel Gorman <mgorman@suse.de>, Rik van Riel <riel@redhat.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Michal Hocko <mhocko@suse.cz>, Hugh Dickins <hughd@google.com>,
Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Subject: [PATCH v11 10/15] memcg,list_lru: add function walking over all lists of a per-memcg LRU
Date: Thu, 24 Oct 2013 16:05:01 +0400
Message-ID: <88dcd62d80196c82fa774e0f2d2910f883efd9e2.1382603434.git.vdavydov@parallels.com>
In-Reply-To: <cover.1382603434.git.vdavydov@parallels.com>

Sometimes it is necessary to iterate over all memcgs' lists of the same
memcg-aware LRU. For example, shrink_dcache_sb() should prune all
dentries no matter which memory cgroup they belong to. The current
interface to struct memcg_list_lru, however, only allows per-memcg LRU
walks.

This patch adds the special method memcg_list_lru_walk_all(), which
provides the required functionality. Note that this function does not
guarantee that all elements will be processed in true
least-recently-used order; it simply enumerates all kmem-active memcgs
and calls list_lru_walk() on each of them. shrink_dcache_sb(), which is
going to be the only user of this function, does not require strict LRU
ordering anyway.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Glauber Costa <glommer@openvz.org>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
---
include/linux/list_lru.h | 21 ++++++++++++++++++
mm/memcontrol.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 76 insertions(+)
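
Not part of the patch, just an illustration: a caller that wants to scan
every object on a memcg-aware LRU, regardless of the owning cgroup, might
look roughly like the sketch below. The my_isolate() callback and the
shrink_everything() wrapper are made up for the example; only
memcg_list_lru_walk_all() and the existing list_lru_walk_cb signature
come from this series.

	/* Hypothetical caller -- a sketch, not code from this series. */
	static enum lru_status my_isolate(struct list_head *item,
					  spinlock_t *lock, void *cb_arg)
	{
		/*
		 * The per-LRU lock is held here.  Detach the item; a real
		 * caller would queue it on a dispose list passed via cb_arg
		 * and free it after the walk.
		 */
		list_del_init(item);
		return LRU_REMOVED;
	}

	static unsigned long shrink_everything(struct memcg_list_lru *lru)
	{
		/*
		 * Walks the global list first, then every kmem-active
		 * memcg's list; no LRU order is guaranteed across memcgs.
		 */
		return memcg_list_lru_walk_all(lru, my_isolate, NULL, ULONG_MAX);
	}
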
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index b3b3b86..ce815cc 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -40,6 +40,16 @@ struct memcg_list_lru {
struct list_lru **memcg_lrus; /* rcu-protected array of per-memcg
lrus, indexed by memcg_cache_id() */
+ /*
+ * When a memory cgroup is removed, all pointers to its list_lru
+ * objects stored in memcg_lrus arrays are first marked as dead by
+ * setting the lowest bit of the address while the actual data free
+ * happens only after an rcu grace period. If a memcg_lrus reader,
+ * which should be rcu-protected, faces a dead pointer, it won't
+ * dereference it. This ensures there will be no use-after-free.
+ */
+#define MEMCG_LIST_LRU_DEAD 1
+
struct list_head list; /* list of all memcg-aware lrus */
/*
@@ -160,6 +170,10 @@ struct list_lru *
mem_cgroup_list_lru(struct memcg_list_lru *lru, struct mem_cgroup *memcg);
struct list_lru *
mem_cgroup_kmem_list_lru(struct memcg_list_lru *lru, void *ptr);
+
+unsigned long
+memcg_list_lru_walk_all(struct memcg_list_lru *lru, list_lru_walk_cb isolate,
+ void *cb_arg, unsigned long nr_to_walk);
#else
static inline int memcg_list_lru_init(struct memcg_list_lru *lru)
{
@@ -182,6 +196,13 @@ mem_cgroup_kmem_list_lru(struct memcg_list_lru *lru, void *ptr)
{
return &lru->global_lru;
}
+
+static inline unsigned long
+memcg_list_lru_walk_all(struct memcg_list_lru *lru, list_lru_walk_cb isolate,
+ void *cb_arg, unsigned long nr_to_walk)
+{
+ return list_lru_walk(&lru->global_lru, isolate, cb_arg, nr_to_walk);
+}
#endif /* CONFIG_MEMCG_KMEM */
#endif /* _LRU_LIST_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 39e4772..03178d0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3899,16 +3899,30 @@ static int alloc_memcg_lru(struct memcg_list_lru *lru, int memcg_id)
return err;
}
+ smp_wmb();
VM_BUG_ON(lru->memcg_lrus[memcg_id]);
lru->memcg_lrus[memcg_id] = memcg_lru;
return 0;
}
+static void memcg_lru_mark_dead(struct memcg_list_lru *lru, int memcg_id)
+{
+ struct list_lru *memcg_lru;
+
+ BUG_ON(!lru->memcg_lrus);
+ memcg_lru = lru->memcg_lrus[memcg_id];
+ if (memcg_lru)
+ lru->memcg_lrus[memcg_id] = (void *)((unsigned long)memcg_lru |
+ MEMCG_LIST_LRU_DEAD);
+}
+
static void free_memcg_lru(struct memcg_list_lru *lru, int memcg_id)
{
struct list_lru *memcg_lru = NULL;
swap(lru->memcg_lrus[memcg_id], memcg_lru);
+ memcg_lru = (void *)((unsigned long)memcg_lru &
+ ~MEMCG_LIST_LRU_DEAD);
if (memcg_lru) {
list_lru_destroy(memcg_lru);
kfree(memcg_lru);
@@ -3942,6 +3956,17 @@ static void __memcg_destroy_all_lrus(int memcg_id)
{
struct memcg_list_lru *lru;
+ /*
+ * Mark all lru lists of this memcg as dead and free them only after a
+ * grace period. This is to prevent functions iterating over memcg_lrus
+ * arrays (e.g. memcg_list_lru_walk_all()) from dereferencing pointers
+ * pointing to already freed data.
+ */
+ list_for_each_entry(lru, &memcg_lrus_list, list)
+ memcg_lru_mark_dead(lru, memcg_id);
+
+ synchronize_rcu();
+
list_for_each_entry(lru, &memcg_lrus_list, list)
free_memcg_lru(lru, memcg_id);
}
@@ -4103,6 +4128,36 @@ mem_cgroup_kmem_list_lru(struct memcg_list_lru *lru, void *ptr)
}
return mem_cgroup_list_lru(lru, memcg);
}
+
+unsigned long
+memcg_list_lru_walk_all(struct memcg_list_lru *lru, list_lru_walk_cb isolate,
+ void *cb_arg, unsigned long nr_to_walk)
+{
+ int i;
+ unsigned long isolated;
+ struct list_lru *memcg_lru;
+ struct list_lru **memcg_lrus;
+
+ isolated = list_lru_walk(&lru->global_lru, isolate, cb_arg, nr_to_walk);
+
+ rcu_read_lock();
+ memcg_lrus = rcu_dereference(lru->memcg_lrus);
+ for (i = 0; i < memcg_limited_groups_array_size; i++) {
+ memcg_lru = memcg_lrus[i];
+ if (!memcg_lru)
+ continue;
+
+ if ((unsigned long)memcg_lru & MEMCG_LIST_LRU_DEAD)
+ continue;
+
+ smp_read_barrier_depends();
+ isolated += list_lru_walk(memcg_lru,
+ isolate, cb_arg, nr_to_walk);
+ }
+ rcu_read_unlock();
+
+ return isolated;
+}
#else
static inline void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
{
--
1.7.10.4
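
For completeness, the dead-pointer protocol used by
__memcg_destroy_all_lrus() and memcg_list_lru_walk_all() boils down to
the self-contained sketch below (names shortened; this is an
illustration of the idea, not the patch code): the updater tags the
lowest bit of the published pointer, waits one RCU grace period so that
every walker either sees the tag or has already left its read-side
critical section, and only then frees the object; walkers run under
rcu_read_lock() and skip tagged slots.

	/* Illustration of the mark-dead/RCU-defer scheme; not the patch itself. */
	#define DEAD_BIT	1UL

	static inline bool slot_dead(struct list_lru *p)
	{
		return (unsigned long)p & DEAD_BIT;
	}

	/* Updater: tag the slot, wait a grace period, then tear it down. */
	static void retire_slot(struct list_lru **slot)
	{
		struct list_lru *p = *slot;

		if (p)
			*slot = (void *)((unsigned long)p | DEAD_BIT);
		synchronize_rcu();	/* all walkers now see the dead bit */
		*slot = NULL;
		if (p) {
			list_lru_destroy(p);
			kfree(p);
		}
	}

	/* Walker: called under rcu_read_lock(); skips empty and dead slots. */
	static unsigned long walk_slot(struct list_lru *p, list_lru_walk_cb isolate,
				       void *cb_arg, unsigned long nr_to_walk)
	{
		if (!p || slot_dead(p))
			return 0;
		return list_lru_walk(p, isolate, cb_arg, nr_to_walk);
	}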