linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Vladimir Davydov <vdavydov@parallels.com>
To: hannes@cmpxchg.org, mhocko@suse.cz, dchinner@redhat.com,
	akpm@linux-foundation.org
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	cgroups@vger.kernel.org, devel@openvz.org, glommer@openvz.org,
	vdavydov@parallels.com, Al Viro <viro@zeniv.linux.org.uk>,
	Balbir Singh <bsingharora@gmail.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Subject: [PATCH v12 11/18] memcg,list_lru: add function walking over all lists of a per-memcg LRU
Date: Mon, 2 Dec 2013 15:19:46 +0400	[thread overview]
Message-ID: <9c79309f65b4c24bf09a17c588e0ffdf13be15d8.1385974612.git.vdavydov@parallels.com> (raw)
In-Reply-To: <cover.1385974612.git.vdavydov@parallels.com>

Sometimes it is necessary to iterate over all memcgs' lists of the
same memcg-aware LRU. For example, shrink_dcache_sb() should prune all
dentries no matter what memory cgroup they belong to. The current
interface to struct memcg_list_lru, however, only allows per-memcg LRU
walks. This patch adds a special function, memcg_list_lru_walk_all(),
which provides the required functionality. Note that this function does
not guarantee that the elements will be processed in true
least-recently-used order: it simply enumerates all kmem-active memcgs
and calls list_lru_walk() for each of them. However,
shrink_dcache_sb(), which is going to be the only user of this
function, does not need strict LRU ordering.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 include/linux/list_lru.h |   21 ++++++++++++++++
 mm/memcontrol.c          |   60 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index 2ad0bc6..a9e078a 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -46,6 +46,16 @@ struct memcg_list_lru {
 	struct list_lru **memcg_lrus;	/* rcu-protected array of per-memcg
 					   lrus, indexed by memcg_cache_id() */
 
+	/*
+	 * When a memory cgroup is removed, all pointers to its list_lru
+	 * objects stored in memcg_lrus arrays are first marked as dead by
+	 * setting the lowest bit of the address while the actual data free
+	 * happens only after an rcu grace period. If a memcg_lrus reader,
+	 * which should be rcu-protected, faces a dead pointer, it won't
+	 * dereference it. This ensures there will be no use-after-free.
+	 */
+#define MEMCG_LIST_LRU_DEAD		1
+
 	struct list_head list;		/* list of all memcg-aware lrus */
 
 	/*
@@ -166,6 +176,10 @@ struct list_lru *mem_cgroup_list_lru(struct memcg_list_lru *lru,
 				     struct mem_cgroup *memcg);
 struct list_lru *mem_cgroup_kmem_list_lru(struct memcg_list_lru *lru,
 					  void *ptr);
+
+unsigned long
+memcg_list_lru_walk_all(struct memcg_list_lru *lru, list_lru_walk_cb isolate,
+			void *cb_arg, unsigned long nr_to_walk);
 #else
 static inline int memcg_list_lru_init(struct memcg_list_lru *lru)
 {
@@ -188,6 +202,13 @@ mem_cgroup_kmem_list_lru(struct memcg_list_lru *lru, void *ptr)
 {
 	return &lru->global_lru;
 }
+
+static inline unsigned long
+memcg_list_lru_walk_all(struct memcg_list_lru *lru, list_lru_walk_cb isolate,
+			void *cb_arg, unsigned long nr_to_walk)
+{
+	return list_lru_walk(&lru->global_lru, isolate, cb_arg, nr_to_walk);
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #endif /* _LRU_LIST_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 253e01e..da06f91 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3088,6 +3088,7 @@ static int alloc_memcg_lru(struct memcg_list_lru *lru, int memcg_id)
 		return err;
 	}
 
+	smp_wmb();
 	VM_BUG_ON(lru->memcg_lrus[memcg_id]);
 	lru->memcg_lrus[memcg_id] = memcg_lru;
 	return 0;
@@ -3098,7 +3099,8 @@ static void free_memcg_lru(struct memcg_list_lru *lru, int memcg_id)
 {
 	struct list_lru *memcg_lru;
 
-	memcg_lru = lru->memcg_lrus[memcg_id];
+	memcg_lru = (void *)((unsigned long)lru->memcg_lrus[memcg_id] &
+			     ~MEMCG_LIST_LRU_DEAD);
 	if (memcg_lru) {
 		list_lru_destroy(memcg_lru);
 		kfree(memcg_lru);
@@ -3106,6 +3108,16 @@ static void free_memcg_lru(struct memcg_list_lru *lru, int memcg_id)
 	}
 }
 
+static void memcg_lru_mark_dead(struct memcg_list_lru *lru, int memcg_id)
+{
+	struct list_lru *memcg_lru;
+
+	memcg_lru = lru->memcg_lrus[memcg_id];
+	if (memcg_lru)
+		lru->memcg_lrus[memcg_id] = (void *)((unsigned long)memcg_lru |
+						     MEMCG_LIST_LRU_DEAD);
+}
+
 /*
  * Grows a per-memcg lru to acommodate list_lrus for new_num_memcg memory
  * cgroups. Is called for each per-memcg lru whenever a new kmem-enabled memcg
@@ -3141,6 +3153,17 @@ static void __memcg_destroy_all_lrus(int memcg_id)
 {
 	struct memcg_list_lru *lru;
 
+	/*
+	 * Mark all lru lists of this memcg as dead and free them only after a
+	 * grace period. This is to prevent functions iterating over memcg_lrus
+	 * arrays from dereferencing pointers pointing to already freed data
+	 * (see memcg_list_lru_walk_all()).
+	 */
+	list_for_each_entry(lru, &all_per_memcg_lrus, list)
+		memcg_lru_mark_dead(lru, memcg_id);
+
+	synchronize_rcu();
+
 	list_for_each_entry(lru, &all_per_memcg_lrus, list)
 		free_memcg_lru(lru, memcg_id);
 }
@@ -3340,6 +3363,41 @@ struct list_lru *mem_cgroup_kmem_list_lru(struct memcg_list_lru *lru,
 }
 
 /*
+ * This function calls the list_lru_walk() function for each list_lru
+ * comprising a per-memcg lru. It may be useful if one wants to scan all
+ * elements of a per-memcg lru, no matter in which order.
+ */
+unsigned long
+memcg_list_lru_walk_all(struct memcg_list_lru *lru, list_lru_walk_cb isolate,
+			void *cb_arg, unsigned long nr_to_walk)
+{
+	int i;
+	unsigned long isolated;
+	struct list_lru *memcg_lru;
+	struct list_lru **memcg_lrus;
+
+	isolated = list_lru_walk(&lru->global_lru, isolate, cb_arg, nr_to_walk);
+
+	rcu_read_lock();
+	memcg_lrus = rcu_dereference(lru->memcg_lrus);
+	for_each_memcg_cache_index(i) {
+		memcg_lru = memcg_lrus[i];
+		if (!memcg_lru)
+			continue;
+
+		if ((unsigned long)memcg_lru & MEMCG_LIST_LRU_DEAD)
+			continue;
+
+		smp_read_barrier_depends();
+		isolated += list_lru_walk(memcg_lru,
+					  isolate, cb_arg, nr_to_walk);
+	}
+	rcu_read_unlock();
+
+	return isolated;
+}
+
+/*
  * This is a bit cumbersome, but it is rarely used and avoids a backpointer
  * in the memcg_cache_params struct.
  */
-- 
1.7.10.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

  parent reply	other threads:[~2013-12-02 11:20 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-02 11:19 [PATCH v12 00/18] kmemcg shrinkers Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 01/18] memcg: make cache index determination more robust Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 02/18] memcg: consolidate callers of memcg_cache_id Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 03/18] memcg: move initialization to memcg creation Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 04/18] memcg: move several kmemcg functions upper Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 05/18] fs: do not use destroy_super() in alloc_super() fail path Vladimir Davydov
2013-12-03  9:00   ` Dave Chinner
2013-12-03  9:23     ` Vladimir Davydov
2013-12-03 13:37       ` Al Viro
2013-12-03 13:48         ` Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 06/18] vmscan: rename shrink_slab() args to make it more generic Vladimir Davydov
2013-12-03  9:33   ` Dave Chinner
2013-12-03  9:44     ` Vladimir Davydov
2013-12-03 10:04       ` Dave Chinner
2013-12-02 11:19 ` [PATCH v12 07/18] vmscan: move call to shrink_slab() to shrink_zones() Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 08/18] vmscan: do_try_to_free_pages(): remove shrink_control argument Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 09/18] vmscan: shrink slab on memcg pressure Vladimir Davydov
2013-12-03 10:48   ` Dave Chinner
2013-12-03 12:15     ` Vladimir Davydov
2013-12-04  4:51       ` Dave Chinner
2013-12-04  6:31         ` Vladimir Davydov
2013-12-05  5:01           ` Dave Chinner
2013-12-05  6:57             ` Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 10/18] memcg,list_lru: add per-memcg LRU list infrastructure Vladimir Davydov
2013-12-03 11:18   ` Dave Chinner
2013-12-03 12:29     ` Vladimir Davydov
2013-12-05 21:19       ` Dave Chinner
2013-12-02 11:19 ` Vladimir Davydov [this message]
2013-12-02 11:19 ` [PATCH v12 12/18] fs: make icache, dcache shrinkers memcg-aware Vladimir Davydov
2013-12-03 11:45   ` Dave Chinner
2013-12-03 12:34     ` Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 13/18] memcg: per-memcg kmem shrinking Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 14/18] vmscan: take at least one pass with shrinkers Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 15/18] memcg: allow kmem limit to be resized down Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 16/18] vmpressure: in-kernel notifications Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 17/18] memcg: reap dead memcgs upon global memory pressure Vladimir Davydov
2013-12-02 11:19 ` [PATCH v12 18/18] memcg: flush memcg items upon memcg destruction Vladimir Davydov
2013-12-02 11:22 ` [PATCH v12 00/18] kmemcg shrinkers Vladimir Davydov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9c79309f65b4c24bf09a17c588e0ffdf13be15d8.1385974612.git.vdavydov@parallels.com \
    --to=vdavydov@parallels.com \
    --cc=akpm@linux-foundation.org \
    --cc=bsingharora@gmail.com \
    --cc=cgroups@vger.kernel.org \
    --cc=dchinner@redhat.com \
    --cc=devel@openvz.org \
    --cc=glommer@openvz.org \
    --cc=hannes@cmpxchg.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.cz \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.