linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Roman Gushchin <guro@fb.com>
To: <linux-mm@kvack.org>
Cc: Michal Hocko <mhocko@kernel.org>,
	Johannes Weiner <hannes@cmpxchg.org>,
	<linux-kernel@vger.kernel.org>, <kernel-team@fb.com>,
	Shakeel Butt <shakeelb@google.com>,
	Vladimir Davydov <vdavydov.dev@gmail.com>,
	Waiman Long <longman@redhat.com>,
	Christoph Lameter <cl@linux.com>, Roman Gushchin <guro@fb.com>
Subject: [PATCH 06/16] mm: memcg/slab: save memcg ownership data for non-root slab objects
Date: Thu, 17 Oct 2019 17:28:10 -0700	[thread overview]
Message-ID: <20191018002820.307763-7-guro@fb.com> (raw)
In-Reply-To: <20191018002820.307763-1-guro@fb.com>

Store a memcg_ptr in the corresponding place of the mem_cgroup_vec
for each allocated non-root slab object. Make sure that each allocated
object holds a reference to the mem_cgroup_ptr.

To get the memcg_ptr in the post-alloc hook, we need a memcg pointer.
Because all memory cgroups will soon share the same set of kmem_caches,
let's not use the kmem_cache->memcg_params.memcg. Instead, let's pass
the pointer directly from memcg_kmem_get_cache(). This will guarantee
that we will use the same cgroup in pre- and post-alloc hooks.

Please note that the code is a bit bulky now, because we have to
manage 3 types of objects with reference counters: memcg, kmem_cache
and memcg_ptr. The following commits in the series will simplify it.

Signed-off-by: Roman Gushchin <guro@fb.com>
---
 include/linux/memcontrol.h |  3 +-
 mm/memcontrol.c            |  8 +++--
 mm/slab.c                  | 18 ++++++-----
 mm/slab.h                  | 64 ++++++++++++++++++++++++++++++++++----
 mm/slub.c                  | 14 ++++++---
 5 files changed, 86 insertions(+), 21 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index da864fded297..f4cb844005a5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1397,7 +1397,8 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
 }
 #endif
 
-struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
+struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep,
+					struct mem_cgroup **memcgp);
 void memcg_kmem_put_cache(struct kmem_cache *cachep);
 
 #ifdef CONFIG_MEMCG_KMEM
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9303e98b0718..47a30db94869 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3023,7 +3023,8 @@ static inline bool memcg_kmem_bypass(void)
  * done with it, memcg_kmem_put_cache() must be called to release the
  * reference.
  */
-struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
+struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep,
+					struct mem_cgroup **memcgp)
 {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *memcg_cachep;
@@ -3079,8 +3080,11 @@ struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
 	 */
 	if (unlikely(!memcg_cachep))
 		memcg_schedule_kmem_cache_create(memcg, cachep);
-	else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt))
+	else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt)) {
+		css_get(&memcg->css);
+		*memcgp = memcg;
 		cachep = memcg_cachep;
+	}
 out_unlock:
 	rcu_read_unlock();
 	return cachep;
diff --git a/mm/slab.c b/mm/slab.c
index ffa16dd966ef..91cd8bc4ee07 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3223,9 +3223,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	unsigned long save_flags;
 	void *ptr;
 	int slab_node = numa_mem_id();
+	struct mem_cgroup *memcg = NULL;
 
 	flags &= gfp_allowed_mask;
-	cachep = slab_pre_alloc_hook(cachep, flags);
+	cachep = slab_pre_alloc_hook(cachep, &memcg, 1, flags);
 	if (unlikely(!cachep))
 		return NULL;
 
@@ -3261,7 +3262,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	if (unlikely(slab_want_init_on_alloc(flags, cachep)) && ptr)
 		memset(ptr, 0, cachep->object_size);
 
-	slab_post_alloc_hook(cachep, flags, 1, &ptr);
+	slab_post_alloc_hook(cachep, memcg, flags, 1, &ptr);
 	return ptr;
 }
 
@@ -3302,9 +3303,10 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 {
 	unsigned long save_flags;
 	void *objp;
+	struct mem_cgroup *memcg = NULL;
 
 	flags &= gfp_allowed_mask;
-	cachep = slab_pre_alloc_hook(cachep, flags);
+	cachep = slab_pre_alloc_hook(cachep, &memcg, 1, flags);
 	if (unlikely(!cachep))
 		return NULL;
 
@@ -3318,7 +3320,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 	if (unlikely(slab_want_init_on_alloc(flags, cachep)) && objp)
 		memset(objp, 0, cachep->object_size);
 
-	slab_post_alloc_hook(cachep, flags, 1, &objp);
+	slab_post_alloc_hook(cachep, memcg, flags, 1, &objp);
 	return objp;
 }
 
@@ -3440,6 +3442,7 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
 		memset(objp, 0, cachep->object_size);
 	kmemleak_free_recursive(objp, cachep->flags);
 	objp = cache_free_debugcheck(cachep, objp, caller);
+	memcg_slab_free_hook(cachep, virt_to_head_page(objp), objp);
 
 	/*
 	 * Skip calling cache_free_alien() when the platform is not numa.
@@ -3505,8 +3508,9 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 			  void **p)
 {
 	size_t i;
+	struct mem_cgroup *memcg = NULL;
 
-	s = slab_pre_alloc_hook(s, flags);
+	s = slab_pre_alloc_hook(s, &memcg, size, flags);
 	if (!s)
 		return 0;
 
@@ -3529,13 +3533,13 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 		for (i = 0; i < size; i++)
 			memset(p[i], 0, s->object_size);
 
-	slab_post_alloc_hook(s, flags, size, p);
+	slab_post_alloc_hook(s, memcg, flags, size, p);
 	/* FIXME: Trace call missing. Christoph would like a bulk variant */
 	return size;
 error:
 	local_irq_enable();
 	cache_alloc_debugcheck_after_bulk(s, flags, i, p, _RET_IP_);
-	slab_post_alloc_hook(s, flags, i, p);
+	slab_post_alloc_hook(s, memcg, flags, i, p);
 	__kmem_cache_free_bulk(s, i, p);
 	return 0;
 }
diff --git a/mm/slab.h b/mm/slab.h
index 8620a0a1d5fa..28feabed1e9a 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -423,6 +423,45 @@ static inline void memcg_free_page_memcg_vec(struct page *page)
 	page->mem_cgroup_vec = NULL;
 }
 
+static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
+					      struct mem_cgroup *memcg,
+					      size_t size, void **p)
+{
+	struct mem_cgroup_ptr *memcg_ptr;
+	struct page *page;
+	unsigned long off;
+	size_t i;
+
+	memcg_ptr = mem_cgroup_get_kmem_ptr(memcg);
+	for (i = 0; i < size; i++) {
+		if (likely(p[i])) {
+			page = virt_to_head_page(p[i]);
+			off = obj_to_index(s, page, p[i]);
+			mem_cgroup_ptr_get(memcg_ptr);
+			page->mem_cgroup_vec[off] = memcg_ptr;
+		}
+	}
+	mem_cgroup_ptr_put(memcg_ptr);
+	mem_cgroup_put(memcg);
+
+	memcg_kmem_put_cache(s);
+}
+
+static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
+					void *p)
+{
+	struct mem_cgroup_ptr *memcg_ptr;
+	unsigned int off;
+
+	if (!memcg_kmem_enabled() || is_root_cache(s))
+		return;
+
+	off = obj_to_index(s, page, p);
+	memcg_ptr = page->mem_cgroup_vec[off];
+	page->mem_cgroup_vec[off] = NULL;
+	mem_cgroup_ptr_put(memcg_ptr);
+}
+
 extern void slab_init_memcg_params(struct kmem_cache *);
 extern void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg);
 
@@ -482,6 +521,17 @@ static inline void memcg_free_page_memcg_vec(struct page *page)
 {
 }
 
+static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
+					      struct mem_cgroup *memcg,
+					      size_t size, void **p)
+{
+}
+
+static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
+					void *p)
+{
+}
+
 static inline void slab_init_memcg_params(struct kmem_cache *s)
 {
 }
@@ -591,7 +641,8 @@ static inline size_t slab_ksize(const struct kmem_cache *s)
 }
 
 static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
-						     gfp_t flags)
+						     struct mem_cgroup **memcgp,
+						     size_t size, gfp_t flags)
 {
 	flags &= gfp_allowed_mask;
 
@@ -605,13 +656,14 @@ static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
 
 	if (memcg_kmem_enabled() &&
 	    ((flags & __GFP_ACCOUNT) || (s->flags & SLAB_ACCOUNT)))
-		return memcg_kmem_get_cache(s);
+		return memcg_kmem_get_cache(s, memcgp);
 
 	return s;
 }
 
-static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
-					size_t size, void **p)
+static inline void slab_post_alloc_hook(struct kmem_cache *s,
+					struct mem_cgroup *memcg,
+					gfp_t flags, size_t size, void **p)
 {
 	size_t i;
 
@@ -623,8 +675,8 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags,
 					 s->flags, flags);
 	}
 
-	if (memcg_kmem_enabled())
-		memcg_kmem_put_cache(s);
+	if (!is_root_cache(s))
+		memcg_slab_post_alloc_hook(s, memcg, size, p);
 }
 
 #ifndef CONFIG_SLOB
diff --git a/mm/slub.c b/mm/slub.c
index 557ea45a5d75..a62545c7acac 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2700,8 +2700,9 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	struct kmem_cache_cpu *c;
 	struct page *page;
 	unsigned long tid;
+	struct mem_cgroup *memcg = NULL;
 
-	s = slab_pre_alloc_hook(s, gfpflags);
+	s = slab_pre_alloc_hook(s, &memcg, 1, gfpflags);
 	if (!s)
 		return NULL;
 redo:
@@ -2777,7 +2778,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
 		memset(object, 0, s->object_size);
 
-	slab_post_alloc_hook(s, gfpflags, 1, &object);
+	slab_post_alloc_hook(s, memcg, gfpflags, 1, &object);
 
 	return object;
 }
@@ -2982,6 +2983,8 @@ static __always_inline void do_slab_free(struct kmem_cache *s,
 	void *tail_obj = tail ? : head;
 	struct kmem_cache_cpu *c;
 	unsigned long tid;
+
+	memcg_slab_free_hook(s, page, head);
 redo:
 	/*
 	 * Determine the currently cpus per cpu slab.
@@ -3159,9 +3162,10 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 {
 	struct kmem_cache_cpu *c;
 	int i;
+	struct mem_cgroup *memcg = NULL;
 
 	/* memcg and kmem_cache debug support */
-	s = slab_pre_alloc_hook(s, flags);
+	s = slab_pre_alloc_hook(s, &memcg, size, flags);
 	if (unlikely(!s))
 		return false;
 	/*
@@ -3206,11 +3210,11 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	}
 
 	/* memcg and kmem_cache debug support */
-	slab_post_alloc_hook(s, flags, size, p);
+	slab_post_alloc_hook(s, memcg, flags, size, p);
 	return i;
 error:
 	local_irq_enable();
-	slab_post_alloc_hook(s, flags, i, p);
+	slab_post_alloc_hook(s, memcg, flags, i, p);
 	__kmem_cache_free_bulk(s, i, p);
 	return 0;
 }
-- 
2.21.0



  parent reply	other threads:[~2019-10-18  0:29 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-10-18  0:28 [PATCH 00/16] The new slab memory controller Roman Gushchin
2019-10-18  0:28 ` [PATCH 01/16] mm: memcg: introduce mem_cgroup_ptr Roman Gushchin
2019-10-18  0:28 ` [PATCH 02/16] mm: vmstat: use s32 for vm_node_stat_diff in struct per_cpu_nodestat Roman Gushchin
2019-10-20 22:44   ` Christopher Lameter
2019-10-21  1:15     ` Roman Gushchin
2019-10-21 18:09       ` Christopher Lameter
2019-10-20 22:51   ` Christopher Lameter
2019-10-21  1:21     ` Roman Gushchin
2019-10-18  0:28 ` [PATCH 03/16] mm: vmstat: convert slab vmstat counter to bytes Roman Gushchin
2019-10-18  0:28 ` [PATCH 04/16] mm: memcg/slab: allocate space for memcg ownership data for non-root slabs Roman Gushchin
2019-10-18  0:28 ` [PATCH 05/16] mm: slub: implement SLUB version of obj_to_index() Roman Gushchin
2019-10-18  0:28 ` Roman Gushchin [this message]
2019-10-18  0:28 ` [PATCH 07/16] mm: memcg: move memcg_kmem_bypass() to memcontrol.h Roman Gushchin
2019-10-18  0:28 ` [PATCH 08/16] mm: memcg: introduce __mod_lruvec_memcg_state() Roman Gushchin
2019-10-18  0:28 ` [PATCH 09/16] mm: memcg/slab: charge individual slab objects instead of pages Roman Gushchin
2019-10-25 19:41   ` Johannes Weiner
2019-10-25 20:00     ` Roman Gushchin
2019-10-25 20:52       ` Johannes Weiner
2019-10-31  1:52     ` Roman Gushchin
2019-10-31 14:23       ` Johannes Weiner
2019-10-31 14:41       ` Johannes Weiner
2019-10-31 15:07         ` Roman Gushchin
2019-10-31 18:50           ` Johannes Weiner
2019-10-18  0:28 ` [PATCH 10/16] mm: memcg: move get_mem_cgroup_from_current() to memcontrol.h Roman Gushchin
2019-10-18  0:28 ` [PATCH 11/16] mm: memcg/slab: replace memcg_from_slab_page() with memcg_from_slab_obj() Roman Gushchin
2019-10-18  0:28 ` [PATCH 12/16] tools/cgroup: add slabinfo.py tool Roman Gushchin
2019-10-18  0:28 ` [PATCH 13/16] mm: memcg/slab: deprecate memory.kmem.slabinfo Roman Gushchin
2019-10-18  0:28 ` [PATCH 14/16] mm: memcg/slab: use one set of kmem_caches for all memory cgroups Roman Gushchin
2019-10-18  0:28 ` [PATCH 15/16] tools/cgroup: make slabinfo.py compatible with new slab controller Roman Gushchin
2019-10-18  0:28 ` [PATCH 16/16] mm: slab: remove redundant check in memcg_accumulate_slabinfo() Roman Gushchin
2019-10-18 17:03 ` [PATCH 00/16] The new slab memory controller Waiman Long
2019-10-18 17:12   ` Roman Gushchin
2019-10-22 13:22 ` Michal Hocko
2019-10-22 13:28   ` Michal Hocko
2019-10-22 15:48     ` Roman Gushchin
2019-10-22 13:31 ` Michal Hocko
2019-10-22 15:59   ` Roman Gushchin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191018002820.307763-7-guro@fb.com \
    --to=guro@fb.com \
    --cc=cl@linux.com \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=longman@redhat.com \
    --cc=mhocko@kernel.org \
    --cc=shakeelb@google.com \
    --cc=vdavydov.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox