linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches
@ 2022-05-17  9:44 Vasily Averin
  2022-05-17 11:59 ` Hyeonggon Yoo
  2022-05-17 13:37 ` Matthew Wilcox
  0 siblings, 2 replies; 7+ messages in thread
From: Vasily Averin @ 2022-05-17  9:44 UTC (permalink / raw)
  To: Steven Rostedt, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox
  Cc: kernel, linux-kernel, Ingo Molnar, Andrew Morton, linux-mm,
	Joonsoo Kim, David Rientjes, Pekka Enberg, Christoph Lameter,
	Michal Hocko

Slab caches marked with SLAB_ACCOUNT force accounting for every
allocation from this cache even if __GFP_ACCOUNT flag is not passed.
Unfortunately, at the moment this flag is not visible in ftrace output,
and this makes it difficult to analyze the accounted allocations.

This patch adds the __GFP_ACCOUNT flag for allocations from slab caches
marked with SLAB_ACCOUNT to the ftrace output.
---
v2:
 1) handle kmem_cache_alloc_node() too, thanks to Shakeel
 2) rework kmem_cache_alloc* tracepoints to use cachep instead
    of current cachep->*size parameters. Now kmalloc[_node] and
    kmem_cache_alloc[_node] tracepoints do not use common template

NB: kmem_cache_alloc_node tracepoint in SLOB cannot be switched to cachep,
    therefore it was replaced by kmalloc_node tracepoint.
---
VvS: is this acceptable? Maybe I should split this patch?

Signed-off-by: Vasily Averin <vvs@openvz.org>
---
 include/trace/events/kmem.h | 82 +++++++++++++++++++++++++++----------
 mm/slab.c                   |  7 +---
 mm/slab_common.c            |  7 ++--
 mm/slob.c                   | 10 ++---
 mm/slub.c                   |  6 +--
 5 files changed, 71 insertions(+), 41 deletions(-)

diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 71c141804222..3b4f96e4a607 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -9,7 +9,7 @@
 #include <linux/tracepoint.h>
 #include <trace/events/mmflags.h>
 
-DECLARE_EVENT_CLASS(kmem_alloc,
+TRACE_EVENT(kmalloc,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
@@ -43,23 +43,41 @@ DECLARE_EVENT_CLASS(kmem_alloc,
 		show_gfp_flags(__entry->gfp_flags))
 );
 
-DEFINE_EVENT(kmem_alloc, kmalloc,
+TRACE_EVENT(kmem_cache_alloc,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 struct kmem_cache *s,
+		 gfp_t gfp_flags),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
-);
+	TP_ARGS(call_site, ptr, s, gfp_flags),
 
-DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	unsigned long,	gfp_flags	)
+	),
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= s->object_size;
+		__entry->bytes_alloc	= s->size;
+		__entry->gfp_flags	= (__force unsigned long)gfp_flags |
+				(s->flags & SLAB_ACCOUNT ? __GFP_ACCOUNT : 0);
+	),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
+		(void *)__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags))
 );
 
-DECLARE_EVENT_CLASS(kmem_alloc_node,
+TRACE_EVENT(kmalloc_node,
 
 	TP_PROTO(unsigned long call_site,
 		 const void *ptr,
@@ -97,22 +115,42 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
 		__entry->node)
 );
 
-DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
+TRACE_EVENT(kmem_cache_alloc_node,
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
-		 gfp_t gfp_flags, int node),
+	TP_PROTO(unsigned long call_site,
+		 const void *ptr,
+		 struct kmem_cache *s,
+		 gfp_t gfp_flags,
+		 int node),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
-);
+	TP_ARGS(call_site, ptr, s, gfp_flags, node),
 
-DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
+	TP_STRUCT__entry(
+		__field(	unsigned long,	call_site	)
+		__field(	const void *,	ptr		)
+		__field(	size_t,		bytes_req	)
+		__field(	size_t,		bytes_alloc	)
+		__field(	unsigned long,	gfp_flags	)
+		__field(	int,		node		)
+	),
 
-	TP_PROTO(unsigned long call_site, const void *ptr,
-		 size_t bytes_req, size_t bytes_alloc,
-		 gfp_t gfp_flags, int node),
+	TP_fast_assign(
+		__entry->call_site	= call_site;
+		__entry->ptr		= ptr;
+		__entry->bytes_req	= s->object_size;
+		__entry->bytes_alloc	= s->size;
+		__entry->gfp_flags	= (__force unsigned long)gfp_flags |
+				(s->flags & SLAB_ACCOUNT ? __GFP_ACCOUNT : 0);
+		__entry->node		= node;
+	),
 
-	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
+	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
+		(void *)__entry->call_site,
+		__entry->ptr,
+		__entry->bytes_req,
+		__entry->bytes_alloc,
+		show_gfp_flags(__entry->gfp_flags),
+		__entry->node)
 );
 
 TRACE_EVENT(kfree,
diff --git a/mm/slab.c b/mm/slab.c
index 0edb474edef1..935b1c90d7f0 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3492,8 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
 {
 	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret,
-			       cachep->object_size, cachep->size, flags);
+	trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags);
 
 	return ret;
 }
@@ -3606,9 +3605,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
-				    cachep->object_size, cachep->size,
-				    flags, nodeid);
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep, flags, nodeid);
 
 	return ret;
 }
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2b3206a2c3b5..12299cf450fe 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -25,13 +25,12 @@
 #include <asm/page.h>
 #include <linux/memcontrol.h>
 
-#define CREATE_TRACE_POINTS
-#include <trace/events/kmem.h>
-
 #include "internal.h"
-
 #include "slab.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/kmem.h>
+
 enum slab_state slab_state;
 LIST_HEAD(slab_caches);
 DEFINE_MUTEX(slab_mutex);
diff --git a/mm/slob.c b/mm/slob.c
index 40ea6e2d4ccd..69875419769c 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -610,14 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node, 0);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
-					    SLOB_UNITS(c->size) * SLOB_UNIT,
-					    flags, node);
+		trace_kmalloc_node(_RET_IP_, b, c->object_size,
+				   SLOB_UNITS(c->size) * SLOB_UNIT, flags, node);
 	} else {
 		b = slob_new_pages(flags, get_order(c->size), node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
-					    PAGE_SIZE << get_order(c->size),
-					    flags, node);
+		trace_kmalloc_node(_RET_IP_, b, c->object_size,
+				   PAGE_SIZE << get_order(c->size), flags, node);
 	}
 
 	if (b && c->ctor) {
diff --git a/mm/slub.c b/mm/slub.c
index ed5c2c03a47a..fc6678269db0 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3231,8 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
 {
 	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
-				s->size, gfpflags);
+	trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags);
 
 	return ret;
 }
@@ -3266,8 +3265,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
 	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
 
-	trace_kmem_cache_alloc_node(_RET_IP_, ret,
-				    s->object_size, s->size, gfpflags, node);
+	trace_kmem_cache_alloc_node(_RET_IP_, ret, s, gfpflags, node);
 
 	return ret;
 }
-- 
2.31.1



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches
  2022-05-17  9:44 [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches Vasily Averin
@ 2022-05-17 11:59 ` Hyeonggon Yoo
  2022-05-17 13:29   ` Muchun Song
  2022-05-17 16:34   ` Roman Gushchin
  2022-05-17 13:37 ` Matthew Wilcox
  1 sibling, 2 replies; 7+ messages in thread
From: Hyeonggon Yoo @ 2022-05-17 11:59 UTC (permalink / raw)
  To: YoMccU66auLAPEHa
  Cc: Steven Rostedt, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	Matthew Wilcox, kernel, linux-kernel, Ingo Molnar, Andrew Morton,
	linux-mm, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko

On Tue, May 17, 2022 at 12:44:14PM +0300, Vasily Averin wrote:
> Slab caches marked with SLAB_ACCOUNT force accounting for every
> allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> Unfortunately, at the moment this flag is not visible in ftrace output,
> and this makes it difficult to analyze the accounted allocations.
> 
> This patch adds the __GFP_ACCOUNT flag for allocations from slab caches
> marked with SLAB_ACCOUNT to the ftrace output
> ---
> v2:
>  1) handle kmem_cache_alloc_node() too, thanks to Shakeel
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters. Now kmalloc[_node] and
>     kmem_cache_alloc[_node] tracepoints do not use common template
> 
> NB: kmem_cache_alloc_node tracepoint in SLOB cannot be switched to cachep,
>     therefore it was replaced by kmalloc_node tracepoint.
> ---
> VvS: is this acceptable? Maybe I should split this patch?
> 
> Signed-off-by: Vasily Averin <vvs@openvz.org>
> ---
>  include/trace/events/kmem.h | 82 +++++++++++++++++++++++++++----------
>  mm/slab.c                   |  7 +---
>  mm/slab_common.c            |  7 ++--
>  mm/slob.c                   | 10 ++---
>  mm/slub.c                   |  6 +--
>  5 files changed, 71 insertions(+), 41 deletions(-)
> 
> diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> index 71c141804222..3b4f96e4a607 100644
> --- a/include/trace/events/kmem.h
> +++ b/include/trace/events/kmem.h
> @@ -9,7 +9,7 @@
>  #include <linux/tracepoint.h>
>  #include <trace/events/mmflags.h>
>  
> -DECLARE_EVENT_CLASS(kmem_alloc,
> +TRACE_EVENT(kmalloc,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> @@ -43,23 +43,41 @@ DECLARE_EVENT_CLASS(kmem_alloc,
>  		show_gfp_flags(__entry->gfp_flags))
>  );
>  
> -DEFINE_EVENT(kmem_alloc, kmalloc,
> +TRACE_EVENT(kmem_cache_alloc,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> +	TP_PROTO(unsigned long call_site,
> +		 const void *ptr,
> +		 struct kmem_cache *s,
> +		 gfp_t gfp_flags),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> -);
> +	TP_ARGS(call_site, ptr, s, gfp_flags),
>  
> -DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
> +	TP_STRUCT__entry(
> +		__field(	unsigned long,	call_site	)
> +		__field(	const void *,	ptr		)
> +		__field(	size_t,		bytes_req	)
> +		__field(	size_t,		bytes_alloc	)
> +		__field(	unsigned long,	gfp_flags	)
> +	),
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> +	TP_fast_assign(
> +		__entry->call_site	= call_site;
> +		__entry->ptr		= ptr;
> +		__entry->bytes_req	= s->object_size;
> +		__entry->bytes_alloc	= s->size;
> +		__entry->gfp_flags	= (__force unsigned long)gfp_flags |
> +				(s->flags & SLAB_ACCOUNT ? __GFP_ACCOUNT : 0);
> +	),

This is a bit of a lie. SLAB_ACCOUNT is not a gfp flag.

IMO the problem here is that we don't know which cache the kernel is
allocating from. What about just printing the name of the cache and
removing bytes_req and bytes_alloc?

And then you can check if the cache uses SLAB_ACCOUNT or not.

>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s",
> +		(void *)__entry->call_site,
> +		__entry->ptr,
> +		__entry->bytes_req,
> +		__entry->bytes_alloc,
> +		show_gfp_flags(__entry->gfp_flags))
>  );
>  
> -DECLARE_EVENT_CLASS(kmem_alloc_node,
> +TRACE_EVENT(kmalloc_node,
>  
>  	TP_PROTO(unsigned long call_site,
>  		 const void *ptr,
> @@ -97,22 +115,42 @@ DECLARE_EVENT_CLASS(kmem_alloc_node,
>  		__entry->node)
>  );
>  
> -DEFINE_EVENT(kmem_alloc_node, kmalloc_node,
> +TRACE_EVENT(kmem_cache_alloc_node,
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> -		 gfp_t gfp_flags, int node),
> +	TP_PROTO(unsigned long call_site,
> +		 const void *ptr,
> +		 struct kmem_cache *s,
> +		 gfp_t gfp_flags,
> +		 int node),
>  
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> -);
> +	TP_ARGS(call_site, ptr, s, gfp_flags, node),
>  
> -DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
> +	TP_STRUCT__entry(
> +		__field(	unsigned long,	call_site	)
> +		__field(	const void *,	ptr		)
> +		__field(	size_t,		bytes_req	)
> +		__field(	size_t,		bytes_alloc	)
> +		__field(	unsigned long,	gfp_flags	)
> +		__field(	int,		node		)
> +	),
>  
> -	TP_PROTO(unsigned long call_site, const void *ptr,
> -		 size_t bytes_req, size_t bytes_alloc,
> -		 gfp_t gfp_flags, int node),
> +	TP_fast_assign(
> +		__entry->call_site	= call_site;
> +		__entry->ptr		= ptr;
> +		__entry->bytes_req	= s->object_size;
> +		__entry->bytes_alloc	= s->size;
> +		__entry->gfp_flags	= (__force unsigned long)gfp_flags |
> +				(s->flags & SLAB_ACCOUNT ? __GFP_ACCOUNT : 0);
> +		__entry->node		= node;
> +	),
> -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
> +	TP_printk("call_site=%pS ptr=%p bytes_req=%zu bytes_alloc=%zu gfp_flags=%s node=%d",
> +		(void *)__entry->call_site,
> +		__entry->ptr,
> +		__entry->bytes_req,
> +		__entry->bytes_alloc,
> +		show_gfp_flags(__entry->gfp_flags),
> +		__entry->node)
>  );
>  
>  TRACE_EVENT(kfree,
> diff --git a/mm/slab.c b/mm/slab.c
> index 0edb474edef1..935b1c90d7f0 100644
> --- a/mm/slab.c
> +++ b/mm/slab.c
> @@ -3492,8 +3492,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(cachep, lru, flags, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret,
> -			       cachep->object_size, cachep->size, flags);
> +	trace_kmem_cache_alloc(_RET_IP_, ret, cachep, flags);
>  
>  	return ret;
>  }
> @@ -3606,9 +3605,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
>  {
>  	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> -				    cachep->object_size, cachep->size,
> -				    flags, nodeid);
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, cachep, flags, nodeid);
>  
>  	return ret;
>  }
> diff --git a/mm/slab_common.c b/mm/slab_common.c
> index 2b3206a2c3b5..12299cf450fe 100644
> --- a/mm/slab_common.c
> +++ b/mm/slab_common.c
> @@ -25,13 +25,12 @@
>  #include <asm/page.h>
>  #include <linux/memcontrol.h>
>  
> -#define CREATE_TRACE_POINTS
> -#include <trace/events/kmem.h>
> -
>  #include "internal.h"
> -
>  #include "slab.h"
>  
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/kmem.h>
> +
>  enum slab_state slab_state;
>  LIST_HEAD(slab_caches);
>  DEFINE_MUTEX(slab_mutex);
> diff --git a/mm/slob.c b/mm/slob.c
> index 40ea6e2d4ccd..69875419769c 100644
> --- a/mm/slob.c
> +++ b/mm/slob.c
> @@ -610,14 +610,12 @@ static void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
>  
>  	if (c->size < PAGE_SIZE) {
>  		b = slob_alloc(c->size, flags, c->align, node, 0);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> -					    SLOB_UNITS(c->size) * SLOB_UNIT,
> -					    flags, node);
> +		trace_kmalloc_node(_RET_IP_, b, c->object_size,
> +				   SLOB_UNITS(c->size) * SLOB_UNIT, flags, node);
>  	} else {
>  		b = slob_new_pages(flags, get_order(c->size), node);
> -		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
> -					    PAGE_SIZE << get_order(c->size),
> -					    flags, node);
> +		trace_kmalloc_node(_RET_IP_, b, c->object_size,
> +				   PAGE_SIZE << get_order(c->size), flags, node);
>  	}
>  
>  	if (b && c->ctor) {
> diff --git a/mm/slub.c b/mm/slub.c
> index ed5c2c03a47a..fc6678269db0 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3231,8 +3231,7 @@ void *__kmem_cache_alloc_lru(struct kmem_cache *s, struct list_lru *lru,
>  {
>  	void *ret = slab_alloc(s, lru, gfpflags, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
> -				s->size, gfpflags);
> +	trace_kmem_cache_alloc(_RET_IP_, ret, s, gfpflags);
>  
>  	return ret;
>  }
> @@ -3266,8 +3265,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
>  {
>  	void *ret = slab_alloc_node(s, NULL, gfpflags, node, _RET_IP_, s->object_size);
>  
> -	trace_kmem_cache_alloc_node(_RET_IP_, ret,
> -				    s->object_size, s->size, gfpflags, node);
> +	trace_kmem_cache_alloc_node(_RET_IP_, ret, s, gfpflags, node);
>  
>  	return ret;
>  }
> -- 
> 2.31.1
> 
> 

-- 
Thanks,
Hyeonggon


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches
  2022-05-17 11:59 ` Hyeonggon Yoo
@ 2022-05-17 13:29   ` Muchun Song
  2022-05-18  9:37     ` Hyeonggon Yoo
  2022-05-17 16:34   ` Roman Gushchin
  1 sibling, 1 reply; 7+ messages in thread
From: Muchun Song @ 2022-05-17 13:29 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: YoMccU66auLAPEHa, Steven Rostedt, Shakeel Butt, Roman Gushchin,
	Vlastimil Babka, Matthew Wilcox, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On Tue, May 17, 2022 at 08:59:31PM +0900, Hyeonggon Yoo wrote:
> On Tue, May 17, 2022 at 12:44:14PM +0300, Vasily Averin wrote:
> > Slab caches marked with SLAB_ACCOUNT force accounting for every
> > allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> > Unfortunately, at the moment this flag is not visible in ftrace output,
> > and this makes it difficult to analyze the accounted allocations.
> > 
> > This patch adds the __GFP_ACCOUNT flag for allocations from slab caches
> > marked with SLAB_ACCOUNT to the ftrace output
> > ---
> > v2:
> >  1) handle kmem_cache_alloc_node() too, thanks to Shakeel
> >  2) rework kmem_cache_alloc* tracepoints to use cachep instead
> >     of current cachep->*size parameters. Now kmalloc[_node] and
> >     kmem_cache_alloc[_node] tracepoints do not use common template
> > 
> > NB: kmem_cache_alloc_node tracepoint in SLOB cannot be switched to cachep,
> >     therefore it was replaced by kmalloc_node tracepoint.
> > ---
> > VvS: is this acceptable? Maybe I should split this patch?
> > 
> > Signed-off-by: Vasily Averin <vvs@openvz.org>
> > ---
> >  include/trace/events/kmem.h | 82 +++++++++++++++++++++++++++----------
> >  mm/slab.c                   |  7 +---
> >  mm/slab_common.c            |  7 ++--
> >  mm/slob.c                   | 10 ++---
> >  mm/slub.c                   |  6 +--
> >  5 files changed, 71 insertions(+), 41 deletions(-)
> > 
> > diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> > index 71c141804222..3b4f96e4a607 100644
> > --- a/include/trace/events/kmem.h
> > +++ b/include/trace/events/kmem.h
> > @@ -9,7 +9,7 @@
> >  #include <linux/tracepoint.h>
> >  #include <trace/events/mmflags.h>
> >  
> > -DECLARE_EVENT_CLASS(kmem_alloc,
> > +TRACE_EVENT(kmalloc,
> >  
> >  	TP_PROTO(unsigned long call_site,
> >  		 const void *ptr,
> > @@ -43,23 +43,41 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> >  		show_gfp_flags(__entry->gfp_flags))
> >  );
> >  
> > -DEFINE_EVENT(kmem_alloc, kmalloc,
> > +TRACE_EVENT(kmem_cache_alloc,
> >  
> > -	TP_PROTO(unsigned long call_site, const void *ptr,
> > -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> > +	TP_PROTO(unsigned long call_site,
> > +		 const void *ptr,
> > +		 struct kmem_cache *s,
> > +		 gfp_t gfp_flags),
> >  
> > -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> > -);
> > +	TP_ARGS(call_site, ptr, s, gfp_flags),
> >  
> > -DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
> > +	TP_STRUCT__entry(
> > +		__field(	unsigned long,	call_site	)
> > +		__field(	const void *,	ptr		)
> > +		__field(	size_t,		bytes_req	)
> > +		__field(	size_t,		bytes_alloc	)
> > +		__field(	unsigned long,	gfp_flags	)
> > +	),
> >  
> > -	TP_PROTO(unsigned long call_site, const void *ptr,
> > -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> > +	TP_fast_assign(
> > +		__entry->call_site	= call_site;
> > +		__entry->ptr		= ptr;
> > +		__entry->bytes_req	= s->object_size;
> > +		__entry->bytes_alloc	= s->size;
> > +		__entry->gfp_flags	= (__force unsigned long)gfp_flags |
> > +				(s->flags & SLAB_ACCOUNT ? __GFP_ACCOUNT : 0);
> > +	),
> 
> This is a bit of lie. SLAB_ACCOUNT is not a gfp flag.
>

Maybe it is not a problem since the functionalities of SLAB_ACCOUNT and
__GFP_ACCOUNT are similar.
 
> IMO the problem here is that we don't know which cache kernel is allocating
> from. What about just printing name of cache and remove bytes_req,
> bytes_alloc?

Is it a problem? Because we would be changing the behavior for users.
Should we treat the tracepoint as a stable API for users? If so, I think
we should not change the parameters of this tracepoint.  Maybe I am wrong;
these are just some thoughts from me.

Thanks.

> 
> And then you can check if the cache uses SLAB_ACCOUNT or not.
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches
  2022-05-17  9:44 [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches Vasily Averin
  2022-05-17 11:59 ` Hyeonggon Yoo
@ 2022-05-17 13:37 ` Matthew Wilcox
  1 sibling, 0 replies; 7+ messages in thread
From: Matthew Wilcox @ 2022-05-17 13:37 UTC (permalink / raw)
  To: YoMccU66auLAPEHa
  Cc: Steven Rostedt, Shakeel Butt, Roman Gushchin, Vlastimil Babka,
	kernel, linux-kernel, Ingo Molnar, Andrew Morton, linux-mm,
	Joonsoo Kim, David Rientjes, Pekka Enberg, Christoph Lameter,
	Michal Hocko

On Tue, May 17, 2022 at 12:44:14PM +0300, Vasily Averin wrote:
>  2) rework kmem_cache_alloc* tracepoints to use cachep instead
>     of current cachep->*size parameters. Now kmalloc[_node] and
>     kmem_cache_alloc[_node] tracepoints do not use common template

You can keep the common template.  Just pass in s->object_size as well
as s.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches
  2022-05-17 11:59 ` Hyeonggon Yoo
  2022-05-17 13:29   ` Muchun Song
@ 2022-05-17 16:34   ` Roman Gushchin
  2022-05-18  9:38     ` Hyeonggon Yoo
  1 sibling, 1 reply; 7+ messages in thread
From: Roman Gushchin @ 2022-05-17 16:34 UTC (permalink / raw)
  To: Hyeonggon Yoo
  Cc: YoMccU66auLAPEHa, Steven Rostedt, Shakeel Butt, Vlastimil Babka,
	Matthew Wilcox, kernel, linux-kernel, Ingo Molnar, Andrew Morton,
	linux-mm, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko

On Tue, May 17, 2022 at 08:59:31PM +0900, Hyeonggon Yoo wrote:
> On Tue, May 17, 2022 at 12:44:14PM +0300, Vasily Averin wrote:
> > Slab caches marked with SLAB_ACCOUNT force accounting for every
> > allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> > Unfortunately, at the moment this flag is not visible in ftrace output,
> > and this makes it difficult to analyze the accounted allocations.
> > 
> > This patch adds the __GFP_ACCOUNT flag for allocations from slab caches
> > marked with SLAB_ACCOUNT to the ftrace output
> > ---
> > v2:
> >  1) handle kmem_cache_alloc_node() too, thanks to Shakeel
> >  2) rework kmem_cache_alloc* tracepoints to use cachep instead
> >     of current cachep->*size parameters. Now kmalloc[_node] and
> >     kmem_cache_alloc[_node] tracepoints do not use common template
> > 
> > NB: kmem_cache_alloc_node tracepoint in SLOB cannot be switched to cachep,
> >     therefore it was replaced by kmalloc_node tracepoint.
> > ---
> > VvS: is this acceptable? Maybe I should split this patch?
> > 
> > Signed-off-by: Vasily Averin <vvs@openvz.org>
> > ---
> >  include/trace/events/kmem.h | 82 +++++++++++++++++++++++++++----------
> >  mm/slab.c                   |  7 +---
> >  mm/slab_common.c            |  7 ++--
> >  mm/slob.c                   | 10 ++---
> >  mm/slub.c                   |  6 +--
> >  5 files changed, 71 insertions(+), 41 deletions(-)
> > 
> > diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> > index 71c141804222..3b4f96e4a607 100644
> > --- a/include/trace/events/kmem.h
> > +++ b/include/trace/events/kmem.h
> > @@ -9,7 +9,7 @@
> >  #include <linux/tracepoint.h>
> >  #include <trace/events/mmflags.h>
> >  
> > -DECLARE_EVENT_CLASS(kmem_alloc,
> > +TRACE_EVENT(kmalloc,
> >  
> >  	TP_PROTO(unsigned long call_site,
> >  		 const void *ptr,
> > @@ -43,23 +43,41 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> >  		show_gfp_flags(__entry->gfp_flags))
> >  );
> >  
> > -DEFINE_EVENT(kmem_alloc, kmalloc,
> > +TRACE_EVENT(kmem_cache_alloc,
> >  
> > -	TP_PROTO(unsigned long call_site, const void *ptr,
> > -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> > +	TP_PROTO(unsigned long call_site,
> > +		 const void *ptr,
> > +		 struct kmem_cache *s,
> > +		 gfp_t gfp_flags),
> >  
> > -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> > -);
> > +	TP_ARGS(call_site, ptr, s, gfp_flags),
> >  
> > -DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
> > +	TP_STRUCT__entry(
> > +		__field(	unsigned long,	call_site	)
> > +		__field(	const void *,	ptr		)
> > +		__field(	size_t,		bytes_req	)
> > +		__field(	size_t,		bytes_alloc	)
> > +		__field(	unsigned long,	gfp_flags	)
> > +	),
> >  
> > -	TP_PROTO(unsigned long call_site, const void *ptr,
> > -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> > +	TP_fast_assign(
> > +		__entry->call_site	= call_site;
> > +		__entry->ptr		= ptr;
> > +		__entry->bytes_req	= s->object_size;
> > +		__entry->bytes_alloc	= s->size;
> > +		__entry->gfp_flags	= (__force unsigned long)gfp_flags |
> > +				(s->flags & SLAB_ACCOUNT ? __GFP_ACCOUNT : 0);
> > +	),
> 
> This is a bit of lie. SLAB_ACCOUNT is not a gfp flag.

Alternatively we can add an explicit "accounted" boolean entry,
which will be set to true if the SLAB_ACCOUNT slab cache flag or
the __GFP_ACCOUNT gfp flag is present.


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches
  2022-05-17 13:29   ` Muchun Song
@ 2022-05-18  9:37     ` Hyeonggon Yoo
  0 siblings, 0 replies; 7+ messages in thread
From: Hyeonggon Yoo @ 2022-05-18  9:37 UTC (permalink / raw)
  To: Muchun Song
  Cc: YoMccU66auLAPEHa, Steven Rostedt, Shakeel Butt, Roman Gushchin,
	Vlastimil Babka, Matthew Wilcox, kernel, linux-kernel,
	Ingo Molnar, Andrew Morton, linux-mm, Joonsoo Kim,
	David Rientjes, Pekka Enberg, Christoph Lameter, Michal Hocko

On Tue, May 17, 2022 at 09:29:01PM +0800, Muchun Song wrote:
> On Tue, May 17, 2022 at 08:59:31PM +0900, Hyeonggon Yoo wrote:
> > On Tue, May 17, 2022 at 12:44:14PM +0300, Vasily Averin wrote:
> > > Slab caches marked with SLAB_ACCOUNT force accounting for every
> > > allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> > > Unfortunately, at the moment this flag is not visible in ftrace output,
> > > and this makes it difficult to analyze the accounted allocations.
> > > 
> > > This patch adds the __GFP_ACCOUNT flag for allocations from slab caches
> > > marked with SLAB_ACCOUNT to the ftrace output
> > > ---
> > > v2:
> > >  1) handle kmem_cache_alloc_node() too, thanks to Shakeel
> > >  2) rework kmem_cache_alloc* tracepoints to use cachep instead
> > >     of current cachep->*size parameters. Now kmalloc[_node] and
> > >     kmem_cache_alloc[_node] tracepoints do not use common template
> > > 
> > > NB: kmem_cache_alloc_node tracepoint in SLOB cannot be switched to cachep,
> > >     therefore it was replaced by kmalloc_node tracepoint.
> > > ---
> > > VvS: is this acceptable? Maybe I should split this patch?
> > > 
> > > Signed-off-by: Vasily Averin <vvs@openvz.org>
> > > ---
> > >  include/trace/events/kmem.h | 82 +++++++++++++++++++++++++++----------
> > >  mm/slab.c                   |  7 +---
> > >  mm/slab_common.c            |  7 ++--
> > >  mm/slob.c                   | 10 ++---
> > >  mm/slub.c                   |  6 +--
> > >  5 files changed, 71 insertions(+), 41 deletions(-)
> > > 
> > > diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> > > index 71c141804222..3b4f96e4a607 100644
> > > --- a/include/trace/events/kmem.h
> > > +++ b/include/trace/events/kmem.h
> > > @@ -9,7 +9,7 @@
> > >  #include <linux/tracepoint.h>
> > >  #include <trace/events/mmflags.h>
> > >  
> > > -DECLARE_EVENT_CLASS(kmem_alloc,
> > > +TRACE_EVENT(kmalloc,
> > >  
> > >  	TP_PROTO(unsigned long call_site,
> > >  		 const void *ptr,
> > > @@ -43,23 +43,41 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> > >  		show_gfp_flags(__entry->gfp_flags))
> > >  );
> > >  
> > > -DEFINE_EVENT(kmem_alloc, kmalloc,
> > > +TRACE_EVENT(kmem_cache_alloc,
> > >  
> > > -	TP_PROTO(unsigned long call_site, const void *ptr,
> > > -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> > > +	TP_PROTO(unsigned long call_site,
> > > +		 const void *ptr,
> > > +		 struct kmem_cache *s,
> > > +		 gfp_t gfp_flags),
> > >  
> > > -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> > > -);
> > > +	TP_ARGS(call_site, ptr, s, gfp_flags),
> > >  
> > > -DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
> > > +	TP_STRUCT__entry(
> > > +		__field(	unsigned long,	call_site	)
> > > +		__field(	const void *,	ptr		)
> > > +		__field(	size_t,		bytes_req	)
> > > +		__field(	size_t,		bytes_alloc	)
> > > +		__field(	unsigned long,	gfp_flags	)
> > > +	),
> > >  
> > > -	TP_PROTO(unsigned long call_site, const void *ptr,
> > > -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> > > +	TP_fast_assign(
> > > +		__entry->call_site	= call_site;
> > > +		__entry->ptr		= ptr;
> > > +		__entry->bytes_req	= s->object_size;
> > > +		__entry->bytes_alloc	= s->size;
> > > +		__entry->gfp_flags	= (__force unsigned long)gfp_flags |
> > > +				(s->flags & SLAB_ACCOUNT ? __GFP_ACCOUNT : 0);
> > > +	),
> > 
> > This is a bit of lie. SLAB_ACCOUNT is not a gfp flag.
> >
> 
> Maybe it is not a problem since the functionalities of SLAB_ACCOUNT and
> __GFP_ACCOUNT are similar.
>
> > IMO the problem here is that we don't know which cache kernel is allocating
> > from. What about just printing name of cache and remove bytes_req,
> > bytes_alloc?
> 
> Is it a problem? 

I thought so because SLAB_ACCOUNT is a characteristic of the cache, not of
an individual allocation, unlike GFP_KERNEL/GFP_ATOMIC.

There are more SLAB_* flags, and I think it's better not to print
all of them in tracepoints. What if someone wants to track allocations
that are reclaimable?

> Because we have changed the behavior to users. Should
> we treat the tracepoint as a stable API to users? If so, I think we
> should not change the parameter of this tracepoint.  Maybe I am wrong,
> just some thoughts from me.

Hmm, yeah, it may break userspace tools. But even changing the names of
functions can break such tools... I too wonder whether we consider them a
stable API. Is there general agreement on this?

And if we cannot change a tracepoint (in a way that breaks existing tools)
after release, we should think more about adding 'accounted' to tracepoints.

Apart from that - even if we're not going to remove bytes_req/bytes_alloc,
I think distinguishing caches is worth more than adding something like 'accounted'.

> Thanks.
> 
> > 
> > And then you can check if the cache uses SLAB_ACCOUNT or not.
> > 

-- 
Thanks,
Hyeonggon


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches
  2022-05-17 16:34   ` Roman Gushchin
@ 2022-05-18  9:38     ` Hyeonggon Yoo
  0 siblings, 0 replies; 7+ messages in thread
From: Hyeonggon Yoo @ 2022-05-18  9:38 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: YoMccU66auLAPEHa, Steven Rostedt, Shakeel Butt, Vlastimil Babka,
	Matthew Wilcox, kernel, linux-kernel, Ingo Molnar, Andrew Morton,
	linux-mm, Joonsoo Kim, David Rientjes, Pekka Enberg,
	Christoph Lameter, Michal Hocko

On Tue, May 17, 2022 at 09:34:13AM -0700, Roman Gushchin wrote:
> On Tue, May 17, 2022 at 08:59:31PM +0900, Hyeonggon Yoo wrote:
> > On Tue, May 17, 2022 at 12:44:14PM +0300, Vasily Averin wrote:
> > > dSlab caches marked with SLAB_ACCOUNT force accounting for every
> > > allocation from this cache even if __GFP_ACCOUNT flag is not passed.
> > > Unfortunately, at the moment this flag is not visible in ftrace output,
> > > and this makes it difficult to analyze the accounted allocations.
> > > 
> > > This patch adds the __GFP_ACCOUNT flag for allocations from slab caches
> > > marked with SLAB_ACCOUNT to the ftrace output
> > > ---
> > > v2:
> > >  1) handle kmem_cache_alloc_node() too, thanks to Shakeel
> > >  2) rework kmem_cache_alloc* tracepoints to use cachep instead
> > >     of current cachep->*size parameters. Now kmalloc[_node] and
> > >     kmem_cache_alloc[_node] tracepoints do not use common template
> > > 
> > > NB: kmem_cache_alloc_node tracepoint in SLOB cannot be switched to cachep,
> > >     therefore it was replaced by kmalloc_node tracepoint.
> > > ---
> > > VvS: is this acceptable? Maybe I should split this patch?
> > > 
> > > Signed-off-by: Vasily Averin <vvs@openvz.org>
> > > ---
> > >  include/trace/events/kmem.h | 82 +++++++++++++++++++++++++++----------
> > >  mm/slab.c                   |  7 +---
> > >  mm/slab_common.c            |  7 ++--
> > >  mm/slob.c                   | 10 ++---
> > >  mm/slub.c                   |  6 +--
> > >  5 files changed, 71 insertions(+), 41 deletions(-)
> > > 
> > > diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
> > > index 71c141804222..3b4f96e4a607 100644
> > > --- a/include/trace/events/kmem.h
> > > +++ b/include/trace/events/kmem.h
> > > @@ -9,7 +9,7 @@
> > >  #include <linux/tracepoint.h>
> > >  #include <trace/events/mmflags.h>
> > >  
> > > -DECLARE_EVENT_CLASS(kmem_alloc,
> > > +TRACE_EVENT(kmalloc,
> > >  
> > >  	TP_PROTO(unsigned long call_site,
> > >  		 const void *ptr,
> > > @@ -43,23 +43,41 @@ DECLARE_EVENT_CLASS(kmem_alloc,
> > >  		show_gfp_flags(__entry->gfp_flags))
> > >  );
> > >  
> > > -DEFINE_EVENT(kmem_alloc, kmalloc,
> > > +TRACE_EVENT(kmem_cache_alloc,
> > >  
> > > -	TP_PROTO(unsigned long call_site, const void *ptr,
> > > -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> > > +	TP_PROTO(unsigned long call_site,
> > > +		 const void *ptr,
> > > +		 struct kmem_cache *s,
> > > +		 gfp_t gfp_flags),
> > >  
> > > -	TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)
> > > -);
> > > +	TP_ARGS(call_site, ptr, s, gfp_flags),
> > >  
> > > -DEFINE_EVENT(kmem_alloc, kmem_cache_alloc,
> > > +	TP_STRUCT__entry(
> > > +		__field(	unsigned long,	call_site	)
> > > +		__field(	const void *,	ptr		)
> > > +		__field(	size_t,		bytes_req	)
> > > +		__field(	size_t,		bytes_alloc	)
> > > +		__field(	unsigned long,	gfp_flags	)
> > > +	),
> > >  
> > > -	TP_PROTO(unsigned long call_site, const void *ptr,
> > > -		 size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags),
> > > +	TP_fast_assign(
> > > +		__entry->call_site	= call_site;
> > > +		__entry->ptr		= ptr;
> > > +		__entry->bytes_req	= s->object_size;
> > > +		__entry->bytes_alloc	= s->size;
> > > +		__entry->gfp_flags	= (__force unsigned long)gfp_flags |
> > > +				(s->flags & SLAB_ACCOUNT ? __GFP_ACCOUNT : 0);
> > > +	),
> > 
> > This is a bit of lie. SLAB_ACCOUNT is not a gfp flag.
> 
> Alternatively we can add an explicit "accounted" boolean entry,
> which will be set to true if the SLAB_ACCOUNT slab cache flag or
> the __GFP_ACCOUNT gfp flag is present.

Or what about adding something like SlabAccounted or MemAccounted to
/proc/meminfo, if what he wants to know is the total amount of accounted memory?

-- 
Thanks,
Hyeonggon


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2022-05-18  9:38 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-17  9:44 [PATCH v2] tracing: add ACCOUNT flag for allocations from marked slab caches Vasily Averin
2022-05-17 11:59 ` Hyeonggon Yoo
2022-05-17 13:29   ` Muchun Song
2022-05-18  9:37     ` Hyeonggon Yoo
2022-05-17 16:34   ` Roman Gushchin
2022-05-18  9:38     ` Hyeonggon Yoo
2022-05-17 13:37 ` Matthew Wilcox

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox