linux-mm.kvack.org archive mirror
* [PATCH v2 1/3] mm: avoid extra mem_alloc_profiling_enabled() checks
@ 2025-02-01 23:18 Suren Baghdasaryan
  2025-02-01 23:18 ` [PATCH v2 2/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in slab allocator Suren Baghdasaryan
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Suren Baghdasaryan @ 2025-02-01 23:18 UTC (permalink / raw)
  To: akpm
  Cc: kent.overstreet, vbabka, rostedt, peterz, yuzhao, minchan,
	shakeel.butt, souravpanda, pasha.tatashin, 00107082,
	quic_zhenhuah, surenb, linux-mm, linux-kernel

Refactor the code to avoid extra mem_alloc_profiling_enabled() checks
inside the pgalloc_tag_get() function, which is often called after that
check has already been done.
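
The resulting pattern, restated here for illustration using
pgalloc_tag_sub_pages() from the hunks below (comments added only in this
sketch):

	static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
	{
		struct alloc_tag *tag;

		/* One static-key test at this level... */
		if (!mem_alloc_profiling_enabled())
			return;

		/* ...so __pgalloc_tag_get() no longer re-tests it internally. */
		tag = __pgalloc_tag_get(page);
		if (tag)
			this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
	}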

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
 include/linux/pgalloc_tag.h | 35 +++++++++++++++++++----------------
 lib/alloc_tag.c             |  6 +++---
 mm/page_alloc.c             |  3 +--
 3 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 3469c4b20105..4a82b6b4820e 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -205,28 +205,32 @@ static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
 	}
 }
 
-static inline struct alloc_tag *pgalloc_tag_get(struct page *page)
+/* Should be called only if mem_alloc_profiling_enabled() */
+static inline struct alloc_tag *__pgalloc_tag_get(struct page *page)
 {
 	struct alloc_tag *tag = NULL;
-
-	if (mem_alloc_profiling_enabled()) {
-		union pgtag_ref_handle handle;
-		union codetag_ref ref;
-
-		if (get_page_tag_ref(page, &ref, &handle)) {
-			alloc_tag_sub_check(&ref);
-			if (ref.ct)
-				tag = ct_to_alloc_tag(ref.ct);
-			put_page_tag_ref(handle);
-		}
+	union pgtag_ref_handle handle;
+	union codetag_ref ref;
+
+	if (get_page_tag_ref(page, &ref, &handle)) {
+		alloc_tag_sub_check(&ref);
+		if (ref.ct)
+			tag = ct_to_alloc_tag(ref.ct);
+		put_page_tag_ref(handle);
 	}
 
 	return tag;
 }
 
-static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
+static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
 {
-	if (mem_alloc_profiling_enabled() && tag)
+	struct alloc_tag *tag;
+
+	if (!mem_alloc_profiling_enabled())
+		return;
+
+	tag = __pgalloc_tag_get(page);
+	if (tag)
 		this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
 }
 
@@ -241,8 +245,7 @@ static inline void clear_page_tag_ref(struct page *page) {}
 static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
 				   unsigned int nr) {}
 static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
-static inline struct alloc_tag *pgalloc_tag_get(struct page *page) { return NULL; }
-static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
+static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {}
 static inline void alloc_tag_sec_init(void) {}
 static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {}
 static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {}
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index 19b45617bdcf..1d893e313614 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -174,7 +174,7 @@ void pgalloc_tag_split(struct folio *folio, int old_order, int new_order)
 	if (!mem_alloc_profiling_enabled())
 		return;
 
-	tag = pgalloc_tag_get(&folio->page);
+	tag = __pgalloc_tag_get(&folio->page);
 	if (!tag)
 		return;
 
@@ -200,10 +200,10 @@ void pgalloc_tag_swap(struct folio *new, struct folio *old)
 	if (!mem_alloc_profiling_enabled())
 		return;
 
-	tag_old = pgalloc_tag_get(&old->page);
+	tag_old = __pgalloc_tag_get(&old->page);
 	if (!tag_old)
 		return;
-	tag_new = pgalloc_tag_get(&new->page);
+	tag_new = __pgalloc_tag_get(&new->page);
 	if (!tag_new)
 		return;
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 579789600a3c..b7e3b45183ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4832,12 +4832,11 @@ void __free_pages(struct page *page, unsigned int order)
 {
 	/* get PageHead before we drop reference */
 	int head = PageHead(page);
-	struct alloc_tag *tag = pgalloc_tag_get(page);
 
 	if (put_page_testzero(page))
 		free_frozen_pages(page, order);
 	else if (!head) {
-		pgalloc_tag_sub_pages(tag, (1 << order) - 1);
+		pgalloc_tag_sub_pages(page, (1 << order) - 1);
 		while (order-- > 0)
 			free_frozen_pages(page + (1 << order), order);
 	}

base-commit: 7de6fd8ab65003f050aa58e705592745717ed318
-- 
2.48.1.362.g079036d154-goog




* [PATCH v2 2/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in slab allocator
  2025-02-01 23:18 [PATCH v2 1/3] mm: avoid extra mem_alloc_profiling_enabled() checks Suren Baghdasaryan
@ 2025-02-01 23:18 ` Suren Baghdasaryan
  2025-02-03  8:23   ` Vlastimil Babka
  2025-02-04  0:32   ` Shakeel Butt
  2025-02-01 23:18 ` [PATCH v2 3/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in page allocator Suren Baghdasaryan
  2025-02-03 20:36 ` [PATCH v2 1/3] mm: avoid extra mem_alloc_profiling_enabled() checks Shakeel Butt
  2 siblings, 2 replies; 8+ messages in thread
From: Suren Baghdasaryan @ 2025-02-01 23:18 UTC (permalink / raw)
  To: akpm
  Cc: kent.overstreet, vbabka, rostedt, peterz, yuzhao, minchan,
	shakeel.butt, souravpanda, pasha.tatashin, 00107082,
	quic_zhenhuah, surenb, linux-mm, linux-kernel

When a sizable code section is protected by a disabled static key, that
code still occupies the instruction cache even though it is never executed,
increasing cache misses. This can be remedied by moving such code into a
separate uninlined function.
On a Pixel6 phone, slab allocation profiling overhead measured with
CONFIG_MEM_ALLOC_PROFILING=y and profiling disabled is:

             baseline             modified
Big core     3.31%                0.17%
Medium core  3.79%                0.57%
Little core  6.68%                1.28%

This improvement comes at a cost for the configuration in which profiling
is enabled, since there is now an additional function call. The overhead
from this additional call on Pixel6 is:

Big core     0.66%
Medium core  1.23%
Little core  2.42%

However, this is negligible compared with the overall overhead of memory
allocation profiling when it is enabled.
On x86 this patch does not make a noticeable difference because the
overhead with mem_alloc_profiling_key disabled is much lower (under 1%) to
start with, so any improvement is less visible and harder to distinguish
from the noise. The overhead from the additional call when profiling is
enabled is also within noise levels.
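
Reduced to a minimal sketch (allocation-side hook only, names as in the
diff below, bodies elided), the inlined fast path now carries just a test
and a call, while the profiling work lives out of line:

	/* Out of line: reached only when profiling is enabled, so its body no
	 * longer occupies instruction cache on the fast path. */
	static noinline void
	__alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
	{
		/* ... obj_ext lookup and alloc_tag_add() ... */
	}

	/* Stays inline: a static-key test plus a normally not-taken call. */
	static inline void
	alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
	{
		if (need_slab_obj_ext())
			__alloc_tagging_slab_alloc_hook(s, object, flags);
	}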

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
Changes since v1 [1]:
- Removed inline_if_mem_alloc_prof and uninlined the gated code
unconditionally, per Steven Rostedt and Vlastimil Babka
- Updated changelog to include overhead when profiling is enabled

[1] https://lore.kernel.org/all/20250126070206.381302-2-surenb@google.com/

 mm/slub.c | 51 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 1f50129dcfb3..184fd2b14758 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2000,7 +2000,8 @@ int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s,
 	return 0;
 }
 
-static inline void free_slab_obj_exts(struct slab *slab)
+/* Should be called only if mem_alloc_profiling_enabled() */
+static noinline void free_slab_obj_exts(struct slab *slab)
 {
 	struct slabobj_ext *obj_exts;
 
@@ -2077,33 +2078,37 @@ prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p)
 	return slab_obj_exts(slab) + obj_to_index(s, slab, p);
 }
 
-static inline void
-alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
+/* Should be called only if mem_alloc_profiling_enabled() */
+static noinline void
+__alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
 {
-	if (need_slab_obj_ext()) {
-		struct slabobj_ext *obj_exts;
+	struct slabobj_ext *obj_exts;
 
-		obj_exts = prepare_slab_obj_exts_hook(s, flags, object);
-		/*
-		 * Currently obj_exts is used only for allocation profiling.
-		 * If other users appear then mem_alloc_profiling_enabled()
-		 * check should be added before alloc_tag_add().
-		 */
-		if (likely(obj_exts))
-			alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
-	}
+	obj_exts = prepare_slab_obj_exts_hook(s, flags, object);
+	/*
+	 * Currently obj_exts is used only for allocation profiling.
+	 * If other users appear then mem_alloc_profiling_enabled()
+	 * check should be added before alloc_tag_add().
+	 */
+	if (likely(obj_exts))
+		alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
 }
 
 static inline void
-alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
-			     int objects)
+alloc_tagging_slab_alloc_hook(struct kmem_cache *s, void *object, gfp_t flags)
+{
+	if (need_slab_obj_ext())
+		__alloc_tagging_slab_alloc_hook(s, object, flags);
+}
+
+/* Should be called only if mem_alloc_profiling_enabled() */
+static noinline void
+__alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+			       int objects)
 {
 	struct slabobj_ext *obj_exts;
 	int i;
 
-	if (!mem_alloc_profiling_enabled())
-		return;
-
 	/* slab->obj_exts might not be NULL if it was created for MEMCG accounting. */
 	if (s->flags & (SLAB_NO_OBJ_EXT | SLAB_NOLEAKTRACE))
 		return;
@@ -2119,6 +2124,14 @@ alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
 	}
 }
 
+static inline void
+alloc_tagging_slab_free_hook(struct kmem_cache *s, struct slab *slab, void **p,
+			     int objects)
+{
+	if (mem_alloc_profiling_enabled())
+		__alloc_tagging_slab_free_hook(s, slab, p, objects);
+}
+
 #else /* CONFIG_MEM_ALLOC_PROFILING */
 
 static inline void
-- 
2.48.1.362.g079036d154-goog




* [PATCH v2 3/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in page allocator
  2025-02-01 23:18 [PATCH v2 1/3] mm: avoid extra mem_alloc_profiling_enabled() checks Suren Baghdasaryan
  2025-02-01 23:18 ` [PATCH v2 2/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in slab allocator Suren Baghdasaryan
@ 2025-02-01 23:18 ` Suren Baghdasaryan
  2025-02-04  0:46   ` Shakeel Butt
  2025-02-03 20:36 ` [PATCH v2 1/3] mm: avoid extra mem_alloc_profiling_enabled() checks Shakeel Butt
  2 siblings, 1 reply; 8+ messages in thread
From: Suren Baghdasaryan @ 2025-02-01 23:18 UTC (permalink / raw)
  To: akpm
  Cc: kent.overstreet, vbabka, rostedt, peterz, yuzhao, minchan,
	shakeel.butt, souravpanda, pasha.tatashin, 00107082,
	quic_zhenhuah, surenb, linux-mm, linux-kernel

When a sizable code section is protected by a disabled static key, that
code still occupies the instruction cache even though it is never executed,
increasing cache misses. This can be remedied by moving such code into a
separate uninlined function.
On a Pixel6 phone, page allocation profiling overhead measured with
CONFIG_MEM_ALLOC_PROFILING=y and profiling disabled is:

             baseline             modified
Big core     4.93%                1.53%
Medium core  4.39%                1.41%
Little core  1.02%                0.36%

This improvement comes at a cost for the configuration in which profiling
is enabled, since there is now an additional function call. The overhead
from this additional call on Pixel6 is:

Big core     0.24%
Medium core  0.63%
Little core  1.1%

However, this is negligible compared with the overall overhead of memory
allocation profiling when it is enabled.
On x86 this patch does not make a noticeable difference because the
overhead with mem_alloc_profiling_key disabled is much lower (under 1%) to
start with, so any improvement is less visible and harder to distinguish
from the noise. The overhead from the additional call when profiling is
enabled is also within noise levels.

Signed-off-by: Suren Baghdasaryan <surenb@google.com>
---
Changes since v1 [1]:
- Removed inline_if_mem_alloc_prof and uninlined the gated code
unconditionally, per Steven Rostedt and Vlastimil Babka
- Updated changelog to include overhead when profiling is enabled

[1] https://lore.kernel.org/all/20250126070206.381302-3-surenb@google.com/

 include/linux/pgalloc_tag.h | 60 +++-------------------------
 mm/page_alloc.c             | 78 +++++++++++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 55 deletions(-)

diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h
index 4a82b6b4820e..c74077977830 100644
--- a/include/linux/pgalloc_tag.h
+++ b/include/linux/pgalloc_tag.h
@@ -162,47 +162,13 @@ static inline void update_page_tag_ref(union pgtag_ref_handle handle, union code
 	}
 }
 
-static inline void clear_page_tag_ref(struct page *page)
-{
-	if (mem_alloc_profiling_enabled()) {
-		union pgtag_ref_handle handle;
-		union codetag_ref ref;
-
-		if (get_page_tag_ref(page, &ref, &handle)) {
-			set_codetag_empty(&ref);
-			update_page_tag_ref(handle, &ref);
-			put_page_tag_ref(handle);
-		}
-	}
-}
-
-static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
-				   unsigned int nr)
-{
-	if (mem_alloc_profiling_enabled()) {
-		union pgtag_ref_handle handle;
-		union codetag_ref ref;
-
-		if (get_page_tag_ref(page, &ref, &handle)) {
-			alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
-			update_page_tag_ref(handle, &ref);
-			put_page_tag_ref(handle);
-		}
-	}
-}
+/* Should be called only if mem_alloc_profiling_enabled() */
+void __clear_page_tag_ref(struct page *page);
 
-static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
+static inline void clear_page_tag_ref(struct page *page)
 {
-	if (mem_alloc_profiling_enabled()) {
-		union pgtag_ref_handle handle;
-		union codetag_ref ref;
-
-		if (get_page_tag_ref(page, &ref, &handle)) {
-			alloc_tag_sub(&ref, PAGE_SIZE * nr);
-			update_page_tag_ref(handle, &ref);
-			put_page_tag_ref(handle);
-		}
-	}
+	if (mem_alloc_profiling_enabled())
+		__clear_page_tag_ref(page);
 }
 
 /* Should be called only if mem_alloc_profiling_enabled() */
@@ -222,18 +188,6 @@ static inline struct alloc_tag *__pgalloc_tag_get(struct page *page)
 	return tag;
 }
 
-static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
-{
-	struct alloc_tag *tag;
-
-	if (!mem_alloc_profiling_enabled())
-		return;
-
-	tag = __pgalloc_tag_get(page);
-	if (tag)
-		this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
-}
-
 void pgalloc_tag_split(struct folio *folio, int old_order, int new_order);
 void pgalloc_tag_swap(struct folio *new, struct folio *old);
 
@@ -242,10 +196,6 @@ void __init alloc_tag_sec_init(void);
 #else /* CONFIG_MEM_ALLOC_PROFILING */
 
 static inline void clear_page_tag_ref(struct page *page) {}
-static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
-				   unsigned int nr) {}
-static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
-static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {}
 static inline void alloc_tag_sec_init(void) {}
 static inline void pgalloc_tag_split(struct folio *folio, int old_order, int new_order) {}
 static inline void pgalloc_tag_swap(struct folio *new, struct folio *old) {}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b7e3b45183ed..16dfcf7ade74 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1041,6 +1041,84 @@ static void kernel_init_pages(struct page *page, int numpages)
 	kasan_enable_current();
 }
 
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+
+/* Should be called only if mem_alloc_profiling_enabled() */
+void __clear_page_tag_ref(struct page *page)
+{
+	union pgtag_ref_handle handle;
+	union codetag_ref ref;
+
+	if (get_page_tag_ref(page, &ref, &handle)) {
+		set_codetag_empty(&ref);
+		update_page_tag_ref(handle, &ref);
+		put_page_tag_ref(handle);
+	}
+}
+
+/* Should be called only if mem_alloc_profiling_enabled() */
+static noinline
+void __pgalloc_tag_add(struct page *page, struct task_struct *task,
+		       unsigned int nr)
+{
+	union pgtag_ref_handle handle;
+	union codetag_ref ref;
+
+	if (get_page_tag_ref(page, &ref, &handle)) {
+		alloc_tag_add(&ref, task->alloc_tag, PAGE_SIZE * nr);
+		update_page_tag_ref(handle, &ref);
+		put_page_tag_ref(handle);
+	}
+}
+
+static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+				   unsigned int nr)
+{
+	if (mem_alloc_profiling_enabled())
+		__pgalloc_tag_add(page, task, nr);
+}
+
+/* Should be called only if mem_alloc_profiling_enabled() */
+static noinline
+void __pgalloc_tag_sub(struct page *page, unsigned int nr)
+{
+	union pgtag_ref_handle handle;
+	union codetag_ref ref;
+
+	if (get_page_tag_ref(page, &ref, &handle)) {
+		alloc_tag_sub(&ref, PAGE_SIZE * nr);
+		update_page_tag_ref(handle, &ref);
+		put_page_tag_ref(handle);
+	}
+}
+
+static inline void pgalloc_tag_sub(struct page *page, unsigned int nr)
+{
+	if (mem_alloc_profiling_enabled())
+		__pgalloc_tag_sub(page, nr);
+}
+
+static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr)
+{
+	struct alloc_tag *tag;
+
+	if (!mem_alloc_profiling_enabled())
+		return;
+
+	tag = __pgalloc_tag_get(page);
+	if (tag)
+		this_cpu_sub(tag->counters->bytes, PAGE_SIZE * nr);
+}
+
+#else /* CONFIG_MEM_ALLOC_PROFILING */
+
+static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
+				   unsigned int nr) {}
+static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
+static inline void pgalloc_tag_sub_pages(struct page *page, unsigned int nr) {}
+
+#endif /* CONFIG_MEM_ALLOC_PROFILING */
+
 __always_inline bool free_pages_prepare(struct page *page,
 			unsigned int order)
 {
-- 
2.48.1.362.g079036d154-goog




* Re: [PATCH v2 2/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in slab allocator
  2025-02-01 23:18 ` [PATCH v2 2/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in slab allocator Suren Baghdasaryan
@ 2025-02-03  8:23   ` Vlastimil Babka
  2025-02-04  0:32   ` Shakeel Butt
  1 sibling, 0 replies; 8+ messages in thread
From: Vlastimil Babka @ 2025-02-03  8:23 UTC (permalink / raw)
  To: Suren Baghdasaryan, akpm
  Cc: kent.overstreet, rostedt, peterz, yuzhao, minchan, shakeel.butt,
	souravpanda, pasha.tatashin, 00107082, quic_zhenhuah, linux-mm,
	linux-kernel

On 2/2/25 00:18, Suren Baghdasaryan wrote:
> When a sizable code section is protected by a disabled static key, that
> code gets into the instruction cache even though it's not executed and
> consumes the cache, increasing cache misses. This can be remedied by
> moving such code into a separate uninlined function.
> On a Pixel6 phone, slab allocation profiling overhead measured with
> CONFIG_MEM_ALLOC_PROFILING=y and profiling disabled is:
> 
>              baseline             modified
> Big core     3.31%                0.17%
> Medium core  3.79%                0.57%
> Little core  6.68%                1.28%
> 
> This improvement comes at the expense of the configuration when profiling
> gets enabled, since there is now an additional function call. The overhead
> from this additional call on Pixel6 is:
> 
> Big core     0.66%
> Middle core  1.23%
> Little core  2.42%
> 
> However this is negligible when compared with the overall overhead of the
> memory allocation profiling when it is enabled.
> On x86 this patch does not make noticeable difference because the overhead
> with mem_alloc_profiling_key disabled is much lower (under 1%) to start
> with, so any improvement is less visible and hard to distinguish from the
> noise. The overhead from additional call when profiling is enabled is also
> within noise levels.
> 
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

Acked-by: Vlastimil Babka <vbabka@suse.cz>




* Re: [PATCH v2 1/3] mm: avoid extra mem_alloc_profiling_enabled() checks
  2025-02-01 23:18 [PATCH v2 1/3] mm: avoid extra mem_alloc_profiling_enabled() checks Suren Baghdasaryan
  2025-02-01 23:18 ` [PATCH v2 2/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in slab allocator Suren Baghdasaryan
  2025-02-01 23:18 ` [PATCH v2 3/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in page allocator Suren Baghdasaryan
@ 2025-02-03 20:36 ` Shakeel Butt
  2 siblings, 0 replies; 8+ messages in thread
From: Shakeel Butt @ 2025-02-03 20:36 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: akpm, kent.overstreet, vbabka, rostedt, peterz, yuzhao, minchan,
	souravpanda, pasha.tatashin, 00107082, quic_zhenhuah, linux-mm,
	linux-kernel

On Sat, Feb 01, 2025 at 03:18:00PM -0800, Suren Baghdasaryan wrote:
> Refactor code to avoid extra mem_alloc_profiling_enabled() checks inside
> pgalloc_tag_get() function which is often called after that check was
> already done.
> 
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>



* Re: [PATCH v2 2/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in slab allocator
  2025-02-01 23:18 ` [PATCH v2 2/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in slab allocator Suren Baghdasaryan
  2025-02-03  8:23   ` Vlastimil Babka
@ 2025-02-04  0:32   ` Shakeel Butt
  1 sibling, 0 replies; 8+ messages in thread
From: Shakeel Butt @ 2025-02-04  0:32 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: akpm, kent.overstreet, vbabka, rostedt, peterz, yuzhao, minchan,
	souravpanda, pasha.tatashin, 00107082, quic_zhenhuah, linux-mm,
	linux-kernel

On Sat, Feb 01, 2025 at 03:18:01PM -0800, Suren Baghdasaryan wrote:
> When a sizable code section is protected by a disabled static key, that
> code gets into the instruction cache even though it's not executed and
> consumes the cache, increasing cache misses. This can be remedied by
> moving such code into a separate uninlined function.
> On a Pixel6 phone, slab allocation profiling overhead measured with
> CONFIG_MEM_ALLOC_PROFILING=y and profiling disabled is:
> 
>              baseline             modified
> Big core     3.31%                0.17%
> Medium core  3.79%                0.57%
> Little core  6.68%                1.28%
> 
> This improvement comes at the expense of the configuration when profiling
> gets enabled, since there is now an additional function call. The overhead
> from this additional call on Pixel6 is:
> 
> Big core     0.66%
> Middle core  1.23%
> Little core  2.42%
> 
> However this is negligible when compared with the overall overhead of the
> memory allocation profiling when it is enabled.
> On x86 this patch does not make noticeable difference because the overhead
> with mem_alloc_profiling_key disabled is much lower (under 1%) to start
> with, so any improvement is less visible and hard to distinguish from the
> noise. The overhead from additional call when profiling is enabled is also
> within noise levels.
> 
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>



* Re: [PATCH v2 3/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in page allocator
  2025-02-01 23:18 ` [PATCH v2 3/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in page allocator Suren Baghdasaryan
@ 2025-02-04  0:46   ` Shakeel Butt
  2025-02-04 18:14     ` Suren Baghdasaryan
  0 siblings, 1 reply; 8+ messages in thread
From: Shakeel Butt @ 2025-02-04  0:46 UTC (permalink / raw)
  To: Suren Baghdasaryan
  Cc: akpm, kent.overstreet, vbabka, rostedt, peterz, yuzhao, minchan,
	souravpanda, pasha.tatashin, 00107082, quic_zhenhuah, linux-mm,
	linux-kernel

On Sat, Feb 01, 2025 at 03:18:02PM -0800, Suren Baghdasaryan wrote:
> When a sizable code section is protected by a disabled static key, that
> code gets into the instruction cache even though it's not executed and
> consumes the cache, increasing cache misses. This can be remedied by
> moving such code into a separate uninlined function.
> On a Pixel6 phone, page allocation profiling overhead measured with
> CONFIG_MEM_ALLOC_PROFILING=y and profiling disabled is:
> 
>              baseline             modified
> Big core     4.93%                1.53%
> Medium core  4.39%                1.41%
> Little core  1.02%                0.36%
> 
> This improvement comes at the expense of the configuration when profiling
> gets enabled, since there is now an additional function call. The overhead
> from this additional call on Pixel6 is:
> 
> Big core     0.24%
> Middle core  0.63%
> Little core  1.1%
> 
> However this is negligible when compared with the overall overhead of the
> memory allocation profiling when it is enabled.
> On x86 this patch does not make noticeable difference because the overhead
> with mem_alloc_profiling_key disabled is much lower (under 1%) to start
> with, so any improvement is less visible and hard to distinguish from the
> noise. The overhead from additional call when profiling is enabled is also
> within noise levels.
> 
> Signed-off-by: Suren Baghdasaryan <surenb@google.com>

Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>

One question: Is there any plan to enable MEM_ALLOC_PROFILING by default
in future?




* Re: [PATCH v2 3/3] alloc_tag: uninline code gated by mem_alloc_profiling_key in page allocator
  2025-02-04  0:46   ` Shakeel Butt
@ 2025-02-04 18:14     ` Suren Baghdasaryan
  0 siblings, 0 replies; 8+ messages in thread
From: Suren Baghdasaryan @ 2025-02-04 18:14 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: akpm, kent.overstreet, vbabka, rostedt, peterz, yuzhao, minchan,
	souravpanda, pasha.tatashin, 00107082, quic_zhenhuah, linux-mm,
	linux-kernel

On Mon, Feb 3, 2025 at 4:47 PM Shakeel Butt <shakeel.butt@linux.dev> wrote:
>
> On Sat, Feb 01, 2025 at 03:18:02PM -0800, Suren Baghdasaryan wrote:
> > When a sizable code section is protected by a disabled static key, that
> > code gets into the instruction cache even though it's not executed and
> > consumes the cache, increasing cache misses. This can be remedied by
> > moving such code into a separate uninlined function.
> > On a Pixel6 phone, page allocation profiling overhead measured with
> > CONFIG_MEM_ALLOC_PROFILING=y and profiling disabled is:
> >
> >              baseline             modified
> > Big core     4.93%                1.53%
> > Medium core  4.39%                1.41%
> > Little core  1.02%                0.36%
> >
> > This improvement comes at the expense of the configuration when profiling
> > gets enabled, since there is now an additional function call. The overhead
> > from this additional call on Pixel6 is:
> >
> > Big core     0.24%
> > Middle core  0.63%
> > Little core  1.1%
> >
> > However this is negligible when compared with the overall overhead of the
> > memory allocation profiling when it is enabled.
> > On x86 this patch does not make noticeable difference because the overhead
> > with mem_alloc_profiling_key disabled is much lower (under 1%) to start
> > with, so any improvement is less visible and hard to distinguish from the
> > noise. The overhead from additional call when profiling is enabled is also
> > within noise levels.
> >
> > Signed-off-by: Suren Baghdasaryan <surenb@google.com>
>
> Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
>
> One question: Is there any plan to enable MEM_ALLOC_PROFILING by default
> in future?

It's left up to each distribution. In the Android Common Kernel we are
enabling it with CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT=n and
allowing vendors to enable it using kernel command line parameters.
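
For example (assuming the boot-time knob documented in
Documentation/mm/allocation-profiling.rst), a vendor would add something
like:

  sysctl.vm.mem_profiling=1

to the kernel command line.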

>

