From: Harry Yoo <harry.yoo@oracle.com>
To: Vlastimil Babka <vbabka@suse.cz>,
Christoph Lameter <cl@gentwo.org>,
David Rientjes <rientjes@google.com>,
Andrew Morton <akpm@linux-foundation.org>,
Dennis Zhou <dennis@kernel.org>, Tejun Heo <tj@kernel.org>,
Mateusz Guzik <mjguzik@gmail.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>,
Cong Wang <xiyou.wangcong@gmail.com>,
Jiri Pirko <jiri@resnulli.us>, Vlad Buslov <vladbu@nvidia.com>,
Yevgeny Kliteynik <kliteyn@nvidia.com>, Jan Kara <jack@suse.cz>,
Byungchul Park <byungchul@sk.com>,
linux-mm@kvack.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, Harry Yoo <harry.yoo@oracle.com>
Subject: [RFC PATCH 5/7] mm/percpu: allow (un)charging objects without alloc/free
Date: Thu, 24 Apr 2025 17:07:53 +0900 [thread overview]
Message-ID: <20250424080755.272925-6-harry.yoo@oracle.com> (raw)
In-Reply-To: <20250424080755.272925-1-harry.yoo@oracle.com>
With a slab ctor/dtor pair, slab objects can retain a pointer to percpu
memory that remains allocated until the slab destructor frees it.
In such cases, the charging and uncharging of percpu memory should be
invoked when slab objects are allocated and freed. Allow explicit
(un)charging of percpu memory to ensure accurate memory accounting
for the slab destructor users.
Note that these APIs (un)charge memory only for memory cgroups.
They do not affect memory allocation profiling. Memory allocation
profiling records percpu memory only when it is actually allocated or
freed.
Signed-off-by: Harry Yoo <harry.yoo@oracle.com>
---
include/linux/percpu.h | 10 ++++++
mm/percpu.c | 79 +++++++++++++++++++++++++++++-------------
2 files changed, 64 insertions(+), 25 deletions(-)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 52b5ea663b9f..2d13ef0885d6 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -140,6 +140,16 @@ extern void __init setup_per_cpu_areas(void);
extern void __percpu *pcpu_alloc_noprof(size_t size, size_t align, bool reserved,
gfp_t gfp) __alloc_size(1);
+#ifdef CONFIG_MEMCG
+extern bool pcpu_charge(void __percpu *__pdata, size_t size, gfp_t gfp);
+extern void pcpu_uncharge(void __percpu *__pdata, size_t size);
+#else
+static inline bool pcpu_charge(void __percpu *__pdata, size_t size, gfp_t gfp)
+{
+ return true;
+}
+static inline void pcpu_uncharge(void __percpu *__pdata, size_t size) { }
+#endif
#define __alloc_percpu_gfp(_size, _align, _gfp) \
alloc_hooks(pcpu_alloc_noprof(_size, _align, false, _gfp))
#define __alloc_percpu(_size, _align) \
diff --git a/mm/percpu.c b/mm/percpu.c
index b35494c8ede2..069d8e593164 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1606,6 +1606,32 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
return pcpu_get_page_chunk(pcpu_addr_to_page(addr));
}
+#ifdef CONFIG_MEM_ALLOC_PROFILING
+static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off,
+ size_t size)
+{
+ if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts)) {
+ alloc_tag_add(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag,
+ current->alloc_tag, size);
+ }
+}
+
+static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
+{
+ if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts))
+ alloc_tag_sub(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag, size);
+}
+#else
+static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off,
+ size_t size)
+{
+}
+
+static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
+{
+}
+#endif
+
#ifdef CONFIG_MEMCG
static bool pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp,
struct obj_cgroup **objcgp)
@@ -1667,7 +1693,35 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
obj_cgroup_put(objcg);
}
+bool pcpu_charge(void *ptr, size_t size, gfp_t gfp)
+{
+ struct obj_cgroup *objcg = NULL;
+ void *addr;
+ struct pcpu_chunk *chunk;
+ int off;
+
+ addr = __pcpu_ptr_to_addr(ptr);
+ chunk = pcpu_chunk_addr_search(addr);
+ off = addr - chunk->base_addr;
+
+ if (!pcpu_memcg_pre_alloc_hook(size, gfp, &objcg))
+ return false;
+ pcpu_memcg_post_alloc_hook(objcg, chunk, off, size);
+ return true;
+}
+
+void pcpu_uncharge(void *ptr, size_t size)
+{
+ void *addr;
+ struct pcpu_chunk *chunk;
+ int off;
+
+ addr = __pcpu_ptr_to_addr(ptr);
+ chunk = pcpu_chunk_addr_search(addr);
+ off = addr - chunk->base_addr;
+ pcpu_memcg_free_hook(chunk, off, size);
+}
#else /* CONFIG_MEMCG */
static bool
pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, struct obj_cgroup **objcgp)
@@ -1686,31 +1740,6 @@ static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
}
#endif /* CONFIG_MEMCG */
-#ifdef CONFIG_MEM_ALLOC_PROFILING
-static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off,
- size_t size)
-{
- if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts)) {
- alloc_tag_add(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag,
- current->alloc_tag, size);
- }
-}
-
-static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
-{
- if (mem_alloc_profiling_enabled() && likely(chunk->obj_exts))
- alloc_tag_sub(&chunk->obj_exts[off >> PCPU_MIN_ALLOC_SHIFT].tag, size);
-}
-#else
-static void pcpu_alloc_tag_alloc_hook(struct pcpu_chunk *chunk, int off,
- size_t size)
-{
-}
-
-static void pcpu_alloc_tag_free_hook(struct pcpu_chunk *chunk, int off, size_t size)
-{
-}
-#endif
/**
* pcpu_alloc - the percpu allocator
--
2.43.0
next prev parent reply other threads:[~2025-04-24 8:08 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-24 8:07 [RFC PATCH 0/7] Reviving the slab destructor to tackle the percpu allocator scalability problem Harry Yoo
2025-04-24 8:07 ` [RFC PATCH 1/7] mm/slab: refactor freelist shuffle Harry Yoo
2025-04-24 8:07 ` [RFC PATCH 2/7] treewide, slab: allow slab constructor to return an error Harry Yoo
2025-04-24 8:07 ` [RFC PATCH 3/7] mm/slab: revive the destructor feature in slab allocator Harry Yoo
2025-04-24 8:07 ` [RFC PATCH 4/7] net/sched/act_api: use slab ctor/dtor to reduce contention on pcpu alloc Harry Yoo
2025-04-24 8:07 ` Harry Yoo [this message]
2025-04-24 8:07 ` [RFC PATCH 6/7] lib/percpu_counter: allow (un)charging percpu counters without alloc/free Harry Yoo
2025-04-24 8:07 ` [RFC PATCH 7/7] kernel/fork: improve exec() throughput with slab ctor/dtor pair Harry Yoo
2025-04-24 9:29 ` [RFC PATCH 0/7] Reviving the slab destructor to tackle the percpu allocator scalability problem Mateusz Guzik
2025-04-24 9:58 ` Harry Yoo
2025-04-24 15:00 ` Mateusz Guzik
2025-04-24 11:28 ` Pedro Falcato
2025-04-24 15:20 ` Mateusz Guzik
2025-04-24 16:11 ` Mateusz Guzik
2025-04-25 7:40 ` Harry Yoo
2025-04-25 10:12 ` Harry Yoo
2025-04-25 10:42 ` Pedro Falcato
2025-04-28 1:18 ` Harry Yoo
2025-04-30 19:49 ` Mateusz Guzik
2025-05-12 11:00 ` Harry Yoo
2025-04-24 15:50 ` Christoph Lameter (Ampere)
2025-04-24 16:03 ` Mateusz Guzik
2025-04-24 16:39 ` Christoph Lameter (Ampere)
2025-04-24 17:26 ` Mateusz Guzik
2025-04-24 18:47 ` Tejun Heo
2025-04-25 10:10 ` Harry Yoo
2025-04-25 19:03 ` Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250424080755.272925-6-harry.yoo@oracle.com \
--to=harry.yoo@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=byungchul@sk.com \
--cc=cl@gentwo.org \
--cc=dennis@kernel.org \
--cc=jack@suse.cz \
--cc=jhs@mojatatu.com \
--cc=jiri@resnulli.us \
--cc=kliteyn@nvidia.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mjguzik@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=rientjes@google.com \
--cc=tj@kernel.org \
--cc=vbabka@suse.cz \
--cc=vladbu@nvidia.com \
--cc=xiyou.wangcong@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox