From: Hao Ge <hao.ge@linux.dev>
To: Suren Baghdasaryan <surenb@google.com>,
Kent Overstreet <kent.overstreet@linux.dev>,
Andrew Morton <akpm@linux-foundation.org>
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
Hao Ge <hao.ge@linux.dev>
Subject: [PATCH v2] mm/alloc_tag: replace fixed-size early PFN array with dynamic linked list
Date: Tue, 21 Apr 2026 11:14:06 +0800 [thread overview]
Message-ID: <20260421031406.1189000-1-hao.ge@linux.dev> (raw)
Pages allocated before page_ext is available have their codetag left
uninitialized. Track these early PFNs and clear their codetag in
clear_early_alloc_pfn_tag_refs() to avoid "alloc_tag was not set"
warnings when they are freed later.
Currently a fixed-size array of 8192 entries is used, with a warning if
the limit is exceeded. However, the number of early allocations depends
on the number of CPUs and can be larger than 8192.
Replace the fixed-size array with a dynamically allocated linked list.
Each page is carved into early_pfn_node entries and the remainder is
kept as a freelist for subsequent allocations.
The list nodes themselves are allocated via alloc_page(), which would
trigger __pgalloc_tag_add() -> alloc_tag_add_early_pfn() ->
alloc_early_pfn_node() and recurse indefinitely. Introduce
__GFP_NO_CODETAG (reuses the %__GFP_NO_OBJ_EXT bit) and pass
gfp_flags through pgalloc_tag_add() so that the early path can skip
recording allocations that carry this flag.
Signed-off-by: Hao Ge <hao.ge@linux.dev>
---
v2:
- Use cmpxchg to atomically update early_pfn_pages, preventing page leak under concurrent allocation
- Pass gfp_flags through the full call chain and use gfpflags_allow_blocking()
to select GFP_KERNEL vs GFP_ATOMIC, avoiding unnecessary GFP_ATOMIC in process context
---
include/linux/alloc_tag.h | 22 +++++++-
lib/alloc_tag.c | 102 ++++++++++++++++++++++++++------------
mm/page_alloc.c | 29 +++++++----
3 files changed, 108 insertions(+), 45 deletions(-)
diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h
index 02de2ede560f..2fa695bd3c53 100644
--- a/include/linux/alloc_tag.h
+++ b/include/linux/alloc_tag.h
@@ -150,6 +150,23 @@ static inline struct alloc_tag_counters alloc_tag_read(struct alloc_tag *tag)
}
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
+/*
+ * Skip early PFN recording for a page allocation. Reuses the
+ * %__GFP_NO_OBJ_EXT bit. Used by alloc_early_pfn_node() to avoid
+ * recursion when allocating pages for the early PFN tracking list
+ * itself.
+ *
+ * Callers must set the codetag to CODETAG_EMPTY (via
+ * clear_page_tag_ref()) before freeing pages allocated with this
+ * flag once page_ext becomes available, otherwise
+ * alloc_tag_sub_check() will trigger a warning.
+ */
+#define __GFP_NO_CODETAG __GFP_NO_OBJ_EXT
+
+static inline bool should_record_early_pfn(gfp_t gfp_flags)
+{
+ return !(gfp_flags & __GFP_NO_CODETAG);
+}
static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag)
{
WARN_ONCE(ref && ref->ct && !is_codetag_empty(ref),
@@ -163,11 +180,12 @@ static inline void alloc_tag_sub_check(union codetag_ref *ref)
{
WARN_ONCE(ref && !ref->ct, "alloc_tag was not set\n");
}
-void alloc_tag_add_early_pfn(unsigned long pfn);
+void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags);
#else
static inline void alloc_tag_add_check(union codetag_ref *ref, struct alloc_tag *tag) {}
static inline void alloc_tag_sub_check(union codetag_ref *ref) {}
-static inline void alloc_tag_add_early_pfn(unsigned long pfn) {}
+static inline void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags) {}
+static inline bool should_record_early_pfn(gfp_t gfp_flags) { return true; }
#endif
/* Caller should verify both ref and tag to be valid */
diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c
index ed1bdcf1f8ab..cfc68e397eba 100644
--- a/lib/alloc_tag.c
+++ b/lib/alloc_tag.c
@@ -766,45 +766,75 @@ static __init bool need_page_alloc_tagging(void)
* Some pages are allocated before page_ext becomes available, leaving
* their codetag uninitialized. Track these early PFNs so we can clear
* their codetag refs later to avoid warnings when they are freed.
- *
- * Early allocations include:
- * - Base allocations independent of CPU count
- * - Per-CPU allocations (e.g., CPU hotplug callbacks during smp_init,
- * such as trace ring buffers, scheduler per-cpu data)
- *
- * For simplicity, we fix the size to 8192.
- * If insufficient, a warning will be triggered to alert the user.
- *
- * TODO: Replace fixed-size array with dynamic allocation using
- * a GFP flag similar to ___GFP_NO_OBJ_EXT to avoid recursion.
*/
-#define EARLY_ALLOC_PFN_MAX 8192
+struct early_pfn_node {
+ struct early_pfn_node *next;
+ unsigned long pfn;
+};
+
+#define NODES_PER_PAGE (PAGE_SIZE / sizeof(struct early_pfn_node))
-static unsigned long early_pfns[EARLY_ALLOC_PFN_MAX] __initdata;
-static atomic_t early_pfn_count __initdata = ATOMIC_INIT(0);
+static struct early_pfn_node *early_pfn_list __initdata;
+static struct early_pfn_node *early_pfn_freelist __initdata;
+static struct page *early_pfn_pages __initdata;
-static void __init __alloc_tag_add_early_pfn(unsigned long pfn)
+static struct early_pfn_node *__init alloc_early_pfn_node(gfp_t gfp_flags)
{
- int old_idx, new_idx;
+ struct early_pfn_node *ep, *new;
+ struct page *page, *old_page;
+ gfp_t gfp = gfpflags_allow_blocking(gfp_flags) ? GFP_KERNEL : GFP_ATOMIC;
+ int i;
+
+retry:
+ ep = READ_ONCE(early_pfn_freelist);
+ if (ep) {
+ struct early_pfn_node *next = READ_ONCE(ep->next);
+
+ if (try_cmpxchg(&early_pfn_freelist, &ep, next))
+ return ep;
+ goto retry;
+ }
+
+ page = alloc_page(gfp | __GFP_NO_CODETAG | __GFP_ZERO);
+ if (!page)
+ return NULL;
+
+ new = page_address(page);
+ for (i = 0; i < NODES_PER_PAGE - 1; i++)
+ new[i].next = &new[i + 1];
+ new[NODES_PER_PAGE - 1].next = NULL;
+
+ if (cmpxchg(&early_pfn_freelist, NULL, new + 1)) {
+ __free_page(page);
+ goto retry;
+ }
do {
- old_idx = atomic_read(&early_pfn_count);
- if (old_idx >= EARLY_ALLOC_PFN_MAX) {
- pr_warn_once("Early page allocations before page_ext init exceeded EARLY_ALLOC_PFN_MAX (%d)\n",
- EARLY_ALLOC_PFN_MAX);
- return;
- }
- new_idx = old_idx + 1;
- } while (!atomic_try_cmpxchg(&early_pfn_count, &old_idx, new_idx));
+ old_page = READ_ONCE(early_pfn_pages);
+ page->private = (unsigned long)old_page;
+ } while (cmpxchg(&early_pfn_pages, old_page, page) != old_page);
+
+ return new;
+}
+
+static void __init __alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
+{
+ struct early_pfn_node *ep = alloc_early_pfn_node(gfp_flags);
- early_pfns[old_idx] = pfn;
+ if (!ep)
+ return;
+
+ ep->pfn = pfn;
+ do {
+ ep->next = READ_ONCE(early_pfn_list);
+ } while (!try_cmpxchg(&early_pfn_list, &ep->next, ep));
}
-typedef void alloc_tag_add_func(unsigned long pfn);
+typedef void alloc_tag_add_func(unsigned long pfn, gfp_t gfp_flags);
static alloc_tag_add_func __rcu *alloc_tag_add_early_pfn_ptr __refdata =
RCU_INITIALIZER(__alloc_tag_add_early_pfn);
-void alloc_tag_add_early_pfn(unsigned long pfn)
+void alloc_tag_add_early_pfn(unsigned long pfn, gfp_t gfp_flags)
{
alloc_tag_add_func *alloc_tag_add;
@@ -814,13 +844,14 @@ void alloc_tag_add_early_pfn(unsigned long pfn)
rcu_read_lock();
alloc_tag_add = rcu_dereference(alloc_tag_add_early_pfn_ptr);
if (alloc_tag_add)
- alloc_tag_add(pfn);
+ alloc_tag_add(pfn, gfp_flags);
rcu_read_unlock();
}
static void __init clear_early_alloc_pfn_tag_refs(void)
{
- unsigned int i;
+ struct early_pfn_node *ep;
+ struct page *page, *next;
if (static_key_enabled(&mem_profiling_compressed))
return;
@@ -829,14 +860,13 @@ static void __init clear_early_alloc_pfn_tag_refs(void)
/* Make sure we are not racing with __alloc_tag_add_early_pfn() */
synchronize_rcu();
- for (i = 0; i < atomic_read(&early_pfn_count); i++) {
- unsigned long pfn = early_pfns[i];
+ for (ep = early_pfn_list; ep; ep = ep->next) {
- if (pfn_valid(pfn)) {
- struct page *page = pfn_to_page(pfn);
+ if (pfn_valid(ep->pfn)) {
union pgtag_ref_handle handle;
union codetag_ref ref;
+ page = pfn_to_page(ep->pfn);
if (get_page_tag_ref(page, &ref, &handle)) {
/*
* An early-allocated page could be freed and reallocated
@@ -861,6 +891,12 @@ static void __init clear_early_alloc_pfn_tag_refs(void)
}
}
+
+ for (page = early_pfn_pages; page; page = next) {
+ next = (struct page *)page->private;
+ clear_page_tag_ref(page);
+ __free_page(page);
+ }
}
#else /* !CONFIG_MEM_ALLOC_PROFILING_DEBUG */
static inline void __init clear_early_alloc_pfn_tag_refs(void) {}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 04494bc2e46f..4e2bfb3714e1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1284,7 +1284,7 @@ void __clear_page_tag_ref(struct page *page)
/* Should be called only if mem_alloc_profiling_enabled() */
static noinline
void __pgalloc_tag_add(struct page *page, struct task_struct *task,
- unsigned int nr)
+ unsigned int nr, gfp_t gfp_flags)
{
union pgtag_ref_handle handle;
union codetag_ref ref;
@@ -1294,21 +1294,30 @@ void __pgalloc_tag_add(struct page *page, struct task_struct *task,
update_page_tag_ref(handle, &ref);
put_page_tag_ref(handle);
} else {
- /*
- * page_ext is not available yet, record the pfn so we can
- * clear the tag ref later when page_ext is initialized.
- */
- alloc_tag_add_early_pfn(page_to_pfn(page));
+
if (task->alloc_tag)
alloc_tag_set_inaccurate(task->alloc_tag);
+
+ /*
+ * page_ext is not available yet, skip if this allocation
+ * doesn't need early PFN recording.
+ */
+ if (unlikely(!should_record_early_pfn(gfp_flags)))
+ return;
+
+ /*
+ * Record the pfn so the tag ref can be cleared later
+ * when page_ext is initialized.
+ */
+ alloc_tag_add_early_pfn(page_to_pfn(page), gfp_flags);
}
}
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
- unsigned int nr)
+ unsigned int nr, gfp_t gfp_flags)
{
if (mem_alloc_profiling_enabled())
- __pgalloc_tag_add(page, task, nr);
+ __pgalloc_tag_add(page, task, nr, gfp_flags);
}
/* Should be called only if mem_alloc_profiling_enabled() */
@@ -1341,7 +1350,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
#else /* CONFIG_MEM_ALLOC_PROFILING */
static inline void pgalloc_tag_add(struct page *page, struct task_struct *task,
- unsigned int nr) {}
+ unsigned int nr, gfp_t gfp_flags) {}
static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {}
static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) {}
@@ -1896,7 +1905,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
set_page_owner(page, order, gfp_flags);
page_table_check_alloc(page, order);
- pgalloc_tag_add(page, current, 1 << order);
+ pgalloc_tag_add(page, current, 1 << order, gfp_flags);
}
static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags,
--
2.25.1
reply other threads:[~2026-04-21 3:15 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260421031406.1189000-1-hao.ge@linux.dev \
--to=hao.ge@linux.dev \
--cc=akpm@linux-foundation.org \
--cc=kent.overstreet@linux.dev \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=surenb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox