linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Yeoreum Yun <yeoreum.yun@arm.com>
To: akpm@linux-foundation.org, david@kernel.org,
	lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com,
	vbabka@suse.cz, rppt@kernel.org, surenb@google.com,
	mhocko@suse.com, ast@kernel.org, daniel@iogearbox.net,
	andrii@kernel.org, martin.lau@linux.dev, eddyz87@gmail.com,
	song@kernel.org, yonghong.song@linux.dev,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@fomichev.me,
	haoluo@google.com, jolsa@kernel.org, jackmanb@google.com,
	hannes@cmpxchg.org, ziy@nvidia.com, bigeasy@linutronix.de,
	clrkwllms@kernel.org, rostedt@goodmis.org,
	catalin.marinas@arm.com, will@kernel.org, ryan.roberts@arm.com,
	kevin.brodsky@arm.com, dev.jain@arm.com,
	yang@os.amperecomputing.com
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	bpf@vger.kernel.org, linux-rt-devel@lists.linux.dev,
	linux-arm-kernel@lists.infradead.org,
	Yeoreum Yun <yeoreum.yun@arm.com>
Subject: [PATCH 1/2] mm: introduce pagetable_alloc_nolock()
Date: Fri, 12 Dec 2025 16:18:31 +0000	[thread overview]
Message-ID: <20251212161832.2067134-2-yeoreum.yun@arm.com> (raw)
In-Reply-To: <20251212161832.2067134-1-yeoreum.yun@arm.com>

Some architectures invoke pagetable_alloc() with preemption disabled
(e.g., arm64’s linear_map_split_to_ptes()).

Under PREEMPT_RT, calling pagetable_alloc() with
preemption disabled is not allowed, because it may acquire
a spin lock that becomes sleepable on RT, potentially
causing a sleep during page allocation.

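To address this, introduce a pagetable_alloc_nolock() API and permit two
additional GFP flags for alloc_pages_nolock(): __GFP_HIGH and __GFP_ZERO.
Because alloc_pages_nolock() no longer sets __GFP_ZERO implicitly, the
existing BPF callers now pass it explicitly.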

Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
---
 include/linux/mm.h   | 18 ++++++++++++++++++
 kernel/bpf/stream.c  |  2 +-
 kernel/bpf/syscall.c |  2 +-
 mm/page_alloc.c      | 10 +++-------
 4 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7c79b3369b82..11a27f60838b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2990,6 +2990,24 @@ static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int orde
 }
 #define pagetable_alloc(...)	alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__))
 
+/**
+ * pagetable_alloc_nolock - opportunistic reentrant page table allocation
+ *                          from any context
+ * @gfp:    GFP flags. Only __GFP_ZERO, __GFP_HIGH, __GFP_ACCOUNT allowed.
+ * @order:  desired pagetable order
+ *
+ * Opportunistic reentrant version of pagetable_alloc().
+ *
+ * Return: The ptdesc describing the allocated page tables.
+ */
+static inline struct ptdesc *pagetable_alloc_nolock_noprof(gfp_t gfp, unsigned int order)
+{
+	struct page *page = alloc_pages_nolock_noprof(gfp, NUMA_NO_NODE, order);
+
+	return page_ptdesc(page);
+}
+#define pagetable_alloc_nolock(...)	alloc_hooks(pagetable_alloc_nolock_noprof(__VA_ARGS__))
+
 /**
  * pagetable_free - Free pagetables
  * @pt:	The page table descriptor
diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c
index ff16c631951b..3c80c8007d91 100644
--- a/kernel/bpf/stream.c
+++ b/kernel/bpf/stream.c
@@ -83,7 +83,7 @@ static struct bpf_stream_page *bpf_stream_page_replace(void)
 	struct bpf_stream_page *stream_page, *old_stream_page;
 	struct page *page;
 
-	page = alloc_pages_nolock(/* Don't account */ 0, NUMA_NO_NODE, 0);
+	page = alloc_pages_nolock(/* Don't account */ __GFP_ZERO, NUMA_NO_NODE, 0);
 	if (!page)
 		return NULL;
 	stream_page = page_address(page);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 8a129746bd6c..cbc0f8d0c18b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -598,7 +598,7 @@ static bool can_alloc_pages(void)
 static struct page *__bpf_alloc_page(int nid)
 {
 	if (!can_alloc_pages())
-		return alloc_pages_nolock(__GFP_ACCOUNT, nid, 0);
+		return alloc_pages_nolock(__GFP_ZERO | __GFP_ACCOUNT, nid, 0);
 
 	return alloc_pages_node(nid,
 				GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ed82ee55e66a..88a920dc1e9a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7542,21 +7542,17 @@ struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned
 	 * various contexts. We cannot use printk_deferred_enter() to mitigate,
 	 * since the running context is unknown.
 	 *
-	 * Specify __GFP_ZERO to make sure that call to kmsan_alloc_page() below
-	 * is safe in any context. Also zeroing the page is mandatory for
-	 * BPF use cases.
-	 *
 	 * Though __GFP_NOMEMALLOC is not checked in the code path below,
 	 * specify it here to highlight that alloc_pages_nolock()
 	 * doesn't want to deplete reserves.
 	 */
-	gfp_t alloc_gfp = __GFP_NOWARN | __GFP_ZERO | __GFP_NOMEMALLOC | __GFP_COMP
+	gfp_t alloc_gfp = __GFP_NOWARN | __GFP_NOMEMALLOC | __GFP_COMP
 			| gfp_flags;
 	unsigned int alloc_flags = ALLOC_TRYLOCK;
 	struct alloc_context ac = { };
 	struct page *page;
 
-	VM_WARN_ON_ONCE(gfp_flags & ~__GFP_ACCOUNT);
+	VM_WARN_ON_ONCE(gfp_flags & ~(__GFP_HIGH | __GFP_ZERO | __GFP_ACCOUNT));
 	/*
 	 * In PREEMPT_RT spin_trylock() will call raw_spin_lock() which is
 	 * unsafe in NMI. If spin_trylock() is called from hard IRQ the current
@@ -7602,7 +7598,7 @@ struct page *alloc_frozen_pages_nolock_noprof(gfp_t gfp_flags, int nid, unsigned
 }
 /**
  * alloc_pages_nolock - opportunistic reentrant allocation from any context
- * @gfp_flags: GFP flags. Only __GFP_ACCOUNT allowed.
+ * @gfp_flags: GFP flags. Only __GFP_ZERO, __GFP_HIGH, __GFP_ACCOUNT allowed.
  * @nid: node to allocate from
  * @order: allocation order size
  *
-- 
LEVI:{C3F47F37-75D8-414A-A8BA-3980EC8A46D7}



  reply	other threads:[~2025-12-12 16:18 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-12 16:18 [PATCH 0/2] " Yeoreum Yun
2025-12-12 16:18 ` Yeoreum Yun [this message]
2025-12-12 16:18 ` [PATCH 2/2] arm64: mmu: use pagetable_alloc_nolock() while stop_machine() Yeoreum Yun
2025-12-13  7:05   ` Brendan Jackman
2025-12-14  9:13     ` Yeoreum Yun
2025-12-15  9:22       ` Brendan Jackman
2025-12-15  9:34         ` Yeoreum Yun
2025-12-15  9:55           ` Brendan Jackman
2025-12-15 10:06             ` Yeoreum Yun
2025-12-16 10:10               ` Brendan Jackman
2025-12-16 11:03                 ` Yeoreum Yun
2025-12-16 11:26                   ` Brendan Jackman
2025-12-16 12:01                     ` Yeoreum Yun
2025-12-16 12:39                       ` Brendan Jackman
2025-12-16 13:25                         ` Yeoreum Yun
2025-12-18  9:30   ` Michal Hocko
2025-12-18  9:36     ` Yeoreum Yun
2025-12-18 12:02       ` Ryan Roberts
2025-12-18 12:17         ` Michal Hocko
2025-12-18 12:24           ` Yeoreum Yun
2025-12-16 15:11 ` [PATCH 0/2] introduce pagetable_alloc_nolock() Ryan Roberts
2025-12-16 16:52   ` Yeoreum Yun
2025-12-17  9:34     ` Ryan Roberts
2025-12-17 10:48       ` Yeoreum Yun
2025-12-17 12:04         ` Ryan Roberts
2025-12-17 12:52           ` Yeoreum Yun
2025-12-17 13:15             ` Vlastimil Babka
2025-12-17 13:35               ` Brendan Jackman
2025-12-17 13:56                 ` Yeoreum Yun
2025-12-17 15:10                 ` Vlastimil Babka
2025-12-17 17:19                   ` Brendan Jackman
2025-12-18  7:47                     ` Vlastimil Babka
2025-12-18  7:52                   ` David Hildenbrand (Red Hat)
2025-12-23 22:59           ` Yang Shi
2025-12-24  7:00             ` Yeoreum Yun

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251212161832.2067134-2-yeoreum.yun@arm.com \
    --to=yeoreum.yun@arm.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bigeasy@linutronix.de \
    --cc=bpf@vger.kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=clrkwllms@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=david@kernel.org \
    --cc=dev.jain@arm.com \
    --cc=eddyz87@gmail.com \
    --cc=hannes@cmpxchg.org \
    --cc=haoluo@google.com \
    --cc=jackmanb@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kevin.brodsky@arm.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-rt-devel@lists.linux.dev \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=martin.lau@linux.dev \
    --cc=mhocko@suse.com \
    --cc=rostedt@goodmis.org \
    --cc=rppt@kernel.org \
    --cc=ryan.roberts@arm.com \
    --cc=sdf@fomichev.me \
    --cc=song@kernel.org \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    --cc=will@kernel.org \
    --cc=yang@os.amperecomputing.com \
    --cc=yonghong.song@linux.dev \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox