From: "Michael S. Tsirkin" <mst@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
David Hildenbrand <david@kernel.org>,
Vlastimil Babka <vbabka@kernel.org>,
Brendan Jackman <jackmanb@google.com>,
Michal Hocko <mhocko@suse.com>,
Suren Baghdasaryan <surenb@google.com>,
Jason Wang <jasowang@redhat.com>,
Andrea Arcangeli <aarcange@redhat.com>,
linux-mm@kvack.org, virtualization@lists.linux.dev,
Johannes Weiner <hannes@cmpxchg.org>, Zi Yan <ziy@nvidia.com>,
Lorenzo Stoakes <ljs@kernel.org>,
"Liam R. Howlett" <Liam.Howlett@oracle.com>,
Mike Rapoport <rppt@kernel.org>,
Matthew Brost <matthew.brost@intel.com>,
Joshua Hahn <joshua.hahnjy@gmail.com>,
Rakie Kim <rakie.kim@sk.com>, Byungchul Park <byungchul@sk.com>,
Gregory Price <gourry@gourry.net>,
Ying Huang <ying.huang@linux.alibaba.com>,
Alistair Popple <apopple@nvidia.com>
Subject: [PATCH RFC v2 02/18] mm: add pghint_t type and vma_alloc_folio_hints API
Date: Mon, 20 Apr 2026 08:50:23 -0400 [thread overview]
Message-ID: <290d615a001cf121dc0c604eb79451bcc7917baa.1776689093.git.mst@redhat.com> (raw)
In-Reply-To: <cover.1776689093.git.mst@redhat.com>
Add pghint_t, a bitwise type for communicating page allocation hints
between the allocator and callers. Define PGHINT_ZEROED to indicate
that the allocated page contents are known to be zero.
Add _hints variants of the allocation functions that accept a
pghint_t *hints output parameter:
vma_alloc_folio_hints() -> alloc_pages_mpol_hints() (internal)
-> __alloc_frozen_pages_hints()
The existing APIs are unchanged and continue to work without hints.
For now, *hints is always initialized to 0 when a hints pointer is
supplied. A subsequent patch will set PGHINT_ZEROED when the page was
pre-zeroed by the host.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Assisted-by: Claude:claude-opus-4-6
Assisted-by: cursor-agent:GPT-5.4-xhigh
---
include/linux/gfp.h | 15 ++++++++
mm/internal.h | 4 +++
mm/mempolicy.c | 85 +++++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 15 ++++++--
4 files changed, 117 insertions(+), 2 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 51ef13ed756e..14433a20e60c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -226,6 +226,9 @@ static inline void arch_free_page(struct page *page, int order) { }
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
+typedef unsigned int __bitwise pghint_t;
+#define PGHINT_ZEROED ((__force pghint_t)BIT(0))
+
struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
nodemask_t *nodemask);
#define __alloc_pages(...) alloc_hooks(__alloc_pages_noprof(__VA_ARGS__))
@@ -325,6 +328,9 @@ struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
struct mempolicy *mpol, pgoff_t ilx, int nid);
struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr);
+struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order,
+ struct vm_area_struct *vma, unsigned long addr,
+ pghint_t *hints);
#else
static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order)
{
@@ -344,12 +350,21 @@ static inline struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order,
{
return folio_alloc_noprof(gfp, order);
}
+static inline struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order,
+ struct vm_area_struct *vma, unsigned long addr,
+ pghint_t *hints)
+{
+ if (hints)
+ *hints = 0;
+ return folio_alloc_noprof(gfp, order);
+}
#endif
#define alloc_pages(...) alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
#define folio_alloc(...) alloc_hooks(folio_alloc_noprof(__VA_ARGS__))
#define folio_alloc_mpol(...) alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__))
#define vma_alloc_folio(...) alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__))
+#define vma_alloc_folio_hints(...) alloc_hooks(vma_alloc_folio_hints_noprof(__VA_ARGS__))
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
diff --git a/mm/internal.h b/mm/internal.h
index cb0af847d7d9..686667b956c0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -894,8 +894,12 @@ extern int user_min_free_kbytes;
struct page *__alloc_frozen_pages_noprof(gfp_t, unsigned int order, int nid,
nodemask_t *);
+struct page *__alloc_frozen_pages_hints_noprof(gfp_t, unsigned int order,
+ int nid, nodemask_t *, pghint_t *hints);
#define __alloc_frozen_pages(...) \
alloc_hooks(__alloc_frozen_pages_noprof(__VA_ARGS__))
+#define __alloc_frozen_pages_hints(...) \
+ alloc_hooks(__alloc_frozen_pages_hints_noprof(__VA_ARGS__))
void free_frozen_pages(struct page *page, unsigned int order);
void free_unref_folios(struct folio_batch *fbatch);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cf92bd6a8226..b918639eef71 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2547,6 +2547,91 @@ struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct
}
EXPORT_SYMBOL(vma_alloc_folio_noprof);
+static struct page *alloc_pages_preferred_many_hints(gfp_t gfp,
+ unsigned int order, int nid, nodemask_t *nodemask,
+ pghint_t *hints)
+{
+ struct page *page;
+ gfp_t preferred_gfp;
+
+ preferred_gfp = gfp | __GFP_NOWARN;
+ preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+ page = __alloc_frozen_pages_hints_noprof(preferred_gfp, order, nid,
+ nodemask, hints);
+ if (!page)
+ page = __alloc_frozen_pages_hints_noprof(gfp, order, nid, NULL,
+ hints);
+
+ return page;
+}
+
+static struct page *alloc_pages_mpol_hints(gfp_t gfp, unsigned int order,
+ struct mempolicy *pol, pgoff_t ilx, int nid,
+ pghint_t *hints)
+{
+ nodemask_t *nodemask;
+ struct page *page;
+
+ nodemask = policy_nodemask(gfp, pol, ilx, &nid);
+
+ if (pol->mode == MPOL_PREFERRED_MANY)
+ return alloc_pages_preferred_many_hints(gfp, order, nid,
+ nodemask, hints);
+
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+ order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) {
+ if (pol->mode != MPOL_INTERLEAVE &&
+ pol->mode != MPOL_WEIGHTED_INTERLEAVE &&
+ (!nodemask || node_isset(nid, *nodemask))) {
+ page = __alloc_frozen_pages_hints_noprof(
+ gfp | __GFP_THISNODE | __GFP_NORETRY, order,
+ nid, NULL, hints);
+ if (page || !(gfp & __GFP_DIRECT_RECLAIM))
+ return page;
+ }
+ }
+
+ page = __alloc_frozen_pages_hints_noprof(gfp, order, nid, nodemask,
+ hints);
+
+ if (unlikely(pol->mode == MPOL_INTERLEAVE ||
+ pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
+ if (static_branch_likely(&vm_numa_stat_key) &&
+ page_to_nid(page) == nid) {
+ preempt_disable();
+ __count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT);
+ preempt_enable();
+ }
+ }
+
+ return page;
+}
+
+struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order,
+ struct vm_area_struct *vma, unsigned long addr,
+ pghint_t *hints)
+{
+ struct mempolicy *pol;
+ pgoff_t ilx;
+ struct folio *folio;
+ struct page *page;
+
+ if (vma->vm_flags & VM_DROPPABLE)
+ gfp |= __GFP_NOWARN;
+
+ pol = get_vma_policy(vma, addr, order, &ilx);
+ page = alloc_pages_mpol_hints(gfp | __GFP_COMP, order, pol, ilx,
+ numa_node_id(), hints);
+ mpol_cond_put(pol);
+ if (!page)
+ return NULL;
+
+ set_page_refcounted(page);
+ folio = page_rmappable_folio(page);
+ return folio;
+}
+EXPORT_SYMBOL(vma_alloc_folio_hints_noprof);
+
struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned order)
{
struct mempolicy *pol = &default_policy;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index edbb1edf463d..f7abbc46e725 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5222,14 +5222,17 @@ EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof);
/*
* This is the 'heart' of the zoned buddy allocator.
*/
-struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
- int preferred_nid, nodemask_t *nodemask)
+struct page *__alloc_frozen_pages_hints_noprof(gfp_t gfp, unsigned int order,
+ int preferred_nid, nodemask_t *nodemask, pghint_t *hints)
{
struct page *page;
unsigned int alloc_flags = ALLOC_WMARK_LOW;
gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
struct alloc_context ac = { };
+ if (hints)
+ *hints = (pghint_t)0;
+
/*
* There are several places where we assume that the order value is sane
* so bail out early if the request is out of bound.
@@ -5285,6 +5288,14 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
return page;
}
+EXPORT_SYMBOL(__alloc_frozen_pages_hints_noprof);
+
+struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
+ int preferred_nid, nodemask_t *nodemask)
+{
+ return __alloc_frozen_pages_hints_noprof(gfp, order, preferred_nid,
+ nodemask, NULL);
+}
EXPORT_SYMBOL(__alloc_frozen_pages_noprof);
struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
--
MST
next prev parent reply other threads:[~2026-04-20 12:50 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-20 12:51 [PATCH RFC v2 00/18] mm/virtio: skip redundant zeroing of host-zeroed reported pages Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 01/18] mm: page_alloc: propagate PageReported flag across buddy splits Michael S. Tsirkin
2026-04-20 12:50 ` Michael S. Tsirkin [this message]
2026-04-21 0:58 ` [PATCH RFC v2 02/18] mm: add pghint_t type and vma_alloc_folio_hints API Huang, Ying
2026-04-20 12:50 ` [PATCH RFC v2 03/18] mm: add PG_zeroed page flag for known-zero pages Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 04/18] mm: page_alloc: track PG_zeroed across buddy merges Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 05/18] mm: page_alloc: preserve PG_zeroed in try_to_claim_block Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 06/18] mm: page_alloc: thread pghint_t through get_page_from_freelist Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 07/18] mm: post_alloc_hook: use PG_zeroed to skip zeroing, return pghint_t Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 08/18] mm: hugetlb: thread pghint_t through buddy allocation chain Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 09/18] mm: hugetlb: use PG_zeroed for pool pages, skip redundant zeroing Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 10/18] mm: page_reporting: support host-zeroed reported pages Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 11/18] mm: skip zeroing in vma_alloc_zeroed_movable_folio for pre-zeroed pages Michael S. Tsirkin
2026-04-21 10:58 ` David Hildenbrand (Arm)
2026-04-20 12:50 ` [PATCH RFC v2 12/18] mm: skip zeroing in alloc_anon_folio " Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 13/18] mm: skip zeroing in vma_alloc_anon_folio_pmd " Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 14/18] mm: memfd: skip zeroing for pre-zeroed hugetlb pages Michael S. Tsirkin
2026-04-20 12:51 ` [PATCH RFC v2 15/18] virtio_balloon: add host_zeroes_pages module parameter Michael S. Tsirkin
2026-04-20 12:51 ` [PATCH RFC v2 16/18] mm: page_reporting: add flush parameter with page budget Michael S. Tsirkin
2026-04-20 12:51 ` [PATCH RFC v2 17/18] mm: add free_frozen_pages_hint and put_page_hint APIs Michael S. Tsirkin
2026-04-21 10:56 ` David Hildenbrand (Arm)
2026-04-20 12:51 ` [PATCH RFC v2 18/18] virtio_balloon: mark deflated pages as pre-zeroed Michael S. Tsirkin
2026-04-20 18:09 ` [syzbot ci] Re: mm/virtio: skip redundant zeroing of host-zeroed reported pages syzbot ci
2026-04-20 18:20 ` [PATCH RFC v2 00/18] " David Hildenbrand (Arm)
2026-04-20 23:33 ` Michael S. Tsirkin
2026-04-21 2:38 ` Gregory Price
2026-04-21 10:04 ` David Hildenbrand (Arm)
2026-04-21 10:50 ` David Hildenbrand (Arm)
2026-04-21 2:21 ` Gregory Price
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=290d615a001cf121dc0c604eb79451bcc7917baa.1776689093.git.mst@redhat.com \
--to=mst@redhat.com \
--cc=Liam.Howlett@oracle.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=apopple@nvidia.com \
--cc=byungchul@sk.com \
--cc=david@kernel.org \
--cc=gourry@gourry.net \
--cc=hannes@cmpxchg.org \
--cc=jackmanb@google.com \
--cc=jasowang@redhat.com \
--cc=joshua.hahnjy@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=ljs@kernel.org \
--cc=matthew.brost@intel.com \
--cc=mhocko@suse.com \
--cc=rakie.kim@sk.com \
--cc=rppt@kernel.org \
--cc=surenb@google.com \
--cc=vbabka@kernel.org \
--cc=virtualization@lists.linux.dev \
--cc=ying.huang@linux.alibaba.com \
--cc=ziy@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox