linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: linux-kernel@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@kernel.org>,
	Vlastimil Babka <vbabka@kernel.org>,
	Brendan Jackman <jackmanb@google.com>,
	Michal Hocko <mhocko@suse.com>,
	Suren Baghdasaryan <surenb@google.com>,
	Jason Wang <jasowang@redhat.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	linux-mm@kvack.org, virtualization@lists.linux.dev,
	Johannes Weiner <hannes@cmpxchg.org>, Zi Yan <ziy@nvidia.com>,
	Lorenzo Stoakes <ljs@kernel.org>,
	"Liam R. Howlett" <Liam.Howlett@oracle.com>,
	Mike Rapoport <rppt@kernel.org>,
	Matthew Brost <matthew.brost@intel.com>,
	Joshua Hahn <joshua.hahnjy@gmail.com>,
	Rakie Kim <rakie.kim@sk.com>, Byungchul Park <byungchul@sk.com>,
	Gregory Price <gourry@gourry.net>,
	Ying Huang <ying.huang@linux.alibaba.com>,
	Alistair Popple <apopple@nvidia.com>
Subject: [PATCH RFC v2 02/18] mm: add pghint_t type and vma_alloc_folio_hints API
Date: Mon, 20 Apr 2026 08:50:23 -0400	[thread overview]
Message-ID: <290d615a001cf121dc0c604eb79451bcc7917baa.1776689093.git.mst@redhat.com> (raw)
In-Reply-To: <cover.1776689093.git.mst@redhat.com>

Add pghint_t, a bitwise type for communicating page allocation hints
between the allocator and callers.  Define PGHINT_ZEROED to indicate
that the allocated page contents are known to be zero.

Add _hints variants of the allocation functions that accept a
pghint_t *hints output parameter:

  vma_alloc_folio_hints()  -> alloc_pages_mpol_hints() (internal)
                           -> __alloc_frozen_pages_hints()

The existing APIs are unchanged and continue to work without hints.
For now, hints is always initialized to 0.  A subsequent patch will
set PGHINT_ZEROED when the page was pre-zeroed by the host.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Assisted-by: Claude:claude-opus-4-6
Assisted-by: cursor-agent:GPT-5.4-xhigh
---
 include/linux/gfp.h | 15 ++++++++
 mm/internal.h       |  4 +++
 mm/mempolicy.c      | 85 +++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c     | 15 ++++++--
 4 files changed, 117 insertions(+), 2 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 51ef13ed756e..14433a20e60c 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -226,6 +226,9 @@ static inline void arch_free_page(struct page *page, int order) { }
 static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
+typedef unsigned int __bitwise pghint_t;
+#define PGHINT_ZEROED	((__force pghint_t)BIT(0))
+
 struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, int preferred_nid,
 		nodemask_t *nodemask);
 #define __alloc_pages(...)			alloc_hooks(__alloc_pages_noprof(__VA_ARGS__))
@@ -325,6 +328,9 @@ struct folio *folio_alloc_mpol_noprof(gfp_t gfp, unsigned int order,
 		struct mempolicy *mpol, pgoff_t ilx, int nid);
 struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct *vma,
 		unsigned long addr);
+struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order,
+		struct vm_area_struct *vma, unsigned long addr,
+		pghint_t *hints);
 #else
 static inline struct page *alloc_pages_noprof(gfp_t gfp_mask, unsigned int order)
 {
@@ -344,12 +350,21 @@ static inline struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order,
 {
 	return folio_alloc_noprof(gfp, order);
 }
+static inline struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order,
+		struct vm_area_struct *vma, unsigned long addr,
+		pghint_t *hints)
+{
+	if (hints)
+		*hints = 0;
+	return folio_alloc_noprof(gfp, order);
+}
 #endif
 
 #define alloc_pages(...)			alloc_hooks(alloc_pages_noprof(__VA_ARGS__))
 #define folio_alloc(...)			alloc_hooks(folio_alloc_noprof(__VA_ARGS__))
 #define folio_alloc_mpol(...)			alloc_hooks(folio_alloc_mpol_noprof(__VA_ARGS__))
 #define vma_alloc_folio(...)			alloc_hooks(vma_alloc_folio_noprof(__VA_ARGS__))
+#define vma_alloc_folio_hints(...)		alloc_hooks(vma_alloc_folio_hints_noprof(__VA_ARGS__))
 
 #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
 
diff --git a/mm/internal.h b/mm/internal.h
index cb0af847d7d9..686667b956c0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -894,8 +894,12 @@ extern int user_min_free_kbytes;
 
 struct page *__alloc_frozen_pages_noprof(gfp_t, unsigned int order, int nid,
 		nodemask_t *);
+struct page *__alloc_frozen_pages_hints_noprof(gfp_t, unsigned int order,
+		int nid, nodemask_t *, pghint_t *hints);
 #define __alloc_frozen_pages(...) \
 	alloc_hooks(__alloc_frozen_pages_noprof(__VA_ARGS__))
+#define __alloc_frozen_pages_hints(...) \
+	alloc_hooks(__alloc_frozen_pages_hints_noprof(__VA_ARGS__))
 void free_frozen_pages(struct page *page, unsigned int order);
 void free_unref_folios(struct folio_batch *fbatch);
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index cf92bd6a8226..b918639eef71 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2547,6 +2547,91 @@ struct folio *vma_alloc_folio_noprof(gfp_t gfp, int order, struct vm_area_struct
 }
 EXPORT_SYMBOL(vma_alloc_folio_noprof);
 
+static struct page *alloc_pages_preferred_many_hints(gfp_t gfp,
+		unsigned int order, int nid, nodemask_t *nodemask,
+		pghint_t *hints)
+{
+	struct page *page;
+	gfp_t preferred_gfp;
+
+	preferred_gfp = gfp | __GFP_NOWARN;
+	preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
+	page = __alloc_frozen_pages_hints_noprof(preferred_gfp, order, nid,
+						 nodemask, hints);
+	if (!page)
+		page = __alloc_frozen_pages_hints_noprof(gfp, order, nid, NULL,
+							 hints);
+
+	return page;
+}
+
+static struct page *alloc_pages_mpol_hints(gfp_t gfp, unsigned int order,
+		struct mempolicy *pol, pgoff_t ilx, int nid,
+		pghint_t *hints)
+{
+	nodemask_t *nodemask;
+	struct page *page;
+
+	nodemask = policy_nodemask(gfp, pol, ilx, &nid);
+
+	if (pol->mode == MPOL_PREFERRED_MANY)
+		return alloc_pages_preferred_many_hints(gfp, order, nid,
+						       nodemask, hints);
+
+	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
+	    order == HPAGE_PMD_ORDER && ilx != NO_INTERLEAVE_INDEX) {
+		if (pol->mode != MPOL_INTERLEAVE &&
+		    pol->mode != MPOL_WEIGHTED_INTERLEAVE &&
+		    (!nodemask || node_isset(nid, *nodemask))) {
+			page = __alloc_frozen_pages_hints_noprof(
+				gfp | __GFP_THISNODE | __GFP_NORETRY, order,
+				nid, NULL, hints);
+			if (page || !(gfp & __GFP_DIRECT_RECLAIM))
+				return page;
+		}
+	}
+
+	page = __alloc_frozen_pages_hints_noprof(gfp, order, nid, nodemask,
+						 hints);
+
+	if (unlikely(pol->mode == MPOL_INTERLEAVE ||
+		     pol->mode == MPOL_WEIGHTED_INTERLEAVE) && page) {
+		if (static_branch_likely(&vm_numa_stat_key) &&
+		    page_to_nid(page) == nid) {
+			preempt_disable();
+			__count_numa_event(page_zone(page), NUMA_INTERLEAVE_HIT);
+			preempt_enable();
+		}
+	}
+
+	return page;
+}
+
+struct folio *vma_alloc_folio_hints_noprof(gfp_t gfp, int order,
+		struct vm_area_struct *vma, unsigned long addr,
+		pghint_t *hints)
+{
+	struct mempolicy *pol;
+	pgoff_t ilx;
+	struct folio *folio;
+	struct page *page;
+
+	if (vma->vm_flags & VM_DROPPABLE)
+		gfp |= __GFP_NOWARN;
+
+	pol = get_vma_policy(vma, addr, order, &ilx);
+	page = alloc_pages_mpol_hints(gfp | __GFP_COMP, order, pol, ilx,
+				      numa_node_id(), hints);
+	mpol_cond_put(pol);
+	if (!page)
+		return NULL;
+
+	set_page_refcounted(page);
+	folio = page_rmappable_folio(page);
+	return folio;
+}
+EXPORT_SYMBOL(vma_alloc_folio_hints_noprof);
+
 struct page *alloc_frozen_pages_noprof(gfp_t gfp, unsigned order)
 {
 	struct mempolicy *pol = &default_policy;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index edbb1edf463d..f7abbc46e725 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5222,14 +5222,17 @@ EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof);
 /*
  * This is the 'heart' of the zoned buddy allocator.
  */
-struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
-		int preferred_nid, nodemask_t *nodemask)
+struct page *__alloc_frozen_pages_hints_noprof(gfp_t gfp, unsigned int order,
+		int preferred_nid, nodemask_t *nodemask, pghint_t *hints)
 {
 	struct page *page;
 	unsigned int alloc_flags = ALLOC_WMARK_LOW;
 	gfp_t alloc_gfp; /* The gfp_t that was actually used for allocation */
 	struct alloc_context ac = { };
 
+	if (hints)
+		*hints = 0;
+
 	/*
 	 * There are several places where we assume that the order value is sane
 	 * so bail out early if the request is out of bound.
@@ -5285,6 +5288,14 @@ struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
 
 	return page;
 }
+EXPORT_SYMBOL(__alloc_frozen_pages_hints_noprof);
+
+struct page *__alloc_frozen_pages_noprof(gfp_t gfp, unsigned int order,
+		int preferred_nid, nodemask_t *nodemask)
+{
+	return __alloc_frozen_pages_hints_noprof(gfp, order, preferred_nid,
+						nodemask, NULL);
+}
 EXPORT_SYMBOL(__alloc_frozen_pages_noprof);
 
 struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
-- 
MST



  parent reply	other threads:[~2026-04-20 12:50 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-20 12:51 [PATCH RFC v2 00/18] mm/virtio: skip redundant zeroing of host-zeroed reported pages Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 01/18] mm: page_alloc: propagate PageReported flag across buddy splits Michael S. Tsirkin
2026-04-20 12:50 ` Michael S. Tsirkin [this message]
2026-04-21  0:58   ` [PATCH RFC v2 02/18] mm: add pghint_t type and vma_alloc_folio_hints API Huang, Ying
2026-04-20 12:50 ` [PATCH RFC v2 03/18] mm: add PG_zeroed page flag for known-zero pages Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 04/18] mm: page_alloc: track PG_zeroed across buddy merges Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 05/18] mm: page_alloc: preserve PG_zeroed in try_to_claim_block Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 06/18] mm: page_alloc: thread pghint_t through get_page_from_freelist Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 07/18] mm: post_alloc_hook: use PG_zeroed to skip zeroing, return pghint_t Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 08/18] mm: hugetlb: thread pghint_t through buddy allocation chain Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 09/18] mm: hugetlb: use PG_zeroed for pool pages, skip redundant zeroing Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 10/18] mm: page_reporting: support host-zeroed reported pages Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 11/18] mm: skip zeroing in vma_alloc_zeroed_movable_folio for pre-zeroed pages Michael S. Tsirkin
2026-04-21 10:58   ` David Hildenbrand (Arm)
2026-04-20 12:50 ` [PATCH RFC v2 12/18] mm: skip zeroing in alloc_anon_folio " Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 13/18] mm: skip zeroing in vma_alloc_anon_folio_pmd " Michael S. Tsirkin
2026-04-20 12:50 ` [PATCH RFC v2 14/18] mm: memfd: skip zeroing for pre-zeroed hugetlb pages Michael S. Tsirkin
2026-04-20 12:51 ` [PATCH RFC v2 15/18] virtio_balloon: add host_zeroes_pages module parameter Michael S. Tsirkin
2026-04-20 12:51 ` [PATCH RFC v2 16/18] mm: page_reporting: add flush parameter with page budget Michael S. Tsirkin
2026-04-20 12:51 ` [PATCH RFC v2 17/18] mm: add free_frozen_pages_hint and put_page_hint APIs Michael S. Tsirkin
2026-04-21 10:56   ` David Hildenbrand (Arm)
2026-04-20 12:51 ` [PATCH RFC v2 18/18] virtio_balloon: mark deflated pages as pre-zeroed Michael S. Tsirkin
2026-04-20 18:09 ` [syzbot ci] Re: mm/virtio: skip redundant zeroing of host-zeroed reported pages syzbot ci
2026-04-20 18:20 ` [PATCH RFC v2 00/18] " David Hildenbrand (Arm)
2026-04-20 23:33   ` Michael S. Tsirkin
2026-04-21  2:38     ` Gregory Price
2026-04-21 10:04       ` David Hildenbrand (Arm)
2026-04-21 10:50     ` David Hildenbrand (Arm)
2026-04-21  2:21 ` Gregory Price

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=290d615a001cf121dc0c604eb79451bcc7917baa.1776689093.git.mst@redhat.com \
    --to=mst@redhat.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=apopple@nvidia.com \
    --cc=byungchul@sk.com \
    --cc=david@kernel.org \
    --cc=gourry@gourry.net \
    --cc=hannes@cmpxchg.org \
    --cc=jackmanb@google.com \
    --cc=jasowang@redhat.com \
    --cc=joshua.hahnjy@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=ljs@kernel.org \
    --cc=matthew.brost@intel.com \
    --cc=mhocko@suse.com \
    --cc=rakie.kim@sk.com \
    --cc=rppt@kernel.org \
    --cc=surenb@google.com \
    --cc=vbabka@kernel.org \
    --cc=virtualization@lists.linux.dev \
    --cc=ying.huang@linux.alibaba.com \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox