From: Sid Kumar <sidhartha.kumar@oracle.com>
To: Kefeng Wang <wangkefeng.wang@huawei.com>,
Andrew Morton <akpm@linux-foundation.org>,
David Hildenbrand <david@redhat.com>,
Oscar Salvador <osalvador@suse.de>,
Muchun Song <muchun.song@linux.dev>,
linux-mm@kvack.org
Cc: jane.chu@oracle.com, Zi Yan <ziy@nvidia.com>,
Vlastimil Babka <vbabka@suse.cz>,
Brendan Jackman <jackmanb@google.com>,
Johannes Weiner <hannes@cmpxchg.org>,
Matthew Wilcox <willy@infradead.org>
Subject: Re: [PATCH v5 4/6] mm: page_alloc: add alloc_contig_frozen_{range,pages}()
Date: Fri, 2 Jan 2026 15:05:09 -0600
Message-ID: <ce16c846-5bab-4976-adfd-bef950bc0259@oracle.com>
In-Reply-To: <20251230072422.265265-5-wangkefeng.wang@huawei.com>

On 12/30/25 1:24 AM, Kefeng Wang wrote:
> In order to allocate a given range of pages, or compound pages,
> without incrementing their refcount, add two new helpers,
> alloc_contig_frozen_{range,pages}(), which may be beneficial
> to some users (e.g. hugetlb).
>
> The refcounted alloc_contig_{range,pages}() now only accept
> !__GFP_COMP gfp, and free_contig_range() is refactored to free only
> non-compound pages; cma_free_folio(), the only caller that frees
> compound pages, is changed accordingly. free_contig_frozen_range()
> is added to match alloc_contig_frozen_range() and is used to free
> frozen pages.
>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
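
Thanks, the split between the frozen and refcounted variants reads
cleanly to me. For my own notes while reviewing, a minimal sketch of
how a caller would use the new frozen API, going purely by the
signatures and kernel-doc below (hypothetical caller, not taken from
this patch):

        unsigned long nr = 1UL << order;
        struct page *page;

        /* all nr pages come back frozen, i.e. refcount == 0 */
        page = alloc_contig_frozen_pages(nr, GFP_KERNEL, nid, NULL);
        if (!page)
                return -ENOMEM;

        /* ... use the range ... */

        /* non-compound frozen pages are handed back as a range */
        free_contig_frozen_range(page_to_pfn(page), nr);

so callers that want frozen pages no longer have to allocate
refcounted pages and freeze them afterwards, which matches the
motivation in the changelog.
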
> ---
>  include/linux/gfp.h |  52 +++++--------
>  mm/cma.c            |   9 ++-
>  mm/hugetlb.c        |   9 ++-
>  mm/internal.h       |  13 ++++
>  mm/page_alloc.c     | 186 ++++++++++++++++++++++++++++++++------------
>  5 files changed, 184 insertions(+), 85 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index ea053f1cfa16..aa45989f410d 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -430,40 +430,30 @@ typedef unsigned int __bitwise acr_flags_t;
> #define ACR_FLAGS_CMA ((__force acr_flags_t)BIT(0)) // allocate for CMA
>
> /* The below functions must be run on a range from a single zone. */
> -extern int alloc_contig_range_noprof(unsigned long start, unsigned long end,
> - acr_flags_t alloc_flags, gfp_t gfp_mask);
> -#define alloc_contig_range(...) alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
> -
> -extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
> - int nid, nodemask_t *nodemask);
> -#define alloc_contig_pages(...) alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
> -
> +int alloc_contig_frozen_range_noprof(unsigned long start, unsigned long end,
> + acr_flags_t alloc_flags, gfp_t gfp_mask);
> +#define alloc_contig_frozen_range(...) \
> + alloc_hooks(alloc_contig_frozen_range_noprof(__VA_ARGS__))
> +
> +int alloc_contig_range_noprof(unsigned long start, unsigned long end,
> + acr_flags_t alloc_flags, gfp_t gfp_mask);
> +#define alloc_contig_range(...) \
> + alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
> +
> +struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
> + gfp_t gfp_mask, int nid, nodemask_t *nodemask);
> +#define alloc_contig_frozen_pages(...) \
> + alloc_hooks(alloc_contig_frozen_pages_noprof(__VA_ARGS__))
> +
> +struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
> + int nid, nodemask_t *nodemask);
> +#define alloc_contig_pages(...) \
> + alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
> +
> +void free_contig_frozen_range(unsigned long pfn, unsigned long nr_pages);
> void free_contig_range(unsigned long pfn, unsigned long nr_pages);
> #endif
>
> -#ifdef CONFIG_CONTIG_ALLOC
> -static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
> - int nid, nodemask_t *node)
> -{
> - struct page *page;
> -
> - if (WARN_ON(!order || !(gfp & __GFP_COMP)))
> - return NULL;
> -
> - page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
> -
> - return page ? page_folio(page) : NULL;
> -}
> -#else
> -static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
> - int nid, nodemask_t *node)
> -{
> - return NULL;
> -}
> -#endif
> -/* This should be paired with folio_put() rather than free_contig_range(). */
> -#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
> -
> DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
>
> #endif /* __LINUX_GFP_H */
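
With folio_alloc_gigantic() removed, getting a gigantic folio now goes
through the frozen helper plus page_folio(), as the hugetlb hunk below
does; roughly (my reading, assuming gfp still carries __GFP_COMP as
folio_alloc_gigantic() used to require):

        page = alloc_contig_frozen_pages(1 << order, gfp, nid, nodemask);
        if (!page)
                return NULL;
        set_page_refcounted(page);
        folio = page_folio(page);

Dropping the wrapper seems fine given hugetlb was its only caller as
far as I can tell.
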
> diff --git a/mm/cma.c b/mm/cma.c
> index fe3a9eaac4e5..0e8c146424fb 100644
> --- a/mm/cma.c
> +++ b/mm/cma.c
> @@ -836,7 +836,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
> spin_unlock_irq(&cma->lock);
>
> mutex_lock(&cma->alloc_mutex);
> - ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
> + ret = alloc_contig_frozen_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
> mutex_unlock(&cma->alloc_mutex);
> if (!ret)
> break;
> @@ -904,6 +904,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
> trace_cma_alloc_finish(name, page ? page_to_pfn(page) : 0,
> page, count, align, ret);
> if (page) {
> + set_pages_refcounted(page, count);
> count_vm_event(CMA_ALLOC_SUCCESS);
> cma_sysfs_account_success_pages(cma, count);
> } else {
> @@ -983,7 +984,11 @@ bool cma_release(struct cma *cma, const struct page *pages,
> return false;
> }
>
> - free_contig_range(pfn, count);
> + if (PageHead(pages))
> + __free_pages((struct page *)pages, compound_order(pages));
> + else
> + free_contig_range(pfn, count);
> +
> cma_clear_bitmap(cma, cmr, pfn, count);
> cma_sysfs_account_release_pages(cma, count);
> trace_cma_release(cma->name, pfn, pages, count);
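
The PageHead() special case makes sense now that free_contig_range()
refuses compound pages (see the last hunk in page_alloc.c), and per
the changelog it is only reachable via cma_free_folio(). The net
effect for an ordinary cma_alloc() user looks unchanged to me,
something like (sketch):

        /* each page still comes back with refcount == 1 ... */
        page = cma_alloc(cma, count, align, false);
        ...
        /* ... and is freed through the free_contig_range() path */
        cma_release(cma, page, count);

with the freeze/unfreeze now purely internal between cma_range_alloc()
and __cma_alloc().
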
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index a1832da0f623..c990e439c32e 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -1428,12 +1428,17 @@ static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
> retry:
> folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
> if (!folio) {
> + struct page *page;
> +
> if (hugetlb_cma_exclusive_alloc())
> return NULL;
>
> - folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
> - if (!folio)
> + page = alloc_contig_frozen_pages(1 << order, gfp_mask, nid, nodemask);
> + if (!page)
> return NULL;
> +
> + set_page_refcounted(page);
> + folio = page_folio(page);
> }
>
> if (folio_ref_freeze(folio, 1))
> diff --git a/mm/internal.h b/mm/internal.h
> index db4e97489f66..b8737c474412 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -513,6 +513,19 @@ static inline void set_page_refcounted(struct page *page)
> set_page_count(page, 1);
> }
>
> +static inline void set_pages_refcounted(struct page *page, unsigned long nr_pages)
> +{
> + unsigned long pfn = page_to_pfn(page);
> +
> + if (PageHead(page)) {
> + set_page_refcounted(page);
> + return;
> + }
> +
> + for (; nr_pages--; pfn++)
> + set_page_refcounted(pfn_to_page(pfn));
> +}
> +
> /*
> * Return true if a folio needs ->release_folio() calling upon it.
> */
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index b9bfbb69537e..149f7b581b62 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -6882,7 +6882,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
> return (ret < 0) ? ret : 0;
> }
>
> -static void split_free_pages(struct list_head *list, gfp_t gfp_mask)
> +static void split_free_frozen_pages(struct list_head *list, gfp_t gfp_mask)
> {
> int order;
>
> @@ -6894,11 +6894,10 @@ static void split_free_pages(struct list_head *list, gfp_t gfp_mask)
> int i;
>
> post_alloc_hook(page, order, gfp_mask);
> - set_page_refcounted(page);
> if (!order)
> continue;
>
> - split_page(page, order);
> + __split_page(page, order);
>
> /* Add all subpages to the order-0 head, in sequence. */
> list_del(&page->lru);
> @@ -6942,8 +6941,14 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask)
> return 0;
> }
>
> +static void __free_contig_frozen_range(unsigned long pfn, unsigned long nr_pages)
> +{
> + for (; nr_pages--; pfn++)
> + free_frozen_pages(pfn_to_page(pfn), 0);
> +}
> +
> /**
> - * alloc_contig_range() -- tries to allocate given range of pages
> + * alloc_contig_frozen_range() -- tries to allocate given range of frozen pages
> * @start: start PFN to allocate
> * @end: one-past-the-last PFN to allocate
> * @alloc_flags: allocation information
> @@ -6958,12 +6963,15 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask)
> * pageblocks in the range. Once isolated, the pageblocks should not
> * be modified by others.
> *
> - * Return: zero on success or negative error code. On success all
> - * pages which PFN is in [start, end) are allocated for the caller and
> - * need to be freed with free_contig_range().
> + * All frozen pages which PFN is in [start, end) are allocated for the
> + * caller, and they could be freed with free_contig_frozen_range(),
> + * free_frozen_pages() also could be used to free compound frozen pages
> + * directly.
> + *
> + * Return: zero on success or negative error code.
> */
> -int alloc_contig_range_noprof(unsigned long start, unsigned long end,
> - acr_flags_t alloc_flags, gfp_t gfp_mask)
> +int alloc_contig_frozen_range_noprof(unsigned long start, unsigned long end,
> + acr_flags_t alloc_flags, gfp_t gfp_mask)
> {
> const unsigned int order = ilog2(end - start);
> unsigned long outer_start, outer_end;
> @@ -7079,19 +7087,18 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
> }
>
> if (!(gfp_mask & __GFP_COMP)) {
> - split_free_pages(cc.freepages, gfp_mask);
> + split_free_frozen_pages(cc.freepages, gfp_mask);
>
> /* Free head and tail (if any) */
> if (start != outer_start)
> - free_contig_range(outer_start, start - outer_start);
> + __free_contig_frozen_range(outer_start, start - outer_start);
> if (end != outer_end)
> - free_contig_range(end, outer_end - end);
> + __free_contig_frozen_range(end, outer_end - end);
> } else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
> struct page *head = pfn_to_page(start);
>
> check_new_pages(head, order);
> prep_new_page(head, order, gfp_mask, 0);
> - set_page_refcounted(head);
> } else {
> ret = -EINVAL;
> WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
> @@ -7101,16 +7108,40 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
> undo_isolate_page_range(start, end);
> return ret;
> }
> -EXPORT_SYMBOL(alloc_contig_range_noprof);
> +EXPORT_SYMBOL(alloc_contig_frozen_range_noprof);
>
> -static int __alloc_contig_pages(unsigned long start_pfn,
> - unsigned long nr_pages, gfp_t gfp_mask)
> +/**
> + * alloc_contig_range() -- tries to allocate given range of pages
> + * @start: start PFN to allocate
> + * @end: one-past-the-last PFN to allocate
> + * @alloc_flags: allocation information
> + * @gfp_mask: GFP mask.
> + *
> + * This routine is a wrapper around alloc_contig_frozen_range(), it can't
> + * be used to allocate compound pages, the refcount of each allocated page
> + * will be set to one.
> + *
> + * All pages which PFN is in [start, end) are allocated for the caller,
> + * and should be freed with free_contig_range() or by manually calling
> + * __free_page() on each allocated page.
> + *
> + * Return: zero on success or negative error code.
> + */
> +int alloc_contig_range_noprof(unsigned long start, unsigned long end,
> + acr_flags_t alloc_flags, gfp_t gfp_mask)
> {
> - unsigned long end_pfn = start_pfn + nr_pages;
> + int ret;
>
> - return alloc_contig_range_noprof(start_pfn, end_pfn, ACR_FLAGS_NONE,
> - gfp_mask);
> + if (WARN_ON(gfp_mask & __GFP_COMP))
> + return -EINVAL;
> +
> + ret = alloc_contig_frozen_range_noprof(start, end, alloc_flags, gfp_mask);
> + if (!ret)
> + set_pages_refcounted(pfn_to_page(start), end - start);
> +
> + return ret;
> }
> +EXPORT_SYMBOL(alloc_contig_range_noprof);
>
> static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
> unsigned long nr_pages, bool skip_hugetlb,
> @@ -7179,7 +7210,7 @@ static bool zone_spans_last_pfn(const struct zone *zone,
> }
>
> /**
> - * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
> + * alloc_contig_frozen_pages() -- tries to find and allocate contiguous range of frozen pages
> * @nr_pages: Number of contiguous pages to allocate
> * @gfp_mask: GFP mask. Node/zone/placement hints limit the search; only some
> * action and reclaim modifiers are supported. Reclaim modifiers
> @@ -7187,22 +7218,25 @@ static bool zone_spans_last_pfn(const struct zone *zone,
> * @nid: Target node
> * @nodemask: Mask for other possible nodes
> *
> - * This routine is a wrapper around alloc_contig_range(). It scans over zones
> - * on an applicable zonelist to find a contiguous pfn range which can then be
> - * tried for allocation with alloc_contig_range(). This routine is intended
> - * for allocation requests which can not be fulfilled with the buddy allocator.
> + * This routine is a wrapper around alloc_contig_frozen_range(). It scans over
> + * zones on an applicable zonelist to find a contiguous pfn range which can then
> + * be tried for allocation with alloc_contig_frozen_range(). This routine is
> + * intended for allocation requests which can not be fulfilled with the buddy
> + * allocator.
> *
> * The allocated memory is always aligned to a page boundary. If nr_pages is a
> * power of two, then allocated range is also guaranteed to be aligned to same
> * nr_pages (e.g. 1GB request would be aligned to 1GB).
> *
> - * Allocated pages can be freed with free_contig_range() or by manually calling
> - * __free_page() on each allocated page.
> + * Allocated frozen pages need be freed with free_contig_frozen_range(),
> + * or by manually calling free_frozen_pages() on each allocated frozen
> + * non-compound page, for compound frozen pages could be freed with
> + * free_frozen_pages() directly.
> *
> - * Return: pointer to contiguous pages on success, or NULL if not successful.
> + * Return: pointer to contiguous frozen pages on success, or NULL if not successful.
> */
> -struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
> - int nid, nodemask_t *nodemask)
> +struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
> + gfp_t gfp_mask, int nid, nodemask_t *nodemask)
> {
> unsigned long ret, pfn, flags;
> struct zonelist *zonelist;
> @@ -7224,13 +7258,15 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
> &skipped_hugetlb)) {
> /*
> * We release the zone lock here because
> - * alloc_contig_range() will also lock the zone
> - * at some point. If there's an allocation
> - * spinning on this lock, it may win the race
> - * and cause alloc_contig_range() to fail...
> + * alloc_contig_frozen_range() will also lock
> + * the zone at some point. If there's an
> + * allocation spinning on this lock, it may
> + * win the race and cause allocation to fail.
> */
> spin_unlock_irqrestore(&zone->lock, flags);
> - ret = __alloc_contig_pages(pfn, nr_pages,
> + ret = alloc_contig_frozen_range_noprof(pfn,
> + pfn + nr_pages,
> + ACR_FLAGS_NONE,
> gfp_mask);
> if (!ret)
> return pfn_to_page(pfn);
> @@ -7253,30 +7289,80 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
> }
> return NULL;
> }
> +EXPORT_SYMBOL(alloc_contig_frozen_pages_noprof);
>
> -void free_contig_range(unsigned long pfn, unsigned long nr_pages)
> +/**
> + * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
> + * @nr_pages: Number of contiguous pages to allocate
> + * @gfp_mask: GFP mask.
> + * @nid: Target node
> + * @nodemask: Mask for other possible nodes
> + *
> + * This routine is a wrapper around alloc_contig_frozen_pages(), it can't
> + * be used to allocate compound pages, the refcount of each allocated page
> + * will be set to one.
> + *
> + * Allocated pages can be freed with free_contig_range() or by manually
> + * calling __free_page() on each allocated page.
> + *
> + * Return: pointer to contiguous pages on success, or NULL if not successful.
> + */
> +struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
> + int nid, nodemask_t *nodemask)
> {
> - unsigned long count = 0;
> - struct folio *folio = pfn_folio(pfn);
> + struct page *page;
>
> - if (folio_test_large(folio)) {
> - int expected = folio_nr_pages(folio);
> + if (WARN_ON(gfp_mask & __GFP_COMP))
> + return NULL;
>
> - if (nr_pages == expected)
> - folio_put(folio);
> - else
> - WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
> - pfn, nr_pages, expected);
> + page = alloc_contig_frozen_pages_noprof(nr_pages, gfp_mask, nid,
> + nodemask);
> + if (page)
> + set_pages_refcounted(page, nr_pages);
> +
> + return page;
> +}
> +EXPORT_SYMBOL(alloc_contig_pages_noprof);
> +
> +/**
> + * free_contig_frozen_range() -- free the contiguous range of frozen pages
> + * @pfn: start PFN to free
> + * @nr_pages: Number of contiguous frozen pages to free
> + *
> + * This can be used to free the allocated compound/non-compound frozen pages.
> + */
> +void free_contig_frozen_range(unsigned long pfn, unsigned long nr_pages)
> +{
> + struct page *first_page = pfn_to_page(pfn);
> + const unsigned int order = ilog2(nr_pages);
> +
> + if (WARN_ON_ONCE(first_page != compound_head(first_page)))
> + return;
> +
> + if (PageHead(first_page)) {
> + WARN_ON_ONCE(order != compound_order(first_page));
> + free_frozen_pages(first_page, order);
> return;
> }
>
> - for (; nr_pages--; pfn++) {
> - struct page *page = pfn_to_page(pfn);
> + __free_contig_frozen_range(pfn, nr_pages);
> +}
> +EXPORT_SYMBOL(free_contig_frozen_range);
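
One thing I double-checked here: in the compound case nr_pages has to
match the compound order exactly, otherwise the WARN fires, e.g.
(sketch):

        page = alloc_contig_frozen_pages(512, GFP_KERNEL | __GFP_COMP,
                                         nid, NULL);
        ...
        /* 512 == 1 << compound_order(page); anything else would warn */
        free_contig_frozen_range(page_to_pfn(page), 512);

which seems like the right behaviour.
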
> +
> +/**
> + * free_contig_range() -- free the contiguous range of pages
> + * @pfn: start PFN to free
> + * @nr_pages: Number of contiguous pages to free
> + *
> + * This can be only used to free the allocated non-compound pages.
> + */
> +void free_contig_range(unsigned long pfn, unsigned long nr_pages)
> +{
> + if (WARN_ON_ONCE(PageHead(pfn_to_page(pfn))))
> + return;
>
> - count += page_count(page) != 1;
> - __free_page(page);
> - }
> - WARN(count != 0, "%lu pages are still in use!\n", count);
> + for (; nr_pages--; pfn++)
> + __free_page(pfn_to_page(pfn));
> }
> EXPORT_SYMBOL(free_contig_range);
> #endif /* CONFIG_CONTIG_ALLOC */
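
For anyone else reading the series, the alloc/free pairings as I
understand them after this patch:

        alloc_contig_frozen_range() / alloc_contig_frozen_pages()
                -> free_contig_frozen_range(), or free_frozen_pages()
                   on each non-compound page, or a single
                   free_frozen_pages() on a compound head

        alloc_contig_range() / alloc_contig_pages()  (!__GFP_COMP only)
                -> free_contig_range(), or __free_page() on each page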