linux-mm.kvack.org archive mirror
* [PATCH 0/4] mm: hugetlb: allocate frozen gigantic folio
@ 2025-09-11  6:56 Kefeng Wang
  2025-09-11  6:56 ` [PATCH 1/4] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page() Kefeng Wang
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Kefeng Wang @ 2025-09-11  6:56 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song,
	Zi Yan, Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm, Kefeng Wang

Convert hugetlb to allocate frozen gigantic folios, similar to
alloc_buddy_hugetlb_folio(), which avoids the atomic operations on the
folio refcount.

---
This is the second part of the hugetlb allocation changes. The main
difference from the previous version [1] is that we introduce
alloc_contig_{range_frozen,frozen_pages}(), which do not call
set_page_refcounted() for either compound or non-compound pages and no
longer need ACR_FLAGS_FROZEN, as suggested by Matthew and Zi.
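
For illustration, the change in the allocation pattern looks roughly
like this (a condensed sketch of patch 4, error handling trimmed):

	/* Before: allocate with refcount 1, then atomically freeze it. */
	folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
	if (folio && !folio_ref_freeze(folio, 1))
		goto retry;	/* unexpected extra reference */

	/* After: the folio already comes back frozen (refcount == 0). */
	folio = folio_alloc_frozen_gigantic(order, gfp_mask, nid, nodemask);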

Based on mm-unstable 20250910

[1] https://lore.kernel.org/linux-mm/20250902124820.3081488-1-wangkefeng.wang@huawei.com

Kefeng Wang (4):
  mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page()
  mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
  mm: cma: add __cma_release()
  mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()

 include/linux/cma.h   |   9 +-
 include/linux/gfp.h   |  46 +++++----
 mm/cma.c              |  83 +++++++---------
 mm/debug_vm_pgtable.c |  35 ++++---
 mm/hugetlb.c          |  50 ++--------
 mm/hugetlb_cma.c      |  13 ++-
 mm/hugetlb_cma.h      |  10 +-
 mm/page_alloc.c       | 213 ++++++++++++++++++++++++++++--------------
 8 files changed, 245 insertions(+), 214 deletions(-)

-- 
2.43.0




* [PATCH 1/4] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page()
  2025-09-11  6:56 [PATCH 0/4] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
@ 2025-09-11  6:56 ` Kefeng Wang
  2025-09-12  6:58   ` Kefeng Wang
  2025-09-11  6:56 ` [PATCH 2/4] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}() Kefeng Wang
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Kefeng Wang @ 2025-09-11  6:56 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song,
	Zi Yan, Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm, Kefeng Wang

Add a new helper to free huge pages, for consistency with
debug_vm_pgtable_alloc_huge_page(); also move free_contig_range()
under CONFIG_CONTIG_ALLOC since all callers are built with
CONFIG_CONTIG_ALLOC.

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/gfp.h   |  2 +-
 mm/debug_vm_pgtable.c | 35 ++++++++++++++++-------------------
 mm/page_alloc.c       |  2 +-
 3 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 5ebf26fcdcfa..651acd42256f 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -437,8 +437,8 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_
 					      int nid, nodemask_t *nodemask);
 #define alloc_contig_pages(...)			alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
 
-#endif
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
+#endif
 
 #ifdef CONFIG_CONTIG_ALLOC
 static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 830107b6dd08..767321a8b26c 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -946,22 +946,26 @@ static unsigned long __init get_random_vaddr(void)
 	return random_vaddr;
 }
 
-static void __init destroy_args(struct pgtable_debug_args *args)
+static void __init
+debug_vm_pgtable_free_huge_page(struct pgtable_debug_args *args,
+		unsigned long pfn, int order)
 {
-	struct page *page = NULL;
+#ifdef CONFIG_CONTIG_ALLOC
+	if (args->is_contiguous_page) {
+		free_contig_range(pfn, 1 << order);
+		return;
+	}
+#endif
+	__free_pages(pfn_to_page(pfn), order);
+}
 
+static void __init destroy_args(struct pgtable_debug_args *args)
+{
 	/* Free (huge) page */
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
 	    has_transparent_pud_hugepage() &&
 	    args->pud_pfn != ULONG_MAX) {
-		if (args->is_contiguous_page) {
-			free_contig_range(args->pud_pfn,
-					  (1 << (HPAGE_PUD_SHIFT - PAGE_SHIFT)));
-		} else {
-			page = pfn_to_page(args->pud_pfn);
-			__free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT);
-		}
-
+		debug_vm_pgtable_free_huge_page(args, args->pud_pfn, HPAGE_PMD_ORDER);
 		args->pud_pfn = ULONG_MAX;
 		args->pmd_pfn = ULONG_MAX;
 		args->pte_pfn = ULONG_MAX;
@@ -970,20 +974,13 @@ static void __init destroy_args(struct pgtable_debug_args *args)
 	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
 	    has_transparent_hugepage() &&
 	    args->pmd_pfn != ULONG_MAX) {
-		if (args->is_contiguous_page) {
-			free_contig_range(args->pmd_pfn, (1 << HPAGE_PMD_ORDER));
-		} else {
-			page = pfn_to_page(args->pmd_pfn);
-			__free_pages(page, HPAGE_PMD_ORDER);
-		}
-
+		debug_vm_pgtable_free_huge_page(args, args->pmd_pfn, HPAGE_PMD_ORDER);
 		args->pmd_pfn = ULONG_MAX;
 		args->pte_pfn = ULONG_MAX;
 	}
 
 	if (args->pte_pfn != ULONG_MAX) {
-		page = pfn_to_page(args->pte_pfn);
-		__free_page(page);
+		__free_page(pfn_to_page(args->pte_pfn));
 
 		args->pte_pfn = ULONG_MAX;
 	}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6ff9f17d5f4e..587be3aa1366 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7090,7 +7090,6 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 	}
 	return NULL;
 }
-#endif /* CONFIG_CONTIG_ALLOC */
 
 void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 {
@@ -7117,6 +7116,7 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 	WARN(count != 0, "%lu pages are still in use!\n", count);
 }
 EXPORT_SYMBOL(free_contig_range);
+#endif /* CONFIG_CONTIG_ALLOC */
 
 /*
  * Effectively disable pcplists for the zone by setting the high limit to 0
-- 
2.43.0




* [PATCH 2/4] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}()
  2025-09-11  6:56 [PATCH 0/4] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
  2025-09-11  6:56 ` [PATCH 1/4] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page() Kefeng Wang
@ 2025-09-11  6:56 ` Kefeng Wang
  2025-09-11  6:56 ` [PATCH 3/4] mm: cma: add __cma_release() Kefeng Wang
  2025-09-11  6:56 ` [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
  3 siblings, 0 replies; 15+ messages in thread
From: Kefeng Wang @ 2025-09-11  6:56 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song,
	Zi Yan, Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm, Kefeng Wang

In order to allocate a given range of pages, or compound pages, without
incrementing their refcount, add two new helpers,
alloc_contig_range_frozen() and alloc_contig_frozen_pages(), which may
be beneficial to some users (e.g. hugetlb). A matching
free_contig_range_frozen() is provided to pair with
alloc_contig_range_frozen(), but it is better to use
free_frozen_pages() to free frozen compound pages.
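
For example, a caller that wants a frozen compound page would do
something like this (a sketch against the new interfaces, not taken
from an in-tree user):

	struct page *page;

	page = alloc_contig_frozen_pages(1 << order, GFP_KERNEL | __GFP_COMP,
					 nid, nodemask);
	if (!page)
		return NULL;

	/* page is compound with refcount 0 until the caller publishes it. */

	/* On the free side, prefer free_frozen_pages() for compound pages. */
	free_frozen_pages(page, order);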

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/gfp.h |  29 ++++--
 mm/page_alloc.c     | 211 +++++++++++++++++++++++++++++---------------
 2 files changed, 161 insertions(+), 79 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 651acd42256f..ed9445e6fe38 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -429,14 +429,27 @@ typedef unsigned int __bitwise acr_flags_t;
 #define ACR_FLAGS_CMA ((__force acr_flags_t)BIT(0)) // allocate for CMA
 
 /* The below functions must be run on a range from a single zone. */
-extern int alloc_contig_range_noprof(unsigned long start, unsigned long end,
-				     acr_flags_t alloc_flags, gfp_t gfp_mask);
-#define alloc_contig_range(...)			alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
-
-extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
-					      int nid, nodemask_t *nodemask);
-#define alloc_contig_pages(...)			alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
-
+int alloc_contig_range_frozen_noprof(unsigned long start, unsigned long end,
+		acr_flags_t alloc_flags, gfp_t gfp_mask);
+#define alloc_contig_range_frozen(...)	\
+	alloc_hooks(alloc_contig_range_frozen_noprof(__VA_ARGS__))
+
+int alloc_contig_range_noprof(unsigned long start, unsigned long end,
+		acr_flags_t alloc_flags, gfp_t gfp_mask);
+#define alloc_contig_range(...)	\
+	alloc_hooks(alloc_contig_range_noprof(__VA_ARGS__))
+
+struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
+		gfp_t gfp_mask, int nid, nodemask_t *nodemask);
+#define alloc_contig_frozen_pages(...) \
+	alloc_hooks(alloc_contig_frozen_pages_noprof(__VA_ARGS__))
+
+struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask);
+#define alloc_contig_pages(...)	\
+	alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
+
+void free_contig_range_frozen(unsigned long pfn, unsigned long nr_pages);
 void free_contig_range(unsigned long pfn, unsigned long nr_pages);
 #endif
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 587be3aa1366..1b412c0327e1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3042,6 +3042,23 @@ void free_unref_folios(struct folio_batch *folios)
 	folio_batch_reinit(folios);
 }
 
+static void __split_page(struct page *page, unsigned int order, bool frozen)
+{
+	int i;
+
+	VM_BUG_ON_PAGE(PageCompound(page), page);
+
+	if (!frozen) {
+		VM_BUG_ON_PAGE(!page_count(page), page);
+		for (i = 1; i < (1 << order); i++)
+			set_page_refcounted(page + i);
+	}
+
+	split_page_owner(page, order, 0);
+	pgalloc_tag_split(page_folio(page), order, 0);
+	split_page_memcg(page, order);
+}
+
 /*
  * split_page takes a non-compound higher-order page, and splits it into
  * n (1<<order) sub-pages: page[0..n]
@@ -3052,16 +3069,7 @@ void free_unref_folios(struct folio_batch *folios)
  */
 void split_page(struct page *page, unsigned int order)
 {
-	int i;
-
-	VM_BUG_ON_PAGE(PageCompound(page), page);
-	VM_BUG_ON_PAGE(!page_count(page), page);
-
-	for (i = 1; i < (1 << order); i++)
-		set_page_refcounted(page + i);
-	split_page_owner(page, order, 0);
-	pgalloc_tag_split(page_folio(page), order, 0);
-	split_page_memcg(page, order);
+	__split_page(page, order, false);
 }
 EXPORT_SYMBOL_GPL(split_page);
 
@@ -6770,7 +6778,7 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
 	return (ret < 0) ? ret : 0;
 }
 
-static void split_free_pages(struct list_head *list, gfp_t gfp_mask)
+static void split_free_frozen_pages(struct list_head *list, gfp_t gfp_mask)
 {
 	int order;
 
@@ -6782,11 +6790,10 @@ static void split_free_pages(struct list_head *list, gfp_t gfp_mask)
 			int i;
 
 			post_alloc_hook(page, order, gfp_mask);
-			set_page_refcounted(page);
 			if (!order)
 				continue;
 
-			split_page(page, order);
+			__split_page(page, order, true);
 
 			/* Add all subpages to the order-0 head, in sequence. */
 			list_del(&page->lru);
@@ -6830,28 +6837,8 @@ static int __alloc_contig_verify_gfp_mask(gfp_t gfp_mask, gfp_t *gfp_cc_mask)
 	return 0;
 }
 
-/**
- * alloc_contig_range() -- tries to allocate given range of pages
- * @start:	start PFN to allocate
- * @end:	one-past-the-last PFN to allocate
- * @alloc_flags:	allocation information
- * @gfp_mask:	GFP mask. Node/zone/placement hints are ignored; only some
- *		action and reclaim modifiers are supported. Reclaim modifiers
- *		control allocation behavior during compaction/migration/reclaim.
- *
- * The PFN range does not have to be pageblock aligned. The PFN range must
- * belong to a single zone.
- *
- * The first thing this routine does is attempt to MIGRATE_ISOLATE all
- * pageblocks in the range.  Once isolated, the pageblocks should not
- * be modified by others.
- *
- * Return: zero on success or negative error code.  On success all
- * pages which PFN is in [start, end) are allocated for the caller and
- * need to be freed with free_contig_range().
- */
-int alloc_contig_range_noprof(unsigned long start, unsigned long end,
-			      acr_flags_t alloc_flags, gfp_t gfp_mask)
+int alloc_contig_range_frozen_noprof(unsigned long start, unsigned long end,
+		acr_flags_t alloc_flags, gfp_t gfp_mask)
 {
 	const unsigned int order = ilog2(end - start);
 	unsigned long outer_start, outer_end;
@@ -6967,19 +6954,18 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 	}
 
 	if (!(gfp_mask & __GFP_COMP)) {
-		split_free_pages(cc.freepages, gfp_mask);
+		split_free_frozen_pages(cc.freepages, gfp_mask);
 
 		/* Free head and tail (if any) */
 		if (start != outer_start)
-			free_contig_range(outer_start, start - outer_start);
+			free_contig_range_frozen(outer_start, start - outer_start);
 		if (end != outer_end)
-			free_contig_range(end, outer_end - end);
+			free_contig_range_frozen(end, outer_end - end);
 	} else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
 		struct page *head = pfn_to_page(start);
 
 		check_new_pages(head, order);
 		prep_new_page(head, order, gfp_mask, 0);
-		set_page_refcounted(head);
 	} else {
 		ret = -EINVAL;
 		WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
@@ -6989,16 +6975,48 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
 	undo_isolate_page_range(start, end);
 	return ret;
 }
-EXPORT_SYMBOL(alloc_contig_range_noprof);
 
-static int __alloc_contig_pages(unsigned long start_pfn,
-				unsigned long nr_pages, gfp_t gfp_mask)
+/**
+ * alloc_contig_range() -- tries to allocate given range of pages
+ * @start:	start PFN to allocate
+ * @end:	one-past-the-last PFN to allocate
+ * @alloc_flags:	allocation information
+ * @gfp_mask:	GFP mask. Node/zone/placement hints are ignored; only some
+ *		action and reclaim modifiers are supported. Reclaim modifiers
+ *		control allocation behavior during compaction/migration/reclaim.
+ *
+ * The PFN range does not have to be pageblock aligned. The PFN range must
+ * belong to a single zone.
+ *
+ * The first thing this routine does is attempt to MIGRATE_ISOLATE all
+ * pageblocks in the range.  Once isolated, the pageblocks should not
+ * be modified by others.
+ *
+ * Return: zero on success or negative error code.  On success all
+ * pages which PFN is in [start, end) are allocated for the caller and
+ * need to be freed with free_contig_range().
+ */
+int alloc_contig_range_noprof(unsigned long start, unsigned long end,
+			      acr_flags_t alloc_flags, gfp_t gfp_mask)
 {
-	unsigned long end_pfn = start_pfn + nr_pages;
+	int ret;
+
+	ret = alloc_contig_range_frozen_noprof(start, end, alloc_flags, gfp_mask);
+	if (ret)
+		return ret;
+
+	if (gfp_mask & __GFP_COMP) {
+		set_page_refcounted(pfn_to_page(start));
+	} else {
+		unsigned long pfn;
 
-	return alloc_contig_range_noprof(start_pfn, end_pfn, ACR_FLAGS_NONE,
-					 gfp_mask);
+		for (pfn = start; pfn < end; pfn++)
+			set_page_refcounted(pfn_to_page(pfn));
+	}
+
+	return 0;
 }
+EXPORT_SYMBOL(alloc_contig_range_noprof);
 
 static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn,
 				   unsigned long nr_pages)
@@ -7031,31 +7049,8 @@ static bool zone_spans_last_pfn(const struct zone *zone,
 	return zone_spans_pfn(zone, last_pfn);
 }
 
-/**
- * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
- * @nr_pages:	Number of contiguous pages to allocate
- * @gfp_mask:	GFP mask. Node/zone/placement hints limit the search; only some
- *		action and reclaim modifiers are supported. Reclaim modifiers
- *		control allocation behavior during compaction/migration/reclaim.
- * @nid:	Target node
- * @nodemask:	Mask for other possible nodes
- *
- * This routine is a wrapper around alloc_contig_range(). It scans over zones
- * on an applicable zonelist to find a contiguous pfn range which can then be
- * tried for allocation with alloc_contig_range(). This routine is intended
- * for allocation requests which can not be fulfilled with the buddy allocator.
- *
- * The allocated memory is always aligned to a page boundary. If nr_pages is a
- * power of two, then allocated range is also guaranteed to be aligned to same
- * nr_pages (e.g. 1GB request would be aligned to 1GB).
- *
- * Allocated pages can be freed with free_contig_range() or by manually calling
- * __free_page() on each allocated page.
- *
- * Return: pointer to contiguous pages on success, or NULL if not successful.
- */
-struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
-				 int nid, nodemask_t *nodemask)
+struct page *alloc_contig_frozen_pages_noprof(unsigned long nr_pages,
+		gfp_t gfp_mask, int nid, nodemask_t *nodemask)
 {
 	unsigned long ret, pfn, flags;
 	struct zonelist *zonelist;
@@ -7078,7 +7073,9 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 				 * and cause alloc_contig_range() to fail...
 				 */
 				spin_unlock_irqrestore(&zone->lock, flags);
-				ret = __alloc_contig_pages(pfn, nr_pages,
+				ret = alloc_contig_range_frozen_noprof(pfn,
+							pfn + nr_pages,
+							ACR_FLAGS_NONE,
 							gfp_mask);
 				if (!ret)
 					return pfn_to_page(pfn);
@@ -7090,6 +7087,78 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(alloc_contig_range_frozen_noprof);
+
+void free_contig_range_frozen(unsigned long pfn, unsigned long nr_pages)
+{
+	struct folio *folio = pfn_folio(pfn);
+
+	if (folio_test_large(folio)) {
+		int expected = folio_nr_pages(folio);
+
+		WARN_ON(folio_ref_count(folio));
+
+		if (nr_pages == expected)
+			free_frozen_pages(&folio->page, folio_order(folio));
+		else
+			WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
+			     pfn, nr_pages, expected);
+		return;
+	}
+
+	for (; nr_pages--; pfn++) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON(page_ref_count(page));
+		free_frozen_pages(page, 0);
+	}
+}
+EXPORT_SYMBOL(free_contig_range_frozen);
+
+/**
+ * alloc_contig_pages() -- tries to find and allocate contiguous range of pages
+ * @nr_pages:	Number of contiguous pages to allocate
+ * @gfp_mask:	GFP mask. Node/zone/placement hints limit the search; only some
+ *		action and reclaim modifiers are supported. Reclaim modifiers
+ *		control allocation behavior during compaction/migration/reclaim.
+ * @nid:	Target node
+ * @nodemask:	Mask for other possible nodes
+ *
+ * This routine is a wrapper around alloc_contig_range(). It scans over zones
+ * on an applicable zonelist to find a contiguous pfn range which can then be
+ * tried for allocation with alloc_contig_range(). This routine is intended
+ * for allocation requests which can not be fulfilled with the buddy allocator.
+ *
+ * The allocated memory is always aligned to a page boundary. If nr_pages is a
+ * power of two, then allocated range is also guaranteed to be aligned to same
+ * nr_pages (e.g. 1GB request would be aligned to 1GB).
+ *
+ * Allocated pages can be freed with free_contig_range() or by manually calling
+ * __free_page() on each allocated page.
+ *
+ * Return: pointer to contiguous pages on success, or NULL if not successful.
+ */
+struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask)
+{
+	struct page *page;
+
+	page =  alloc_contig_frozen_pages_noprof(nr_pages, gfp_mask, nid,
+						 nodemask);
+	if (!page)
+		return NULL;
+
+	if (gfp_mask & __GFP_COMP) {
+		set_page_refcounted(page);
+	} else {
+		unsigned long pfn = page_to_pfn(page);
+
+		for (; nr_pages--; pfn++)
+			set_page_refcounted(pfn_to_page(pfn));
+	}
+
+	return page;
+}
 
 void free_contig_range(unsigned long pfn, unsigned long nr_pages)
 {
-- 
2.43.0




* [PATCH 3/4] mm: cma: add __cma_release()
  2025-09-11  6:56 [PATCH 0/4] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
  2025-09-11  6:56 ` [PATCH 1/4] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page() Kefeng Wang
  2025-09-11  6:56 ` [PATCH 2/4] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}() Kefeng Wang
@ 2025-09-11  6:56 ` Kefeng Wang
  2025-09-11  6:56 ` [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
  3 siblings, 0 replies; 15+ messages in thread
From: Kefeng Wang @ 2025-09-11  6:56 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song,
	Zi Yan, Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm, Kefeng Wang

Kill cma_pages_valid(), which is only used in cma_release(), and clean
up the code duplication between the cma page validity check and the
cma memrange lookup. Add a __cma_release() helper to prepare for the
upcoming frozen page release.
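
After the cleanup, the validity check and the memrange lookup share a
single loop in __cma_release() (condensed from the hunk below):

	for (r = 0; r < cma->nranges; r++) {
		cmr = &cma->ranges[r];
		end = cmr->base_pfn + cmr->count;
		if (pfn >= cmr->base_pfn && pfn < end && pfn + count <= end)
			break;	/* found the containing memrange */
	}

	if (r == cma->nranges)
		return false;	/* pages do not belong to this CMA area */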

Reviewed-by: Jane Chu <jane.chu@oracle.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/cma.h |  1 -
 mm/cma.c            | 57 ++++++++++++---------------------------------
 2 files changed, 15 insertions(+), 43 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index 62d9c1cf6326..e5745d2aec55 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -49,7 +49,6 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
 					struct cma **res_cma);
 extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
 			      bool no_warn);
-extern bool cma_pages_valid(struct cma *cma, const struct page *pages, unsigned long count);
 extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
 
 extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
diff --git a/mm/cma.c b/mm/cma.c
index 813e6dc7b095..2af8c5bc58dd 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -942,34 +942,36 @@ struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
 	return page ? page_folio(page) : NULL;
 }
 
-bool cma_pages_valid(struct cma *cma, const struct page *pages,
-		     unsigned long count)
+static bool __cma_release(struct cma *cma, const struct page *pages,
+			  unsigned long count)
 {
 	unsigned long pfn, end;
 	int r;
 	struct cma_memrange *cmr;
-	bool ret;
+
+	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
 
 	if (!cma || !pages || count > cma->count)
 		return false;
 
 	pfn = page_to_pfn(pages);
-	ret = false;
 
 	for (r = 0; r < cma->nranges; r++) {
 		cmr = &cma->ranges[r];
 		end = cmr->base_pfn + cmr->count;
-		if (pfn >= cmr->base_pfn && pfn < end) {
-			ret = pfn + count <= end;
+		if (pfn >= cmr->base_pfn && pfn < end && pfn + count <= end)
 			break;
-		}
 	}
 
-	if (!ret)
-		pr_debug("%s(page %p, count %lu)\n",
-				__func__, (void *)pages, count);
+	if (r == cma->nranges)
+		return false;
 
-	return ret;
+	free_contig_range(pfn, count);
+	cma_clear_bitmap(cma, cmr, pfn, count);
+	cma_sysfs_account_release_pages(cma, count);
+	trace_cma_release(cma->name, pfn, pages, count);
+
+	return true;
 }
 
 /**
@@ -985,36 +987,7 @@ bool cma_pages_valid(struct cma *cma, const struct page *pages,
 bool cma_release(struct cma *cma, const struct page *pages,
 		 unsigned long count)
 {
-	struct cma_memrange *cmr;
-	unsigned long pfn, end_pfn;
-	int r;
-
-	pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
-
-	if (!cma_pages_valid(cma, pages, count))
-		return false;
-
-	pfn = page_to_pfn(pages);
-	end_pfn = pfn + count;
-
-	for (r = 0; r < cma->nranges; r++) {
-		cmr = &cma->ranges[r];
-		if (pfn >= cmr->base_pfn &&
-		    pfn < (cmr->base_pfn + cmr->count)) {
-			VM_BUG_ON(end_pfn > cmr->base_pfn + cmr->count);
-			break;
-		}
-	}
-
-	if (r == cma->nranges)
-		return false;
-
-	free_contig_range(pfn, count);
-	cma_clear_bitmap(cma, cmr, pfn, count);
-	cma_sysfs_account_release_pages(cma, count);
-	trace_cma_release(cma->name, pfn, pages, count);
-
-	return true;
+	return __cma_release(cma, pages, count);
 }
 
 bool cma_free_folio(struct cma *cma, const struct folio *folio)
@@ -1022,7 +995,7 @@ bool cma_free_folio(struct cma *cma, const struct folio *folio)
 	if (WARN_ON(!folio_test_large(folio)))
 		return false;
 
-	return cma_release(cma, &folio->page, folio_nr_pages(folio));
+	return __cma_release(cma, &folio->page, folio_nr_pages(folio));
 }
 
 int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
-- 
2.43.0




* [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-11  6:56 [PATCH 0/4] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
                   ` (2 preceding siblings ...)
  2025-09-11  6:56 ` [PATCH 3/4] mm: cma: add __cma_release() Kefeng Wang
@ 2025-09-11  6:56 ` Kefeng Wang
  2025-09-11  8:25   ` David Hildenbrand
  3 siblings, 1 reply; 15+ messages in thread
From: Kefeng Wang @ 2025-09-11  6:56 UTC (permalink / raw)
  To: Andrew Morton, David Hildenbrand, Oscar Salvador, Muchun Song,
	Zi Yan, Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm, Kefeng Wang

alloc_gigantic_folio() allocates a folio via alloc_contig_range() with
the refcount incremented and then freezes it. Convert it to allocate a
frozen folio directly, which removes the atomic operations on the folio
refcount and also saves atomic operations in
__update_and_free_hugetlb_folio().
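
The freeing side changes accordingly (condensed from the
__update_and_free_hugetlb_folio() hunk below):

	/* Before: unfreeze, then drop the last reference (two atomics). */
	folio_ref_unfreeze(folio, 1);
	hugetlb_free_folio(folio);	/* folio_put() or CMA release */

	/* After: the folio is still frozen and is freed directly. */
	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
	if (folio_test_hugetlb_cma(folio))
		hugetlb_cma_free_frozen_folio(folio);
	else
		free_frozen_pages(&folio->page, folio_order(folio));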

Rename some functions to make them more self-explanatory:

  folio_alloc_gigantic            -> folio_alloc_frozen_gigantic
  cma_{alloc,free}_folio          -> cma_{alloc,free}_frozen_folio
  hugetlb_cma_{alloc,free}_folio  -> hugetlb_cma_{alloc,free}_frozen_folio

Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/cma.h |  8 ++++----
 include/linux/gfp.h | 15 +++++++-------
 mm/cma.c            | 34 +++++++++++++++++++-----------
 mm/hugetlb.c        | 50 +++++++++------------------------------------
 mm/hugetlb_cma.c    | 13 ++++++------
 mm/hugetlb_cma.h    | 10 ++++-----
 6 files changed, 55 insertions(+), 75 deletions(-)

diff --git a/include/linux/cma.h b/include/linux/cma.h
index e5745d2aec55..cceec7b25bae 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h
@@ -57,16 +57,16 @@ extern bool cma_intersects(struct cma *cma, unsigned long start, unsigned long e
 extern void cma_reserve_pages_on_error(struct cma *cma);
 
 #ifdef CONFIG_CMA
-struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp);
-bool cma_free_folio(struct cma *cma, const struct folio *folio);
+struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp);
+bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio);
 bool cma_validate_zones(struct cma *cma);
 #else
-static inline struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
+static inline struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp)
 {
 	return NULL;
 }
 
-static inline bool cma_free_folio(struct cma *cma, const struct folio *folio)
+static inline bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio)
 {
 	return false;
 }
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index ed9445e6fe38..38a7be7792a2 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -454,26 +454,27 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages);
 #endif
 
 #ifdef CONFIG_CONTIG_ALLOC
-static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
-							int nid, nodemask_t *node)
+static inline struct folio *folio_alloc_frozen_gigantic_noprof(int order,
+		gfp_t gfp, int nid, nodemask_t *node)
 {
 	struct page *page;
 
 	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
 		return NULL;
 
-	page = alloc_contig_pages_noprof(1 << order, gfp, nid, node);
+	page = alloc_contig_frozen_pages_noprof(1 << order, gfp, nid, node);
 
 	return page ? page_folio(page) : NULL;
 }
 #else
-static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
-							int nid, nodemask_t *node)
+static inline struct folio *folio_alloc_frozen_gigantic_noprof(int order,
+		gfp_t gfp, int nid, nodemask_t *node)
 {
 	return NULL;
 }
 #endif
-/* This should be paired with folio_put() rather than free_contig_range(). */
-#define folio_alloc_gigantic(...) alloc_hooks(folio_alloc_gigantic_noprof(__VA_ARGS__))
+/* This should be paired with free_frozen_pages() rather than free_contig_range_frozen(). */
+#define folio_alloc_frozen_gigantic(...)	\
+	alloc_hooks(folio_alloc_frozen_gigantic_noprof(__VA_ARGS__))
 
 #endif /* __LINUX_GFP_H */
diff --git a/mm/cma.c b/mm/cma.c
index 2af8c5bc58dd..bca95b8a1015 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -778,7 +778,7 @@ static void cma_debug_show_areas(struct cma *cma)
 
 static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
 				unsigned long count, unsigned int align,
-				struct page **pagep, gfp_t gfp)
+				struct page **pagep, gfp_t gfp, bool frozen)
 {
 	unsigned long bitmap_maxno, bitmap_no, bitmap_count;
 	unsigned long start, pfn, mask, offset;
@@ -836,7 +836,12 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
 		spin_unlock_irq(&cma->lock);
 
 		mutex_lock(&cma->alloc_mutex);
-		ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
+		if (frozen)
+			ret = alloc_contig_range_frozen(pfn, pfn + count,
+							ACR_FLAGS_CMA, gfp);
+		else
+			ret = alloc_contig_range(pfn, pfn + count,
+						 ACR_FLAGS_CMA, gfp);
 		mutex_unlock(&cma->alloc_mutex);
 		if (!ret)
 			break;
@@ -857,7 +862,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
 }
 
 static struct page *__cma_alloc(struct cma *cma, unsigned long count,
-		       unsigned int align, gfp_t gfp)
+		       unsigned int align, gfp_t gfp, bool frozen)
 {
 	struct page *page = NULL;
 	int ret = -ENOMEM, r;
@@ -879,7 +884,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
 		page = NULL;
 
 		ret = cma_range_alloc(cma, &cma->ranges[r], count, align,
-				       &page, gfp);
+				       &page, gfp, frozen);
 		if (ret != -EBUSY || page)
 			break;
 	}
@@ -927,23 +932,24 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
 struct page *cma_alloc(struct cma *cma, unsigned long count,
 		       unsigned int align, bool no_warn)
 {
-	return __cma_alloc(cma, count, align, GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0));
+	return __cma_alloc(cma, count, align,
+			   GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0), false);
 }
 
-struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp)
+struct folio *cma_alloc_frozen_folio(struct cma *cma, int order, gfp_t gfp)
 {
 	struct page *page;
 
 	if (WARN_ON(!order || !(gfp & __GFP_COMP)))
 		return NULL;
 
-	page = __cma_alloc(cma, 1 << order, order, gfp);
+	page = __cma_alloc(cma, 1 << order, order, gfp, true);
 
 	return page ? page_folio(page) : NULL;
 }
 
 static bool __cma_release(struct cma *cma, const struct page *pages,
-			  unsigned long count)
+			  unsigned long count, bool frozen)
 {
 	unsigned long pfn, end;
 	int r;
@@ -966,7 +972,11 @@ static bool __cma_release(struct cma *cma, const struct page *pages,
 	if (r == cma->nranges)
 		return false;
 
-	free_contig_range(pfn, count);
+	if (frozen)
+		free_contig_range_frozen(pfn, count);
+	else
+		free_contig_range(pfn, count);
+
 	cma_clear_bitmap(cma, cmr, pfn, count);
 	cma_sysfs_account_release_pages(cma, count);
 	trace_cma_release(cma->name, pfn, pages, count);
@@ -987,15 +997,15 @@ static bool __cma_release(struct cma *cma, const struct page *pages,
 bool cma_release(struct cma *cma, const struct page *pages,
 		 unsigned long count)
 {
-	return __cma_release(cma, pages, count);
+	return __cma_release(cma, pages, count, false);
 }
 
-bool cma_free_folio(struct cma *cma, const struct folio *folio)
+bool cma_free_frozen_folio(struct cma *cma, const struct folio *folio)
 {
 	if (WARN_ON(!folio_test_large(folio)))
 		return false;
 
-	return __cma_release(cma, &folio->page, folio_nr_pages(folio));
+	return __cma_release(cma, &folio->page, folio_nr_pages(folio), true);
 }
 
 int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f0e701e455ed..100e840894f1 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -125,16 +125,6 @@ static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
 		unsigned long start, unsigned long end, bool take_locks);
 static struct resv_map *vma_resv_map(struct vm_area_struct *vma);
 
-static void hugetlb_free_folio(struct folio *folio)
-{
-	if (folio_test_hugetlb_cma(folio)) {
-		hugetlb_cma_free_folio(folio);
-		return;
-	}
-
-	folio_put(folio);
-}
-
 static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
 	if (spool->count)
@@ -1472,43 +1462,20 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		nr_nodes--)
 
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-#ifdef CONFIG_CONTIG_ALLOC
 static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
 	struct folio *folio;
-	bool retried = false;
-
-retry:
-	folio = hugetlb_cma_alloc_folio(order, gfp_mask, nid, nodemask);
-	if (!folio) {
-		if (hugetlb_cma_exclusive_alloc())
-			return NULL;
-
-		folio = folio_alloc_gigantic(order, gfp_mask, nid, nodemask);
-		if (!folio)
-			return NULL;
-	}
 
-	if (folio_ref_freeze(folio, 1))
+	folio = hugetlb_cma_alloc_frozen_folio(order, gfp_mask, nid, nodemask);
+	if (folio)
 		return folio;
 
-	pr_warn("HugeTLB: unexpected refcount on PFN %lu\n", folio_pfn(folio));
-	hugetlb_free_folio(folio);
-	if (!retried) {
-		retried = true;
-		goto retry;
-	}
-	return NULL;
-}
+	if (hugetlb_cma_exclusive_alloc())
+		return NULL;
 
-#else /* !CONFIG_CONTIG_ALLOC */
-static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
-					  nodemask_t *nodemask)
-{
-	return NULL;
+	return folio_alloc_frozen_gigantic(order, gfp_mask, nid, nodemask);
 }
-#endif /* CONFIG_CONTIG_ALLOC */
 
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
 static struct folio *alloc_gigantic_folio(int order, gfp_t gfp_mask, int nid,
@@ -1641,9 +1608,12 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
 	if (unlikely(folio_test_hwpoison(folio)))
 		folio_clear_hugetlb_hwpoison(folio);
 
-	folio_ref_unfreeze(folio, 1);
+	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
 
-	hugetlb_free_folio(folio);
+	if (folio_test_hugetlb_cma(folio))
+		hugetlb_cma_free_frozen_folio(folio);
+	else
+		free_frozen_pages(&folio->page, folio_order(folio));
 }
 
 /*
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index e8e4dc7182d5..24738a1d6098 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -18,29 +18,28 @@ static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
 static bool hugetlb_cma_only;
 static unsigned long hugetlb_cma_size __initdata;
 
-void hugetlb_cma_free_folio(struct folio *folio)
+void hugetlb_cma_free_frozen_folio(struct folio *folio)
 {
 	int nid = folio_nid(folio);
 
-	WARN_ON_ONCE(!cma_free_folio(hugetlb_cma[nid], folio));
+	WARN_ON_ONCE(!cma_free_frozen_folio(hugetlb_cma[nid], folio));
 }
 
-
-struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
-				      int nid, nodemask_t *nodemask)
+struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
+		int nid, nodemask_t *nodemask)
 {
 	int node;
 	struct folio *folio = NULL;
 
 	if (hugetlb_cma[nid])
-		folio = cma_alloc_folio(hugetlb_cma[nid], order, gfp_mask);
+		folio = cma_alloc_frozen_folio(hugetlb_cma[nid], order, gfp_mask);
 
 	if (!folio && !(gfp_mask & __GFP_THISNODE)) {
 		for_each_node_mask(node, *nodemask) {
 			if (node == nid || !hugetlb_cma[node])
 				continue;
 
-			folio = cma_alloc_folio(hugetlb_cma[node], order, gfp_mask);
+			folio = cma_alloc_frozen_folio(hugetlb_cma[node], order, gfp_mask);
 			if (folio)
 				break;
 		}
diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
index 2c2ec8a7e134..71db3544816e 100644
--- a/mm/hugetlb_cma.h
+++ b/mm/hugetlb_cma.h
@@ -3,8 +3,8 @@
 #define _LINUX_HUGETLB_CMA_H
 
 #ifdef CONFIG_CMA
-void hugetlb_cma_free_folio(struct folio *folio);
-struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
+void hugetlb_cma_free_frozen_folio(struct folio *folio);
+struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
 				      int nid, nodemask_t *nodemask);
 struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
 						    bool node_exact);
@@ -14,12 +14,12 @@ unsigned long hugetlb_cma_total_size(void);
 void hugetlb_cma_validate_params(void);
 bool hugetlb_early_cma(struct hstate *h);
 #else
-static inline void hugetlb_cma_free_folio(struct folio *folio)
+static inline void hugetlb_cma_free_frozen_folio(struct folio *folio)
 {
 }
 
-static inline struct folio *hugetlb_cma_alloc_folio(int order, gfp_t gfp_mask,
-		int nid, nodemask_t *nodemask)
+static inline struct folio *hugetlb_cma_alloc_frozen_folio(int order,
+		gfp_t gfp_mask, int nid, nodemask_t *nodemask)
 {
 	return NULL;
 }
-- 
2.43.0




* Re: [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-11  6:56 ` [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
@ 2025-09-11  8:25   ` David Hildenbrand
  2025-09-11  9:11     ` Kefeng Wang
  0 siblings, 1 reply; 15+ messages in thread
From: David Hildenbrand @ 2025-09-11  8:25 UTC (permalink / raw)
  To: 20250910133958.301467-1-wangkefeng.wang, Andrew Morton,
	Oscar Salvador, Muchun Song, Zi Yan, Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm, Kefeng Wang

On 11.09.25 08:56, Kefeng Wang wrote:
> The alloc_gigantic_folio() allocates a folio by alloc_contig_range()
> with refcount increated and then freeze it, convert to allocate a frozen
> folio directly to remove the atomic operation about folio refcount, also
> saving atomic operation during __update_and_free_hugetlb_folio too.
> 
> Rename some functions to make them more self-explanatory,
> 
>    folio_alloc_gigantic            -> folio_alloc_frozen_gigantic
>    cma_{alloc,free}_folio          -> cma_{alloc,free}_frozen_folio
>    hugetlb_cma_{alloc,free}_folio  -> hugetlb_cma_{alloc,free}_frozen_folio

Can we just get rid of folio_alloc_frozen_gigantic?

Further, can we just get rid of cma_{alloc,free}_frozen_folio() as well 
and just let hugetlb use alloc_contig_range_frozen() etc?

Long term alloc_contig_range() should just return frozen pages.

-- 
Cheers

David / dhildenb




* Re: [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-11  8:25   ` David Hildenbrand
@ 2025-09-11  9:11     ` Kefeng Wang
  2025-09-11 18:56       ` David Hildenbrand
  0 siblings, 1 reply; 15+ messages in thread
From: Kefeng Wang @ 2025-09-11  9:11 UTC (permalink / raw)
  To: David Hildenbrand, 20250910133958.301467-1-wangkefeng.wang,
	Andrew Morton, Oscar Salvador, Muchun Song, Zi Yan,
	Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm



On 2025/9/11 16:25, David Hildenbrand wrote:
> On 11.09.25 08:56, Kefeng Wang wrote:
>> The alloc_gigantic_folio() allocates a folio by alloc_contig_range()
>> with refcount increated and then freeze it, convert to allocate a frozen
>> folio directly to remove the atomic operation about folio refcount, also
>> saving atomic operation during __update_and_free_hugetlb_folio too.
>>
>> Rename some functions to make them more self-explanatory,
>>
>>    folio_alloc_gigantic            -> folio_alloc_frozen_gigantic
>>    cma_{alloc,free}_folio          -> cma_{alloc,free}_frozen_folio
>>    hugetlb_cma_{alloc,free}_folio  -> hugetlb_cma_{alloc,free} 
>> _frozen_folio
> 
> Can we just get rid of folio_alloc_frozen_gigantic?
> 

OK, we could kill it.

> Further, can we just get rid of cma_{alloc,free}_frozen_folio() as well 
> and just let hugetlb use alloc_contig_range_frozen() etc?

HugeTLB can allocate a folio via alloc_contig_frozen_pages() directly,
but it may also allocate from hugetlb_cma, and cma_alloc_folio() needs
to update some cma metadata, so we need to keep it.

> 
> Long term alloc_contig_range() should just return frozen pages.
> 

Yes, Matthew mentioned this too; that is the reason for the new version.

Thanks



* Re: [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-11  9:11     ` Kefeng Wang
@ 2025-09-11 18:56       ` David Hildenbrand
  2025-09-12  6:57         ` Kefeng Wang
  0 siblings, 1 reply; 15+ messages in thread
From: David Hildenbrand @ 2025-09-11 18:56 UTC (permalink / raw)
  To: Kefeng Wang, 20250910133958.301467-1-wangkefeng.wang,
	Andrew Morton, Oscar Salvador, Muchun Song, Zi Yan,
	Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm

On 11.09.25 11:11, Kefeng Wang wrote:
> 
> 
> On 2025/9/11 16:25, David Hildenbrand wrote:
>> On 11.09.25 08:56, Kefeng Wang wrote:
>>> The alloc_gigantic_folio() allocates a folio by alloc_contig_range()
>>> with refcount increated and then freeze it, convert to allocate a frozen
>>> folio directly to remove the atomic operation about folio refcount, also
>>> saving atomic operation during __update_and_free_hugetlb_folio too.
>>>
>>> Rename some functions to make them more self-explanatory,
>>>
>>>     folio_alloc_gigantic            -> folio_alloc_frozen_gigantic
>>>     cma_{alloc,free}_folio          -> cma_{alloc,free}_frozen_folio
>>>     hugetlb_cma_{alloc,free}_folio  -> hugetlb_cma_{alloc,free}
>>> _frozen_folio
>>
>> Can we just get rid of folio_alloc_frozen_gigantic?
>>
> 
> OK, we could kill it.
> 
>> Further, can we just get rid of cma_{alloc,free}_frozen_folio() as well
>> and just let hugetlb use alloc_contig_range_frozen() etc?
> 
> HugeTLB can allocate folio by alloc_contig_frozen_pages() directly, but
> it could allocate from hugetlb_cma, cma_alloc_folio() need change some
> cma metadata, so we need to keep it.

Hm. Assuming we just have cma_alloc_frozen() -- again, probably what 
cma_alloc() would look like in the future, hugetlb can just construct a 
folio out of that.

Maybe we just want a helper to create a folio out of a given page range?

And that page range is either obtained through cma_alloc_frozen() or 
alloc_contig_frozen_pages().
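
Something like this, completely made up, just to show the shape:

	static struct folio *folio_from_frozen_pages(struct page *page,
			unsigned int order)
	{
		prep_compound_page(page, order);
		return page_folio(page);
	}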

Just a thought, keeping in mind that these things should probably just 
work with frozen pages and let allocating of a memdesc etc. be taken 
care of by someone else.

I'd be happy if we can remove the GFP_COMPOUND parameter from alloc_contig*.

@Willy, what's your take?

-- 
Cheers

David / dhildenb




* Re: [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-11 18:56       ` David Hildenbrand
@ 2025-09-12  6:57         ` Kefeng Wang
  2025-09-12  7:18           ` David Hildenbrand
  0 siblings, 1 reply; 15+ messages in thread
From: Kefeng Wang @ 2025-09-12  6:57 UTC (permalink / raw)
  To: David Hildenbrand, 20250910133958.301467-1-wangkefeng.wang,
	Andrew Morton, Oscar Salvador, Muchun Song, Zi Yan,
	Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm



On 2025/9/12 2:56, David Hildenbrand wrote:
> On 11.09.25 11:11, Kefeng Wang wrote:
>>
>>
>> On 2025/9/11 16:25, David Hildenbrand wrote:
>>> On 11.09.25 08:56, Kefeng Wang wrote:
>>>> The alloc_gigantic_folio() allocates a folio by alloc_contig_range()
>>>> with refcount increated and then freeze it, convert to allocate a 
>>>> frozen
>>>> folio directly to remove the atomic operation about folio refcount, 
>>>> also
>>>> saving atomic operation during __update_and_free_hugetlb_folio too.
>>>>
>>>> Rename some functions to make them more self-explanatory,
>>>>
>>>>     folio_alloc_gigantic            -> folio_alloc_frozen_gigantic
>>>>     cma_{alloc,free}_folio          -> cma_{alloc,free}_frozen_folio
>>>>     hugetlb_cma_{alloc,free}_folio  -> hugetlb_cma_{alloc,free}
>>>> _frozen_folio
>>>
>>> Can we just get rid of folio_alloc_frozen_gigantic?
>>>
>>
>> OK, we could kill it.
>>
>>> Further, can we just get rid of cma_{alloc,free}_frozen_folio() as well
>>> and just let hugetlb use alloc_contig_range_frozen() etc?
>>
>> HugeTLB can allocate folio by alloc_contig_frozen_pages() directly, but
>> it could allocate from hugetlb_cma, cma_alloc_folio() need change some
>> cma metadata, so we need to keep it.
> 
> Hm. Assuming we just have cma_alloc_frozen() -- again, probably what 
> cma_alloc() would look like in the future, hugetlb can just construct a 
> folio out of that.

I get your point; as a first step, we could convert
hugetlb_cma_alloc_folio() to use cma_alloc_frozen() instead of
cma_alloc_folio().

> 
> Maybe we just want a helper to create a folio out of a given page range?
> 
> And that page range is either obtained through cma_alloc_frozen() or 
> alloc_contig_frozen_pages().
> 
> Just a thought, keeping in mind that these things should probably just 
> work with frozen pages and let allcoating of a memdesc etc. be taken 
> care of someone else.
> 
> I'd be happy if we can remove the GFP_COMPOUND parameter from 
> alloc_contig*.

But I'm not sure about this part: GFP_COMPOUND for alloc_contig* was
introduced by commit e98337d11bbd ("mm/contig_alloc: support
__GFP_COMP"); if we still allocate a range of order-0 pages and create
the folio outside, it will slow down large folio allocation.


> 
> @Willy, what's your take?
> 




* Re: [PATCH 1/4] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page()
  2025-09-11  6:56 ` [PATCH 1/4] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page() Kefeng Wang
@ 2025-09-12  6:58   ` Kefeng Wang
  0 siblings, 0 replies; 15+ messages in thread
From: Kefeng Wang @ 2025-09-12  6:58 UTC (permalink / raw)
  To: 20250910133958.301467-1-wangkefeng.wang, Andrew Morton,
	David Hildenbrand, Oscar Salvador, Muchun Song, Zi Yan,
	Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm



On 2025/9/11 14:56, Kefeng Wang wrote:
> Add a new helper to free huge page to be consistency to
> debug_vm_pgtable_alloc_huge_page(), also move the
> free_contig_range() under CONFIG_ALLOC_CONTIG since all
> caller are built with CONFIG_ALLOC_CONTIG.
> 
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>   include/linux/gfp.h   |  2 +-
>   mm/debug_vm_pgtable.c | 35 ++++++++++++++++-------------------
>   mm/page_alloc.c       |  2 +-
>   3 files changed, 18 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 5ebf26fcdcfa..651acd42256f 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -437,8 +437,8 @@ extern struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_
>   					      int nid, nodemask_t *nodemask);
>   #define alloc_contig_pages(...)			alloc_hooks(alloc_contig_pages_noprof(__VA_ARGS__))
>   
> -#endif
>   void free_contig_range(unsigned long pfn, unsigned long nr_pages);
> +#endif
>   
>   #ifdef CONFIG_CONTIG_ALLOC
>   static inline struct folio *folio_alloc_gigantic_noprof(int order, gfp_t gfp,
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 830107b6dd08..767321a8b26c 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -946,22 +946,26 @@ static unsigned long __init get_random_vaddr(void)
>   	return random_vaddr;
>   }
>   
> -static void __init destroy_args(struct pgtable_debug_args *args)
> +static void __init
> +debug_vm_pgtable_free_huge_page(struct pgtable_debug_args *args,
> +		unsigned long pfn, int order)
>   {
> -	struct page *page = NULL;
> +#ifdef CONFIG_CONTIG_ALLOC
> +	if (args->is_contiguous_page) {
> +		free_contig_range(pfn, 1 << order);
> +		return;
> +	}
> +#endif
> +	__free_pages(pfn_to_page(pfn), order);
> +}
>   
> +static void __init destroy_args(struct pgtable_debug_args *args)
> +{
>   	/* Free (huge) page */
>   	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
>   	    has_transparent_pud_hugepage() &&
>   	    args->pud_pfn != ULONG_MAX) {
> -		if (args->is_contiguous_page) {
> -			free_contig_range(args->pud_pfn,
> -					  (1 << (HPAGE_PUD_SHIFT - PAGE_SHIFT)));
> -		} else {
> -			page = pfn_to_page(args->pud_pfn);
> -			__free_pages(page, HPAGE_PUD_SHIFT - PAGE_SHIFT);
> -		}
> -
> +		debug_vm_pgtable_free_huge_page(args, args->pud_pfn, HPAGE_PMD_ORDER);

Should be HPAGE_PUD_ORDER, will fix it.



* Re: [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-12  6:57         ` Kefeng Wang
@ 2025-09-12  7:18           ` David Hildenbrand
  2025-09-12  7:23             ` David Hildenbrand
  0 siblings, 1 reply; 15+ messages in thread
From: David Hildenbrand @ 2025-09-12  7:18 UTC (permalink / raw)
  To: Kefeng Wang, 20250910133958.301467-1-wangkefeng.wang,
	Andrew Morton, Oscar Salvador, Muchun Song, Zi Yan,
	Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm

On 12.09.25 08:57, Kefeng Wang wrote:
> 
> 
> On 2025/9/12 2:56, David Hildenbrand wrote:
>> On 11.09.25 11:11, Kefeng Wang wrote:
>>>
>>>
>>> On 2025/9/11 16:25, David Hildenbrand wrote:
>>>> On 11.09.25 08:56, Kefeng Wang wrote:
>>>>> The alloc_gigantic_folio() allocates a folio by alloc_contig_range()
>>>>> with refcount increated and then freeze it, convert to allocate a
>>>>> frozen
>>>>> folio directly to remove the atomic operation about folio refcount,
>>>>> also
>>>>> saving atomic operation during __update_and_free_hugetlb_folio too.
>>>>>
>>>>> Rename some functions to make them more self-explanatory,
>>>>>
>>>>>      folio_alloc_gigantic            -> folio_alloc_frozen_gigantic
>>>>>      cma_{alloc,free}_folio          -> cma_{alloc,free}_frozen_folio
>>>>>      hugetlb_cma_{alloc,free}_folio  -> hugetlb_cma_{alloc,free}
>>>>> _frozen_folio
>>>>
>>>> Can we just get rid of folio_alloc_frozen_gigantic?
>>>>
>>>
>>> OK, we could kill it.
>>>
>>>> Further, can we just get rid of cma_{alloc,free}_frozen_folio() as well
>>>> and just let hugetlb use alloc_contig_range_frozen() etc?
>>>
>>> HugeTLB can allocate folio by alloc_contig_frozen_pages() directly, but
>>> it could allocate from hugetlb_cma, cma_alloc_folio() need change some
>>> cma metadata, so we need to keep it.
>>
>> Hm. Assuming we just have cma_alloc_frozen() -- again, probably what
>> cma_alloc() would look like in the future, hugetlb can just construct a
>> folio out of that.
> 
> I get your point,firstly, we could convert to use cma_alloc_frozen()
> instead of cma_alloc_folio() in hugetlb_cma_alloc_folio().
> 
>>
>> Maybe we just want a helper to create a folio out of a given page range?
>>
>> And that page range is either obtained through cma_alloc_frozen() or
>> alloc_contig_frozen_pages().
>>
>> Just a thought, keeping in mind that these things should probably just
>> work with frozen pages and let allcoating of a memdesc etc. be taken
>> care of someone else.
>>
>> I'd be happy if we can remove the GFP_COMPOUND parameter from
>> alloc_contig*.
> 
> But not sure about this part,  GFP_COMPOUND for alloc_contig* is
> introduced by commit e98337d11bbd "mm/contig_alloc: support __GFP_COMP",
> if we still allocate a range of order-0 pages and create a folio
> outside, it will slow the large folio allocation.

Assuming we leave the refcount untouched (frozen), I guess what's left is

a) Calling post_alloc_hook() on each free buddy chunk we isolated
b) Splitting all pages to order 0

Splitting is updating the page owner + alloc tag + memcg, and currently 
still updating the refcount.
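
That refcount update is exactly what __split_page() from patch 2 skips
in the frozen case:

	if (!frozen) {
		for (i = 1; i < (1 << order); i++)
			set_page_refcounted(page + i);	/* the atomics */
	}
	split_page_owner(page, order, 0);
	pgalloc_tag_split(page_folio(page), order, 0);
	split_page_memcg(page, order);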


I would assume that most of the overhead came from the atomics when 
updating the refcount in split_page, which we would optimize out.

     Perf profile before:
       Alloc
         - 99.99% alloc_pool_huge_folio
            - __alloc_fresh_hugetlb_folio
               - 83.23% alloc_contig_pages_noprof
                  - 47.46% alloc_contig_range_noprof
                     - 20.96% isolate_freepages_range
                          16.10% split_page
                     - 14.10% start_isolate_page_range
                     - 12.02% undo_isolate_page_range

It would be interesting to see how much overhead would remain when 
just dealing with a) and b) above.

OTOH, maybe we can leave GFP_COMPOUND support in but make the function
more generic, not limited to folios (I suspect many users will not want 
folios, except hugetlb).

Maybe just a

struct page * cma_alloc_compound(struct cma *cma, unsigned int order, 
unsigned int align, bool no_warn);

That would allocate a frozen compound page starting today already.

-- 
Cheers

David / dhildenb




* Re: [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-12  7:18           ` David Hildenbrand
@ 2025-09-12  7:23             ` David Hildenbrand
  2025-09-12  9:12               ` Kefeng Wang
  0 siblings, 1 reply; 15+ messages in thread
From: David Hildenbrand @ 2025-09-12  7:23 UTC (permalink / raw)
  To: Kefeng Wang, 20250910133958.301467-1-wangkefeng.wang,
	Andrew Morton, Oscar Salvador, Muchun Song, Zi Yan,
	Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm

On 12.09.25 09:18, David Hildenbrand wrote:
> On 12.09.25 08:57, Kefeng Wang wrote:
>>
>>
>> On 2025/9/12 2:56, David Hildenbrand wrote:
>>> On 11.09.25 11:11, Kefeng Wang wrote:
>>>>
>>>>
>>>> On 2025/9/11 16:25, David Hildenbrand wrote:
>>>>> On 11.09.25 08:56, Kefeng Wang wrote:
>>>>>> The alloc_gigantic_folio() allocates a folio by alloc_contig_range()
>>>>>> with refcount increated and then freeze it, convert to allocate a
>>>>>> frozen
>>>>>> folio directly to remove the atomic operation about folio refcount,
>>>>>> also
>>>>>> saving atomic operation during __update_and_free_hugetlb_folio too.
>>>>>>
>>>>>> Rename some functions to make them more self-explanatory,
>>>>>>
>>>>>>       folio_alloc_gigantic            -> folio_alloc_frozen_gigantic
>>>>>>       cma_{alloc,free}_folio          -> cma_{alloc,free}_frozen_folio
>>>>>>       hugetlb_cma_{alloc,free}_folio  -> hugetlb_cma_{alloc,free}
>>>>>> _frozen_folio
>>>>>
>>>>> Can we just get rid of folio_alloc_frozen_gigantic?
>>>>>
>>>>
>>>> OK, we could kill it.
>>>>
>>>>> Further, can we just get rid of cma_{alloc,free}_frozen_folio() as well
>>>>> and just let hugetlb use alloc_contig_range_frozen() etc?
>>>>
>>>> HugeTLB can allocate folio by alloc_contig_frozen_pages() directly, but
>>>> it could allocate from hugetlb_cma, cma_alloc_folio() need change some
>>>> cma metadata, so we need to keep it.
>>>
>>> Hm. Assuming we just have cma_alloc_frozen() -- again, probably what
>>> cma_alloc() would look like in the future, hugetlb can just construct a
>>> folio out of that.
>>
>> I get your point,firstly, we could convert to use cma_alloc_frozen()
>> instead of cma_alloc_folio() in hugetlb_cma_alloc_folio().
>>
>>>
>>> Maybe we just want a helper to create a folio out of a given page range?
>>>
>>> And that page range is either obtained through cma_alloc_frozen() or
>>> alloc_contig_frozen_pages().
>>>
>>> Just a thought, keeping in mind that these things should probably just
>>> work with frozen pages and let allcoating of a memdesc etc. be taken
>>> care of someone else.
>>>
>>> I'd be happy if we can remove the GFP_COMPOUND parameter from
>>> alloc_contig*.
>>
>> But not sure about this part,  GFP_COMPOUND for alloc_contig* is
>> introduced by commit e98337d11bbd "mm/contig_alloc: support __GFP_COMP",
>> if we still allocate a range of order-0 pages and create a folio
>> outside, it will slow the large folio allocation.
> 
> Assuming we leave the refcount untouched (frozen), I guess what's left is
> 
> a) Calling post_alloc_hook() on each free buddy chunk we isolated
> b) Splitting all pages to order 0
> 
> Splitting is updating the page owner + alloc tag + memcg, and currently
> still updating the refcount.
> 
> 
> I would assume that most of the overhead came from the atomics when
> updating the refcount in split_page, which we would optimize out.
> 
>       Perf profile before:
>         Alloc
>           - 99.99% alloc_pool_huge_folio
>              - __alloc_fresh_hugetlb_folio
>                 - 83.23% alloc_contig_pages_noprof
>                    - 47.46% alloc_contig_range_noprof
>                       - 20.96% isolate_freepages_range
>                            16.10% split_page
>                       - 14.10% start_isolate_page_range
>                       - 12.02% undo_isolate_page_range
> 
> Would be interesting trying to see how much overhead would remain when
> just dealing
> 
> OTOH, maybe we can leave __GFP_COMP support in but make the function
> more generic, not limited to folios (I suspect many users will not want
> folios, except hugetlb).
> 
> Maybe just a
> 
> struct page * cma_alloc_compound(struct cma *cma, unsigned int order,
> unsigned int align, bool no_warn);

^ no need for the align as I realized, just like
cma_alloc_folio() doesn't have.

I do wonder why we decided to allow cma_alloc_folio() to consume gfp_t 
flags when we don't do the same for cma_alloc().
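
For context, the two prototypes side by side (cma_alloc() as it exists
today, cma_alloc_folio() as in this series' base):

struct page *cma_alloc(struct cma *cma, unsigned long count,
		       unsigned int align, bool no_warn);
struct folio *cma_alloc_folio(struct cma *cma, int order, gfp_t gfp);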

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-12  7:23             ` David Hildenbrand
@ 2025-09-12  9:12               ` Kefeng Wang
  2025-09-12 18:07                 ` David Hildenbrand
  0 siblings, 1 reply; 15+ messages in thread
From: Kefeng Wang @ 2025-09-12  9:12 UTC (permalink / raw)
  To: David Hildenbrand, Andrew Morton, Oscar Salvador, Muchun Song,
	Zi Yan, Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm



On 2025/9/12 15:23, David Hildenbrand wrote:
> On 12.09.25 09:18, David Hildenbrand wrote:
>> On 12.09.25 08:57, Kefeng Wang wrote:
>>>
>>>
>>> On 2025/9/12 2:56, David Hildenbrand wrote:
>>>> On 11.09.25 11:11, Kefeng Wang wrote:
>>>>>
>>>>>
>>>>> On 2025/9/11 16:25, David Hildenbrand wrote:
>>>>>> On 11.09.25 08:56, Kefeng Wang wrote:
>>>>>>> The alloc_gigantic_folio() allocates a folio via alloc_contig_range()
>>>>>>> with its refcount increased and then freezes it; convert it to
>>>>>>> allocate a frozen folio directly to remove the atomic operations on
>>>>>>> the folio refcount, also saving atomic operations during
>>>>>>> __update_and_free_hugetlb_folio().
>>>>>>>
>>>>>>> Rename some functions to make them more self-explanatory,
>>>>>>>
>>>>>>>       folio_alloc_gigantic            -> folio_alloc_frozen_gigantic
>>>>>>>       cma_{alloc,free}_folio          -> cma_{alloc,free}_frozen_folio
>>>>>>>       hugetlb_cma_{alloc,free}_folio  -> hugetlb_cma_{alloc,free}_frozen_folio
>>>>>>
>>>>>> Can we just get rid of folio_alloc_frozen_gigantic?
>>>>>>
>>>>>
>>>>> OK, we could kill it.
>>>>>
>>>>>> Further, can we just get rid of cma_{alloc,free}_frozen_folio() as well
>>>>>> and just let hugetlb use alloc_contig_range_frozen() etc?
>>>>>
>>>>> HugeTLB can allocate a folio via alloc_contig_frozen_pages() directly,
>>>>> but it could also allocate from hugetlb_cma, and cma_alloc_folio()
>>>>> needs to update some cma metadata, so we need to keep it.
>>>>
>>>> Hm. Assuming we just have cma_alloc_frozen() -- again, probably what
>>>> cma_alloc() would look like in the future, hugetlb can just construct a
>>>> folio out of that.
>>>
>>> I get your point; as a first step, we could convert
>>> hugetlb_cma_alloc_folio() to use cma_alloc_frozen() instead of
>>> cma_alloc_folio().
>>>
>>>>
>>>> Maybe we just want a helper to create a folio out of a given page range?
>>>>
>>>> And that page range is either obtained through cma_alloc_frozen() or
>>>> alloc_contig_frozen_pages().
>>>>
>>>> Just a thought, keeping in mind that these things should probably just
>>>> work with frozen pages and let allocating a memdesc etc. be taken
>>>> care of by someone else.
>>>>
>>>> I'd be happy if we could remove the __GFP_COMP parameter from
>>>> alloc_contig*.
>>>
>>> But I'm not sure about this part. __GFP_COMP for alloc_contig* was
>>> introduced by commit e98337d11bbd ("mm/contig_alloc: support __GFP_COMP");
>>> if we still allocate a range of order-0 pages and create the folio
>>> outside, it will slow down large folio allocation.
>>
>> Assuming we leave the refcount untouched (frozen), I guess what's left is
>>
>> a) Calling post_alloc_hook() on each free buddy chunk we isolated
>> b) Splitting all pages to order 0
>>
>> Splitting is updating the page owner + alloc tag + memcg, and currently
>> still updating the refcount.
>>
>>
>> I would assume that most of the overhead came from the atomics when
>> updating the refcount in split_page, which we would optimize out.
>>
>>       Perf profile before:
>>         Alloc
>>           - 99.99% alloc_pool_huge_folio
>>              - __alloc_fresh_hugetlb_folio
>>                 - 83.23% alloc_contig_pages_noprof
>>                    - 47.46% alloc_contig_range_noprof
>>                       - 20.96% isolate_freepages_range
>>                            16.10% split_page
>>                       - 14.10% start_isolate_page_range
>>                       - 12.02% undo_isolate_page_range
>>
>> Would be interesting trying to see how much overhead would remain when
>> just dealing

Patch 2 skips the atomic updates in split_page() via
alloc_contig_frozen_pages(); I could test the performance of the old
alloc_contig_pages() with/without __GFP_COMP against the new
alloc_contig_frozen_pages() when allocating the same size.

>>
>> OTOH, maybe we can leave __GFP_COMP support in but make the function
>> more generic, not limited to folios (I suspect many users will not want
>> folios, except hugetlb).
>>

Maybe just add cma_alloc_frozen(), with
cma_alloc()/hugetlb_cma_alloc_folio() as wrappers that call it and set
the page refcount, like what we did for the other frozen allocations.

struct page *cma_alloc_frozen(struct cma *cma, unsigned long count,
				unsigned int align, gfp_t gfp);
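
A minimal sketch of that wrapper, assuming cma_alloc_frozen() hands back
every page with a frozen (zero) refcount and that no_warn simply maps to
__GFP_NOWARN (set_page_refcounted() is the existing mm/internal.h helper):

struct page *cma_alloc(struct cma *cma, unsigned long count,
		       unsigned int align, bool no_warn)
{
	gfp_t gfp = GFP_KERNEL | (no_warn ? __GFP_NOWARN : 0);
	struct page *page = cma_alloc_frozen(cma, count, align, gfp);
	unsigned long i;

	if (!page)
		return NULL;

	/* Non-compound range: give each order-0 page its initial reference. */
	for (i = 0; i < count; i++)
		set_page_refcounted(page + i);
	return page;
}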

>> Maybe just a
>>
>> struct page * cma_alloc_compound(struct cma *cma, unsigned int order,
>> unsigned int align, bool no_warn);
> 
> ^ no need for the align as I realized, just like
> cma_alloc_folio() doesn't have.

Since cma_alloc_frozen() is more generic, we need to keep align.

> 
> I do wonder why we decided to allow cma_alloc_folio() to consume gfp_t 
> flags when we don't do the same for cma_alloc().
> 

cma_alloc_folio() is currently called from the hugetlb allocation path,
so the gfp could be htlb_alloc_mask(), with/without
__GFP_THISNODE/__GFP_RETRY_MAYFAIL..., and those flags could be used by
alloc_contig_frozen_pages() (e.g. via gfp_zone()).

cma_alloc() has unconditionally used GFP_KERNEL since commit
6518202970c1 ("mm/cma: remove unsupported gfp_mask parameter from
cma_alloc()"), but that is another story.

Back to this patchset: just add the new cma_alloc_frozen() shown above
and call it directly from cma_alloc()/hugetlb_cma_alloc_folio(), getting
rid of folio_alloc_frozen_gigantic() and cma_alloc_folio(); we could do
more optimization in the next step.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-12  9:12               ` Kefeng Wang
@ 2025-09-12 18:07                 ` David Hildenbrand
  2025-09-13  4:13                   ` Kefeng Wang
  0 siblings, 1 reply; 15+ messages in thread
From: David Hildenbrand @ 2025-09-12 18:07 UTC (permalink / raw)
  To: Kefeng Wang, Andrew Morton, Oscar Salvador, Muchun Song, Zi Yan,
	Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm


>>>
>>> Would be interesting trying to see how much overhead would remain when
>>> just dealing
> 
> Patch 2 skips the atomic updates in split_page() via
> alloc_contig_frozen_pages(); I could test the performance of the old
> alloc_contig_pages() with/without __GFP_COMP against the new
> alloc_contig_frozen_pages() when allocating the same size.

Yes, that would be very interesting.

> 
>>>
>>> OTOH, maybe we can leave __GFP_COMP support in but make the function
>>> more generic, not limited to folios (I suspect many users will not want
>>> folios, except hugetlb).
>>>
> 
> Maybe just add cma_alloc_frozen(), with
> cma_alloc()/hugetlb_cma_alloc_folio() as wrappers that call it and set
> the page refcount, like what we did for the other frozen allocations.
> 
> struct page *cma_alloc_frozen(struct cma *cma, unsigned long count,
> 				unsigned int align, gfp_t gfp);

I think it's weird that cma_alloc_frozen() consumes gfp_t when
cma_alloc() doesn't, so I would hope we can clean that up (->
remove gfp if possible).

So maybe we really want a

cma_alloc_frozen

and

cma_alloc_frozen_compound

whereby the latter consumes an "order" instead of "count + align".

> 
>>> Maybe just a
>>>
>>> struct page * cma_alloc_compound(struct cma *cma, unsigned int order,
>>> unsigned int align, bool no_warn);
>>
>> ^ no need for the align as I realized, just like
>> cma_alloc_folio() doesn't have.
> 
> Since cma_alloc_frozen() is more generic, we need to keep align.
> 
>>
>> I do wonder why we decided to allow cma_alloc_folio() to consume gfp_t
>> flags when we don't do the same for cma_alloc().
>>
> 
> cma_alloc_folio() is currently called from the hugetlb allocation path,
> so the gfp could be htlb_alloc_mask(), with/without
> __GFP_THISNODE/__GFP_RETRY_MAYFAIL..., and those flags could be used by
> alloc_contig_frozen_pages() (e.g. via gfp_zone()).

Take a look at alloc_contig_range_noprof() where I added

	__alloc_contig_verify_gfp_mask()

Essentially, we ignore

	GFP_ZONEMASK | __GFP_RECLAIMABLE | __GFP_WRITE | __GFP_HARDWALL
        | __GFP_THISNODE | __GFP_MOVABLE

And we *always* set __GFP_RETRY_MAYFAIL
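
In other words, roughly this (a sketch of the semantics only, not the
actual __alloc_contig_verify_gfp_mask() code; contig_effective_gfp() is
a made-up name):

static gfp_t contig_effective_gfp(gfp_t gfp)
{
	/* Placement/mobility hints are meaningless for a fixed PFN range. */
	gfp &= ~(GFP_ZONEMASK | __GFP_RECLAIMABLE | __GFP_WRITE |
		 __GFP_HARDWALL | __GFP_THISNODE | __GFP_MOVABLE);
	return gfp | __GFP_RETRY_MAYFAIL;
}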

I'd argue that hugetlb passing in random parameters that don't really
make sense for contig allocations is rather an issue, and makes you
believe that they would have an effect.

If cma_alloc() doesn't provide them, a cma_alloc_frozen() or
cma_alloc_folio() shouldn't provide them either.

> 
> cma_alloc() has unconditionally used GFP_KERNEL since commit
> 6518202970c1 ("mm/cma: remove unsupported gfp_mask parameter from
> cma_alloc()"), but that is another story.
> 
> Back to this patchset: just add the new cma_alloc_frozen() shown above
> and call it directly from cma_alloc()/hugetlb_cma_alloc_folio(), getting
> rid of folio_alloc_frozen_gigantic() and cma_alloc_folio(); we could do
> more optimization in the next step.

Yeah, that's probably a good first step, but while at it I would hope we
could just clean up the gfp stuff and provide a more consistent interface.

-- 
Cheers

David / dhildenb



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio()
  2025-09-12 18:07                 ` David Hildenbrand
@ 2025-09-13  4:13                   ` Kefeng Wang
  0 siblings, 0 replies; 15+ messages in thread
From: Kefeng Wang @ 2025-09-13  4:13 UTC (permalink / raw)
  To: David Hildenbrand, Andrew Morton, Oscar Salvador, Muchun Song,
	Zi Yan, Matthew Wilcox
  Cc: sidhartha.kumar, jane.chu, Vlastimil Babka, Brendan Jackman,
	Johannes Weiner, linux-mm



On 2025/9/13 2:07, David Hildenbrand wrote:
> 
>>>>
>>>> Would be interesting trying to see how much overhead would remain when
>>>> just dealing
>>
>> Patch 2 skips the atomic updates in split_page() via
>> alloc_contig_frozen_pages(); I could test the performance of the old
>> alloc_contig_pages() with/without __GFP_COMP against the new
>> alloc_contig_frozen_pages() when allocating the same size.
> 
> Yes, that would be very interesting.


TEST1: alloc_contig_frozen_pages(split without atomic) + GFP_KERNEL
TEST2: alloc_contig_pages(split with atomic) + GFP_KERNEL
TEST3: alloc_contig_frozen_pages(split without atomic) + GFP_KERNEL|__GFP_COMP
TEST4: alloc_contig_pages(split with atomic) + GFP_KERNEL|__GFP_COMP

Alloc/free 10 * 1G; the results are in usec:

        TEST1  TEST2   TEST3   TEST4
alloc
       301024  296947  286681  288125
       297099  297194  286810  289906
       297134  300445  289950  288393
				
free				
       215014  215332  16730   16613
       214730  214948  16516   16588
       215727  214945  16589   16507

The perf profiles are shown below.

split_free_frozen_pages()'s proportion is lower than split_free_pages()'s,
but the total time is not reduced.

TEST1 alloc:
- sysctl_test_sysctl_handler.part.0
    - 96.92% alloc_contig_frozen_pages_noprof
       - 82.69% pfn_range_valid_contig
            pfn_to_online_page
       - 13.97% alloc_contig_range_frozen_noprof
            4.39% replace_free_hugepage_folios
          - 4.34% split_free_frozen_pages
               __list_add_valid_or_report
          - 3.22% undo_isolate_page_range
             - 3.04% unset_migratetype_isolate
                - __move_freepages_block_isolate
                     2.96% __move_freepages_block
          - 0.86% __drain_all_pages
               drain_pages_zone
               free_pcppages_bulk
          - 0.59% start_isolate_page_range
             - 0.51% set_migratetype_isolate
                  __move_freepages_block_isolate

TEST2 alloc:
  - sysctl_test_sysctl_handler.part.0
    - 99.82% alloc_contig_pages_old_noprof
       - 81.59% pfn_range_valid_contig
            pfn_to_online_page
       - 18.16% alloc_contig_range_old_noprof.constprop.0
          - 8.10% split_free_pages
               4.72% __split_page
               1.38% __list_add_valid_or_report
            5.83% replace_free_hugepage_folios
          - 3.07% undo_isolate_page_range
             - 3.01% unset_migratetype_isolate
                - __move_freepages_block_isolate
                     2.88% __move_freepages_block

TEST3 alloc:
- sysctl_test_sysctl_handler.part.0
    - 99.85% alloc_contig_frozen_pages_noprof
       - 86.04% pfn_range_valid_contig
            pfn_to_online_page
       - 13.52% alloc_contig_range_frozen_noprof
            4.23% replace_free_hugepage_folios
          - 3.99% prep_new_page
               prep_compound_page
          - 3.86% undo_isolate_page_range
             - 3.43% unset_migratetype_isolate
                - __move_freepages_block_isolate
                     3.35% __move_freepages_block
            0.54% __alloc_contig_migrate_range 



> 
>>
>>>>
>>>> OTOH, maybe we can leave __GFP_COMP support in but make the function
>>>> more generic, not limited to folios (I suspect many users will not want
>>>> folios, except hugetlb).
>>>>
>>
>> Maybe just add cma_alloc_frozen(), with
>> cma_alloc()/hugetlb_cma_alloc_folio() as wrappers that call it and set
>> the page refcount, like what we did for the other frozen allocations.
>>
>> struct page *cma_alloc_frozen(struct cma *cma, unsigned long count,
>>                 unsigned int align, gfp_t gfp);
> 
> I think it's weird that cma_alloc_frozen() consumes gfp_t when 
> cma_alloc() doesn't. So I would wish that we can clean that up (-> 
> remove gfp if possible).
> 
> So maybe we really want a
> 
> cma_alloc_frozen
> 
> and
> 
> cma_alloc_frozen_compound
> 
> whereby the latter consumes an "order" instead of "count + align".
> 
>>
>>>> Maybe just a
>>>>
>>>> struct page * cma_alloc_compound(struct cma *cma, unsigned int order,
>>>> unsigned int align, bool no_warn);
>>>
>>> ^ no need for the align as I realized, just like
>>> cma_alloc_folio() doesn't have.
>>
>> Since cma_alloc_frozen() is more generic, we need to keep align.
>>
>>>
>>> I do wonder why we decided to allow cma_alloc_folio() to consume gfp_t
>>> flags when we don't do the same for cma_alloc().
>>>
>>
>> cma_alloc_folio() is currently called from the hugetlb allocation path,
>> so the gfp could be htlb_alloc_mask(), with/without
>> __GFP_THISNODE/__GFP_RETRY_MAYFAIL..., and those flags could be used by
>> alloc_contig_frozen_pages() (e.g. via gfp_zone()).
> 
> Take a look at alloc_contig_range_noprof() where I added
> 
>      __alloc_contig_verify_gfp_mask()
> 
> Essentially, we ignore
> 
>      GFP_ZONEMASK | __GFP_RECLAIMABLE | __GFP_WRITE | __GFP_HARDWALL
>         | __GFP_THISNODE | __GFP_MOVABLE
> 
> And we *always* set __GFP_RETRY_MAYFAIL
> 
> I'd argue that hugetlb passing in random parameters that don't really
> make sense for contig allocations is rather an issue, and makes you
> believe that they would have an effect.
> 
> If cma_alloc() doesn't provide them, a cma_alloc_frozen() or
> cma_alloc_folio() shouldn't provide them either.

Oh, I made a mistake: cma_alloc_folio() calls alloc_contig_range()
(not alloc_contig_pages()) to allocate pages from specific cma ranges,
and __GFP_THISNODE is only needed by hugetlb_cma_alloc_folio(), so the
gfp flags are not useful for cma_alloc_frozen() or cma_alloc_folio();
let's remove them.



> 
>>
>> cma_alloc() has unconditionally used GFP_KERNEL since commit
>> 6518202970c1 ("mm/cma: remove unsupported gfp_mask parameter from
>> cma_alloc()"), but that is another story.
>>
>> Back to this patchset: just add the new cma_alloc_frozen() shown above
>> and call it directly from cma_alloc()/hugetlb_cma_alloc_folio(), getting
>> rid of folio_alloc_frozen_gigantic() and cma_alloc_folio(); we could do
>> more optimization in the next step.
> 
> Yeah, that's probably a good first step, but while at it I would hope we
> could just clean up the gfp stuff and provide a more consistent interface.
> 

So:

struct page *cma_alloc_frozen(struct cma *cma, unsigned long count,
			      unsigned int align, bool no_warn);

in include/linux/cma.h, and

struct page *cma_alloc_frozen_compound(struct cma *cma, unsigned int order);

in mm/internal.h, since maybe only hugetlb uses it.

Or keep cma_alloc_folio() but remove the gfp:

struct folio *cma_alloc_folio(struct cma *cma, int order);
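
With the mm/internal.h variant, the hugetlb side could then be as simple
as this hypothetical sketch (hugetlb_cma[] being the existing per-node
array in mm/hugetlb_cma.c; error handling elided):

static struct folio *hugetlb_cma_alloc_frozen_folio(int nid, int order)
{
	struct page *page = cma_alloc_frozen_compound(hugetlb_cma[nid], order);

	return page ? page_folio(page) : NULL;
}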

Any comments?

Thanks.



^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2025-09-13  4:13 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-09-11  6:56 [PATCH 0/4] mm: hugetlb: allocate frozen gigantic folio Kefeng Wang
2025-09-11  6:56 ` [PATCH 1/4] mm: debug_vm_pgtable: add debug_vm_pgtable_free_huge_page() Kefeng Wang
2025-09-12  6:58   ` Kefeng Wang
2025-09-11  6:56 ` [PATCH 2/4] mm: page_alloc: add alloc_contig_{range_frozen,frozen_pages}() Kefeng Wang
2025-09-11  6:56 ` [PATCH 3/4] mm: cma: add __cma_release() Kefeng Wang
2025-09-11  6:56 ` [PATCH 4/4] mm: hugetlb: allocate frozen pages in alloc_gigantic_folio() Kefeng Wang
2025-09-11  8:25   ` David Hildenbrand
2025-09-11  9:11     ` Kefeng Wang
2025-09-11 18:56       ` David Hildenbrand
2025-09-12  6:57         ` Kefeng Wang
2025-09-12  7:18           ` David Hildenbrand
2025-09-12  7:23             ` David Hildenbrand
2025-09-12  9:12               ` Kefeng Wang
2025-09-12 18:07                 ` David Hildenbrand
2025-09-13  4:13                   ` Kefeng Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox