From: Frank van der Linden <fvdl@google.com>
To: akpm@linux-foundation.org, muchun.song@linux.dev,
linux-mm@kvack.org, linux-kernel@vger.kernel.org
Cc: yuzhao@google.com, usamaarif642@gmail.com,
joao.m.martins@oracle.com, roman.gushchin@linux.dev,
Frank van der Linden <fvdl@google.com>
Subject: [PATCH v3 25/28] mm/cma: introduce interface for early reservations
Date: Thu, 6 Feb 2025 18:51:05 +0000
Message-ID: <20250206185109.1210657-26-fvdl@google.com>
In-Reply-To: <20250206185109.1210657-1-fvdl@google.com>
It can be desirable to reserve memory in a CMA area before
it is activated, early in boot. Such reservations would
effectively be memblock allocations, but they can be
returned to the CMA area later. This functionality can
be used to allow hugetlb bootmem allocations from a
hugetlb CMA area.
A new interface, cma_reserve_early(), is introduced. It allows
for pageblock-aligned reservations. These reservations are
skipped during the initial handoff of pages in a CMA area
to the buddy allocator. The caller is responsible for making
sure that the page structures are set up and that the migrate
type is set correctly, as with other memblock allocations that
stick around. If the CMA area fails to activate (because it
intersects with multiple zones), the reserved memory is not
given to the buddy allocator; the caller needs to take care
of that.
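As a rough sketch of the intended calling sequence (the caller
below is purely illustrative and not part of this series; only
cma_reserve_early() and init_cma_pageblock() are introduced here,
the surrounding code is an assumption about the eventual hugetlb
user):

    void *buf;
    unsigned long pfn, start_pfn;
    unsigned long nr_pages = pageblock_nr_pages; /* example: one pageblock */

    /* Early in boot, before the CMA area is activated: */
    buf = cma_reserve_early(cma, nr_pages << PAGE_SHIFT);
    if (buf) {
            /*
             * Later, once the page structures for the range have
             * been initialized, mark each reserved pageblock
             * MIGRATE_CMA and account it in the zone CMA counters.
             */
            start_pfn = PHYS_PFN(virt_to_phys(buf));
            for (pfn = start_pfn; pfn < start_pfn + nr_pages;
                 pfn += pageblock_nr_pages)
                    init_cma_pageblock(pfn_to_page(pfn));
    }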
Signed-off-by: Frank van der Linden <fvdl@google.com>
---
mm/cma.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++-----
mm/cma.h | 8 +++++
mm/internal.h | 16 ++++++++++
mm/mm_init.c | 9 ++++++
4 files changed, 109 insertions(+), 7 deletions(-)
diff --git a/mm/cma.c b/mm/cma.c
index 4388d941d381..34a4df29af72 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -144,9 +144,10 @@ bool cma_validate_zones(struct cma *cma)
static void __init cma_activate_area(struct cma *cma)
{
- unsigned long pfn, base_pfn;
+ unsigned long pfn, end_pfn;
int allocrange, r;
struct cma_memrange *cmr;
+ unsigned long bitmap_count, count;
for (allocrange = 0; allocrange < cma->nranges; allocrange++) {
cmr = &cma->ranges[allocrange];
@@ -161,8 +162,13 @@ static void __init cma_activate_area(struct cma *cma)
for (r = 0; r < cma->nranges; r++) {
cmr = &cma->ranges[r];
- base_pfn = cmr->base_pfn;
- for (pfn = base_pfn; pfn < base_pfn + cmr->count;
+ if (cmr->early_pfn != cmr->base_pfn) {
+ count = cmr->early_pfn - cmr->base_pfn;
+ bitmap_count = cma_bitmap_pages_to_bits(cma, count);
+ bitmap_set(cmr->bitmap, 0, bitmap_count);
+ }
+
+ for (pfn = cmr->early_pfn; pfn < cmr->base_pfn + cmr->count;
pfn += pageblock_nr_pages)
init_cma_reserved_pageblock(pfn_to_page(pfn));
}
@@ -173,6 +179,7 @@ static void __init cma_activate_area(struct cma *cma)
INIT_HLIST_HEAD(&cma->mem_head);
spin_lock_init(&cma->mem_head_lock);
#endif
+ set_bit(CMA_ACTIVATED, &cma->flags);
return;
@@ -184,9 +191,8 @@ static void __init cma_activate_area(struct cma *cma)
if (!test_bit(CMA_RESERVE_PAGES_ON_ERROR, &cma->flags)) {
for (r = 0; r < allocrange; r++) {
cmr = &cma->ranges[r];
- for (pfn = cmr->base_pfn;
- pfn < cmr->base_pfn + cmr->count;
- pfn++)
+ end_pfn = cmr->base_pfn + cmr->count;
+ for (pfn = cmr->early_pfn; pfn < end_pfn; pfn++)
free_reserved_page(pfn_to_page(pfn));
}
}
@@ -290,6 +296,7 @@ int __init cma_init_reserved_mem(phys_addr_t base, phys_addr_t size,
return ret;
cma->ranges[0].base_pfn = PFN_DOWN(base);
+ cma->ranges[0].early_pfn = PFN_DOWN(base);
cma->ranges[0].count = cma->count;
cma->nranges = 1;
cma->nid = NUMA_NO_NODE;
@@ -509,6 +516,7 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size,
nr, (u64)mlp->base, (u64)mlp->base + size);
cmrp = &cma->ranges[nr++];
cmrp->base_pfn = PHYS_PFN(mlp->base);
+ cmrp->early_pfn = cmrp->base_pfn;
cmrp->count = size >> PAGE_SHIFT;
sizeleft -= size;
@@ -540,7 +548,6 @@ int __init cma_declare_contiguous_multi(phys_addr_t total_size,
pr_info("Reserved %lu MiB in %d range%s\n",
(unsigned long)total_size / SZ_1M, nr,
nr > 1 ? "s" : "");
-
return ret;
}
@@ -1034,3 +1041,65 @@ bool cma_intersects(struct cma *cma, unsigned long start, unsigned long end)
return false;
}
+
+/*
+ * Very basic function to reserve memory from a CMA area that has not
+ * yet been activated. This is expected to be called early, when the
+ * system is single-threaded, so there is no locking. The alignment
+ * checking is restrictive - only pageblock-aligned areas
+ * (CMA_MIN_ALIGNMENT_BYTES) may be reserved through this function.
+ * This keeps things simple, and is enough for the current use case.
+ *
+ * The CMA bitmaps have not yet been allocated, so just start
+ * reserving from the bottom up, using a PFN to keep track
+ * of what has been reserved. Unreserving is not possible.
+ *
+ * The caller is responsible for initializing the page structures
+ * in the area properly, since this just points to memblock-allocated
+ * memory. The caller should subsequently use init_cma_pageblock to
+ * set the migrate type and CMA stats for the pageblocks that were reserved.
+ *
+ * If the CMA area fails to activate later, memory obtained through
+ * this interface is not handed to the page allocator; this is
+ * the responsibility of the caller (just like normal memblock-allocated
+ * memory).
+ */
+void __init *cma_reserve_early(struct cma *cma, unsigned long size)
+{
+ int r;
+ struct cma_memrange *cmr;
+ unsigned long available;
+ void *ret = NULL;
+
+ if (!cma || !cma->count)
+ return NULL;
+ /*
+ * Can only be called early in init.
+ */
+ if (test_bit(CMA_ACTIVATED, &cma->flags))
+ return NULL;
+
+ if (!IS_ALIGNED(size, CMA_MIN_ALIGNMENT_BYTES))
+ return NULL;
+
+ if (!IS_ALIGNED(size, (PAGE_SIZE << cma->order_per_bit)))
+ return NULL;
+
+ size >>= PAGE_SHIFT;
+
+ if (size > cma->available_count)
+ return NULL;
+
+ for (r = 0; r < cma->nranges; r++) {
+ cmr = &cma->ranges[r];
+ available = cmr->count - (cmr->early_pfn - cmr->base_pfn);
+ if (size <= available) {
+ ret = phys_to_virt(PFN_PHYS(cmr->early_pfn));
+ cmr->early_pfn += size;
+ cma->available_count -= size;
+ return ret;
+ }
+ }
+
+ return ret;
+}
diff --git a/mm/cma.h b/mm/cma.h
index bddc84b3cd96..df7fc623b7a6 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -16,9 +16,16 @@ struct cma_kobject {
* and the total amount of memory requested, while smaller than the total
* amount of memory available, is large enough that it doesn't fit in a
* single physical memory range because of memory holes.
+ *
+ * Fields:
+ * @base_pfn: base PFN of the range
+ * @early_pfn: first PFN not reserved through cma_reserve_early
+ * @count: size of the range, in pages
+ * @bitmap: bitmap of allocated (1 << order_per_bit)-sized chunks.
*/
struct cma_memrange {
unsigned long base_pfn;
+ unsigned long early_pfn;
unsigned long count;
unsigned long *bitmap;
#ifdef CONFIG_CMA_DEBUGFS
@@ -58,6 +65,7 @@ enum cma_flags {
CMA_RESERVE_PAGES_ON_ERROR,
CMA_ZONES_VALID,
CMA_ZONES_INVALID,
+ CMA_ACTIVATED,
};
extern struct cma cma_areas[MAX_CMA_AREAS];
diff --git a/mm/internal.h b/mm/internal.h
index 63fda9bb9426..8318c8e6e589 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -848,6 +848,22 @@ void init_cma_reserved_pageblock(struct page *page);
#endif /* CONFIG_COMPACTION || CONFIG_CMA */
+struct cma;
+
+#ifdef CONFIG_CMA
+void *cma_reserve_early(struct cma *cma, unsigned long size);
+void init_cma_pageblock(struct page *page);
+#else
+static inline void *cma_reserve_early(struct cma *cma, unsigned long size)
+{
+ return NULL;
+}
+static inline void init_cma_pageblock(struct page *page)
+{
+}
+#endif
+
+
int find_suitable_fallback(struct free_area *area, unsigned int order,
int migratetype, bool only_stealable, bool *can_steal);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f7d5b4fe1ae9..f31260fd393e 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2263,6 +2263,15 @@ void __init init_cma_reserved_pageblock(struct page *page)
adjust_managed_page_count(page, pageblock_nr_pages);
page_zone(page)->cma_pages += pageblock_nr_pages;
}
+/*
+ * Similar to above, but only set the migrate type and stats.
+ */
+void __init init_cma_pageblock(struct page *page)
+{
+ set_pageblock_migratetype(page, MIGRATE_CMA);
+ adjust_managed_page_count(page, pageblock_nr_pages);
+ page_zone(page)->cma_pages += pageblock_nr_pages;
+}
#endif
void set_zone_contiguous(struct zone *zone)
--
2.48.1.502.g6dc24dfdaf-goog