From: Frank van der Linden <fvdl@google.com>
To: akpm@linux-foundation.org, muchun.song@linux.dev,
	linux-mm@kvack.org,  linux-kernel@vger.kernel.org
Cc: yuzhao@google.com, usamaarif642@gmail.com,
	joao.m.martins@oracle.com,  roman.gushchin@linux.dev,
	Frank van der Linden <fvdl@google.com>
Subject: [PATCH v2 18/28] mm/hugetlb: add pre-HVO framework
Date: Wed, 29 Jan 2025 22:41:47 +0000
Message-ID: <20250129224157.2046079-19-fvdl@google.com>
In-Reply-To: <20250129224157.2046079-1-fvdl@google.com>

Define flags for pre-HVOed bootmem hugetlb pages, and act on them.

The most important flag is the HVO flag, signalling that a
bootmem-allocated gigantic page has already been HVO-ed. If the
hugetlb bootmem gather code sees this flag, it marks the page as
HVO-optimized. The HVO code will then not try to optimize it
again; instead, it will just map the tail page mirror pages
read-only, completing the HVO steps.

No functional change, as nothing sets the flags yet.

Signed-off-by: Frank van der Linden <fvdl@google.com>
---
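For illustration only, a minimal sketch of how a later patch in this
series might mark a bootmem page as pre-HVOed; the helper name below is
hypothetical and not part of this patch:

    /*
     * Hypothetical example: set HUGE_BOOTMEM_HVO once the vmemmap of a
     * bootmem-allocated gigantic page has been optimized at early boot,
     * so that gather_bootmem_prealloc_node() marks the folio as
     * vmemmap-optimized instead of optimizing it again.
     */
    static void __init hugetlb_bootmem_mark_prehvo(struct huge_bootmem_page *m)
    {
    	m->flags |= HUGE_BOOTMEM_HVO;
    }
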
 arch/powerpc/mm/hugetlbpage.c |  1 +
 include/linux/hugetlb.h       |  4 +++
 mm/hugetlb.c                  | 24 ++++++++++++++++-
 mm/hugetlb_vmemmap.c          | 50 +++++++++++++++++++++++++++++++++--
 mm/hugetlb_vmemmap.h          | 15 +++++++++++
 5 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 6b043180220a..d3c1b749dcfc 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -113,6 +113,7 @@ static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
 	gpage_freearray[nr_gpages] = 0;
 	list_add(&m->list, &huge_boot_pages[0]);
 	m->hstate = hstate;
+	m->flags = 0;
 	return 1;
 }
 
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5061279e5f73..10a7ce2b95e1 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -681,8 +681,12 @@ struct hstate {
 struct huge_bootmem_page {
 	struct list_head list;
 	struct hstate *hstate;
+	unsigned long flags;
 };
 
+#define HUGE_BOOTMEM_HVO		0x0001
+#define HUGE_BOOTMEM_ZONES_VALID	0x0002
+
 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
 int replace_free_hugepage_folios(unsigned long start_pfn, unsigned long end_pfn);
 struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7879e772c0d9..b48f8638c9af 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3220,6 +3220,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
 	INIT_LIST_HEAD(&m->list);
 	list_add(&m->list, &huge_boot_pages[node]);
 	m->hstate = h;
+	m->flags = 0;
 	return 1;
 }
 
@@ -3287,7 +3288,7 @@ static void __init prep_and_add_bootmem_folios(struct hstate *h,
 	struct folio *folio, *tmp_f;
 
 	/* Send list for bulk vmemmap optimization processing */
-	hugetlb_vmemmap_optimize_folios(h, folio_list);
+	hugetlb_vmemmap_optimize_bootmem_folios(h, folio_list);
 
 	list_for_each_entry_safe(folio, tmp_f, folio_list, lru) {
 		if (!folio_test_hugetlb_vmemmap_optimized(folio)) {
@@ -3316,6 +3317,13 @@ static bool __init hugetlb_bootmem_page_zones_valid(int nid,
 	unsigned long start_pfn;
 	bool valid;
 
+	if (m->flags & HUGE_BOOTMEM_ZONES_VALID) {
+		/*
+		 * Already validated, skip check.
+		 */
+		return true;
+	}
+
 	start_pfn = virt_to_phys(m) >> PAGE_SHIFT;
 
 	valid = !pfn_range_intersects_zones(nid, start_pfn,
@@ -3348,6 +3356,11 @@ static void __init hugetlb_bootmem_free_invalid_page(int nid, struct page *page,
 	}
 }
 
+static bool __init hugetlb_bootmem_page_prehvo(struct huge_bootmem_page *m)
+{
+	return (m->flags & HUGE_BOOTMEM_HVO);
+}
+
 /*
  * Put bootmem huge pages into the standard lists after mem_map is up.
  * Note: This only applies to gigantic (order > MAX_PAGE_ORDER) pages.
@@ -3388,6 +3401,15 @@ static void __init gather_bootmem_prealloc_node(unsigned long nid)
 		hugetlb_folio_init_vmemmap(folio, h,
 					   HUGETLB_VMEMMAP_RESERVE_PAGES);
 		init_new_hugetlb_folio(h, folio);
+
+		if (hugetlb_bootmem_page_prehvo(m))
+			/*
+			 * If pre-HVO was done, just set the
+			 * flag, the HVO code will then skip
+			 * this folio.
+			 */
+			folio_set_hugetlb_vmemmap_optimized(folio);
+
 		list_add(&folio->lru, &folio_list);
 
 		/*
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 5b484758f813..be6b33ecbc8e 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -649,14 +649,39 @@ static int hugetlb_vmemmap_split_folio(const struct hstate *h, struct folio *fol
 	return vmemmap_remap_split(vmemmap_start, vmemmap_end, vmemmap_reuse);
 }
 
-void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+static void __hugetlb_vmemmap_optimize_folios(struct hstate *h,
+					      struct list_head *folio_list,
+					      bool boot)
 {
 	struct folio *folio;
+	int nr_to_optimize;
 	LIST_HEAD(vmemmap_pages);
 	unsigned long flags = VMEMMAP_REMAP_NO_TLB_FLUSH | VMEMMAP_SYNCHRONIZE_RCU;
 
+	nr_to_optimize = 0;
 	list_for_each_entry(folio, folio_list, lru) {
-		int ret = hugetlb_vmemmap_split_folio(h, folio);
+		int ret;
+		unsigned long spfn, epfn;
+
+		if (boot && folio_test_hugetlb_vmemmap_optimized(folio)) {
+			/*
+			 * Already optimized by pre-HVO, just map the
+			 * mirrored tail page structs RO.
+			 */
+			spfn = (unsigned long)&folio->page;
+			epfn = spfn + pages_per_huge_page(h);
+			vmemmap_wrprotect_hvo(spfn, epfn, folio_nid(folio),
+					HUGETLB_VMEMMAP_RESERVE_SIZE);
+			register_page_bootmem_memmap(pfn_to_section_nr(spfn),
+					&folio->page,
+					HUGETLB_VMEMMAP_RESERVE_SIZE);
+			static_branch_inc(&hugetlb_optimize_vmemmap_key);
+			continue;
+		}
+
+		nr_to_optimize++;
+
+		ret = hugetlb_vmemmap_split_folio(h, folio);
 
 		/*
 		 * Spliting the PMD requires allocating a page, thus lets fail
@@ -668,6 +693,16 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
 			break;
 	}
 
+	if (!nr_to_optimize)
+		/*
+		 * All pre-HVO folios, nothing left to do. It's ok if
+		 * there is a mix of pre-HVO and not yet HVO-ed folios
+		 * here, as __hugetlb_vmemmap_optimize_folio() will
+		 * skip any folios that already have the optimized flag
+		 * set, see vmemmap_should_optimize_folio().
+		 */
+		goto out;
+
 	flush_tlb_all();
 
 	list_for_each_entry(folio, folio_list, lru) {
@@ -693,10 +728,21 @@ void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_l
 		}
 	}
 
+out:
 	flush_tlb_all();
 	free_vmemmap_page_list(&vmemmap_pages);
 }
 
+void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list)
+{
+	__hugetlb_vmemmap_optimize_folios(h, folio_list, false);
+}
+
+void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list)
+{
+	__hugetlb_vmemmap_optimize_folios(h, folio_list, true);
+}
+
 static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
 	{
 		.procname	= "hugetlb_optimize_vmemmap",
diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
index 2fcae92d3359..a6354a27e63f 100644
--- a/mm/hugetlb_vmemmap.h
+++ b/mm/hugetlb_vmemmap.h
@@ -24,6 +24,8 @@ long hugetlb_vmemmap_restore_folios(const struct hstate *h,
 					struct list_head *non_hvo_folios);
 void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio);
 void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
+void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);
+
 
 static inline unsigned int hugetlb_vmemmap_size(const struct hstate *h)
 {
@@ -64,6 +66,19 @@ static inline void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list
 {
 }
 
+static inline void hugetlb_vmemmap_init_early(int nid)
+{
+}
+
+static inline void hugetlb_vmemmap_init_late(int nid)
+{
+}
+
+static inline void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h,
+						struct list_head *folio_list)
+{
+}
+
 static inline unsigned int hugetlb_vmemmap_optimizable_size(const struct hstate *h)
 {
 	return 0;
-- 
2.48.1.262.g85cc9f2d1e-goog
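
Usage note: HUGE_BOOTMEM_ZONES_VALID lets an allocation path that
already knows its range lies within a single zone skip the
pfn_range_intersects_zones() check in hugetlb_bootmem_page_zones_valid().
A hedged sketch of such a path, modeled on __alloc_bootmem_huge_page()
above; the function and its zones_valid parameter are hypothetical:

    /*
     * Hypothetical example: queue a bootmem huge page, recording whether
     * its zone placement was already validated at allocation time.
     */
    static void __init hugetlb_bootmem_queue(struct hstate *h, int node,
    					 struct huge_bootmem_page *m,
    					 bool zones_valid)
    {
    	INIT_LIST_HEAD(&m->list);
    	list_add(&m->list, &huge_boot_pages[node]);
    	m->hstate = h;
    	m->flags = zones_valid ? HUGE_BOOTMEM_ZONES_VALID : 0;
    }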


