From: Michal Clapinski <mclapinski@google.com>
To: Evangelos Petrongonas <epetron@amazon.de>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
Mike Rapoport <rppt@kernel.org>,
Pratyush Yadav <pratyush@kernel.org>,
Alexander Graf <graf@amazon.com>,
Samiullah Khawaja <skhawaja@google.com>,
kexec@lists.infradead.org, linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org,
Andrew Morton <akpm@linux-foundation.org>,
Michal Clapinski <mclapinski@google.com>
Subject: [PATCH v6 1/2] kho: fix deferred init of kho scratch
Date: Wed, 11 Mar 2026 13:55:38 +0100 [thread overview]
Message-ID: <20260311125539.4123672-2-mclapinski@google.com> (raw)
In-Reply-To: <20260311125539.4123672-1-mclapinski@google.com>
Currently, if CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, kho_release_scratch will initialize
the struct pages and set the migratetype of the KHO scratch areas. Unless the whole
scratch area fits below first_deferred_pfn, some of that initialization will later be
overwritten by either deferred_init_pages or memmap_init_reserved_pages.
To fix it, I initialize kho scratch early and modify every other
path to leave the scratch alone.
In detail:
1. Modify deferred_init_memmap_chunk to not initialize kho
scratch, since we already did that. Then, modify deferred_free_pages
to not set the migratetype. Also modify reserve_bootmem_region to skip
initializing kho scratch.
2. Since kho scratch is no longer initialized by any other code, we must
also initialize it ourselves on cold boot. On cold boot, memblock
doesn't mark the scratch regions as scratch, so the initialization
function must also be changed to not rely on memblock regions.
Signed-off-by: Michal Clapinski <mclapinski@google.com>
---
My previous idea of marking scratch as CMA late, after deferred struct
page init was done, was bad: allocations can be made before that point,
and if they land in KHO scratch, they become unpreservable.
Such was the case with IOMMU page tables.
---
include/linux/kexec_handover.h | 6 +++++
include/linux/memblock.h | 2 --
kernel/liveupdate/kexec_handover.c | 35 +++++++++++++++++++++++++++++-
mm/memblock.c | 22 -------------------
mm/mm_init.c | 17 ++++++++++-----
5 files changed, 52 insertions(+), 30 deletions(-)
diff --git a/include/linux/kexec_handover.h b/include/linux/kexec_handover.h
index ac4129d1d741..612a6da6127a 100644
--- a/include/linux/kexec_handover.h
+++ b/include/linux/kexec_handover.h
@@ -35,6 +35,7 @@ void *kho_restore_vmalloc(const struct kho_vmalloc *preservation);
int kho_add_subtree(const char *name, void *fdt);
void kho_remove_subtree(void *fdt);
int kho_retrieve_subtree(const char *name, phys_addr_t *phys);
+bool pfn_is_kho_scratch(unsigned long pfn);
void kho_memory_init(void);
@@ -109,6 +110,11 @@ static inline int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
return -EOPNOTSUPP;
}
+static inline bool pfn_is_kho_scratch(unsigned long pfn)
+{
+ return false;
+}
+
static inline void kho_memory_init(void) { }
static inline void kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6ec5e9ac0699..3e217414e12d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -614,11 +614,9 @@ static inline void memtest_report_meminfo(struct seq_file *m) { }
#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
void memblock_set_kho_scratch_only(void);
void memblock_clear_kho_scratch_only(void);
-void memmap_init_kho_scratch_pages(void);
#else
static inline void memblock_set_kho_scratch_only(void) { }
static inline void memblock_clear_kho_scratch_only(void) { }
-static inline void memmap_init_kho_scratch_pages(void) {}
#endif
#endif /* _LINUX_MEMBLOCK_H */
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 532f455c5d4f..09cb6660ade7 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -1327,6 +1327,23 @@ int kho_retrieve_subtree(const char *name, phys_addr_t *phys)
}
EXPORT_SYMBOL_GPL(kho_retrieve_subtree);
+bool pfn_is_kho_scratch(unsigned long pfn)
+{
+ unsigned int i;
+ phys_addr_t scratch_start, scratch_end, phys = __pfn_to_phys(pfn);
+
+ for (i = 0; i < kho_scratch_cnt; i++) {
+ scratch_start = kho_scratch[i].addr;
+ scratch_end = kho_scratch[i].addr + kho_scratch[i].size;
+
+ if (scratch_start <= phys && phys < scratch_end)
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(pfn_is_kho_scratch);
+
static int __init kho_mem_retrieve(const void *fdt)
{
struct kho_radix_tree tree;
@@ -1453,12 +1470,27 @@ static __init int kho_init(void)
}
fs_initcall(kho_init);
+static void __init kho_init_scratch_pages(void)
+{
+ if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT))
+ return;
+
+ for (int i = 0; i < kho_scratch_cnt; i++) {
+ unsigned long pfn = PFN_DOWN(kho_scratch[i].addr);
+ unsigned long end_pfn = PFN_UP(kho_scratch[i].addr + kho_scratch[i].size);
+ int nid = early_pfn_to_nid(pfn);
+
+ for (; pfn < end_pfn; pfn++)
+ init_deferred_page(pfn, nid);
+ }
+}
+
static void __init kho_release_scratch(void)
{
phys_addr_t start, end;
u64 i;
- memmap_init_kho_scratch_pages();
+ kho_init_scratch_pages();
/*
* Mark scratch mem as CMA before we return it. That way we
@@ -1487,6 +1519,7 @@ void __init kho_memory_init(void)
kho_in.fdt_phys = 0;
} else {
kho_reserve_scratch();
+ kho_init_scratch_pages();
}
}
diff --git a/mm/memblock.c b/mm/memblock.c
index b3ddfdec7a80..ae6a5af46bd7 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -959,28 +959,6 @@ __init void memblock_clear_kho_scratch_only(void)
{
kho_scratch_only = false;
}
-
-__init void memmap_init_kho_scratch_pages(void)
-{
- phys_addr_t start, end;
- unsigned long pfn;
- int nid;
- u64 i;
-
- if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT))
- return;
-
- /*
- * Initialize struct pages for free scratch memory.
- * The struct pages for reserved scratch memory will be set up in
- * reserve_bootmem_region()
- */
- __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
- MEMBLOCK_KHO_SCRATCH, &start, &end, &nid) {
- for (pfn = PFN_UP(start); pfn < PFN_DOWN(end); pfn++)
- init_deferred_page(pfn, nid);
- }
-}
#endif
/**
diff --git a/mm/mm_init.c b/mm/mm_init.c
index cec7bb758bdd..969048f9b320 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -798,7 +798,8 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
for_each_valid_pfn(pfn, PFN_DOWN(start), PFN_UP(end)) {
struct page *page = pfn_to_page(pfn);
- __init_deferred_page(pfn, nid);
+ if (!pfn_is_kho_scratch(pfn))
+ __init_deferred_page(pfn, nid);
/*
* no need for atomic set_bit because the struct
@@ -2008,9 +2009,12 @@ static void __init deferred_free_pages(unsigned long pfn,
/* Free a large naturally-aligned chunk if possible */
if (nr_pages == MAX_ORDER_NR_PAGES && IS_MAX_ORDER_ALIGNED(pfn)) {
- for (i = 0; i < nr_pages; i += pageblock_nr_pages)
+ for (i = 0; i < nr_pages; i += pageblock_nr_pages) {
+ if (pfn_is_kho_scratch(page_to_pfn(page + i)))
+ continue;
init_pageblock_migratetype(page + i, MIGRATE_MOVABLE,
false);
+ }
__free_pages_core(page, MAX_PAGE_ORDER, MEMINIT_EARLY);
return;
}
@@ -2019,7 +2023,7 @@ static void __init deferred_free_pages(unsigned long pfn,
accept_memory(PFN_PHYS(pfn), nr_pages * PAGE_SIZE);
for (i = 0; i < nr_pages; i++, page++, pfn++) {
- if (pageblock_aligned(pfn))
+ if (pageblock_aligned(pfn) && !pfn_is_kho_scratch(pfn))
init_pageblock_migratetype(page, MIGRATE_MOVABLE,
false);
__free_pages_core(page, 0, MEMINIT_EARLY);
@@ -2090,9 +2094,11 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
unsigned long mo_pfn = ALIGN(spfn + 1, MAX_ORDER_NR_PAGES);
unsigned long chunk_end = min(mo_pfn, epfn);
- nr_pages += deferred_init_pages(zone, spfn, chunk_end);
- deferred_free_pages(spfn, chunk_end - spfn);
+ // KHO scratch is MAX_ORDER_NR_PAGES aligned.
+ if (!pfn_is_kho_scratch(spfn))
+ deferred_init_pages(zone, spfn, chunk_end);
+ deferred_free_pages(spfn, chunk_end - spfn);
spfn = chunk_end;
if (can_resched)
@@ -2100,6 +2106,7 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
else
touch_nmi_watchdog();
}
+ nr_pages += epfn - spfn;
}
return nr_pages;
--
2.53.0.473.g4a7958ca14-goog
next prev parent reply other threads:[~2026-03-11 12:55 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-11 12:55 [PATCH v6 0/2] kho: add support for deferred struct page init Michal Clapinski
2026-03-11 12:55 ` Michal Clapinski [this message]
2026-03-12 12:50 ` [PATCH v6 1/2] kho: fix deferred init of kho scratch Mike Rapoport
2026-03-13 13:58 ` Pratyush Yadav
2026-03-11 12:55 ` [PATCH v6 2/2] kho: make preserved pages compatible with deferred struct page init Michal Clapinski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260311125539.4123672-2-mclapinski@google.com \
--to=mclapinski@google.com \
--cc=akpm@linux-foundation.org \
--cc=epetron@amazon.de \
--cc=graf@amazon.com \
--cc=kexec@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=pasha.tatashin@soleen.com \
--cc=pratyush@kernel.org \
--cc=rppt@kernel.org \
--cc=skhawaja@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox