* [PATCH v4 0/2] kho: add support for deferred struct page init
From: Michal Clapinski @ 2026-02-20 16:52 UTC
To: Evangelos Petrongonas, Pasha Tatashin, Mike Rapoport,
Pratyush Yadav, Alexander Graf, kexec, linux-mm
Cc: linux-kernel, Andrew Morton, Michal Clapinski
When CONFIG_DEFERRED_STRUCT_PAGE_INIT (hereinafter DEFERRED) is
enabled, struct page initialization is deferred to parallel kthreads
that run later in the boot process.
Currently, KHO is incompatible with DEFERRED.
This series fixes that incompatibility.
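For context, the relevant boot ordering, as I understand it (a
simplified sketch of the mainline flow; intermediate calls elided):

  /*
   * start_kernel()
   *   mm_core_init()
   *     kho_memory_init()      <- KHO touches struct pages here, but...
   *   ...
   * kernel_init_freeable()
   *   page_alloc_init_late()   <- ...with DEFERRED, most struct pages
   *                               are initialized only here, by per-node
   *                               kthreads running deferred_init_memmap()
   */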
---
v4:
- added a new commit to fix deferred init of kho scratch
- switched to ulong when referring to pfn
v3:
- changed commit msg
- don't invoke early_pfn_to_nid if CONFIG_DEFERRED_STRUCT_PAGE_INIT=n
v2:
- updated a comment
Evangelos Petrongonas (1):
kho: make preserved pages compatible with deferred struct page init
Michal Clapinski (1):
kho: fix deferred init of kho scratch
include/linux/memblock.h | 2 -
kernel/liveupdate/Kconfig | 2 -
kernel/liveupdate/kexec_handover.c | 70 ++++++++++++++++--------------
mm/memblock.c | 22 ----------
4 files changed, 37 insertions(+), 59 deletions(-)
--
2.53.0.345.g96ddfc5eaa-goog
* [PATCH v4 1/2] kho: fix deferred init of kho scratch
From: Michal Clapinski @ 2026-02-20 16:52 UTC
To: Evangelos Petrongonas, Pasha Tatashin, Mike Rapoport,
Pratyush Yadav, Alexander Graf, kexec, linux-mm
Cc: linux-kernel, Andrew Morton, Michal Clapinski
Currently, mm_core_init calls kho_memory_init, which calls
kho_release_scratch.
If CONFIG_DEFERRED_STRUCT_PAGE_INIT (DEFERRED) is enabled,
kho_release_scratch first initializes the struct pages of the KHO
scratch areas. This is unnecessary: page_alloc_init_late will
initialize them anyway.
Next, kho_release_scratch marks the scratch pageblocks as MIGRATE_CMA.
If DEFERRED is enabled, that migratetype is overwritten later by
deferred_free_pages.
Fix this by removing kho_release_scratch entirely. Marking the
pageblocks as MIGRATE_CMA now happens in kho_init, which runs after
deferred_free_pages.
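The resulting ordering, sketched (assuming fs_initcalls run from
do_initcalls(), i.e. after page_alloc_init_late() has already waited
for the deferred-init kthreads):

  /*
   * kernel_init_freeable()
   *   page_alloc_init_late()   <- deferred init finishes here;
   *                               deferred_free_pages() sets migratetypes
   *   do_basic_setup()
   *     do_initcalls()
   *       kho_init()           <- fs_initcall: MIGRATE_CMA set here can
   *                               no longer be overwritten
   */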
Signed-off-by: Michal Clapinski <mclapinski@google.com>
---
include/linux/memblock.h | 2 --
kernel/liveupdate/kexec_handover.c | 43 ++++++++----------------------
mm/memblock.c | 22 ---------------
3 files changed, 11 insertions(+), 56 deletions(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 221118b5a16e..35d9cf6bbf7a 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -614,11 +614,9 @@ static inline void memtest_report_meminfo(struct seq_file *m) { }
#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
void memblock_set_kho_scratch_only(void);
void memblock_clear_kho_scratch_only(void);
-void memmap_init_kho_scratch_pages(void);
#else
static inline void memblock_set_kho_scratch_only(void) { }
static inline void memblock_clear_kho_scratch_only(void) { }
-static inline void memmap_init_kho_scratch_pages(void) {}
#endif
#endif /* _LINUX_MEMBLOCK_H */
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index b851b09a8e99..de167bfa2c8d 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -1377,11 +1377,6 @@ static __init int kho_init(void)
if (err)
goto err_free_fdt;
- if (fdt) {
- kho_in_debugfs_init(&kho_in.dbg, fdt);
- return 0;
- }
-
for (int i = 0; i < kho_scratch_cnt; i++) {
unsigned long base_pfn = PHYS_PFN(kho_scratch[i].addr);
unsigned long count = kho_scratch[i].size >> PAGE_SHIFT;
@@ -1397,8 +1392,17 @@ static __init int kho_init(void)
*/
kmemleak_ignore_phys(kho_scratch[i].addr);
for (pfn = base_pfn; pfn < base_pfn + count;
- pfn += pageblock_nr_pages)
- init_cma_reserved_pageblock(pfn_to_page(pfn));
+ pfn += pageblock_nr_pages) {
+ if (fdt)
+ init_cma_pageblock(pfn_to_page(pfn));
+ else
+ init_cma_reserved_pageblock(pfn_to_page(pfn));
+ }
+ }
+
+ if (fdt) {
+ kho_in_debugfs_init(&kho_in.dbg, fdt);
+ return 0;
}
WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, "fdt",
@@ -1421,35 +1425,10 @@ static __init int kho_init(void)
}
fs_initcall(kho_init);
-static void __init kho_release_scratch(void)
-{
- phys_addr_t start, end;
- u64 i;
-
- memmap_init_kho_scratch_pages();
-
- /*
- * Mark scratch mem as CMA before we return it. That way we
- * ensure that no kernel allocations happen on it. That means
- * we can reuse it as scratch memory again later.
- */
- __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
- MEMBLOCK_KHO_SCRATCH, &start, &end, NULL) {
- ulong start_pfn = pageblock_start_pfn(PFN_DOWN(start));
- ulong end_pfn = pageblock_align(PFN_UP(end));
- ulong pfn;
-
- for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages)
- init_pageblock_migratetype(pfn_to_page(pfn),
- MIGRATE_CMA, false);
- }
-}
-
void __init kho_memory_init(void)
{
if (kho_in.mem_map_phys) {
kho_scratch = phys_to_virt(kho_in.scratch_phys);
- kho_release_scratch();
kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys));
} else {
kho_reserve_scratch();
diff --git a/mm/memblock.c b/mm/memblock.c
index 6cff515d82f4..3eff19124fc0 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -959,28 +959,6 @@ __init void memblock_clear_kho_scratch_only(void)
{
kho_scratch_only = false;
}
-
-__init void memmap_init_kho_scratch_pages(void)
-{
- phys_addr_t start, end;
- unsigned long pfn;
- int nid;
- u64 i;
-
- if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT))
- return;
-
- /*
- * Initialize struct pages for free scratch memory.
- * The struct pages for reserved scratch memory will be set up in
- * reserve_bootmem_region()
- */
- __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
- MEMBLOCK_KHO_SCRATCH, &start, &end, &nid) {
- for (pfn = PFN_UP(start); pfn < PFN_DOWN(end); pfn++)
- init_deferred_page(pfn, nid);
- }
-}
#endif
/**
--
2.53.0.345.g96ddfc5eaa-goog
* [PATCH v4 2/2] kho: make preserved pages compatible with deferred struct page init
From: Michal Clapinski @ 2026-02-20 16:52 UTC
To: Evangelos Petrongonas, Pasha Tatashin, Mike Rapoport,
Pratyush Yadav, Alexander Graf, kexec, linux-mm
Cc: linux-kernel, Andrew Morton, Michal Clapinski
From: Evangelos Petrongonas <epetron@amazon.de>
When CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, struct page
initialization is deferred to parallel kthreads that run later
in the boot process.
During KHO restoration, deserialize_bitmap() writes metadata into the
struct page of each preserved memory region. However, if that struct
page has not been initialized yet, the write targets uninitialized
memory, potentially leading to errors like:

  BUG: unable to handle page fault for address: ...
Fix this by introducing kho_get_preserved_page(), which ensures that
all struct pages in a preserved region are initialized by calling
init_deferred_page() on each of them; init_deferred_page() is a no-op
when deferred init is disabled or when the struct page is already
initialized.
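(For readers without the helper in front of them, a rough sketch of the
init_deferred_page() contract; the two internals below are hypothetical
placeholders, not the real implementation:)

  static inline void init_deferred_page(unsigned long pfn, int nid)
  {
  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
          /* hypothetical internals: skip struct pages that the early
           * boot path has already initialized */
          if (page_already_initialised(pfn, nid))
                  return;
          initialise_struct_page(pfn, nid);
  #endif  /* without DEFERRED, this compiles to a no-op */
  }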
Signed-off-by: Evangelos Petrongonas <epetron@amazon.de>
Co-developed-by: Michal Clapinski <mclapinski@google.com>
Signed-off-by: Michal Clapinski <mclapinski@google.com>
Reviewed-by: Pratyush Yadav (Google) <pratyush@kernel.org>
Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
I don't think we can defer initializing those struct pages until
kho_restore_page(). I encountered this call stack (innermost first):
  page_zone(start_page)
  __pageblock_pfn_to_page
  set_zone_contiguous
  page_alloc_init_late
So, by the end of page_alloc_init_late(), struct pages are expected to
be initialized already. set_zone_contiguous() looks at the first and
last struct page of each pageblock in each populated zone to figure out
whether the zone is contiguous. If a KHO-preserved page lands on a
pageblock boundary, this leads to an access of an uninitialized struct
page.
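Roughly what that path does (a condensed sketch based on
set_zone_contiguous() and __pageblock_pfn_to_page(); extra checks
elided):

  static void set_zone_contiguous_sketch(struct zone *zone)
  {
          unsigned long pfn;

          for (pfn = zone->zone_start_pfn; pfn < zone_end_pfn(zone);
               pfn += pageblock_nr_pages) {
                  /*
                   * __pageblock_pfn_to_page() dereferences the first and
                   * last struct page of the block (e.g. page_zone()) --
                   * a fault if either is still uninitialized, e.g. a KHO
                   * page sitting on a pageblock boundary.
                   */
                  if (!__pageblock_pfn_to_page(pfn,
                                               pfn + pageblock_nr_pages,
                                               zone))
                          return;
          }
          zone->contiguous = true;
  }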
There is also page_ext_init(), which invokes pfn_to_nid(), which in
turn calls page_to_nid() for each section-aligned page.
There might be other places that do something similar. Therefore, it is
a good idea to have all struct pages initialized by the end of deferred
struct page init. That's why I'm resending Evangelos's patch.
I also tried to implement Pratyush's idea, i.e. iterating over zones
and then getting the node from the zone. I didn't notice any
performance difference even with 8GB of KHO-preserved memory.
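For reference, a sketch of that alternative (hypothetical helper, not
part of this series; it would replace the early_pfn_to_nid() call in
kho_get_preserved_page() below):

  /* Hypothetical: resolve the nid by walking populated zones instead
   * of doing an early_pfn_to_nid() memblock lookup per preservation. */
  static int __init kho_pfn_to_nid(unsigned long pfn)
  {
          struct zone *zone;

          for_each_populated_zone(zone)
                  if (zone_spans_pfn(zone, pfn))
                          return zone_to_nid(zone);

          return NUMA_NO_NODE;
  }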
---
kernel/liveupdate/Kconfig | 2 --
kernel/liveupdate/kexec_handover.c | 27 ++++++++++++++++++++++++++-
2 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/kernel/liveupdate/Kconfig b/kernel/liveupdate/Kconfig
index 1a8513f16ef7..c13af38ba23a 100644
--- a/kernel/liveupdate/Kconfig
+++ b/kernel/liveupdate/Kconfig
@@ -1,12 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
menu "Live Update and Kexec HandOver"
- depends on !DEFERRED_STRUCT_PAGE_INIT
config KEXEC_HANDOVER
bool "kexec handover"
depends on ARCH_SUPPORTS_KEXEC_HANDOVER && ARCH_SUPPORTS_KEXEC_FILE
- depends on !DEFERRED_STRUCT_PAGE_INIT
select MEMBLOCK_KHO_SCRATCH
select KEXEC_FILE
select LIBFDT
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index de167bfa2c8d..fe9c88fd2541 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -457,6 +457,31 @@ static int kho_mem_serialize(struct kho_out *kho_out)
return err;
}
+/*
+ * With CONFIG_DEFERRED_STRUCT_PAGE_INIT, struct pages in higher memory regions
+ * may not be initialized yet at the time KHO deserializes preserved memory.
+ * KHO uses the struct page to store metadata and a later initialization would
+ * overwrite it.
+ * Ensure all the struct pages in the preservation are
+ * initialized. deserialize_bitmap() marks the reservation as noinit to make
+ * sure they don't get re-initialized later.
+ */
+static struct page *__init kho_get_preserved_page(phys_addr_t phys,
+ unsigned int order)
+{
+ unsigned long pfn = PHYS_PFN(phys);
+ int nid;
+
+ if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT))
+ return pfn_to_page(pfn);
+
+ nid = early_pfn_to_nid(pfn);
+ for (unsigned long i = 0; i < (1UL << order); i++)
+ init_deferred_page(pfn + i, nid);
+
+ return pfn_to_page(pfn);
+}
+
static void __init deserialize_bitmap(unsigned int order,
struct khoser_mem_bitmap_ptr *elm)
{
@@ -467,7 +492,7 @@ static void __init deserialize_bitmap(unsigned int order,
int sz = 1 << (order + PAGE_SHIFT);
phys_addr_t phys =
elm->phys_start + (bit << (order + PAGE_SHIFT));
- struct page *page = phys_to_page(phys);
+ struct page *page = kho_get_preserved_page(phys, order);
union kho_page_info info;
memblock_reserve(phys, sz);
--
2.53.0.345.g96ddfc5eaa-goog