* [PATCH v4 0/2] kho: add support for deferred struct page init @ 2026-02-20 16:52 Michal Clapinski 2026-02-20 16:52 ` [PATCH v4 1/2] kho: fix deferred init of kho scratch Michal Clapinski 2026-02-20 16:52 ` [PATCH v4 2/2] kho: make preserved pages compatible with deferred struct page init Michal Clapinski 0 siblings, 2 replies; 4+ messages in thread From: Michal Clapinski @ 2026-02-20 16:52 UTC (permalink / raw) To: Evangelos Petrongonas, Pasha Tatashin, Mike Rapoport, Pratyush Yadav, Alexander Graf, kexec, linux-mm Cc: linux-kernel, Andrew Morton, Michal Clapinski When CONFIG_DEFERRED_STRUCT_PAGE_INIT (hereinafter DEFERRED) is enabled, struct page initialization is deferred to parallel kthreads that run later in the boot process. Currently, KHO is incompatible with DEFERRED. This series fixes that incompatibility. --- v4: - added a new commit to fix deferred init of kho scratch - switched to ulong when referring to pfn v3: - changed commit msg - don't invoke early_pfn_to_nid if CONFIG_DEFERRED_STRUCT_PAGE_INIT=n v2: - updated a comment Evangelos Petrongonas (1): kho: make preserved pages compatible with deferred struct page init Michal Clapinski (1): kho: fix deferred init of kho scratch include/linux/memblock.h | 2 - kernel/liveupdate/Kconfig | 2 - kernel/liveupdate/kexec_handover.c | 70 ++++++++++++++++-------------- mm/memblock.c | 22 ---------- 4 files changed, 37 insertions(+), 59 deletions(-) -- 2.53.0.345.g96ddfc5eaa-goog ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v4 1/2] kho: fix deferred init of kho scratch 2026-02-20 16:52 [PATCH v4 0/2] kho: add support for deferred struct page init Michal Clapinski @ 2026-02-20 16:52 ` Michal Clapinski 2026-02-23 11:07 ` Mike Rapoport 2026-02-20 16:52 ` [PATCH v4 2/2] kho: make preserved pages compatible with deferred struct page init Michal Clapinski 1 sibling, 1 reply; 4+ messages in thread From: Michal Clapinski @ 2026-02-20 16:52 UTC (permalink / raw) To: Evangelos Petrongonas, Pasha Tatashin, Mike Rapoport, Pratyush Yadav, Alexander Graf, kexec, linux-mm Cc: linux-kernel, Andrew Morton, Michal Clapinski Currently, mm_core_init calls kho_memory_init, which calls kho_release_scratch. If DEFERRED is enabled, kho_release_scratch will first initialize the struct pages of kho scratch. This is not needed. We can just let page_alloc_init_late init it. Next, kho_release_scratch will mark scratch as MIGRATE_CMA. If DEFERRED is enabled, this will be overwritten later in deferred_free_pages. To fix this, I removed the whole kho_release_scratch. Marking the pageblocks as MIGRATE_CMA now happens in kho_init, which runs after deferred_free_pages. 
Signed-off-by: Michal Clapinski <mclapinski@google.com> --- include/linux/memblock.h | 2 -- kernel/liveupdate/kexec_handover.c | 43 ++++++++---------------------- mm/memblock.c | 22 --------------- 3 files changed, 11 insertions(+), 56 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 221118b5a16e..35d9cf6bbf7a 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -614,11 +614,9 @@ static inline void memtest_report_meminfo(struct seq_file *m) { } #ifdef CONFIG_MEMBLOCK_KHO_SCRATCH void memblock_set_kho_scratch_only(void); void memblock_clear_kho_scratch_only(void); -void memmap_init_kho_scratch_pages(void); #else static inline void memblock_set_kho_scratch_only(void) { } static inline void memblock_clear_kho_scratch_only(void) { } -static inline void memmap_init_kho_scratch_pages(void) {} #endif #endif /* _LINUX_MEMBLOCK_H */ diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index b851b09a8e99..de167bfa2c8d 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -1377,11 +1377,6 @@ static __init int kho_init(void) if (err) goto err_free_fdt; - if (fdt) { - kho_in_debugfs_init(&kho_in.dbg, fdt); - return 0; - } - for (int i = 0; i < kho_scratch_cnt; i++) { unsigned long base_pfn = PHYS_PFN(kho_scratch[i].addr); unsigned long count = kho_scratch[i].size >> PAGE_SHIFT; @@ -1397,8 +1392,17 @@ static __init int kho_init(void) */ kmemleak_ignore_phys(kho_scratch[i].addr); for (pfn = base_pfn; pfn < base_pfn + count; - pfn += pageblock_nr_pages) - init_cma_reserved_pageblock(pfn_to_page(pfn)); + pfn += pageblock_nr_pages) { + if (fdt) + init_cma_pageblock(pfn_to_page(pfn)); + else + init_cma_reserved_pageblock(pfn_to_page(pfn)); + } + } + + if (fdt) { + kho_in_debugfs_init(&kho_in.dbg, fdt); + return 0; } WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, "fdt", @@ -1421,35 +1425,10 @@ static __init int kho_init(void) } fs_initcall(kho_init); -static void 
__init kho_release_scratch(void) -{ - phys_addr_t start, end; - u64 i; - - memmap_init_kho_scratch_pages(); - - /* - * Mark scratch mem as CMA before we return it. That way we - * ensure that no kernel allocations happen on it. That means - * we can reuse it as scratch memory again later. - */ - __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, - MEMBLOCK_KHO_SCRATCH, &start, &end, NULL) { - ulong start_pfn = pageblock_start_pfn(PFN_DOWN(start)); - ulong end_pfn = pageblock_align(PFN_UP(end)); - ulong pfn; - - for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) - init_pageblock_migratetype(pfn_to_page(pfn), - MIGRATE_CMA, false); - } -} - void __init kho_memory_init(void) { if (kho_in.mem_map_phys) { kho_scratch = phys_to_virt(kho_in.scratch_phys); - kho_release_scratch(); kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys)); } else { kho_reserve_scratch(); diff --git a/mm/memblock.c b/mm/memblock.c index 6cff515d82f4..3eff19124fc0 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -959,28 +959,6 @@ __init void memblock_clear_kho_scratch_only(void) { kho_scratch_only = false; } - -__init void memmap_init_kho_scratch_pages(void) -{ - phys_addr_t start, end; - unsigned long pfn; - int nid; - u64 i; - - if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) - return; - - /* - * Initialize struct pages for free scratch memory. - * The struct pages for reserved scratch memory will be set up in - * reserve_bootmem_region() - */ - __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, - MEMBLOCK_KHO_SCRATCH, &start, &end, &nid) { - for (pfn = PFN_UP(start); pfn < PFN_DOWN(end); pfn++) - init_deferred_page(pfn, nid); - } -} #endif /** -- 2.53.0.345.g96ddfc5eaa-goog ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v4 1/2] kho: fix deferred init of kho scratch 2026-02-20 16:52 ` [PATCH v4 1/2] kho: fix deferred init of kho scratch Michal Clapinski @ 2026-02-23 11:07 ` Mike Rapoport 0 siblings, 0 replies; 4+ messages in thread From: Mike Rapoport @ 2026-02-23 11:07 UTC (permalink / raw) To: Michal Clapinski Cc: Evangelos Petrongonas, Pasha Tatashin, Pratyush Yadav, Alexander Graf, kexec, linux-mm, linux-kernel, Andrew Morton On Fri, Feb 20, 2026 at 05:52:02PM +0100, Michal Clapinski wrote: > Currently, mm_core_init calls kho_memory_init, which calls > kho_release_scratch. > > If DEFERRED is enabled, kho_release_scratch will first initialize the > struct pages of kho scratch. This is not needed. We can just let > page_alloc_init_late init it. > > Next, kho_release_scratch will mark scratch as MIGRATE_CMA. If DEFERRED > is enabled, this will be overwritten later in deferred_free_pages. > > To fix this, I removed the whole kho_release_scratch. > Marking the pageblocks as MIGRATE_CMA now happens in kho_init, which > runs after deferred_free_pages. 
> > Signed-off-by: Michal Clapinski <mclapinski@google.com> Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org> > --- > include/linux/memblock.h | 2 -- > kernel/liveupdate/kexec_handover.c | 43 ++++++++---------------------- > mm/memblock.c | 22 --------------- > 3 files changed, 11 insertions(+), 56 deletions(-) > > diff --git a/include/linux/memblock.h b/include/linux/memblock.h > index 221118b5a16e..35d9cf6bbf7a 100644 > --- a/include/linux/memblock.h > +++ b/include/linux/memblock.h > @@ -614,11 +614,9 @@ static inline void memtest_report_meminfo(struct seq_file *m) { } > #ifdef CONFIG_MEMBLOCK_KHO_SCRATCH > void memblock_set_kho_scratch_only(void); > void memblock_clear_kho_scratch_only(void); > -void memmap_init_kho_scratch_pages(void); > #else > static inline void memblock_set_kho_scratch_only(void) { } > static inline void memblock_clear_kho_scratch_only(void) { } > -static inline void memmap_init_kho_scratch_pages(void) {} > #endif > > #endif /* _LINUX_MEMBLOCK_H */ > diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c > index b851b09a8e99..de167bfa2c8d 100644 > --- a/kernel/liveupdate/kexec_handover.c > +++ b/kernel/liveupdate/kexec_handover.c > @@ -1377,11 +1377,6 @@ static __init int kho_init(void) > if (err) > goto err_free_fdt; > > - if (fdt) { > - kho_in_debugfs_init(&kho_in.dbg, fdt); > - return 0; > - } > - > for (int i = 0; i < kho_scratch_cnt; i++) { > unsigned long base_pfn = PHYS_PFN(kho_scratch[i].addr); > unsigned long count = kho_scratch[i].size >> PAGE_SHIFT; > @@ -1397,8 +1392,17 @@ static __init int kho_init(void) > */ > kmemleak_ignore_phys(kho_scratch[i].addr); > for (pfn = base_pfn; pfn < base_pfn + count; > - pfn += pageblock_nr_pages) > - init_cma_reserved_pageblock(pfn_to_page(pfn)); > + pfn += pageblock_nr_pages) { > + if (fdt) > + init_cma_pageblock(pfn_to_page(pfn)); > + else > + init_cma_reserved_pageblock(pfn_to_page(pfn)); > + } > + } > + > + if (fdt) { > + kho_in_debugfs_init(&kho_in.dbg, 
fdt); > + return 0; > } > > WARN_ON_ONCE(kho_debugfs_fdt_add(&kho_out.dbg, "fdt", > @@ -1421,35 +1425,10 @@ static __init int kho_init(void) > } > fs_initcall(kho_init); > > -static void __init kho_release_scratch(void) > -{ > - phys_addr_t start, end; > - u64 i; > - > - memmap_init_kho_scratch_pages(); > - > - /* > - * Mark scratch mem as CMA before we return it. That way we > - * ensure that no kernel allocations happen on it. That means > - * we can reuse it as scratch memory again later. > - */ > - __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, > - MEMBLOCK_KHO_SCRATCH, &start, &end, NULL) { > - ulong start_pfn = pageblock_start_pfn(PFN_DOWN(start)); > - ulong end_pfn = pageblock_align(PFN_UP(end)); > - ulong pfn; > - > - for (pfn = start_pfn; pfn < end_pfn; pfn += pageblock_nr_pages) > - init_pageblock_migratetype(pfn_to_page(pfn), > - MIGRATE_CMA, false); > - } > -} > - > void __init kho_memory_init(void) > { > if (kho_in.mem_map_phys) { > kho_scratch = phys_to_virt(kho_in.scratch_phys); > - kho_release_scratch(); > kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys)); > } else { > kho_reserve_scratch(); > diff --git a/mm/memblock.c b/mm/memblock.c > index 6cff515d82f4..3eff19124fc0 100644 > --- a/mm/memblock.c > +++ b/mm/memblock.c > @@ -959,28 +959,6 @@ __init void memblock_clear_kho_scratch_only(void) > { > kho_scratch_only = false; > } > - > -__init void memmap_init_kho_scratch_pages(void) > -{ > - phys_addr_t start, end; > - unsigned long pfn; > - int nid; > - u64 i; > - > - if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) > - return; > - > - /* > - * Initialize struct pages for free scratch memory. 
> - * The struct pages for reserved scratch memory will be set up in > - * reserve_bootmem_region() > - */ > - __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, > - MEMBLOCK_KHO_SCRATCH, &start, &end, &nid) { > - for (pfn = PFN_UP(start); pfn < PFN_DOWN(end); pfn++) > - init_deferred_page(pfn, nid); > - } > -} > #endif > > /** > -- > 2.53.0.345.g96ddfc5eaa-goog > -- Sincerely yours, Mike. ^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v4 2/2] kho: make preserved pages compatible with deferred struct page init 2026-02-20 16:52 [PATCH v4 0/2] kho: add support for deferred struct page init Michal Clapinski 2026-02-20 16:52 ` [PATCH v4 1/2] kho: fix deferred init of kho scratch Michal Clapinski @ 2026-02-20 16:52 ` Michal Clapinski 1 sibling, 0 replies; 4+ messages in thread From: Michal Clapinski @ 2026-02-20 16:52 UTC (permalink / raw) To: Evangelos Petrongonas, Pasha Tatashin, Mike Rapoport, Pratyush Yadav, Alexander Graf, kexec, linux-mm Cc: linux-kernel, Andrew Morton, Michal Clapinski From: Evangelos Petrongonas <epetron@amazon.de> When CONFIG_DEFERRED_STRUCT_PAGE_INIT is enabled, struct page initialization is deferred to parallel kthreads that run later in the boot process. During KHO restoration, deserialize_bitmap() writes metadata for each preserved memory region. However, if the struct page has not been initialized, this write targets uninitialized memory, potentially leading to errors like: BUG: unable to handle page fault for address: ... Fix this by introducing kho_get_preserved_page(), which ensures all struct pages in a preserved region are initialized by calling init_deferred_page() which is a no-op when deferred init is disabled or when the struct page is already initialized. Signed-off-by: Evangelos Petrongonas <epetron@amazon.de> Co-developed-by: Michal Clapinski <mclapinski@google.com> Signed-off-by: Michal Clapinski <mclapinski@google.com> Reviewed-by: Pratyush Yadav (Google) <pratyush@kernel.org> Reviewed-by: Pasha Tatashin <pasha.tatashin@soleen.com> Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org> --- I think we can't initialize those struct pages in kho_restore_page. I encountered this stack: page_zone(start_page) __pageblock_pfn_to_page set_zone_contiguous page_alloc_init_late So, at the end of page_alloc_init_late struct pages are expected to be already initialized. 
set_zone_contiguous() looks at the first and last struct page of each pageblock in each populated zone to figure out if the zone is contiguous. If a kho page lands on a pageblock boundary, this will lead to access of an uninitialized struct page. There is also page_ext_init that invokes pfn_to_nid, which calls page_to_nid for each section-aligned page. There might be other places that do something similar. Therefore, it's a good idea to initialize all struct pages by the end of deferred struct page init. That's why I'm resending Evangelos's patch. I also tried to implement Pratyush's idea, i.e. iterate over zones, then get node from zone. I didn't notice any performance difference even with 8GB of kho. --- kernel/liveupdate/Kconfig | 2 -- kernel/liveupdate/kexec_handover.c | 27 ++++++++++++++++++++++++++- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/kernel/liveupdate/Kconfig b/kernel/liveupdate/Kconfig index 1a8513f16ef7..c13af38ba23a 100644 --- a/kernel/liveupdate/Kconfig +++ b/kernel/liveupdate/Kconfig @@ -1,12 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only menu "Live Update and Kexec HandOver" - depends on !DEFERRED_STRUCT_PAGE_INIT config KEXEC_HANDOVER bool "kexec handover" depends on ARCH_SUPPORTS_KEXEC_HANDOVER && ARCH_SUPPORTS_KEXEC_FILE - depends on !DEFERRED_STRUCT_PAGE_INIT select MEMBLOCK_KHO_SCRATCH select KEXEC_FILE select LIBFDT diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c index de167bfa2c8d..fe9c88fd2541 100644 --- a/kernel/liveupdate/kexec_handover.c +++ b/kernel/liveupdate/kexec_handover.c @@ -457,6 +457,31 @@ static int kho_mem_serialize(struct kho_out *kho_out) return err; } +/* + * With CONFIG_DEFERRED_STRUCT_PAGE_INIT, struct pages in higher memory regions + * may not be initialized yet at the time KHO deserializes preserved memory. + * KHO uses the struct page to store metadata and a later initialization would + * overwrite it. 
+ * Ensure all the struct pages in the preservation are + * initialized. deserialize_bitmap() marks the reservation as noinit to make + * sure they don't get re-initialized later. + */ +static struct page *__init kho_get_preserved_page(phys_addr_t phys, + unsigned int order) +{ + unsigned long pfn = PHYS_PFN(phys); + int nid; + + if (!IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) + return pfn_to_page(pfn); + + nid = early_pfn_to_nid(pfn); + for (unsigned long i = 0; i < (1UL << order); i++) + init_deferred_page(pfn + i, nid); + + return pfn_to_page(pfn); +} + static void __init deserialize_bitmap(unsigned int order, struct khoser_mem_bitmap_ptr *elm) { @@ -467,7 +492,7 @@ static void __init deserialize_bitmap(unsigned int order, int sz = 1 << (order + PAGE_SHIFT); phys_addr_t phys = elm->phys_start + (bit << (order + PAGE_SHIFT)); - struct page *page = phys_to_page(phys); + struct page *page = kho_get_preserved_page(phys, order); union kho_page_info info; memblock_reserve(phys, sz); -- 2.53.0.345.g96ddfc5eaa-goog ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2026-02-23 11:07 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2026-02-20 16:52 [PATCH v4 0/2] kho: add support for deferred struct page init Michal Clapinski 2026-02-20 16:52 ` [PATCH v4 1/2] kho: fix deferred init of kho scratch Michal Clapinski 2026-02-23 11:07 ` Mike Rapoport 2026-02-20 16:52 ` [PATCH v4 2/2] kho: make preserved pages compatible with deferred struct page init Michal Clapinski
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox