linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/2] mm: only mark/clear KHO scratch memory when needed
@ 2025-11-28 17:29 Usama Arif
  2025-11-28 17:29 ` [PATCH v3 1/2] mm/memblock: remove CONFIG_MEMBLOCK_KHO_SCRATCH option Usama Arif
  2025-11-28 17:29 ` [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed Usama Arif
  0 siblings, 2 replies; 8+ messages in thread
From: Usama Arif @ 2025-11-28 17:29 UTC (permalink / raw)
  To: rppt, Andrew Morton
  Cc: kas, changyuanl, graf, leitao, thevlad, pratyush, dave.hansen,
	linux-mm, linux-kernel, kernel-team, Usama Arif

The scratch memory for kexec handover is used to bootstrap the
kexec'ed kernel. Only the 1st 1MB is used as scratch, and it's a
hack to get around limitations with KHO. It is only needed when
CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
(both checked by is_kho_boot). Add check to prevent marking a KHO
scratch region unless needed.

CONFIG_MEMBLOCK_KHO_SCRATCH is also removed as it's only selected
by KEXEC_HANDOVER.

v2 -> v3:
- is_kho_boot already has stubs when CONFIG_KEXEC_HANDOVER=n,
  no need to move the functions under CONFIG_KEXEC_HANDOVER (Pratyush
  Yadav)
- Commit message improvements

v1 -> v2:
- Remove CONFIG_MEMBLOCK_KHO_SCRATCH (Kiryl Shutsemau)
- Move memblock_mark/clear_kho_scratch under already existing
  CONFIG_KEXEC_HANDOVER in memblock.c.


Usama Arif (2):
  mm/memblock: remove CONFIG_MEMBLOCK_KHO_SCRATCH option
  mm/memblock: only mark/clear KHO scratch memory when needed

 include/linux/memblock.h |  2 +-
 kernel/Kconfig.kexec     |  1 -
 mm/Kconfig               |  4 ----
 mm/memblock.c            | 18 +++++++++++-------
 4 files changed, 12 insertions(+), 13 deletions(-)

-- 
2.47.3



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 1/2] mm/memblock: remove CONFIG_MEMBLOCK_KHO_SCRATCH option
  2025-11-28 17:29 [PATCH v3 0/2] mm: only mark/clear KHO scratch memory when needed Usama Arif
@ 2025-11-28 17:29 ` Usama Arif
  2025-11-28 17:29 ` [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed Usama Arif
  1 sibling, 0 replies; 8+ messages in thread
From: Usama Arif @ 2025-11-28 17:29 UTC (permalink / raw)
  To: rppt, Andrew Morton
  Cc: kas, changyuanl, graf, leitao, thevlad, pratyush, dave.hansen,
	linux-mm, linux-kernel, kernel-team, Usama Arif

The only config option that selects this is CONFIG_KEXEC_HANDOVER.
Replace CONFIG_MEMBLOCK_KHO_SCRATCH with CONFIG_KEXEC_HANDOVER
to simplify code.
No functional change intended.

Suggested-by: Kiryl Shutsemau <kas@kernel.org>
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Kiryl Shutsemau <kas@kernel.org>
---
 include/linux/memblock.h | 2 +-
 kernel/Kconfig.kexec     | 1 -
 mm/Kconfig               | 4 ----
 mm/memblock.c            | 4 ++--
 4 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 221118b5a16e1..8bd9bcaccceb8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -611,7 +611,7 @@ static inline void early_memtest(phys_addr_t start, phys_addr_t end) { }
 static inline void memtest_report_meminfo(struct seq_file *m) { }
 #endif
 
-#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
+#ifdef CONFIG_KEXEC_HANDOVER
 void memblock_set_kho_scratch_only(void);
 void memblock_clear_kho_scratch_only(void);
 void memmap_init_kho_scratch_pages(void);
diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec
index 54e5810726176..06a7c43652cfd 100644
--- a/kernel/Kconfig.kexec
+++ b/kernel/Kconfig.kexec
@@ -98,7 +98,6 @@ config KEXEC_HANDOVER
 	bool "kexec handover"
 	depends on ARCH_SUPPORTS_KEXEC_HANDOVER && ARCH_SUPPORTS_KEXEC_FILE
 	depends on !DEFERRED_STRUCT_PAGE_INIT
-	select MEMBLOCK_KHO_SCRATCH
 	select KEXEC_FILE
 	select DEBUG_FS
 	select LIBFDT
diff --git a/mm/Kconfig b/mm/Kconfig
index bd0ea5454af82..6d6002f57c18f 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -442,10 +442,6 @@ config HAVE_GUP_FAST
 	depends on MMU
 	bool
 
-# Enable memblock support for scratch memory which is needed for kexec handover
-config MEMBLOCK_KHO_SCRATCH
-	bool
-
 # Don't discard allocated memory used to track "memory" and "reserved" memblocks
 # after early boot, so it can still be used to test for validity of memory.
 # Also, memblocks are updated with memory hot(un)plug.
diff --git a/mm/memblock.c b/mm/memblock.c
index e23e16618e9b3..8b13d5c28922a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -112,7 +112,7 @@ unsigned long min_low_pfn;
 unsigned long max_pfn;
 unsigned long long max_possible_pfn;
 
-#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
+#ifdef CONFIG_KEXEC_HANDOVER
 /* When set to true, only allocate from MEMBLOCK_KHO_SCRATCH ranges */
 static bool kho_scratch_only;
 #else
@@ -948,7 +948,7 @@ int __init_memblock memblock_physmem_add(phys_addr_t base, phys_addr_t size)
 }
 #endif
 
-#ifdef CONFIG_MEMBLOCK_KHO_SCRATCH
+#ifdef CONFIG_KEXEC_HANDOVER
 __init void memblock_set_kho_scratch_only(void)
 {
 	kho_scratch_only = true;
-- 
2.47.3



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed
  2025-11-28 17:29 [PATCH v3 0/2] mm: only mark/clear KHO scratch memory when needed Usama Arif
  2025-11-28 17:29 ` [PATCH v3 1/2] mm/memblock: remove CONFIG_MEMBLOCK_KHO_SCRATCH option Usama Arif
@ 2025-11-28 17:29 ` Usama Arif
  2025-11-30  8:52   ` Mike Rapoport
  1 sibling, 1 reply; 8+ messages in thread
From: Usama Arif @ 2025-11-28 17:29 UTC (permalink / raw)
  To: rppt, Andrew Morton
  Cc: kas, changyuanl, graf, leitao, thevlad, pratyush, dave.hansen,
	linux-mm, linux-kernel, kernel-team, Usama Arif

The scratch memory for kexec handover is used to bootstrap the
kexec'ed kernel. Only the 1st 1MB is used as scratch, and it's a
hack to get around limitations with KHO. It is only needed when
CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
(both checked by is_kho_boot). Add check to prevent marking a KHO
scratch region unless needed.

Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
Reported-by: Vlad Poenaru <thevlad@meta.com>
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
---
 mm/memblock.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index 8b13d5c28922a..913cf322eb89a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,8 +20,8 @@
 
 #ifdef CONFIG_KEXEC_HANDOVER
 #include <linux/libfdt.h>
-#include <linux/kexec_handover.h>
 #endif /* CONFIG_KEXEC_HANDOVER */
+#include <linux/kexec_handover.h>
 
 #include <asm/sections.h>
 #include <linux/io.h>
@@ -1126,8 +1126,10 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
  */
 __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
 {
-	return memblock_setclr_flag(&memblock.memory, base, size, 1,
-				    MEMBLOCK_KHO_SCRATCH);
+	if (is_kho_boot())
+		return memblock_setclr_flag(&memblock.memory, base, size, 1,
+					    MEMBLOCK_KHO_SCRATCH);
+	return 0;
 }
 
 /**
@@ -1140,8 +1142,10 @@ __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
  */
 __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
 {
-	return memblock_setclr_flag(&memblock.memory, base, size, 0,
-				    MEMBLOCK_KHO_SCRATCH);
+	if (is_kho_boot())
+		return memblock_setclr_flag(&memblock.memory, base, size, 0,
+					    MEMBLOCK_KHO_SCRATCH);
+	return 0;
 }
 
 static bool should_skip_region(struct memblock_type *type,
-- 
2.47.3



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed
  2025-11-28 17:29 ` [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed Usama Arif
@ 2025-11-30  8:52   ` Mike Rapoport
  2025-12-04 14:04     ` Pasha Tatashin
  0 siblings, 1 reply; 8+ messages in thread
From: Mike Rapoport @ 2025-11-30  8:52 UTC (permalink / raw)
  To: Usama Arif
  Cc: Andrew Morton, kas, changyuanl, graf, leitao, thevlad, pratyush,
	dave.hansen, linux-mm, linux-kernel, kernel-team

On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
> The scratch memory for kexec handover is used to bootstrap the
> kexec'ed kernel. Only the 1st 1MB is used as scratch, and its a
> hack to get around limitations with KHO. It is only needed when
> CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
> (both checked by is_kho_boot). Add check to prevent marking a KHO
> scratch region unless needed.

I'm going to rewrite the changelog and queue this for upstream:

The scratch memory for kexec handover is used to bootstrap the kexec'ed
kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
handover data passed from the previous kernel.

Currently x86 marks the first megabyte of memory as KHO scratch even for
non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.

Add a check to prevent marking KHO scratch regions unless they are actually
needed.

> Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
> Reported-by: Vlad Poenaru <thevlad@meta.com>
> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
> Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
> ---
>  mm/memblock.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 8b13d5c28922a..913cf322eb89a 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -20,8 +20,8 @@
>  
>  #ifdef CONFIG_KEXEC_HANDOVER
>  #include <linux/libfdt.h>
> -#include <linux/kexec_handover.h>
>  #endif /* CONFIG_KEXEC_HANDOVER */
> +#include <linux/kexec_handover.h>
>  
>  #include <asm/sections.h>
>  #include <linux/io.h>
> @@ -1126,8 +1126,10 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
>   */
>  __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
>  {
> -	return memblock_setclr_flag(&memblock.memory, base, size, 1,
> -				    MEMBLOCK_KHO_SCRATCH);
> +	if (is_kho_boot())
> +		return memblock_setclr_flag(&memblock.memory, base, size, 1,
> +					    MEMBLOCK_KHO_SCRATCH);
> +	return 0;
>  }
>  
>  /**
> @@ -1140,8 +1142,10 @@ __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
>   */
>  __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
>  {
> -	return memblock_setclr_flag(&memblock.memory, base, size, 0,
> -				    MEMBLOCK_KHO_SCRATCH);
> +	if (is_kho_boot())
> +		return memblock_setclr_flag(&memblock.memory, base, size, 0,
> +					    MEMBLOCK_KHO_SCRATCH);
> +	return 0;
>  }
>  
>  static bool should_skip_region(struct memblock_type *type,
> -- 
> 2.47.3
> 

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed
  2025-11-30  8:52   ` Mike Rapoport
@ 2025-12-04 14:04     ` Pasha Tatashin
  2025-12-04 14:51       ` Usama Arif
  0 siblings, 1 reply; 8+ messages in thread
From: Pasha Tatashin @ 2025-12-04 14:04 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Usama Arif, Andrew Morton, kas, changyuanl, graf, leitao,
	thevlad, pratyush, dave.hansen, linux-mm, linux-kernel,
	kernel-team

On Sun, Nov 30, 2025 at 3:52 AM Mike Rapoport <rppt@kernel.org> wrote:
>
> On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
> > The scratch memory for kexec handover is used to bootstrap the
> > kexec'ed kernel. Only the 1st 1MB is used as scratch, and its a
> > hack to get around limitations with KHO. It is only needed when
> > CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
> > (both checked by is_kho_boot). Add check to prevent marking a KHO
> > scratch region unless needed.
>
> I'm going to rewrite the changelog and queue this for upstream:
>
> The scratch memory for kexec handover is used to bootstrap the kexec'ed
> kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
> handover data passed from the previous kernel.
>
> Currently x86 marks the first megabyte of memory as KHO scratch even for
> non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.
>
> Add check to prevent marking a KHO scratch regions unless they are actually
> needed.
>
> > Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
> > Reported-by: Vlad Poenaru <thevlad@meta.com>
> > Signed-off-by: Usama Arif <usamaarif642@gmail.com>
> > Reviewed-by: Pratyush Yadav <pratyush@kernel.org>

This patch causes panic with my tests in linux-next.

[    0.000000] Kernel panic - not syncing: Cannot allocate 17280 bytes
for node 0 data
[    0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted
6.18.0-next-20251203 #2 PREEMPT(undef)
[    0.000000] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
BIOS 0.1 11/11/2019
[    0.000000] Call Trace:
[    0.000000]  <TASK>
[    0.000000]  ? dump_stack_lvl+0x4e/0x70
[    0.000000]  ? vpanic+0xcf/0x2b0
[    0.000000]  ? panic+0x66/0x66
[    0.000000]  ? alloc_node_data+0x32/0x90
[    0.000000]  ? numa_register_nodes+0x82/0x100
[    0.000000]  ? numa_init+0x36/0x120
[    0.000000]  ? setup_arch+0x667/0x7f0
[    0.000000]  ? start_kernel+0x58/0x640
[    0.000000]  ? x86_64_start_reservations+0x24/0x30
[    0.000000]  ? x86_64_start_kernel+0xc5/0xd0
[    0.000000]  ? common_startup_64+0x13e/0x148
[    0.000000]  </TASK>
[    0.000000] ---[ end Kernel panic - not syncing: Cannot allocate
17280 bytes for node 0 data ]---
PANIC: early exception 0x0d IP 10:ffffffff89007a13 error 763 cr2
0xffff991090a01000


> > ---
> >  mm/memblock.c | 14 +++++++++-----
> >  1 file changed, 9 insertions(+), 5 deletions(-)
> >
> > diff --git a/mm/memblock.c b/mm/memblock.c
> > index 8b13d5c28922a..913cf322eb89a 100644
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -20,8 +20,8 @@
> >
> >  #ifdef CONFIG_KEXEC_HANDOVER
> >  #include <linux/libfdt.h>
> > -#include <linux/kexec_handover.h>
> >  #endif /* CONFIG_KEXEC_HANDOVER */
> > +#include <linux/kexec_handover.h>
> >
> >  #include <asm/sections.h>
> >  #include <linux/io.h>
> > @@ -1126,8 +1126,10 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
> >   */
> >  __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
> >  {
> > -     return memblock_setclr_flag(&memblock.memory, base, size, 1,
> > -                                 MEMBLOCK_KHO_SCRATCH);
> > +     if (is_kho_boot())

Looks like memblock_mark_kho_scratch() is called before is_kho_boot()
is working:

[    0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
[    0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
[    0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
[    0.000000] KHO: kho_populate: is_kho_boot: 1
[    0.000000] memblock_mark_kho_scratch: is_kho_boot: 1
[    0.000000] memblock_clear_kho_scratch: is_kho_boot: 1
...

--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -1514,6 +1514,7 @@ void __init kho_populate(phys_addr_t fdt_phys,
u64 fdt_len,
        memblock_set_kho_scratch_only();

        kho_in.fdt_phys = fdt_phys;
+       pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
        kho_in.scratch_phys = scratch_phys;
        kho_scratch_cnt = scratch_cnt;
        pr_info("found kexec handover data.\n");
diff --git a/mm/memblock.c b/mm/memblock.c
index 87e7495a68c1..f55b5cdba5dd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1126,6 +1126,7 @@ int __init_memblock
memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
  */
 __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
 {
+       pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
        if (is_kho_boot())
                return memblock_setclr_flag(&memblock.memory, base, size, 1,
                                            MEMBLOCK_KHO_SCRATCH);
@@ -1142,6 +1143,7 @@ __init int memblock_mark_kho_scratch(phys_addr_t
base, phys_addr_t size)
  */
 __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
 {
+       pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
        if (is_kho_boot())
                return memblock_setclr_flag(&memblock.memory, base, size, 0,
                                            MEMBLOCK_KHO_SCRATCH);

> > +             return memblock_setclr_flag(&memblock.memory, base, size, 1,
> > +                                         MEMBLOCK_KHO_SCRATCH);
> > +     return 0;
> >  }
> >
> >  /**
> > @@ -1140,8 +1142,10 @@ __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
> >   */
> >  __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
> >  {
> > -     return memblock_setclr_flag(&memblock.memory, base, size, 0,
> > -                                 MEMBLOCK_KHO_SCRATCH);
> > +     if (is_kho_boot())
> > +             return memblock_setclr_flag(&memblock.memory, base, size, 0,
> > +                                         MEMBLOCK_KHO_SCRATCH);
> > +     return 0;
> >  }
> >
> >  static bool should_skip_region(struct memblock_type *type,
> > --
> > 2.47.3
> >
>
> --
> Sincerely yours,
> Mike.
>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed
  2025-12-04 14:04     ` Pasha Tatashin
@ 2025-12-04 14:51       ` Usama Arif
  2025-12-04 17:52         ` Mike Rapoport
  0 siblings, 1 reply; 8+ messages in thread
From: Usama Arif @ 2025-12-04 14:51 UTC (permalink / raw)
  To: Pasha Tatashin, Mike Rapoport
  Cc: Andrew Morton, kas, changyuanl, graf, leitao, thevlad, pratyush,
	dave.hansen, linux-mm, linux-kernel, kernel-team



On 04/12/2025 14:04, Pasha Tatashin wrote:
> On Sun, Nov 30, 2025 at 3:52 AM Mike Rapoport <rppt@kernel.org> wrote:
>>
>> On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
>>> The scratch memory for kexec handover is used to bootstrap the
>>> kexec'ed kernel. Only the 1st 1MB is used as scratch, and its a
>>> hack to get around limitations with KHO. It is only needed when
>>> CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
>>> (both checked by is_kho_boot). Add check to prevent marking a KHO
>>> scratch region unless needed.
>>
>> I'm going to rewrite the changelog and queue this for upstream:
>>
>> The scratch memory for kexec handover is used to bootstrap the kexec'ed
>> kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
>> handover data passed from the previous kernel.
>>
>> Currently x86 marks the first megabyte of memory as KHO scratch even for
>> non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.
>>
>> Add check to prevent marking a KHO scratch regions unless they are actually
>> needed.
>>
>>> Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
>>> Reported-by: Vlad Poenaru <thevlad@meta.com>
>>> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
>>> Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
> 
> This patch causes panic with my tests in linux-next.
> 
> [    0.000000] Kernel panic - not syncing: Cannot allocate 17280 bytes
> for node 0 data
> [    0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted
> 6.18.0-next-20251203 #2 PREEMPT(undef)
> [    0.000000] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
> BIOS 0.1 11/11/2019
> [    0.000000] Call Trace:
> [    0.000000]  <TASK>
> [    0.000000]  ? dump_stack_lvl+0x4e/0x70
> [    0.000000]  ? vpanic+0xcf/0x2b0
> [    0.000000]  ? panic+0x66/0x66
> [    0.000000]  ? alloc_node_data+0x32/0x90
> [    0.000000]  ? numa_register_nodes+0x82/0x100
> [    0.000000]  ? numa_init+0x36/0x120
> [    0.000000]  ? setup_arch+0x667/0x7f0
> [    0.000000]  ? start_kernel+0x58/0x640
> [    0.000000]  ? x86_64_start_reservations+0x24/0x30
> [    0.000000]  ? x86_64_start_kernel+0xc5/0xd0
> [    0.000000]  ? common_startup_64+0x13e/0x148
> [    0.000000]  </TASK>
> [    0.000000] ---[ end Kernel panic - not syncing: Cannot allocate
> 17280 bytes for node 0 data ]---
> PANIC: early exception 0x0d IP 10:ffffffff89007a13 error 763 cr2
> 0xffff991090a01000
> 


Thanks for reporting this and sorry for the bug!

So the patch was designed to remove the memblock_mark_kho_scratch in e820__memblock_setup if not
in KHO boot. But it broke memblock_mark_kho_scratch in kho_populate.
Moving kho_in.fdt_phys = fdt_phys to before the memblock_mark_kho_scratch call
should fix it. I don't have a setup where I can easily test KHO, but I think the below
should fix it?

TBH using fdt_phys to check if the boot is KHO might be a bit hacky? Is it possible
to have a better check for this?


diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 9dc51fab604f1..c331749e6452e 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -1483,6 +1483,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
                goto out;
        }
 
+       kho_in.fdt_phys = fdt_phys;
        /*
         * We pass a safe contiguous blocks of memory to use for early boot
         * purporses from the previous kernel so that we can resize the
@@ -1513,7 +1514,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
         */
        memblock_set_kho_scratch_only();
 
-       kho_in.fdt_phys = fdt_phys;
        kho_in.scratch_phys = scratch_phys;
        kho_scratch_cnt = scratch_cnt;
        pr_info("found kexec handover data.\n");
@@ -1524,7 +1524,10 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
        if (scratch)
                early_memunmap(scratch, scratch_len);
        if (err)
+       {
+               kho_in.fdt_phys = 0;
                pr_warn("disabling KHO revival: %d\n", err);
+       }
 }
 > 
>>> ---
>>>  mm/memblock.c | 14 +++++++++-----
>>>  1 file changed, 9 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/mm/memblock.c b/mm/memblock.c
>>> index 8b13d5c28922a..913cf322eb89a 100644
>>> --- a/mm/memblock.c
>>> +++ b/mm/memblock.c
>>> @@ -20,8 +20,8 @@
>>>
>>>  #ifdef CONFIG_KEXEC_HANDOVER
>>>  #include <linux/libfdt.h>
>>> -#include <linux/kexec_handover.h>
>>>  #endif /* CONFIG_KEXEC_HANDOVER */
>>> +#include <linux/kexec_handover.h>
>>>
>>>  #include <asm/sections.h>
>>>  #include <linux/io.h>
>>> @@ -1126,8 +1126,10 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
>>>   */
>>>  __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
>>>  {
>>> -     return memblock_setclr_flag(&memblock.memory, base, size, 1,
>>> -                                 MEMBLOCK_KHO_SCRATCH);
>>> +     if (is_kho_boot())
> 
> Looks like memblock_mark_kho_scratch() is called before is_kho_boot()
> is working:
> 
> [    0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
> [    0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
> [    0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
> [    0.000000] KHO: kho_populate: is_kho_boot: 1
> [    0.000000] memblock_mark_kho_scratch: is_kho_boot: 1
> [    0.000000] memblock_clear_kho_scratch: is_kho_boot: 1
> ...
> 
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -1514,6 +1514,7 @@ void __init kho_populate(phys_addr_t fdt_phys,
> u64 fdt_len,
>         memblock_set_kho_scratch_only();
> 
>         kho_in.fdt_phys = fdt_phys;
> +       pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
>         kho_in.scratch_phys = scratch_phys;
>         kho_scratch_cnt = scratch_cnt;
>         pr_info("found kexec handover data.\n");
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 87e7495a68c1..f55b5cdba5dd 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1126,6 +1126,7 @@ int __init_memblock
> memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
>   */
>  __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
>  {
> +       pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
>         if (is_kho_boot())
>                 return memblock_setclr_flag(&memblock.memory, base, size, 1,
>                                             MEMBLOCK_KHO_SCRATCH);
> @@ -1142,6 +1143,7 @@ __init int memblock_mark_kho_scratch(phys_addr_t
> base, phys_addr_t size)
>   */
>  __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
>  {
> +       pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
>         if (is_kho_boot())
>                 return memblock_setclr_flag(&memblock.memory, base, size, 0,
>                                             MEMBLOCK_KHO_SCRATCH);
> 
>>> +             return memblock_setclr_flag(&memblock.memory, base, size, 1,
>>> +                                         MEMBLOCK_KHO_SCRATCH);
>>> +     return 0;
>>>  }
>>>
>>>  /**
>>> @@ -1140,8 +1142,10 @@ __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
>>>   */
>>>  __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
>>>  {
>>> -     return memblock_setclr_flag(&memblock.memory, base, size, 0,
>>> -                                 MEMBLOCK_KHO_SCRATCH);
>>> +     if (is_kho_boot())
>>> +             return memblock_setclr_flag(&memblock.memory, base, size, 0,
>>> +                                         MEMBLOCK_KHO_SCRATCH);
>>> +     return 0;
>>>  }
>>>
>>>  static bool should_skip_region(struct memblock_type *type,
>>> --
>>> 2.47.3
>>>
>>
>> --
>> Sincerely yours,
>> Mike.
>>



^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed
  2025-12-04 14:51       ` Usama Arif
@ 2025-12-04 17:52         ` Mike Rapoport
  2025-12-04 19:27           ` Usama Arif
  0 siblings, 1 reply; 8+ messages in thread
From: Mike Rapoport @ 2025-12-04 17:52 UTC (permalink / raw)
  To: Usama Arif
  Cc: Pasha Tatashin, Andrew Morton, kas, changyuanl, graf, leitao,
	thevlad, pratyush, dave.hansen, linux-mm, linux-kernel,
	kernel-team

Hi Usama,

On Thu, Dec 04, 2025 at 02:51:00PM +0000, Usama Arif wrote:
> > On Sun, Nov 30, 2025 at 3:52 AM Mike Rapoport <rppt@kernel.org> wrote:
> >>
> >> On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
> >>> The scratch memory for kexec handover is used to bootstrap the
> >>> kexec'ed kernel. Only the 1st 1MB is used as scratch, and its a
> >>> hack to get around limitations with KHO. It is only needed when
> >>> CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
> >>> (both checked by is_kho_boot). Add check to prevent marking a KHO
> >>> scratch region unless needed.
> >>
> >> I'm going to rewrite the changelog and queue this for upstream:
> >>
> >> The scratch memory for kexec handover is used to bootstrap the kexec'ed
> >> kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
> >> handover data passed from the previous kernel.
> >>
> >> Currently x86 marks the first megabyte of memory as KHO scratch even for
> >> non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.
> >>
> >> Add check to prevent marking a KHO scratch regions unless they are actually
> >> needed.
> >>
> >>> Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
> >>> Reported-by: Vlad Poenaru <thevlad@meta.com>
> >>> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
> >>> Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
> > 
> > This patch causes panic with my tests in linux-next.
> > 
> > [    0.000000] Kernel panic - not syncing: Cannot allocate 17280 bytes
> > for node 0 data
> > [    0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted
> > 6.18.0-next-20251203 #2 PREEMPT(undef)
> > [    0.000000] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
> > BIOS 0.1 11/11/2019
> > [    0.000000] Call Trace:
> > [    0.000000]  <TASK>
> > [    0.000000]  ? dump_stack_lvl+0x4e/0x70
> > [    0.000000]  ? vpanic+0xcf/0x2b0
> > [    0.000000]  ? panic+0x66/0x66
> > [    0.000000]  ? alloc_node_data+0x32/0x90
> > [    0.000000]  ? numa_register_nodes+0x82/0x100
> > [    0.000000]  ? numa_init+0x36/0x120
> > [    0.000000]  ? setup_arch+0x667/0x7f0
> > [    0.000000]  ? start_kernel+0x58/0x640
> > [    0.000000]  ? x86_64_start_reservations+0x24/0x30
> > [    0.000000]  ? x86_64_start_kernel+0xc5/0xd0
> > [    0.000000]  ? common_startup_64+0x13e/0x148
> > [    0.000000]  </TASK>
> > [    0.000000] ---[ end Kernel panic - not syncing: Cannot allocate
> > 17280 bytes for node 0 data ]---
> > PANIC: early exception 0x0d IP 10:ffffffff89007a13 error 763 cr2
> > 0xffff991090a01000
> > 
> 
> Thanks for reporting this and sorry for the bug!
> 
> So the patch was designed to remove the memblock_mark_kho_scratch in e820__memblock_setup if not
> in KHO boot. But it broke memblock_mark_kho_scratch in kho_populate.
> Moving kho_in.fdt_phys = fdt_phys to before the memblock_mark_scratch
> should fix it. I dont have a setup where I can easily test KHO, but I think below
> should fix it?

This might, but this is too late for v6.19-rc1.
For now I'm dropping this series from memblock/for-next.
We can resume working on this after merge window closes.
 
> TBH using fdt_phys to check if the boot is KHO might be a bit hacky? Is it possible
> to have a better check for this?

Presence of KHO FDT is a clear indication that it is a KHO boot.
The issue is that during early boot ordering is hard and it's not always
clear in which order features and configuration are detected and used. 
 
> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> index 9dc51fab604f1..c331749e6452e 100644
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -1483,6 +1483,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>                 goto out;
>         }
>  
> +       kho_in.fdt_phys = fdt_phys;
>         /*
>          * We pass a safe contiguous blocks of memory to use for early boot
>          * purporses from the previous kernel so that we can resize the
> @@ -1513,7 +1514,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>          */
>         memblock_set_kho_scratch_only();
>  
> -       kho_in.fdt_phys = fdt_phys;
>         kho_in.scratch_phys = scratch_phys;
>         kho_scratch_cnt = scratch_cnt;
>         pr_info("found kexec handover data.\n");
> @@ -1524,7 +1524,10 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>         if (scratch)
>                 early_memunmap(scratch, scratch_len);
>         if (err)
> +       {
> +               kho_in.fdt_phys = 0;
>                 pr_warn("disabling KHO revival: %d\n", err);
> +       }
>  }

-- 
Sincerely yours,
Mike.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed
  2025-12-04 17:52         ` Mike Rapoport
@ 2025-12-04 19:27           ` Usama Arif
  0 siblings, 0 replies; 8+ messages in thread
From: Usama Arif @ 2025-12-04 19:27 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: Pasha Tatashin, Andrew Morton, kas, changyuanl, graf, leitao,
	thevlad, pratyush, dave.hansen, linux-mm, linux-kernel,
	kernel-team



On 04/12/2025 17:52, Mike Rapoport wrote:
> Hi Usama,
> 
> On Thu, Dec 04, 2025 at 02:51:00PM +0000, Usama Arif wrote:
>>> On Sun, Nov 30, 2025 at 3:52 AM Mike Rapoport <rppt@kernel.org> wrote:
>>>>
>>>> On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
>>>>> The scratch memory for kexec handover is used to bootstrap the
>>>>> kexec'ed kernel. Only the 1st 1MB is used as scratch, and it's a
>>>>> hack to get around limitations with KHO. It is only needed when
>>>>> CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
>>>>> (both checked by is_kho_boot). Add a check to prevent marking a KHO
>>>>> scratch region unless needed.
>>>>
>>>> I'm going to rewrite the changelog and queue this for upstream:
>>>>
>>>> The scratch memory for kexec handover is used to bootstrap the kexec'ed
>>>> kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
>>>> handover data passed from the previous kernel.
>>>>
>>>> Currently x86 marks the first megabyte of memory as KHO scratch even for
>>>> non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.
>>>>
>>>> Add a check to prevent marking KHO scratch regions unless they are actually
>>>> needed.
>>>>
>>>>> Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
>>>>> Reported-by: Vlad Poenaru <thevlad@meta.com>
>>>>> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
>>>>> Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
>>>
>>> This patch causes panic with my tests in linux-next.
>>>
>>> [    0.000000] Kernel panic - not syncing: Cannot allocate 17280 bytes
>>> for node 0 data
>>> [    0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted
>>> 6.18.0-next-20251203 #2 PREEMPT(undef)
>>> [    0.000000] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
>>> BIOS 0.1 11/11/2019
>>> [    0.000000] Call Trace:
>>> [    0.000000]  <TASK>
>>> [    0.000000]  ? dump_stack_lvl+0x4e/0x70
>>> [    0.000000]  ? vpanic+0xcf/0x2b0
>>> [    0.000000]  ? panic+0x66/0x66
>>> [    0.000000]  ? alloc_node_data+0x32/0x90
>>> [    0.000000]  ? numa_register_nodes+0x82/0x100
>>> [    0.000000]  ? numa_init+0x36/0x120
>>> [    0.000000]  ? setup_arch+0x667/0x7f0
>>> [    0.000000]  ? start_kernel+0x58/0x640
>>> [    0.000000]  ? x86_64_start_reservations+0x24/0x30
>>> [    0.000000]  ? x86_64_start_kernel+0xc5/0xd0
>>> [    0.000000]  ? common_startup_64+0x13e/0x148
>>> [    0.000000]  </TASK>
>>> [    0.000000] ---[ end Kernel panic - not syncing: Cannot allocate
>>> 17280 bytes for node 0 data ]---
>>> PANIC: early exception 0x0d IP 10:ffffffff89007a13 error 763 cr2
>>> 0xffff991090a01000
>>>
>>
>> Thanks for reporting this and sorry for the bug!
>>
>> So the patch was designed to skip the memblock_mark_kho_scratch call in
>> e820__memblock_setup when not in a KHO boot, but it broke the
>> memblock_mark_kho_scratch call in kho_populate. Moving
>> kho_in.fdt_phys = fdt_phys to before the memblock_mark_kho_scratch call
>> should fix it. I don't have a setup where I can easily test KHO, but I think
>> the change below should fix it?
> 
> This might, but this is too late for v6.19-rc1.
> For now I'm dropping this series from memblock/for-next.
> We can resume working on this after merge window closes.
>  

Yes makes sense.

How would you like me to proceed with the fix? Should I send just the fix now,
or these 2 patches plus the fix after the merge window closes?

Thanks!


>> TBH using fdt_phys to check if the boot is KHO might be a bit hacky? Is it possible
>> to have a better check for this?
> 
> Presence of KHO FDT is a clear indication that it is a KHO boot.
> The issue is that during early boot, ordering is hard, and it's not always
> clear in which order features and configuration are detected and used.
>  

ack

>> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
>> index 9dc51fab604f1..c331749e6452e 100644
>> --- a/kernel/liveupdate/kexec_handover.c
>> +++ b/kernel/liveupdate/kexec_handover.c
>> @@ -1483,6 +1483,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>>                 goto out;
>>         }
>>  
>> +       kho_in.fdt_phys = fdt_phys;
>>         /*
>>          * We pass a safe contiguous blocks of memory to use for early boot
>>          * purporses from the previous kernel so that we can resize the
>> @@ -1513,7 +1514,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>>          */
>>         memblock_set_kho_scratch_only();
>>  
>> -       kho_in.fdt_phys = fdt_phys;
>>         kho_in.scratch_phys = scratch_phys;
>>         kho_scratch_cnt = scratch_cnt;
>>         pr_info("found kexec handover data.\n");
>> @@ -1524,7 +1524,10 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>>         if (scratch)
>>                 early_memunmap(scratch, scratch_len);
>>         if (err)
>> +       {
>> +               kho_in.fdt_phys = 0;
>>                 pr_warn("disabling KHO revival: %d\n", err);
>> +       }
>>  }
> 



^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-12-04 19:27 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-11-28 17:29 [PATCH v3 0/2] mm: only mark/clear KHO scratch memory when needed Usama Arif
2025-11-28 17:29 ` [PATCH v3 1/2] mm/memblock: remove CONFIG_MEMBLOCK_KHO_SCRATCH option Usama Arif
2025-11-28 17:29 ` [PATCH v3 2/2] mm/memblock: only mark/clear KHO scratch memory when needed Usama Arif
2025-11-30  8:52   ` Mike Rapoport
2025-12-04 14:04     ` Pasha Tatashin
2025-12-04 14:51       ` Usama Arif
2025-12-04 17:52         ` Mike Rapoport
2025-12-04 19:27           ` Usama Arif

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox