linux-mm.kvack.org archive mirror
* [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
@ 2025-09-24 10:03 Alexander Potapenko
  2025-09-24 13:07 ` Markus Elfring
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Alexander Potapenko @ 2025-09-24 10:03 UTC (permalink / raw)
  To: glider
  Cc: akpm, david, vbabka, rppt, linux-mm, linux-kernel, elver,
	dvyukov, kasan-dev, Aleksandr Nogikh

When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
for metadata instead of returning them to the early allocator. The callers,
however, would unconditionally increment `totalram_pages`, assuming the
pages were always freed. This resulted in an incorrect calculation of the
total available RAM, causing the kernel to believe it had more memory than
it actually did.

This patch refactors `memblock_free_pages()` to return the number of pages
it successfully frees. If KMSAN stashes the pages, the function now
returns 0; otherwise, it returns the number of pages in the block.

The callers in `memblock.c` have been updated to use this return value,
ensuring that `totalram_pages` is incremented only by the number of pages
actually returned to the allocator. This corrects the total RAM accounting
when KMSAN is active.

Cc: Aleksandr Nogikh <nogikh@google.com>
Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
Signed-off-by: Alexander Potapenko <glider@google.com>
Reviewed-by: David Hildenbrand <david@redhat.com>

---
v2:
- Remove extern from the declaration of memblock_free_pages() in
  mm/internal.h as suggested by Mike Rapoport.
- Fix formatting in the definition of memblock_free_pages() in
  mm/mm_init.c as suggested by Mike Rapoport.
- Refactor memblock_free_late() to improve readability as suggested by
  David Hildenbrand.
---
 mm/internal.h |  4 ++--
 mm/memblock.c | 21 +++++++++++----------
 mm/mm_init.c  |  9 +++++----
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 45b725c3dc030..ac841c53653eb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -742,8 +742,8 @@ static inline void clear_zone_contiguous(struct zone *zone)
 extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void __putback_isolated_page(struct page *page, unsigned int order,
 				    int mt);
-extern void memblock_free_pages(struct page *page, unsigned long pfn,
-					unsigned int order);
+unsigned long memblock_free_pages(struct page *page, unsigned long pfn,
+				  unsigned int order);
 extern void __free_pages_core(struct page *page, unsigned int order,
 		enum meminit_context context);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 117d963e677c9..9b23baee7dfe7 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1826,6 +1826,7 @@ void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align,
 void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
 {
 	phys_addr_t cursor, end;
+	unsigned long freed_pages = 0;
 
 	end = base + size - 1;
 	memblock_dbg("%s: [%pa-%pa] %pS\n",
@@ -1834,10 +1835,9 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
 	cursor = PFN_UP(base);
 	end = PFN_DOWN(base + size);
 
-	for (; cursor < end; cursor++) {
-		memblock_free_pages(pfn_to_page(cursor), cursor, 0);
-		totalram_pages_inc();
-	}
+	for (; cursor < end; cursor++)
+		freed_pages += memblock_free_pages(pfn_to_page(cursor), cursor, 0);
+	totalram_pages_add(freed_pages);
 }
 
 /*
@@ -2259,9 +2259,11 @@ static void __init free_unused_memmap(void)
 #endif
 }
 
-static void __init __free_pages_memory(unsigned long start, unsigned long end)
+static unsigned long __init __free_pages_memory(unsigned long start,
+						unsigned long end)
 {
 	int order;
+	unsigned long freed = 0;
 
 	while (start < end) {
 		/*
@@ -2279,14 +2281,15 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
 		while (start + (1UL << order) > end)
 			order--;
 
-		memblock_free_pages(pfn_to_page(start), start, order);
+		freed += memblock_free_pages(pfn_to_page(start), start, order);
 
 		start += (1UL << order);
 	}
+	return freed;
 }
 
 static unsigned long __init __free_memory_core(phys_addr_t start,
-				 phys_addr_t end)
+					       phys_addr_t end)
 {
 	unsigned long start_pfn = PFN_UP(start);
 	unsigned long end_pfn = PFN_DOWN(end);
@@ -2297,9 +2300,7 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
 	if (start_pfn >= end_pfn)
 		return 0;
 
-	__free_pages_memory(start_pfn, end_pfn);
-
-	return end_pfn - start_pfn;
+	return __free_pages_memory(start_pfn, end_pfn);
 }
 
 static void __init memmap_init_reserved_pages(void)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5c21b3af216b2..9883612768511 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
 	return table;
 }
 
-void __init memblock_free_pages(struct page *page, unsigned long pfn,
-							unsigned int order)
+unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
+					 unsigned int order)
 {
 	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
 		int nid = early_pfn_to_nid(pfn);
 
 		if (!early_page_initialised(pfn, nid))
-			return;
+			return 0;
 	}
 
 	if (!kmsan_memblock_free_pages(page, order)) {
 		/* KMSAN will take care of these pages. */
-		return;
+		return 0;
 	}
 
 	/* pages were reserved and not allocated */
 	clear_page_tag_ref(page);
 	__free_pages_core(page, order, MEMINIT_EARLY);
+	return 1UL << order;
 }
 
 DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
-- 
2.51.0.534.gc79095c0ca-goog




* Re: [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
  2025-09-24 10:03 [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN Alexander Potapenko
@ 2025-09-24 13:07 ` Markus Elfring
  2025-09-24 13:23 ` Markus Elfring
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: Markus Elfring @ 2025-09-24 13:07 UTC (permalink / raw)
  To: Alexander Potapenko, linux-mm, kasan-dev
  Cc: LKML, Aleksandr Nogikh, Andrew Morton, David Hildenbrand,
	Dmitry Vyukov, Marco Elver, Mike Rapoport, Vlastimil Babka

…
> This patch refactors `memblock_free_pages()` …

See also:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/submitting-patches.rst?h=v6.17-rc7#n94

Regards,
Markus



* Re: [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
  2025-09-24 10:03 [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN Alexander Potapenko
  2025-09-24 13:07 ` Markus Elfring
@ 2025-09-24 13:23 ` Markus Elfring
  2025-09-24 13:34   ` Marco Elver
  2025-09-25  5:25 ` Mike Rapoport
  2025-09-25 12:37 ` SeongJae Park
  3 siblings, 1 reply; 10+ messages in thread
From: Markus Elfring @ 2025-09-24 13:23 UTC (permalink / raw)
  To: Alexander Potapenko, linux-mm, kasan-dev
  Cc: LKML, Aleksandr Nogikh, Andrew Morton, David Hildenbrand,
	Dmitry Vyukov, Marco Elver, Mike Rapoport, Vlastimil Babka

…
> +++ b/mm/mm_init.c
> @@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
> +unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
> +					 unsigned int order)
>  {
>  	if (!kmsan_memblock_free_pages(page, order)) {
>  		/* KMSAN will take care of these pages. */
> -		return;
> +		return 0;
>  	}
…

What do you think about omitting the curly brackets for this if statement?
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst?h=v6.17-rc7#n197

Regards,
Markus



* Re: [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
  2025-09-24 13:23 ` Markus Elfring
@ 2025-09-24 13:34   ` Marco Elver
  0 siblings, 0 replies; 10+ messages in thread
From: Marco Elver @ 2025-09-24 13:34 UTC (permalink / raw)
  To: Markus Elfring
  Cc: Alexander Potapenko, linux-mm, kasan-dev, LKML, Aleksandr Nogikh,
	Andrew Morton, David Hildenbrand, Dmitry Vyukov, Mike Rapoport,
	Vlastimil Babka

On Wed, 24 Sept 2025 at 15:23, Markus Elfring <Markus.Elfring@web.de> wrote:
>
> …
> > +++ b/mm/mm_init.c
> > @@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
> …
> > +unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
> > +                                      unsigned int order)
> >  {
> …
> >       if (!kmsan_memblock_free_pages(page, order)) {
> >               /* KMSAN will take care of these pages. */
> > -             return;
> > +             return 0;
> >       }
> …
>
> What do you think about omitting the curly brackets for this if statement?
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst?h=v6.17-rc7#n197

No - with the /* .. */ comment there are 2 lines in this block.



* Re: [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
  2025-09-24 10:03 [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN Alexander Potapenko
  2025-09-24 13:07 ` Markus Elfring
  2025-09-24 13:23 ` Markus Elfring
@ 2025-09-25  5:25 ` Mike Rapoport
  2025-09-25 12:37 ` SeongJae Park
  3 siblings, 0 replies; 10+ messages in thread
From: Mike Rapoport @ 2025-09-25  5:25 UTC (permalink / raw)
  To: Alexander Potapenko
  Cc: akpm, david, vbabka, linux-mm, linux-kernel, elver, dvyukov,
	kasan-dev, Aleksandr Nogikh

On Wed, Sep 24, 2025 at 12:03:01PM +0200, Alexander Potapenko wrote:
> When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
> for metadata instead of returning them to the early allocator. The callers,
> however, would unconditionally increment `totalram_pages`, assuming the
> pages were always freed. This resulted in an incorrect calculation of the
> total available RAM, causing the kernel to believe it had more memory than
> it actually did.
> 
> This patch refactors `memblock_free_pages()` to return the number of pages
> it successfully frees. If KMSAN stashes the pages, the function now
> returns 0; otherwise, it returns the number of pages in the block.
> 
> The callers in `memblock.c` have been updated to use this return value,
> ensuring that `totalram_pages` is incremented only by the number of pages
> actually returned to the allocator. This corrects the total RAM accounting
> when KMSAN is active.
> 
> Cc: Aleksandr Nogikh <nogikh@google.com>
> Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
> Signed-off-by: Alexander Potapenko <glider@google.com>
> Reviewed-by: David Hildenbrand <david@redhat.com>

Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

-- 
Sincerely yours,
Mike.



* Re: [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
  2025-09-24 10:03 [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN Alexander Potapenko
                   ` (2 preceding siblings ...)
  2025-09-25  5:25 ` Mike Rapoport
@ 2025-09-25 12:37 ` SeongJae Park
  2025-09-25 12:45   ` David Hildenbrand
  2025-09-25 14:50   ` Mike Rapoport
  3 siblings, 2 replies; 10+ messages in thread
From: SeongJae Park @ 2025-09-25 12:37 UTC (permalink / raw)
  To: Alexander Potapenko
  Cc: SeongJae Park, akpm, david, vbabka, rppt, linux-mm, linux-kernel,
	elver, dvyukov, kasan-dev, Aleksandr Nogikh

Hello,

On Wed, 24 Sep 2025 12:03:01 +0200 Alexander Potapenko <glider@google.com> wrote:

> When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
> for metadata instead of returning them to the early allocator. The callers,
> however, would unconditionally increment `totalram_pages`, assuming the
> pages were always freed. This resulted in an incorrect calculation of the
> total available RAM, causing the kernel to believe it had more memory than
> it actually did.
> 
> This patch refactors `memblock_free_pages()` to return the number of pages
> it successfully frees. If KMSAN stashes the pages, the function now
> returns 0; otherwise, it returns the number of pages in the block.
> 
> The callers in `memblock.c` have been updated to use this return value,
> ensuring that `totalram_pages` is incremented only by the number of pages
> actually returned to the allocator. This corrects the total RAM accounting
> when KMSAN is active.
> 
> Cc: Aleksandr Nogikh <nogikh@google.com>
> Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
> Signed-off-by: Alexander Potapenko <glider@google.com>
> Reviewed-by: David Hildenbrand <david@redhat.com>
[...]
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
>  	return table;
>  }
>  
> -void __init memblock_free_pages(struct page *page, unsigned long pfn,
> -							unsigned int order)
> +unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
> +					 unsigned int order)
>  {
>  	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
>  		int nid = early_pfn_to_nid(pfn);
>  
>  		if (!early_page_initialised(pfn, nid))
> -			return;
> +			return 0;
>  	}

I found that this patch on the mm-new tree makes my test machine (QEMU)
report much less MemTotal even though KMSAN is disabled.  Modifying the
above part to be treated as a successful free (returning '1UL << order')
fixed my issue.  Because the commit message says the purpose of this
change is only for KMSAN-stashed memory, maybe the above behavior change
is not really intended?

I'm not familiar with this code, so I'm unsure whether the workaround is
the right fix.  But since I have no time to look into this in depth for
now, I'm reporting it first.
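
For reference, the exact change I tested as a workaround (a sketch of my
local diff, not a proposed fix) was to treat the deferred branch as a
successful free again:

	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
		int nid = early_pfn_to_nid(pfn);

		/*
		 * Workaround: keep counting pages that deferred init
		 * will free later, as the code did before this patch.
		 */
		if (!early_page_initialised(pfn, nid))
			return 1UL << order;
	}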

>  
>  	if (!kmsan_memblock_free_pages(page, order)) {
>  		/* KMSAN will take care of these pages. */
> -		return;
> +		return 0;
>  	}

I understand this part is the intended change, of course.


Thanks,
SJ

[...]



* Re: [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
  2025-09-25 12:37 ` SeongJae Park
@ 2025-09-25 12:45   ` David Hildenbrand
  2025-09-25 14:50   ` Mike Rapoport
  1 sibling, 0 replies; 10+ messages in thread
From: David Hildenbrand @ 2025-09-25 12:45 UTC (permalink / raw)
  To: SeongJae Park, Alexander Potapenko
  Cc: akpm, vbabka, rppt, linux-mm, linux-kernel, elver, dvyukov,
	kasan-dev, Aleksandr Nogikh

On 25.09.25 14:37, SeongJae Park wrote:
> Hello,
> 
> On Wed, 24 Sep 2025 12:03:01 +0200 Alexander Potapenko <glider@google.com> wrote:
> 
>> When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
>> for metadata instead of returning them to the early allocator. The callers,
>> however, would unconditionally increment `totalram_pages`, assuming the
>> pages were always freed. This resulted in an incorrect calculation of the
>> total available RAM, causing the kernel to believe it had more memory than
>> it actually did.
>>
>> This patch refactors `memblock_free_pages()` to return the number of pages
>> it successfully frees. If KMSAN stashes the pages, the function now
>> returns 0; otherwise, it returns the number of pages in the block.
>>
>> The callers in `memblock.c` have been updated to use this return value,
>> ensuring that `totalram_pages` is incremented only by the number of pages
>> actually returned to the allocator. This corrects the total RAM accounting
>> when KMSAN is active.
>>
>> Cc: Aleksandr Nogikh <nogikh@google.com>
>> Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
>> Signed-off-by: Alexander Potapenko <glider@google.com>
>> Reviewed-by: David Hildenbrand <david@redhat.com>
> [...]
>> --- a/mm/mm_init.c
>> +++ b/mm/mm_init.c
>> @@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
>>   	return table;
>>   }
>>   
>> -void __init memblock_free_pages(struct page *page, unsigned long pfn,
>> -							unsigned int order)
>> +unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
>> +					 unsigned int order)
>>   {
>>   	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
>>   		int nid = early_pfn_to_nid(pfn);
>>   
>>   		if (!early_page_initialised(pfn, nid))
>> -			return;
>> +			return 0;
>>   	}
> 
> I found that this patch on the mm-new tree makes my test machine (QEMU)
> report much less MemTotal even though KMSAN is disabled.  Modifying the
> above part to be treated as a successful free (returning '1UL << order')
> fixed my issue.  Because the commit message says the purpose of this
> change is only for KMSAN-stashed memory, maybe the above behavior change
> is not really intended?
> 
> I'm not familiar with this code, so I'm unsure whether the workaround is
> the right fix.  But since I have no time to look into this in depth for
> now, I'm reporting it first.

Good point, I think there is something off here.

-- 
Cheers

David / dhildenb




* Re: [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
  2025-09-25 12:37 ` SeongJae Park
  2025-09-25 12:45   ` David Hildenbrand
@ 2025-09-25 14:50   ` Mike Rapoport
  2025-09-26  0:25     ` Wei Yang
  1 sibling, 1 reply; 10+ messages in thread
From: Mike Rapoport @ 2025-09-25 14:50 UTC (permalink / raw)
  To: SeongJae Park
  Cc: Alexander Potapenko, akpm, david, vbabka, linux-mm, linux-kernel,
	elver, dvyukov, kasan-dev, Aleksandr Nogikh

On Thu, Sep 25, 2025 at 05:37:59AM -0700, SeongJae Park wrote:
> Hello,
> 
> On Wed, 24 Sep 2025 12:03:01 +0200 Alexander Potapenko <glider@google.com> wrote:
> 
> > When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
> > for metadata instead of returning them to the early allocator. The callers,
> > however, would unconditionally increment `totalram_pages`, assuming the
> > pages were always freed. This resulted in an incorrect calculation of the
> > total available RAM, causing the kernel to believe it had more memory than
> > it actually did.
> > 
> > This patch refactors `memblock_free_pages()` to return the number of pages
> > it successfully frees. If KMSAN stashes the pages, the function now
> > returns 0; otherwise, it returns the number of pages in the block.
> > 
> > The callers in `memblock.c` have been updated to use this return value,
> > ensuring that `totalram_pages` is incremented only by the number of pages
> > actually returned to the allocator. This corrects the total RAM accounting
> > when KMSAN is active.
> > 
> > Cc: Aleksandr Nogikh <nogikh@google.com>
> > Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
> > Signed-off-by: Alexander Potapenko <glider@google.com>
> > Reviewed-by: David Hildenbrand <david@redhat.com>
> [...]
> > --- a/mm/mm_init.c
> > +++ b/mm/mm_init.c
> > @@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
> >  	return table;
> >  }
> >  
> > -void __init memblock_free_pages(struct page *page, unsigned long pfn,
> > -							unsigned int order)
> > +unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
> > +					 unsigned int order)
> >  {
> >  	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
> >  		int nid = early_pfn_to_nid(pfn);
> >  
> >  		if (!early_page_initialised(pfn, nid))
> > -			return;
> > +			return 0;
> >  	}
> 
> I found that this patch on the mm-new tree makes my test machine (QEMU)
> report much less MemTotal even though KMSAN is disabled.  Modifying the
> above part to be treated as a successful free (returning '1UL << order')
> fixed my issue.  Because the commit message says the purpose of this
> change is only for KMSAN-stashed memory, maybe the above behavior change
> is not really intended?
> 
> I'm not familiar with this code, so I'm unsure whether the workaround is
> the right fix.  But since I have no time to look into this in depth for
> now, I'm reporting it first.

With DEFERRED_STRUCT_PAGE_INIT we count totalram_pages in
memblock_free_all() but actually free them in deferred_init_memmap() and
deferred_grow_zone().

So returning '1UL << order' is a correct workaround, but the proper fix
should update totalram_pages in the deferred path IMHO.
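
Something along these lines, as a completely untested sketch (it assumes
the nr_pages accounting that deferred_grow_zone() already does; the exact
plumbing in deferred_init_memmap() may differ):

	/* Sketch: credit MemTotal where the deferred path frees pages. */
	nr_pages += deferred_init_maxorder(&i, zone, &spfn, &epfn);
	/* ... */
	totalram_pages_add(nr_pages);	/* instead of in memblock_free_all() */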

-- 
Sincerely yours,
Mike.



* Re: [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
  2025-09-25 14:50   ` Mike Rapoport
@ 2025-09-26  0:25     ` Wei Yang
  0 siblings, 0 replies; 10+ messages in thread
From: Wei Yang @ 2025-09-26  0:25 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: SeongJae Park, Alexander Potapenko, akpm, david, vbabka,
	linux-mm, linux-kernel, elver, dvyukov, kasan-dev,
	Aleksandr Nogikh

On Thu, Sep 25, 2025 at 05:50:53PM +0300, Mike Rapoport wrote:
>On Thu, Sep 25, 2025 at 05:37:59AM -0700, SeongJae Park wrote:
>> Hello,
>> 
>> On Wed, 24 Sep 2025 12:03:01 +0200 Alexander Potapenko <glider@google.com> wrote:
>> 
>> > When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
>> > for metadata instead of returning them to the early allocator. The callers,
>> > however, would unconditionally increment `totalram_pages`, assuming the
>> > pages were always freed. This resulted in an incorrect calculation of the
>> > total available RAM, causing the kernel to believe it had more memory than
>> > it actually did.
>> > 
>> > This patch refactors `memblock_free_pages()` to return the number of pages
>> > it successfully frees. If KMSAN stashes the pages, the function now
>> > returns 0; otherwise, it returns the number of pages in the block.
>> > 
>> > The callers in `memblock.c` have been updated to use this return value,
>> > ensuring that `totalram_pages` is incremented only by the number of pages
>> > actually returned to the allocator. This corrects the total RAM accounting
>> > when KMSAN is active.
>> > 
>> > Cc: Aleksandr Nogikh <nogikh@google.com>
>> > Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
>> > Signed-off-by: Alexander Potapenko <glider@google.com>
>> > Reviewed-by: David Hildenbrand <david@redhat.com>
>> [...]
>> > --- a/mm/mm_init.c
>> > +++ b/mm/mm_init.c
>> > @@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
>> >  	return table;
>> >  }
>> >  
>> > -void __init memblock_free_pages(struct page *page, unsigned long pfn,
>> > -							unsigned int order)
>> > +unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
>> > +					 unsigned int order)
>> >  {
>> >  	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
>> >  		int nid = early_pfn_to_nid(pfn);
>> >  
>> >  		if (!early_page_initialised(pfn, nid))
>> > -			return;
>> > +			return 0;
>> >  	}
>> 
>> I found that this patch on the mm-new tree makes my test machine (QEMU)
>> report much less MemTotal even though KMSAN is disabled.  Modifying the
>> above part to be treated as a successful free (returning '1UL << order')
>> fixed my issue.  Because the commit message says the purpose of this
>> change is only for KMSAN-stashed memory, maybe the above behavior change
>> is not really intended?
>> 
>> I'm not familiar with this code, so I'm unsure whether the workaround is
>> the right fix.  But since I have no time to look into this in depth for
>> now, I'm reporting it first.
>
>With DEFERRED_STRUCT_PAGE_INIT we count totalram_pages in
>memblock_free_all() but actually free them in deferred_init_memmap() and
>deferred_grow_zone().
>
>So returning '1UL << order' is a correct workaround, but the proper fix
>should update totalram_pages in the deferred path IMHO.
>

Maybe I did something similar at [1].

But this hit a problem for shmem, since shmem_fill_super() uses
totalram_pages(). Before DEFERRED_STRUCT_PAGE_INIT finishes, the size is
too small, so the system can't boot.

Per my understanding, shmem_fill_super() could be invoked after
memblock_discard(), so it is not appropriate to refactor it to get the RAM
size from memblock.

Could we adjust shmem_default_max_blocks()/shmem_default_max_inodes() to
use memblock at the boot stage and use totalram_pages() after the system
is fully up? Or any other suggestions?

[1]: http://lkml.kernel.org/r/20240726003612.5578-1-richard.weiyang@gmail.com
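
To illustrate, roughly what I have in mind (only a sketch; it also assumes
the memblock data is still valid, which as noted above is not guaranteed
after memblock_discard()):

static unsigned long shmem_default_max_blocks(void)
{
	/*
	 * Sketch: before the system is fully up, totalram_pages() may
	 * still be missing the deferred pages, so fall back to the
	 * memblock view of physical memory.
	 */
	if (system_state < SYSTEM_RUNNING)
		return PHYS_PFN(memblock_phys_mem_size()) / 2;

	return totalram_pages() / 2;
}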

>-- 
>Sincerely yours,
>Mike.

-- 
Wei Yang
Help you, Help me



* [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN
@ 2025-09-24  9:56 Alexander Potapenko
  0 siblings, 0 replies; 10+ messages in thread
From: Alexander Potapenko @ 2025-09-24  9:56 UTC (permalink / raw)
  To: glider
  Cc: akpm, david, vbabka, rppt, linux-mm, linux-kernel, elver,
	dvyukov, kasan-dev, Aleksandr Nogikh

When KMSAN is enabled, `kmsan_memblock_free_pages()` can hold back pages
for metadata instead of returning them to the early allocator. The callers,
however, would unconditionally increment `totalram_pages`, assuming the
pages were always freed. This resulted in an incorrect calculation of the
total available RAM, causing the kernel to believe it had more memory than
it actually did.

This patch refactors `memblock_free_pages()` to return the number of pages
it successfully frees. If KMSAN stashes the pages, the function now
returns 0; otherwise, it returns the number of pages in the block.

The callers in `memblock.c` have been updated to use this return value,
ensuring that `totalram_pages` is incremented only by the number of pages
actually returned to the allocator. This corrects the total RAM accounting
when KMSAN is active.

Cc: Aleksandr Nogikh <nogikh@google.com>
Fixes: 3c2065098260 ("init: kmsan: call KMSAN initialization routines")
Signed-off-by: Alexander Potapenko <glider@google.com>
Reviewed-by: David Hildenbrand <david@redhat.com>

---
v2:
- Remove extern from the declaration of memblock_free_pages() in
  mm/internal.h as suggested by Mike Rapoport.
- Fix formatting in the definition of memblock_free_pages() in
  mm/mm_init.c as suggested by Mike Rapoport.
- Refactor memblock_free_late() to improve readability as suggested by
  David Hildenbrand.
---
 mm/internal.h |  4 ++--
 mm/memblock.c | 21 +++++++++++----------
 mm/mm_init.c  |  9 +++++----
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 45b725c3dc030..ac841c53653eb 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -742,8 +742,8 @@ static inline void clear_zone_contiguous(struct zone *zone)
 extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void __putback_isolated_page(struct page *page, unsigned int order,
 				    int mt);
-extern void memblock_free_pages(struct page *page, unsigned long pfn,
-					unsigned int order);
+unsigned long memblock_free_pages(struct page *page, unsigned long pfn,
+				  unsigned int order);
 extern void __free_pages_core(struct page *page, unsigned int order,
 		enum meminit_context context);
 
diff --git a/mm/memblock.c b/mm/memblock.c
index 117d963e677c9..9b23baee7dfe7 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1826,6 +1826,7 @@ void *__init __memblock_alloc_or_panic(phys_addr_t size, phys_addr_t align,
 void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
 {
 	phys_addr_t cursor, end;
+	unsigned long freed_pages = 0;
 
 	end = base + size - 1;
 	memblock_dbg("%s: [%pa-%pa] %pS\n",
@@ -1834,10 +1835,9 @@ void __init memblock_free_late(phys_addr_t base, phys_addr_t size)
 	cursor = PFN_UP(base);
 	end = PFN_DOWN(base + size);
 
-	for (; cursor < end; cursor++) {
-		memblock_free_pages(pfn_to_page(cursor), cursor, 0);
-		totalram_pages_inc();
-	}
+	for (; cursor < end; cursor++)
+		freed_pages += memblock_free_pages(pfn_to_page(cursor), cursor, 0);
+	totalram_pages_add(freed_pages);
 }
 
 /*
@@ -2259,9 +2259,11 @@ static void __init free_unused_memmap(void)
 #endif
 }
 
-static void __init __free_pages_memory(unsigned long start, unsigned long end)
+static unsigned long __init __free_pages_memory(unsigned long start,
+						unsigned long end)
 {
 	int order;
+	unsigned long freed = 0;
 
 	while (start < end) {
 		/*
@@ -2279,14 +2281,15 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
 		while (start + (1UL << order) > end)
 			order--;
 
-		memblock_free_pages(pfn_to_page(start), start, order);
+		freed += memblock_free_pages(pfn_to_page(start), start, order);
 
 		start += (1UL << order);
 	}
+	return freed;
 }
 
 static unsigned long __init __free_memory_core(phys_addr_t start,
-				 phys_addr_t end)
+					       phys_addr_t end)
 {
 	unsigned long start_pfn = PFN_UP(start);
 	unsigned long end_pfn = PFN_DOWN(end);
@@ -2297,9 +2300,7 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
 	if (start_pfn >= end_pfn)
 		return 0;
 
-	__free_pages_memory(start_pfn, end_pfn);
-
-	return end_pfn - start_pfn;
+	return __free_pages_memory(start_pfn, end_pfn);
 }
 
 static void __init memmap_init_reserved_pages(void)
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 5c21b3af216b2..9883612768511 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2548,24 +2548,25 @@ void *__init alloc_large_system_hash(const char *tablename,
 	return table;
 }
 
-void __init memblock_free_pages(struct page *page, unsigned long pfn,
-							unsigned int order)
+unsigned long __init memblock_free_pages(struct page *page, unsigned long pfn,
+					 unsigned int order)
 {
 	if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
 		int nid = early_pfn_to_nid(pfn);
 
 		if (!early_page_initialised(pfn, nid))
-			return;
+			return 0;
 	}
 
 	if (!kmsan_memblock_free_pages(page, order)) {
 		/* KMSAN will take care of these pages. */
-		return;
+		return 0;
 	}
 
 	/* pages were reserved and not allocated */
 	clear_page_tag_ref(page);
 	__free_pages_core(page, order, MEMINIT_EARLY);
+	return 1UL << order;
 }
 
 DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_ALLOC_DEFAULT_ON, init_on_alloc);
-- 
2.51.0.534.gc79095c0ca-goog




end of thread (newest message: 2025-09-26  0:25 UTC)

Thread overview: 10+ messages
2025-09-24 10:03 [PATCH v2] mm/memblock: Correct totalram_pages accounting with KMSAN Alexander Potapenko
2025-09-24 13:07 ` Markus Elfring
2025-09-24 13:23 ` Markus Elfring
2025-09-24 13:34   ` Marco Elver
2025-09-25  5:25 ` Mike Rapoport
2025-09-25 12:37 ` SeongJae Park
2025-09-25 12:45   ` David Hildenbrand
2025-09-25 14:50   ` Mike Rapoport
2025-09-26  0:25     ` Wei Yang
  -- strict thread matches above, loose matches on Subject: below --
2025-09-24  9:56 Alexander Potapenko
