linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Usama Arif <usamaarif642@gmail.com>
To: Kiryl Shutsemau <kas@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Muchun Song <muchun.song@linux.dev>
Cc: David Hildenbrand <david@kernel.org>,
	Oscar Salvador <osalvador@suse.de>,
	Mike Rapoport <rppt@kernel.org>, Vlastimil Babka <vbabka@suse.cz>,
	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	Matthew Wilcox <willy@infradead.org>, Zi Yan <ziy@nvidia.com>,
	Baoquan He <bhe@redhat.com>, Michal Hocko <mhocko@suse.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Jonathan Corbet <corbet@lwn.net>,
	kernel-team@meta.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org
Subject: Re: [PATCH 06/11] mm/hugetlb: Remove fake head pages
Date: Sat, 6 Dec 2025 17:03:25 +0000	[thread overview]
Message-ID: <db4538c3-84f8-4fb4-8307-b4fcf46ebe38@gmail.com> (raw)
In-Reply-To: <20251205194351.1646318-7-kas@kernel.org>



On 05/12/2025 19:43, Kiryl Shutsemau wrote:
> HugeTLB optimizes vmemmap memory usage by freeing all but the first page
> of vmemmap memory for the huge page and remapping the rest of the pages
> to the first one.
> 
> This only occurs if the size of the struct page is a power of 2. In
> these instances, the compound head position encoding in the tail pages
> ensures that all tail pages of the same order are identical, regardless
> of the page to which they belong.
> 
> This allows for the elimination of fake head pages without significant
> memory overhead: a page full of tail struct pages is allocated per
> hstate and mapped instead of the page with the head page for all pages
> of the given hstate.
> 
> Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
> ---
>  include/linux/hugetlb.h |  3 +++
>  mm/hugetlb_vmemmap.c    | 31 +++++++++++++++++++++++++++----
>  mm/hugetlb_vmemmap.h    |  4 ++--
>  3 files changed, 32 insertions(+), 6 deletions(-)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 8e63e46b8e1f..75dd940fda22 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -676,6 +676,9 @@ struct hstate {
>  	unsigned int free_huge_pages_node[MAX_NUMNODES];
>  	unsigned int surplus_huge_pages_node[MAX_NUMNODES];
>  	char name[HSTATE_NAME_LEN];
> +#ifdef CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
> +	struct page *vmemmap_tail;
> +#endif
>  };
>  
>  struct cma;
> diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
> index f5ee499b8563..2543bdbcae20 100644
> --- a/mm/hugetlb_vmemmap.c
> +++ b/mm/hugetlb_vmemmap.c
> @@ -18,6 +18,7 @@
>  #include <asm/pgalloc.h>
>  #include <asm/tlbflush.h>
>  #include "hugetlb_vmemmap.h"
> +#include "internal.h"
>  
>  /**
>   * struct vmemmap_remap_walk - walk vmemmap page table
> @@ -518,7 +519,24 @@ static bool vmemmap_should_optimize_folio(const struct hstate *h, struct folio *
>  	return true;
>  }
>  
> -static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
> +static void hugetlb_vmemmap_tail_alloc(struct hstate *h)
> +{
> +	struct page *p;
> +
> +	if (h->vmemmap_tail)
> +		return;
> +

Isn't the above check unnecessary, since we already check for !h->vmemmap_tail in __hugetlb_vmemmap_optimize_folio()?

Is it possible that we could have a race here, where 2 threads are both trying to allocate a hugetlb page when none
exist in the system, both see h->vmemmap_tail == NULL, and both call alloc_page() and set h->vmemmap_tail?

Also, is there a good point where we can see that hstate->nr_huge_pages has gone down to 0 and free
h->vmemmap_tail? It's a single page per hstate so not a big deal, but it would be nice to have cleanup for it.

> +	h->vmemmap_tail = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!h->vmemmap_tail)
> +		return;
> +
> +	p = page_to_virt(h->vmemmap_tail);
> +
> +	for (int i = 0; i < PAGE_SIZE / sizeof(struct page); i++)
> +		prep_compound_tail(p + i, p, huge_page_order(h));
> +}
> +
> +static int __hugetlb_vmemmap_optimize_folio(struct hstate *h,
>  					    struct folio *folio,
>  					    struct list_head *vmemmap_pages,
>  					    unsigned long flags)
> @@ -533,6 +551,11 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
>  	if (!vmemmap_should_optimize_folio(h, folio))
>  		return ret;
>  
> +	if (!h->vmemmap_tail)
> +		hugetlb_vmemmap_tail_alloc(h);
> +	if (!h->vmemmap_tail)
> +		return -ENOMEM;
> +
>  	static_branch_inc(&hugetlb_optimize_vmemmap_key);
>  
>  	if (flags & VMEMMAP_SYNCHRONIZE_RCU)
> @@ -562,7 +585,7 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
>  	list_add(&vmemmap_head->lru, vmemmap_pages);
>  	memmap_pages_add(1);
>  
> -	vmemmap_tail	= vmemmap_head;
> +	vmemmap_tail	= h->vmemmap_tail;
>  	vmemmap_start	= (unsigned long)folio;
>  	vmemmap_end	= vmemmap_start + hugetlb_vmemmap_size(h);
>  
> @@ -594,7 +617,7 @@ static int __hugetlb_vmemmap_optimize_folio(const struct hstate *h,
>   * can use folio_test_hugetlb_vmemmap_optimized(@folio) to detect if @folio's
>   * vmemmap pages have been optimized.
>   */
> -void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
> +void hugetlb_vmemmap_optimize_folio(struct hstate *h, struct folio *folio)
>  {
>  	LIST_HEAD(vmemmap_pages);
>  
> @@ -868,7 +891,7 @@ static const struct ctl_table hugetlb_vmemmap_sysctls[] = {
>  
>  static int __init hugetlb_vmemmap_init(void)
>  {
> -	const struct hstate *h;
> +	struct hstate *h;
>  
>  	/* HUGETLB_VMEMMAP_RESERVE_SIZE should cover all used struct pages */
>  	BUILD_BUG_ON(__NR_USED_SUBPAGE > HUGETLB_VMEMMAP_RESERVE_PAGES);
> diff --git a/mm/hugetlb_vmemmap.h b/mm/hugetlb_vmemmap.h
> index 18b490825215..f44e40c44a21 100644
> --- a/mm/hugetlb_vmemmap.h
> +++ b/mm/hugetlb_vmemmap.h
> @@ -24,7 +24,7 @@ int hugetlb_vmemmap_restore_folio(const struct hstate *h, struct folio *folio);
>  long hugetlb_vmemmap_restore_folios(const struct hstate *h,
>  					struct list_head *folio_list,
>  					struct list_head *non_hvo_folios);
> -void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio);
> +void hugetlb_vmemmap_optimize_folio(struct hstate *h, struct folio *folio);
>  void hugetlb_vmemmap_optimize_folios(struct hstate *h, struct list_head *folio_list);
>  void hugetlb_vmemmap_optimize_bootmem_folios(struct hstate *h, struct list_head *folio_list);
>  #ifdef CONFIG_SPARSEMEM_VMEMMAP_PREINIT
> @@ -64,7 +64,7 @@ static inline long hugetlb_vmemmap_restore_folios(const struct hstate *h,
>  	return 0;
>  }
>  
> -static inline void hugetlb_vmemmap_optimize_folio(const struct hstate *h, struct folio *folio)
> +static inline void hugetlb_vmemmap_optimize_folio(struct hstate *h, struct folio *folio)
>  {
>  }
>  



  reply	other threads:[~2025-12-06 17:03 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-05 19:43 [PATCH 00/11] mm/hugetlb: Eliminate fake head pages from vmemmap optimization Kiryl Shutsemau
2025-12-05 19:43 ` [PATCH 01/11] mm: Change the interface of prep_compound_tail() Kiryl Shutsemau
2025-12-05 21:49   ` Usama Arif
2025-12-05 22:10     ` Kiryl Shutsemau
2025-12-05 22:15       ` Usama Arif
2025-12-05 19:43 ` [PATCH 02/11] mm: Rename the 'compound_head' field in the 'struct page' to 'compound_info' Kiryl Shutsemau
2025-12-05 19:43 ` [PATCH 03/11] mm: Move set/clear_compound_head() to compound_head() Kiryl Shutsemau
2025-12-05 19:43 ` [PATCH 04/11] mm: Rework compound_head() for power-of-2 sizeof(struct page) Kiryl Shutsemau
2025-12-06  0:25   ` Usama Arif
2025-12-06 16:29     ` Kiryl Shutsemau
2025-12-06 17:36       ` Usama Arif
2025-12-05 19:43 ` [PATCH 05/11] mm/hugetlb: Refactor code around vmemmap_walk Kiryl Shutsemau
2025-12-06 16:42   ` Usama Arif
2025-12-05 19:43 ` [PATCH 06/11] mm/hugetlb: Remove fake head pages Kiryl Shutsemau
2025-12-06 17:03   ` Usama Arif [this message]
2025-12-05 19:43 ` [PATCH 07/11] mm: Drop fake head checks and fix a race condition Kiryl Shutsemau
2025-12-06 17:27   ` Usama Arif
2025-12-05 19:43 ` [PATCH 08/11] hugetlb: Remove VMEMMAP_SYNCHRONIZE_RCU Kiryl Shutsemau
2025-12-05 19:43 ` [PATCH 09/11] mm/hugetlb: Remove hugetlb_optimize_vmemmap_key static key Kiryl Shutsemau
2025-12-05 19:43 ` [PATCH 10/11] mm: Remove the branch from compound_head() Kiryl Shutsemau
2025-12-05 19:43 ` [PATCH 11/11] hugetlb: Update vmemmap_dedup.rst Kiryl Shutsemau
2025-12-05 20:16 ` [PATCH 00/11] mm/hugetlb: Eliminate fake head pages from vmemmap optimization David Hildenbrand (Red Hat)
2025-12-05 20:33   ` Kiryl Shutsemau
2025-12-05 20:44     ` David Hildenbrand (Red Hat)
2025-12-05 20:54       ` Kiryl Shutsemau
2025-12-05 21:34         ` David Hildenbrand (Red Hat)
2025-12-05 21:41           ` Kiryl Shutsemau
2025-12-06 17:47             ` Usama Arif

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=db4538c3-84f8-4fb4-8307-b4fcf46ebe38@gmail.com \
    --to=usamaarif642@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=bhe@redhat.com \
    --cc=corbet@lwn.net \
    --cc=david@kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=kas@kernel.org \
    --cc=kernel-team@meta.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=osalvador@suse.de \
    --cc=rppt@kernel.org \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox