linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Kiryl Shutsemau <kas@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>,
	Muchun Song <muchun.song@linux.dev>,
	David Hildenbrand <david@kernel.org>,
	Matthew Wilcox <willy@infradead.org>,
	Usama Arif <usamaarif642@gmail.com>,
	Frank van der Linden <fvdl@google.com>
Cc: Oscar Salvador <osalvador@suse.de>,
	Mike Rapoport <rppt@kernel.org>, Vlastimil Babka <vbabka@suse.cz>,
	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	Zi Yan <ziy@nvidia.com>, Baoquan He <bhe@redhat.com>,
	Michal Hocko <mhocko@suse.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Jonathan Corbet <corbet@lwn.net>,
	kernel-team@meta.com, linux-mm@kvack.org,
	linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org,
	Kiryl Shutsemau <kas@kernel.org>
Subject: [PATCHv2 06/14] mm: Rework compound_head() for power-of-2 sizeof(struct page)
Date: Thu, 18 Dec 2025 15:09:37 +0000	[thread overview]
Message-ID: <20251218150949.721480-7-kas@kernel.org> (raw)
In-Reply-To: <20251218150949.721480-1-kas@kernel.org>

For tail pages, the kernel uses the 'compound_info' field to get to the
head page. Bit 0 of the field indicates whether the page is a tail
page and, if it is set, the remaining bits represent a pointer to the
head page.

When the size of struct page is a power of 2, change the encoding of
compound_info to store a mask that can be applied to the virtual address
of the tail page in order to obtain the head page. This is possible
because the struct page of the head page is naturally aligned with
regard to the order of the page.

The significant impact of this modification is that all tail pages of
the same order will now have identical 'compound_info', regardless of
the compound page they are associated with. This paves the way for
eliminating fake heads.

The HugeTLB Vmemmap Optimization (HVO) creates fake heads and is only
applied when sizeof(struct page) is a power of 2. Having identical
tail pages allows the same page to be mapped into the vmemmap of all
pages, maintaining memory savings without fake heads.

If sizeof(struct page) is not a power of 2, there are no functional
changes.

Signed-off-by: Kiryl Shutsemau <kas@kernel.org>
---
 include/linux/page-flags.h | 62 +++++++++++++++++++++++++++++++++-----
 mm/util.c                  | 16 +++++++---
 2 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0de7db7efb00..fac5f41b3b27 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -210,6 +210,13 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
 	if (!static_branch_unlikely(&hugetlb_optimize_vmemmap_key))
 		return page;
 
+	/*
+	 * Fake heads only exist if size of struct page is power-of-2.
+	 * See hugetlb_vmemmap_optimizable_size().
+	 */
+	if (!is_power_of_2(sizeof(struct page)))
+		return page;
+
 	/*
 	 * Only addresses aligned with PAGE_SIZE of struct page may be fake head
 	 * struct page. The alignment check aims to avoid access the fields (
@@ -223,10 +230,14 @@ static __always_inline const struct page *page_fixed_fake_head(const struct page
 		 * because the @page is a compound page composed with at least
 		 * two contiguous pages.
 		 */
-		unsigned long head = READ_ONCE(page[1].compound_info);
+		unsigned long info = READ_ONCE(page[1].compound_info);
 
-		if (likely(head & 1))
-			return (const struct page *)(head - 1);
+		/* See set_compound_head() */
+		if (likely(info & 1)) {
+			unsigned long p = (unsigned long)page;
+
+			return (const struct page *)(p & info);
+		}
 	}
 	return page;
 }
@@ -281,11 +292,27 @@ static __always_inline int page_is_fake_head(const struct page *page)
 
 static __always_inline unsigned long _compound_head(const struct page *page)
 {
-	unsigned long head = READ_ONCE(page->compound_info);
+	unsigned long info = READ_ONCE(page->compound_info);
 
-	if (unlikely(head & 1))
-		return head - 1;
-	return (unsigned long)page_fixed_fake_head(page);
+	/* Bit 0 encodes PageTail() */
+	if (!(info & 1))
+		return (unsigned long)page_fixed_fake_head(page);
+
+	/*
+	 * If the size of struct page is not power-of-2, the rest of
+	 * compound_info is the pointer to the head page.
+	 */
+	if (!is_power_of_2(sizeof(struct page)))
+		return info - 1;
+
+	/*
+	 * If the size of struct page is power-of-2 the rest of the info
+	 * encodes the mask that converts the address of the tail page to
+	 * the head page.
+	 *
+	 * No need to clear bit 0 in the mask as 'page' always has it clear.
+	 */
+	return (unsigned long)page & info;
 }
 
 #define compound_head(page)	((typeof(page))_compound_head(page))
@@ -294,7 +321,26 @@ static __always_inline void set_compound_head(struct page *page,
 					      const struct page *head,
 					      unsigned int order)
 {
-	WRITE_ONCE(page->compound_info, (unsigned long)head + 1);
+	unsigned int shift;
+	unsigned long mask;
+
+	if (!is_power_of_2(sizeof(struct page))) {
+		WRITE_ONCE(page->compound_info, (unsigned long)head | 1);
+		return;
+	}
+
+	/*
+	 * If the size of struct page is power-of-2, bits [shift-1:0] of the
+	 * virtual address of compound head are zero.
+	 *
+	 * Calculate mask that can be applied to the virtual address of
+	 * the tail page to get address of the head page.
+	 */
+	shift = order + order_base_2(sizeof(struct page));
+	mask = GENMASK(BITS_PER_LONG - 1, shift);
+
+	/* Bit 0 encodes PageTail() */
+	WRITE_ONCE(page->compound_info, mask | 1);
 }
 
 static __always_inline void clear_compound_head(struct page *page)
diff --git a/mm/util.c b/mm/util.c
index cbf93cf3223a..3c00f6cec3f0 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1234,7 +1234,7 @@ static void set_ps_flags(struct page_snapshot *ps, const struct folio *folio,
  */
 void snapshot_page(struct page_snapshot *ps, const struct page *page)
 {
-	unsigned long head, nr_pages = 1;
+	unsigned long info, nr_pages = 1;
 	struct folio *foliop;
 	int loops = 5;
 
@@ -1244,8 +1244,8 @@ void snapshot_page(struct page_snapshot *ps, const struct page *page)
 again:
 	memset(&ps->folio_snapshot, 0, sizeof(struct folio));
 	memcpy(&ps->page_snapshot, page, sizeof(*page));
-	head = ps->page_snapshot.compound_info;
-	if ((head & 1) == 0) {
+	info = ps->page_snapshot.compound_info;
+	if ((info & 1) == 0) {
 		ps->idx = 0;
 		foliop = (struct folio *)&ps->page_snapshot;
 		if (!folio_test_large(foliop)) {
@@ -1256,7 +1256,15 @@ void snapshot_page(struct page_snapshot *ps, const struct page *page)
 		}
 		foliop = (struct folio *)page;
 	} else {
-		foliop = (struct folio *)(head - 1);
+		/* See compound_head() */
+		if (is_power_of_2(sizeof(struct page))) {
+			unsigned long p = (unsigned long)page;
+
+			foliop = (struct folio *)(p & info);
+		} else {
+			foliop = (struct folio *)(info - 1);
+		}
+
 		ps->idx = folio_page_idx(foliop, page);
 	}
 
-- 
2.51.2



  parent reply	other threads:[~2025-12-18 15:10 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-18 15:09 [PATCHv2 00/14] Kiryl Shutsemau
2025-12-18 15:09 ` [PATCHv2 01/14] mm: Move MAX_FOLIO_ORDER definition to mmzone.h Kiryl Shutsemau
2025-12-18 15:09 ` [PATCHv2 02/14] mm/sparse: Check memmap alignment Kiryl Shutsemau
2025-12-22  8:34   ` Muchun Song
2025-12-22 14:02     ` Kiryl Shutsemau
2025-12-22 14:18       ` David Hildenbrand (Red Hat)
2025-12-22 14:52         ` Kiryl Shutsemau
2025-12-22 14:59           ` Muchun Song
2025-12-22 14:55         ` Muchun Song
2025-12-23  9:38           ` David Hildenbrand (Red Hat)
2025-12-23 11:26             ` Muchun Song
2025-12-24 14:13             ` Kiryl Shutsemau
2025-12-22 14:49       ` Muchun Song
2025-12-18 15:09 ` [PATCHv2 03/14] mm: Change the interface of prep_compound_tail() Kiryl Shutsemau
2025-12-22  2:55   ` Muchun Song
2025-12-18 15:09 ` [PATCHv2 04/14] mm: Rename the 'compound_head' field in the 'struct page' to 'compound_info' Kiryl Shutsemau
2025-12-22  3:00   ` Muchun Song
2025-12-18 15:09 ` [PATCHv2 05/14] mm: Move set/clear_compound_head() next to compound_head() Kiryl Shutsemau
2025-12-22  3:06   ` Muchun Song
2025-12-18 15:09 ` Kiryl Shutsemau [this message]
2025-12-22  3:20   ` [PATCHv2 06/14] mm: Rework compound_head() for power-of-2 sizeof(struct page) Muchun Song
2025-12-22 14:03     ` Kiryl Shutsemau
2025-12-23  8:37       ` Muchun Song
2025-12-22  7:57   ` Muchun Song
2025-12-22  9:45     ` Muchun Song
2025-12-22 14:49       ` Kiryl Shutsemau
2025-12-18 15:09 ` [PATCHv2 07/14] mm: Make page_zonenum() use head page Kiryl Shutsemau
2025-12-18 15:09 ` [PATCHv2 08/14] mm/hugetlb: Refactor code around vmemmap_walk Kiryl Shutsemau
2025-12-22  5:54   ` Muchun Song
2025-12-22 15:00     ` Kiryl Shutsemau
2025-12-22 15:11       ` Muchun Song
2025-12-18 15:09 ` [PATCHv2 09/14] mm/hugetlb: Remove fake head pages Kiryl Shutsemau
2025-12-18 15:09 ` [PATCHv2 10/14] mm: Drop fake head checks Kiryl Shutsemau
2025-12-22  5:56   ` Muchun Song
2025-12-18 15:09 ` [PATCHv2 11/14] hugetlb: Remove VMEMMAP_SYNCHRONIZE_RCU Kiryl Shutsemau
2025-12-22  6:00   ` Muchun Song
2025-12-18 15:09 ` [PATCHv2 12/14] mm/hugetlb: Remove hugetlb_optimize_vmemmap_key static key Kiryl Shutsemau
2025-12-22  6:03   ` Muchun Song
2025-12-18 15:09 ` [PATCHv2 13/14] mm: Remove the branch from compound_head() Kiryl Shutsemau
2025-12-22  6:30   ` Muchun Song
2025-12-18 15:09 ` [PATCHv2 14/14] hugetlb: Update vmemmap_dedup.rst Kiryl Shutsemau
2025-12-22  6:20   ` Muchun Song
2025-12-18 22:18 ` [PATCHv2 00/14] Eliminate fake head pages from vmemmap optimization Kiryl Shutsemau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251218150949.721480-7-kas@kernel.org \
    --to=kas@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=bhe@redhat.com \
    --cc=corbet@lwn.net \
    --cc=david@kernel.org \
    --cc=fvdl@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=kernel-team@meta.com \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mhocko@suse.com \
    --cc=muchun.song@linux.dev \
    --cc=osalvador@suse.de \
    --cc=rppt@kernel.org \
    --cc=usamaarif642@gmail.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox