linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Pankaj Raghav <p.raghav@samsung.com>
To: Suren Baghdasaryan <surenb@google.com>,
	Ryan Roberts <ryan.roberts@arm.com>,
	Vlastimil Babka <vbabka@suse.cz>,
	Baolin Wang <baolin.wang@linux.alibaba.com>,
	Borislav Petkov <bp@alien8.de>, Ingo Molnar <mingo@redhat.com>,
	"H . Peter Anvin" <hpa@zytor.com>, Zi Yan <ziy@nvidia.com>,
	Mike Rapoport <rppt@kernel.org>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Michal Hocko <mhocko@suse.com>,
	David Hildenbrand <david@redhat.com>,
	Lorenzo Stoakes <lorenzo.stoakes@oracle.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Nico Pache <npache@redhat.com>, Dev Jain <dev.jain@arm.com>,
	"Liam R . Howlett" <Liam.Howlett@oracle.com>,
	Jens Axboe <axboe@kernel.dk>
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-block@vger.kernel.org, willy@infradead.org, x86@kernel.org,
	linux-fsdevel@vger.kernel.org,
	"Darrick J . Wong" <djwong@kernel.org>,
	mcgrof@kernel.org, gost.dev@samsung.com, kernel@pankajraghav.com,
	hch@lst.de, Pankaj Raghav <p.raghav@samsung.com>
Subject: [RFC 2/3] mm: add STATIC_PMD_ZERO_PAGE config option
Date: Tue, 27 May 2025 07:04:51 +0200	[thread overview]
Message-ID: <20250527050452.817674-3-p.raghav@samsung.com> (raw)
In-Reply-To: <20250527050452.817674-1-p.raghav@samsung.com>

There are many places in the kernel where we need to zeroout larger
chunks but the maximum segment we can zeroout at a time by ZERO_PAGE
is limited by PAGE_SIZE.

This is especially annoying in block devices and filesystems where we
attach multiple ZERO_PAGEs to the bio in different bvecs. With multipage
bvec support in block layer, it is much more efficient to send out
larger zero pages as a part of single bvec.

This concern was raised during the review of adding LBS support to
XFS[1][2].

Usually huge_zero_folio is allocated on demand, and it will be
deallocated by the shrinker if there are no users of it left.

Add a config option STATIC_PMD_ZERO_PAGE that will always allocate
the huge_zero_folio, and it will never be freed. This makes using the
huge_zero_folio without having to pass any mm struct and call put_folio
in the destructor.

We can enable it by default for x86_64 where the PMD size is 2M.
It is good compromise between the memory and efficiency.
As a THP zero page might be wasteful for architectures with bigger page
sizes, let's not enable it for them.

[1] https://lore.kernel.org/linux-xfs/20231027051847.GA7885@lst.de/
[2] https://lore.kernel.org/linux-xfs/ZitIK5OnR7ZNY0IG@infradead.org/

Suggested-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
 arch/x86/Kconfig |  1 +
 mm/Kconfig       | 12 ++++++++++++
 mm/memory.c      | 30 ++++++++++++++++++++++++++----
 3 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 055204dc211d..96f99b4f96ea 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -152,6 +152,7 @@ config X86
 	select ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP	if X86_64
 	select ARCH_WANT_HUGETLB_VMEMMAP_PREINIT if X86_64
 	select ARCH_WANTS_THP_SWAP		if X86_64
+	select ARCH_WANTS_STATIC_PMD_ZERO_PAGE if X86_64
 	select ARCH_HAS_PARANOID_L1D_FLUSH
 	select BUILDTIME_TABLE_SORT
 	select CLKEVT_I8253
diff --git a/mm/Kconfig b/mm/Kconfig
index bd08e151fa1b..8f50f5c3f7a7 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -826,6 +826,18 @@ config ARCH_WANTS_THP_SWAP
 config MM_ID
 	def_bool n
 
+config ARCH_WANTS_STATIC_PMD_ZERO_PAGE
+	bool
+
+config STATIC_PMD_ZERO_PAGE
+	def_bool y
+	depends on ARCH_WANTS_STATIC_PMD_ZERO_PAGE
+	help
+	  Typically huge_zero_folio, which is a PMD page of zeroes, is allocated
+	  on demand and deallocated when not in use. This option will always
+	  allocate huge_zero_folio for zeroing and it is never deallocated.
+	  Not suitable for memory constrained systems.
+
 menuconfig TRANSPARENT_HUGEPAGE
 	bool "Transparent Hugepage Support"
 	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE && !PREEMPT_RT
diff --git a/mm/memory.c b/mm/memory.c
index 11edc4d66e74..ab8c16d04307 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -203,9 +203,17 @@ static void put_huge_zero_page(void)
 	BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
 }
 
+/*
+ * If STATIC_PMD_ZERO_PAGE is enabled, @mm can be NULL, i.e, the huge_zero_folio
+ * is not associated with any mm_struct.
+*/
 struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)
 {
-	if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+	if (!IS_ENABLED(CONFIG_STATIC_PMD_ZERO_PAGE) && !mm)
+		return NULL;
+
+	if (IS_ENABLED(CONFIG_STATIC_PMD_ZERO_PAGE) ||
+	    test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
 		return READ_ONCE(huge_zero_folio);
 
 	if (!get_huge_zero_page())
@@ -219,6 +227,9 @@ struct folio *mm_get_huge_zero_folio(struct mm_struct *mm)
 
 void mm_put_huge_zero_folio(struct mm_struct *mm)
 {
+	if (IS_ENABLED(CONFIG_STATIC_PMD_ZERO_PAGE))
+		return;
+
 	if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
 		put_huge_zero_page();
 }
@@ -246,15 +257,26 @@ static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
 
 static int __init init_huge_zero_page(void)
 {
+	int ret = 0;
+
+	if (IS_ENABLED(CONFIG_STATIC_PMD_ZERO_PAGE)) {
+		if (!get_huge_zero_page())
+			ret = -ENOMEM;
+		goto out;
+	}
+
 	huge_zero_page_shrinker = shrinker_alloc(0, "thp-zero");
-	if (!huge_zero_page_shrinker)
-		return -ENOMEM;
+	if (!huge_zero_page_shrinker) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	huge_zero_page_shrinker->count_objects = shrink_huge_zero_page_count;
 	huge_zero_page_shrinker->scan_objects = shrink_huge_zero_page_scan;
 	shrinker_register(huge_zero_page_shrinker);
 
-	return 0;
+out:
+	return ret;
 }
 early_initcall(init_huge_zero_page);
 
-- 
2.47.2



  parent reply	other threads:[~2025-05-27  5:05 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-05-27  5:04 [RFC 0/3] " Pankaj Raghav
2025-05-27  5:04 ` [RFC 1/3] mm: move huge_zero_folio from huge_memory.c to memory.c Pankaj Raghav
2025-05-27  5:04 ` Pankaj Raghav [this message]
2025-05-27 16:37   ` [RFC 2/3] mm: add STATIC_PMD_ZERO_PAGE config option Dave Hansen
2025-05-27 18:00     ` Pankaj Raghav (Samsung)
2025-05-27 18:30       ` Dave Hansen
2025-05-27 19:28         ` Pankaj Raghav (Samsung)
2025-05-28 20:36       ` David Hildenbrand
2025-06-02  5:03   ` Christoph Hellwig
2025-06-02 14:49     ` Pankaj Raghav
2025-06-02 20:28       ` David Hildenbrand
2025-06-02 20:32         ` David Hildenbrand
2025-05-27  5:04 ` [RFC 3/3] block: use mm_huge_zero_folio in __blkdev_issue_zero_pages() Pankaj Raghav
2025-06-02  5:05   ` Christoph Hellwig
2025-06-02 15:34     ` Pankaj Raghav

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250527050452.817674-3-p.raghav@samsung.com \
    --to=p.raghav@samsung.com \
    --cc=Liam.Howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=axboe@kernel.dk \
    --cc=baolin.wang@linux.alibaba.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=david@redhat.com \
    --cc=dev.jain@arm.com \
    --cc=djwong@kernel.org \
    --cc=gost.dev@samsung.com \
    --cc=hch@lst.de \
    --cc=hpa@zytor.com \
    --cc=kernel@pankajraghav.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lorenzo.stoakes@oracle.com \
    --cc=mcgrof@kernel.org \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=npache@redhat.com \
    --cc=rppt@kernel.org \
    --cc=ryan.roberts@arm.com \
    --cc=surenb@google.com \
    --cc=tglx@linutronix.de \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=x86@kernel.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox