From: Pankaj Raghav <p.raghav@samsung.com>
To: "Darrick J . Wong" <djwong@kernel.org>, hch@lst.de, willy@infradead.org
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org,
David Hildenbrand <david@redhat.com>,
linux-fsdevel@vger.kernel.org, mcgrof@kernel.org,
gost.dev@samsung.com, Andrew Morton <akpm@linux-foundation.org>,
kernel@pankajraghav.com, Pankaj Raghav <p.raghav@samsung.com>
Subject: [RFC 1/3] mm: add large zero page for efficient zeroing of larger segments
Date: Fri, 16 May 2025 12:10:52 +0200 [thread overview]
Message-ID: <20250516101054.676046-2-p.raghav@samsung.com> (raw)
In-Reply-To: <20250516101054.676046-1-p.raghav@samsung.com>
Introduce LARGE_ZERO_PAGE of size 2M as an alternative to ZERO_PAGE of
size PAGE_SIZE.
There are many places in the kernel where we need to zero out larger
chunks, but the maximum segment we can zero out at a time is limited to
PAGE_SIZE.
This is especially annoying in block devices and filesystems, where we
attach multiple ZERO_PAGEs to the bio in different bvecs. With multipage
bvec support in the block layer, it is much more efficient to send out
larger zero pages as part of a single bvec.
While there are other options such as huge_zero_page, they can fail
under system memory pressure, requiring a fallback to ZERO_PAGE[3].
This idea (but not the implementation) was suggested during the review of
adding LBS support to XFS[1][2].
LARGE_ZERO_PAGE is added behind a config option so that systems that are
constrained by memory are not forced to use it.
[1] https://lore.kernel.org/linux-xfs/20231027051847.GA7885@lst.de/
[2] https://lore.kernel.org/linux-xfs/ZitIK5OnR7ZNY0IG@infradead.org/
[3] https://lore.kernel.org/linux-xfs/3pqmgrlewo6ctcwakdvbvjqixac5en6irlipe5aiz6vkylfyni@2luhrs36ke5r/
Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
arch/Kconfig | 8 ++++++++
arch/x86/include/asm/pgtable.h | 20 +++++++++++++++++++-
arch/x86/kernel/head_64.S | 9 ++++++++-
3 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index b0adb665041f..aefa519cb211 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -218,6 +218,14 @@ config USER_RETURN_NOTIFIER
Provide a kernel-internal notification when a cpu is about to
switch to user mode.
+config LARGE_ZERO_PAGE
+ bool "Large zero pages"
+ default n
+ help
+ 2M sized zero pages for zeroing. This will reserve 2M sized
+ physical pages for zeroing. Not suitable for memory constrained
+ systems.
+
config HAVE_IOREMAP_PROT
bool
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 3f59d7a16010..78eb83f2da34 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -17,6 +17,7 @@
#ifndef __ASSEMBLER__
#include <linux/spinlock.h>
+#include <linux/sizes.h>
#include <asm/x86_init.h>
#include <asm/pkru.h>
#include <asm/fpu/api.h>
@@ -47,14 +48,31 @@ void ptdump_walk_user_pgd_level_checkwx(void);
#define debug_checkwx_user() do { } while (0)
#endif
+#ifdef CONFIG_LARGE_ZERO_PAGE
+/*
+ * ZERO_LARGE_PAGE() yields the page for empty_large_zero_page, a global
+ * shared SZ_2M-sized buffer that is always zero: used for zero-mapped
+ * memory areas etc. The vaddr argument is evaluated and then discarded.
+ *
+ * In this configuration ZERO_PAGE() is an alias for ZERO_LARGE_PAGE(),
+ * and ZERO_LARGE_PAGE_SIZE (SZ_2M here, PAGE_SIZE otherwise) gives the
+ * number of zero bytes available behind ZERO_LARGE_PAGE().
+ *
+ * NOTE(review): empty_zero_page is not declared in this configuration;
+ * verify that no remaining code references that symbol directly.
+ */
+extern unsigned long empty_large_zero_page[(SZ_2M) / sizeof(unsigned long)]
+ __visible;
+#define ZERO_LARGE_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_large_zero_page))
+
+#define ZERO_PAGE(vaddr) ZERO_LARGE_PAGE(vaddr)
+#define ZERO_LARGE_PAGE_SIZE SZ_2M
+#else /* !CONFIG_LARGE_ZERO_PAGE */
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
*/
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
+extern unsigned long empty_zero_page[(PAGE_SIZE) / sizeof(unsigned long)]
__visible;
#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
+#define ZERO_LARGE_PAGE(vaddr) ZERO_PAGE(vaddr)
+
+#define ZERO_LARGE_PAGE_SIZE PAGE_SIZE
+#endif /* CONFIG_LARGE_ZERO_PAGE */
+
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index fefe2a25cf02..ebcd12f72966 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -14,6 +14,7 @@
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/pgtable.h>
+#include <linux/sizes.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/msr.h>
@@ -708,8 +709,14 @@ EXPORT_SYMBOL(phys_base)
#include "../xen/xen-head.S"
__PAGE_ALIGNED_BSS
+#ifdef CONFIG_LARGE_ZERO_PAGE
+SYM_DATA_START_PAGE_ALIGNED(empty_large_zero_page) /* NOTE(review): presumably page-aligned only, not 2M-aligned - confirm that is sufficient */
+ .skip SZ_2M /* reserve SZ_2M bytes for the large zero buffer */
+SYM_DATA_END(empty_large_zero_page)
+EXPORT_SYMBOL(empty_large_zero_page)
+#else /* !CONFIG_LARGE_ZERO_PAGE */
SYM_DATA_START_PAGE_ALIGNED(empty_zero_page)
.skip PAGE_SIZE
SYM_DATA_END(empty_zero_page)
EXPORT_SYMBOL(empty_zero_page)
-
+#endif /* CONFIG_LARGE_ZERO_PAGE */
--
2.47.2
next prev parent reply other threads:[~2025-05-16 10:11 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-16 10:10 [RFC 0/3] add large zero page for zeroing out " Pankaj Raghav
2025-05-16 10:10 ` Pankaj Raghav [this message]
2025-05-16 12:21 ` [RFC 1/3] mm: add large zero page for efficient zeroing of " David Hildenbrand
2025-05-16 13:03 ` Pankaj Raghav (Samsung)
2025-05-16 14:54 ` David Hildenbrand
2025-05-16 10:10 ` [RFC 2/3] block: use LARGE_ZERO_PAGE in __blkdev_issue_zero_pages() Pankaj Raghav
2025-05-16 10:10 ` [RFC 3/3] iomap: use LARGE_ZERO_PAGE in iomap_dio_zero() Pankaj Raghav
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250516101054.676046-2-p.raghav@samsung.com \
--to=p.raghav@samsung.com \
--cc=akpm@linux-foundation.org \
--cc=david@redhat.com \
--cc=djwong@kernel.org \
--cc=gost.dev@samsung.com \
--cc=hch@lst.de \
--cc=kernel@pankajraghav.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mcgrof@kernel.org \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox