linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Zi Yan <zi.yan@sent.com>
To: linux-mm@kvack.org
Cc: Zi Yan <ziy@nvidia.com>, David Hildenbrand <david@redhat.com>,
	Matthew Wilcox <willy@infradead.org>,
	Vlastimil Babka <vbabka@suse.cz>,
	"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	John Hubbard <jhubbard@nvidia.com>,
	Yang Shi <shy828301@gmail.com>,
	David Rientjes <rientjes@google.com>,
	James Houghton <jthoughton@google.com>,
	Mike Rapoport <rppt@kernel.org>,
	Muchun Song <songmuchun@bytedance.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org
Subject: [PATCH v1 12/12] mm: make MAX_ORDER a kernel boot time parameter.
Date: Wed, 21 Sep 2022 21:12:52 -0400	[thread overview]
Message-ID: <20220922011252.2266780-13-zi.yan@sent.com> (raw)
In-Reply-To: <20220922011252.2266780-1-zi.yan@sent.com>

From: Zi Yan <ziy@nvidia.com>

With the new buddy_alloc_max_order boot parameter, users can specify a larger
MAX_ORDER than the one set by CONFIG_ARCH_FORCE_MAX_ORDER or CONFIG_SET_MAX_ORDER.
It can be set to any value >= CONFIG_ARCH_FORCE_MAX_ORDER or CONFIG_SET_MAX_ORDER,
but <= S8_MAX (127) (limited by vmscan scan_control and per-cpu free page list).

Signed-off-by: Zi Yan <ziy@nvidia.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: linux-doc@vger.kernel.org
Cc: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 .../admin-guide/kernel-parameters.txt         |  5 +++
 include/linux/mmzone.h                        | 10 +++++-
 mm/Kconfig                                    | 13 ++++++++
 mm/page_alloc.c                               | 31 +++++++++++++++++++
 mm/vmscan.c                                   |  1 -
 5 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 5f633844daac..eb0dd8a78205 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -494,6 +494,11 @@
 	bttv.pll=	See Documentation/admin-guide/media/bttv.rst
 	bttv.tuner=
 
+	buddy_alloc_max_order=	[KNL] This parameter adjusts the size of the largest
+			pages that can be allocated from the kernel buddy allocator. The
+			largest page size is 2^buddy_alloc_max_order * PAGE_SIZE.
+			Format: integer
+
 	bulk_remove=off	[PPC]  This parameter disables the use of the pSeries
 			firmware feature for flushing multiple hpte entries
 			at a time.
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 19fca391f635..5669191d15dc 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -35,6 +35,14 @@
 #define MIN_MAX_ORDER MAX_ORDER
 #endif
 
+/* remap MAX_ORDER to buddy_alloc_max_order for boot time adjustment */
+#ifdef CONFIG_BOOT_TIME_MAX_ORDER
+/* Defined in mm/page_alloc.c */
+extern int buddy_alloc_max_order;
+#undef MAX_ORDER
+#define MAX_ORDER buddy_alloc_max_order
+#endif /* CONFIG_BOOT_TIME_MAX_ORDER */
+
 #define MAX_ORDER_NR_PAGES (1 << MAX_ORDER)
 
 /*
@@ -1600,7 +1608,7 @@ static inline bool movable_only_nodes(nodemask_t *nodes)
  * contiguous, thus > section size pages can be allocated and manipulated
  * without worrying about non-contiguous struct page.
  */
-#ifndef CONFIG_SET_MAX_ORDER
+#if !defined(CONFIG_SET_MAX_ORDER) && !defined(CONFIG_BOOT_TIME_MAX_ORDER)
 #if (MAX_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS
 #error Allocator MAX_ORDER exceeds SECTION_SIZE
 #endif
diff --git a/mm/Kconfig b/mm/Kconfig
index 9c7280acd528..3e6b61ba9fec 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -449,6 +449,19 @@ config SET_MAX_ORDER
 	  increase this value. A value of 10 means that the largest free memory
 	  block is 2^10 pages.
 
+config BOOT_TIME_MAX_ORDER
+	bool "Set maximum order of buddy allocator at boot time"
+	depends on SPARSEMEM_VMEMMAP && (ARCH_FORCE_MAX_ORDER != 0 || SET_MAX_ORDER != 0)
+	help
+	  This enables users to set the maximum order of the buddy allocator at
+	  system boot time instead of using a static macro fixed at compile time.
+	  Systems with a lot of memory might want to allocate large pages, whereas
+	  that is much less feasible and desirable for systems with less memory.
+	  This option allows different systems to control the largest page they
+	  want to allocate. When the boot time parameter is not set, MAX_ORDER
+	  defaults to ARCH_FORCE_MAX_ORDER or SET_MAX_ORDER, whichever is non-zero.
+	  The maximum value of MAX_ORDER is currently limited to S8_MAX (127).
+
 config HAVE_MEMBLOCK_PHYS_MAP
 	bool
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ba7c284ba3d3..9eacdf3a37c4 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -9720,3 +9720,34 @@ bool has_managed_dma(void)
 	return false;
 }
 #endif /* CONFIG_ZONE_DMA */
+
+#ifdef CONFIG_BOOT_TIME_MAX_ORDER
+int buddy_alloc_max_order = MIN_MAX_ORDER;
+EXPORT_SYMBOL(buddy_alloc_max_order);
+
+static int __init buddy_alloc_set(char *val)
+{
+	int ret;
+	unsigned long max_order;
+
+	ret = kstrtoul(val, 10, &max_order);
+
+	if (ret < 0)
+		return -EINVAL;
+
+	/*
+	 * max_order is also limited by the following locations:
+	 * 1. scan_control in mm/vmscan.c uses an s8 field for order, so max_order
+	 * cannot be bigger than S8_MAX until that field is changed.
+	 * 2. free_pcppages_bulk has a max_order upper limit.
+	 */
+	if (max_order > MIN_MAX_ORDER && max_order <= S8_MAX)
+		buddy_alloc_max_order = max_order;
+	else
+		buddy_alloc_max_order = MIN_MAX_ORDER;
+
+	return 0;
+}
+
+early_param("buddy_alloc_max_order", buddy_alloc_set);
+#endif /* CONFIG_BOOT_TIME_MAX_ORDER */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index a8fd6300fa7e..009632243398 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6623,7 +6623,6 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 	 * scan_control uses s8 fields for order, priority, and reclaim_idx.
 	 * Confirm they are large enough for max values.
 	 */
-	BUILD_BUG_ON(MAX_ORDER > S8_MAX);
 	BUILD_BUG_ON(DEF_PRIORITY > S8_MAX);
 	BUILD_BUG_ON(MAX_NR_ZONES > S8_MAX);
 
-- 
2.35.1



      parent reply	other threads:[~2022-09-22  1:13 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-22  1:12 [PATCH v1 00/12] Make MAX_ORDER adjustable as " Zi Yan
2022-09-22  1:12 ` [PATCH v1 01/12] mm: rectify MAX_ORDER semantics to be the largest page order from buddy allocator Zi Yan
2022-09-22  1:12 ` [PATCH v1 02/12] mm: check page validity when find a buddy page in a non-contiguous zone Zi Yan
2022-09-22  1:12 ` [PATCH v1 03/12] mm: adapt deferred struct page init to new MAX_ORDER Zi Yan
2022-09-22  1:12 ` [PATCH v1 04/12] mm: prevent pageblock size being larger than section size Zi Yan
2022-09-22  1:12 ` [PATCH v1 05/12] fs: proc: use pageblock_nr_pages for reschedule period in read_kcore() Zi Yan
2022-09-22  1:12 ` [PATCH v1 06/12] virtio: virtio_balloon: use pageblock_order instead of MAX_ORDER Zi Yan
2022-09-22  1:12 ` [PATCH v1 07/12] mm/page_reporting: set page_reporting_order to -1 to prevent it running Zi Yan
2022-09-22  1:12 ` [PATCH v1 08/12] mm: replace MAX_ORDER when it is used to indicate max physical contiguity Zi Yan
2022-09-22  1:12 ` [PATCH v1 09/12] mm: Make MAX_ORDER of buddy allocator configurable via Kconfig SET_MAX_ORDER Zi Yan
2022-09-22  1:12 ` [PATCH v1 10/12] mm: convert MAX_ORDER sized static arrays to dynamic ones Zi Yan
2022-09-22  1:12 ` [PATCH v1 11/12] mm: introduce MIN_MAX_ORDER to replace MAX_ORDER as compile time constant Zi Yan
2022-09-22  1:12 ` Zi Yan [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220922011252.2266780-13-zi.yan@sent.com \
    --to=zi.yan@sent.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=jthoughton@google.com \
    --cc=kirill.shutemov@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=rientjes@google.com \
    --cc=rppt@kernel.org \
    --cc=shy828301@gmail.com \
    --cc=songmuchun@bytedance.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    --cc=ziy@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox