From: Nathan Zimmer <nzimmer@sgi.com>
To: hpa@zytor.com, mingo@kernel.org
Cc: linux-kernel@vger.kernel.org, linux-mm@kvack.org, holt@sgi.com,
nzimmer@sgi.com, rob@landley.net, travis@sgi.com,
daniel@numascale-asia.com, akpm@linux-foundation.org,
gregkh@linuxfoundation.org, yinghai@kernel.org, mgorman@suse.de
Subject: [RFC v2 5/5] Sparse initialization of struct page array.
Date: Fri, 2 Aug 2013 12:44:27 -0500 [thread overview]
Message-ID: <1375465467-40488-6-git-send-email-nzimmer@sgi.com> (raw)
In-Reply-To: <1375465467-40488-1-git-send-email-nzimmer@sgi.com>
From: Robin Holt <holt@sgi.com>
During boot of large memory machines, a significant portion of boot
time is spent initializing the struct page array. The vast majority of
those pages are not referenced during boot.
Change this over to only initializing the pages when they are
actually allocated.
Besides the advantage of boot speed, this allows us the chance to
use normal performance monitoring tools to determine where the bulk
of time is spent during page initialization.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Nathan Zimmer <nzimmer@sgi.com>
To: "H. Peter Anvin" <hpa@zytor.com>
To: Ingo Molnar <mingo@kernel.org>
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
Cc: Linux MM <linux-mm@kvack.org>
Cc: Rob Landley <rob@landley.net>
Cc: Mike Travis <travis@sgi.com>
Cc: Daniel J Blueman <daniel@numascale-asia.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Yinghai Lu <yinghai@kernel.org>
Cc: Mel Gorman <mgorman@suse.de>
---
include/linux/page-flags.h | 5 +-
mm/page_alloc.c | 120 +++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 119 insertions(+), 6 deletions(-)
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 6d53675..d592065 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -83,6 +83,7 @@ enum pageflags {
PG_owner_priv_1, /* Owner use. If pagecache, fs may use*/
PG_arch_1,
PG_reserved,
+ PG_uninitialized_2m,
PG_private, /* If pagecache, has fs-private data */
PG_private_2, /* If pagecache, has fs aux data */
PG_writeback, /* Page is under writeback */
@@ -211,6 +212,8 @@ PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
__PAGEFLAG(SlobFree, slob_free)
+PAGEFLAG(Uninitialized2m, uninitialized_2m)
+
/*
* Private page markings that may be used by the filesystem that owns the page
* for its own purposes.
@@ -499,7 +502,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
#define PAGE_FLAGS_CHECK_AT_FREE \
(1 << PG_lru | 1 << PG_locked | \
1 << PG_private | 1 << PG_private_2 | \
- 1 << PG_writeback | 1 << PG_reserved | \
+ 1 << PG_writeback | 1 << PG_reserved | 1 << PG_uninitialized_2m | \
1 << PG_slab | 1 << PG_swapcache | 1 << PG_active | \
1 << PG_unevictable | __PG_MLOCKED | __PG_HWPOISON | \
__PG_COMPOUND_LOCK)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 382223e..c2fd03a0c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -737,8 +737,53 @@ static void __init_single_page(unsigned long pfn, unsigned long zone,
#endif
}
+static void expand_page_initialization(struct page *basepage)
+{
+ unsigned long pfn = page_to_pfn(basepage);
+ unsigned long end_pfn = pfn + PTRS_PER_PMD;
+ unsigned long zone = page_zonenum(basepage);
+ int count = page_count(basepage);
+ int nid = page_to_nid(basepage);
+
+ ClearPageUninitialized2m(basepage);
+
+ for (pfn++; pfn < end_pfn; pfn++)
+ __init_single_page(pfn, zone, nid, count);
+}
+
+static void ensure_pages_are_initialized(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long aligned_start_pfn = start_pfn & ~(PTRS_PER_PMD - 1);
+ unsigned long aligned_end_pfn;
+ struct page *page;
+
+ aligned_end_pfn = end_pfn & ~(PTRS_PER_PMD - 1);
+ aligned_end_pfn += PTRS_PER_PMD;
+ while (aligned_start_pfn < aligned_end_pfn) {
+ if (pfn_valid(aligned_start_pfn)) {
+ page = pfn_to_page(aligned_start_pfn);
+
+ if (PageUninitialized2m(page))
+ expand_page_initialization(page);
+ }
+
+ aligned_start_pfn += PTRS_PER_PMD;
+ }
+}
+
+static inline void ensure_page_is_initialized(struct page *page)
+{
+ ensure_pages_are_initialized(page_to_pfn(page), page_to_pfn(page));
+}
+
void reserve_bootmem_region(unsigned long start, unsigned long end)
{
+ unsigned long start_pfn = PFN_DOWN(start);
+ unsigned long end_pfn = PFN_UP(end);
+
+ ensure_pages_are_initialized(start_pfn, end_pfn);
+
for (; start < end; start++)
if (pfn_valid(start))
SetPageReserved(pfn_to_page(start));
@@ -755,7 +800,10 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
if (PageAnon(page))
page->mapping = NULL;
for (i = 0; i < (1 << order); i++)
- bad += free_pages_check(page + i);
+ if (PageUninitialized2m(page + i))
+ i += PTRS_PER_PMD - 1;
+ else
+ bad += free_pages_check(page + i);
if (bad)
return false;
@@ -799,13 +847,22 @@ void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
unsigned int loop;
prefetchw(page);
- for (loop = 0; loop < nr_pages; loop++) {
+ for (loop = 0; loop < nr_pages; ) {
struct page *p = &page[loop];
if (loop + 1 < nr_pages)
prefetchw(p + 1);
+
+ if ((PageUninitialized2m(p)) &&
+ ((loop + PTRS_PER_PMD) > nr_pages))
+ ensure_page_is_initialized(p);
+
__ClearPageReserved(p);
set_page_count(p, 0);
+ if (PageUninitialized2m(p))
+ loop += PTRS_PER_PMD;
+ else
+ loop += 1;
}
page_zone(page)->managed_pages += 1 << order;
@@ -860,6 +917,7 @@ static inline void expand(struct zone *zone, struct page *page,
area--;
high--;
size >>= 1;
+ ensure_page_is_initialized(page);
VM_BUG_ON(bad_range(zone, &page[size]));
#ifdef CONFIG_DEBUG_PAGEALLOC
@@ -907,6 +965,10 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
for (i = 0; i < (1 << order); i++) {
struct page *p = page + i;
+
+ if (PageUninitialized2m(p))
+ expand_page_initialization(page);
+
if (unlikely(check_new_page(p)))
return 1;
}
@@ -989,6 +1051,8 @@ int move_freepages(struct zone *zone,
unsigned long order;
int pages_moved = 0;
+ ensure_pages_are_initialized(page_to_pfn(start_page),
+ page_to_pfn(end_page));
#ifndef CONFIG_HOLES_IN_ZONE
/*
* page_zone is not safe to call in this context when
@@ -3902,6 +3966,9 @@ static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
if (!pfn_valid_within(pfn) || PageReserved(pfn_to_page(pfn)))
return 1;
+
+ if (PageUninitialized2m(pfn_to_page(pfn)))
+ pfn += PTRS_PER_PMD;
}
return 0;
}
@@ -3991,6 +4058,34 @@ static void setup_zone_migrate_reserve(struct zone *zone)
}
/*
+ * This function tells us how many pfns we have available.
+ * Available meaning valid and on the specified node.
+ * It returns size if that many pfns are available, 1 otherwise.
+ */
+static int __meminit pfn_range_init_avail(unsigned long pfn,
+ unsigned long end_pfn,
+ unsigned long size, int nid)
+{
+ unsigned long validate_end_pfn = pfn + size;
+
+ if (pfn & (size - 1))
+ return 1;
+
+ if (pfn + size >= end_pfn)
+ return 1;
+
+ while (pfn < validate_end_pfn) {
+ if (!early_pfn_valid(pfn))
+ return 1;
+ if (!early_pfn_in_nid(pfn, nid))
+ return 1;
+ pfn++;
+ }
+
+ return size;
+}
+
+/*
* Initially all pages are reserved - free ones are freed
* up by free_all_bootmem() once the early boot process is
* done. Non-atomic initialization, single-pass.
@@ -4006,19 +4101,33 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
highest_memmap_pfn = end_pfn - 1;
z = &NODE_DATA(nid)->node_zones[zone];
- for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+ for (pfn = start_pfn; pfn < end_pfn; ) {
/*
* There can be holes in boot-time mem_map[]s
* handed to this function. They do not
* exist on hotplugged memory.
*/
+ int pfns = 1;
if (context == MEMMAP_EARLY) {
- if (!early_pfn_valid(pfn))
+ if (!early_pfn_valid(pfn)) {
+ pfn++;
continue;
- if (!early_pfn_in_nid(pfn, nid))
+ }
+ if (!early_pfn_in_nid(pfn, nid)) {
+ pfn++;
continue;
+ }
+
+ pfns = pfn_range_init_avail(pfn, end_pfn,
+ PTRS_PER_PMD, nid);
}
+
__init_single_page(pfn, zone, nid, 1);
+
+ if (pfns > 1)
+ SetPageUninitialized2m(pfn_to_page(pfn));
+
+ pfn += pfns;
}
}
@@ -6237,6 +6346,7 @@ static const struct trace_print_flags pageflag_names[] = {
{1UL << PG_owner_priv_1, "owner_priv_1" },
{1UL << PG_arch_1, "arch_1" },
{1UL << PG_reserved, "reserved" },
+ {1UL << PG_uninitialized_2m, "uninitialized_2m" },
{1UL << PG_private, "private" },
{1UL << PG_private_2, "private_2" },
{1UL << PG_writeback, "writeback" },
--
1.8.2.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2013-08-02 17:44 UTC|newest]
Thread overview: 77+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-07-12 2:03 [RFC 0/4] Transparent on-demand struct page initialization embedded in the buddy allocator Robin Holt
2013-07-12 2:03 ` [RFC 1/4] memblock: Introduce a for_each_reserved_mem_region iterator Robin Holt
2013-07-12 2:03 ` [RFC 2/4] Have __free_pages_memory() free in larger chunks Robin Holt
2013-07-12 7:45 ` Robin Holt
2013-07-13 3:08 ` Yinghai Lu
2013-07-16 13:02 ` Sam Ben
2013-07-23 15:32 ` Johannes Weiner
2013-07-12 2:03 ` [RFC 3/4] Seperate page initialization into a separate function Robin Holt
2013-07-13 3:06 ` Yinghai Lu
2013-07-15 3:19 ` Robin Holt
2013-07-12 2:03 ` [RFC 4/4] Sparse initialization of struct page array Robin Holt
2013-07-13 4:19 ` Yinghai Lu
2013-07-13 4:39 ` H. Peter Anvin
2013-07-13 5:31 ` Yinghai Lu
2013-07-13 5:38 ` H. Peter Anvin
2013-07-15 14:08 ` Nathan Zimmer
2013-07-15 17:45 ` Nathan Zimmer
2013-07-15 17:54 ` H. Peter Anvin
2013-07-15 18:26 ` Robin Holt
2013-07-15 18:29 ` H. Peter Anvin
2013-07-23 8:32 ` Ingo Molnar
2013-07-23 11:09 ` Robin Holt
2013-07-23 11:15 ` Robin Holt
2013-07-23 11:41 ` Robin Holt
2013-07-23 11:50 ` Robin Holt
2013-07-16 10:26 ` Robin Holt
2013-07-25 2:25 ` Robin Holt
2013-07-25 12:50 ` Yinghai Lu
2013-07-25 13:42 ` Robin Holt
2013-07-25 13:52 ` Yinghai Lu
2013-07-15 21:30 ` Andrew Morton
2013-07-16 10:38 ` Robin Holt
2013-07-12 8:27 ` [RFC 0/4] Transparent on-demand struct page initialization embedded in the buddy allocator Ingo Molnar
2013-07-12 8:47 ` boot tracing Borislav Petkov
2013-07-12 8:53 ` Ingo Molnar
2013-07-15 1:38 ` Sam Ben
2013-07-23 8:18 ` Ingo Molnar
2013-07-12 9:19 ` [RFC 0/4] Transparent on-demand struct page initialization embedded in the buddy allocator Robert Richter
2013-07-15 15:16 ` Robin Holt
2013-07-16 8:55 ` Joonsoo Kim
2013-07-16 9:08 ` Borislav Petkov
2013-07-23 8:20 ` Ingo Molnar
2013-07-15 15:00 ` Robin Holt
2013-07-17 5:17 ` Sam Ben
2013-07-17 9:30 ` Robin Holt
2013-07-19 23:51 ` Yinghai Lu
2013-07-22 6:13 ` Robin Holt
2013-08-02 17:44 ` [RFC v2 0/5] " Nathan Zimmer
2013-08-02 17:44 ` [RFC v2 1/5] memblock: Introduce a for_each_reserved_mem_region iterator Nathan Zimmer
2013-08-02 17:44 ` [RFC v2 2/5] Have __free_pages_memory() free in larger chunks Nathan Zimmer
2013-08-02 17:44 ` [RFC v2 3/5] Move page initialization into a separate function Nathan Zimmer
2013-08-02 17:44 ` [RFC v2 4/5] Only set page reserved in the memblock region Nathan Zimmer
2013-08-03 20:04 ` Nathan Zimmer
2013-08-02 17:44 ` Nathan Zimmer [this message]
2013-08-05 9:58 ` [RFC v2 0/5] Transparent on-demand struct page initialization embedded in the buddy allocator Ingo Molnar
2013-08-12 21:54 ` [RFC v3 " Nathan Zimmer
2013-08-12 21:54 ` [RFC v3 1/5] memblock: Introduce a for_each_reserved_mem_region iterator Nathan Zimmer
2013-08-12 21:54 ` [RFC v3 2/5] Have __free_pages_memory() free in larger chunks Nathan Zimmer
2013-08-12 21:54 ` [RFC v3 3/5] Move page initialization into a separate function Nathan Zimmer
2013-08-12 21:54 ` [RFC v3 4/5] Only set page reserved in the memblock region Nathan Zimmer
2013-08-12 21:54 ` [RFC v3 5/5] Sparse initialization of struct page array Nathan Zimmer
2013-08-13 10:58 ` [RFC v3 0/5] Transparent on-demand struct page initialization embedded in the buddy allocator Ingo Molnar
2013-08-13 17:09 ` Linus Torvalds
2013-08-13 17:23 ` H. Peter Anvin
2013-08-13 17:33 ` Mike Travis
2013-08-13 17:51 ` Linus Torvalds
2013-08-13 18:04 ` Mike Travis
2013-08-13 19:06 ` Mike Travis
2013-08-13 20:24 ` Yinghai Lu
2013-08-13 20:37 ` Mike Travis
2013-08-13 21:35 ` Nathan Zimmer
2013-08-13 23:10 ` Nathan Zimmer
2013-08-13 23:55 ` Linus Torvalds
2013-08-14 11:27 ` Ingo Molnar
2013-08-14 11:05 ` Ingo Molnar
2013-08-14 22:15 ` Nathan Zimmer
2013-08-16 16:36 ` Dave Hansen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1375465467-40488-6-git-send-email-nzimmer@sgi.com \
--to=nzimmer@sgi.com \
--cc=akpm@linux-foundation.org \
--cc=daniel@numascale-asia.com \
--cc=gregkh@linuxfoundation.org \
--cc=holt@sgi.com \
--cc=hpa@zytor.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=mingo@kernel.org \
--cc=rob@landley.net \
--cc=travis@sgi.com \
--cc=yinghai@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox