linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Mel Gorman <mgorman@suse.de>
To: Linux-MM <linux-mm@kvack.org>
Cc: Robin Holt <holt@sgi.com>, Nathan Zimmer <nzimmer@sgi.com>,
	Daniel Rahn <drahn@suse.com>, Davidlohr Bueso <dbueso@suse.com>,
	Dave Hansen <dave.hansen@intel.com>, Tom Vaden <tom.vaden@hp.com>,
	Scott Norton <scott.norton@hp.com>,
	LKML <linux-kernel@vger.kernel.org>, Mel Gorman <mgorman@suse.de>
Subject: [PATCH 05/14] mm: meminit: Make __early_pfn_to_nid SMP-safe and introduce meminit_pfn_in_nid
Date: Mon, 13 Apr 2015 11:16:57 +0100	[thread overview]
Message-ID: <1428920226-18147-6-git-send-email-mgorman@suse.de> (raw)
In-Reply-To: <1428920226-18147-1-git-send-email-mgorman@suse.de>

The generic and arch-specific implementations of __early_pfn_to_nid() use
static variables to cache recent lookups. Without the cache, boot times
are much higher due to the excessive memblock lookups, but the cache assumes
that memory initialisation is single-threaded. Parallel memory initialisation
will break that assumption, so this patch makes __early_pfn_to_nid() SMP-safe
by requiring the caller to cache recent search information. early_pfn_to_nid()
keeps the same interface but is only safe to use early in boot due to
the use of a global static variable. meminit_pfn_in_nid() is an SMP-safe
version for which callers must maintain their own state.

Signed-off-by: Mel Gorman <mgorman@suse.de>
---
 arch/ia64/mm/numa.c    | 19 +++++++------------
 include/linux/mm.h     |  8 ++++++--
 include/linux/mmzone.h | 16 +++++++++++++++-
 mm/page_alloc.c        | 40 +++++++++++++++++++++++++---------------
 4 files changed, 53 insertions(+), 30 deletions(-)

diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
index ea21d4cad540..aa19b7ac8222 100644
--- a/arch/ia64/mm/numa.c
+++ b/arch/ia64/mm/numa.c
@@ -58,27 +58,22 @@ paddr_to_nid(unsigned long paddr)
  * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
  * the section resides.
  */
-int __meminit __early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn,
+					struct mminit_pfnnid_cache *state)
 {
 	int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
-	/*
-	 * NOTE: The following SMP-unsafe globals are only used early in boot
-	 * when the kernel is running single-threaded.
-	 */
-	static int __meminitdata last_ssec, last_esec;
-	static int __meminitdata last_nid;
 
-	if (section >= last_ssec && section < last_esec)
-		return last_nid;
+	if (section >= state->last_start && section < state->last_end)
+		return state->last_nid;
 
 	for (i = 0; i < num_node_memblks; i++) {
 		ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
 		esec = (node_memblk[i].start_paddr + node_memblk[i].size +
 			((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
 		if (section >= ssec && section < esec) {
-			last_ssec = ssec;
-			last_esec = esec;
-			last_nid = node_memblk[i].nid;
+			state->last_start = ssec;
+			state->last_end = esec;
+			state->last_nid = node_memblk[i].nid;
 			return node_memblk[i].nid;
 		}
 	}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b6f82a31028a..3a4c9f72c080 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1802,7 +1802,8 @@ extern void sparse_memory_present_with_active_regions(int nid);
 
 #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
     !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
-static inline int __early_pfn_to_nid(unsigned long pfn)
+static inline int __early_pfn_to_nid(unsigned long pfn,
+					struct mminit_pfnnid_cache *state)
 {
 	return 0;
 }
@@ -1810,7 +1811,10 @@ static inline int __early_pfn_to_nid(unsigned long pfn)
 /* please see mm/page_alloc.c */
 extern int __meminit early_pfn_to_nid(unsigned long pfn);
 /* there is a per-arch backend function. */
-extern int __meminit __early_pfn_to_nid(unsigned long pfn);
+extern int __meminit __early_pfn_to_nid(unsigned long pfn,
+					struct mminit_pfnnid_cache *state);
+bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
+					struct mminit_pfnnid_cache *state);
 #endif
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f279d9c158cd..4ac0037de2f1 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1217,10 +1217,24 @@ void sparse_init(void);
 #define sparse_index_init(_sec, _nid)  do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
+/*
+ * During memory init memblocks map pfns to nids. The search is expensive and
+ * this caches recent lookups. The implementation of __early_pfn_to_nid
+ * may treat start/end as pfns or sections.
+ */
+struct mminit_pfnnid_cache {
+	unsigned long last_start;
+	unsigned long last_end;
+	int last_nid;
+};
+
 #ifdef CONFIG_NODES_SPAN_OTHER_NODES
 bool early_pfn_in_nid(unsigned long pfn, int nid);
+bool meminit_pfn_in_nid(unsigned long pfn, int node,
+			struct mminit_pfnnid_cache *state);
 #else
-#define early_pfn_in_nid(pfn, nid)	(1)
+#define early_pfn_in_nid(pfn, nid)		(1)
+#define meminit_pfn_in_nid(pfn, nid, state)	(1)
 #endif
 
 #ifndef early_pfn_valid
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0a0e0f280d87..f556ed63b964 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4457,39 +4457,41 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+
 /*
  * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
  */
-int __meminit __early_pfn_to_nid(unsigned long pfn)
+int __meminit __early_pfn_to_nid(unsigned long pfn,
+					struct mminit_pfnnid_cache *state)
 {
 	unsigned long start_pfn, end_pfn;
 	int nid;
-	/*
-	 * NOTE: The following SMP-unsafe globals are only used early in boot
-	 * when the kernel is running single-threaded.
-	 */
-	static unsigned long __meminitdata last_start_pfn, last_end_pfn;
-	static int __meminitdata last_nid;
 
-	if (last_start_pfn <= pfn && pfn < last_end_pfn)
-		return last_nid;
+	if (state->last_start <= pfn && pfn < state->last_end)
+		return state->last_nid;
 
 	nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
 	if (nid != -1) {
-		last_start_pfn = start_pfn;
-		last_end_pfn = end_pfn;
-		last_nid = nid;
+		state->last_start = start_pfn;
+		state->last_end = end_pfn;
+		state->last_nid = nid;
 	}
 
 	return nid;
 }
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 
+struct __meminitdata mminit_pfnnid_cache global_init_state;
+
+/* Only safe to use early in boot when initialisation is single-threaded */
 int __meminit early_pfn_to_nid(unsigned long pfn)
 {
 	int nid;
 
-	nid = __early_pfn_to_nid(pfn);
+	/* The system will behave unpredictably otherwise */
+	BUG_ON(system_state != SYSTEM_BOOTING);
+
+	nid = __early_pfn_to_nid(pfn, &global_init_state);
 	if (nid >= 0)
 		return nid;
 	/* just returns 0 */
@@ -4497,15 +4499,23 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
 }
 
 #ifdef CONFIG_NODES_SPAN_OTHER_NODES
-bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
+					struct mminit_pfnnid_cache *state)
 {
 	int nid;
 
-	nid = __early_pfn_to_nid(pfn);
+	nid = __early_pfn_to_nid(pfn, state);
 	if (nid >= 0 && nid != node)
 		return false;
 	return true;
 }
+
+/* Only safe to use early in boot when initialisation is single-threaded */
+bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
+{
+	return meminit_pfn_in_nid(pfn, node, &global_init_state);
+}
+
 #endif
 
 /**
-- 
2.1.2

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2015-04-13 10:17 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-13 10:16 [RFC PATCH 0/14] Parallel memory initialisation Mel Gorman
2015-04-13 10:16 ` [PATCH 01/14] memblock: Introduce a for_each_reserved_mem_region iterator Mel Gorman
2015-04-13 10:16 ` [PATCH 02/14] mm: meminit: Move page initialization into a separate function Mel Gorman
2015-04-13 10:16 ` [PATCH 03/14] mm: meminit: Only set page reserved in the memblock region Mel Gorman
2015-04-13 10:16 ` [PATCH 04/14] mm: page_alloc: Pass PFN to __free_pages_bootmem Mel Gorman
2015-04-13 10:16 ` Mel Gorman [this message]
2015-04-13 10:16 ` [PATCH 06/14] mm: meminit: Inline some helper functions Mel Gorman
2015-04-13 10:16 ` [PATCH 07/14] mm: meminit: Partially initialise memory if CONFIG_DEFERRED_MEM_INIT is set Mel Gorman
2015-04-13 10:17 ` [PATCH 08/14] mm: meminit: Initialise remaining memory in parallel with kswapd Mel Gorman
2015-04-13 10:17 ` [PATCH 09/14] mm: meminit: Minimise number of pfn->page lookups during initialisation Mel Gorman
2015-04-13 10:17 ` [PATCH 10/14] x86: mm: Enable deferred memory initialisation on x86-64 Mel Gorman
2015-04-13 18:21   ` Paul Bolle
2015-04-13 10:17 ` [PATCH 11/14] mm: meminit: Control parallel memory initialisation from command line and config Mel Gorman
2015-04-13 10:17 ` [PATCH 12/14] mm: meminit: Free pages in large chunks where possible Mel Gorman
2015-04-13 10:17 ` [PATCH 13/14] mm: meminit: Reduce number of times pageblocks are set during initialisation Mel Gorman
2015-04-13 10:17 ` [PATCH 14/14] mm: meminit: Remove mminit_verify_page_links Mel Gorman
2015-04-13 10:29 ` [RFC PATCH 0/14] Parallel memory initialisation Mel Gorman
2015-04-15 13:15 ` Waiman Long
2015-04-15 13:38   ` Mel Gorman
2015-04-15 14:50     ` Waiman Long
2015-04-15 15:44       ` Mel Gorman
2015-04-15 21:37         ` nzimmer
2015-04-16 18:20     ` Waiman Long
2015-04-15 14:27   ` Peter Zijlstra
2015-04-15 14:34     ` Mel Gorman
2015-04-15 14:48       ` Peter Zijlstra
2015-04-15 16:18         ` Waiman Long
2015-04-15 16:42           ` Norton, Scott J
2015-04-16  7:25 ` Andrew Morton
2015-04-16  8:46   ` Mel Gorman
2015-04-16 17:26     ` Andrew Morton
2015-04-16 17:37       ` Mel Gorman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1428920226-18147-6-git-send-email-mgorman@suse.de \
    --to=mgorman@suse.de \
    --cc=dave.hansen@intel.com \
    --cc=dbueso@suse.com \
    --cc=drahn@suse.com \
    --cc=holt@sgi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=nzimmer@sgi.com \
    --cc=scott.norton@hp.com \
    --cc=tom.vaden@hp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.