From: Tang Chen <tangchen@cn.fujitsu.com>
To: robert.moore@intel.com, lv.zheng@intel.com, rjw@sisk.pl,
lenb@kernel.org, tglx@linutronix.de, mingo@elte.hu,
hpa@zytor.com, akpm@linux-foundation.org, tj@kernel.org,
trenn@suse.de, yinghai@kernel.org, jiang.liu@huawei.com,
wency@cn.fujitsu.com, laijs@cn.fujitsu.com,
isimatu.yasuaki@jp.fujitsu.com, izumi.taku@jp.fujitsu.com,
mgorman@suse.de, minchan@kernel.org, mina86@mina86.com,
gong.chen@linux.intel.com, vasilis.liaskovitis@profitbricks.com,
lwoodman@redhat.com, riel@redhat.com, jweiner@redhat.com,
prarit@redhat.com, zhangyanfei@cn.fujitsu.com,
yanghy@cn.fujitsu.com
Cc: x86@kernel.org, linux-doc@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
linux-acpi@vger.kernel.org
Subject: [PATCH v2 RESEND 18/18] x86, numa, acpi, memory-hotplug: Make movablenode have higher priority.
Date: Fri, 2 Aug 2013 17:14:37 +0800 [thread overview]
Message-ID: <1375434877-20704-19-git-send-email-tangchen@cn.fujitsu.com> (raw)
In-Reply-To: <1375434877-20704-1-git-send-email-tangchen@cn.fujitsu.com>
Arranging hotpluggable memory as ZONE_MOVABLE will degrade NUMA performance
because the kernel itself cannot use movable memory. Users who don't use memory
hotplug and don't want to lose NUMA performance need a way to disable this
functionality, so we improved the movablecore boot option.
If users specify the original movablecore=nn@ss boot option, the kernel will
arrange [ss, ss+nn) as ZONE_MOVABLE. The kernelcore=nn@ss boot option is similar
except it specifies ZONE_NORMAL ranges.
Now, if users specify "movablenode" on the kernel command line, the kernel will
arrange the memory marked hotpluggable in SRAT as ZONE_MOVABLE. In that case,
any movablecore=nn@ss and kernelcore=nn@ss options are ignored.
Users who don't want this should simply not specify "movablenode"; the kernel
will then behave as before.
Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Wanpeng Li <liwanp@linux.vnet.ibm.com>
Reviewed-by: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
---
include/linux/memblock.h | 1 +
mm/memblock.c | 5 +++++
mm/page_alloc.c | 31 ++++++++++++++++++++++++++++---
3 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index c0bd31c..e78e32f 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -64,6 +64,7 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
void memblock_trim_memory(phys_addr_t align);
int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size);
+bool memblock_is_hotpluggable(struct memblock_region *region);
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
diff --git a/mm/memblock.c b/mm/memblock.c
index 3ea4301..c8eb5d2 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -610,6 +610,11 @@ int __init_memblock memblock_mark_hotplug(phys_addr_t base, phys_addr_t size)
return 0;
}
+bool __init_memblock memblock_is_hotpluggable(struct memblock_region *region)
+{
+ return region->flags & MEMBLOCK_HOTPLUG;
+}
+
/**
* __next_free_mem_range - next function for for_each_free_mem_range()
* @idx: pointer to u64 loop variable
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b100255..86d4381 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4948,9 +4948,35 @@ static void __init find_zone_movable_pfns_for_nodes(void)
nodemask_t saved_node_state = node_states[N_MEMORY];
unsigned long totalpages = early_calculate_totalpages();
int usable_nodes = nodes_weight(node_states[N_MEMORY]);
+ struct memblock_type *type = &memblock.memory;
+ /* Need to find movable_zone earlier when movablenode is specified. */
+ find_usable_zone_for_movable();
+
+#ifdef CONFIG_MOVABLE_NODE
/*
- * If movablecore was specified, calculate what size of
+ * If movablenode is specified, ignore kernelcore and movablecore
+ * options.
+ */
+ if (movablenode_enable_srat) {
+ for (i = 0; i < type->cnt; i++) {
+ if (!memblock_is_hotpluggable(&type->regions[i]))
+ continue;
+
+ nid = type->regions[i].nid;
+
+ usable_startpfn = PFN_DOWN(type->regions[i].base);
+ zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
+ min(usable_startpfn, zone_movable_pfn[nid]) :
+ usable_startpfn;
+ }
+
+ goto out;
+ }
+#endif
+
+ /*
+ * If movablecore=nn[KMG] was specified, calculate what size of
* kernelcore that corresponds so that memory usable for
* any allocation type is evenly spread. If both kernelcore
* and movablecore are specified, then the value of kernelcore
@@ -4976,7 +5002,6 @@ static void __init find_zone_movable_pfns_for_nodes(void)
goto out;
/* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */
- find_usable_zone_for_movable();
usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
restart:
@@ -5067,12 +5092,12 @@ restart:
if (usable_nodes && required_kernelcore > usable_nodes)
goto restart;
+out:
/* Align start of ZONE_MOVABLE on all nids to MAX_ORDER_NR_PAGES */
for (nid = 0; nid < MAX_NUMNODES; nid++)
zone_movable_pfn[nid] =
roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES);
-out:
/* restore the node_state */
node_states[N_MEMORY] = saved_node_state;
}
--
1.7.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
prev parent reply other threads:[~2013-08-02 9:16 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-08-02 9:14 [PATCH v2 RESEND 00/18] Arrange hotpluggable memory as ZONE_MOVABLE Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 01/18] acpi: Print Hot-Pluggable Field in SRAT Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 02/18] earlycpio.c: Fix the confusing comment of find_cpio_data() Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 03/18] acpi: Remove "continue" in macro INVALID_TABLE() Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 04/18] acpi: Introduce acpi_verify_initrd() to check if a table is invalid Tang Chen
2013-08-06 23:02 ` Toshi Kani
2013-08-02 9:14 ` [PATCH v2 RESEND 05/18] x86, ACPICA: Split acpi_boot_table_init() into two parts Tang Chen
2013-08-02 13:00 ` Rafael J. Wysocki
2013-08-05 3:21 ` Tang Chen
2013-08-05 13:26 ` Rafael J. Wysocki
2013-08-05 13:23 ` Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 06/18] x86, acpi, ACPICA: Initialize ACPI root table list earlier Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 07/18] x86, ACPI: Also initialize signature and length when parsing root table Tang Chen
2013-08-02 13:03 ` Rafael J. Wysocki
2013-08-05 1:33 ` Tang Chen
2013-08-05 13:28 ` Rafael J. Wysocki
2013-08-02 9:14 ` [PATCH v2 RESEND 08/18] x86: get pg_data_t's memory from other node Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 09/18] x86: Make get_ramdisk_{image|size}() global Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 10/18] x86, acpi: Try to find if SRAT is overrided earlier Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 11/18] x86, acpi: Try to find SRAT in firmware earlier Tang Chen
2013-08-06 23:33 ` Toshi Kani
2013-08-07 1:37 ` Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 12/18] x86, acpi, numa, mem_hotplug: Find hotpluggable memory in SRAT memory affinities Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 13/18] x86, numa, mem_hotplug: Skip all the regions the kernel resides in Tang Chen
2013-08-05 6:22 ` Tang Chen
2013-08-05 14:52 ` Tejun Heo
2013-08-05 15:12 ` Zhang Yanfei
2013-08-06 2:29 ` Tang Chen
2013-08-06 15:10 ` Tejun Heo
2013-08-06 2:50 ` Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 14/18] memblock, numa: Introduce flag into memblock Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 15/18] memblock, mem_hotplug: Introduce MEMBLOCK_HOTPLUG flag to mark hotpluggable regions Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 16/18] memblock, mem_hotplug: Make memblock skip hotpluggable regions by default Tang Chen
2013-08-02 9:14 ` [PATCH v2 RESEND 17/18] mem-hotplug: Introduce movablenode boot option to {en|dis}able using SRAT Tang Chen
2013-08-02 9:14 ` Tang Chen [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1375434877-20704-19-git-send-email-tangchen@cn.fujitsu.com \
--to=tangchen@cn.fujitsu.com \
--cc=akpm@linux-foundation.org \
--cc=gong.chen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=isimatu.yasuaki@jp.fujitsu.com \
--cc=izumi.taku@jp.fujitsu.com \
--cc=jiang.liu@huawei.com \
--cc=jweiner@redhat.com \
--cc=laijs@cn.fujitsu.com \
--cc=lenb@kernel.org \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=lv.zheng@intel.com \
--cc=lwoodman@redhat.com \
--cc=mgorman@suse.de \
--cc=mina86@mina86.com \
--cc=minchan@kernel.org \
--cc=mingo@elte.hu \
--cc=prarit@redhat.com \
--cc=riel@redhat.com \
--cc=rjw@sisk.pl \
--cc=robert.moore@intel.com \
--cc=tglx@linutronix.de \
--cc=tj@kernel.org \
--cc=trenn@suse.de \
--cc=vasilis.liaskovitis@profitbricks.com \
--cc=wency@cn.fujitsu.com \
--cc=x86@kernel.org \
--cc=yanghy@cn.fujitsu.com \
--cc=yinghai@kernel.org \
--cc=zhangyanfei@cn.fujitsu.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox