linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: David Rientjes <rientjes@google.com>
To: Shaohui Zheng <shaohui.zheng@intel.com>
Cc: Paul Mundt <lethal@linux-sh.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	haicheng.li@linux.intel.com, ak@linux.intel.com,
	shaohui.zheng@linux.intel.com, Yinghai Lu <yinghai@kernel.org>,
	Haicheng Li <haicheng.li@intel.com>
Subject: Re: [2/8,v3] NUMA Hotplug Emulator: infrastructure of NUMA hotplug emulation
Date: Sat, 20 Nov 2010 16:48:10 -0800 (PST)	[thread overview]
Message-ID: <alpine.DEB.2.00.1011201645230.10618@chino.kir.corp.google.com> (raw)
In-Reply-To: <20101119003225.GB3327@shaohui>

On Fri, 19 Nov 2010, Shaohui Zheng wrote:

> nr_node_ids is the possible node number. when we do regular memory online,
> it is oline to a possible node, and it is already counted in to nr_node_ids.
> 
> if you increment nr_node_ids dynamically when node online, it causes a lot of
> problems. Many data are initialized according to nr_node_ids. That is our
> experience when we debug the emulator.
> 

I think what we'll end up wanting to do is something like this, which adds 
a numa=possible=<N> parameter for x86; this will add an additional N 
possible nodes to node_possible_map that we can use to online later.  It 
also adds a new /sys/devices/system/memory/add_node file which takes a 
typical "size@start" value to hot-add an emulated node.  For example, 
using "mem=2G numa=possible=1" on the command line and doing 
echo "128M@0x80000000" > /sys/devices/system/memory/add_node would hot-add 
a node of 128M.

Comments?
---
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -33,6 +33,7 @@ s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
 int numa_off __initdata;
 static unsigned long __initdata nodemap_addr;
 static unsigned long __initdata nodemap_size;
+static unsigned long __initdata numa_possible_nodes;
 
 /*
  * Map cpu index to node index
@@ -611,7 +612,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
 
 #ifdef CONFIG_NUMA_EMU
 	if (cmdline && !numa_emulation(start_pfn, last_pfn, acpi, k8))
-		return;
+		goto out;
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
 #endif
@@ -619,14 +620,14 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
 #ifdef CONFIG_ACPI_NUMA
 	if (!numa_off && acpi && !acpi_scan_nodes(start_pfn << PAGE_SHIFT,
 						  last_pfn << PAGE_SHIFT))
-		return;
+		goto out;
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
 #endif
 
 #ifdef CONFIG_K8_NUMA
 	if (!numa_off && k8 && !k8_scan_nodes())
-		return;
+		goto out;
 	nodes_clear(node_possible_map);
 	nodes_clear(node_online_map);
 #endif
@@ -646,6 +647,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn,
 		numa_set_node(i, 0);
 	memblock_x86_register_active_regions(0, start_pfn, last_pfn);
 	setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT);
+out: __maybe_unused
+	for (i = 0; i < numa_possible_nodes; i++) {
+		int nid;
+
+		nid = first_unset_node(node_possible_map);
+		if (nid == MAX_NUMNODES)
+			break;
+		node_set(nid, node_possible_map);
+	}
 }
 
 unsigned long __init numa_free_all_bootmem(void)
@@ -675,6 +685,8 @@ static __init int numa_setup(char *opt)
 	if (!strncmp(opt, "noacpi", 6))
 		acpi_numa = -1;
 #endif
+	if (!strncmp(opt, "possible=", 9))
+		numa_possible_nodes = simple_strtoul(opt + 9, NULL, 0);
 	return 0;
 }
 early_param("numa", numa_setup);
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -353,10 +353,44 @@ memory_probe_store(struct class *class, struct class_attribute *attr,
 }
 static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
 
+static ssize_t
+memory_add_node_store(struct class *class, struct class_attribute *attr,
+		      const char *buf, size_t count)
+{
+	nodemask_t mask;
+	u64 start, size;
+	char *p;
+	int nid;
+	int ret;
+
+	size = memparse(buf, &p);
+	if (size < (PAGES_PER_SECTION << PAGE_SHIFT))
+		return -EINVAL;
+	if (*p != '@')
+		return -EINVAL;
+
+	start = simple_strtoull(p + 1, NULL, 0);
+
+	nodes_andnot(mask, node_possible_map, node_online_map);
+	nid = first_node(mask);
+	if (nid == MAX_NUMNODES)
+		return -EINVAL;
+
+	ret = add_memory(nid, start, size);
+	return ret ? ret : count;
+}
+static CLASS_ATTR(add_node, S_IWUSR, NULL, memory_add_node_store);
+
 static int memory_probe_init(void)
 {
-	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
+	int err;
+
+	err = sysfs_create_file(&memory_sysdev_class.kset.kobj,
 				&class_attr_probe.attr);
+	if (err)
+		return err;
+	return sysfs_create_file(&memory_sysdev_class.kset.kobj,
+				&class_attr_add_node.attr);
 }
 #else
 static inline int memory_probe_init(void)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom policy in Canada: sign http://dissolvethecrtc.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2010-11-21  0:48 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-17  2:07 [0/8,v3] NUMA Hotplug Emulator - Introduction & Feedbacks shaohui.zheng
2010-11-17  2:08 ` [1/8,v3] NUMA Hotplug Emulator: add function to hide memory region via e820 table shaohui.zheng
2010-11-17  8:16   ` David Rientjes
2010-11-18  9:20     ` Shaohui Zheng
2010-11-18 21:16       ` David Rientjes
2010-11-19  0:12         ` Shaohui Zheng
2010-11-21  0:45           ` David Rientjes
2010-11-21 14:00             ` Américo Wang
2010-11-21 21:33               ` David Rientjes
2010-11-17  2:08 ` [2/8,v3] NUMA Hotplug Emulator: infrastructure of NUMA hotplug emulation shaohui.zheng
2010-11-17  8:16   ` David Rientjes
2010-11-17  7:51     ` Shaohui Zheng
2010-11-17 21:10       ` David Rientjes
2010-11-18  4:14         ` Shaohui Zheng
2010-11-18  6:27           ` Paul Mundt
2010-11-18  5:27             ` Shaohui Zheng
2010-11-18 21:24               ` David Rientjes
2010-11-19  0:32                 ` Shaohui Zheng
2010-11-21  0:48                   ` David Rientjes [this message]
2010-11-21  2:28                     ` [patch 1/2] x86: add numa=possible command line option David Rientjes
2010-11-21  2:28                       ` [patch 2/2] mm: add node hotplug emulation David Rientjes
2010-11-21 17:34                         ` Greg KH
2010-11-21 21:48                           ` David Rientjes
2010-11-21 23:08                             ` [patch 2/2 v2] " David Rientjes
2010-11-22  0:56                               ` Greg KH
2010-11-28  1:52                                 ` David Rientjes
2010-11-28  5:17                                   ` Greg KH
2010-11-30  0:04                                     ` David Rientjes
2010-11-21 14:26                       ` [patch 1/2] x86: add numa=possible command line option Américo Wang
2010-11-21 21:46                         ` David Rientjes
2010-11-22 15:43                           ` Américo Wang
2010-11-21 15:14                     ` [2/8,v3] NUMA Hotplug Emulator: infrastructure of NUMA hotplug emulation Li, Haicheng
2010-11-21 21:42                       ` David Rientjes
2010-11-18 21:19           ` David Rientjes
2010-11-17  2:08 ` [3/8,v3] NUMA Hotplug Emulator: Userland interface to hotplug-add fake offlined nodes shaohui.zheng
2010-11-17  8:16   ` David Rientjes
2010-11-17  2:08 ` [4/8,v3] NUMA Hotplug Emulator: Abstract cpu register functions shaohui.zheng
2010-11-17  2:08 ` [5/8,v3] NUMA Hotplug Emulator: support cpu probe/release in x86 shaohui.zheng
2010-11-21 14:45   ` Américo Wang
2010-11-22  0:01     ` Shaohui Zheng
2010-11-22 15:51       ` Américo Wang
2010-11-22 23:29         ` Shaohui Zheng
2010-11-17  2:08 ` [6/8,v3] NUMA Hotplug Emulator: Fake CPU socket with logical CPU on x86 shaohui.zheng
2010-11-17  2:08 ` [7/8,v3] NUMA Hotplug Emulator: extend memory probe interface to support NUMA shaohui.zheng
2010-11-17 18:50   ` Dave Hansen
2010-11-17 21:18     ` David Rientjes
2010-11-17 21:55       ` Dave Hansen
2010-11-17 22:44         ` David Rientjes
2010-11-17 23:00           ` Dave Hansen
2010-11-17 23:17             ` David Rientjes
2010-11-18 16:59           ` Aaron Durbin
2010-11-18  4:48       ` Shaohui Zheng
2010-11-18  6:24         ` Paul Mundt
2010-11-18 21:28           ` David Rientjes
2010-11-18 21:31         ` David Rientjes
2010-11-18  4:36     ` Shaohui Zheng
2010-11-19  7:51     ` Shaohui Zheng
2010-11-19 16:36       ` Dave Hansen
2010-11-17  2:08 ` [8/8,v3] NUMA Hotplug Emulator: documentation shaohui.zheng
2010-11-17 23:06   ` Randy Dunlap
2010-11-18  2:31     ` Shaohui Zheng
2010-11-21 15:03   ` Américo Wang
2010-11-21 15:16     ` Li, Haicheng
2010-11-21 23:33     ` Shaohui Zheng
2010-11-22 16:04       ` Américo Wang
2010-11-22 23:23         ` Shaohui Zheng
2010-11-17  5:22 ` [0/8,v3] NUMA Hotplug Emulator - Introduction & Feedbacks Paul Mundt
2010-11-19  5:54   ` Shaohui Zheng
2010-11-17  9:26 ` Yinghai Lu
2010-11-18  2:03   ` Shaohui Zheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=alpine.DEB.2.00.1011201645230.10618@chino.kir.corp.google.com \
    --to=rientjes@google.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=haicheng.li@intel.com \
    --cc=haicheng.li@linux.intel.com \
    --cc=lethal@linux-sh.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=shaohui.zheng@intel.com \
    --cc=shaohui.zheng@linux.intel.com \
    --cc=yinghai@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox