From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
To: LKML <linux-kernel@vger.kernel.org>,
linux-mm <linux-mm@kvack.org>,
linuxppc-dev@lists.ozlabs.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
isimatu.yasuaki@jp.fujitsu.com
Subject: [PATCH 4/8] Create a sysfs release file for hot removing memory
Date: Wed, 24 Jul 2013 13:39:29 -0500 [thread overview]
Message-ID: <51F01F61.6010609@linux.vnet.ibm.com> (raw)
In-Reply-To: <51F01E06.6090800@linux.vnet.ibm.com>
Provide a sysfs interface to hot remove memory.
This patch updates the sysfs interface for hot add of memory to also
provide a sysfs interface to hot remove memory. The use of this interface
is controlled with the ARCH_MEMORY_PROBE config option, currently used
by x86 and powerpc. This patch also updates the name of this option to
CONFIG_ARCH_MEMORY_PROBE_RELEASE to indicate that it controls the probe
and release sysfs interfaces.
Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
---
Documentation/memory-hotplug.txt | 34 ++++++++++++----
arch/powerpc/Kconfig | 2
arch/x86/Kconfig | 2
drivers/base/memory.c | 81 ++++++++++++++++++++++++++++++++++-----
4 files changed, 100 insertions(+), 19 deletions(-)
Index: linux/drivers/base/memory.c
===================================================================
--- linux.orig/drivers/base/memory.c
+++ linux/drivers/base/memory.c
@@ -129,22 +129,30 @@ static ssize_t show_mem_end_phys_index(s
return sprintf(buf, "%08lx\n", phys_index);
}
+static int is_memblock_removable(unsigned long start_section_nr)
+{
+ unsigned long pfn;
+ int i, ret = 1;
+
+ for (i = 0; i < sections_per_block; i++) {
+ pfn = section_nr_to_pfn(start_section_nr + i);
+ ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+ }
+
+ return ret;
+}
+
/*
* Show whether the section of memory is likely to be hot-removable
*/
static ssize_t show_mem_removable(struct device *dev,
struct device_attribute *attr, char *buf)
{
- unsigned long i, pfn;
- int ret = 1;
+ int ret;
struct memory_block *mem =
container_of(dev, struct memory_block, dev);
- for (i = 0; i < sections_per_block; i++) {
- pfn = section_nr_to_pfn(mem->start_section_nr + i);
- ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
- }
-
+ ret = is_memblock_removable(mem->start_section_nr);
return sprintf(buf, "%d\n", ret);
}
@@ -421,7 +429,7 @@ static DEVICE_ATTR(block_size_bytes, 044
* as well as ppc64 will do all of their discovery in userspace
* and will require this interface.
*/
-#ifdef CONFIG_ARCH_MEMORY_PROBE
+#ifdef CONFIG_ARCH_MEMORY_PROBE_RELEASE
static ssize_t
memory_probe_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
@@ -444,6 +452,60 @@ memory_probe_store(struct device *dev, s
}
static DEVICE_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
+
+static int is_memblock_offline(struct memory_block *mem, void *arg)
+{
+ if (mem->state == MEM_ONLINE)
+ return 1;
+
+ return 0;
+}
+
+static ssize_t
+memory_release_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u64 phys_addr;
+ int nid, ret = 0;
+ unsigned long block_size, pfn;
+ unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;
+
+ lock_device_hotplug();
+
+ ret = kstrtoull(buf, 0, &phys_addr);
+ if (ret)
+ goto out;
+
+ if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ block_size = get_memory_block_size();
+ nid = memory_add_physaddr_to_nid(phys_addr);
+
+ /* Ensure memory is offline and removable before removing it. */
+ ret = walk_memory_range(PFN_DOWN(phys_addr),
+ PFN_UP(phys_addr + block_size - 1), NULL,
+ is_memblock_offline);
+ if (!ret) {
+ pfn = phys_addr >> PAGE_SHIFT;
+ ret = !is_memblock_removable(pfn_to_section_nr(pfn));
+ }
+
+ if (ret) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ remove_memory(nid, phys_addr, block_size);
+
+out:
+ unlock_device_hotplug();
+ return ret ? ret : count;
+}
+
+static DEVICE_ATTR(release, S_IWUSR, NULL, memory_release_store);
#endif
#ifdef CONFIG_MEMORY_FAILURE
@@ -694,8 +756,9 @@ bool is_memblock_offlined(struct memory_
}
static struct attribute *memory_root_attrs[] = {
-#ifdef CONFIG_ARCH_MEMORY_PROBE
+#ifdef CONFIG_ARCH_MEMORY_PROBE_RELEASE
&dev_attr_probe.attr,
+ &dev_attr_release.attr,
#endif
#ifdef CONFIG_MEMORY_FAILURE
Index: linux/arch/powerpc/Kconfig
===================================================================
--- linux.orig/arch/powerpc/Kconfig
+++ linux/arch/powerpc/Kconfig
@@ -438,7 +438,7 @@ config SYS_SUPPORTS_HUGETLBFS
source "mm/Kconfig"
-config ARCH_MEMORY_PROBE
+config ARCH_MEMORY_PROBE_RELEASE
def_bool y
depends on MEMORY_HOTPLUG
Index: linux/arch/x86/Kconfig
===================================================================
--- linux.orig/arch/x86/Kconfig
+++ linux/arch/x86/Kconfig
@@ -1343,7 +1343,7 @@ config ARCH_SELECT_MEMORY_MODEL
def_bool y
depends on ARCH_SPARSEMEM_ENABLE
-config ARCH_MEMORY_PROBE
+config ARCH_MEMORY_PROBE_RELEASE
def_bool y
depends on X86_64 && MEMORY_HOTPLUG
Index: linux/Documentation/memory-hotplug.txt
===================================================================
--- linux.orig/Documentation/memory-hotplug.txt
+++ linux/Documentation/memory-hotplug.txt
@@ -17,7 +17,9 @@ be changed often.
3. sysfs files for memory hotplug
4. Physical memory hot-add phase
4.1 Hardware(Firmware) Support
- 4.2 Notify memory hot-add event by hand
+ 4.2 Notify memory hot-add and hot-remove event by hand
+ 4.2.1 Probe interface
+ 4.2.2 Release interface
5. Logical Memory hot-add phase
5.1. State of memory
5.2. How to online memory
@@ -69,7 +71,7 @@ management tables, and makes sysfs files
If firmware supports notification of connection of new memory to OS,
this phase is triggered automatically. ACPI can notify this event. If not,
-"probe" operation by system administration is used instead.
+"probe" and "release" operations by system administration are used instead.
(see Section 4.).
Logical Memory Hotplug phase is to change memory state into
@@ -208,20 +210,23 @@ calls hotplug code for all of objects wh
If memory device is found, memory hotplug code will be called.
-4.2 Notify memory hot-add event by hand
+4.2 Notify memory hot-add and hot-remove event by hand
------------
In some environments, especially virtualized environment, firmware will not
notify memory hotplug event to the kernel. For such environment, "probe"
-interface is supported. This interface depends on CONFIG_ARCH_MEMORY_PROBE.
+and "release" interfaces are supported. These interfaces depend on
+CONFIG_ARCH_MEMORY_PROBE_RELEASE.
-Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc but it does not
-contain highly architecture codes. Please add config if you need "probe"
-interface.
+Now, CONFIG_ARCH_MEMORY_PROBE_RELEASE is supported only by x86_64 and powerpc,
+but it does not contain highly architecture-specific code. Please add the
+config if you need the "probe" and "release" interfaces.
+4.2.1 "probe" interface
+------------
Probe interface is located at
/sys/devices/system/memory/probe
-You can tell the physical address of new memory to the kernel by
+You can tell the physical address of new memory to hot-add to the kernel by
% echo start_address_of_new_memory > /sys/devices/system/memory/probe
@@ -230,6 +235,19 @@ memory range is hot-added. In this case,
current implementation). You'll have to online memory by yourself.
Please see "How to online memory" in this text.
+4.2.2 "release" interface
+------------
+Release interface is located at
+/sys/devices/system/memory/release
+
+You can tell the physical address of memory to hot-remove from the kernel by
+
+% echo start_address_of_memory > /sys/devices/system/memory/release
+
+Then, [start_address_of_memory, start_address_of_memory + memory_block_size)
+memory range is hot-removed. You will need to ensure all of the memory in
+this range has been offlined prior to using this interface. Please see
+"How to offline memory" in this text.
------------------------------
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2013-07-24 18:39 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-07-24 18:33 [PATCH 0/8] Correct memory hot add/remove for powerpc Nathan Fontenot
2013-07-24 18:35 ` [PATCH 1/8] register bootmem pages for powerpc when sparse vmemmap is not defined Nathan Fontenot
2013-08-02 2:27 ` Michael Ellerman
2013-08-02 19:04 ` Nathan Fontenot
2013-07-24 18:36 ` [PATCH 2/8] Mark powerpc memory resources as busy Nathan Fontenot
2013-08-02 2:28 ` Michael Ellerman
2013-08-02 19:05 ` Nathan Fontenot
2013-08-05 3:11 ` Michael Ellerman
2013-07-24 18:37 ` [PATCH 3/8] Add all memory via sysfs probe interface at once Nathan Fontenot
2013-08-02 2:32 ` Michael Ellerman
2013-08-02 19:13 ` Nathan Fontenot
2013-08-05 3:13 ` Michael Ellerman
2013-08-06 20:44 ` Nathan Fontenot
2013-08-09 7:16 ` Benjamin Herrenschmidt
2013-07-24 18:39 ` Nathan Fontenot [this message]
2013-07-24 18:41 ` [PATCH 5/8] Add notifiers for memory hot add/remove Nathan Fontenot
2013-07-24 18:44 ` [PATCH 6/8] Update the powerpc arch specific memory add/remove handlers Nathan Fontenot
2013-07-24 18:45 ` [PATCH 7/8] Add memory hot add/remove notifier handlers for pwoerpc Nathan Fontenot
2013-07-24 18:47 ` [PATCH 8/8] Remove no longer needed powerpc memory node update handler Nathan Fontenot
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=51F01F61.6010609@linux.vnet.ibm.com \
--to=nfont@linux.vnet.ibm.com \
--cc=gregkh@linuxfoundation.org \
--cc=isimatu.yasuaki@jp.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linuxppc-dev@lists.ozlabs.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox