From: Vladimir Davydov <vdavydov@parallels.com>
To: linux-kernel@vger.kernel.org
Cc: linux-mm@kvack.org, criu@openvz.org, devel@openvz.org,
xemul@parallels.com, khorenko@parallels.com
Subject: [PATCH RFC 07/13] mm: PRAM: preserve persistent memory at boot
Date: Mon, 1 Jul 2013 15:57:42 +0400 [thread overview]
Message-ID: <c001a99771c7606ce9002e92d3fe7db8a80fa620.1372582756.git.vdavydov@parallels.com> (raw)
In-Reply-To: <cover.1372582754.git.vdavydov@parallels.com>
Persistent memory preservation is done by reserving memory pages
belonging to PRAM at early boot so that they will not be recycled. If
memory reservation fails for some reason (e.g. memory region is busy),
persistent memory will be lost.
Currently, PRAM preservation is only implemented for x86.
---
arch/x86/kernel/setup.c | 2 +
arch/x86/mm/init_32.c | 4 +
arch/x86/mm/init_64.c | 4 +
include/linux/pram.h | 8 ++
mm/Kconfig | 1 +
mm/pram.c | 203 +++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 222 insertions(+)
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fae9134..caf1b29 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -69,6 +69,7 @@
#include <linux/crash_dump.h>
#include <linux/tboot.h>
#include <linux/jiffies.h>
+#include <linux/pram.h>
#include <video/edid.h>
@@ -1127,6 +1128,7 @@ void __init setup_arch(char **cmdline_p)
acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start);
#endif
+ pram_reserve();
reserve_crashkernel();
vsmp_init();
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2d19001..da38426 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -31,6 +31,7 @@
#include <linux/initrd.h>
#include <linux/cpumask.h>
#include <linux/gfp.h>
+#include <linux/pram.h>
#include <asm/asm.h>
#include <asm/bios_ebda.h>
@@ -779,6 +780,9 @@ void __init mem_init(void)
after_bootmem = 1;
+ totalram_pages += pram_reserved_pages;
+ reservedpages -= pram_reserved_pages;
+
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 474e28f..8aa4bc4 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -32,6 +32,7 @@
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>
#include <linux/gfp.h>
+#include <linux/pram.h>
#include <asm/processor.h>
#include <asm/bios_ebda.h>
@@ -1077,6 +1078,9 @@ void __init mem_init(void)
reservedpages = max_pfn - totalram_pages - absent_pages;
after_bootmem = 1;
+ totalram_pages += pram_reserved_pages;
+ reservedpages -= pram_reserved_pages;
+
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
diff --git a/include/linux/pram.h b/include/linux/pram.h
index 61c536c..b7f2799 100644
--- a/include/linux/pram.h
+++ b/include/linux/pram.h
@@ -47,4 +47,12 @@ extern ssize_t pram_write(struct pram_stream *ps,
const void *buf, size_t count);
extern size_t pram_read(struct pram_stream *ps, void *buf, size_t count);
+#ifdef CONFIG_PRAM
+extern unsigned long pram_reserved_pages;
+extern void pram_reserve(void);
+#else
+#define pram_reserved_pages 0UL
+static inline void pram_reserve(void) { }
+#endif
+
#endif /* _LINUX_PRAM_H */
diff --git a/mm/Kconfig b/mm/Kconfig
index 46337e8..f1e11a0 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -474,6 +474,7 @@ config FRONTSWAP
config PRAM
bool "Persistent over-kexec memory storage"
+ depends on X86
default n
help
This option adds the kernel API that enables saving memory pages of
diff --git a/mm/pram.c b/mm/pram.c
index 58ae9ed..380735f 100644
--- a/mm/pram.c
+++ b/mm/pram.c
@@ -1,3 +1,4 @@
+#include <linux/bootmem.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/highmem.h>
@@ -5,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/kobject.h>
#include <linux/list.h>
+#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -93,6 +95,8 @@ static struct pram_super_block *pram_sb;
static LIST_HEAD(pram_nodes); /* linked through page::lru */
static DEFINE_MUTEX(pram_mutex); /* serializes open/close */
+unsigned long __initdata pram_reserved_pages;
+
/*
* The PRAM super block pfn, see above.
*/
@@ -102,6 +106,196 @@ static int __init parse_pram_sb_pfn(char *arg)
}
early_param("pram", parse_pram_sb_pfn);
+/*
+ * Translate the pfn of a PRAM metadata page into a kernel virtual address.
+ *
+ * Returns ERR_PTR(-EINVAL) if @pfn is not below max_low_pfn, otherwise the
+ * address produced by pfn_to_kaddr().
+ */
+static void * __init pram_map_meta(unsigned long pfn)
+{
+	return pfn < max_low_pfn ? pfn_to_kaddr(pfn) : ERR_PTR(-EINVAL);
+}
+
+/*
+ * Reserve the single page frame @pfn with the early boot allocator and
+ * account it in pram_reserved_pages.
+ *
+ * Returns 0 on success and -EINVAL if @pfn is not below max_pfn.  With
+ * CONFIG_NO_BOOTMEM, -EBUSY is returned when memblock already reports the
+ * region as reserved or memblock_reserve() fails; otherwise the result of
+ * reserve_bootmem() with BOOTMEM_EXCLUSIVE is passed through.
+ */
+static int __init pram_reserve_page(unsigned long pfn)
+{
+	phys_addr_t start;
+	int ret;
+
+	if (pfn >= max_pfn)
+		return -EINVAL;
+
+	start = PFN_PHYS(pfn);
+
+#ifdef CONFIG_NO_BOOTMEM
+	ret = 0;
+	if (memblock_is_region_reserved(start, PAGE_SIZE))
+		ret = -EBUSY;
+	else if (memblock_reserve(start, PAGE_SIZE) < 0)
+		ret = -EBUSY;
+#else
+	ret = reserve_bootmem(start, PAGE_SIZE, BOOTMEM_EXCLUSIVE);
+#endif
+	if (ret)
+		return ret;
+
+	/* only successfully reserved pages are counted */
+	pram_reserved_pages++;
+	return 0;
+}
+
+/*
+ * Counterpart of pram_reserve_page(): hand the page frame @pfn back via
+ * free_bootmem() and drop it from the pram_reserved_pages count.
+ */
+static void __init pram_unreserve_page(unsigned long pfn)
+{
+	phys_addr_t start = PFN_PHYS(pfn);
+
+	free_bootmem(start, PAGE_SIZE);
+	pram_reserved_pages--;
+}
+
+/*
+ * Reserve every page recorded in @link.
+ *
+ * Entries are walked in order until the first one with a zero pfn or until
+ * PRAM_LINK_ENTRIES_MAX entries have been processed.  PRAM_PAGE_LRU is
+ * cleared in the flags of each entry whose page was reserved.
+ *
+ * Returns 0 on success.  If reserving a page fails, the pages reserved for
+ * the preceding entries are released again and the error from
+ * pram_reserve_page() is returned.
+ */
+static int __init pram_reserve_link(struct pram_link *link)
+{
+	int nr, ret;
+
+	for (nr = 0; nr < PRAM_LINK_ENTRIES_MAX; nr++) {
+		struct pram_entry *ent = link->entry + nr;
+
+		if (!ent->pfn)
+			return 0;
+
+		ret = pram_reserve_page(ent->pfn);
+		if (ret)
+			goto rollback;
+
+		ent->flags &= ~PRAM_PAGE_LRU;
+	}
+	return 0;
+
+rollback:
+	/* entry[nr] failed; release entries nr-1 down to 0 */
+	while (nr-- > 0)
+		pram_unreserve_page(link->entry[nr].pfn);
+	return ret;
+}
+
+/*
+ * Release every page recorded in @link, stopping at the first entry with a
+ * zero pfn or after PRAM_LINK_ENTRIES_MAX entries.
+ */
+static void __init pram_unreserve_link(struct pram_link *link)
+{
+	int idx = 0;
+
+	while (idx < PRAM_LINK_ENTRIES_MAX) {
+		unsigned long pfn = link->entry[idx++].pfn;
+
+		if (!pfn)
+			return;
+		pram_unreserve_page(pfn);
+	}
+}
+
+/*
+ * Reserve all link pages of @node and the pages recorded in them.
+ *
+ * The link chain starting at node->link_pfn is followed until a zero pfn.
+ * Each link page is reserved, mapped with pram_map_meta() and passed to
+ * pram_reserve_link().
+ *
+ * Returns 0 on success.  On failure everything this function reserved is
+ * released again and the error code is returned.
+ */
+static int __init pram_reserve_node(struct pram_node *node)
+{
+	unsigned long link_pfn;
+	struct pram_link *link;
+	int err = 0;
+
+	link_pfn = node->link_pfn;
+	while (link_pfn) {
+		err = pram_reserve_page(link_pfn);
+		if (err)
+			break;
+		link = pram_map_meta(link_pfn);
+		if (IS_ERR(link)) {
+			pram_unreserve_page(link_pfn);
+			err = PTR_ERR(link);
+			break;
+		}
+		err = pram_reserve_link(link);
+		if (err) {
+			pram_unreserve_page(link_pfn);
+			break;
+		}
+		link_pfn = link->link_pfn;
+	}
+	if (err) {
+		/*
+		 * Undo: revisit the links that were fully reserved, i.e. all
+		 * links before the one that failed.  The failing link's own
+		 * page was either never reserved or already released above.
+		 */
+		unsigned long bad_pfn = link_pfn;
+
+		link_pfn = node->link_pfn;
+		while (link_pfn != bad_pfn) {
+			unsigned long cur_pfn = link_pfn;
+
+			link = pfn_to_kaddr(cur_pfn);
+			pram_unreserve_link(link);
+			/*
+			 * Fetch the successor first, then release the page of
+			 * the link just processed (not the successor's page).
+			 */
+			link_pfn = link->link_pfn;
+			pram_unreserve_page(cur_pfn);
+		}
+	}
+	return err;
+}
+
+/*
+ * Release all link pages of @node and the pages recorded in them.
+ *
+ * The link chain starting at node->link_pfn is followed until a zero pfn;
+ * for each link its entries are released with pram_unreserve_link() and
+ * then the link page itself is released.
+ */
+static void __init pram_unreserve_node(struct pram_node *node)
+{
+	unsigned long link_pfn;
+	struct pram_link *link;
+
+	link_pfn = node->link_pfn;
+	while (link_pfn) {
+		unsigned long cur_pfn = link_pfn;
+
+		link = pfn_to_kaddr(cur_pfn);
+		pram_unreserve_link(link);
+		/*
+		 * Fetch the successor first, then release the page of the
+		 * link just processed (not the successor's page, which would
+		 * end up being pfn 0 for the last link).
+		 */
+		link_pfn = link->link_pfn;
+		pram_unreserve_page(cur_pfn);
+	}
+}
+
+/*
+ * Mark pages that belong to persistent memory reserved.
+ *
+ * This function should be called at boot time as early as possible to prevent
+ * persistent memory from being recycled.
+ *
+ * Nothing is done when pram_sb_pfn is zero.  Otherwise the super block page
+ * is reserved and mapped, and every node on the chain starting at
+ * pram_sb->node_pfn is reserved together with its link pages and the pages
+ * recorded in them.  If any step fails, everything reserved so far is
+ * released again, the error is logged and pram_sb is reset to NULL.
+ */
+void __init pram_reserve(void)
+{
+	unsigned long node_pfn;
+	struct pram_node *node;
+	int err = 0;
+
+	if (!pram_sb_pfn)
+		return;
+
+	pr_info("PRAM: Examining persistent memory...\n");
+
+	err = pram_reserve_page(pram_sb_pfn);
+	if (err)
+		goto out;
+	pram_sb = pram_map_meta(pram_sb_pfn);
+	if (IS_ERR(pram_sb)) {
+		pram_unreserve_page(pram_sb_pfn);
+		err = PTR_ERR(pram_sb);
+		goto out;
+	}
+
+	node_pfn = pram_sb->node_pfn;
+	while (node_pfn) {
+		err = pram_reserve_page(node_pfn);
+		if (err)
+			break;
+		node = pram_map_meta(node_pfn);
+		if (IS_ERR(node)) {
+			pram_unreserve_page(node_pfn);
+			err = PTR_ERR(node);
+			break;
+		}
+		err = pram_reserve_node(node);
+		if (err) {
+			pram_unreserve_page(node_pfn);
+			break;
+		}
+		node_pfn = node->node_pfn;
+	}
+
+	if (err) {
+		/*
+		 * Undo: revisit the nodes that were fully reserved, i.e. all
+		 * nodes before the one that failed.  The failing node's own
+		 * page was either never reserved or already released above.
+		 */
+		unsigned long bad_pfn = node_pfn;
+
+		node_pfn = pram_sb->node_pfn;
+		while (node_pfn != bad_pfn) {
+			unsigned long cur_pfn = node_pfn;
+
+			node = pfn_to_kaddr(cur_pfn);
+			pram_unreserve_node(node);
+			/*
+			 * Fetch the successor first, then release the page of
+			 * the node just processed (not the successor's page).
+			 */
+			node_pfn = node->node_pfn;
+			pram_unreserve_page(cur_pfn);
+		}
+		pram_unreserve_page(pram_sb_pfn);
+	}
+
+out:
+	if (err) {
+		/* every failure path above must have released all pages */
+		BUG_ON(pram_reserved_pages > 0);
+		pr_err("PRAM: Reservation failed: %d\n", err);
+		pram_sb = NULL;
+	} else {
+		pr_info("PRAM: %lu pages reserved\n", pram_reserved_pages);
+	}
+}
+
static inline struct page *pram_alloc_page(gfp_t gfp_mask)
{
return alloc_page(gfp_mask);
@@ -109,6 +303,9 @@ static inline struct page *pram_alloc_page(gfp_t gfp_mask)
static inline void pram_free_page(void *addr)
{
+ /* since early reservations are used for preserving persistent
+ * memory, the page may have the reserved bit set */
+ ClearPageReserved(virt_to_page(addr));
free_page((unsigned long)addr);
}
@@ -146,6 +343,9 @@ static void pram_truncate_link(struct pram_link *link)
if (!pfn)
continue;
page = pfn_to_page(pfn);
+ /* since early reservations are used for preserving persistent
+ * memory, the page may have the reserved bit set */
+ ClearPageReserved(page);
put_page(page);
}
}
@@ -426,6 +626,9 @@ static struct page *__pram_load_page(struct pram_stream *ps, int *flags)
entry = &link->entry[ps->page_index];
if (entry->pfn) {
page = pfn_to_page(entry->pfn);
+ /* since early reservations are used for preserving persistent
+ * memory, the page may have the reserved bit set */
+ ClearPageReserved(page);
if (flags)
*flags = entry->flags;
} else
--
1.7.10.4
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2013-07-01 11:58 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-07-01 11:57 [PATCH RFC 00/13] PRAM: Persistent over-kexec memory storage Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 01/13] mm: add PRAM API stubs and Kconfig Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 02/13] mm: PRAM: implement node load and save functions Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 03/13] mm: PRAM: implement page stream operations Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 04/13] mm: PRAM: implement byte " Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 05/13] mm: PRAM: link nodes by pfn before reboot Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 06/13] mm: PRAM: introduce super block Vladimir Davydov
2013-07-01 11:57 ` Vladimir Davydov [this message]
2013-07-01 11:57 ` [PATCH RFC 08/13] mm: PRAM: checksum saved data Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 09/13] mm: PRAM: ban pages that have been reserved at boot time Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 10/13] mm: PRAM: allow to ban arbitrary memory ranges Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 11/13] mm: PRAM: allow to free persistent memory from userspace Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 12/13] mm: shmem: introduce shmem_insert_page Vladimir Davydov
2013-07-01 11:57 ` [PATCH RFC 13/13] mm: shmem: enable saving to PRAM Vladimir Davydov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c001a99771c7606ce9002e92d3fe7db8a80fa620.1372582756.git.vdavydov@parallels.com \
--to=vdavydov@parallels.com \
--cc=criu@openvz.org \
--cc=devel@openvz.org \
--cc=khorenko@parallels.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=xemul@parallels.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox