linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Changyuan Lyu <changyuanl@google.com>
To: linux-kernel@vger.kernel.org
Cc: akpm@linux-foundation.org, anthony.yznaga@oracle.com,
	arnd@arndb.de,  ashish.kalra@amd.com, benh@kernel.crashing.org,
	bp@alien8.de,  catalin.marinas@arm.com, corbet@lwn.net,
	dave.hansen@linux.intel.com,  devicetree@vger.kernel.org,
	dwmw2@infradead.org, ebiederm@xmission.com,  graf@amazon.com,
	hpa@zytor.com, jgowans@amazon.com, kexec@lists.infradead.org,
	 krzk@kernel.org, linux-arm-kernel@lists.infradead.org,
	 linux-doc@vger.kernel.org, linux-mm@kvack.org, luto@kernel.org,
	 mark.rutland@arm.com, mingo@redhat.com,
	pasha.tatashin@soleen.com,  pbonzini@redhat.com,
	peterz@infradead.org, ptyadav@amazon.de, robh@kernel.org,
	 rostedt@goodmis.org, rppt@kernel.org, saravanak@google.com,
	 skinsburskii@linux.microsoft.com, tglx@linutronix.de,
	thomas.lendacky@amd.com,  will@kernel.org, x86@kernel.org,
	Changyuan Lyu <changyuanl@google.com>
Subject: [PATCH v6 12/14] memblock: add KHO support for reserve_mem
Date: Thu, 10 Apr 2025 22:37:43 -0700	[thread overview]
Message-ID: <20250411053745.1817356-13-changyuanl@google.com> (raw)
In-Reply-To: <20250411053745.1817356-1-changyuanl@google.com>

From: Alexander Graf <graf@amazon.com>

Linux has recently gained support for "reserve_mem": a mechanism to
allocate a region of memory early enough in boot that we can cross our
fingers and hope it stays at the same location during most boots, so we
can store, for example, ftrace buffers in it.

Thanks to KASLR, we can never be really sure that "reserve_mem"
allocations are static across kexec. Let's teach it KHO awareness so
that it serializes its reservations on kexec exit and deserializes them
again on boot, preserving the exact same mapping across kexec.

This is an example user for KHO in the KHO patch set to ensure we have
at least one (not very controversial) user in the tree before extending
KHO's use to more subsystems.

Signed-off-by: Alexander Graf <graf@amazon.com>
Co-developed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Co-developed-by: Changyuan Lyu <changyuanl@google.com>
Signed-off-by: Changyuan Lyu <changyuanl@google.com>
---
 mm/memblock.c | 205 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 205 insertions(+)

diff --git a/mm/memblock.c b/mm/memblock.c
index 456689cb73e20..3571a859f2fe1 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -18,6 +18,11 @@
 #include <linux/memblock.h>
 #include <linux/mutex.h>
 
+#ifdef CONFIG_KEXEC_HANDOVER
+#include <linux/libfdt.h>
+#include <linux/kexec_handover.h>
+#endif /* CONFIG_KEXEC_HANDOVER */
+
 #include <asm/sections.h>
 #include <linux/io.h>
 
@@ -2475,6 +2480,201 @@ int reserve_mem_release_by_name(const char *name)
 	return 1;
 }
 
+#ifdef CONFIG_KEXEC_HANDOVER
+#define MEMBLOCK_KHO_FDT "memblock" /* KHO subtree name of the memblock FDT */
+#define MEMBLOCK_KHO_NODE_COMPATIBLE "memblock-v1" /* root node "compatible" */
+#define RESERVE_MEM_KHO_NODE_COMPATIBLE "reserve-mem-v1" /* per-entry node "compatible" */
+static struct page *kho_fdt; /* outgoing FDT page, or ERR_PTR() if preparing it failed */
+
+/*
+ * KHO finalize handler: preserve every reserve_mem region and the page that
+ * holds the memblock FDT, then add the FDT as the MEMBLOCK_KHO_FDT subtree.
+ *
+ * @ser: serialization context passed in by the KHO notifier chain.
+ *
+ * Returns NOTIFY_DONE when there are no reservations. Otherwise returns the
+ * notifier-encoded errno of the first step that failed, or the encoded
+ * success value when all steps succeeded.
+ */
+static int reserve_mem_kho_finalize(struct kho_serialization *ser)
+{
+	int err, i;
+
+	if (!reserved_mem_count)
+		return NOTIFY_DONE;
+
+	/* prepare_kho_fdt() stores an ERR_PTR() in kho_fdt when it fails */
+	if (IS_ERR(kho_fdt)) {
+		err = PTR_ERR(kho_fdt);
+		pr_err("memblock FDT was not prepared successfully: %d\n", err);
+		return notifier_from_errno(err);
+	}
+
+	/*
+	 * Stop at the first failure: OR-ing several negative errnos together
+	 * would hand notifier_from_errno() a value that matches none of them.
+	 */
+	for (i = 0; i < reserved_mem_count; i++) {
+		struct reserve_mem_table *map = &reserved_mem_table[i];
+
+		err = kho_preserve_phys(ser, map->start, map->size);
+		if (err)
+			return notifier_from_errno(err);
+	}
+
+	err = kho_preserve_folio(ser, page_folio(kho_fdt));
+	if (err)
+		return notifier_from_errno(err);
+
+	err = kho_add_subtree(ser, MEMBLOCK_KHO_FDT, page_to_virt(kho_fdt));
+
+	return notifier_from_errno(err);
+}
+
+/*
+ * KHO notifier callback. A finalize command is forwarded to
+ * reserve_mem_kho_finalize() with @v as the serialization context, an abort
+ * command needs no work here, and any other command is answered with
+ * NOTIFY_BAD.
+ */
+static int reserve_mem_kho_notifier(struct notifier_block *self,
+				    unsigned long cmd, void *v)
+{
+	if (cmd == KEXEC_KHO_FINALIZE)
+		return reserve_mem_kho_finalize(v);
+
+	if (cmd == KEXEC_KHO_ABORT)
+		return NOTIFY_DONE;
+
+	return NOTIFY_BAD;
+}
+
+static struct notifier_block reserve_mem_kho_nb = { /* registered in reserve_mem_init() */
+	.notifier_call = reserve_mem_kho_notifier,
+};
+
+/*
+ * Build the memblock FDT in a newly allocated page and publish it in kho_fdt.
+ *
+ * The root node is marked MEMBLOCK_KHO_NODE_COMPATIBLE and gets one child per
+ * reserve_mem entry, named after the entry and carrying its "start" and
+ * "size". Nothing is done when there are no reservations. On failure kho_fdt
+ * is set to an ERR_PTR(): -ENOMEM if the page could not be allocated, -EINVAL
+ * if any FDT call reported an error.
+ */
+static void __init prepare_kho_fdt(void)
+{
+	struct reserve_mem_table *entry;
+	void *fdt;
+	int ret, idx;
+
+	if (reserved_mem_count == 0)
+		return;
+
+	kho_fdt = alloc_page(GFP_KERNEL);
+	if (kho_fdt == NULL) {
+		kho_fdt = ERR_PTR(-ENOMEM);
+		return;
+	}
+
+	fdt = page_to_virt(kho_fdt);
+
+	/*
+	 * The results of all FDT calls are OR-ed together and inspected once
+	 * at the end; the combined value only tells whether anything failed.
+	 */
+	ret = fdt_create(fdt, PAGE_SIZE);
+	ret |= fdt_finish_reservemap(fdt);
+
+	/* Root node */
+	ret |= fdt_begin_node(fdt, "");
+	ret |= fdt_property_string(fdt, "compatible", MEMBLOCK_KHO_NODE_COMPATIBLE);
+
+	/* One child node per reserve_mem entry */
+	for (idx = 0; idx < reserved_mem_count; idx++) {
+		entry = &reserved_mem_table[idx];
+
+		ret |= fdt_begin_node(fdt, entry->name);
+		ret |= fdt_property_string(fdt, "compatible", RESERVE_MEM_KHO_NODE_COMPATIBLE);
+		ret |= fdt_property(fdt, "start", &entry->start, sizeof(entry->start));
+		ret |= fdt_property(fdt, "size", &entry->size, sizeof(entry->size));
+		ret |= fdt_end_node(fdt);
+	}
+
+	ret |= fdt_end_node(fdt);
+	ret |= fdt_finish(fdt);
+
+	if (!ret)
+		return;
+
+	pr_err("failed to prepare memblock FDT for KHO: %d\n", ret);
+	put_page(kho_fdt);
+	kho_fdt = ERR_PTR(-EINVAL);
+}
+
+/*
+ * Late initcall: when KHO is enabled, prepare the memblock FDT and register
+ * the KHO notifier. Returns 0 when KHO is disabled, otherwise the result of
+ * register_kho_notifier().
+ */
+static int __init reserve_mem_init(void)
+{
+	if (kho_is_enabled()) {
+		prepare_kho_fdt();
+		return register_kho_notifier(&reserve_mem_kho_nb);
+	}
+
+	return 0;
+}
+late_initcall(reserve_mem_init);
+
+static void *kho_fdt_in __initdata; /* cached incoming FDT; ERR_PTR() once lookup failed */
+
+/*
+ * Look up the MEMBLOCK_KHO_FDT subtree via KHO and validate its root node.
+ *
+ * Returns a pointer to the FDT on success. On failure an ERR_PTR() is
+ * returned: the error from kho_retrieve_subtree(), -EFAULT if the folio
+ * holding the FDT cannot be restored, or -EINVAL if the root node is not
+ * compatible with MEMBLOCK_KHO_NODE_COMPATIBLE.
+ */
+static void *__init reserve_mem_kho_retrieve_fdt(void)
+{
+	phys_addr_t fdt_phys;
+	struct folio *fdt_folio;
+	void *fdt;
+	int err;
+
+	err = kho_retrieve_subtree(MEMBLOCK_KHO_FDT, &fdt_phys);
+	if (err) {
+		/* -ENOENT is returned to the caller without a log message */
+		if (err != -ENOENT)
+			pr_warn("failed to retrieve FDT '%s' from KHO: %d\n",
+				MEMBLOCK_KHO_FDT, err);
+		return ERR_PTR(err);
+	}
+
+	fdt_folio = kho_restore_folio(fdt_phys);
+	if (!fdt_folio) {
+		/*
+		 * phys_addr_t is printed with %pa (passed by reference);
+		 * %llx does not match the type when it is 32 bits wide.
+		 */
+		pr_warn("failed to restore memblock KHO FDT (%pa)\n", &fdt_phys);
+		return ERR_PTR(-EFAULT);
+	}
+
+	fdt = page_to_virt(folio_page(fdt_folio, 0));
+
+	err = fdt_node_check_compatible(fdt, 0, MEMBLOCK_KHO_NODE_COMPATIBLE);
+	if (err) {
+		pr_warn("FDT '%s' is incompatible with '%s': %d\n",
+			MEMBLOCK_KHO_FDT, MEMBLOCK_KHO_NODE_COMPATIBLE, err);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return fdt;
+}
+
+/*
+ * Try to re-create the reserve_mem entry @name from the memblock FDT
+ * retrieved via KHO, so that the caller does not have to allocate a new
+ * region.
+ *
+ * @name:  name of the reservation; also the name of its FDT child node.
+ * @size:  requested size; must equal the "size" stored in the FDT.
+ * @align: requested alignment; the stored "start" must honour it.
+ *
+ * The FDT is looked up on the first call and the outcome, including a
+ * failure stored as an ERR_PTR(), is cached in kho_fdt_in.
+ *
+ * Returns true if the reservation was added at its stored start address,
+ * false otherwise.
+ */
+static bool __init reserve_mem_kho_revive(const char *name, phys_addr_t size,
+					  phys_addr_t align)
+{
+	int err, len_start, len_size, offset;
+	const phys_addr_t *p_start, *p_size;
+	const void *fdt;
+
+	if (!kho_fdt_in)
+		kho_fdt_in = reserve_mem_kho_retrieve_fdt();
+
+	if (IS_ERR(kho_fdt_in))
+		return false;
+
+	fdt = kho_fdt_in;
+
+	offset = fdt_subnode_offset(fdt, 0, name);
+	if (offset < 0) {
+		pr_warn("FDT '%s' has no child '%s': %d\n",
+			MEMBLOCK_KHO_FDT, name, offset);
+		return false;
+	}
+	err = fdt_node_check_compatible(fdt, offset, RESERVE_MEM_KHO_NODE_COMPATIBLE);
+	if (err) {
+		pr_warn("Node '%s' is incompatible with '%s': %d\n",
+			name, RESERVE_MEM_KHO_NODE_COMPATIBLE, err);
+		return false;
+	}
+
+	p_start = fdt_getprop(fdt, offset, "start", &len_start);
+	p_size = fdt_getprop(fdt, offset, "size", &len_size);
+	if (!p_start || len_start != sizeof(*p_start) || !p_size ||
+	    len_size != sizeof(*p_size)) {
+		/* Every other rejection is logged; do not fail silently here */
+		pr_warn("KHO reserve-mem '%s' has malformed 'start'/'size' properties\n",
+			name);
+		return false;
+	}
+
+	/*
+	 * phys_addr_t values are printed with %pa (passed by reference)
+	 * rather than cast to long, which may be narrower than phys_addr_t.
+	 */
+	if (*p_start & (align - 1)) {
+		pr_warn("KHO reserve-mem '%s' has wrong alignment (%pa, %pa)\n",
+			name, &align, p_start);
+		return false;
+	}
+
+	if (*p_size != size) {
+		pr_warn("KHO reserve-mem '%s' has wrong size (%pa != %pa)\n",
+			name, p_size, &size);
+		return false;
+	}
+
+	reserved_mem_add(*p_start, size, name);
+	pr_info("Revived memory reservation '%s' from KHO\n", name);
+
+	return true;
+}
+#else
+static bool __init reserve_mem_kho_revive(const char *name, phys_addr_t size,
+					  phys_addr_t align)
+{
+	return false; /* CONFIG_KEXEC_HANDOVER disabled: nothing to revive */
+}
+#endif /* CONFIG_KEXEC_HANDOVER */
+
 /*
  * Parse reserve_mem=nn:align:name
  */
@@ -2530,6 +2730,11 @@ static int __init reserve_mem(char *p)
 	if (reserve_mem_find_by_name(name, &start, &tmp))
 		return -EBUSY;
 
+	/* Pick previous allocations up from KHO if available */
+	if (reserve_mem_kho_revive(name, size, align))
+		return 1;
+
+	/* TODO: Allocation must be outside of scratch region */
 	start = memblock_phys_alloc(size, align);
 	if (!start)
 		return -ENOMEM;
-- 
2.49.0.604.gff1f9ca942-goog



  parent reply	other threads:[~2025-04-11  5:38 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-11  5:37 [PATCH v6 00/14] kexec: introduce Kexec HandOver (KHO) Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 01/14] memblock: add MEMBLOCK_RSRV_KERN flag Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 02/14] memblock: Add support for scratch memory Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 03/14] memblock: introduce memmap_init_kho_scratch() Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 04/14] kexec: add Kexec HandOver (KHO) generation helpers Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 05/14] kexec: add KHO parsing support Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 06/14] kexec: enable KHO support for memory preservation Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 07/14] kexec: add KHO support to kexec file loads Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 08/14] kexec: add config option for KHO Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 09/14] arm64: add KHO support Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 10/14] x86/setup: use memblock_reserve_kern for memory used by kernel Changyuan Lyu
2025-04-28 22:15   ` Dave Hansen
2025-04-11  5:37 ` [PATCH v6 11/14] x86: add KHO support Changyuan Lyu
2025-04-28 22:05   ` Dave Hansen
2025-04-29  8:06     ` Mike Rapoport
2025-04-29 16:06       ` Dave Hansen
2025-04-29 16:32         ` Mike Rapoport
2025-04-29 15:53     ` Mike Rapoport
2025-04-29 16:05       ` Dave Hansen
2025-04-29 16:34         ` Mike Rapoport
2025-04-11  5:37 ` Changyuan Lyu [this message]
2025-04-22 13:31   ` [PATCH v6 12/14] memblock: add KHO support for reserve_mem Mike Rapoport
2025-04-24  8:32     ` Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 13/14] Documentation: add documentation for KHO Changyuan Lyu
2025-04-11  5:37 ` [PATCH v6 14/14] Documentation: KHO: Add memblock bindings Changyuan Lyu
2025-04-28 22:19 ` [PATCH v6 00/14] kexec: introduce Kexec HandOver (KHO) Dave Hansen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250411053745.1817356-13-changyuanl@google.com \
    --to=changyuanl@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=anthony.yznaga@oracle.com \
    --cc=arnd@arndb.de \
    --cc=ashish.kalra@amd.com \
    --cc=benh@kernel.crashing.org \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=devicetree@vger.kernel.org \
    --cc=dwmw2@infradead.org \
    --cc=ebiederm@xmission.com \
    --cc=graf@amazon.com \
    --cc=hpa@zytor.com \
    --cc=jgowans@amazon.com \
    --cc=kexec@lists.infradead.org \
    --cc=krzk@kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=luto@kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=pasha.tatashin@soleen.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ptyadav@amazon.de \
    --cc=robh@kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=rppt@kernel.org \
    --cc=saravanak@google.com \
    --cc=skinsburskii@linux.microsoft.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.lendacky@amd.com \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox