From: Andrey Ryabinin <arbn@yandex-team.com>
To: linux-kernel@vger.kernel.org
Cc: Alexander Graf <graf@amazon.com>,
James Gowans <jgowans@amazon.com>,
Mike Rapoport <rppt@kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
linux-mm@kvack.org, Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86@kernel.org, "H . Peter Anvin" <hpa@zytor.com>,
Eric Biederman <ebiederm@xmission.com>,
kexec@lists.infradead.org, Pratyush Yadav <ptyadav@amazon.de>,
Jason Gunthorpe <jgg@nvidia.com>,
Pasha Tatashin <pasha.tatashin@soleen.com>,
David Rientjes <rientjes@google.com>,
Andrey Ryabinin <arbn@yandex-team.com>
Subject: [PATCH v2 5/7] x86, kstate: Add the ability to preserve memory pages across kexec.
Date: Mon, 10 Mar 2025 13:03:16 +0100 [thread overview]
Message-ID: <20250310120318.2124-6-arbn@yandex-team.com> (raw)
In-Reply-To: <20250310120318.2124-1-arbn@yandex-team.com>
This adds the ability to specify pages of memory that kstate needs to
preserve across kexec.
kstate_register_page() stores the struct page in a special list of
'struct kpage_state' entries. At the kexec reboot stage this list is
iterated and the order and physical address of each page are saved into
kstate's data stream. The new kernel after kexec reads them back from
the stream and marks the memory as reserved to keep it intact.
Signed-off-by: Andrey Ryabinin <arbn@yandex-team.com>
---
include/linux/kstate.h | 30 ++++++++++
kernel/kexec_core.c | 3 +-
kernel/kstate.c | 124 +++++++++++++++++++++++++++++++++++++++++
3 files changed, 156 insertions(+), 1 deletion(-)
diff --git a/include/linux/kstate.h b/include/linux/kstate.h
index ae583d090111..36cfefd87572 100644
--- a/include/linux/kstate.h
+++ b/include/linux/kstate.h
@@ -88,6 +88,8 @@ struct kstate_field {
};
enum kstate_ids {
+ KSTATE_RSVD_MEM_ID = 1, /* id used by kstate_preserved_mem (the list of preserved pages) */
+ KSTATE_STRUCT_PAGE_ID, /* id used by page_state (the saved struct page fields) */
KSTATE_LAST_ID = -1,
};
@@ -124,6 +126,8 @@ static inline unsigned long kstate_get_ulong(struct kstate_stream *stream)
return ret;
}
+extern struct kstate_description page_state;
+
#ifdef CONFIG_KSTATE
void kstate_init(void);
@@ -141,6 +145,12 @@ void restore_kstate(struct kstate_stream *stream, int id,
const struct kstate_description *kstate, void *obj);
int kstate_load_migrate_buf(struct kimage *image);
+int kstate_page_save(struct kstate_stream *stream, void *obj,
+ const struct kstate_field *field);
+int kstate_register_page(struct page *page, int order);
+
+bool kstate_range_is_preserved(unsigned long start, unsigned long end);
+
#else
static inline void kstate_init(void) { }
@@ -150,6 +160,11 @@ static inline int kstate_save_state(void) { return 0; }
static inline void free_kstate_stream(void) { }
static inline int kstate_load_migrate_buf(struct kimage *image) { return 0; }
+
+static inline bool kstate_range_is_preserved(unsigned long start,
+					     unsigned long end)
+{ return false; } /* stub for !CONFIG_KSTATE builds: no range is reported as preserved */
+
#endif
@@ -176,6 +191,21 @@ static inline int kstate_load_migrate_buf(struct kimage *image) { return 0; }
.offset = offsetof(_state, _f), \
}
+#define KSTATE_PAGE(_f, _state) /* _f: member of _state (read as a 'struct page *' by kstate_page_save()); expands to three field entries */ \
+ { /* entry 1: custom save hook; kstate_page_save() queues the page via kstate_register_page() */ \
+ .name = "page", \
+ .flags = KS_CUSTOM, \
+ .offset = offsetof(_state, _f), \
+ .save = kstate_page_save, \
+ }, \
+ KSTATE_ADDRESS(_f, _state, KS_VMEMMAP_ADDR), /* entry 2: presumably migrates the pointer value as a vmemmap address - confirm against KSTATE_ADDRESS */ \
+ { /* entry 3: the pointed-to struct page, laid out as described by page_state */ \
+ .name = "struct_page", \
+ .flags = KS_STRUCT | KS_POINTER, \
+ .offset = offsetof(_state, _f), \
+ .ksd = &page_state, \
+ }
+
#define KSTATE_END_OF_LIST() { \
.flags = KS_END,\
}
diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c
index 7c79addeb93b..5d001b7a9e44 100644
--- a/kernel/kexec_core.c
+++ b/kernel/kexec_core.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
+#include <linux/kstate.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
@@ -261,7 +262,7 @@ int kimage_is_destination_range(struct kimage *image,
return 1;
}
- return 0;
+ return kstate_range_is_preserved(start, end);
}
int kimage_is_control_page(struct kimage *image,
diff --git a/kernel/kstate.c b/kernel/kstate.c
index d35996287b76..68a1272abceb 100644
--- a/kernel/kstate.c
+++ b/kernel/kstate.c
@@ -309,6 +309,13 @@ int kstate_register(struct kstate_description *state, void *obj)
return 0;
}
+/*
+ * kstate_page_save - ->save hook installed by KSTATE_PAGE().
+ * @stream: kstate stream (not used here)
+ * @obj:    read as a pointer to the 'struct page *' being described
+ * @field:  field descriptor (not used here)
+ *
+ * Nothing is written to @stream. The page is queued as an order-0 entry on
+ * the preserved pages list through kstate_register_page().
+ *
+ * Return: 0 on success or the negative errno from kstate_register_page().
+ * The error must not be dropped: a page that failed to register is not on
+ * the list and therefore is never saved for the next kernel to reserve.
+ */
+int kstate_page_save(struct kstate_stream *stream, void *obj,
+		     const struct kstate_field *field)
+{
+	return kstate_register_page(*(struct page **)obj, 0);
+}
+
static int __init setup_kstate(char *arg)
{
char *end;
@@ -323,7 +330,124 @@ static int __init setup_kstate(char *arg)
}
early_param("kstate_stream", setup_kstate);
+/*
+ * TODO: probably should use folio instead/in addition,
+ * also will need to think/decide what fields
+ * to preserve or not
+ */
+struct kstate_description page_state = { /* referenced by KSTATE_PAGE() through its .ksd member */
+ .name = "struct_page",
+ .id = KSTATE_STRUCT_PAGE_ID,
+ .state_list = LIST_HEAD_INIT(page_state.state_list),
+ .fields = (const struct kstate_field[]) {
+ KSTATE_BASE_TYPE(_mapcount, struct page, atomic_t), /* only _mapcount and _refcount of struct page are listed */
+ KSTATE_BASE_TYPE(_refcount, struct page, atomic_t),
+ KSTATE_END_OF_LIST()
+ },
+};
+
+struct state_entry preserved_se; /* handed to __kstate_register() in kstate_init() */
+
+struct preserved_pages { /* bookkeeping for the pages registered for preservation */
+ unsigned int nr_pages; /* incremented once per kstate_register_page() call */
+ struct list_head list; /* head of the struct kpage_state entries */
+};
+struct kpage_state { /* one registered page block */
+ struct list_head list; /* node on preserved_pages.list */
+ u8 order; /* order given at registration; restore reserves PAGE_SIZE << order bytes */
+ struct page *page; /* page given to kstate_register_page() */
+};
+
+struct preserved_pages preserved_pages = { /* instance registered with kstate_preserved_mem in kstate_init() */
+ .list = LIST_HEAD_INIT(preserved_pages.list)
+};
+
+/*
+ * kstate_register_page - queue a page block to be preserved across kexec.
+ * @page:  page to preserve; must not be NULL
+ * @order: order of the block; kept in a u8, so it must lie in 0..255
+ *
+ * Allocates a struct kpage_state, links it on preserved_pages.list and
+ * increments preserved_pages.nr_pages. No locking is done here.
+ * NOTE(review): callers presumably serialize registration - confirm.
+ *
+ * Return: 0 on success, -EINVAL for a NULL @page or an @order that does not
+ * fit the u8 field, -ENOMEM if the list entry cannot be allocated.
+ */
+int kstate_register_page(struct page *page, int order)
+{
+	struct kpage_state *state;
+
+	/*
+	 * A NULL page would later be passed to page_to_phys(), and an order
+	 * that does not round-trip through u8 (negative or > 255) would be
+	 * saved as a different block size than the caller asked for.
+	 */
+	if (!page || order != (u8)order)
+		return -EINVAL;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	state->page = page;
+	state->order = order;
+	list_add(&state->list, &preserved_pages.list);
+	preserved_pages.nr_pages++;
+	return 0;
+}
+
+/*
+ * ->save hook of the "pages" field of kstate_preserved_mem.
+ *
+ * Walks preserved_pages.list and, for every entry, writes the order
+ * (sizeof(u8) bytes) followed by the physical address of the page
+ * (sizeof(unsigned long) bytes) to @stream. @obj and @field are not used.
+ *
+ * Return: 0 on success, otherwise the first non-zero value returned by
+ * kstate_save_data().
+ */
+static int kstate_pages_save(struct kstate_stream *stream, void *obj,
+			     const struct kstate_field *field)
+{
+	struct kpage_state *entry;
+
+	list_for_each_entry(entry, &preserved_pages.list, list) {
+		unsigned long phys = page_to_phys(entry->page);
+		int err;
+
+		err = kstate_save_data(stream, &entry->order,
+				       sizeof(entry->order));
+		if (!err)
+			err = kstate_save_data(stream, &phys, sizeof(phys));
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * kstate_range_is_preserved - does [@start, @end] overlap a preserved page?
+ * @start: first byte of the range
+ * @end:   last byte of the range (inclusive)
+ *
+ * kimage_is_destination_range() forwards its own @start/@end here; in
+ * kexec_core.c those are physical byte addresses, not pfns.
+ *
+ * Every registered entry covers PAGE_SIZE << order bytes starting at the
+ * page's boot physical address, the same extent kstate_pages_restore()
+ * reserves. Both bounds are therefore computed in bytes: a raw pfn cannot
+ * be compared against @start/@end, and 'order << PAGE_SHIFT' is not the
+ * block size (it is 0 for an order-0 page).
+ *
+ * Return: true if the range intersects any registered page block.
+ */
+bool kstate_range_is_preserved(unsigned long start, unsigned long end)
+{
+	struct kpage_state *p_state;
+
+	list_for_each_entry(p_state, &preserved_pages.list, list) {
+		unsigned long pstart, pend;
+
+		pstart = page_to_boot_pfn(p_state->page) << PAGE_SHIFT;
+		pend = pstart + (PAGE_SIZE << p_state->order) - 1;
+		if (end >= pstart && start <= pend)
+			return true;
+	}
+	return false;
+}
+
+/*
+ * ->restore hook of the "pages" field of kstate_preserved_mem.
+ * @stream: stream positioned at the first (order, physical address) pair
+ * @obj:    struct preserved_pages whose nr_pages gives the number of pairs
+ * @field:  field descriptor (not used here)
+ *
+ * Reads nr_pages pairs in the layout written by kstate_pages_save() and
+ * reserves PAGE_SIZE << order bytes at each address with memblock_reserve().
+ *
+ * All pairs are always consumed, even after a failed reservation, so the
+ * stream position stays consistent and the remaining pages still get
+ * reserved.
+ *
+ * Return: 0 if every range was reserved, otherwise the first error returned
+ * by memblock_reserve().
+ */
+static int __init kstate_pages_restore(struct kstate_stream *stream, void *obj,
+				       const struct kstate_field *field)
+{
+	struct preserved_pages *preserved_pages = obj;
+	unsigned int i;	/* same type as nr_pages: no signed truncation */
+	int err = 0;
+
+	for (i = 0; i < preserved_pages->nr_pages; i++) {
+		int order = kstate_get_byte(stream);
+		unsigned long phys = kstate_get_ulong(stream);
+		int ret;
+
+		ret = memblock_reserve(phys, PAGE_SIZE << order);
+		if (ret && !err)
+			err = ret;
+	}
+	return err;
+}
+
+struct kstate_description kstate_preserved_mem = { /* registered for preserved_pages in kstate_init() */
+ .name = "preserved_range",
+ .id = KSTATE_RSVD_MEM_ID,
+ .state_list = LIST_HEAD_INIT(kstate_preserved_mem.state_list),
+ .fields = (const struct kstate_field[]) {
+ KSTATE_BASE_TYPE(nr_pages, struct preserved_pages, unsigned int), /* listed before "pages": kstate_pages_restore() loops nr_pages times */
+ {
+ .name = "pages",
+ .flags = KS_CUSTOM,
+ .size = sizeof(struct preserved_pages),
+ .save = kstate_pages_save, /* writes order + physical address for each list entry */
+ .restore = kstate_pages_restore, /* reads them back and calls memblock_reserve() */
+ },
+
+ KSTATE_END_OF_LIST()
+ },
+};
+
void __init kstate_init(void)
{
memblock_reserve(kstate_stream_addr, kstate_size); /* presumably keeps the incoming kstate stream itself from being reused - confirm */
+ __kstate_register(&kstate_preserved_mem, &preserved_pages, /* attach the preserved pages list to its description */
+ &preserved_se);
}
--
2.45.3
next prev parent reply other threads:[~2025-03-10 12:04 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-03-10 12:03 [PATCH v2 0/7] KSTATE: a mechanism to migrate some part of the kernel state " Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 1/7] kstate: Add kstate - a mechanism to describe and migrate " Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 2/7] kstate, kexec, x86: transfer kstate data " Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 3/7] kexec: exclude control pages from the destination addresses Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 4/7] kexec, kstate: delay loading of kexec segments Andrey Ryabinin
2025-03-11 11:31 ` kernel test robot
2025-03-11 12:25 ` kernel test robot
2025-03-10 12:03 ` Andrey Ryabinin [this message]
2025-03-10 12:03 ` [PATCH v2 6/7] kexec, kstate: save kstate data before kexec'ing Andrey Ryabinin
2025-03-10 12:03 ` [PATCH v2 7/7] kstate, test: add test module for testing kstate subsystem Andrey Ryabinin
2025-03-11 2:27 ` [PATCH v2 0/7] KSTATE: a mechanism to migrate some part of the kernel state across kexec Cong Wang
2025-03-11 12:19 ` Andrey Ryabinin
2025-04-28 23:01 ` Chris Li
2025-04-28 23:01 ` Chris Li
2025-05-05 14:35 ` Andrey Ryabinin
2025-05-07 6:11 ` Chris Li
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250310120318.2124-6-arbn@yandex-team.com \
--to=arbn@yandex-team.com \
--cc=akpm@linux-foundation.org \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=ebiederm@xmission.com \
--cc=graf@amazon.com \
--cc=hpa@zytor.com \
--cc=jgg@nvidia.com \
--cc=jgowans@amazon.com \
--cc=kexec@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@redhat.com \
--cc=pasha.tatashin@soleen.com \
--cc=ptyadav@amazon.de \
--cc=rientjes@google.com \
--cc=rppt@kernel.org \
--cc=tglx@linutronix.de \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox